fml  0.1-0
Fused Matrix Library
atomics.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_INTERNALS_ATOMICS_H
6 #define FML_GPU_INTERNALS_ATOMICS_H
7 #pragma once
8 
9 
10 #include <cuda_runtime.h>
11 
12 #include "../../_internals/types.hh"
13 
14 #include "../../_internals/arraytools/src/arraytools.cuh"
15 
16 
17 namespace fml
18 {
19  namespace atomics
20  {
21  // Based on https://forums.developer.nvidia.com/t/atomicmax-with-floats/8791/10
// Atomic maximum for single-precision floats.
//
// Replaces *address with val whenever val compares greater than the value
// currently stored there, using a compare-and-swap loop on the 32-bit
// pattern of the float.
//
// @param address Location of the float to update.
// @param val     Candidate maximum.
// @return The last value observed at address, i.e. one that val does not
//         exceed. This is the stored value itself when no update was needed.
static __device__ float atomicMaxf(float *address, float val)
{
  // atomicCAS works on integers, so view the float through its raw bits.
  int *const bits = (int*) address;
  const int val_bits = __float_as_int(val);
  int seen = *bits;

  // Keep trying while val still beats the most recently observed value.
  // After a successful swap, `seen` holds the previous (smaller) value, so
  // one more pass runs; that pass reads back whatever is stored now.
  while (val > __int_as_float(seen))
    seen = atomicCAS(bits, seen, val_bits);

  return __int_as_float(seen);
}
36 
// Atomic maximum for double-precision floats.
//
// Replaces *address with val whenever val compares greater than the value
// currently stored there, using a compare-and-swap loop on the 64-bit
// pattern of the double.
//
// @param address Location of the double to update.
// @param val     Candidate maximum.
// @return The last value observed at address, i.e. one that val does not
//         exceed. This is the stored value itself when no update was needed.
static __device__ double atomicMaxf(double *address, double val)
{
  // atomicCAS works on integers, so view the double through its raw bits.
  unsigned long long *const bits = (unsigned long long*) address;
  const unsigned long long val_bits = __double_as_longlong(val);
  unsigned long long seen = *bits;

  // Keep trying while val still beats the most recently observed value.
  // After a successful swap, `seen` holds the previous (smaller) value, so
  // one more pass runs; that pass reads back whatever is stored now.
  while (val > __longlong_as_double(seen))
    seen = atomicCAS(bits, seen, val_bits);

  return __longlong_as_double(seen);
}
51 
52 
53 
// Atomic minimum for single-precision floats.
//
// Stores fmin(val, *address) back to *address with a compare-and-swap loop
// on the 32-bit pattern of the float. A swap is always attempted, even when
// the stored value is already the smaller one.
//
// @param address Location of the float to update.
// @param val     Candidate minimum.
// @return The value that was stored at address immediately before the
//         successful swap.
static __device__ float atomicMinf(float *address, float val)
{
  // atomicCAS works on integers, so view the float through its raw bits.
  int *const bits = (int*) address;
  int seen = *bits;

  for (;;)
  {
    const int expected = seen;
    const float smaller = fmin(val, __int_as_float(expected));
    seen = atomicCAS(bits, expected, __float_as_int(smaller));

    // The swap took effect only if the location still held `expected`.
    if (seen == expected)
      break;
  }

  return __int_as_float(seen);
}
69 
// Atomic minimum for double-precision floats.
//
// Stores fmin(val, *address) back to *address with a compare-and-swap loop
// on the 64-bit pattern of the double. A swap is always attempted, even when
// the stored value is already the smaller one.
//
// @param address Location of the double to update.
// @param val     Candidate minimum.
// @return The value that was stored at address immediately before the
//         successful swap.
static __device__ double atomicMinf(double *address, double val)
{
  // atomicCAS works on integers, so view the double through its raw bits.
  unsigned long long *const bits = (unsigned long long*) address;
  unsigned long long seen = *bits;

  for (;;)
  {
    const unsigned long long expected = seen;
    const double smaller = fmin(val, __longlong_as_double(expected));
    seen = atomicCAS(bits, expected, __double_as_longlong(smaller));

    // The swap took effect only if the location still held `expected`.
    if (seen == expected)
      break;
  }

  return __longlong_as_double(seen);
}
84 
85 
86 
// Atomic multiply for single-precision floats.
//
// Replaces *address with val * (*address) using a compare-and-swap loop on
// the 32-bit pattern of the float.
//
// @param address Location of the float to update.
// @param val     Factor to multiply the stored value by.
// @return The value that was stored at address immediately before the
//         successful swap.
static __device__ float atomicMul(float *address, float val)
{
  // atomicCAS works on integers, so view the float through its raw bits.
  int *address_int = (int*) address;
  int old = *address_int;
  int assumed;

  do
  {
    assumed = old;
    // `assumed` already holds the float's bit pattern: reinterpret it back
    // to a float (__int_as_float) before multiplying. Using __float_as_int
    // here would multiply by the integer bit pattern instead of the value.
    old = atomicCAS(address_int, assumed,
      __float_as_int(val * __int_as_float(assumed)));
    // Integer comparison, so a NaN result cannot cause an endless loop.
  } while (old != assumed);

  return __int_as_float(old);
}
102 
// Atomic multiply for double-precision floats.
//
// Replaces *address with val * (*address) using a compare-and-swap loop on
// the 64-bit pattern of the double.
//
// @param address Location of the double to update.
// @param val     Factor to multiply the stored value by.
// @return The value that was stored at address immediately before the
//         successful swap.
static __device__ double atomicMul(double *address, double val)
{
  // atomicCAS works on integers, so view the double through its raw bits.
  unsigned long long *address_ull = (unsigned long long*) address;
  unsigned long long old = *address_ull;
  unsigned long long assumed;

  do
  {
    assumed = old;
    // `assumed` already holds the double's bit pattern: reinterpret it back
    // to a double (__longlong_as_double) before multiplying. Using
    // __double_as_longlong here would multiply by the integer bit pattern.
    old = atomicCAS(address_ull, assumed,
      (unsigned long long) __double_as_longlong(val * __longlong_as_double(assumed)));
    // Integer comparison, so a NaN result cannot cause an endless loop.
  } while (old != assumed);

  // Reinterpret the full 64-bit pattern; a 32-bit float cast would truncate
  // it and return garbage.
  return __longlong_as_double(old);
}
118  }
119 }
120 
121 
122 #endif
fml
Core namespace.
Definition: dimops.hh:10