@@ -60,6 +60,42 @@ namespace cuBQL {
6060#endif
6161 }
6262
/*! iw - note: atomic min for doubles. Unlike a CAS-based variant,
    this overload gets its atomicity purely from OpenMP `atomic`
    pragmas, so it is only atomic when the translation unit is
    built with OpenMP enabled (otherwise the pragmas are ignored
    and this degrades to a plain, non-atomic min).

    \param ptr   location to be lowered; must point to a valid double
    \param value candidate minimum; on return `*ptr` is no larger
                 than `value`

    NaN handling: every comparison below is written so that it
    evaluates to "nothing to do" when either operand is NaN. A NaN
    `value` is therefore ignored, and a NaN that is already stored in
    `*ptr` is left alone. */
inline void atomic_min(double *ptr, double value)
{
#ifdef __NVCOMPILER
# if 1
  // nvc++ path: built from an atomic *exchange* only (presumably
  // because `omp atomic compare` was not usable with this compiler -
  // TODO confirm). We swap our candidate in; if the value we
  // displaced was actually smaller, we have just made things worse
  // and must swap that smaller value back in. Note that `*ptr` can
  // therefore briefly hold a value larger than the true minimum.
  double &mem = *ptr;

  // Cheap early-out. Written as !(a < b) rather than (b <= a) so that
  // a NaN on either side takes the early return; with the `<=` form a
  // NaN fell through into the loop below and the two values were then
  // exchanged back and forth forever.
  if (!(value < mem)) return;

  while (1) {
    double wasBefore;
#pragma omp atomic capture
    { wasBefore = mem; mem = value; }

    // Done unless the displaced value was strictly smaller than what
    // we just stored. A NaN `wasBefore` (written by some other
    // thread) also ends the loop instead of ping-ponging.
    if (!(wasBefore < value)) break;

    // We overwrote a smaller value: retry with that one.
    value = wasBefore;
  }
# else
  // Disabled alternative: compare-exchange on the double's 64-bit
  // pattern viewed as a long long int.
  double current = *(volatile double *)ptr;
  while (current > value) {
    bool wasChanged
      = ((std::atomic<long long int>*)ptr)
      ->compare_exchange_weak((long long int &)current,(long long int &)value);
    if (wasChanged) break;
  }
# endif
#else
  // Every other compiler: a single conditional update. `atomic
  // compare` is an OpenMP 5.1 feature, and the statement below has to
  // keep exactly this `if (x > expr) { x = expr; }` shape to be
  // accepted by the directive.
  double &x = *ptr;
#pragma omp atomic compare
  if (x > value) { x = value; }
#endif
}
98+
6399 /* ! iw - note: this implementation of atomic min/max via atomic
64100 compare-exchange (CAS); which is certainly not optimal on any
65101 sort of modern GPU - but it works in any C++-21 compliant
@@ -96,6 +132,42 @@ namespace cuBQL {
96132#endif
97133 }
98134
/*! iw - note: atomic max for doubles. Unlike a CAS-based variant,
    this overload gets its atomicity purely from OpenMP `atomic`
    pragmas, so it is only atomic when the translation unit is
    built with OpenMP enabled (otherwise the pragmas are ignored
    and this degrades to a plain, non-atomic max).

    \param ptr   location to be raised; must point to a valid double
    \param value candidate maximum; on return `*ptr` is no smaller
                 than `value`

    NaN handling: every comparison below is written so that it
    evaluates to "nothing to do" when either operand is NaN. A NaN
    `value` is therefore ignored, and a NaN that is already stored in
    `*ptr` is left alone. */
inline void atomic_max(double *ptr, double value)
{
#ifdef __NVCOMPILER
# if 1
  // nvc++ path: built from an atomic *exchange* only (presumably
  // because `omp atomic compare` was not usable with this compiler -
  // TODO confirm). We swap our candidate in; if the value we
  // displaced was actually larger, we have just made things worse
  // and must swap that larger value back in. Note that `*ptr` can
  // therefore briefly hold a value smaller than the true maximum.
  double &mem = *ptr;

  // Cheap early-out. Written as !(a > b) rather than (b >= a) so that
  // a NaN on either side takes the early return; with the `>=` form a
  // NaN fell through into the loop below and the two values were then
  // exchanged back and forth forever.
  if (!(value > mem)) return;

  while (1) {
    double wasBefore;
#pragma omp atomic capture
    { wasBefore = mem; mem = value; }

    // Done unless the displaced value was strictly larger than what
    // we just stored. A NaN `wasBefore` (written by some other
    // thread) also ends the loop instead of ping-ponging.
    if (!(wasBefore > value)) break;

    // We overwrote a larger value: retry with that one.
    value = wasBefore;
  }
# else
  // Disabled alternative: compare-exchange on the double's 64-bit
  // pattern viewed as a long long int.
  double current = *(volatile double *)ptr;
  while (current < value) {
    bool wasChanged
      = ((std::atomic<long long int>*)ptr)
      ->compare_exchange_weak((long long int &)current,(long long int &)value);
    if (wasChanged) break;
  }
# endif
#else
  // Every other compiler: a single conditional update. `atomic
  // compare` is an OpenMP 5.1 feature, and the statement below has to
  // keep exactly this `if (x < expr) { x = expr; }` shape to be
  // accepted by the directive.
  double &x = *ptr;
#pragma omp atomic compare
  if (x < value) { x = value; }
#endif
}
170+
99171 template <typename T, int D>
100172 inline void v_atomic_min (vec_t <T,D> *ptr, vec_t <T,D> v);
101173 template <typename T, int D>
0 commit comments