xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_atomic.h (revision e64fe029e9d3ce476e77a478318e0c3cd201ff08)
1 /*
2  * kmp_atomic.h - ATOMIC header file
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_ATOMIC_H
14 #define KMP_ATOMIC_H
15 
16 #include "kmp_lock.h"
17 #include "kmp_os.h"
18 
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
23 // C++ build port.
24 // Intel compiler does not support _Complex datatype on win.
25 // Intel compiler supports _Complex datatype on lin and mac.
26 // On the other hand, there is a stack alignment problem on lin_32 and mac_32
27 // if the rhs is a cmplx80 or cmplx128 typedef'ed datatype.
28 // The decision is: to use compiler supported _Complex type on lin and mac,
29 //                  to use typedef'ed types on win.
30 // Condition for WIN64 was modified in anticipation of 10.1 build compiler.
31 
32 #if defined(__cplusplus) && (KMP_OS_WINDOWS)
33 // create shortcuts for c99 complex types
34 
35 // Visual Studio cannot have function parameters that have the
36 // align __declspec attribute, so we must remove it. (Compiler Error C2719)
37 #if KMP_COMPILER_MSVC
38 #undef KMP_DO_ALIGN
39 #define KMP_DO_ALIGN(alignment) /* Nothing */
40 #endif
41 
42 #if defined(_MSC_VER) && (_MSC_VER < 1600) && defined(_DEBUG)
43 // Workaround for the problem of _DebugHeapTag unresolved external.
44 // This problem prevented using our static debug library for C tests
45 // compiled with the /MDd option (the library itself is built with /MTd).
46 #undef _DEBUG
47 #define _DEBUG_TEMPORARILY_UNSET_
48 #endif
49 
50 #include <complex>
51 
// Complex division lhs / rhs using the textbook formula
//   (a + bi) / (c + di) = ((ac + bd) + (bc - ad)i) / (c^2 + d^2).
// The denominator and both numerators are held in type_rhs; the quotient is
// returned as std::complex<type_lhs>. No scaling is applied to the operands.
template <typename type_lhs, typename type_rhs>
std::complex<type_lhs> __kmp_lhs_div_rhs(const std::complex<type_lhs> &lhs,
                                         const std::complex<type_rhs> &rhs) {
  const type_lhs lhs_re = lhs.real();
  const type_lhs lhs_im = lhs.imag();
  const type_rhs rhs_re = rhs.real();
  const type_rhs rhs_im = rhs.imag();

  // |rhs|^2
  const type_rhs norm = rhs_re * rhs_re + rhs_im * rhs_im;
  // Real and imaginary parts of lhs * conj(rhs).
  const type_rhs num_re = (lhs_re * rhs_re + lhs_im * rhs_im);
  const type_rhs num_im = (lhs_im * rhs_re - lhs_re * rhs_im);

  return std::complex<type_lhs>(num_re / norm, num_im / norm);
}
65 
66 // complex8
67 struct __kmp_cmplx64_t : std::complex<double> {
68 
69   __kmp_cmplx64_t() : std::complex<double>() {}
70 
71   __kmp_cmplx64_t(const std::complex<double> &cd) : std::complex<double>(cd) {}
72 
73   void operator/=(const __kmp_cmplx64_t &rhs) {
74     std::complex<double> lhs = *this;
75     *this = __kmp_lhs_div_rhs(lhs, rhs);
76   }
77 
78   __kmp_cmplx64_t operator/(const __kmp_cmplx64_t &rhs) {
79     std::complex<double> lhs = *this;
80     return __kmp_lhs_div_rhs(lhs, rhs);
81   }
82 };
83 typedef struct __kmp_cmplx64_t kmp_cmplx64;
84 
85 // complex4
86 struct __kmp_cmplx32_t : std::complex<float> {
87 
88   __kmp_cmplx32_t() : std::complex<float>() {}
89 
90   __kmp_cmplx32_t(const std::complex<float> &cf) : std::complex<float>(cf) {}
91 
92   __kmp_cmplx32_t operator+(const __kmp_cmplx32_t &b) {
93     std::complex<float> lhs = *this;
94     std::complex<float> rhs = b;
95     return (lhs + rhs);
96   }
97   __kmp_cmplx32_t operator-(const __kmp_cmplx32_t &b) {
98     std::complex<float> lhs = *this;
99     std::complex<float> rhs = b;
100     return (lhs - rhs);
101   }
102   __kmp_cmplx32_t operator*(const __kmp_cmplx32_t &b) {
103     std::complex<float> lhs = *this;
104     std::complex<float> rhs = b;
105     return (lhs * rhs);
106   }
107 
108   __kmp_cmplx32_t operator+(const kmp_cmplx64 &b) {
109     kmp_cmplx64 t = kmp_cmplx64(*this) + b;
110     std::complex<double> d(t);
111     std::complex<float> f(d);
112     __kmp_cmplx32_t r(f);
113     return r;
114   }
115   __kmp_cmplx32_t operator-(const kmp_cmplx64 &b) {
116     kmp_cmplx64 t = kmp_cmplx64(*this) - b;
117     std::complex<double> d(t);
118     std::complex<float> f(d);
119     __kmp_cmplx32_t r(f);
120     return r;
121   }
122   __kmp_cmplx32_t operator*(const kmp_cmplx64 &b) {
123     kmp_cmplx64 t = kmp_cmplx64(*this) * b;
124     std::complex<double> d(t);
125     std::complex<float> f(d);
126     __kmp_cmplx32_t r(f);
127     return r;
128   }
129 
130   void operator/=(const __kmp_cmplx32_t &rhs) {
131     std::complex<float> lhs = *this;
132     *this = __kmp_lhs_div_rhs(lhs, rhs);
133   }
134 
135   __kmp_cmplx32_t operator/(const __kmp_cmplx32_t &rhs) {
136     std::complex<float> lhs = *this;
137     return __kmp_lhs_div_rhs(lhs, rhs);
138   }
139 
140   void operator/=(const kmp_cmplx64 &rhs) {
141     std::complex<float> lhs = *this;
142     *this = __kmp_lhs_div_rhs(lhs, rhs);
143   }
144 
145   __kmp_cmplx32_t operator/(const kmp_cmplx64 &rhs) {
146     std::complex<float> lhs = *this;
147     return __kmp_lhs_div_rhs(lhs, rhs);
148   }
149 };
150 typedef struct __kmp_cmplx32_t kmp_cmplx32;
151 
152 // complex10
153 struct KMP_DO_ALIGN(16) __kmp_cmplx80_t : std::complex<long double> {
154 
155   __kmp_cmplx80_t() : std::complex<long double>() {}
156 
157   __kmp_cmplx80_t(const std::complex<long double> &cld)
158       : std::complex<long double>(cld) {}
159 
160   void operator/=(const __kmp_cmplx80_t &rhs) {
161     std::complex<long double> lhs = *this;
162     *this = __kmp_lhs_div_rhs(lhs, rhs);
163   }
164 
165   __kmp_cmplx80_t operator/(const __kmp_cmplx80_t &rhs) {
166     std::complex<long double> lhs = *this;
167     return __kmp_lhs_div_rhs(lhs, rhs);
168   }
169 };
170 typedef KMP_DO_ALIGN(16) struct __kmp_cmplx80_t kmp_cmplx80;
171 
172 // complex16
173 #if KMP_HAVE_QUAD
174 struct __kmp_cmplx128_t : std::complex<_Quad> {
175 
176   __kmp_cmplx128_t() : std::complex<_Quad>() {}
177 
178   __kmp_cmplx128_t(const std::complex<_Quad> &cq) : std::complex<_Quad>(cq) {}
179 
180   void operator/=(const __kmp_cmplx128_t &rhs) {
181     std::complex<_Quad> lhs = *this;
182     *this = __kmp_lhs_div_rhs(lhs, rhs);
183   }
184 
185   __kmp_cmplx128_t operator/(const __kmp_cmplx128_t &rhs) {
186     std::complex<_Quad> lhs = *this;
187     return __kmp_lhs_div_rhs(lhs, rhs);
188   }
189 };
190 typedef struct __kmp_cmplx128_t kmp_cmplx128;
191 #endif /* KMP_HAVE_QUAD */
192 
193 #ifdef _DEBUG_TEMPORARILY_UNSET_
194 #undef _DEBUG_TEMPORARILY_UNSET_
195 // Set it back now
196 #define _DEBUG 1
197 #endif
198 
199 #else
200 // create shortcuts for c99 complex types
// In this branch the kmp complex names are plain aliases for the compiler's
// built-in _Complex types; the _Quad flavour exists only when KMP_HAVE_QUAD
// is set.
201 typedef float _Complex kmp_cmplx32;
202 typedef double _Complex kmp_cmplx64;
203 typedef long double _Complex kmp_cmplx80;
204 #if KMP_HAVE_QUAD
205 typedef _Quad _Complex kmp_cmplx128;
206 #endif
207 #endif
208 
209 // Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad
210 // and kmp_cmplx128) on IA-32 architecture. The following aligned structures
211 // are implemented to support the old alignment in 10.1, 11.0, 11.1 and
212 // introduce the new alignment in 12.0. See CQ88405.
213 #if KMP_ARCH_X86 && KMP_HAVE_QUAD
214 
215 // 4-byte aligned structures for backward compatibility.
216 
217 #pragma pack(push, 4)
218 
219 struct KMP_DO_ALIGN(4) Quad_a4_t {
220   _Quad q;
221 
222   Quad_a4_t() : q() {}
223   Quad_a4_t(const _Quad &cq) : q(cq) {}
224 
225   Quad_a4_t operator+(const Quad_a4_t &b) {
226     _Quad lhs = (*this).q;
227     _Quad rhs = b.q;
228     return (Quad_a4_t)(lhs + rhs);
229   }
230 
231   Quad_a4_t operator-(const Quad_a4_t &b) {
232     _Quad lhs = (*this).q;
233     _Quad rhs = b.q;
234     return (Quad_a4_t)(lhs - rhs);
235   }
236   Quad_a4_t operator*(const Quad_a4_t &b) {
237     _Quad lhs = (*this).q;
238     _Quad rhs = b.q;
239     return (Quad_a4_t)(lhs * rhs);
240   }
241 
242   Quad_a4_t operator/(const Quad_a4_t &b) {
243     _Quad lhs = (*this).q;
244     _Quad rhs = b.q;
245     return (Quad_a4_t)(lhs / rhs);
246   }
247 };
248 
249 struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t {
250   kmp_cmplx128 q;
251 
252   kmp_cmplx128_a4_t() : q() {}
253 
254 #if defined(__cplusplus) && (KMP_OS_WINDOWS)
255   kmp_cmplx128_a4_t(const std::complex<_Quad> &c128) : q(c128) {}
256 #endif
257   kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {}
258 
259   kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) {
260     kmp_cmplx128 lhs = (*this).q;
261     kmp_cmplx128 rhs = b.q;
262     return (kmp_cmplx128_a4_t)(lhs + rhs);
263   }
264   kmp_cmplx128_a4_t operator-(const kmp_cmplx128_a4_t &b) {
265     kmp_cmplx128 lhs = (*this).q;
266     kmp_cmplx128 rhs = b.q;
267     return (kmp_cmplx128_a4_t)(lhs - rhs);
268   }
269   kmp_cmplx128_a4_t operator*(const kmp_cmplx128_a4_t &b) {
270     kmp_cmplx128 lhs = (*this).q;
271     kmp_cmplx128 rhs = b.q;
272     return (kmp_cmplx128_a4_t)(lhs * rhs);
273   }
274 
275   kmp_cmplx128_a4_t operator/(const kmp_cmplx128_a4_t &b) {
276     kmp_cmplx128 lhs = (*this).q;
277     kmp_cmplx128 rhs = b.q;
278     return (kmp_cmplx128_a4_t)(lhs / rhs);
279   }
280 };
281 
282 #pragma pack(pop)
283 
284 // New 16-byte aligned structures for 12.0 compiler.
285 struct KMP_DO_ALIGN(16) Quad_a16_t {
286   _Quad q;
287 
288   Quad_a16_t() : q() {}
289   Quad_a16_t(const _Quad &cq) : q(cq) {}
290 
291   Quad_a16_t operator+(const Quad_a16_t &b) {
292     _Quad lhs = (*this).q;
293     _Quad rhs = b.q;
294     return (Quad_a16_t)(lhs + rhs);
295   }
296 
297   Quad_a16_t operator-(const Quad_a16_t &b) {
298     _Quad lhs = (*this).q;
299     _Quad rhs = b.q;
300     return (Quad_a16_t)(lhs - rhs);
301   }
302   Quad_a16_t operator*(const Quad_a16_t &b) {
303     _Quad lhs = (*this).q;
304     _Quad rhs = b.q;
305     return (Quad_a16_t)(lhs * rhs);
306   }
307 
308   Quad_a16_t operator/(const Quad_a16_t &b) {
309     _Quad lhs = (*this).q;
310     _Quad rhs = b.q;
311     return (Quad_a16_t)(lhs / rhs);
312   }
313 };
314 
315 struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t {
316   kmp_cmplx128 q;
317 
318   kmp_cmplx128_a16_t() : q() {}
319 
320 #if defined(__cplusplus) && (KMP_OS_WINDOWS)
321   kmp_cmplx128_a16_t(const std::complex<_Quad> &c128) : q(c128) {}
322 #endif
323   kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {}
324 
325   kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) {
326     kmp_cmplx128 lhs = (*this).q;
327     kmp_cmplx128 rhs = b.q;
328     return (kmp_cmplx128_a16_t)(lhs + rhs);
329   }
330   kmp_cmplx128_a16_t operator-(const kmp_cmplx128_a16_t &b) {
331     kmp_cmplx128 lhs = (*this).q;
332     kmp_cmplx128 rhs = b.q;
333     return (kmp_cmplx128_a16_t)(lhs - rhs);
334   }
335   kmp_cmplx128_a16_t operator*(const kmp_cmplx128_a16_t &b) {
336     kmp_cmplx128 lhs = (*this).q;
337     kmp_cmplx128 rhs = b.q;
338     return (kmp_cmplx128_a16_t)(lhs * rhs);
339   }
340 
341   kmp_cmplx128_a16_t operator/(const kmp_cmplx128_a16_t &b) {
342     kmp_cmplx128 lhs = (*this).q;
343     kmp_cmplx128 rhs = b.q;
344     return (kmp_cmplx128_a16_t)(lhs / rhs);
345   }
346 };
347 
348 #endif
349 
// Operand types for the _Quad and complex-_Quad prototypes that do not carry
// the "_a16" suffix: the 4-byte aligned wrapper structs when KMP_ARCH_X86 is
// set, the plain _Quad / kmp_cmplx128 types otherwise.
350 #if (KMP_ARCH_X86)
351 #define QUAD_LEGACY Quad_a4_t
352 #define CPLX128_LEG kmp_cmplx128_a4_t
353 #else
354 #define QUAD_LEGACY _Quad
355 #define CPLX128_LEG kmp_cmplx128
356 #endif
357 
358 #ifdef __cplusplus
359 extern "C" {
360 #endif
361 
// Declared here, defined in another translation unit.
// NOTE(review): presumably selects which atomic implementation/compat mode is
// in effect -- confirm the meaning of its values at the definition.
362 extern int __kmp_atomic_mode;
363 
364 // Atomic locks can easily become contended, so we use queuing locks for them.
365 typedef kmp_queuing_lock_t kmp_atomic_lock_t;
366 
367 static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck,
368                                              kmp_int32 gtid) {
369 #if OMPT_SUPPORT && OMPT_OPTIONAL
370   if (ompt_enabled.ompt_callback_mutex_acquire) {
371     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
372         ompt_mutex_atomic, 0, kmp_mutex_impl_queuing,
373         (ompt_wait_id_t)(uintptr_t)lck, OMPT_GET_RETURN_ADDRESS(0));
374   }
375 #endif
376 
377   __kmp_acquire_queuing_lock(lck, gtid);
378 
379 #if OMPT_SUPPORT && OMPT_OPTIONAL
380   if (ompt_enabled.ompt_callback_mutex_acquired) {
381     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
382         ompt_mutex_atomic, (ompt_wait_id_t)(uintptr_t)lck,
383         OMPT_GET_RETURN_ADDRESS(0));
384   }
385 #endif
386 }
387 
388 static inline int __kmp_test_atomic_lock(kmp_atomic_lock_t *lck,
389                                          kmp_int32 gtid) {
390   return __kmp_test_queuing_lock(lck, gtid);
391 }
392 
393 static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck,
394                                              kmp_int32 gtid) {
395   __kmp_release_queuing_lock(lck, gtid);
396 #if OMPT_SUPPORT && OMPT_OPTIONAL
397   if (ompt_enabled.ompt_callback_mutex_released) {
398     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
399         ompt_mutex_atomic, (ompt_wait_id_t)(uintptr_t)lck,
400         OMPT_GET_RETURN_ADDRESS(0));
401   }
402 #endif
403 }
404 
// Initializes the atomic lock `lck` by delegating to __kmp_init_queuing_lock
// (kmp_atomic_lock_t is a typedef of kmp_queuing_lock_t).
405 static inline void __kmp_init_atomic_lock(kmp_atomic_lock_t *lck) {
406   __kmp_init_queuing_lock(lck);
407 }
408 
// Destroys the atomic lock `lck` by delegating to __kmp_destroy_queuing_lock
// (kmp_atomic_lock_t is a typedef of kmp_queuing_lock_t).
409 static inline void __kmp_destroy_atomic_lock(kmp_atomic_lock_t *lck) {
410   __kmp_destroy_queuing_lock(lck);
411 }
412 
413 // Global Locks
// One lock per operand category. Judging by the per-lock comments below, the
// numeric suffix is the operand size in bytes and the letter its kind
// (i = fixed, r = real, c = complex).
414 extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded
415                                                atomics in Gnu compat mode   */
416 extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user
417                                                   coded atomics for 1-byte fixed
418                                                   data types */
419 extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user
420                                                   coded atomics for 2-byte fixed
421                                                   data types */
422 extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user
423                                                   coded atomics for 4-byte fixed
424                                                   data types */
425 extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user
426                                                   coded atomics for kmp_real32
427                                                   data type    */
428 extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user
429                                                   coded atomics for 8-byte fixed
430                                                   data types */
431 extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user
432                                                   coded atomics for kmp_real64
433                                                   data type    */
434 extern kmp_atomic_lock_t
435     __kmp_atomic_lock_8c; /* Control access to all user coded atomics for
436                              8-byte complex data type
                                (NOTE(review): presumably float complex, i.e.
                                kmp_cmplx32 -- confirm) */
437 extern kmp_atomic_lock_t
438     __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long
439                               double data type   */
440 extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user
441                                                    coded atomics for _Quad data
442                                                    type         */
443 extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user
444                                                    coded atomics for double
445                                                    complex data type*/
446 extern kmp_atomic_lock_t
447     __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long
448                               double complex type*/
449 extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user
450                                                    coded atomics for _Quad
451                                                    complex data type */
452 
453 //  Below routines for atomic UPDATE are listed
454 
// Update entry points for 1- and 2-byte integers. Every routine has the shape
// (ident_t *id_ref, int gtid, T *lhs, T rhs) and returns void. The "1u"/"2u"
// variants take unsigned operands and are declared only for div and shr.
// NOTE(review): the name suffix presumably names the operation applied as
// *lhs = *lhs <op> rhs -- confirm against the definitions.
455 // 1-byte
456 void __kmpc_atomic_fixed1_add(ident_t *id_ref, int gtid, char *lhs, char rhs);
457 void __kmpc_atomic_fixed1_andb(ident_t *id_ref, int gtid, char *lhs, char rhs);
458 void __kmpc_atomic_fixed1_div(ident_t *id_ref, int gtid, char *lhs, char rhs);
459 void __kmpc_atomic_fixed1u_div(ident_t *id_ref, int gtid, unsigned char *lhs,
460                                unsigned char rhs);
461 void __kmpc_atomic_fixed1_mul(ident_t *id_ref, int gtid, char *lhs, char rhs);
462 void __kmpc_atomic_fixed1_orb(ident_t *id_ref, int gtid, char *lhs, char rhs);
463 void __kmpc_atomic_fixed1_shl(ident_t *id_ref, int gtid, char *lhs, char rhs);
464 void __kmpc_atomic_fixed1_shr(ident_t *id_ref, int gtid, char *lhs, char rhs);
465 void __kmpc_atomic_fixed1u_shr(ident_t *id_ref, int gtid, unsigned char *lhs,
466                                unsigned char rhs);
467 void __kmpc_atomic_fixed1_sub(ident_t *id_ref, int gtid, char *lhs, char rhs);
468 void __kmpc_atomic_fixed1_xor(ident_t *id_ref, int gtid, char *lhs, char rhs);
469 // 2-byte
470 void __kmpc_atomic_fixed2_add(ident_t *id_ref, int gtid, short *lhs, short rhs);
471 void __kmpc_atomic_fixed2_andb(ident_t *id_ref, int gtid, short *lhs,
472                                short rhs);
473 void __kmpc_atomic_fixed2_div(ident_t *id_ref, int gtid, short *lhs, short rhs);
474 void __kmpc_atomic_fixed2u_div(ident_t *id_ref, int gtid, unsigned short *lhs,
475                                unsigned short rhs);
476 void __kmpc_atomic_fixed2_mul(ident_t *id_ref, int gtid, short *lhs, short rhs);
477 void __kmpc_atomic_fixed2_orb(ident_t *id_ref, int gtid, short *lhs, short rhs);
478 void __kmpc_atomic_fixed2_shl(ident_t *id_ref, int gtid, short *lhs, short rhs);
479 void __kmpc_atomic_fixed2_shr(ident_t *id_ref, int gtid, short *lhs, short rhs);
480 void __kmpc_atomic_fixed2u_shr(ident_t *id_ref, int gtid, unsigned short *lhs,
481                                unsigned short rhs);
482 void __kmpc_atomic_fixed2_sub(ident_t *id_ref, int gtid, short *lhs, short rhs);
483 void __kmpc_atomic_fixed2_xor(ident_t *id_ref, int gtid, short *lhs, short rhs);
// Update entry points for 4- and 8-byte operands. Integer routines use
// kmp_int32 / kmp_int64 (kmp_uint32 / kmp_uint64 for the "4u"/"8u" div and shr
// variants); floating-point routines use kmp_real32 / kmp_real64 and are
// declared for add, sub, mul and div only. All share the shape
// (ident_t *id_ref, int gtid, T *lhs, T rhs) and return void.
484 // 4-byte add / sub fixed
485 void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
486                               kmp_int32 rhs);
487 void __kmpc_atomic_fixed4_sub(ident_t *id_ref, int gtid, kmp_int32 *lhs,
488                               kmp_int32 rhs);
489 // 4-byte add / sub float
490 void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid, kmp_real32 *lhs,
491                               kmp_real32 rhs);
492 void __kmpc_atomic_float4_sub(ident_t *id_ref, int gtid, kmp_real32 *lhs,
493                               kmp_real32 rhs);
494 // 8-byte add / sub fixed
495 void __kmpc_atomic_fixed8_add(ident_t *id_ref, int gtid, kmp_int64 *lhs,
496                               kmp_int64 rhs);
497 void __kmpc_atomic_fixed8_sub(ident_t *id_ref, int gtid, kmp_int64 *lhs,
498                               kmp_int64 rhs);
499 // 8-byte add / sub float
500 void __kmpc_atomic_float8_add(ident_t *id_ref, int gtid, kmp_real64 *lhs,
501                               kmp_real64 rhs);
502 void __kmpc_atomic_float8_sub(ident_t *id_ref, int gtid, kmp_real64 *lhs,
503                               kmp_real64 rhs);
504 // 4-byte fixed
505 void __kmpc_atomic_fixed4_andb(ident_t *id_ref, int gtid, kmp_int32 *lhs,
506                                kmp_int32 rhs);
507 void __kmpc_atomic_fixed4_div(ident_t *id_ref, int gtid, kmp_int32 *lhs,
508                               kmp_int32 rhs);
509 void __kmpc_atomic_fixed4u_div(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
510                                kmp_uint32 rhs);
511 void __kmpc_atomic_fixed4_mul(ident_t *id_ref, int gtid, kmp_int32 *lhs,
512                               kmp_int32 rhs);
513 void __kmpc_atomic_fixed4_orb(ident_t *id_ref, int gtid, kmp_int32 *lhs,
514                               kmp_int32 rhs);
515 void __kmpc_atomic_fixed4_shl(ident_t *id_ref, int gtid, kmp_int32 *lhs,
516                               kmp_int32 rhs);
517 void __kmpc_atomic_fixed4_shr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
518                               kmp_int32 rhs);
519 void __kmpc_atomic_fixed4u_shr(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
520                                kmp_uint32 rhs);
521 void __kmpc_atomic_fixed4_xor(ident_t *id_ref, int gtid, kmp_int32 *lhs,
522                               kmp_int32 rhs);
523 // 8-byte fixed
524 void __kmpc_atomic_fixed8_andb(ident_t *id_ref, int gtid, kmp_int64 *lhs,
525                                kmp_int64 rhs);
526 void __kmpc_atomic_fixed8_div(ident_t *id_ref, int gtid, kmp_int64 *lhs,
527                               kmp_int64 rhs);
528 void __kmpc_atomic_fixed8u_div(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
529                                kmp_uint64 rhs);
530 void __kmpc_atomic_fixed8_mul(ident_t *id_ref, int gtid, kmp_int64 *lhs,
531                               kmp_int64 rhs);
532 void __kmpc_atomic_fixed8_orb(ident_t *id_ref, int gtid, kmp_int64 *lhs,
533                               kmp_int64 rhs);
534 void __kmpc_atomic_fixed8_shl(ident_t *id_ref, int gtid, kmp_int64 *lhs,
535                               kmp_int64 rhs);
536 void __kmpc_atomic_fixed8_shr(ident_t *id_ref, int gtid, kmp_int64 *lhs,
537                               kmp_int64 rhs);
538 void __kmpc_atomic_fixed8u_shr(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
539                                kmp_uint64 rhs);
540 void __kmpc_atomic_fixed8_xor(ident_t *id_ref, int gtid, kmp_int64 *lhs,
541                               kmp_int64 rhs);
542 // 4-byte float
543 void __kmpc_atomic_float4_div(ident_t *id_ref, int gtid, kmp_real32 *lhs,
544                               kmp_real32 rhs);
545 void __kmpc_atomic_float4_mul(ident_t *id_ref, int gtid, kmp_real32 *lhs,
546                               kmp_real32 rhs);
547 // 8-byte float
548 void __kmpc_atomic_float8_div(ident_t *id_ref, int gtid, kmp_real64 *lhs,
549                               kmp_real64 rhs);
550 void __kmpc_atomic_float8_mul(ident_t *id_ref, int gtid, kmp_real64 *lhs,
551                               kmp_real64 rhs);
// Logical AND ("andl") and OR ("orl") entry points, one pair per integer width
// (char, short, kmp_int32, kmp_int64). Same (id_ref, gtid, lhs, rhs) shape as
// the arithmetic routines.
552 // 1-, 2-, 4-, 8-byte logical (&&, ||)
553 void __kmpc_atomic_fixed1_andl(ident_t *id_ref, int gtid, char *lhs, char rhs);
554 void __kmpc_atomic_fixed1_orl(ident_t *id_ref, int gtid, char *lhs, char rhs);
555 void __kmpc_atomic_fixed2_andl(ident_t *id_ref, int gtid, short *lhs,
556                                short rhs);
557 void __kmpc_atomic_fixed2_orl(ident_t *id_ref, int gtid, short *lhs, short rhs);
558 void __kmpc_atomic_fixed4_andl(ident_t *id_ref, int gtid, kmp_int32 *lhs,
559                                kmp_int32 rhs);
560 void __kmpc_atomic_fixed4_orl(ident_t *id_ref, int gtid, kmp_int32 *lhs,
561                               kmp_int32 rhs);
562 void __kmpc_atomic_fixed8_andl(ident_t *id_ref, int gtid, kmp_int64 *lhs,
563                                kmp_int64 rhs);
564 void __kmpc_atomic_fixed8_orl(ident_t *id_ref, int gtid, kmp_int64 *lhs,
565                               kmp_int64 rhs);
// max/min entry points for every integer width, kmp_real32, kmp_real64 and
// long double ("float10"). The _Quad ("float16") versions are declared only
// when KMP_HAVE_QUAD is set and take QUAD_LEGACY operands; with KMP_ARCH_X86
// there are additional "_a16" versions taking Quad_a16_t.
566 // MIN / MAX
567 void __kmpc_atomic_fixed1_max(ident_t *id_ref, int gtid, char *lhs, char rhs);
568 void __kmpc_atomic_fixed1_min(ident_t *id_ref, int gtid, char *lhs, char rhs);
569 void __kmpc_atomic_fixed2_max(ident_t *id_ref, int gtid, short *lhs, short rhs);
570 void __kmpc_atomic_fixed2_min(ident_t *id_ref, int gtid, short *lhs, short rhs);
571 void __kmpc_atomic_fixed4_max(ident_t *id_ref, int gtid, kmp_int32 *lhs,
572                               kmp_int32 rhs);
573 void __kmpc_atomic_fixed4_min(ident_t *id_ref, int gtid, kmp_int32 *lhs,
574                               kmp_int32 rhs);
575 void __kmpc_atomic_fixed8_max(ident_t *id_ref, int gtid, kmp_int64 *lhs,
576                               kmp_int64 rhs);
577 void __kmpc_atomic_fixed8_min(ident_t *id_ref, int gtid, kmp_int64 *lhs,
578                               kmp_int64 rhs);
579 void __kmpc_atomic_float4_max(ident_t *id_ref, int gtid, kmp_real32 *lhs,
580                               kmp_real32 rhs);
581 void __kmpc_atomic_float4_min(ident_t *id_ref, int gtid, kmp_real32 *lhs,
582                               kmp_real32 rhs);
583 void __kmpc_atomic_float8_max(ident_t *id_ref, int gtid, kmp_real64 *lhs,
584                               kmp_real64 rhs);
585 void __kmpc_atomic_float8_min(ident_t *id_ref, int gtid, kmp_real64 *lhs,
586                               kmp_real64 rhs);
587 void __kmpc_atomic_float10_max(ident_t *id_ref, int gtid, long double *lhs,
588                                long double rhs);
589 void __kmpc_atomic_float10_min(ident_t *id_ref, int gtid, long double *lhs,
590                                long double rhs);
591 #if KMP_HAVE_QUAD
592 void __kmpc_atomic_float16_max(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
593                                QUAD_LEGACY rhs);
594 void __kmpc_atomic_float16_min(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
595                                QUAD_LEGACY rhs);
596 #if (KMP_ARCH_X86)
597 // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32
598 // architecture only
599 void __kmpc_atomic_float16_max_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
600                                    Quad_a16_t rhs);
601 void __kmpc_atomic_float16_min_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
602                                    Quad_a16_t rhs);
603 #endif
604 #endif
// "neqv" and "eqv" entry points for each integer width; per the notes below
// they correspond to xor and ~xor respectively.
605 // .NEQV. (same as xor)
606 void __kmpc_atomic_fixed1_neqv(ident_t *id_ref, int gtid, char *lhs, char rhs);
607 void __kmpc_atomic_fixed2_neqv(ident_t *id_ref, int gtid, short *lhs,
608                                short rhs);
609 void __kmpc_atomic_fixed4_neqv(ident_t *id_ref, int gtid, kmp_int32 *lhs,
610                                kmp_int32 rhs);
611 void __kmpc_atomic_fixed8_neqv(ident_t *id_ref, int gtid, kmp_int64 *lhs,
612                                kmp_int64 rhs);
613 // .EQV. (same as ~xor)
614 void __kmpc_atomic_fixed1_eqv(ident_t *id_ref, int gtid, char *lhs, char rhs);
615 void __kmpc_atomic_fixed2_eqv(ident_t *id_ref, int gtid, short *lhs, short rhs);
616 void __kmpc_atomic_fixed4_eqv(ident_t *id_ref, int gtid, kmp_int32 *lhs,
617                               kmp_int32 rhs);
618 void __kmpc_atomic_fixed8_eqv(ident_t *id_ref, int gtid, kmp_int64 *lhs,
619                               kmp_int64 rhs);
// add/sub/mul/div entry points for long double ("float10") and, when
// KMP_HAVE_QUAD is set, for _Quad ("float16"). The plain float16 routines take
// QUAD_LEGACY operands; with KMP_ARCH_X86 there are additional "_a16" versions
// taking Quad_a16_t.
620 // long double type
621 void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid, long double *lhs,
622                                long double rhs);
623 void __kmpc_atomic_float10_sub(ident_t *id_ref, int gtid, long double *lhs,
624                                long double rhs);
625 void __kmpc_atomic_float10_mul(ident_t *id_ref, int gtid, long double *lhs,
626                                long double rhs);
627 void __kmpc_atomic_float10_div(ident_t *id_ref, int gtid, long double *lhs,
628                                long double rhs);
629 // _Quad type
630 #if KMP_HAVE_QUAD
631 void __kmpc_atomic_float16_add(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
632                                QUAD_LEGACY rhs);
633 void __kmpc_atomic_float16_sub(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
634                                QUAD_LEGACY rhs);
635 void __kmpc_atomic_float16_mul(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
636                                QUAD_LEGACY rhs);
637 void __kmpc_atomic_float16_div(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
638                                QUAD_LEGACY rhs);
639 #if (KMP_ARCH_X86)
640 // Routines with 16-byte arguments aligned to 16-byte boundary
641 void __kmpc_atomic_float16_add_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
642                                    Quad_a16_t rhs);
643 void __kmpc_atomic_float16_sub_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
644                                    Quad_a16_t rhs);
645 void __kmpc_atomic_float16_mul_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
646                                    Quad_a16_t rhs);
647 void __kmpc_atomic_float16_div_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
648                                    Quad_a16_t rhs);
649 #endif
650 #endif
// add/sub/mul/div entry points for the complex types. In the routine names
// "cmplx4" pairs with kmp_cmplx32, "cmplx8" with kmp_cmplx64, "cmplx10" with
// kmp_cmplx80 and "cmplx16" with the _Quad complex type. The cmplx16 routines
// are declared only when KMP_HAVE_QUAD is set and take CPLX128_LEG operands;
// with KMP_ARCH_X86 there are additional "_a16" versions taking
// kmp_cmplx128_a16_t.
651 // routines for complex types
652 void __kmpc_atomic_cmplx4_add(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
653                               kmp_cmplx32 rhs);
654 void __kmpc_atomic_cmplx4_sub(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
655                               kmp_cmplx32 rhs);
656 void __kmpc_atomic_cmplx4_mul(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
657                               kmp_cmplx32 rhs);
658 void __kmpc_atomic_cmplx4_div(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
659                               kmp_cmplx32 rhs);
660 void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
661                               kmp_cmplx64 rhs);
662 void __kmpc_atomic_cmplx8_sub(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
663                               kmp_cmplx64 rhs);
664 void __kmpc_atomic_cmplx8_mul(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
665                               kmp_cmplx64 rhs);
666 void __kmpc_atomic_cmplx8_div(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
667                               kmp_cmplx64 rhs);
668 void __kmpc_atomic_cmplx10_add(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
669                                kmp_cmplx80 rhs);
670 void __kmpc_atomic_cmplx10_sub(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
671                                kmp_cmplx80 rhs);
672 void __kmpc_atomic_cmplx10_mul(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
673                                kmp_cmplx80 rhs);
674 void __kmpc_atomic_cmplx10_div(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
675                                kmp_cmplx80 rhs);
676 #if KMP_HAVE_QUAD
677 void __kmpc_atomic_cmplx16_add(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
678                                CPLX128_LEG rhs);
679 void __kmpc_atomic_cmplx16_sub(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
680                                CPLX128_LEG rhs);
681 void __kmpc_atomic_cmplx16_mul(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
682                                CPLX128_LEG rhs);
683 void __kmpc_atomic_cmplx16_div(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
684                                CPLX128_LEG rhs);
685 #if (KMP_ARCH_X86)
686 // Routines with 16-byte arguments aligned to 16-byte boundary
687 void __kmpc_atomic_cmplx16_add_a16(ident_t *id_ref, int gtid,
688                                    kmp_cmplx128_a16_t *lhs,
689                                    kmp_cmplx128_a16_t rhs);
690 void __kmpc_atomic_cmplx16_sub_a16(ident_t *id_ref, int gtid,
691                                    kmp_cmplx128_a16_t *lhs,
692                                    kmp_cmplx128_a16_t rhs);
693 void __kmpc_atomic_cmplx16_mul_a16(ident_t *id_ref, int gtid,
694                                    kmp_cmplx128_a16_t *lhs,
695                                    kmp_cmplx128_a16_t rhs);
696 void __kmpc_atomic_cmplx16_div_a16(ident_t *id_ref, int gtid,
697                                    kmp_cmplx128_a16_t *lhs,
698                                    kmp_cmplx128_a16_t rhs);
699 #endif
700 #endif
701 
702 // OpenMP 4.0: x = expr binop x for non-commutative operations.
703 // Supported only on IA-32 architecture and Intel(R) 64
704 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
705 
// "_rev" entry points for the non-commutative integer operations (sub, div,
// shl, shr) on 1-, 2- and 4-byte operands; the "u" variants take unsigned
// operands and are declared only for div and shr.
// NOTE(review): per the OpenMP 4.0 note preceding this group, these presumably
// compute *lhs = rhs <op> *lhs -- confirm against the definitions.
706 void __kmpc_atomic_fixed1_sub_rev(ident_t *id_ref, int gtid, char *lhs,
707                                   char rhs);
708 void __kmpc_atomic_fixed1_div_rev(ident_t *id_ref, int gtid, char *lhs,
709                                   char rhs);
710 void __kmpc_atomic_fixed1u_div_rev(ident_t *id_ref, int gtid,
711                                    unsigned char *lhs, unsigned char rhs);
712 void __kmpc_atomic_fixed1_shl_rev(ident_t *id_ref, int gtid, char *lhs,
713                                   char rhs);
714 void __kmpc_atomic_fixed1_shr_rev(ident_t *id_ref, int gtid, char *lhs,
715                                   char rhs);
716 void __kmpc_atomic_fixed1u_shr_rev(ident_t *id_ref, int gtid,
717                                    unsigned char *lhs, unsigned char rhs);
718 void __kmpc_atomic_fixed2_sub_rev(ident_t *id_ref, int gtid, short *lhs,
719                                   short rhs);
720 void __kmpc_atomic_fixed2_div_rev(ident_t *id_ref, int gtid, short *lhs,
721                                   short rhs);
722 void __kmpc_atomic_fixed2u_div_rev(ident_t *id_ref, int gtid,
723                                    unsigned short *lhs, unsigned short rhs);
724 void __kmpc_atomic_fixed2_shl_rev(ident_t *id_ref, int gtid, short *lhs,
725                                   short rhs);
726 void __kmpc_atomic_fixed2_shr_rev(ident_t *id_ref, int gtid, short *lhs,
727                                   short rhs);
728 void __kmpc_atomic_fixed2u_shr_rev(ident_t *id_ref, int gtid,
729                                    unsigned short *lhs, unsigned short rhs);
730 void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
731                                   kmp_int32 rhs);
732 void __kmpc_atomic_fixed4_div_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
733                                   kmp_int32 rhs);
734 void __kmpc_atomic_fixed4u_div_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
735                                    kmp_uint32 rhs);
736 void __kmpc_atomic_fixed4_shl_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
737                                   kmp_int32 rhs);
738 void __kmpc_atomic_fixed4_shr_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
739                                   kmp_int32 rhs);
740 void __kmpc_atomic_fixed4u_shr_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
741                                    kmp_uint32 rhs);
741                                    kmp_uint32 rhs);
742 void __kmpc_atomic_fixed8_sub_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
743                                   kmp_int64 rhs);
744 void __kmpc_atomic_fixed8_div_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
745                                   kmp_int64 rhs);
746 void __kmpc_atomic_fixed8u_div_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
747                                    kmp_uint64 rhs);
748 void __kmpc_atomic_fixed8_shl_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
749                                   kmp_int64 rhs);
750 void __kmpc_atomic_fixed8_shr_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs,
751                                   kmp_int64 rhs);
752 void __kmpc_atomic_fixed8u_shr_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
753                                    kmp_uint64 rhs);
754 void __kmpc_atomic_float4_sub_rev(ident_t *id_ref, int gtid, float *lhs,
755                                   float rhs);
756 void __kmpc_atomic_float4_div_rev(ident_t *id_ref, int gtid, float *lhs,
757                                   float rhs);
758 void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid, double *lhs,
759                                   double rhs);
760 void __kmpc_atomic_float8_div_rev(ident_t *id_ref, int gtid, double *lhs,
761                                   double rhs);
762 void __kmpc_atomic_float10_sub_rev(ident_t *id_ref, int gtid, long double *lhs,
763                                    long double rhs);
764 void __kmpc_atomic_float10_div_rev(ident_t *id_ref, int gtid, long double *lhs,
765                                    long double rhs);
766 #if KMP_HAVE_QUAD
767 void __kmpc_atomic_float16_sub_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
768                                    QUAD_LEGACY rhs);
769 void __kmpc_atomic_float16_div_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
770                                    QUAD_LEGACY rhs);
771 #endif
772 void __kmpc_atomic_cmplx4_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
773                                   kmp_cmplx32 rhs);
774 void __kmpc_atomic_cmplx4_div_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
775                                   kmp_cmplx32 rhs);
776 void __kmpc_atomic_cmplx8_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
777                                   kmp_cmplx64 rhs);
778 void __kmpc_atomic_cmplx8_div_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
779                                   kmp_cmplx64 rhs);
780 void __kmpc_atomic_cmplx10_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
781                                    kmp_cmplx80 rhs);
782 void __kmpc_atomic_cmplx10_div_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
783                                    kmp_cmplx80 rhs);
784 #if KMP_HAVE_QUAD
785 void __kmpc_atomic_cmplx16_sub_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
786                                    CPLX128_LEG rhs);
787 void __kmpc_atomic_cmplx16_div_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
788                                    CPLX128_LEG rhs);
789 #if (KMP_ARCH_X86)
790 // Routines with 16-byte arguments aligned to 16-byte boundary
791 void __kmpc_atomic_float16_sub_a16_rev(ident_t *id_ref, int gtid,
792                                        Quad_a16_t *lhs, Quad_a16_t rhs);
793 void __kmpc_atomic_float16_div_a16_rev(ident_t *id_ref, int gtid,
794                                        Quad_a16_t *lhs, Quad_a16_t rhs);
795 void __kmpc_atomic_cmplx16_sub_a16_rev(ident_t *id_ref, int gtid,
796                                        kmp_cmplx128_a16_t *lhs,
797                                        kmp_cmplx128_a16_t rhs);
798 void __kmpc_atomic_cmplx16_div_a16_rev(ident_t *id_ref, int gtid,
799                                        kmp_cmplx128_a16_t *lhs,
800                                        kmp_cmplx128_a16_t rhs);
801 #endif
802 #endif // KMP_HAVE_QUAD
803 
804 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
805 
806 // Routines for mixed types (lhs and rhs have different types)
807 
808 // RHS=float8
// lhs points to an integer or kmp_real32 operand while rhs is a kmp_real64.
// The integer kinds (fixed1/2/4/8) declare mul/div only; float4 declares
// add/sub/mul/div.
809 void __kmpc_atomic_fixed1_mul_float8(ident_t *id_ref, int gtid, char *lhs,
810                                      kmp_real64 rhs);
811 void __kmpc_atomic_fixed1_div_float8(ident_t *id_ref, int gtid, char *lhs,
812                                      kmp_real64 rhs);
813 void __kmpc_atomic_fixed2_mul_float8(ident_t *id_ref, int gtid, short *lhs,
814                                      kmp_real64 rhs);
815 void __kmpc_atomic_fixed2_div_float8(ident_t *id_ref, int gtid, short *lhs,
816                                      kmp_real64 rhs);
817 void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs,
818                                      kmp_real64 rhs);
819 void __kmpc_atomic_fixed4_div_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs,
820                                      kmp_real64 rhs);
821 void __kmpc_atomic_fixed8_mul_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs,
822                                      kmp_real64 rhs);
823 void __kmpc_atomic_fixed8_div_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs,
824                                      kmp_real64 rhs);
825 void __kmpc_atomic_float4_add_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
826                                      kmp_real64 rhs);
827 void __kmpc_atomic_float4_sub_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
828                                      kmp_real64 rhs);
829 void __kmpc_atomic_float4_mul_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
830                                      kmp_real64 rhs);
831 void __kmpc_atomic_float4_div_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
832                                      kmp_real64 rhs);
833 
834 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
835 // use them)
// Every `_fp` routine takes rhs as a _Quad by value; lhs points to the integer
// or floating-point operand named in the routine. The groups below are
// fixed1, fixed2, fixed4, fixed8 (signed and unsigned variants), then float4,
// float8 and float10, each with add/sub/mul/div.
836 #if KMP_HAVE_QUAD
837 void __kmpc_atomic_fixed1_add_fp(ident_t *id_ref, int gtid, char *lhs,
838                                  _Quad rhs);
839 void __kmpc_atomic_fixed1u_add_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
840                                   _Quad rhs);
841 void __kmpc_atomic_fixed1_sub_fp(ident_t *id_ref, int gtid, char *lhs,
842                                  _Quad rhs);
843 void __kmpc_atomic_fixed1u_sub_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
844                                   _Quad rhs);
845 void __kmpc_atomic_fixed1_mul_fp(ident_t *id_ref, int gtid, char *lhs,
846                                  _Quad rhs);
847 void __kmpc_atomic_fixed1u_mul_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
848                                   _Quad rhs);
849 void __kmpc_atomic_fixed1_div_fp(ident_t *id_ref, int gtid, char *lhs,
850                                  _Quad rhs);
851 void __kmpc_atomic_fixed1u_div_fp(ident_t *id_ref, int gtid, unsigned char *lhs,
852                                   _Quad rhs);
853 
854 void __kmpc_atomic_fixed2_add_fp(ident_t *id_ref, int gtid, short *lhs,
855                                  _Quad rhs);
856 void __kmpc_atomic_fixed2u_add_fp(ident_t *id_ref, int gtid,
857                                   unsigned short *lhs, _Quad rhs);
858 void __kmpc_atomic_fixed2_sub_fp(ident_t *id_ref, int gtid, short *lhs,
859                                  _Quad rhs);
860 void __kmpc_atomic_fixed2u_sub_fp(ident_t *id_ref, int gtid,
861                                   unsigned short *lhs, _Quad rhs);
862 void __kmpc_atomic_fixed2_mul_fp(ident_t *id_ref, int gtid, short *lhs,
863                                  _Quad rhs);
864 void __kmpc_atomic_fixed2u_mul_fp(ident_t *id_ref, int gtid,
865                                   unsigned short *lhs, _Quad rhs);
866 void __kmpc_atomic_fixed2_div_fp(ident_t *id_ref, int gtid, short *lhs,
867                                  _Quad rhs);
868 void __kmpc_atomic_fixed2u_div_fp(ident_t *id_ref, int gtid,
869                                   unsigned short *lhs, _Quad rhs);
870 
871 void __kmpc_atomic_fixed4_add_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
872                                  _Quad rhs);
873 void __kmpc_atomic_fixed4u_add_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
874                                   _Quad rhs);
875 void __kmpc_atomic_fixed4_sub_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
876                                  _Quad rhs);
877 void __kmpc_atomic_fixed4u_sub_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
878                                   _Quad rhs);
879 void __kmpc_atomic_fixed4_mul_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
880                                  _Quad rhs);
881 void __kmpc_atomic_fixed4u_mul_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
882                                   _Quad rhs);
883 void __kmpc_atomic_fixed4_div_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
884                                  _Quad rhs);
885 void __kmpc_atomic_fixed4u_div_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs,
886                                   _Quad rhs);
887 
888 void __kmpc_atomic_fixed8_add_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
889                                  _Quad rhs);
890 void __kmpc_atomic_fixed8u_add_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
891                                   _Quad rhs);
892 void __kmpc_atomic_fixed8_sub_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
893                                  _Quad rhs);
894 void __kmpc_atomic_fixed8u_sub_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
895                                   _Quad rhs);
896 void __kmpc_atomic_fixed8_mul_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
897                                  _Quad rhs);
898 void __kmpc_atomic_fixed8u_mul_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
899                                   _Quad rhs);
900 void __kmpc_atomic_fixed8_div_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
901                                  _Quad rhs);
902 void __kmpc_atomic_fixed8u_div_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs,
903                                   _Quad rhs);
904 
905 void __kmpc_atomic_float4_add_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
906                                  _Quad rhs);
907 void __kmpc_atomic_float4_sub_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
908                                  _Quad rhs);
909 void __kmpc_atomic_float4_mul_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
910                                  _Quad rhs);
911 void __kmpc_atomic_float4_div_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs,
912                                  _Quad rhs);
913 
914 void __kmpc_atomic_float8_add_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
915                                  _Quad rhs);
916 void __kmpc_atomic_float8_sub_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
917                                  _Quad rhs);
918 void __kmpc_atomic_float8_mul_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
919                                  _Quad rhs);
920 void __kmpc_atomic_float8_div_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs,
921                                  _Quad rhs);
922 
923 void __kmpc_atomic_float10_add_fp(ident_t *id_ref, int gtid, long double *lhs,
924                                   _Quad rhs);
925 void __kmpc_atomic_float10_sub_fp(ident_t *id_ref, int gtid, long double *lhs,
926                                   _Quad rhs);
927 void __kmpc_atomic_float10_mul_fp(ident_t *id_ref, int gtid, long double *lhs,
928                                   _Quad rhs);
929 void __kmpc_atomic_float10_div_fp(ident_t *id_ref, int gtid, long double *lhs,
930                                   _Quad rhs);
931 
932 // Reverse operations (`_rev_fp`): sub and div only, same lhs kinds as above
933 void __kmpc_atomic_fixed1_sub_rev_fp(ident_t *id_ref, int gtid, char *lhs,
934                                      _Quad rhs);
935 void __kmpc_atomic_fixed1u_sub_rev_fp(ident_t *id_ref, int gtid,
936                                       unsigned char *lhs, _Quad rhs);
937 void __kmpc_atomic_fixed1_div_rev_fp(ident_t *id_ref, int gtid, char *lhs,
938                                      _Quad rhs);
939 void __kmpc_atomic_fixed1u_div_rev_fp(ident_t *id_ref, int gtid,
940                                       unsigned char *lhs, _Quad rhs);
941 void __kmpc_atomic_fixed2_sub_rev_fp(ident_t *id_ref, int gtid, short *lhs,
942                                      _Quad rhs);
943 void __kmpc_atomic_fixed2u_sub_rev_fp(ident_t *id_ref, int gtid,
944                                       unsigned short *lhs, _Quad rhs);
945 void __kmpc_atomic_fixed2_div_rev_fp(ident_t *id_ref, int gtid, short *lhs,
946                                      _Quad rhs);
947 void __kmpc_atomic_fixed2u_div_rev_fp(ident_t *id_ref, int gtid,
948                                       unsigned short *lhs, _Quad rhs);
949 void __kmpc_atomic_fixed4_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
950                                      _Quad rhs);
951 void __kmpc_atomic_fixed4u_sub_rev_fp(ident_t *id_ref, int gtid,
952                                       kmp_uint32 *lhs, _Quad rhs);
953 void __kmpc_atomic_fixed4_div_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
954                                      _Quad rhs);
955 void __kmpc_atomic_fixed4u_div_rev_fp(ident_t *id_ref, int gtid,
956                                       kmp_uint32 *lhs, _Quad rhs);
957 void __kmpc_atomic_fixed8_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
958                                      _Quad rhs);
959 void __kmpc_atomic_fixed8u_sub_rev_fp(ident_t *id_ref, int gtid,
960                                       kmp_uint64 *lhs, _Quad rhs);
961 void __kmpc_atomic_fixed8_div_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
962                                      _Quad rhs);
963 void __kmpc_atomic_fixed8u_div_rev_fp(ident_t *id_ref, int gtid,
964                                       kmp_uint64 *lhs, _Quad rhs);
965 void __kmpc_atomic_float4_sub_rev_fp(ident_t *id_ref, int gtid, float *lhs,
966                                      _Quad rhs);
967 void __kmpc_atomic_float4_div_rev_fp(ident_t *id_ref, int gtid, float *lhs,
968                                      _Quad rhs);
969 void __kmpc_atomic_float8_sub_rev_fp(ident_t *id_ref, int gtid, double *lhs,
970                                      _Quad rhs);
971 void __kmpc_atomic_float8_div_rev_fp(ident_t *id_ref, int gtid, double *lhs,
972                                      _Quad rhs);
973 void __kmpc_atomic_float10_sub_rev_fp(ident_t *id_ref, int gtid,
974                                       long double *lhs, _Quad rhs);
975 void __kmpc_atomic_float10_div_rev_fp(ident_t *id_ref, int gtid,
976                                       long double *lhs, _Quad rhs);
977 
978 #endif // KMP_HAVE_QUAD
979 
980 // RHS=cmplx8
// Mixed-type complex routines: lhs points to a kmp_cmplx32 while rhs is a
// kmp_cmplx64 passed by value; add/sub/mul/div are declared.
981 void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid,
982                                      kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
983 void __kmpc_atomic_cmplx4_sub_cmplx8(ident_t *id_ref, int gtid,
984                                      kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
985 void __kmpc_atomic_cmplx4_mul_cmplx8(ident_t *id_ref, int gtid,
986                                      kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
987 void __kmpc_atomic_cmplx4_div_cmplx8(ident_t *id_ref, int gtid,
988                                      kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
989 
990 // Generic atomic routines
// Each takes untyped `lhs` and `rhs` pointers plus a callback `f` that
// receives three void* arguments.
// NOTE(review): the numeric suffix (1, 2, 4, 8, 10, 16, 20, 32) is presumably
// the operand size in bytes, and `f` presumably computes the value stored to
// lhs -- confirm against the implementation; neither is visible in this header.
991 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
992                      void (*f)(void *, void *, void *));
993 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
994                      void (*f)(void *, void *, void *));
995 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
996                      void (*f)(void *, void *, void *));
997 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
998                      void (*f)(void *, void *, void *));
999 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
1000                       void (*f)(void *, void *, void *));
1001 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
1002                       void (*f)(void *, void *, void *));
1003 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
1004                       void (*f)(void *, void *, void *));
1005 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
1006                       void (*f)(void *, void *, void *));
1007 
1008 // READ, WRITE, CAPTURE
1009 
1010 // Routines for atomic READ are listed below.
// Each `_rd` routine takes a pointer `loc` and returns a value of the
// pointed-to type; the one exception is cmplx4 on Windows (see the CQ220361
// note further down).
1011 char __kmpc_atomic_fixed1_rd(ident_t *id_ref, int gtid, char *loc);
1012 short __kmpc_atomic_fixed2_rd(ident_t *id_ref, int gtid, short *loc);
1013 kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid, kmp_int32 *loc);
1014 kmp_int64 __kmpc_atomic_fixed8_rd(ident_t *id_ref, int gtid, kmp_int64 *loc);
1015 kmp_real32 __kmpc_atomic_float4_rd(ident_t *id_ref, int gtid, kmp_real32 *loc);
1016 kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid, kmp_real64 *loc);
1017 long double __kmpc_atomic_float10_rd(ident_t *id_ref, int gtid,
1018                                      long double *loc);
1019 #if KMP_HAVE_QUAD
1020 QUAD_LEGACY __kmpc_atomic_float16_rd(ident_t *id_ref, int gtid,
1021                                      QUAD_LEGACY *loc);
1022 #endif // KMP_HAVE_QUAD
1023 // Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value
1024 // will be returned through an additional parameter
// (the extra leading `out` pointer in the Windows prototype below).
1025 #if (KMP_OS_WINDOWS)
1026 void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
1027                              kmp_cmplx32 *loc);
1028 #else
1029 kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid,
1030                                     kmp_cmplx32 *loc);
1031 #endif // KMP_OS_WINDOWS
1032 kmp_cmplx64 __kmpc_atomic_cmplx8_rd(ident_t *id_ref, int gtid,
1033                                     kmp_cmplx64 *loc);
1034 kmp_cmplx80 __kmpc_atomic_cmplx10_rd(ident_t *id_ref, int gtid,
1035                                      kmp_cmplx80 *loc);
1036 #if KMP_HAVE_QUAD
1037 CPLX128_LEG __kmpc_atomic_cmplx16_rd(ident_t *id_ref, int gtid,
1038                                      CPLX128_LEG *loc);
1039 #if (KMP_ARCH_X86)
1040 // Routines with 16-byte arguments aligned to 16-byte boundary
1041 Quad_a16_t __kmpc_atomic_float16_a16_rd(ident_t *id_ref, int gtid,
1042                                         Quad_a16_t *loc);
1043 kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd(ident_t *id_ref, int gtid,
1044                                                 kmp_cmplx128_a16_t *loc);
1045 #endif // KMP_ARCH_X86
1046 #endif // KMP_HAVE_QUAD
1047 
1048 // Routines for atomic WRITE are listed below.
// Each `_wr` routine returns void and takes a pointer `lhs` plus a value `rhs`
// of the pointed-to type.
// NOTE(review): presumably rhs is stored to *lhs -- confirm against the
// implementation.
1049 void __kmpc_atomic_fixed1_wr(ident_t *id_ref, int gtid, char *lhs, char rhs);
1050 void __kmpc_atomic_fixed2_wr(ident_t *id_ref, int gtid, short *lhs, short rhs);
1051 void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
1052                              kmp_int32 rhs);
1053 void __kmpc_atomic_fixed8_wr(ident_t *id_ref, int gtid, kmp_int64 *lhs,
1054                              kmp_int64 rhs);
1055 void __kmpc_atomic_float4_wr(ident_t *id_ref, int gtid, kmp_real32 *lhs,
1056                              kmp_real32 rhs);
1057 void __kmpc_atomic_float8_wr(ident_t *id_ref, int gtid, kmp_real64 *lhs,
1058                              kmp_real64 rhs);
1059 void __kmpc_atomic_float10_wr(ident_t *id_ref, int gtid, long double *lhs,
1060                               long double rhs);
1061 #if KMP_HAVE_QUAD
1062 void __kmpc_atomic_float16_wr(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
1063                               QUAD_LEGACY rhs);
1064 #endif // KMP_HAVE_QUAD
1065 void __kmpc_atomic_cmplx4_wr(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
1066                              kmp_cmplx32 rhs);
1067 void __kmpc_atomic_cmplx8_wr(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
1068                              kmp_cmplx64 rhs);
1069 void __kmpc_atomic_cmplx10_wr(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs,
1070                               kmp_cmplx80 rhs);
1071 #if KMP_HAVE_QUAD
1072 void __kmpc_atomic_cmplx16_wr(ident_t *id_ref, int gtid, CPLX128_LEG *lhs,
1073                               CPLX128_LEG rhs);
1074 #if (KMP_ARCH_X86)
1075 // Routines with 16-byte arguments aligned to 16-byte boundary
1076 void __kmpc_atomic_float16_a16_wr(ident_t *id_ref, int gtid, Quad_a16_t *lhs,
1077                                   Quad_a16_t rhs);
1078 void __kmpc_atomic_cmplx16_a16_wr(ident_t *id_ref, int gtid,
1079                                   kmp_cmplx128_a16_t *lhs,
1080                                   kmp_cmplx128_a16_t rhs);
1081 #endif // KMP_ARCH_X86
1082 #endif // KMP_HAVE_QUAD
1083 
1084 // Routines for atomic CAPTURE are listed below.
// Unlike the void update routines, each `_cpt` routine returns a value of the
// operand type and takes an extra trailing `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1085 
1086 // 1-byte
// (char operands; the `u` variants use unsigned char)
1087 char __kmpc_atomic_fixed1_add_cpt(ident_t *id_ref, int gtid, char *lhs,
1088                                   char rhs, int flag);
1089 char __kmpc_atomic_fixed1_andb_cpt(ident_t *id_ref, int gtid, char *lhs,
1090                                    char rhs, int flag);
1091 char __kmpc_atomic_fixed1_div_cpt(ident_t *id_ref, int gtid, char *lhs,
1092                                   char rhs, int flag);
1093 unsigned char __kmpc_atomic_fixed1u_div_cpt(ident_t *id_ref, int gtid,
1094                                             unsigned char *lhs,
1095                                             unsigned char rhs, int flag);
1096 char __kmpc_atomic_fixed1_mul_cpt(ident_t *id_ref, int gtid, char *lhs,
1097                                   char rhs, int flag);
1098 char __kmpc_atomic_fixed1_orb_cpt(ident_t *id_ref, int gtid, char *lhs,
1099                                   char rhs, int flag);
1100 char __kmpc_atomic_fixed1_shl_cpt(ident_t *id_ref, int gtid, char *lhs,
1101                                   char rhs, int flag);
1102 char __kmpc_atomic_fixed1_shr_cpt(ident_t *id_ref, int gtid, char *lhs,
1103                                   char rhs, int flag);
1104 unsigned char __kmpc_atomic_fixed1u_shr_cpt(ident_t *id_ref, int gtid,
1105                                             unsigned char *lhs,
1106                                             unsigned char rhs, int flag);
1107 char __kmpc_atomic_fixed1_sub_cpt(ident_t *id_ref, int gtid, char *lhs,
1108                                   char rhs, int flag);
1109 char __kmpc_atomic_fixed1_xor_cpt(ident_t *id_ref, int gtid, char *lhs,
1110                                   char rhs, int flag);
1111 // 2-byte
// Capture routines on short operands (the `u` variants use unsigned short).
// Each returns a value of the operand type and takes a trailing `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1112 short __kmpc_atomic_fixed2_add_cpt(ident_t *id_ref, int gtid, short *lhs,
1113                                    short rhs, int flag);
1114 short __kmpc_atomic_fixed2_andb_cpt(ident_t *id_ref, int gtid, short *lhs,
1115                                     short rhs, int flag);
1116 short __kmpc_atomic_fixed2_div_cpt(ident_t *id_ref, int gtid, short *lhs,
1117                                    short rhs, int flag);
1118 unsigned short __kmpc_atomic_fixed2u_div_cpt(ident_t *id_ref, int gtid,
1119                                              unsigned short *lhs,
1120                                              unsigned short rhs, int flag);
1121 short __kmpc_atomic_fixed2_mul_cpt(ident_t *id_ref, int gtid, short *lhs,
1122                                    short rhs, int flag);
1123 short __kmpc_atomic_fixed2_orb_cpt(ident_t *id_ref, int gtid, short *lhs,
1124                                    short rhs, int flag);
1125 short __kmpc_atomic_fixed2_shl_cpt(ident_t *id_ref, int gtid, short *lhs,
1126                                    short rhs, int flag);
1127 short __kmpc_atomic_fixed2_shr_cpt(ident_t *id_ref, int gtid, short *lhs,
1128                                    short rhs, int flag);
1129 unsigned short __kmpc_atomic_fixed2u_shr_cpt(ident_t *id_ref, int gtid,
1130                                              unsigned short *lhs,
1131                                              unsigned short rhs, int flag);
1132 short __kmpc_atomic_fixed2_sub_cpt(ident_t *id_ref, int gtid, short *lhs,
1133                                    short rhs, int flag);
1134 short __kmpc_atomic_fixed2_xor_cpt(ident_t *id_ref, int gtid, short *lhs,
1135                                    short rhs, int flag);
// Capture routines for add/sub on 4- and 8-byte fixed and float operands.
// Each returns a value of the operand type and takes a trailing `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1136 // 4-byte add / sub fixed
1137 kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
1138                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1139 kmp_int32 __kmpc_atomic_fixed4_sub_cpt(ident_t *id_ref, int gtid,
1140                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1141 // 4-byte add / sub float
1142 kmp_real32 __kmpc_atomic_float4_add_cpt(ident_t *id_ref, int gtid,
1143                                         kmp_real32 *lhs, kmp_real32 rhs,
1144                                         int flag);
1145 kmp_real32 __kmpc_atomic_float4_sub_cpt(ident_t *id_ref, int gtid,
1146                                         kmp_real32 *lhs, kmp_real32 rhs,
1147                                         int flag);
1148 // 8-byte add / sub fixed
1149 kmp_int64 __kmpc_atomic_fixed8_add_cpt(ident_t *id_ref, int gtid,
1150                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1151 kmp_int64 __kmpc_atomic_fixed8_sub_cpt(ident_t *id_ref, int gtid,
1152                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1153 // 8-byte add / sub float
1154 kmp_real64 __kmpc_atomic_float8_add_cpt(ident_t *id_ref, int gtid,
1155                                         kmp_real64 *lhs, kmp_real64 rhs,
1156                                         int flag);
1157 kmp_real64 __kmpc_atomic_float8_sub_cpt(ident_t *id_ref, int gtid,
1158                                         kmp_real64 *lhs, kmp_real64 rhs,
1159                                         int flag);
1160 // 4-byte fixed
// Remaining capture routines on kmp_int32 operands (andb, div, mul, orb, shl,
// shr, xor); the `u` variants use kmp_uint32. Each returns a value of the
// operand type and takes a trailing `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1161 kmp_int32 __kmpc_atomic_fixed4_andb_cpt(ident_t *id_ref, int gtid,
1162                                         kmp_int32 *lhs, kmp_int32 rhs,
1163                                         int flag);
1164 kmp_int32 __kmpc_atomic_fixed4_div_cpt(ident_t *id_ref, int gtid,
1165                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1166 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt(ident_t *id_ref, int gtid,
1167                                          kmp_uint32 *lhs, kmp_uint32 rhs,
1168                                          int flag);
1169 kmp_int32 __kmpc_atomic_fixed4_mul_cpt(ident_t *id_ref, int gtid,
1170                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1171 kmp_int32 __kmpc_atomic_fixed4_orb_cpt(ident_t *id_ref, int gtid,
1172                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1173 kmp_int32 __kmpc_atomic_fixed4_shl_cpt(ident_t *id_ref, int gtid,
1174                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1175 kmp_int32 __kmpc_atomic_fixed4_shr_cpt(ident_t *id_ref, int gtid,
1176                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1177 kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt(ident_t *id_ref, int gtid,
1178                                          kmp_uint32 *lhs, kmp_uint32 rhs,
1179                                          int flag);
1180 kmp_int32 __kmpc_atomic_fixed4_xor_cpt(ident_t *id_ref, int gtid,
1181                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1182 // 8-byte fixed
// Remaining capture routines on kmp_int64 operands (andb, div, mul, orb, shl,
// shr, xor); the `u` variants use kmp_uint64. Each returns a value of the
// operand type and takes a trailing `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1183 kmp_int64 __kmpc_atomic_fixed8_andb_cpt(ident_t *id_ref, int gtid,
1184                                         kmp_int64 *lhs, kmp_int64 rhs,
1185                                         int flag);
1186 kmp_int64 __kmpc_atomic_fixed8_div_cpt(ident_t *id_ref, int gtid,
1187                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1188 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt(ident_t *id_ref, int gtid,
1189                                          kmp_uint64 *lhs, kmp_uint64 rhs,
1190                                          int flag);
1191 kmp_int64 __kmpc_atomic_fixed8_mul_cpt(ident_t *id_ref, int gtid,
1192                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1193 kmp_int64 __kmpc_atomic_fixed8_orb_cpt(ident_t *id_ref, int gtid,
1194                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1195 kmp_int64 __kmpc_atomic_fixed8_shl_cpt(ident_t *id_ref, int gtid,
1196                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1197 kmp_int64 __kmpc_atomic_fixed8_shr_cpt(ident_t *id_ref, int gtid,
1198                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1199 kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt(ident_t *id_ref, int gtid,
1200                                          kmp_uint64 *lhs, kmp_uint64 rhs,
1201                                          int flag);
1202 kmp_int64 __kmpc_atomic_fixed8_xor_cpt(ident_t *id_ref, int gtid,
1203                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
// Capture routines for div/mul on kmp_real32 and kmp_real64 operands. Each
// returns a value of the operand type and takes a trailing `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1204 // 4-byte float
1205 kmp_real32 __kmpc_atomic_float4_div_cpt(ident_t *id_ref, int gtid,
1206                                         kmp_real32 *lhs, kmp_real32 rhs,
1207                                         int flag);
1208 kmp_real32 __kmpc_atomic_float4_mul_cpt(ident_t *id_ref, int gtid,
1209                                         kmp_real32 *lhs, kmp_real32 rhs,
1210                                         int flag);
1211 // 8-byte float
1212 kmp_real64 __kmpc_atomic_float8_div_cpt(ident_t *id_ref, int gtid,
1213                                         kmp_real64 *lhs, kmp_real64 rhs,
1214                                         int flag);
1215 kmp_real64 __kmpc_atomic_float8_mul_cpt(ident_t *id_ref, int gtid,
1216                                         kmp_real64 *lhs, kmp_real64 rhs,
1217                                         int flag);
1218 // 1-, 2-, 4-, 8-byte logical (&&, ||)
// `andl` / `orl` capture routines on char, short, kmp_int32 and kmp_int64
// operands. Each returns a value of the operand type and takes a trailing
// `int flag`.
// NOTE(review): `flag` presumably selects whether the value captured is the
// one before or after the update -- confirm against the implementation.
1219 char __kmpc_atomic_fixed1_andl_cpt(ident_t *id_ref, int gtid, char *lhs,
1220                                    char rhs, int flag);
1221 char __kmpc_atomic_fixed1_orl_cpt(ident_t *id_ref, int gtid, char *lhs,
1222                                   char rhs, int flag);
1223 short __kmpc_atomic_fixed2_andl_cpt(ident_t *id_ref, int gtid, short *lhs,
1224                                     short rhs, int flag);
1225 short __kmpc_atomic_fixed2_orl_cpt(ident_t *id_ref, int gtid, short *lhs,
1226                                    short rhs, int flag);
1227 kmp_int32 __kmpc_atomic_fixed4_andl_cpt(ident_t *id_ref, int gtid,
1228                                         kmp_int32 *lhs, kmp_int32 rhs,
1229                                         int flag);
1230 kmp_int32 __kmpc_atomic_fixed4_orl_cpt(ident_t *id_ref, int gtid,
1231                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1232 kmp_int64 __kmpc_atomic_fixed8_andl_cpt(ident_t *id_ref, int gtid,
1233                                         kmp_int64 *lhs, kmp_int64 rhs,
1234                                         int flag);
1235 kmp_int64 __kmpc_atomic_fixed8_orl_cpt(ident_t *id_ref, int gtid,
1236                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1237 // MIN / MAX capture (_cpt) variants for fixed1/2/4/8, float4/8/10 and, with KMP_HAVE_QUAD, float16
1238 char __kmpc_atomic_fixed1_max_cpt(ident_t *id_ref, int gtid, char *lhs,
1239                                   char rhs, int flag);
1240 char __kmpc_atomic_fixed1_min_cpt(ident_t *id_ref, int gtid, char *lhs,
1241                                   char rhs, int flag);
1242 short __kmpc_atomic_fixed2_max_cpt(ident_t *id_ref, int gtid, short *lhs,
1243                                    short rhs, int flag);
1244 short __kmpc_atomic_fixed2_min_cpt(ident_t *id_ref, int gtid, short *lhs,
1245                                    short rhs, int flag);
1246 kmp_int32 __kmpc_atomic_fixed4_max_cpt(ident_t *id_ref, int gtid,
1247                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1248 kmp_int32 __kmpc_atomic_fixed4_min_cpt(ident_t *id_ref, int gtid,
1249                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1250 kmp_int64 __kmpc_atomic_fixed8_max_cpt(ident_t *id_ref, int gtid,
1251                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1252 kmp_int64 __kmpc_atomic_fixed8_min_cpt(ident_t *id_ref, int gtid,
1253                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1254 kmp_real32 __kmpc_atomic_float4_max_cpt(ident_t *id_ref, int gtid,
1255                                         kmp_real32 *lhs, kmp_real32 rhs,
1256                                         int flag);
1257 kmp_real32 __kmpc_atomic_float4_min_cpt(ident_t *id_ref, int gtid,
1258                                         kmp_real32 *lhs, kmp_real32 rhs,
1259                                         int flag);
1260 kmp_real64 __kmpc_atomic_float8_max_cpt(ident_t *id_ref, int gtid,
1261                                         kmp_real64 *lhs, kmp_real64 rhs,
1262                                         int flag);
1263 kmp_real64 __kmpc_atomic_float8_min_cpt(ident_t *id_ref, int gtid,
1264                                         kmp_real64 *lhs, kmp_real64 rhs,
1265                                         int flag);
1266 long double __kmpc_atomic_float10_max_cpt(ident_t *id_ref, int gtid,
1267                                           long double *lhs, long double rhs,
1268                                           int flag);
1269 long double __kmpc_atomic_float10_min_cpt(ident_t *id_ref, int gtid,
1270                                           long double *lhs, long double rhs,
1271                                           int flag);
1272 #if KMP_HAVE_QUAD
1273 QUAD_LEGACY __kmpc_atomic_float16_max_cpt(ident_t *id_ref, int gtid,
1274                                           QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1275                                           int flag);
1276 QUAD_LEGACY __kmpc_atomic_float16_min_cpt(ident_t *id_ref, int gtid,
1277                                           QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1278                                           int flag);
1279 #endif // KMP_HAVE_QUAD
1280 // .NEQV. (same as xor): capture (_cpt) variants for 1-, 2-, 4-, 8-byte fixed
1281 char __kmpc_atomic_fixed1_neqv_cpt(ident_t *id_ref, int gtid, char *lhs,
1282                                    char rhs, int flag);
1283 short __kmpc_atomic_fixed2_neqv_cpt(ident_t *id_ref, int gtid, short *lhs,
1284                                     short rhs, int flag);
1285 kmp_int32 __kmpc_atomic_fixed4_neqv_cpt(ident_t *id_ref, int gtid,
1286                                         kmp_int32 *lhs, kmp_int32 rhs,
1287                                         int flag);
1288 kmp_int64 __kmpc_atomic_fixed8_neqv_cpt(ident_t *id_ref, int gtid,
1289                                         kmp_int64 *lhs, kmp_int64 rhs,
1290                                         int flag);
1291 // .EQV. (same as ~xor): capture (_cpt) variants for 1-, 2-, 4-, 8-byte fixed
1292 char __kmpc_atomic_fixed1_eqv_cpt(ident_t *id_ref, int gtid, char *lhs,
1293                                   char rhs, int flag);
1294 short __kmpc_atomic_fixed2_eqv_cpt(ident_t *id_ref, int gtid, short *lhs,
1295                                    short rhs, int flag);
1296 kmp_int32 __kmpc_atomic_fixed4_eqv_cpt(ident_t *id_ref, int gtid,
1297                                        kmp_int32 *lhs, kmp_int32 rhs, int flag);
1298 kmp_int64 __kmpc_atomic_fixed8_eqv_cpt(ident_t *id_ref, int gtid,
1299                                        kmp_int64 *lhs, kmp_int64 rhs, int flag);
1300 // long double type (float10): capture (_cpt) variants of add, sub, mul, div
1301 long double __kmpc_atomic_float10_add_cpt(ident_t *id_ref, int gtid,
1302                                           long double *lhs, long double rhs,
1303                                           int flag);
1304 long double __kmpc_atomic_float10_sub_cpt(ident_t *id_ref, int gtid,
1305                                           long double *lhs, long double rhs,
1306                                           int flag);
1307 long double __kmpc_atomic_float10_mul_cpt(ident_t *id_ref, int gtid,
1308                                           long double *lhs, long double rhs,
1309                                           int flag);
1310 long double __kmpc_atomic_float10_div_cpt(ident_t *id_ref, int gtid,
1311                                           long double *lhs, long double rhs,
1312                                           int flag);
1313 #if KMP_HAVE_QUAD
1314 // _Quad type (float16, declared via QUAD_LEGACY): capture (_cpt) variants of add, sub, mul, div
1315 QUAD_LEGACY __kmpc_atomic_float16_add_cpt(ident_t *id_ref, int gtid,
1316                                           QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1317                                           int flag);
1318 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt(ident_t *id_ref, int gtid,
1319                                           QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1320                                           int flag);
1321 QUAD_LEGACY __kmpc_atomic_float16_mul_cpt(ident_t *id_ref, int gtid,
1322                                           QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1323                                           int flag);
1324 QUAD_LEGACY __kmpc_atomic_float16_div_cpt(ident_t *id_ref, int gtid,
1325                                           QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1326                                           int flag);
1327 #endif // KMP_HAVE_QUAD
1328 // Capture (_cpt) routines for complex types: add, sub, mul, div
1329 // Workaround for cmplx4 routines - they return void; the captured value is
1330 // returned via the 'out' argument (cmplx8 and cmplx10 return it by value)
1331 void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
1332                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
1333 void __kmpc_atomic_cmplx4_sub_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
1334                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
1335 void __kmpc_atomic_cmplx4_mul_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
1336                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
1337 void __kmpc_atomic_cmplx4_div_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
1338                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
1339 kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt(ident_t *id_ref, int gtid,
1340                                          kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
1341                                          int flag);
1342 kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt(ident_t *id_ref, int gtid,
1343                                          kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
1344                                          int flag);
1345 kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt(ident_t *id_ref, int gtid,
1346                                          kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
1347                                          int flag);
1348 kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt(ident_t *id_ref, int gtid,
1349                                          kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
1350                                          int flag);
1351 kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt(ident_t *id_ref, int gtid,
1352                                           kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
1353                                           int flag);
1354 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt(ident_t *id_ref, int gtid,
1355                                           kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
1356                                           int flag);
1357 kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt(ident_t *id_ref, int gtid,
1358                                           kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
1359                                           int flag);
1360 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt(ident_t *id_ref, int gtid,
1361                                           kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
1362                                           int flag);
1363 #if KMP_HAVE_QUAD
1364 CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt(ident_t *id_ref, int gtid,
1365                                           CPLX128_LEG *lhs, CPLX128_LEG rhs,
1366                                           int flag);
1367 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt(ident_t *id_ref, int gtid,
1368                                           CPLX128_LEG *lhs, CPLX128_LEG rhs,
1369                                           int flag);
1370 CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt(ident_t *id_ref, int gtid,
1371                                           CPLX128_LEG *lhs, CPLX128_LEG rhs,
1372                                           int flag);
1373 CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt(ident_t *id_ref, int gtid,
1374                                           CPLX128_LEG *lhs, CPLX128_LEG rhs,
1375                                           int flag);
1376 #if (KMP_ARCH_X86)
1377 // Routines with 16-byte arguments aligned to 16-byte boundary (the *_a16_* variants)
1378 Quad_a16_t __kmpc_atomic_float16_add_a16_cpt(ident_t *id_ref, int gtid,
1379                                              Quad_a16_t *lhs, Quad_a16_t rhs,
1380                                              int flag);
1381 Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt(ident_t *id_ref, int gtid,
1382                                              Quad_a16_t *lhs, Quad_a16_t rhs,
1383                                              int flag);
1384 Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt(ident_t *id_ref, int gtid,
1385                                              Quad_a16_t *lhs, Quad_a16_t rhs,
1386                                              int flag);
1387 Quad_a16_t __kmpc_atomic_float16_div_a16_cpt(ident_t *id_ref, int gtid,
1388                                              Quad_a16_t *lhs, Quad_a16_t rhs,
1389                                              int flag);
1390 Quad_a16_t __kmpc_atomic_float16_max_a16_cpt(ident_t *id_ref, int gtid,
1391                                              Quad_a16_t *lhs, Quad_a16_t rhs,
1392                                              int flag);
1393 Quad_a16_t __kmpc_atomic_float16_min_a16_cpt(ident_t *id_ref, int gtid,
1394                                              Quad_a16_t *lhs, Quad_a16_t rhs,
1395                                              int flag);
1396 kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt(ident_t *id_ref, int gtid,
1397                                                      kmp_cmplx128_a16_t *lhs,
1398                                                      kmp_cmplx128_a16_t rhs,
1399                                                      int flag);
1400 kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt(ident_t *id_ref, int gtid,
1401                                                      kmp_cmplx128_a16_t *lhs,
1402                                                      kmp_cmplx128_a16_t rhs,
1403                                                      int flag);
1404 kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt(ident_t *id_ref, int gtid,
1405                                                      kmp_cmplx128_a16_t *lhs,
1406                                                      kmp_cmplx128_a16_t rhs,
1407                                                      int flag);
1408 kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt(ident_t *id_ref, int gtid,
1409                                                      kmp_cmplx128_a16_t *lhs,
1410                                                      kmp_cmplx128_a16_t rhs,
1411                                                      int flag);
1412 #endif // KMP_ARCH_X86
1413 #endif // KMP_HAVE_QUAD
1414 // Argument-less entry points; presumably they bracket a generic atomic region - TODO confirm in the implementation
1415 void __kmpc_atomic_start(void);
1416 void __kmpc_atomic_end(void);
1417 
1418 // OpenMP 4.0 capture with reversed operands (_cpt_rev), for non-commutative operations:
1419 //   v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }
1420 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1421 char __kmpc_atomic_fixed1_sub_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
1422                                       char rhs, int flag);
1423 char __kmpc_atomic_fixed1_div_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
1424                                       char rhs, int flag);
1425 unsigned char __kmpc_atomic_fixed1u_div_cpt_rev(ident_t *id_ref, int gtid,
1426                                                 unsigned char *lhs,
1427                                                 unsigned char rhs, int flag);
1428 char __kmpc_atomic_fixed1_shl_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
1429                                       char rhs, int flag);
1430 char __kmpc_atomic_fixed1_shr_cpt_rev(ident_t *id_ref, int gtid, char *lhs,
1431                                       char rhs, int flag);
1432 unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev(ident_t *id_ref, int gtid,
1433                                                 unsigned char *lhs,
1434                                                 unsigned char rhs, int flag);
1435 short __kmpc_atomic_fixed2_sub_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
1436                                        short rhs, int flag);
1437 short __kmpc_atomic_fixed2_div_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
1438                                        short rhs, int flag);
1439 unsigned short __kmpc_atomic_fixed2u_div_cpt_rev(ident_t *id_ref, int gtid,
1440                                                  unsigned short *lhs,
1441                                                  unsigned short rhs, int flag);
1442 short __kmpc_atomic_fixed2_shl_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
1443                                        short rhs, int flag);
1444 short __kmpc_atomic_fixed2_shr_cpt_rev(ident_t *id_ref, int gtid, short *lhs,
1445                                        short rhs, int flag);
1446 unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev(ident_t *id_ref, int gtid,
1447                                                  unsigned short *lhs,
1448                                                  unsigned short rhs, int flag);
1449 kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev(ident_t *id_ref, int gtid,
1450                                            kmp_int32 *lhs, kmp_int32 rhs,
1451                                            int flag);
1452 kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev(ident_t *id_ref, int gtid,
1453                                            kmp_int32 *lhs, kmp_int32 rhs,
1454                                            int flag);
1455 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev(ident_t *id_ref, int gtid,
1456                                              kmp_uint32 *lhs, kmp_uint32 rhs,
1457                                              int flag);
1458 kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev(ident_t *id_ref, int gtid,
1459                                            kmp_int32 *lhs, kmp_int32 rhs,
1460                                            int flag);
1461 kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev(ident_t *id_ref, int gtid,
1462                                            kmp_int32 *lhs, kmp_int32 rhs,
1463                                            int flag);
1464 kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev(ident_t *id_ref, int gtid,
1465                                              kmp_uint32 *lhs, kmp_uint32 rhs,
1466                                              int flag);
1467 kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev(ident_t *id_ref, int gtid,
1468                                            kmp_int64 *lhs, kmp_int64 rhs,
1469                                            int flag);
1470 kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev(ident_t *id_ref, int gtid,
1471                                            kmp_int64 *lhs, kmp_int64 rhs,
1472                                            int flag);
1473 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev(ident_t *id_ref, int gtid,
1474                                              kmp_uint64 *lhs, kmp_uint64 rhs,
1475                                              int flag);
1476 kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev(ident_t *id_ref, int gtid,
1477                                            kmp_int64 *lhs, kmp_int64 rhs,
1478                                            int flag);
1479 kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev(ident_t *id_ref, int gtid,
1480                                            kmp_int64 *lhs, kmp_int64 rhs,
1481                                            int flag);
1482 kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev(ident_t *id_ref, int gtid,
1483                                              kmp_uint64 *lhs, kmp_uint64 rhs,
1484                                              int flag);
1485 float __kmpc_atomic_float4_sub_cpt_rev(ident_t *id_ref, int gtid, float *lhs,
1486                                        float rhs, int flag);
1487 float __kmpc_atomic_float4_div_cpt_rev(ident_t *id_ref, int gtid, float *lhs,
1488                                        float rhs, int flag);
1489 double __kmpc_atomic_float8_sub_cpt_rev(ident_t *id_ref, int gtid, double *lhs,
1490                                         double rhs, int flag);
1491 double __kmpc_atomic_float8_div_cpt_rev(ident_t *id_ref, int gtid, double *lhs,
1492                                         double rhs, int flag);
1493 long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid,
1494                                               long double *lhs, long double rhs,
1495                                               int flag);
1496 long double __kmpc_atomic_float10_div_cpt_rev(ident_t *id_ref, int gtid,
1497                                               long double *lhs, long double rhs,
1498                                               int flag);
1499 #if KMP_HAVE_QUAD
1500 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev(ident_t *id_ref, int gtid,
1501                                               QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1502                                               int flag);
1503 QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev(ident_t *id_ref, int gtid,
1504                                               QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
1505                                               int flag);
1506 #endif // KMP_HAVE_QUAD
1507 // Workaround for cmplx4 routines - they return void; the captured value is
1508 // returned via the 'out' argument (cmplx8 and cmplx10 return it by value)
1509 void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
1510                                       kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
1511                                       kmp_cmplx32 *out, int flag);
1512 void __kmpc_atomic_cmplx4_div_cpt_rev(ident_t *id_ref, int gtid,
1513                                       kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
1514                                       kmp_cmplx32 *out, int flag);
1515 kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev(ident_t *id_ref, int gtid,
1516                                              kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
1517                                              int flag);
1518 kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev(ident_t *id_ref, int gtid,
1519                                              kmp_cmplx64 *lhs, kmp_cmplx64 rhs,
1520                                              int flag);
1521 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev(ident_t *id_ref, int gtid,
1522                                               kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
1523                                               int flag);
1524 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev(ident_t *id_ref, int gtid,
1525                                               kmp_cmplx80 *lhs, kmp_cmplx80 rhs,
1526                                               int flag);
1527 #if KMP_HAVE_QUAD
1528 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev(ident_t *id_ref, int gtid,
1529                                               CPLX128_LEG *lhs, CPLX128_LEG rhs,
1530                                               int flag);
1531 CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev(ident_t *id_ref, int gtid,
1532                                               CPLX128_LEG *lhs, CPLX128_LEG rhs,
1533                                               int flag);
1534 #if (KMP_ARCH_X86)
1535 Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev(ident_t *id_ref, int gtid,
1536                                                  Quad_a16_t *lhs,
1537                                                  Quad_a16_t rhs, int flag);
1538 Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev(ident_t *id_ref, int gtid,
1539                                                  Quad_a16_t *lhs,
1540                                                  Quad_a16_t rhs, int flag);
1541 kmp_cmplx128_a16_t
1542 __kmpc_atomic_cmplx16_sub_a16_cpt_rev(ident_t *id_ref, int gtid,
1543                                       kmp_cmplx128_a16_t *lhs,
1544                                       kmp_cmplx128_a16_t rhs, int flag);
1545 kmp_cmplx128_a16_t
1546 __kmpc_atomic_cmplx16_div_a16_cpt_rev(ident_t *id_ref, int gtid,
1547                                       kmp_cmplx128_a16_t *lhs,
1548                                       kmp_cmplx128_a16_t rhs, int flag);
1549 #endif // KMP_ARCH_X86
1550 #endif // KMP_HAVE_QUAD
1551 
1552 // OpenMP 4.0 capture-write (swap) routines: { v = x; x = expr; }. Unlike the *_cpt routines they take no 'flag' argument.
1553 char __kmpc_atomic_fixed1_swp(ident_t *id_ref, int gtid, char *lhs, char rhs);
1554 short __kmpc_atomic_fixed2_swp(ident_t *id_ref, int gtid, short *lhs,
1555                                short rhs);
1556 kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
1557                                    kmp_int32 rhs);
1558 kmp_int64 __kmpc_atomic_fixed8_swp(ident_t *id_ref, int gtid, kmp_int64 *lhs,
1559                                    kmp_int64 rhs);
1560 float __kmpc_atomic_float4_swp(ident_t *id_ref, int gtid, float *lhs,
1561                                float rhs);
1562 double __kmpc_atomic_float8_swp(ident_t *id_ref, int gtid, double *lhs,
1563                                 double rhs);
1564 long double __kmpc_atomic_float10_swp(ident_t *id_ref, int gtid,
1565                                       long double *lhs, long double rhs);
1566 #if KMP_HAVE_QUAD
1567 QUAD_LEGACY __kmpc_atomic_float16_swp(ident_t *id_ref, int gtid,
1568                                       QUAD_LEGACY *lhs, QUAD_LEGACY rhs);
1569 #endif // KMP_HAVE_QUAD
1570 // TODO: check if we need a workaround here. As declared, cmplx4 swap returns void and takes an 'out' argument.
1571 void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
1572                               kmp_cmplx32 rhs, kmp_cmplx32 *out);
1573 // Commented-out value-returning alternative:
1574 //   kmp_cmplx32 __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, kmp_cmplx32 rhs);
1575 
1576 kmp_cmplx64 __kmpc_atomic_cmplx8_swp(ident_t *id_ref, int gtid,
1577                                      kmp_cmplx64 *lhs, kmp_cmplx64 rhs);
1578 kmp_cmplx80 __kmpc_atomic_cmplx10_swp(ident_t *id_ref, int gtid,
1579                                       kmp_cmplx80 *lhs, kmp_cmplx80 rhs);
1580 #if KMP_HAVE_QUAD
1581 CPLX128_LEG __kmpc_atomic_cmplx16_swp(ident_t *id_ref, int gtid,
1582                                       CPLX128_LEG *lhs, CPLX128_LEG rhs);
1583 #if (KMP_ARCH_X86)
1584 Quad_a16_t __kmpc_atomic_float16_a16_swp(ident_t *id_ref, int gtid,
1585                                          Quad_a16_t *lhs, Quad_a16_t rhs);
1586 kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp(ident_t *id_ref, int gtid,
1587                                                  kmp_cmplx128_a16_t *lhs,
1588                                                  kmp_cmplx128_a16_t rhs);
1589 #endif // KMP_ARCH_X86
1590 #endif // KMP_HAVE_QUAD
1591 
1592 // Capture routines for mixed types (RHS=float16): lhs keeps its own type, rhs is passed as _Quad
1593 #if KMP_HAVE_QUAD
1594 // 1-byte fixed lhs (char / unsigned char): add, sub, mul, div
1595 char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
1596                                      _Quad rhs, int flag);
1597 char __kmpc_atomic_fixed1_sub_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
1598                                      _Quad rhs, int flag);
1599 char __kmpc_atomic_fixed1_mul_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
1600                                      _Quad rhs, int flag);
1601 char __kmpc_atomic_fixed1_div_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
1602                                      _Quad rhs, int flag);
1603 unsigned char __kmpc_atomic_fixed1u_add_cpt_fp(ident_t *id_ref, int gtid,
1604                                                unsigned char *lhs, _Quad rhs,
1605                                                int flag);
1606 unsigned char __kmpc_atomic_fixed1u_sub_cpt_fp(ident_t *id_ref, int gtid,
1607                                                unsigned char *lhs, _Quad rhs,
1608                                                int flag);
1609 unsigned char __kmpc_atomic_fixed1u_mul_cpt_fp(ident_t *id_ref, int gtid,
1610                                                unsigned char *lhs, _Quad rhs,
1611                                                int flag);
1612 unsigned char __kmpc_atomic_fixed1u_div_cpt_fp(ident_t *id_ref, int gtid,
1613                                                unsigned char *lhs, _Quad rhs,
1614                                                int flag);
1615 // 2-byte fixed lhs (short / unsigned short) with _Quad rhs: add, sub, mul, div
1616 short __kmpc_atomic_fixed2_add_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
1617                                       _Quad rhs, int flag);
1618 short __kmpc_atomic_fixed2_sub_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
1619                                       _Quad rhs, int flag);
1620 short __kmpc_atomic_fixed2_mul_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
1621                                       _Quad rhs, int flag);
1622 short __kmpc_atomic_fixed2_div_cpt_fp(ident_t *id_ref, int gtid, short *lhs,
1623                                       _Quad rhs, int flag);
1624 unsigned short __kmpc_atomic_fixed2u_add_cpt_fp(ident_t *id_ref, int gtid,
1625                                                 unsigned short *lhs, _Quad rhs,
1626                                                 int flag);
1627 unsigned short __kmpc_atomic_fixed2u_sub_cpt_fp(ident_t *id_ref, int gtid,
1628                                                 unsigned short *lhs, _Quad rhs,
1629                                                 int flag);
1630 unsigned short __kmpc_atomic_fixed2u_mul_cpt_fp(ident_t *id_ref, int gtid,
1631                                                 unsigned short *lhs, _Quad rhs,
1632                                                 int flag);
1633 unsigned short __kmpc_atomic_fixed2u_div_cpt_fp(ident_t *id_ref, int gtid,
1634                                                 unsigned short *lhs, _Quad rhs,
1635                                                 int flag);
1636 // 4-byte fixed lhs (kmp_int32 / kmp_uint32) with _Quad rhs: add, sub, mul, div
1637 kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
1638                                           kmp_int32 *lhs, _Quad rhs, int flag);
1639 kmp_int32 __kmpc_atomic_fixed4_sub_cpt_fp(ident_t *id_ref, int gtid,
1640                                           kmp_int32 *lhs, _Quad rhs, int flag);
1641 kmp_int32 __kmpc_atomic_fixed4_mul_cpt_fp(ident_t *id_ref, int gtid,
1642                                           kmp_int32 *lhs, _Quad rhs, int flag);
1643 kmp_int32 __kmpc_atomic_fixed4_div_cpt_fp(ident_t *id_ref, int gtid,
1644                                           kmp_int32 *lhs, _Quad rhs, int flag);
1645 kmp_uint32 __kmpc_atomic_fixed4u_add_cpt_fp(ident_t *id_ref, int gtid,
1646                                             kmp_uint32 *lhs, _Quad rhs,
1647                                             int flag);
1648 kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_fp(ident_t *id_ref, int gtid,
1649                                             kmp_uint32 *lhs, _Quad rhs,
1650                                             int flag);
1651 kmp_uint32 __kmpc_atomic_fixed4u_mul_cpt_fp(ident_t *id_ref, int gtid,
1652                                             kmp_uint32 *lhs, _Quad rhs,
1653                                             int flag);
1654 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_fp(ident_t *id_ref, int gtid,
1655                                             kmp_uint32 *lhs, _Quad rhs,
1656                                             int flag);
1657 // 8-byte fixed lhs (kmp_int64 / kmp_uint64) with _Quad rhs: add, sub, mul, div
1658 kmp_int64 __kmpc_atomic_fixed8_add_cpt_fp(ident_t *id_ref, int gtid,
1659                                           kmp_int64 *lhs, _Quad rhs, int flag);
1660 kmp_int64 __kmpc_atomic_fixed8_sub_cpt_fp(ident_t *id_ref, int gtid,
1661                                           kmp_int64 *lhs, _Quad rhs, int flag);
1662 kmp_int64 __kmpc_atomic_fixed8_mul_cpt_fp(ident_t *id_ref, int gtid,
1663                                           kmp_int64 *lhs, _Quad rhs, int flag);
1664 kmp_int64 __kmpc_atomic_fixed8_div_cpt_fp(ident_t *id_ref, int gtid,
1665                                           kmp_int64 *lhs, _Quad rhs, int flag);
1666 kmp_uint64 __kmpc_atomic_fixed8u_add_cpt_fp(ident_t *id_ref, int gtid,
1667                                             kmp_uint64 *lhs, _Quad rhs,
1668                                             int flag);
1669 kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_fp(ident_t *id_ref, int gtid,
1670                                             kmp_uint64 *lhs, _Quad rhs,
1671                                             int flag);
1672 kmp_uint64 __kmpc_atomic_fixed8u_mul_cpt_fp(ident_t *id_ref, int gtid,
1673                                             kmp_uint64 *lhs, _Quad rhs,
1674                                             int flag);
1675 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_fp(ident_t *id_ref, int gtid,
1676                                             kmp_uint64 *lhs, _Quad rhs,
1677                                             int flag);
1678 
1679 float __kmpc_atomic_float4_add_cpt_fp(ident_t *id_ref, int gtid,
1680                                       kmp_real32 *lhs, _Quad rhs, int flag);
1681 float __kmpc_atomic_float4_sub_cpt_fp(ident_t *id_ref, int gtid,
1682                                       kmp_real32 *lhs, _Quad rhs, int flag);
1683 float __kmpc_atomic_float4_mul_cpt_fp(ident_t *id_ref, int gtid,
1684                                       kmp_real32 *lhs, _Quad rhs, int flag);
1685 float __kmpc_atomic_float4_div_cpt_fp(ident_t *id_ref, int gtid,
1686                                       kmp_real32 *lhs, _Quad rhs, int flag);
1687 
// Capture variants of atomic add / sub / mul / div for a kmp_real64 target
// combined with a _Quad right-hand operand. The return type is spelled
// `double` while the target pointer is `kmp_real64 *`.
// NOTE(review): kmp_real64 is presumably a typedef of double -- confirm in
// kmp_os.h. The meaning of `flag` is not visible here; presumably it selects
// capture-before vs. capture-after -- confirm against the definitions.
1688 double __kmpc_atomic_float8_add_cpt_fp(ident_t *id_ref, int gtid,
1689                                        kmp_real64 *lhs, _Quad rhs, int flag);
1690 double __kmpc_atomic_float8_sub_cpt_fp(ident_t *id_ref, int gtid,
1691                                        kmp_real64 *lhs, _Quad rhs, int flag);
1692 double __kmpc_atomic_float8_mul_cpt_fp(ident_t *id_ref, int gtid,
1693                                        kmp_real64 *lhs, _Quad rhs, int flag);
1694 double __kmpc_atomic_float8_div_cpt_fp(ident_t *id_ref, int gtid,
1695                                        kmp_real64 *lhs, _Quad rhs, int flag);
1696 
// Capture variants of atomic add / sub / mul / div for a long double target
// combined with a _Quad right-hand operand; each returns a long double.
// NOTE(review): the meaning of `flag` is not visible here; presumably it
// selects capture-before vs. capture-after -- confirm against the definitions.
1697 long double __kmpc_atomic_float10_add_cpt_fp(ident_t *id_ref, int gtid,
1698                                              long double *lhs, _Quad rhs,
1699                                              int flag);
1700 long double __kmpc_atomic_float10_sub_cpt_fp(ident_t *id_ref, int gtid,
1701                                              long double *lhs, _Quad rhs,
1702                                              int flag);
1703 long double __kmpc_atomic_float10_mul_cpt_fp(ident_t *id_ref, int gtid,
1704                                              long double *lhs, _Quad rhs,
1705                                              int flag);
1706 long double __kmpc_atomic_float10_div_cpt_fp(ident_t *id_ref, int gtid,
1707                                              long double *lhs, _Quad rhs,
1708                                              int flag);
1709 
// "_cpt_rev_fp" declarations: only sub and div are declared in this form,
// for every target type (1/2/4/8-byte signed and unsigned integers, float,
// double and long double), each with a _Quad right-hand operand. The return
// type of each function matches the pointee type of lhs.
// NOTE(review): "rev" presumably means the operands are reversed, i.e. the
// update is `*lhs = rhs OP *lhs` -- confirm against the definitions.
// NOTE(review): `flag` presumably selects capture-before vs. capture-after --
// confirm against the definitions.

// 1-byte targets (char / unsigned char).
1710 char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs,
1711                                          _Quad rhs, int flag);
1712 unsigned char __kmpc_atomic_fixed1u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1713                                                    unsigned char *lhs,
1714                                                    _Quad rhs, int flag);
1715 char __kmpc_atomic_fixed1_div_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs,
1716                                          _Quad rhs, int flag);
1717 unsigned char __kmpc_atomic_fixed1u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1718                                                    unsigned char *lhs,
1719                                                    _Quad rhs, int flag);
// 2-byte targets (short / unsigned short).
1720 short __kmpc_atomic_fixed2_sub_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs,
1721                                           _Quad rhs, int flag);
1722 unsigned short __kmpc_atomic_fixed2u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1723                                                     unsigned short *lhs,
1724                                                     _Quad rhs, int flag);
1725 short __kmpc_atomic_fixed2_div_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs,
1726                                           _Quad rhs, int flag);
1727 unsigned short __kmpc_atomic_fixed2u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1728                                                     unsigned short *lhs,
1729                                                     _Quad rhs, int flag);
// 4-byte targets (kmp_int32 / kmp_uint32).
1730 kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1731                                               kmp_int32 *lhs, _Quad rhs,
1732                                               int flag);
1733 kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1734                                                 kmp_uint32 *lhs, _Quad rhs,
1735                                                 int flag);
1736 kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1737                                               kmp_int32 *lhs, _Quad rhs,
1738                                               int flag);
1739 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1740                                                 kmp_uint32 *lhs, _Quad rhs,
1741                                                 int flag);
// 8-byte targets (kmp_int64 / kmp_uint64).
1742 kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1743                                               kmp_int64 *lhs, _Quad rhs,
1744                                               int flag);
1745 kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1746                                                 kmp_uint64 *lhs, _Quad rhs,
1747                                                 int flag);
1748 kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1749                                               kmp_int64 *lhs, _Quad rhs,
1750                                               int flag);
1751 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1752                                                 kmp_uint64 *lhs, _Quad rhs,
1753                                                 int flag);
// Floating-point targets (float, double, long double). Unlike the non-rev
// float4/float8 declarations, lhs is spelled `float *` / `double *` here.
1754 float __kmpc_atomic_float4_sub_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs,
1755                                           _Quad rhs, int flag);
1756 float __kmpc_atomic_float4_div_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs,
1757                                           _Quad rhs, int flag);
1758 double __kmpc_atomic_float8_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1759                                            double *lhs, _Quad rhs, int flag);
1760 double __kmpc_atomic_float8_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1761                                            double *lhs, _Quad rhs, int flag);
1762 long double __kmpc_atomic_float10_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
1763                                                  long double *lhs, _Quad rhs,
1764                                                  int flag);
1765 long double __kmpc_atomic_float10_div_cpt_rev_fp(ident_t *id_ref, int gtid,
1766                                                  long double *lhs, _Quad rhs,
1767                                                  int flag);
1768 
1769 #endif // KMP_HAVE_QUAD
1770 
1771 // End of OpenMP 4.0 capture
1772 
1773 // OpenMP 5.1 compare and swap
1774 /*
1775     __kmpc_atomic_bool_1_cas
1776     __kmpc_atomic_bool_2_cas
1777     __kmpc_atomic_bool_4_cas
1778     __kmpc_atomic_bool_8_cas
1779     __kmpc_atomic_val_1_cas
1780     __kmpc_atomic_val_2_cas
1781     __kmpc_atomic_val_4_cas
1782     __kmpc_atomic_val_8_cas
1783     __kmpc_atomic_bool_1_cas_cpt
1784     __kmpc_atomic_bool_2_cas_cpt
1785     __kmpc_atomic_bool_4_cas_cpt
1786     __kmpc_atomic_bool_8_cas_cpt
1787     __kmpc_atomic_val_1_cas_cpt
1788     __kmpc_atomic_val_2_cas_cpt
1789     __kmpc_atomic_val_4_cas_cpt
1790     __kmpc_atomic_val_8_cas_cpt
1791 */
1792 // In all interfaces of CAS (Compare And Swap):
1793 // r is the boolean result of comparison
1794 // x is memory location to operate on
1795 // e is expected (old) value
1796 // d is desired (new) value
1797 // pv is pointer to captured value v whose location may coincide with e
1798 
1799 // { r = x == e; if(r) { x = d; } }
1800 // functions return result of comparison
// Parameters: loc (ident_t *), gtid (int), x = location operated on,
// e = expected value, d = desired value. Returns bool (the comparison result).
// Operand type per variant: _1_ char, _2_ short, _4_ kmp_int32, _8_ kmp_int64.
1801 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d);
1802 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
1803                               short d);
1804 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
1805                               kmp_int32 d);
1806 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
1807                               kmp_int64 d);
1808 
1809 // { v = x; if (x == e) { x = d; } }
1810 // functions return old value
// Parameters: loc (ident_t *), gtid (int), x = location operated on,
// e = expected value, d = desired value. The return type equals the operand
// type (the old value of x is returned by value).
// Operand type per variant: _1_ char, _2_ short, _4_ kmp_int32, _8_ kmp_int64.
1811 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d);
1812 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
1813                               short d);
1814 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
1815                                   kmp_int32 e, kmp_int32 d);
1816 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
1817                                   kmp_int64 e, kmp_int64 d);
1818 
1819 // { r = x == e; if(r) { x = d; } else { v = x; } }
1820 // v gets old value if comparison failed, untouched otherwise
1821 // functions return result of comparison
// Parameters: loc (ident_t *), gtid (int), x = location operated on,
// e = expected value, d = desired value, pv = pointer to the captured value v
// (its location may coincide with e). Returns bool (the comparison result).
// Operand type per variant: _1_ char, _2_ short, _4_ kmp_int32, _8_ kmp_int64.
1822 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
1823                                   char d, char *pv);
1824 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
1825                                   short d, short *pv);
1826 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
1827                                   kmp_int32 e, kmp_int32 d, kmp_int32 *pv);
1828 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
1829                                   kmp_int64 e, kmp_int64 d, kmp_int64 *pv);
1830 
1831 // { if (x == e) { x = d; }; v = x; }
1832 // v gets old value if comparison failed, new value otherwise
1833 // functions return old value
// Parameters: loc (ident_t *), gtid (int), x = location operated on,
// e = expected value, d = desired value, pv = pointer to the captured value v
// (its location may coincide with e). The return type equals the operand type
// (the old value of x is returned by value, separately from *pv).
// Operand type per variant: _1_ char, _2_ short, _4_ kmp_int32, _8_ kmp_int64.
1834 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
1835                                  char d, char *pv);
1836 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
1837                                   short d, short *pv);
1838 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
1839                                       kmp_int32 e, kmp_int32 d, kmp_int32 *pv);
1840 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
1841                                       kmp_int64 e, kmp_int64 d, kmp_int64 *pv);
1842 
1843 // End OpenMP 5.1 compare + capture
1844 
1845 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1846 
1847 /* ------------------------------------------------------------------------ */
1848 
1849 #ifdef __cplusplus
1850 } // extern "C"
1851 #endif
1852 
1853 #endif /* KMP_ATOMIC_H */
1854 
1855 // end of file
1856