xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_atomic.cpp (revision 7c20397b724a55001c2054fa133a768e9d06eb1c)
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
// Convenience aliases for unsigned integer types used by the atomic
// entry-point definitions below.
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
24 The compiler is at liberty to inline atomic operations that are naturally
25 supported by the target architecture. For instance on IA-32 architecture an
26 atomic like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
49 int64_t    | `fixed8`
50 uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
53 float 10 (8087 eighty bit float)  | `float10`
54 complex<float>   |  `cmplx4`
55 complex<double>  | `cmplx8`
56 complex<float10> | `cmplx10`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
81 
82 Update Functions
83 ================
84 The general form of an atomic function that just performs an update (without a
85 `capture`)
86 @code
87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
88 lhs, TYPE rhs );
89 @endcode
90 @param id_ref  a pointer to source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
94 
95 `capture` functions
96 ===================
97 The capture functions perform an atomic update and return a result, which is
98 either the value before the capture, or that after. They take an additional
99 argument to determine which result is returned.
100 Their general form is therefore
101 @code
102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
103 lhs, TYPE rhs, int flag );
104 @endcode
105 @param id_ref  a pointer to source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
111 
112 The one set of exceptions to this is the `complex<float>` type where the value
113 is not returned, rather an extra argument pointer is passed.
114 
115 They look like
116 @code
117 void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 *
118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119 @endcode
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133 
134 For the write operations the form is
135 @code
136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137 );
138 @endcode
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
144 Functions for integers
145 ---------------------
146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and
147 unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
389 There are versions here for floating point numbers of size 4, 8, 10 and 16
390 bytes. (Ten byte floats are used by X87, but are now rare).
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
484 Functions for complex types whose component floating point variables are of size
485 4,8,10 or 16 bytes. The names here are based on the size of the component float,
486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 
// Selects the atomic implementation style at runtime:
//   1 - Intel performance mode (lock-free primitives where possible)
//   2 - GOMP compatibility mode: user-coded atomics are funneled through a
//       common critical section (see OP_GOMP_CRITICAL below).
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
// Locks serializing atomic regions that cannot be implemented with a single
// hardware primitive, one per operand size/type class (the suffix encodes
// size in bytes and i=integer, r=real, c=complex).
// NOTE(review): KMP_ALIGN(128) aligns the first lock — presumably to keep
// these locks on their own cache line; confirm against the KMP_ALIGN
// definition in kmp.h.
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for complex byte data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602    on *_32 and *_32e. This is just a temporary workaround for the problem. It
603    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604    in assembler language. */
// Expands to the "volatile" qualifier; kept as a macro so the workaround
// described above can be switched off in one place if no longer needed.
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648                                           kmp_cmplx128_a4_t &rhs) {
649   return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652                                           kmp_cmplx128_a4_t &rhs) {
653   return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656                                           kmp_cmplx128_a4_t &rhs) {
657   return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660                                           kmp_cmplx128_a4_t &rhs) {
661   return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665                                            kmp_cmplx128_a16_t &rhs) {
666   return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669                                            kmp_cmplx128_a16_t &rhs) {
670   return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673                                            kmp_cmplx128_a16_t &rhs) {
674   return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677                                            kmp_cmplx128_a16_t &rhs) {
678   return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routines declarations looks like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
// Some entry points may be reached with gtid == KMP_GTID_UNKNOWN; resolve a
// real global thread id via __kmp_entry_gtid() before it is used (e.g. to
// acquire a lock in the critical-section macros below).
688 #define KMP_CHECK_GTID                                                         \
689   if (gtid == KMP_GTID_UNKNOWN) {                                              \
690     gtid = __kmp_entry_gtid();                                                 \
691   } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695 //     fixed)
696 //     OP_ID   - operation identifier (add, sub, mul, ...)
697 //     TYPE    - operands' type
//     RET_TYPE - return type of the generated entry point
// NOTE: the expansion opens the function body but does not close it; the
// closing brace is supplied where the macro is used.
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
699   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
700                                              TYPE *lhs, TYPE rhs) {            \
701     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
702     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
// These map the LCK_ID tokens used by the critical-section macros (via the
// ATOMIC_LOCK##LCK_ID paste) onto the global locks defined above.
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // 4-byte fixed (kmp_int32)
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // 8-byte fixed (kmp_int64)
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 //     OP     - operator (it's supposed to contain an assignment)
723 //     LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1, 2-byte - expect valid parameter, other - check before this macro
// e.g. OP_CRITICAL(+=, 4i) expands to "(*lhs) += (rhs);" bracketed by
// acquire/release of __kmp_atomic_lock_4i.
726 #define OP_CRITICAL(OP, LCK_ID)                                                \
727   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
728                                                                                \
729   (*lhs) OP(rhs);                                                              \
730                                                                                \
731   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
// Like OP_CRITICAL, but OP is a plain (non-assigning) operator and the
// result is explicitly cast back to TYPE before the store — presumably for
// the mixed-type variants (e.g. _fp) where rhs may be wider than *lhs;
// confirm against the macro's users.
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
734   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
735   (*lhs) = (TYPE)((*lhs)OP rhs);                                               \
736   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange.  Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1.  If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
// In GOMP-compatibility mode (__kmp_atomic_mode == 2), perform the update
// under the single generic lock (LCK_ID 0 -> __kmp_atomic_lock) and return
// from the enclosing entry point. Note the bare "return;": these forms are
// only usable inside void entry points.
761 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
762   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
763     KMP_CHECK_GTID;                                                            \
764     OP_CRITICAL(OP, 0);                                                        \
765     return;                                                                    \
766   }
767 
// Same as above but for the casting OP_UPDATE_CRITICAL form.
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
769   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
770     KMP_CHECK_GTID;                                                            \
771     OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
772     return;                                                                    \
773   }
774 #else
// Outside GOMP-compat builds these expand to nothing (dead-code elimination
// removes the FLAG test entirely).
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
// Brief pause between compare-and-swap retries (used in OP_CMPXCHG below):
// a short hardware delay on Intel(R) MIC, a no-op elsewhere.
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 //     TYPE    - operands' type
788 //     BITS    - size in bits, used to distinguish low level calls
789 //     OP      - operator
// Classic CAS retry loop: snapshot *lhs, compute the updated value, then try
// to publish it with KMP_COMPARE_AND_STORE_ACQ<BITS>. On failure the snapshot
// is re-read — through a volatile-qualified pointer, so each retry observes a
// fresh value — and the computation repeats. The compare itself is done on
// the BITS-wide integer image of the operands.
790 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
791   {                                                                            \
792     TYPE old_value, new_value;                                                 \
793     old_value = *(TYPE volatile *)lhs;                                         \
794     new_value = (TYPE)(old_value OP rhs);                                      \
795     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
796         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
797         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
798       KMP_DO_PAUSE;                                                            \
799                                                                                \
800       old_value = *(TYPE volatile *)lhs;                                       \
801       new_value = (TYPE)(old_value OP rhs);                                    \
802     }                                                                          \
803   }
804 
#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
// Same CAS retry loop as OP_CMPXCHG, but the old/new values are accessed
// through a self-referential pointer member (vvv) so the affected compilers
// cannot cache the load of *lhs in a register.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
    }                                                                          \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
834 
#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
#undef OP_CMPXCHG
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = old_value.cmp OP rhs;                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = old_value.cmp OP rhs;                                    \
    }                                                                          \
  }

// Redefined without the (TYPE) cast on the update expression, again to keep
// MSVC ARM64 building; semantics otherwise match the original definition.
#undef OP_UPDATE_CRITICAL
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (*lhs)OP rhs;                                                       \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
867 
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
// Entry body for integer add/sub: one atomic fetch-and-add primitive; OP is
// the sign, so subtraction becomes adding the negated rhs.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
  }
// -------------------------------------------------------------------------
// Entry body for generic operators: CAS retry loop (see OP_CMPXCHG).
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
// MASK is the low-address-bit mask used to detect a misaligned lhs; when the
// address is misaligned the update falls back to the per-type critical lock.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
// NOTE: on these architectures the workaround variant intentionally expands
// to the plain OP_CMPXCHG body (the C78287 issue only affected IA-32 builds).
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
// Routines for ATOMIC 4-byte operands addition and subtraction
// (last argument is the GOMP-compat flag fed to OP_UPDATE_GOMP_CRITICAL)
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
968 // ------------------------------------------------------------------------
969 // Entries definition for integer operands
970 //     TYPE_ID - operands type and size (fixed4, float4)
971 //     OP_ID   - operation identifier (add, sub, mul, ...)
972 //     TYPE    - operand type
973 //     BITS    - size in bits, used to distinguish low level calls
974 //     OP      - operator (used in critical section)
975 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
976 //     MASK    - used for alignment check
977 
978 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986                0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994                0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004                0) // __kmpc_atomic_fixed1_xor
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008                0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016                0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026                0) // __kmpc_atomic_fixed2_xor
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028                0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036                0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044                0) // __kmpc_atomic_fixed4_xor
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and ||                         */
1075 
1076 // ------------------------------------------------------------------------
1077 // Need separate macros for &&, || because there is no combined assignment
1078 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1080   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1081   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1082   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1083   }
1084 
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
// Logical &&/|| via the CAS retry loop; GOMP guard takes the full
// "= *lhs OP" assignment fragment (see ATOMIC_CRIT_L).
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
// Logical-AND / logical-OR entry points for 1/2/4/8-byte integers.
ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
1128 /* Routines for Fortran operators that matched no one in C:                  */
1129 /* MAX, MIN, .EQV., .NEQV.                                                   */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
1135 // OP - operator to check if we need any actions?
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1137   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1138                                                                                \
1139   if (*lhs OP rhs) { /* still need actions? */                                 \
1140     *lhs = rhs;                                                                \
1141   }                                                                            \
1142   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1147   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1148     KMP_CHECK_GTID;                                                            \
1149     MIN_MAX_CRITSECT(OP, 0);                                                   \
1150     return;                                                                    \
1151   }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1158   {                                                                            \
1159     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1160     TYPE old_value;                                                            \
1161     temp_val = *lhs;                                                           \
1162     old_value = temp_val;                                                      \
1163     while (old_value OP rhs && /* still need actions? */                       \
1164            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1165                (kmp_int##BITS *)lhs,                                           \
1166                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1167                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1168       temp_val = *lhs;                                                         \
1169       old_value = temp_val;                                                    \
1170     }                                                                          \
1171   }
1172 
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1176   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1177   if (*lhs OP rhs) { /* need actions? */                                       \
1178     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1179     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1180   }                                                                            \
1181   }
1182 
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
// Unlocked pre-check, then the lock-free CAS loop (MIN_MAX_CMPXCHG).
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
  }                                                                            \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
// Misaligned lhs (low bits match MASK) falls back to the critical section.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
    } else {                                                                   \
      KMP_CHECK_GTID;                                                          \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
    }                                                                          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213 
// MIN/MAX entry points. Note: OP is the *inverted* comparison used as the
// "still need update?" test (max uses <, min uses >).
MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
                 1) // __kmpc_atomic_float10_max
MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
                 1) // __kmpc_atomic_float10_min
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
1256 // ------------------------------------------------------------------------
1257 // Need separate macros for .EQV. because of the need of complement (~)
1258 // OP ignored for critical sections, ^=~ used instead
1259 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1260   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1261   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1262   OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
1263   }
1264 
1265 // ------------------------------------------------------------------------
1266 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267 // ------------------------------------------------------------------------
1268 // X86 or X86_64: no alignment problems ===================================
1269 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1270                         GOMP_FLAG)                                             \
1271   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1272   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1273   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1274   }
1275 // ------------------------------------------------------------------------
1276 #else
1277 // ------------------------------------------------------------------------
1278 // Code for other architectures that don't handle unaligned accesses.
1279 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1280                         GOMP_FLAG)                                             \
1281   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1282   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
1283   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1284     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1285   } else {                                                                     \
1286     KMP_CHECK_GTID;                                                            \
1287     OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
1288   }                                                                            \
1289   }
1290 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291 
// .NEQV. is plain XOR (^); .EQV. is XOR-with-complement (^~).
ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1308 
1309 // ------------------------------------------------------------------------
1310 // Routines for Extended types: long double, _Quad, complex flavours (use
1311 // critical section)
1312 //     TYPE_ID, OP_ID, TYPE - detailed above
1313 //     OP      - operator
1314 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1315 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1316   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1317   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1318   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1319   }
1320 
1321 /* ------------------------------------------------------------------------- */
1322 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323 // routines for long double type
1324 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325                 1) // __kmpc_atomic_float10_add
1326 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327                 1) // __kmpc_atomic_float10_sub
1328 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329                 1) // __kmpc_atomic_float10_mul
1330 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331                 1) // __kmpc_atomic_float10_div
1332 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333 #if KMP_HAVE_QUAD
1334 // routines for _Quad type
1335 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336                 1) // __kmpc_atomic_float16_add
1337 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338                 1) // __kmpc_atomic_float16_sub
1339 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340                 1) // __kmpc_atomic_float16_mul
1341 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342                 1) // __kmpc_atomic_float16_div
1343 #if (KMP_ARCH_X86)
// _a16 variants take Quad_a16_t (presumably the 16-byte-aligned quad type --
// name suggests alignment; declared in kmp_atomic.h).  IA-32 only.
1344 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345                 1) // __kmpc_atomic_float16_add_a16
1346 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347                 1) // __kmpc_atomic_float16_sub_a16
1348 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349                 1) // __kmpc_atomic_float16_mul_a16
1350 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351                 1) // __kmpc_atomic_float16_div_a16
1352 #endif // (KMP_ARCH_X86)
1353 #endif // KMP_HAVE_QUAD
1354 // routines for complex types
1355 
1356 #if USE_CMPXCHG_FIX
1357 // workaround for C78287 (complex(kind=4) data type)
// With the fix enabled, the 8-byte cmplx4 update goes through a 64-bit
// cmpxchg-based macro instead of the plain critical-section form below.
1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359                           1) // __kmpc_atomic_cmplx4_add
1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361                           1) // __kmpc_atomic_cmplx4_sub
1362 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363                           1) // __kmpc_atomic_cmplx4_mul
1364 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365                           1) // __kmpc_atomic_cmplx4_div
1366 // end of the workaround for C78287
1367 #else
1368 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372 #endif // USE_CMPXCHG_FIX
1373 
1374 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380                 1) // __kmpc_atomic_cmplx10_add
1381 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382                 1) // __kmpc_atomic_cmplx10_sub
1383 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384                 1) // __kmpc_atomic_cmplx10_mul
1385 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386                 1) // __kmpc_atomic_cmplx10_div
1387 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388 #if KMP_HAVE_QUAD
1389 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390                 1) // __kmpc_atomic_cmplx16_add
1391 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392                 1) // __kmpc_atomic_cmplx16_sub
1393 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394                 1) // __kmpc_atomic_cmplx16_mul
1395 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396                 1) // __kmpc_atomic_cmplx16_div
1397 #if (KMP_ARCH_X86)
1398 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399                 1) // __kmpc_atomic_cmplx16_add_a16
1400 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401                 1) // __kmpc_atomic_cmplx16_sub_a16
1402 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403                 1) // __kmpc_atomic_cmplx16_mul_a16
1404 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405                 1) // __kmpc_atomic_cmplx16_div_a16
1406 #endif // (KMP_ARCH_X86)
1407 #endif // KMP_HAVE_QUAD
1408 
1409 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1410 // Supported only on IA-32 architecture and Intel(R) 64
1411 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412 
1413 // ------------------------------------------------------------------------
1414 // Operation on *lhs, rhs bound by critical section
1415 //     OP     - plain binary operator; the macro itself performs the
1416 //              assignment with REVERSED operands: *lhs = rhs OP *lhs
1417 //     LCK_ID - lock identifier
1418 // Note: don't check gtid as it should always be valid
1419 // 1, 2-byte - expect valid parameter, other - check before this macro
1420 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
1421   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1422                                                                                \
1423   (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
1424                                                                                \
1425   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1426 
1427 #ifdef KMP_GOMP_COMPAT
// GOMP-compat path: when FLAG is set and the runtime is in GOMP atomic mode
// (__kmp_atomic_mode == 2), do the whole update under lock 0 and return
// early, skipping the cmpxchg/critical code that follows in the caller.
1428 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
1429   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1430     KMP_CHECK_GTID;                                                            \
1431     OP_CRITICAL_REV(TYPE, OP, 0);                                              \
1432     return;                                                                    \
1433   }
1434 
1435 #else
1436 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1437 #endif /* KMP_GOMP_COMPAT */
1437 
1438 // Beginning of a definition (provides name, parameters, debug trace)
1439 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1440 //     fixed)
1441 //     OP_ID   - operation identifier (add, sub, mul, ...)
1442 //     TYPE    - operands' type
// Opens the body of __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev; the matching closing
// brace is supplied by the macro that uses this one.
1443 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1444   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1445                                                    TYPE *lhs, TYPE rhs) {      \
1446     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1447     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448 
1449 // ------------------------------------------------------------------------
1450 // Operation on *lhs, rhs using "compare_and_store" routine
1451 //     TYPE    - operands' type
1452 //     BITS    - size in bits, used to distinguish low level calls
1453 //     OP      - operator
1454 // Note: temp_val introduced in order to force the compiler to read
1455 //       *lhs only once (w/o it the compiler reads *lhs twice)
// Reverse variant of OP_CMPXCHG: the new value is computed as
// rhs OP old_value (operands swapped).  On CAS failure, pause and retry
// with a freshly loaded *lhs.
1456 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1457   {                                                                            \
1458     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1459     TYPE old_value, new_value;                                                 \
1460     temp_val = *lhs;                                                           \
1461     old_value = temp_val;                                                      \
1462     new_value = (TYPE)(rhs OP old_value);                                      \
1463     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1464         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1465         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1466       KMP_DO_PAUSE;                                                            \
1467                                                                                \
1468       temp_val = *lhs;                                                         \
1469       old_value = temp_val;                                                    \
1470       new_value = (TYPE)(rhs OP old_value);                                    \
1471     }                                                                          \
1472   }
1473 
1474 // -------------------------------------------------------------------------
// Generates __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev using the cmpxchg loop.
// Note: LCK_ID is accepted for signature symmetry with the other entry
// macros but is not used in this expansion (the GOMP path uses lock 0).
1475 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1476   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1477   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1478   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1479   }
1480 
1481 // ------------------------------------------------------------------------
1482 // Entries definition for integer operands
1483 //     TYPE_ID - operands type and size (fixed4, float4)
1484 //     OP_ID   - operation identifier (add, sub, mul, ...)
1485 //     TYPE    - operand type
1486 //     BITS    - size in bits, used to distinguish low level calls
1487 //     OP      - operator (used in critical section)
1488 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1489 
1490 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1491 // ------------------------------------------------------------------------
1492 // Routines for ATOMIC integer operands, other operators
1493 // ------------------------------------------------------------------------
1494 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
// Only the non-commutative operators (sub, div, shl, shr) get _rev entries;
// add/mul/bitops don't need a reversed form.
1495 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507 
1508 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520 
1521 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533 
1534 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546 
1547 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551 
1552 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1557 
1558 // ------------------------------------------------------------------------
1559 // Routines for Extended types: long double, _Quad, complex flavours (use
1560 // critical section)
1561 //     TYPE_ID, OP_ID, TYPE - detailed above
1562 //     OP      - operator
1563 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
// Reverse (rhs OP *lhs) form for wide types: always lock-based, generating
// __kmpc_atomic_<TYPE_ID>_<OP_ID>_rev.
1564 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1565   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1566   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1567   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1568   }
1569 
1570 /* ------------------------------------------------------------------------- */
1571 // routines for long double type
// Reverse entries for wide types: only sub and div (the non-commutative ops).
1572 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573                     1) // __kmpc_atomic_float10_sub_rev
1574 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575                     1) // __kmpc_atomic_float10_div_rev
1576 #if KMP_HAVE_QUAD
1577 // routines for _Quad type
1578 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579                     1) // __kmpc_atomic_float16_sub_rev
1580 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581                     1) // __kmpc_atomic_float16_div_rev
1582 #if (KMP_ARCH_X86)
1583 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584                     1) // __kmpc_atomic_float16_sub_a16_rev
1585 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586                     1) // __kmpc_atomic_float16_div_a16_rev
1587 #endif // KMP_ARCH_X86
1588 #endif // KMP_HAVE_QUAD
1589 
1590 // routines for complex types
1591 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592                     1) // __kmpc_atomic_cmplx4_sub_rev
1593 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594                     1) // __kmpc_atomic_cmplx4_div_rev
1595 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596                     1) // __kmpc_atomic_cmplx8_sub_rev
1597 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598                     1) // __kmpc_atomic_cmplx8_div_rev
1599 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600                     1) // __kmpc_atomic_cmplx10_sub_rev
1601 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602                     1) // __kmpc_atomic_cmplx10_div_rev
1603 #if KMP_HAVE_QUAD
1604 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605                     1) // __kmpc_atomic_cmplx16_sub_rev
1606 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607                     1) // __kmpc_atomic_cmplx16_div_rev
1608 #if (KMP_ARCH_X86)
1609 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1613 #endif // KMP_ARCH_X86
1614 #endif // KMP_HAVE_QUAD
1615 
1616 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618 
1619 /* ------------------------------------------------------------------------ */
1620 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
1621 /* Note: in order to reduce the total number of types combinations          */
1622 /*       it is supposed that compiler converts RHS to longest floating type,*/
1623 /*       that is _Quad, before call to any of these routines                */
1624 /* Conversion to _Quad will be done by the compiler during calculation,     */
1625 /*    conversion back to TYPE - before the assignment, like:                */
1626 /*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
1627 /* Performance penalty expected because of SW emulation use                 */
1628 /* ------------------------------------------------------------------------ */
1629 
// Opens the body of __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>, whose lhs and
// rhs have different types; the closing brace comes from the using macro.
1630 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1631   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1632       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1633     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1634     KA_TRACE(100,                                                              \
1635              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1636               gtid));
1637 
1638 // -------------------------------------------------------------------------
// Mixed-type entry that always updates under the LCK_ID critical section
// (used for lhs types too wide for cmpxchg, e.g. long double).
1639 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1640                            GOMP_FLAG)                                          \
1641   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1642   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1643   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1644   }
1645 
1646 // -------------------------------------------------------------------------
1647 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648 // -------------------------------------------------------------------------
1649 // X86 or X86_64: no alignment problems ====================================
// LCK_ID and MASK are unused on this arm; they exist so both arms share one
// invocation list below.
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1651                            LCK_ID, MASK, GOMP_FLAG)                            \
1652   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1653   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1654   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1655   }
1656 // -------------------------------------------------------------------------
1657 #else
1658 // ------------------------------------------------------------------------
1659 // Code for other architectures that don't handle unaligned accesses.
// Aligned lhs: lock-free cmpxchg; unaligned lhs: critical section LCK_ID.
1660 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1661                            LCK_ID, MASK, GOMP_FLAG)                            \
1662   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1663   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1664   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1665     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1666   } else {                                                                     \
1667     KMP_CHECK_GTID;                                                            \
1668     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1669                        LCK_ID) /* unaligned address - use critical */          \
1670   }                                                                            \
1671   }
1672 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673 
1674 // -------------------------------------------------------------------------
1675 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676 // -------------------------------------------------------------------------
// Reverse (*lhs = rhs OP *lhs) mixed-type entry via cmpxchg loop.
// LCK_ID and MASK are unused in this expansion (x86 has no alignment issue).
1677 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1678                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1679   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1680   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1681   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1682   }
// Reverse mixed-type entry via critical section (for wide lhs types).
1683 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1684                                LCK_ID, GOMP_FLAG)                              \
1685   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1686   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1687   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1688   }
1689 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690 
1691 // RHS=float8
// Mixed-type entries with a double (kmp_real64) rhs.  Note the GOMP_FLAG
// pattern: fixed4 entries pass 0, the other widths pass KMP_ARCH_X86.
1692 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701                    0) // __kmpc_atomic_fixed4_mul_float8
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703                    0) // __kmpc_atomic_fixed4_div_float8
1704 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716 
1717 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718 // use them)
1719 #if KMP_HAVE_QUAD
// Mixed-type entries with a _Quad rhs ("_fp" suffix).  Same GOMP_FLAG
// pattern as the float8 table: fixed4/fixed4u pass 0, others KMP_ARCH_X86.
1720 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736 
1737 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753 
1754 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755                    0) // __kmpc_atomic_fixed4_add_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757                    0) // __kmpc_atomic_fixed4u_add_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759                    0) // __kmpc_atomic_fixed4_sub_fp
1760 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761                    0) // __kmpc_atomic_fixed4u_sub_fp
1762 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763                    0) // __kmpc_atomic_fixed4_mul_fp
1764 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765                    0) // __kmpc_atomic_fixed4u_mul_fp
1766 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767                    0) // __kmpc_atomic_fixed4_div_fp
1768 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769                    0) // __kmpc_atomic_fixed4u_div_fp
1770 
1771 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787 
1788 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796 
1797 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805 
1806 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
// long double lhs with _Quad rhs: always critical-section based (10r lock).
1807 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808                    1) // __kmpc_atomic_float10_add_fp
1809 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810                    1) // __kmpc_atomic_float10_sub_fp
1811 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812                    1) // __kmpc_atomic_float10_mul_fp
1813 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814                    1) // __kmpc_atomic_float10_div_fp
1815 
1816 // Reverse operations
// *lhs = rhs OP *lhs with a _Quad rhs; again only sub and div need _rev.
1817 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825 
1826 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834 
1835 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1837 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840                        0) // __kmpc_atomic_fixed4_div_rev_fp
1841 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1843 
1844 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852 
1853 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857 
1858 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862 
1863 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864                        1) // __kmpc_atomic_float10_sub_rev_fp
1865 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866                        1) // __kmpc_atomic_float10_div_rev_fp
1867 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868 
1869 #endif // KMP_HAVE_QUAD
1870 
// ATOMIC_CMPXCHG_CMPLX: mixed-type complex update (cmplx4 lhs, cmplx8 rhs).
// Three variants are selected below by architecture / workaround flag; all
// share the same signature so callers are unaffected by the choice.
1871 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872 // ------------------------------------------------------------------------
1873 // X86 or X86_64: no alignment problems ====================================
1874 #if USE_CMPXCHG_FIX
1875 // workaround for C78287 (complex(kind=4) data type)
// Uses OP_CMPXCHG_WORKAROUND instead of the plain compare-exchange loop;
// LCK_ID and MASK are accepted for signature parity but unused here.
1876 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1877                              LCK_ID, MASK, GOMP_FLAG)                          \
1878   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1879   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1880   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1881   }
1882 // end of the second part of the workaround for C78287
1883 #else
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1885                              LCK_ID, MASK, GOMP_FLAG)                          \
1886   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1887   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1888   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1889   }
1890 #endif // USE_CMPXCHG_FIX
1891 #else
1892 // ------------------------------------------------------------------------
1893 // Code for other architectures that don't handle unaligned accesses.
// If the low MASK bits of lhs are zero the address is sufficiently aligned
// for the lock-free compare-exchange; otherwise fall back to the critical
// section identified by LCK_ID.
1894 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1895                              LCK_ID, MASK, GOMP_FLAG)                          \
1896   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1897   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1898   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1899     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1900   } else {                                                                     \
1901     KMP_CHECK_GTID;                                                            \
1902     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1903                        LCK_ID) /* unaligned address - use critical */          \
1904   }                                                                            \
1905   }
1906 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907 
1908 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1916 
1917 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1918 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1919 
1920 // ------------------------------------------------------------------------
1921 // Atomic READ routines
1922 
1923 // ------------------------------------------------------------------------
1924 // Beginning of a definition (provides name, parameters, debug trace)
1925 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1926 //     fixed)
1927 //     OP_ID   - operation identifier (add, sub, mul, ...)
1928 //     TYPE    - operands' type
// Opens the function body; the closing brace is supplied by the macro that
// uses this one (ATOMIC_FIXED_READ and friends).
1929 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1930   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1931                                              TYPE *loc) {                      \
1932     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1933     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1934 
1935 // ------------------------------------------------------------------------
1936 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1937 //     TYPE    - operands' type
1938 //     BITS    - size in bits, used to distinguish low level calls
1939 //     OP      - operator
1940 // Note: temp_val introduced in order to force the compiler to read
1941 //       *lhs only once (w/o it the compiler reads *lhs twice)
1942 // TODO: check if it is still necessary
1943 // Return old value regardless of the result of "compare & swap" operation
// The union punning lets a floating value round-trip through the integer
// compare-and-store primitive of matching width; OP is unused here (read is
// implemented as CAS with identical expected/new values).
1944 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1945   {                                                                            \
1946     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1947     union f_i_union {                                                          \
1948       TYPE f_val;                                                              \
1949       kmp_int##BITS i_val;                                                     \
1950     };                                                                         \
1951     union f_i_union old_value;                                                 \
1952     temp_val = *loc;                                                           \
1953     old_value.f_val = temp_val;                                                \
1954     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1955         (kmp_int##BITS *)loc,                                                  \
1956         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1957         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1958     new_value = old_value.f_val;                                               \
1959     return new_value;                                                          \
1960   }
1961 
1962 // -------------------------------------------------------------------------
1963 // Operation on *lhs, rhs bound by critical section
1964 //     OP     - operator (it's supposed to contain an assignment)
1965 //     LCK_ID - lock identifier
1966 // Note: don't check gtid as it should always be valid
1967 // 1, 2-byte - expect valid parameter, other - check before this macro
1968 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1969   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1970                                                                                \
1971   new_value = (*loc);                                                          \
1972                                                                                \
1973   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1974 
1975 // -------------------------------------------------------------------------
// GOMP-compatibility path: when the flag is set and libgomp-style serialized
// atomics are in effect (__kmp_atomic_mode == 2), take lock 0 and return.
1976 #ifdef KMP_GOMP_COMPAT
1977 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1978   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1979     KMP_CHECK_GTID;                                                            \
1980     OP_CRITICAL_READ(OP, 0);                                                   \
1981     return new_value;                                                          \
1982   }
1983 #else
1984 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1985 #endif /* KMP_GOMP_COMPAT */
1986 
1987 // -------------------------------------------------------------------------
// Integer read implemented as atomic fetch-and-add of 0 (returns old value).
1988 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1989   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1990   TYPE new_value;                                                              \
1991   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1992   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1993   return new_value;                                                            \
1994   }
1995 // -------------------------------------------------------------------------
1996 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1997   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1998   TYPE new_value;                                                              \
1999   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
2000   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
2001   }
2002 // ------------------------------------------------------------------------
2003 // Routines for Extended types: long double, _Quad, complex flavours (use
2004 // critical section)
2005 //     TYPE_ID, OP_ID, TYPE - detailed above
2006 //     OP      - operator
2007 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2008 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2009   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
2010   TYPE new_value;                                                              \
2011   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
2012   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
2013   return new_value;                                                            \
2014   }
2015 
2016 // ------------------------------------------------------------------------
2017 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2018 // value doesn't work.
2019 // Let's return the read value through the additional parameter.
2020 #if (KMP_OS_WINDOWS)
2021 
// Same as OP_CRITICAL_READ but stores through the extra 'out' parameter
// instead of assigning a local to be returned.
2022 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
2023   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2024                                                                                \
2025   (*out) = (*loc);                                                             \
2026                                                                                \
2027   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2028 // ------------------------------------------------------------------------
2029 #ifdef KMP_GOMP_COMPAT
2030 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
2031   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2032     KMP_CHECK_GTID;                                                            \
2033     OP_CRITICAL_READ_WRK(OP, 0);                                               \
2034   }
2035 #else
2036 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2037 #endif /* KMP_GOMP_COMPAT */
2038 // ------------------------------------------------------------------------
// Entry-point opener for the workaround flavour: returns void and takes the
// result pointer 'out' as the first parameter.
2039 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
2040   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2041                                          TYPE *loc) {                          \
2042     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2043     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2044 
2045 // ------------------------------------------------------------------------
2046 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
2047   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
2048   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
2049   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
2050   }
2051 
2052 #endif // KMP_OS_WINDOWS
2053 
2054 // ------------------------------------------------------------------------
// Instantiate the atomic READ entry points.  Small/native-width integers use
// fetch-and-add-0; floats of register width use the CAS-based read; extended
// and complex types serialize on a per-size lock.
2055 //                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
2056 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2057 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2058                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2059 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2060                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2061 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2062                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2063 
2064 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2065 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2066                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2067 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2068                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2069 
2070 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2071                      1) // __kmpc_atomic_float10_rd
2072 #if KMP_HAVE_QUAD
2073 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2074                      1) // __kmpc_atomic_float16_rd
2075 #endif // KMP_HAVE_QUAD
2076 
2077 // Fix for CQ220361 on Windows* OS
2078 #if (KMP_OS_WINDOWS)
2079 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2080                          1) // __kmpc_atomic_cmplx4_rd
2081 #else
2082 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2083                      1) // __kmpc_atomic_cmplx4_rd
2084 #endif // (KMP_OS_WINDOWS)
2085 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2086                      1) // __kmpc_atomic_cmplx8_rd
2087 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2088                      1) // __kmpc_atomic_cmplx10_rd
2089 #if KMP_HAVE_QUAD
2090 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2091                      1) // __kmpc_atomic_cmplx16_rd
2092 #if (KMP_ARCH_X86)
// "_a16" variants operate on 16-byte-aligned types.
2093 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2094                      1) // __kmpc_atomic_float16_a16_rd
2095 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2096                      1) // __kmpc_atomic_cmplx16_a16_rd
2097 #endif // (KMP_ARCH_X86)
2098 #endif // KMP_HAVE_QUAD
2099 
2100 // ------------------------------------------------------------------------
2101 // Atomic WRITE routines
2102 
// Write via single atomic exchange of the matching integer width; the store
// result is discarded since WRITE has no captured value.
2103 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2104   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2105   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2106   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2107   }
2108 // ------------------------------------------------------------------------
// Same as above but uses the real-typed exchange primitive.
2109 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2110   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2111   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2112   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2113   }
2114 
2115 // ------------------------------------------------------------------------
2116 // Operation on *lhs, rhs using "compare_and_store" routine
2117 //     TYPE    - operands' type
2118 //     BITS    - size in bits, used to distinguish low level calls
2119 //     OP      - operator
2120 // Note: temp_val introduced in order to force the compiler to read
2121 //       *lhs only once (w/o it the compiler reads *lhs twice)
// CAS loop that unconditionally stores rhs; retries if another thread
// changed *lhs between the read and the compare-and-store.
2122 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2123   {                                                                            \
2124     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2125     TYPE old_value, new_value;                                                 \
2126     temp_val = *lhs;                                                           \
2127     old_value = temp_val;                                                      \
2128     new_value = rhs;                                                           \
2129     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2130         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2131         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2132       temp_val = *lhs;                                                         \
2133       old_value = temp_val;                                                    \
2134       new_value = rhs;                                                         \
2135     }                                                                          \
2136   }
2137 
2138 // -------------------------------------------------------------------------
2139 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2140   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2141   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2142   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2143   }
2144 
2145 // ------------------------------------------------------------------------
2146 // Routines for Extended types: long double, _Quad, complex flavours (use
2147 // critical section)
2148 //     TYPE_ID, OP_ID, TYPE - detailed above
2149 //     OP      - operator
2150 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2151 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2152   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2153   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2154   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2155   }
2156 // -------------------------------------------------------------------------
2157 
// Instantiate the atomic WRITE entry points.  On 32-bit x86 the 64-bit
// variants fall back to the CAS loop (ATOMIC_CMPXCHG_WR) instead of a single
// exchange; elsewhere a plain atomic exchange suffices.
2158 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2159                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2160 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2161                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2162 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2163                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2164 #if (KMP_ARCH_X86)
2165 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2166                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2167 #else
2168 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2169                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2170 #endif // (KMP_ARCH_X86)
2171 
2172 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2173                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2174 #if (KMP_ARCH_X86)
2175 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2176                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2177 #else
2178 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2179                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2180 #endif // (KMP_ARCH_X86)
2181 
// Extended and complex types: critical-section write.
2182 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2183                    1) // __kmpc_atomic_float10_wr
2184 #if KMP_HAVE_QUAD
2185 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2186                    1) // __kmpc_atomic_float16_wr
2187 #endif // KMP_HAVE_QUAD
2188 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2189 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2190                    1) // __kmpc_atomic_cmplx8_wr
2191 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2192                    1) // __kmpc_atomic_cmplx10_wr
2193 #if KMP_HAVE_QUAD
2194 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2195                    1) // __kmpc_atomic_cmplx16_wr
2196 #if (KMP_ARCH_X86)
2197 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2198                    1) // __kmpc_atomic_float16_a16_wr
2199 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2200                    1) // __kmpc_atomic_cmplx16_a16_wr
2201 #endif // (KMP_ARCH_X86)
2202 #endif // KMP_HAVE_QUAD
2203 
2204 // ------------------------------------------------------------------------
2205 // Atomic CAPTURE routines
2206 
2207 // Beginning of a definition (provides name, parameters, debug trace)
2208 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2209 //     fixed)
2210 //     OP_ID   - operation identifier (add, sub, mul, ...)
2211 //     TYPE    - operands' type
// CAPTURE entry points take an extra 'flag' parameter: nonzero means return
// the value *after* the update, zero means return the value *before* it.
2212 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2213   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2214                                              TYPE *lhs, TYPE rhs, int flag) {  \
2215     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2216     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2217 
2218 // -------------------------------------------------------------------------
2219 // Operation on *lhs, rhs bound by critical section
2220 //     OP     - operator (it's supposed to contain an assignment)
2221 //     LCK_ID - lock identifier
2222 // Note: don't check gtid as it should always be valid
2223 // 1, 2-byte - expect valid parameter, other - check before this macro
2224 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2225   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2226                                                                                \
2227   if (flag) {                                                                  \
2228     (*lhs) OP rhs;                                                             \
2229     new_value = (*lhs);                                                        \
2230   } else {                                                                     \
2231     new_value = (*lhs);                                                        \
2232     (*lhs) OP rhs;                                                             \
2233   }                                                                            \
2234                                                                                \
2235   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2236   return new_value;
2237 
// Variant where OP is a plain operator (not "op="): the update is spelled
// out as an assignment with an explicit cast back to TYPE.
2238 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
2239   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2240                                                                                \
2241   if (flag) {                                                                  \
2242     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2243     new_value = (*lhs);                                                        \
2244   } else {                                                                     \
2245     new_value = (*lhs);                                                        \
2246     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2247   }                                                                            \
2248                                                                                \
2249   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2250   return new_value;
2251 
2252 // ------------------------------------------------------------------------
2253 #ifdef KMP_GOMP_COMPAT
2254 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
2255   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2256     KMP_CHECK_GTID;                                                            \
2257     OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
2258   }
2259 #else
2260 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2261 #endif /* KMP_GOMP_COMPAT */
2262 
2263 // ------------------------------------------------------------------------
2264 // Operation on *lhs, rhs using "compare_and_store" routine
2265 //     TYPE    - operands' type
2266 //     BITS    - size in bits, used to distinguish low level calls
2267 //     OP      - operator
2268 // Note: temp_val introduced in order to force the compiler to read
2269 //       *lhs only once (w/o it the compiler reads *lhs twice)
// CAS loop for capture: computes old OP rhs, retries until the store wins,
// then returns new or old value depending on 'flag'.
2270 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2271   {                                                                            \
2272     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2273     TYPE old_value, new_value;                                                 \
2274     temp_val = *lhs;                                                           \
2275     old_value = temp_val;                                                      \
2276     new_value = (TYPE)(old_value OP rhs);                                      \
2277     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2278         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2279         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2280       temp_val = *lhs;                                                         \
2281       old_value = temp_val;                                                    \
2282       new_value = (TYPE)(old_value OP rhs);                                    \
2283     }                                                                          \
2284     if (flag) {                                                                \
2285       return new_value;                                                        \
2286     } else                                                                     \
2287       return old_value;                                                        \
2288   }
2289 
2290 // -------------------------------------------------------------------------
// The outer 'new_value' is only used on the GOMP-compat path; (void) silences
// the unused-variable warning when that path is compiled out.
2291 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2292   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2293   TYPE new_value;                                                              \
2294   (void)new_value;                                                             \
2295   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2296   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2297   }
2298 
2299 // -------------------------------------------------------------------------
// Add/sub capture via a single atomic fetch-and-add; the captured "after"
// value is reconstructed as old_value OP rhs.
2300 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2301   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2302   TYPE old_value, new_value;                                                   \
2303   (void)new_value;                                                             \
2304   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2305   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2306   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2307   if (flag) {                                                                  \
2308     return old_value OP rhs;                                                   \
2309   } else                                                                       \
2310     return old_value;                                                          \
2311   }
2312 // -------------------------------------------------------------------------
2313 
2314 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2315                      0) // __kmpc_atomic_fixed4_add_cpt
2316 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2317                      0) // __kmpc_atomic_fixed4_sub_cpt
2318 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2319                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2320 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2321                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2322 
2323 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2324                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2325 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2326                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2327 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2328                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2329 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2330                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2331 
2332 // ------------------------------------------------------------------------
2333 // Entries definition for integer operands
2334 //     TYPE_ID - operands type and size (fixed4, float4)
2335 //     OP_ID   - operation identifier (add, sub, mul, ...)
2336 //     TYPE    - operand type
2337 //     BITS    - size in bits, used to distinguish low level calls
2338 //     OP      - operator (used in critical section)
2339 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2340 // ------------------------------------------------------------------------
2341 // Routines for ATOMIC integer operands, other operators
2342 // ------------------------------------------------------------------------
2343 //              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG
// 8-bit signed/unsigned integer operands
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
// 16-bit signed/unsigned integer operands
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
// 32-bit signed/unsigned integer operands (add/sub are defined earlier via
// ATOMIC_FIXED_ADD_CPT)
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
// 64-bit signed/unsigned integer operands (add/sub are defined earlier via
// ATOMIC_FIXED_ADD_CPT)
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
// 32/64-bit real operands (add/sub are defined earlier)
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2432 //              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG
2433 
2434 // CAPTURE routines for mixed types RHS=float16
2435 #if KMP_HAVE_QUAD
2436 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID  - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID    - operation identifier (add, sub, mul, ...)
//     TYPE     - operands' type
//     RTYPE_ID - RHS operand's type id, appended to the routine name (fp)
//     RTYPE    - RHS operand's type (e.g. _Quad)
// Note: the generated function is left open here; the *_CPT_MIX macros below
// supply the body and the closing brace.
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));
2449 
// -------------------------------------------------------------------------
// Capture routine with mixed-type RHS, implemented via a compare-and-swap
// retry loop. BITS selects the low-level CAS width; GOMP_FLAG routes
// GOMP-compat mode through a critical section instead. LCK_ID and MASK are
// accepted for signature symmetry with sibling macros but are not referenced
// by this expansion.
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
2458 
// -------------------------------------------------------------------------
// Capture routine with mixed-type RHS for types with no CAS-sized
// representation: the whole update + capture runs under the lock identified
// by LCK_ID.
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
                                LCK_ID, GOMP_FLAG)                             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }
2467 
// LHS: 1-byte integers, RHS: _Quad
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

// LHS: 2-byte integers, RHS: _Quad
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

// LHS: 4-byte integers, RHS: _Quad
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

// LHS: 8-byte integers, RHS: _Quad
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

// LHS: 4-byte real, RHS: _Quad
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

// LHS: 8-byte real, RHS: _Quad
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

// LHS: long double, RHS: _Quad — no CAS width, so use the critical section
ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp
2562 
2563 #endif // KMP_HAVE_QUAD
2564 
2565 // ------------------------------------------------------------------------
2566 // Routines for C/C++ Reduction operators && and ||
2567 
2568 // -------------------------------------------------------------------------
2569 // Operation on *lhs, rhs bound by critical section
2570 //     OP     - operator (it's supposed to contain an assignment)
2571 //     LCK_ID - lock identifier
2572 // Note: don't check gtid as it should always be valid
2573 // 1, 2-byte - expect valid parameter, other - check before this macro
// OP arrives as the token sequence "= *lhs &&" or "= *lhs ||" (see
// ATOMIC_CMPX_L_CPT below), so "new_value OP rhs" expands to a complete
// assignment.
// flag != 0: capture the post-operation value; flag == 0: capture the
// pre-operation value.
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    new_value OP rhs;                                                          \
    (*lhs) = new_value;                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2586 
2587 // ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
// GOMP compatibility mode (__kmp_atomic_mode == 2): run the &&/|| capture
// inside the critical section and return the captured value immediately,
// bypassing the CAS path that follows in the caller macro.
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_L_CPT(OP, 0);                                                  \
    return new_value;                                                          \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2598 
2599 // ------------------------------------------------------------------------
2600 // Need separate macros for &&, || because there is no combined assignment
// The captured value is produced inside OP_CMPXCHG_CPT; the "(void)new_value"
// cast presumably silences a set-but-unused warning when the GOMP branch
// compiles away — confirm against build flags.
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
2608 
// Capture entries for logical && and || on 1/2/4/8-byte integers
ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2625 
2626 // -------------------------------------------------------------------------
2627 // Routines for Fortran operators that matched no one in C:
2628 // MAX, MIN, .EQV., .NEQV.
2629 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2630 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2631 
2632 // -------------------------------------------------------------------------
2633 // MIN and MAX need separate macros
2634 // OP - operator to check if we need any actions?
// Min/max update + capture entirely under the lock. OP is "<" for max and
// ">" for min: the store happens only while "*lhs OP rhs" still holds after
// acquiring the lock. flag != 0 captures the new value (rhs), flag == 0 the
// old one; if no update is needed the current *lhs is returned.
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    old_value = *lhs;                                                          \
    *lhs = rhs;                                                                \
    if (flag)                                                                  \
      new_value = rhs;                                                         \
    else                                                                       \
      new_value = old_value;                                                   \
  } else {                                                                     \
    new_value = *lhs;                                                          \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;
2650 
2651 // -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
// GOMP-compat mode 2: take the critical-section path (lock id 0).
// MIN_MAX_CRITSECT_CPT ends with a return, so this branch never falls
// through to the CAS path.
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2661 
2662 // -------------------------------------------------------------------------
// Lock-free min/max via compare-and-swap: retry while the candidate value
// still wins ("old_value OP rhs") and the exchange fails because another
// thread changed *lhs. On exit, captures rhs (flag != 0) or the last
// observed previous value (flag == 0).
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    /*TYPE old_value; */                                                       \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
    if (flag)                                                                  \
      return rhs;                                                              \
    else                                                                       \
      return old_value;                                                        \
  }
2682 
2683 // -------------------------------------------------------------------------
2684 // 1-byte, 2-byte operands - use critical section
// The unsynchronized "*lhs OP rhs" test is a racy fast path; when it fires,
// MIN_MAX_CRITSECT_CPT re-checks under the lock and returns from inside the
// macro. The trailing "return *lhs" therefore only runs when no update was
// attempted (unsynchronized read of the current value).
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
  }                                                                            \
  return *lhs;                                                                 \
  }
2694 
// CAS-based variant of the min/max capture routine. Same racy fast-path
// structure as MIN_MAX_CRITICAL_CPT: both inner macros return on the update
// path, so "return *lhs" covers only the no-action case. (void)new_value
// suppresses an unused-variable warning on the CAS path.
#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  (void)new_value;                                                             \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
  }                                                                            \
  return *lhs;                                                                 \
  }
2705 
// max uses OP "<" (replace when *lhs < rhs), min uses ">"
MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
// wide types: whole update under the lock
MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
                     1) // __kmpc_atomic_float10_max_cpt
MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
                     1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
// 16-byte aligned _Quad variants
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
2746 
2747 // ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
// GOMP-compat mode 2: perform the EQV/NEQV operation inside a critical
// section. OP arrives as the assignment form "^= (TYPE) ~" (see
// ATOMIC_CMPX_EQV_CPT below).
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT(OP, 0);                                                    \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2757 // ------------------------------------------------------------------------
// Fortran .EQV. (bitwise a == b) implemented as a ^= ~b: callers pass the
// token pair "^~" as OP for the CAS path, while the GOMP path receives the
// equivalent combined assignment "^= (TYPE) ~".
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
2765 
2766 // ------------------------------------------------------------------------
2767 
// .NEQV. is plain XOR; .EQV. uses the "^~" token pair (a ^= ~b)
ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2784 
2785 // ------------------------------------------------------------------------
2786 // Routines for Extended types: long double, _Quad, complex flavours (use
2787 // critical section)
2788 //     TYPE_ID, OP_ID, TYPE - detailed above
2789 //     OP      - operator
2790 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
// Whole update + capture inside the lock — used for types with no CAS-sized
// representation (long double, _Quad, complex).
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }
2797 
2798 // ------------------------------------------------------------------------
2799 // Workaround for cmplx4. Regular routines with return value don't work
2800 // on Win_32e. Let's return captured values through the additional parameter.
// Locked update for the cmplx4 workaround routines: the captured value is
// delivered through *out instead of a return value (see Win_32e note above).
// flag != 0: *out receives the post-operation value; flag == 0: the
// pre-operation value.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) OP rhs;                                                             \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
2814 // ------------------------------------------------------------------------
2815 
#ifdef KMP_GOMP_COMPAT
// GOMP-compat mode 2 for the cmplx4 workaround: OP## = pastes the plain
// operator into its compound-assignment form (e.g. "+" -> "+=").
// OP_CRITICAL_CPT_WRK ends with "return", so this branch never falls
// through.
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
2825 // ------------------------------------------------------------------------
2826 
// Prototype for the cmplx4 workaround routines: the result is returned via
// the extra "out" parameter, so the generated function returns void. Leaves
// the definition open; ATOMIC_CRITICAL_CPT_WRK supplies the body and the
// closing brace.
#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2832 // ------------------------------------------------------------------------
2833 
2834 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2835   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2836   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2837   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2838   }
2839 // The end of workaround for cmplx4
2840 
2841 /* ------------------------------------------------------------------------- */
// Instantiate lock-based capture entry points (ATOMIC_CRITICAL_CPT is
// defined earlier in this file) for wide floating types that cannot use a
// native compare-and-swap. Each invocation generates the __kmpc_atomic_*
// function named in the trailing comment; the final "1" is the GOMP flag.
2842 // routines for long double type
2843 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2844                     1) // __kmpc_atomic_float10_add_cpt
2845 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2846                     1) // __kmpc_atomic_float10_sub_cpt
2847 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2848                     1) // __kmpc_atomic_float10_mul_cpt
2849 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2850                     1) // __kmpc_atomic_float10_div_cpt
2851 #if KMP_HAVE_QUAD
2852 // routines for _Quad type
2853 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2854                     1) // __kmpc_atomic_float16_add_cpt
2855 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2856                     1) // __kmpc_atomic_float16_sub_cpt
2857 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2858                     1) // __kmpc_atomic_float16_mul_cpt
2859 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2860                     1) // __kmpc_atomic_float16_div_cpt
2861 #if (KMP_ARCH_X86)
// _a16 variants use the 16-byte-aligned quad type on 32-bit x86.
2862 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2863                     1) // __kmpc_atomic_float16_add_a16_cpt
2864 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2865                     1) // __kmpc_atomic_float16_sub_a16_cpt
2866 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2867                     1) // __kmpc_atomic_float16_mul_a16_cpt
2868 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2869                     1) // __kmpc_atomic_float16_div_a16_cpt
2870 #endif // (KMP_ARCH_X86)
2871 #endif // KMP_HAVE_QUAD
2872 
2873 // routines for complex types
2874 
// cmplx4 uses the void-returning WRK variant (result through *out); the
// wider complex types use the regular value-returning critical variant.
2875 // cmplx4 routines to return void
2876 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2877                         1) // __kmpc_atomic_cmplx4_add_cpt
2878 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2879                         1) // __kmpc_atomic_cmplx4_sub_cpt
2880 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2881                         1) // __kmpc_atomic_cmplx4_mul_cpt
2882 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2883                         1) // __kmpc_atomic_cmplx4_div_cpt
2884 
2885 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2886                     1) // __kmpc_atomic_cmplx8_add_cpt
2887 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2888                     1) // __kmpc_atomic_cmplx8_sub_cpt
2889 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2890                     1) // __kmpc_atomic_cmplx8_mul_cpt
2891 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2892                     1) // __kmpc_atomic_cmplx8_div_cpt
2893 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2894                     1) // __kmpc_atomic_cmplx10_add_cpt
2895 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2896                     1) // __kmpc_atomic_cmplx10_sub_cpt
2897 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2898                     1) // __kmpc_atomic_cmplx10_mul_cpt
2899 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2900                     1) // __kmpc_atomic_cmplx10_div_cpt
2901 #if KMP_HAVE_QUAD
2902 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2903                     1) // __kmpc_atomic_cmplx16_add_cpt
2904 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2905                     1) // __kmpc_atomic_cmplx16_sub_cpt
2906 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2907                     1) // __kmpc_atomic_cmplx16_mul_cpt
2908 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2909                     1) // __kmpc_atomic_cmplx16_div_cpt
2910 #if (KMP_ARCH_X86)
// _a16 variants: 16-byte-aligned complex quad type on 32-bit x86.
2911 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2912                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2913 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2914                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2915 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2916                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2917 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2918                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2919 #endif // (KMP_ARCH_X86)
2920 #endif // KMP_HAVE_QUAD
2921 
2922 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
2923 // binop x; v = x; }  for non-commutative operations.
2924 // Supported only on IA-32 architecture and Intel(R) 64
2925 
2926 // -------------------------------------------------------------------------
2927 // Operation on *lhs, rhs bound by critical section
2928 //     OP     - operator (it's supposed to contain an assignment)
2929 //     LCK_ID - lock identifier
2930 // Note: don't check gtid as it should always be valid
2931 // 1, 2-byte - expect valid parameter, other - check before this macro
// Lock-protected reverse capture body: updates (*lhs) = (TYPE)((rhs) OP
// (*lhs)) — operands reversed relative to the forward variant, which is
// why sub/div/shift get _rev entry points. "flag" selects whether
// new_value is taken after the update (capture-new) or before it
// (capture-old). Expands inside a routine that declares new_value, lhs,
// rhs, flag and gtid.
2932 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
2933   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2934                                                                                \
2935   if (flag) {                                                                  \
2936     /*temp_val = (*lhs);*/                                                     \
2937     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2938     new_value = (*lhs);                                                        \
2939   } else {                                                                     \
2940     new_value = (*lhs);                                                        \
2941     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2942   }                                                                            \
2943   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2944   return new_value;
2945 
2946 // ------------------------------------------------------------------------
// GOMP compatibility shim for reverse capture: in GOMP atomic mode fall
// back to the lock-based path (lock id 0); otherwise expands to nothing.
2947 #ifdef KMP_GOMP_COMPAT
2948 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
2949   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2950     KMP_CHECK_GTID;                                                            \
2951     OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
2952   }
2953 #else
// Non-GOMP build: expands to nothing.
2954 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2955 #endif /* KMP_GOMP_COMPAT */
2956 
2957 // ------------------------------------------------------------------------
2958 // Operation on *lhs, rhs using "compare_and_store" routine
2959 //     TYPE    - operands' type
2960 //     BITS    - size in bits, used to distinguish low level calls
2961 //     OP      - operator
2962 // Note: temp_val introduced in order to force the compiler to read
2963 //       *lhs only once (w/o it the compiler reads *lhs twice)
// Lock-free reverse capture via a compare-and-swap retry loop: compute
// "rhs OP old_value", then attempt to install it with the BITS-wide
// integer CAS (the value is reinterpreted through kmp_int##BITS of the
// same size); on contention, reload and retry. Returns the new value
// when flag is nonzero, otherwise the old (pre-update) value.
2964 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2965   {                                                                            \
2966     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2967     TYPE old_value, new_value;                                                 \
2968     temp_val = *lhs;                                                           \
2969     old_value = temp_val;                                                      \
2970     new_value = (TYPE)(rhs OP old_value);                                      \
2971     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2972         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2973         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2974       temp_val = *lhs;                                                         \
2975       old_value = temp_val;                                                    \
2976       new_value = (TYPE)(rhs OP old_value);                                    \
2977     }                                                                          \
2978     if (flag) {                                                                \
2979       return new_value;                                                        \
2980     } else                                                                     \
2981       return old_value;                                                        \
2982   }
2983 
2984 // -------------------------------------------------------------------------
// Complete CAS-based reverse-capture routine: prologue, optional GOMP
// fallback, then the CAS loop. The outer new_value is used only by the
// GOMP path; the (void) cast silences the unused-variable warning when
// that path is compiled out.
2985 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2986   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2987   TYPE new_value;                                                              \
2988   (void)new_value;                                                             \
2989   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
2990   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2991   }
2992 
// Instantiate CAS-based reverse-capture entry points for the
// non-commutative operations (div, shifts, sub) over 1/2/4/8-byte
// integers and 4/8-byte floats. The GOMP flag is KMP_ARCH_X86 here —
// NOTE(review): presumably the GOMP fallback is only needed on x86;
// confirm against the forward-capture section of this file.
2993 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2994                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2995 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2996                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2998                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
3000                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
3001 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
3002                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
3003 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
3004                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
3005 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3006                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3007 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3008                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3009 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3010                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3011 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3012                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3013 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3014                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3015 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3016                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3017 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3018                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3019 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3020                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3021 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3022                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3023 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3024                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3025 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3026                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3027 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3028                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3029 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3030                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3031 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3032                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3033 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3034                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3035 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3036                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3037 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3038                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3039 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3040                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3041 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3042                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3043 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3044                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3045 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3046                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3047 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3048                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3049 //              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG
3050 
3051 // ------------------------------------------------------------------------
3052 // Routines for Extended types: long double, _Quad, complex flavours (use
3053 // critical section)
3054 //     TYPE_ID, OP_ID, TYPE - detailed above
3055 //     OP      - operator
3056 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
// Lock-based reverse-capture routine for extended types: prologue,
// optional GOMP fallback, then the critical-section body under LCK_ID.
3057 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
3058   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
3059   TYPE new_value;                                                              \
3060   /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
3061   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3062   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
3063   }
3064 
3065 /* ------------------------------------------------------------------------- */
// Lock-based reverse capture for wide floating types; only the
// non-commutative operations (sub, div) need _rev entry points.
3066 // routines for long double type
3067 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3068                         1) // __kmpc_atomic_float10_sub_cpt_rev
3069 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3070                         1) // __kmpc_atomic_float10_div_cpt_rev
3071 #if KMP_HAVE_QUAD
3072 // routines for _Quad type
3073 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3074                         1) // __kmpc_atomic_float16_sub_cpt_rev
3075 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3076                         1) // __kmpc_atomic_float16_div_cpt_rev
3077 #if (KMP_ARCH_X86)
// _a16 variants: 16-byte-aligned quad type on 32-bit x86.
3078 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3079                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3080 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3081                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
3082 #endif // (KMP_ARCH_X86)
3083 #endif // KMP_HAVE_QUAD
3084 
3085 // routines for complex types
3086 
3087 // ------------------------------------------------------------------------
3088 // Workaround for cmplx4. Regular routines with return value don't work
3089 // on Win_32e. Let's return captured values through the additional parameter.
// Lock-protected reverse capture for the cmplx4 workaround: result goes
// through *out; "flag" selects capture-after vs capture-before. Expands
// inside a routine declaring lhs, rhs, out, flag, gtid.
3090 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3091   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3092                                                                                \
3093   if (flag) {                                                                  \
3094     (*lhs) = (rhs)OP(*lhs);                                                    \
3095     (*out) = (*lhs);                                                           \
3096   } else {                                                                     \
3097     (*out) = (*lhs);                                                           \
3098     (*lhs) = (rhs)OP(*lhs);                                                    \
3099   }                                                                            \
3100                                                                                \
3101   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3102   return;
3103 // ------------------------------------------------------------------------
3104 
// GOMP compatibility shim for the void-returning reverse capture: in GOMP
// atomic mode, run the lock-based body with lock id 0; otherwise nothing.
3105 #ifdef KMP_GOMP_COMPAT
3106 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3107   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3108     KMP_CHECK_GTID;                                                            \
3109     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3110   }
3111 #else
// Non-GOMP build: expands to nothing.
3112 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3113 #endif /* KMP_GOMP_COMPAT */
3114 // ------------------------------------------------------------------------
3115 
// Complete void-returning reverse-capture routine: prologue, optional
// GOMP fallback, then the lock-protected body under LCK_ID.
3116 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3117                                     GOMP_FLAG)                                 \
3118   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3119   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3120   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3121   }
3122 // The end of workaround for cmplx4
3123 
3124 // !!! TODO: check if we need to return void for cmplx4 routines
3125 // cmplx4 routines to return void
// cmplx4 uses the void-returning WRK variant; wider complex types use the
// regular value-returning critical variant. Only non-commutative ops.
3126 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3127                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3128 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3129                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
3130 
3131 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3132                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3133 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3134                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3135 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3136                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3137 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3138                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3139 #if KMP_HAVE_QUAD
3140 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3141                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3142 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3143                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3144 #if (KMP_ARCH_X86)
// _a16 variants: 16-byte-aligned complex quad type on 32-bit x86.
3145 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3146                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3147 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3148                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3149 #endif // (KMP_ARCH_X86)
3150 #endif // KMP_HAVE_QUAD
3151 
3152 // Capture reverse for mixed type: RHS=float16
3153 #if KMP_HAVE_QUAD
3154 
3155 // Beginning of a definition (provides name, parameters, debug trace)
3156 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
3157 //     fixed)
3158 //     OP_ID   - operation identifier (add, sub, mul, ...)
3159 //     TYPE    - operands' type
3160 // -------------------------------------------------------------------------
// Mixed-type reverse capture (LHS of TYPE, RHS of RTYPE, e.g. _Quad):
// CAS-based variant for types with a matching integer CAS width.
// MASK/LCK_ID mirror the forward mixed-type section of this file.
3161 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3162                                    RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3163   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3164   TYPE new_value;                                                              \
3165   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3166   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3167   }
3168 
3169 // -------------------------------------------------------------------------
// Mixed-type reverse capture, lock-based variant for wide LHS types.
3170 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3171                                     LCK_ID, GOMP_FLAG)                         \
3172   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3173   TYPE new_value;                                                              \
3174   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
3175   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
3176   }
3177 
// Instantiate mixed-type reverse capture entry points with an RHS of
// _Quad ("_fp" suffix) for all integer/float LHS types; float10 uses the
// lock-based variant. Guarded by the enclosing #if KMP_HAVE_QUAD.
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3179                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3180 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3181                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3183                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3184 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3185                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3186 
3187 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3188                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3189 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3190                            1,
3191                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3192 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3193                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3194 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3195                            1,
3196                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3197 
3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3199                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3200 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3201                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3203                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3205                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3206 
3207 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3208                            7,
3209                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3210 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3211                            8i, 7,
3212                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3213 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3214                            7,
3215                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3216 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3217                            8i, 7,
3218                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3219 
3220 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3221                            4r, 3,
3222                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3223 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3224                            4r, 3,
3225                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3226 
3227 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3228                            8r, 7,
3229                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3230 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3231                            8r, 7,
3232                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3233 
3234 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3235                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3236 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3237                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3238 
3239 #endif // KMP_HAVE_QUAD
3240 
3241 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3242 
// Emits the signature and prologue of a swap (capture-write) routine:
// TYPE __kmpc_atomic_<TYPE_ID>_swp(id_ref, gtid, lhs, rhs) — stores rhs
// into *lhs and returns the previous value of *lhs.
3243 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3244   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3245                                      TYPE rhs) {                               \
3246     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3247     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3248 
// Lock-protected swap body: save old value, store rhs, return old value.
// Expands inside a routine that declares old_value, lhs, rhs and gtid.
3249 #define CRITICAL_SWP(LCK_ID)                                                   \
3250   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3251                                                                                \
3252   old_value = (*lhs);                                                          \
3253   (*lhs) = rhs;                                                                \
3254                                                                                \
3255   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3256   return old_value;
3257 
3258 // ------------------------------------------------------------------------
// GOMP compatibility shim for swap: in GOMP atomic mode, run the
// lock-based swap with lock id 0; otherwise expands to nothing.
3259 #ifdef KMP_GOMP_COMPAT
3260 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3261   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3262     KMP_CHECK_GTID;                                                            \
3263     CRITICAL_SWP(0);                                                           \
3264   }
3265 #else
// Non-GOMP build: expands to nothing.
3266 #define GOMP_CRITICAL_SWP(FLAG)
3267 #endif /* KMP_GOMP_COMPAT */
3268 
// Swap via the native fixed-width atomic exchange (integer types).
3269 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3270   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3271   TYPE old_value;                                                              \
3272   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3273   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3274   return old_value;                                                            \
3275   }
3276 // ------------------------------------------------------------------------
// Swap via the native atomic exchange for real (floating) types.
3277 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3278   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3279   TYPE old_value;                                                              \
3280   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3281   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3282   return old_value;                                                            \
3283   }
3284 
3285 // ------------------------------------------------------------------------
// Swap via a compare-and-swap retry loop: reload *lhs and retry the
// BITS-wide integer CAS until rhs is installed; return the old value.
// temp_val forces a single read of *lhs per iteration (see the note on
// OP_CMPXCHG_CPT_REV above).
3286 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3287   {                                                                            \
3288     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3289     TYPE old_value, new_value;                                                 \
3290     temp_val = *lhs;                                                           \
3291     old_value = temp_val;                                                      \
3292     new_value = rhs;                                                           \
3293     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3294         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3295         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3296       temp_val = *lhs;                                                         \
3297       old_value = temp_val;                                                    \
3298       new_value = rhs;                                                         \
3299     }                                                                          \
3300     return old_value;                                                          \
3301   }
3302 
3303 // -------------------------------------------------------------------------
// Complete CAS-based swap routine. The outer old_value is used only by
// the GOMP path; (void) silences the unused warning when it's compiled
// out (CMPXCHG_SWP declares its own locals).
3304 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3305   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3306   TYPE old_value;                                                              \
3307   (void)old_value;                                                             \
3308   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3309   CMPXCHG_SWP(TYPE, BITS)                                                      \
3310   }
3311 
// Instantiate swap entry points. On 32-bit x86 the 64-bit swaps use the
// CAS loop instead of a native 64-bit exchange.
3312 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3313 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3314 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3315 
3316 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3317                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3318 
3319 #if (KMP_ARCH_X86)
3320 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3321                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3322 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3323                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3324 #else
3325 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3326 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3327                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3328 #endif // (KMP_ARCH_X86)
3329 
3330 // ------------------------------------------------------------------------
3331 // Routines for Extended types: long double, _Quad, complex flavours (use
3332 // critical section)
// Lock-based swap routine for extended types: prologue, optional GOMP
// fallback, then the lock-protected swap body under LCK_ID.
3333 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3334   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3335   TYPE old_value;                                                              \
3336   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3337   CRITICAL_SWP(LCK_ID)                                                         \
3338   }
3339 
3340 // ------------------------------------------------------------------------
3341 // !!! TODO: check if we need to return void for cmplx4 routines
3342 // Workaround for cmplx4. Regular routines with return value don't work
3343 // on Win_32e. Let's return captured values through the additional parameter.
3344 
3345 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3346   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3347                                      TYPE rhs, TYPE *out) {                    \
3348     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3349     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3350 
// CRITICAL_SWP_WRK: while holding the LCK_ID atomic lock, copy the old *lhs
// into *out and store rhs into *lhs, then return (void counterpart of
// CRITICAL_SWP).  Expects a local `tmp` of the element type to be in scope.
#define CRITICAL_SWP_WRK(LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  tmp = (*lhs);                                                                \
  (*lhs) = (rhs);                                                              \
  (*out) = tmp;                                                                \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
3359 // ------------------------------------------------------------------------
3360 
#ifdef KMP_GOMP_COMPAT
// GOMP_CRITICAL_SWP_WRK: in GOMP-compat mode 2 (FLAG set), perform the swap
// under lock id 0 and return early; otherwise expands to nothing and the
// per-type lock path below the expansion site runs instead.
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP_WRK(0);                                                       \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
3370 // ------------------------------------------------------------------------
3371 
// ATOMIC_CRITICAL_SWP_WRK: critical-section swap variant used for cmplx4;
// returns void and reports the captured old value through the extra `out`
// parameter (Win_32e workaround — see note above ATOMIC_BEGIN_SWP_WRK).
#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
  TYPE tmp;                                                                    \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
  CRITICAL_SWP_WRK(LCK_ID)                                                     \
  }
3378 // The end of workaround for cmplx4
3379 
// Critical-section swap entry points for long double, _Quad and complex
// types; cmplx4 uses the void/out-parameter workaround variant.
ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
// _a16 (16-byte aligned) type variants, IA-32 only.
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
3401 
3402 // End of OpenMP 4.0 Capture
3403 
3404 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3405 
3406 #undef OP_CRITICAL
3407 
3408 /* ------------------------------------------------------------------------ */
3409 /* Generic atomic routines                                                  */
3410 
// Generic 1-byte atomic update: atomically performs *lhs = f(*lhs, rhs),
// where the callback writes its result through its first argument:
// f(out, old, rhs).  Uses a compare-and-store retry loop except on IA-32
// with GOMP compatibility, where a lock must be used.
// @param id_ref source location info (unused here)
// @param gtid   global thread id, used for lock ownership
// @param lhs    address of the 1-byte location being updated
// @param rhs    address of the right-hand operand, passed through to f
// @param f      reduction callback: f(result_out, old_value, rhs)
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    // Speculatively compute the new value, then publish it only if *lhs is
    // still the value we read; on contention, re-read and recompute.
    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

    // GOMP-compat mode 2 serializes on the single global atomic lock;
    // otherwise the 1-byte-integer lock is used.
#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
3457 
3458 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3459                      void (*f)(void *, void *, void *)) {
3460   if (
3461 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3462       FALSE /* must use lock */
3463 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3464       TRUE /* no alignment problems */
3465 #else
3466       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3467 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3468   ) {
3469     kmp_int16 old_value, new_value;
3470 
3471     old_value = *(kmp_int16 *)lhs;
3472     (*f)(&new_value, &old_value, rhs);
3473 
3474     /* TODO: Should this be acquire or release? */
3475     while (!KMP_COMPARE_AND_STORE_ACQ16(
3476         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3477       KMP_CPU_PAUSE();
3478 
3479       old_value = *(kmp_int16 *)lhs;
3480       (*f)(&new_value, &old_value, rhs);
3481     }
3482 
3483     return;
3484   } else {
3485     // All 2-byte data is of integer data type.
3486 
3487 #ifdef KMP_GOMP_COMPAT
3488     if (__kmp_atomic_mode == 2) {
3489       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3490     } else
3491 #endif /* KMP_GOMP_COMPAT */
3492       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3493 
3494     (*f)(lhs, lhs, rhs);
3495 
3496 #ifdef KMP_GOMP_COMPAT
3497     if (__kmp_atomic_mode == 2) {
3498       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3499     } else
3500 #endif /* KMP_GOMP_COMPAT */
3501       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3502   }
3503 }
3504 
// Generic 4-byte atomic update: atomically performs *lhs = f(*lhs, rhs),
// where the callback writes its result through its first argument:
// f(out, old, rhs).  Unlike the 1/2/8-byte variants there is no
// GOMP-compat lock override here (see FIXME below).
// @param id_ref source location info (unused here)
// @param gtid   global thread id, used for lock ownership
// @param lhs    address of the 4-byte location being updated
// @param rhs    address of the right-hand operand, passed through to f
// @param f      reduction callback: f(result_out, old_value, rhs)
void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    // Speculatively compute the new value, then publish it only if *lhs is
    // still the value we read; on contention, re-read and recompute.
    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}
3554 
// Generic 8-byte atomic update: atomically performs *lhs = f(*lhs, rhs),
// where the callback writes its result through its first argument:
// f(out, old, rhs).  The compare-and-store path is skipped on IA-32 in
// GOMP-compat mode and when lhs is not 8-byte aligned on architectures
// where that matters.
// @param id_ref source location info (unused here)
// @param gtid   global thread id, used for lock ownership
// @param lhs    address of the 8-byte location being updated
// @param rhs    address of the right-hand operand, passed through to f
// @param f      reduction callback: f(result_out, old_value, rhs)
void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    // Speculatively compute the new value, then publish it only if *lhs is
    // still the value we read; on contention, re-read and recompute.
    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}
3603 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Generic 10-byte atomic update (x87 80-bit long double on x86).  No CAS
// path exists for this width; the update always runs under the 10r lock,
// or the global lock in GOMP-compat mode 2.
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}
3624 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3625 
// Generic 16-byte atomic update; always lock-based.  The 16c (complex) lock
// is used for all 16-byte data regardless of its actual type.
void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}
3646 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Generic 20-byte atomic update (e.g. complex<long double> on x86); always
// lock-based, using the 20c lock (global lock in GOMP-compat mode 2).
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}
3667 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// Generic 32-byte atomic update; always lock-based, using the 32c lock
// (global lock in GOMP-compat mode 2).
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}
3688 
3689 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3690 // compiler; duplicated in order to not use 3-party names in pure Intel code
3691 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
// Begin a compiler-emitted atomic region by taking the single global atomic
// lock.  Counterpart of __kmpc_atomic_end.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid(); // entry variant - NOTE(review): presumably
                                 // handles threads not yet known to the
                                 // runtime; confirm vs __kmp_get_gtid
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}
3697 
// End a compiler-emitted atomic region started by __kmpc_atomic_start by
// releasing the global atomic lock.
void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
3703 
3704 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3705 
3706 // OpenMP 5.1 compare and swap
3707 
3708 /*!
3709 @param loc Source code location
3710 @param gtid Global thread id
3711 @param x Memory location to operate on
3712 @param e Expected value
3713 @param d Desired value
3714 @return Result of comparison
3715 
3716 Implements Compare And Swap atomic operation.
3717 
3718 Sample code:
3719 #pragma omp atomic compare update capture
3720   { r = x == e; if(r) { x = d; } }
3721 */
// Returns true iff *x compared equal to e and was atomically replaced by d.
bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
}
// 2-byte variant: true iff *x == e and *x was atomically replaced by d.
bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
}
// 4-byte variant: true iff *x == e and *x was atomically replaced by d.
bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
                              kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
}
// 8-byte variant: true iff *x == e and *x was atomically replaced by d.
bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
                              kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
}
3737 
3738 /*!
3739 @param loc Source code location
3740 @param gtid Global thread id
3741 @param x Memory location to operate on
3742 @param e Expected value
3743 @param d Desired value
3744 @return Old value of x
3745 
3746 Implements Compare And Swap atomic operation.
3747 
3748 Sample code:
3749 #pragma omp atomic compare update capture
3750   { v = x; if (x == e) { x = d; } }
3751 */
// Atomically replaces *x with d if *x == e; returns the previous value of *x.
char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
}
// 2-byte variant: CAS(x, e, d); returns the previous value of *x.
short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
}
// 4-byte variant: CAS(x, e, d); returns the previous value of *x.
kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
}
// 8-byte variant: CAS(x, e, d); returns the previous value of *x.
kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
}
3767 
3768 /*!
3769 @param loc Source code location
3770 @param gtid Global thread id
3771 @param x Memory location to operate on
3772 @param e Expected value
3773 @param d Desired value
3774 @param pv Captured value location
3775 @return Result of comparison
3776 
3777 Implements Compare And Swap + Capture atomic operation.
3778 
v gets the old value of x if the comparison failed, and is untouched otherwise.
3780 Sample code:
3781 #pragma omp atomic compare update capture
3782   { r = x == e; if(r) { x = d; } else { v = x; } }
3783 */
3784 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3785                                   char d, char *pv) {
3786   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3787   if (old == e)
3788     return true;
3789   KMP_ASSERT(pv != NULL);
3790   *pv = old;
3791   return false;
3792 }
3793 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3794                                   short d, short *pv) {
3795   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3796   if (old == e)
3797     return true;
3798   KMP_ASSERT(pv != NULL);
3799   *pv = old;
3800   return false;
3801 }
3802 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3803                                   kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3804   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3805   if (old == e)
3806     return true;
3807   KMP_ASSERT(pv != NULL);
3808   *pv = old;
3809   return false;
3810 }
3811 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3812                                   kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3813   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3814   if (old == e)
3815     return true;
3816   KMP_ASSERT(pv != NULL);
3817   *pv = old;
3818   return false;
3819 }
3820 
3821 /*!
3822 @param loc Source code location
3823 @param gtid Global thread id
3824 @param x Memory location to operate on
3825 @param e Expected value
3826 @param d Desired value
3827 @param pv Captured value location
3828 @return Old value of x
3829 
3830 Implements Compare And Swap + Capture atomic operation.
3831 
v gets the new value of x.
3833 Sample code:
3834 #pragma omp atomic compare update capture
3835   { if (x == e) { x = d; }; v = x; }
3836 */
3837 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3838                                  char d, char *pv) {
3839   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3840   KMP_ASSERT(pv != NULL);
3841   *pv = old == e ? d : old;
3842   return old;
3843 }
3844 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3845                                   short d, short *pv) {
3846   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3847   KMP_ASSERT(pv != NULL);
3848   *pv = old == e ? d : old;
3849   return old;
3850 }
3851 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3852                                       kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3853   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3854   KMP_ASSERT(pv != NULL);
3855   *pv = old == e ? d : old;
3856   return old;
3857 }
3858 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3859                                       kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3860   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3861   KMP_ASSERT(pv != NULL);
3862   *pv = old == e ? d : old;
3863   return old;
3864 }
3865 
3866 // End OpenMP 5.1 compare + capture
3867 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3868 
3869 /*!
3870 @}
3871 */
3872 
3873 // end of file
3874