xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_atomic.cpp (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar; // short alias for unsigned char
17 typedef unsigned short ushort; // short alias for unsigned short
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
24 The compiler is at liberty to inline atomic operations that are naturally
25 supported by the target architecture. For instance on IA-32 architecture an
26 atomic like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
49 int64_t    | `fixed8`
50 uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
53 float 10 (8087 eighty bit float)  | `float10`
54 complex<float>   |  `cmplx4`
55 complex<double>  | `cmplx8`
56 complex<float10> | `cmplx10`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
81 
82 Update Functions
83 ================
84 The general form of an atomic function that just performs an update (without a
85 `capture`)
86 @code
87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
88 lhs, TYPE rhs );
89 @endcode
90 @param id_ref  a pointer to source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
94 
95 `capture` functions
96 ===================
97 The capture functions perform an atomic update and return a result, which is
98 either the value before the update or the value after it. They take an
99 additional argument to determine which result is returned.
100 Their general form is therefore
101 @code
102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
103 lhs, TYPE rhs, int flag );
104 @endcode
105 @param id_ref  a pointer to source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
111 
112 The one set of exceptions to this is the `complex<float>` type, where the
113 value is not returned; instead, an extra output pointer argument is passed.
114 
115 They look like
116 @code
117 void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 *
118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119 @endcode
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133 
134 For the write operations the form is
135 @code
136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137 );
138 @endcode
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
144 Functions for integers
145 ---------------------
146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and
147 unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
389 There are versions here for floating point numbers of size 4, 8, 10 and 16
390 bytes. (Ten byte floats are used by X87, but are now rare).
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
484 Functions for complex types whose component floating point variables are of size
485 4,8,10 or 16 bytes. The names here are based on the size of the component float,
486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 // Atomic mode: OP_GOMP_CRITICAL takes the common-lock path only in mode 2.
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // initial value: Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // initial value: GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 // NOTE(review): KMP_ALIGN(128) presumably covers only __kmp_atomic_lock.
573 // Control access to all user coded atomics in Gnu compat mode (mode 2)
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601    Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG there is
602    a bug on *_32 and *_32e. This is just a temporary workaround for the
603    problem. The right solution seems to be writing the OP_CMPXCHG and
604    MIN_MAX_CMPXCHG routines in assembly language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 // Quad_a4_t operators: each one forwards to the same operator on member .q
609 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   lhs.q += rhs.q;
611 }
612 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   lhs.q -= rhs.q;
614 }
615 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   lhs.q *= rhs.q;
617 }
618 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   lhs.q /= rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 // Quad_a16_t operators: each one forwards to the same operator on member .q
628 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   lhs.q += rhs.q;
630 }
631 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   lhs.q -= rhs.q;
633 }
634 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   lhs.q *= rhs.q;
636 }
637 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   lhs.q /= rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 // kmp_cmplx128_a4_t: compound assignments only, each forwarded to member .q
647 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
648   lhs.q += rhs.q;
649 }
650 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
651   lhs.q -= rhs.q;
652 }
653 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
654   lhs.q *= rhs.q;
655 }
656 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
657   lhs.q /= rhs.q;
658 }
659 // kmp_cmplx128_a16_t: compound assignments only, each forwarded to member .q
660 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
661                               kmp_cmplx128_a16_t &rhs) {
662   lhs.q += rhs.q;
663 }
664 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
665                               kmp_cmplx128_a16_t &rhs) {
666   lhs.q -= rhs.q;
667 }
668 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
669                               kmp_cmplx128_a16_t &rhs) {
670   lhs.q *= rhs.q;
671 }
672 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
673                               kmp_cmplx128_a16_t &rhs) {
674   lhs.q /= rhs.q;
675 }
676 
677 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
678 
679 // ATOMIC implementation routines -----------------------------------------
680 // One routine for each operation and operand type.
681 // All routine declarations look like
682 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
683 // KMP_CHECK_GTID assigns to a variable named gtid in the expanding scope.
684 #define KMP_CHECK_GTID                                                         \
685   if (gtid == KMP_GTID_UNKNOWN) {                                              \
686     gtid = __kmp_entry_gtid();                                                 \
687   } // when gtid is KMP_GTID_UNKNOWN, obtain it from __kmp_entry_gtid()
688 
689 // Beginning of a definition (provides name, parameters, debug trace); opens
690 // the function body, which the macro using ATOMIC_BEGIN must close with '}'.
691 //     TYPE_ID - operands' type and size (fixed*/fixed*u = signed/unsigned)
692 //     OP_ID   - operation identifier (add, sub, mul, ...)
693 //     TYPE    - operands' type; RET_TYPE - return type of the routine
694 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
695   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
696                                              TYPE *lhs, TYPE rhs) {            \
697     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
698     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
699 
700 // ------------------------------------------------------------------------
701 // Locks for critical sections by operand kind; picked via ATOMIC_LOCK##LCK_ID
702 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
703 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // 1-byte fixed (char)
704 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // 2-byte fixed (short)
705 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // 4-byte fixed
706 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // kmp_real32 (float)
707 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // 8-byte fixed
708 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // kmp_real64 (double)
709 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
710 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
711 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
712 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
713 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
714 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
715 
716 // ------------------------------------------------------------------------
717 // Performs "(*lhs) OP (rhs)" while holding the lock ATOMIC_LOCK##LCK_ID
718 //     OP     - operator (it's supposed to contain an assignment, e.g. +=)
719 //     LCK_ID - lock identifier: suffix of an ATOMIC_LOCK* macro (0, 1i, ...)
720 // Note: gtid is not checked here, as it should always be valid:
721 // 1, 2-byte - expect valid parameter, other - check before this macro
722 #define OP_CRITICAL(OP, LCK_ID)                                                \
723   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
724                                                                                \
725   (*lhs) OP(rhs);                                                              \
726                                                                                \
727   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
728 
729 // ------------------------------------------------------------------------
730 // For GNU compatibility, we may need to use a critical section,
731 // even though it is not required by the ISA.
732 //
733 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
734 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
735 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
736 // and add or compare and exchange.  Therefore, the FLAG parameter to this
737 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
738 // require a critical section, where we predict that they will be implemented
739 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
740 //
741 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
742 // the FLAG parameter should always be 1.  If we know that we will be using
743 // a critical section, then we want to make certain that we use the generic
744 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
745 // locks that are specialized based upon the size or type of the data.
746 //
747 // If FLAG is 0, then we are relying on dead code elimination by the build
748 // compiler to get rid of the useless block of code, and save a needless
749 // branch at runtime.
750 // The taken branch executes a bare `return;`, so it only fits void routines.
751 #ifdef KMP_GOMP_COMPAT
752 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
753   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
754     KMP_CHECK_GTID;                                                            \
755     OP_CRITICAL(OP, 0);                                                        \
756     return;                                                                    \
757   }
758 #else
759 #define OP_GOMP_CRITICAL(OP, FLAG)
760 #endif /* KMP_GOMP_COMPAT */
761 // Back-off executed after each failed compare-and-store in the macros below
762 #if KMP_MIC
763 #define KMP_DO_PAUSE _mm_delay_32(1)
764 #else
765 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
766 #endif /* KMP_MIC */
767 
768 // ------------------------------------------------------------------------
769 // Retry loop computing *lhs = *lhs OP rhs with a "compare_and_store" routine
770 //     TYPE    - operands' type (reinterpreted as kmp_int<BITS> for the call)
771 //     BITS    - size in bits, used to distinguish low level calls
772 //     OP      - binary operator without '=', applied as old_value OP rhs
773 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
774   {                                                                            \
775     TYPE old_value, new_value;                                                 \
776     old_value = *(TYPE volatile *)lhs;                                         \
777     new_value = old_value OP rhs;                                              \
778     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
779         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
780         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
781       KMP_DO_PAUSE;                                                            \
782                                                                                \
783       old_value = *(TYPE volatile *)lhs;                                       \
784       new_value = old_value OP rhs;                                            \
785     }                                                                          \
786   }
787 
788 #if USE_CMPXCHG_FIX
789 // 2007-06-25: workaround for C78287 (complex(kind=4) data type); lin_32,
790 // lin_32e, win_32 and win_32e are affected (asm verified). The compiler
791 // ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro; this
792 // is a compiler problem (related tracker C76005, targeted to 11.0). Here the
793 // same retry loop accesses the old/new values through kmp_int<BITS>
794 // pointers kept in a local struct. The asm of the workaround was verified.
795 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
796   {                                                                            \
797     struct _sss {                                                              \
798       TYPE cmp;                                                                \
799       kmp_int##BITS *vvv;                                                      \
800     };                                                                         \
801     struct _sss old_value, new_value;                                          \
802     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
803     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
804     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
805     new_value.cmp = old_value.cmp OP rhs;                                      \
806     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
807         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
808         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
809       KMP_DO_PAUSE;                                                            \
810                                                                                \
811       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
812       new_value.cmp = old_value.cmp OP rhs;                                    \
813     }                                                                          \
814   }
815 // end of the first part of the workaround for C78287
816 #endif // USE_CMPXCHG_FIX
817 
818 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
819 
820 // ------------------------------------------------------------------------
821 // X86 or X86_64: no alignment problems ====================================
    // ATOMIC_FIXED_ADD (x86/x86_64 variant): expands
    // ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void), passes the compound assignment
    // "OP=" to OP_GOMP_CRITICAL together with GOMP_FLAG, then updates *lhs with
    // KMP_TEST_THEN_ADD<BITS>(lhs, OP rhs). No alignment test is made here;
    // LCK_ID and MASK are accepted but unused. The trailing "}" closes a scope
    // that this macro does not open (presumably the one begun by ATOMIC_BEGIN,
    // which is defined outside this section).
822 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
823                          GOMP_FLAG)                                            \
824   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
825   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
826   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
827   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
828   }
829 // -------------------------------------------------------------------------
    // ATOMIC_CMPXCHG (x86/x86_64 variant): after ATOMIC_BEGIN and the
    // OP_GOMP_CRITICAL("OP=", GOMP_FLAG) step, the update is carried out by the
    // OP_CMPXCHG(TYPE, BITS, OP) compare-and-store retry loop. No alignment test
    // is made here; LCK_ID and MASK are accepted but unused. The trailing "}"
    // closes a scope this macro does not open (presumably begun by ATOMIC_BEGIN).
830 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
831                        GOMP_FLAG)                                              \
832   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
833   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
834   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
835   }
836 #if USE_CMPXCHG_FIX
837 // -------------------------------------------------------------------------
838 // workaround for C78287 (complex(kind=4) data type)
    // ATOMIC_CMPXCHG_WORKAROUND (x86/x86_64 variant): identical in shape to
    // ATOMIC_CMPXCHG, except that the update loop is OP_CMPXCHG_WORKAROUND, which
    // reads the operand through an integer pointer stored in a local struct.
    // LCK_ID and MASK are accepted but unused.
839 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
840                                   MASK, GOMP_FLAG)                             \
841   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
842   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
843   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
844   }
845 // end of the second part of the workaround for C78287
846 #endif // USE_CMPXCHG_FIX
847 
848 #else
849 // -------------------------------------------------------------------------
850 // Code for other architectures that don't handle unaligned accesses.
    // ATOMIC_FIXED_ADD (non-x86 variant): MASK is pasted after "0x" and ANDed
    // with the address in lhs. A zero result takes the KMP_TEST_THEN_ADD<BITS>
    // path; a non-zero result (unaligned address) runs KMP_CHECK_GTID and then
    // performs "OP=" through OP_CRITICAL using lock LCK_ID.
851 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
852                          GOMP_FLAG)                                            \
853   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
854   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
855   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
856     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
857     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
858   } else {                                                                     \
859     KMP_CHECK_GTID;                                                            \
860     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
861   }                                                                            \
862   }
863 // -------------------------------------------------------------------------
    // ATOMIC_CMPXCHG (non-x86 variant): when (lhs & 0x<MASK>) is zero the update
    // uses the OP_CMPXCHG compare-and-store loop; otherwise (unaligned address)
    // it runs KMP_CHECK_GTID and performs "OP=" through OP_CRITICAL with lock
    // LCK_ID.
864 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
865                        GOMP_FLAG)                                              \
866   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
867   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
868   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
869     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
870   } else {                                                                     \
871     KMP_CHECK_GTID;                                                            \
872     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
873   }                                                                            \
874   }
875 #if USE_CMPXCHG_FIX
876 // -------------------------------------------------------------------------
877 // workaround for C78287 (complex(kind=4) data type)
    // ATOMIC_CMPXCHG_WORKAROUND (non-x86 variant): same alignment dispatch as
    // the non-x86 ATOMIC_CMPXCHG.
    // NOTE(review): the aligned path expands OP_CMPXCHG, not
    // OP_CMPXCHG_WORKAROUND as the x86 variant does; presumably intentional
    // because the compiler issue was reported for x86 targets - confirm.
878 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
879                                   MASK, GOMP_FLAG)                             \
880   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
881   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
882   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
883     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
884   } else {                                                                     \
885     KMP_CHECK_GTID;                                                            \
886     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
887   }                                                                            \
888   }
889 // end of the second part of the workaround for C78287
890 #endif // USE_CMPXCHG_FIX
891 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
892 
893 // Routines for ATOMIC 4-byte operands addition and subtraction
    // Argument order: TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG.
    // Integer add/sub use ATOMIC_FIXED_ADD; float add/sub use ATOMIC_CMPXCHG.
894 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
895                  0) // __kmpc_atomic_fixed4_add
896 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
897                  0) // __kmpc_atomic_fixed4_sub
898 
899 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
900                KMP_ARCH_X86) // __kmpc_atomic_float4_add
901 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
902                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
903 
904 // Routines for ATOMIC 8-byte operands addition and subtraction
905 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
906                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
907 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
908                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
909 
910 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
911                KMP_ARCH_X86) // __kmpc_atomic_float8_add
912 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
913                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
914 
915 // ------------------------------------------------------------------------
916 // Entries definition for integer operands
917 //     TYPE_ID - operands type and size (fixed4, float4)
918 //     OP_ID   - operation identifier (add, sub, mul, ...)
919 //     TYPE    - operand type
920 //     BITS    - size in bits, used to distinguish low level calls
921 //     OP      - operator (used in critical section)
922 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
923 //     MASK    - used for alignment check (pasted after "0x", ANDed with lhs)
    //     GOMP_FLAG - forwarded by ATOMIC_CMPXCHG to OP_GOMP_CRITICAL
924 
925 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
926 // ------------------------------------------------------------------------
927 // Routines for ATOMIC integer operands, other operators
    // (the float4/float8 mul and div entries are listed at the end of this table)
928 // ------------------------------------------------------------------------
929 //             TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
    // 1-byte operands (fixed1 / fixed1u)
930 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
931                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
932 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
933                0) // __kmpc_atomic_fixed1_andb
934 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
935                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
936 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
937                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
938 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
939                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
940 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
941                0) // __kmpc_atomic_fixed1_orb
942 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
943                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
944 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
945                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
946 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
947                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
948 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
949                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
950 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
951                0) // __kmpc_atomic_fixed1_xor
    // 2-byte operands (fixed2 / fixed2u)
952 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
953                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
954 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
955                0) // __kmpc_atomic_fixed2_andb
956 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
957                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
958 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
959                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
960 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
961                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
962 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
963                0) // __kmpc_atomic_fixed2_orb
964 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
965                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
966 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
967                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
968 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
969                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
970 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
971                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
972 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
973                0) // __kmpc_atomic_fixed2_xor
    // 4-byte operands (fixed4 add/sub are generated earlier by ATOMIC_FIXED_ADD)
974 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
975                0) // __kmpc_atomic_fixed4_andb
976 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
977                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
978 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
979                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
980 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
981                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
982 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
983                0) // __kmpc_atomic_fixed4_orb
984 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
985                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
986 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
987                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
988 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
989                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
990 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
991                0) // __kmpc_atomic_fixed4_xor
    // 8-byte operands (fixed8 add/sub are generated earlier by ATOMIC_FIXED_ADD)
992 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
993                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
994 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
995                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
996 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
997                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
998 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
999                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1000 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1001                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1002 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1003                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1004 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1005                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1006 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1007                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1008 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1009                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
     // float4 / float8 multiply and divide (add/sub are generated earlier)
1010 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1011                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1012 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1013                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1014 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1015                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1016 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1017                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1018 //             TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
1019 
1020 /* ------------------------------------------------------------------------ */
1021 /* Routines for C/C++ Reduction operators && and ||                         */
1022 
1023 // ------------------------------------------------------------------------
1024 // Need separate macros for &&, || because there is no combined assignment
1025 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
     // ATOMIC_CRIT_L: lock-only form for the logical operators. Because C has no
     // "&&=" / "||=", the operation text passed to OP_GOMP_CRITICAL and to
     // OP_CRITICAL (lock LCK_ID) is "= *lhs OP", i.e. an explicit assignment of
     // "*lhs OP <rhs>" (presumably completed with rhs inside OP_CRITICAL, which
     // is defined outside this section).
1026 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1027   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1028   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1029   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1030   }
1031 
1032 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1033 
1034 // ------------------------------------------------------------------------
1035 // X86 or X86_64: no alignment problems ===================================
     // ATOMIC_CMPX_L (x86/x86_64 variant): logical-operator entry. The GOMP path
     // receives the operation as "= *lhs OP"; the normal path is the OP_CMPXCHG
     // compare-and-store loop, which computes "old_value OP rhs". LCK_ID and
     // MASK are accepted but unused.
1036 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1037   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1038   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1039   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1040   }
1041 
1042 #else
1043 // ------------------------------------------------------------------------
1044 // Code for other architectures that don't handle unaligned accesses.
     // ATOMIC_CMPX_L (non-x86 variant): when (lhs & 0x<MASK>) is zero the update
     // uses the OP_CMPXCHG loop; otherwise it runs KMP_CHECK_GTID and performs
     // the "= *lhs OP" assignment through OP_CRITICAL with lock LCK_ID.
1045 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1046   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1047   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1048   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1049     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1050   } else {                                                                     \
1051     KMP_CHECK_GTID;                                                            \
1052     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1053   }                                                                            \
1054   }
1055 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1056 
     // Logical AND (andl, &&) and OR (orl, ||) entries for 1-, 2-, 4- and 8-byte
     // integers. Argument order: TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,
     // GOMP_FLAG.
1057 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1058               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1059 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1060               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1061 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1062               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1063 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1064               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1065 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1066               0) // __kmpc_atomic_fixed4_andl
1067 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1068               0) // __kmpc_atomic_fixed4_orl
1069 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1070               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1071 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1072               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1073 
1074 /* ------------------------------------------------------------------------- */
1075 /* Routines for Fortran operators that have no counterpart in C:             */
1076 /* MAX, MIN, .EQV., .NEQV.                                                   */
1077 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1078 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1079 
1080 // -------------------------------------------------------------------------
1081 // MIN and MAX need separate macros
1082 // OP - comparison that is true when *lhs must be replaced by rhs
     //      (the entries below pass '<' for max and '>' for min)
     // MIN_MAX_CRITSECT: acquires ATOMIC_LOCK<LCK_ID> for gtid, re-tests
     // "*lhs OP rhs" under the lock, stores rhs into *lhs only if the test still
     // holds, then releases the lock. Uses lhs, rhs and gtid from the expansion
     // site.
1083 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1084   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1085                                                                                \
1086   if (*lhs OP rhs) { /* still need actions? */                                 \
1087     *lhs = rhs;                                                                \
1088   }                                                                            \
1089   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1090 
1091 // -------------------------------------------------------------------------
     // GOMP_MIN_MAX_CRITSECT: with KMP_GOMP_COMPAT defined, if FLAG is non-zero
     // and __kmp_atomic_mode == 2, run KMP_CHECK_GTID, do the min/max update via
     // MIN_MAX_CRITSECT with lock id 0 (ATOMIC_LOCK0), and return from the
     // enclosing function. Without KMP_GOMP_COMPAT it expands to nothing.
1092 #ifdef KMP_GOMP_COMPAT
1093 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1094   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1095     KMP_CHECK_GTID;                                                            \
1096     MIN_MAX_CRITSECT(OP, 0);                                                   \
1097     return;                                                                    \
1098   }
1099 #else
1100 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1101 #endif /* KMP_GOMP_COMPAT */
1102 
1103 // -------------------------------------------------------------------------
     // MIN_MAX_CMPXCHG: lock-free min/max update. *lhs is snapshotted through
     // the KMP_ATOMIC_VOLATILE temp_val into old_value. The loop continues while
     // "old_value OP rhs" still holds AND KMP_COMPARE_AND_STORE_ACQ<BITS> fails
     // to replace the snapshot with rhs; each retry calls KMP_CPU_PAUSE() and
     // refreshes the snapshot. It stops without storing as soon as the re-read
     // value no longer satisfies the comparison. rhs is passed by reinterpreting
     // its storage as kmp_int<BITS>.
1104 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1105   {                                                                            \
1106     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1107     TYPE old_value;                                                            \
1108     temp_val = *lhs;                                                           \
1109     old_value = temp_val;                                                      \
1110     while (old_value OP rhs && /* still need actions? */                       \
1111            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1112                (kmp_int##BITS *)lhs,                                           \
1113                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1114                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1115       KMP_CPU_PAUSE();                                                         \
1116       temp_val = *lhs;                                                         \
1117       old_value = temp_val;                                                    \
1118     }                                                                          \
1119   }
1120 
1121 // -------------------------------------------------------------------------
1122 // 1-byte, 2-byte operands - use critical section
     // NOTE: in this section the 1- and 2-byte min/max entries are generated
     // with MIN_MAX_COMPXCHG; the visible users of MIN_MAX_CRITICAL are the
     // float16 entries.
     // MIN_MAX_CRITICAL: tests "*lhs OP rhs" without a lock first; only when an
     // update looks necessary does it try the GOMP-compat path and then the
     // locked MIN_MAX_CRITSECT (lock LCK_ID), which repeats the test.
1123 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1124   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1125   if (*lhs OP rhs) { /* need actions? */                                       \
1126     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1127     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1128   }                                                                            \
1129   }
1130 
1131 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1132 
1133 // -------------------------------------------------------------------------
1134 // X86 or X86_64: no alignment problems ====================================
     // MIN_MAX_COMPXCHG (x86/x86_64 variant): tests "*lhs OP rhs" first; when an
     // update looks necessary it tries the GOMP-compat path, then performs the
     // update with the MIN_MAX_CMPXCHG compare-and-store loop. LCK_ID and MASK
     // are accepted but unused.
1135 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1136                          GOMP_FLAG)                                            \
1137   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1138   if (*lhs OP rhs) {                                                           \
1139     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1140     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1141   }                                                                            \
1142   }
1143 
1144 #else
1145 // -------------------------------------------------------------------------
1146 // Code for other architectures that don't handle unaligned accesses.
     // MIN_MAX_COMPXCHG (non-x86 variant): after the unlocked "*lhs OP rhs" test
     // and the GOMP-compat path, an address with (lhs & 0x<MASK>) == 0 uses the
     // MIN_MAX_CMPXCHG loop; any other address runs KMP_CHECK_GTID and updates
     // under lock LCK_ID via MIN_MAX_CRITSECT.
1147 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1148                          GOMP_FLAG)                                            \
1149   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1150   if (*lhs OP rhs) {                                                           \
1151     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1152     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1153       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1154     } else {                                                                   \
1155       KMP_CHECK_GTID;                                                          \
1156       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1157     }                                                                          \
1158   }                                                                            \
1159   }
1160 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1161 
     // min/max entries. OP is the "update needed" comparison: '<' for max
     // (*lhs < rhs) and '>' for min (*lhs > rhs). Integer and float4/float8
     // entries use MIN_MAX_COMPXCHG; the float16 entries use MIN_MAX_CRITICAL and
     // exist only when KMP_HAVE_QUAD is set (the _a16 ones also need
     // KMP_ARCH_X86).
1162 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1163                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1164 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1165                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1166 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1167                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1168 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1169                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1170 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1171                  0) // __kmpc_atomic_fixed4_max
1172 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1173                  0) // __kmpc_atomic_fixed4_min
1174 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1175                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1176 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1177                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1178 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1179                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1180 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1181                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1182 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1183                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1184 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1185                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1186 #if KMP_HAVE_QUAD
1187 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1188                  1) // __kmpc_atomic_float16_max
1189 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1190                  1) // __kmpc_atomic_float16_min
1191 #if (KMP_ARCH_X86)
1192 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1193                  1) // __kmpc_atomic_float16_max_a16
1194 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1195                  1) // __kmpc_atomic_float16_min_a16
1196 #endif // (KMP_ARCH_X86)
1197 #endif // KMP_HAVE_QUAD
1198 // ------------------------------------------------------------------------
1199 // Need separate macros for .EQV. because of the need of complement (~)
1200 // OP ignored for critical sections, ^=~ used instead
     // ATOMIC_CRIT_EQV: lock-only form of .EQV. Both OP_GOMP_CRITICAL and
     // OP_CRITICAL (lock LCK_ID) receive the fixed operation text "^= ~"; the OP
     // parameter is not referenced in the body.
1201 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1202   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1203   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1204   OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */               \
1205   }
1206 
1207 // ------------------------------------------------------------------------
1208 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1209 // ------------------------------------------------------------------------
1210 // X86 or X86_64: no alignment problems ===================================
     // ATOMIC_CMPX_EQV (x86/x86_64 variant): the GOMP-compat path receives the
     // fixed text "^= ~"; the normal path is the OP_CMPXCHG loop with the
     // caller's OP (the entries below pass "^~", giving "old_value ^~ rhs").
     // LCK_ID and MASK are accepted but unused.
1211 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1212                         GOMP_FLAG)                                             \
1213   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1214   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1215   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1216   }
1217 // ------------------------------------------------------------------------
1218 #else
1219 // ------------------------------------------------------------------------
1220 // Code for other architectures that don't handle unaligned accesses.
     // ATOMIC_CMPX_EQV (non-x86 variant): when (lhs & 0x<MASK>) is zero the
     // update uses the OP_CMPXCHG loop with the caller's OP; otherwise it runs
     // KMP_CHECK_GTID and applies "^= ~" through OP_CRITICAL with lock LCK_ID.
1221 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1222                         GOMP_FLAG)                                             \
1223   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1224   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                            \
1225   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1226     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1227   } else {                                                                     \
1228     KMP_CHECK_GTID;                                                            \
1229     OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */           \
1230   }                                                                            \
1231   }
1232 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1233 
     // .NEQV. entries reuse ATOMIC_CMPXCHG with '^' (plain exclusive OR).
     // .EQV. entries use ATOMIC_CMPX_EQV with OP "^~", i.e. XOR with the
     // complemented right-hand side.
1234 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1235                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1236 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1237                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1238 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1239                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1240 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1241                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1242 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1243                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1244 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1245                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1246 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1247                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1248 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1249                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1250 
1251 // ------------------------------------------------------------------------
1252 // Routines for Extended types: long double, _Quad, complex flavours (use
1253 // critical section)
1254 //     TYPE_ID, OP_ID, TYPE - detailed above
1255 //     OP      - operator
1256 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
     //     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL
     // ATOMIC_CRITICAL has no BITS or MASK parameter and no lock-free path: the
     // compound assignment "OP=" is always applied through OP_CRITICAL with lock
     // LCK_ID (after the OP_GOMP_CRITICAL step).
1257 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1258   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1259   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1260   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1261   }
1262 
1263 /* ------------------------------------------------------------------------- */
1264 // routines for long double type
     // All entries in this group use ATOMIC_CRITICAL (lock ids 10r and 16r) with
     // GOMP_FLAG 1. Argument order: TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG.
1265 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1266                 1) // __kmpc_atomic_float10_add
1267 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1268                 1) // __kmpc_atomic_float10_sub
1269 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1270                 1) // __kmpc_atomic_float10_mul
1271 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1272                 1) // __kmpc_atomic_float10_div
1273 #if KMP_HAVE_QUAD
1274 // routines for _Quad type
1275 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1276                 1) // __kmpc_atomic_float16_add
1277 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1278                 1) // __kmpc_atomic_float16_sub
1279 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1280                 1) // __kmpc_atomic_float16_mul
1281 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1282                 1) // __kmpc_atomic_float16_div
1283 #if (KMP_ARCH_X86)
     // _a16 variants take Quad_a16_t operands; built only when KMP_ARCH_X86 is set
1284 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1285                 1) // __kmpc_atomic_float16_add_a16
1286 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1287                 1) // __kmpc_atomic_float16_sub_a16
1288 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1289                 1) // __kmpc_atomic_float16_mul_a16
1290 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1291                 1) // __kmpc_atomic_float16_div_a16
1292 #endif // (KMP_ARCH_X86)
1293 #endif // KMP_HAVE_QUAD
1294 // routines for complex types
     // cmplx4 (kmp_cmplx32) is the only complex type given a compare-and-store
     // path: with USE_CMPXCHG_FIX it is treated as a 64-bit operand (BITS 64,
     // MASK 7) via ATOMIC_CMPXCHG_WORKAROUND; otherwise it uses ATOMIC_CRITICAL
     // like the wider complex types below.
1295 
1296 #if USE_CMPXCHG_FIX
1297 // workaround for C78287 (complex(kind=4) data type)
1298 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1299                           1) // __kmpc_atomic_cmplx4_add
1300 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1301                           1) // __kmpc_atomic_cmplx4_sub
1302 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1303                           1) // __kmpc_atomic_cmplx4_mul
1304 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1305                           1) // __kmpc_atomic_cmplx4_div
1306 // end of the workaround for C78287
1307 #else
1308 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1309 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1310 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1311 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1312 #endif // USE_CMPXCHG_FIX
1313 
     // cmplx8, cmplx10 and (with KMP_HAVE_QUAD) cmplx16: always ATOMIC_CRITICAL
1314 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1315 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1316 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1317 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1318 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1319                 1) // __kmpc_atomic_cmplx10_add
1320 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1321                 1) // __kmpc_atomic_cmplx10_sub
1322 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1323                 1) // __kmpc_atomic_cmplx10_mul
1324 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1325                 1) // __kmpc_atomic_cmplx10_div
1326 #if KMP_HAVE_QUAD
1327 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1328                 1) // __kmpc_atomic_cmplx16_add
1329 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1330                 1) // __kmpc_atomic_cmplx16_sub
1331 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1332                 1) // __kmpc_atomic_cmplx16_mul
1333 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1334                 1) // __kmpc_atomic_cmplx16_div
1335 #if (KMP_ARCH_X86)
1336 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1337                 1) // __kmpc_atomic_cmplx16_add_a16
1338 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1339                 1) // __kmpc_atomic_cmplx16_sub_a16
1340 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1341                 1) // __kmpc_atomic_cmplx16_mul_a16
1342 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1343                 1) // __kmpc_atomic_cmplx16_div_a16
1344 #endif // (KMP_ARCH_X86)
1345 #endif // KMP_HAVE_QUAD
1346 
1347 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1348 // Supported only on IA-32 architecture and Intel(R) 64
1349 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1350 
1351 // ------------------------------------------------------------------------
1352 // Reversed operation "*lhs = rhs OP *lhs", performed under an atomic lock
1353 //     OP     - plain binary operator (no assignment; the macro assigns *lhs)
1354 //     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to name the lock
1355 // Note: don't check gtid as it should always be valid
1356 // 1, 2-byte - expect valid parameter, other - check before this macro
1357 #define OP_CRITICAL_REV(OP, LCK_ID)                                            \
1358   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1359                                                                                \
1360   (*lhs) = (rhs)OP(*lhs);                                                      \
1361                                                                                \
1362   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1363 // GOMP compat: if FLAG && __kmp_atomic_mode == 2, update under lock 0 and return
1364 #ifdef KMP_GOMP_COMPAT
1365 #define OP_GOMP_CRITICAL_REV(OP, FLAG)                                         \
1366   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1367     KMP_CHECK_GTID;                                                            \
1368     OP_CRITICAL_REV(OP, 0);                                                    \
1369     return;                                                                    \
1370   }
1371 #else // without KMP_GOMP_COMPAT the macro expands to nothing
1372 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1373 #endif /* KMP_GOMP_COMPAT */
1374 
1375 // Begins a definition (name, parameters, debug trace); body is left open
1376 //     TYPE_ID  - operands type and size (fixed* signed, fixed*u unsigned)
1377 //     OP_ID    - operation identifier (sub, div, ...); "_rev" is appended
1378 //     TYPE     - operands' type (type of *lhs and of rhs)
1379 //     RET_TYPE - return type of the generated function
1380 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1381   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1382                                                    TYPE *lhs, TYPE rhs) {      \
1383     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1384     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1385 
1386 // ------------------------------------------------------------------------
1387 // Reversed operation "*lhs = rhs OP *lhs" via a "compare_and_store" retry loop
1388 //     TYPE    - operands' type
1389 //     BITS    - size in bits, selects KMP_COMPARE_AND_STORE_ACQ##BITS
1390 //     OP      - binary operator, applied as "rhs OP old_value"
1391 // Note: temp_val introduced in order to force the compiler to read
1392 //       *lhs only once (w/o it the compiler reads *lhs twice)
1393 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1394   {                                                                            \
1395     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1396     TYPE old_value, new_value;                                                 \
1397     temp_val = *lhs;                                                           \
1398     old_value = temp_val;                                                      \
1399     new_value = rhs OP old_value;                                              \
1400     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1401         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1402         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1403       KMP_DO_PAUSE;                                                            \
1404                                                                                \
1405       temp_val = *lhs;                                                         \
1406       old_value = temp_val;                                                    \
1407       new_value = rhs OP old_value;                                            \
1408     }                                                                          \
1409   }
1410 
1411 // -------------------------------------------------------------------------
1412 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1413   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1414   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1415   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1416   }
1417
1418 // ------------------------------------------------------------------------
1419 // Entries definition for integer and floating-point operands
1420 //     TYPE_ID - operands type and size (fixed4, float4)
1421 //     OP_ID   - operation identifier (sub, div, shl, shr)
1422 //     TYPE    - operand type
1423 //     BITS    - size in bits, used to distinguish low level calls
1424 //     OP      - operator, applied as "rhs OP *lhs"
1425 //     LCK_ID  - lock identifier (not referenced by ATOMIC_CMPXCHG_REV itself)
1426 //     GOMP_FLAG - nonzero enables the GOMP-compat path (OP_GOMP_CRITICAL_REV)
1427 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1428 // ------------------------------------------------------------------------
1429 // Reversed routines for 1/2/4/8-byte integer, float4 and float8 operands
1430 // ------------------------------------------------------------------------
1431 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1432 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1433                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1434 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1435                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1436 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1437                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1438 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1439                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1440 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1441                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1442 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1443                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1444
1445 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1446                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1447 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1448                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1449 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1450                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1451 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1452                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1453 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1454                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1455 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1456                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1457
1458 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1459                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1460 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1461                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1462 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1463                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1464 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1465                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1466 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1467                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1468 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1469                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1470
1471 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1472                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1473 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1474                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1475 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1476                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1477 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1478                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1479 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1480                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1481 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1482                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1483
1484 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1485                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1486 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1487                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1488
1489 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1490                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1491 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1492                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1493 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1494 
1495 // ------------------------------------------------------------------------
1496 // Reversed routines for extended types (long double, _Quad, complex): locked
1497 //     TYPE_ID, OP_ID, TYPE - detailed above
1498 //     OP      - operator, applied as "rhs OP *lhs"
1499 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1500 //     GOMP_FLAG - nonzero enables the GOMP-compat path (OP_GOMP_CRITICAL_REV)
1501 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1502   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1503   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1504   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1505   }
1506 
1507 /* ------------------------------------------------------------------------- */
1508 // Reversed routines (sub, div) for long double type
1509 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1510                     1) // __kmpc_atomic_float10_sub_rev
1511 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1512                     1) // __kmpc_atomic_float10_div_rev
1513 #if KMP_HAVE_QUAD
1514 // Reversed routines (sub, div) for _Quad type
1515 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1516                     1) // __kmpc_atomic_float16_sub_rev
1517 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1518                     1) // __kmpc_atomic_float16_div_rev
1519 #if (KMP_ARCH_X86) // *_a16 entries take Quad_a16_t operands
1520 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1521                     1) // __kmpc_atomic_float16_sub_a16_rev
1522 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1523                     1) // __kmpc_atomic_float16_div_a16_rev
1524 #endif // KMP_ARCH_X86
1525 #endif // KMP_HAVE_QUAD
1526 
1527 // Reversed routines (sub, div) for complex types, all via ATOMIC_CRITICAL_REV
1528 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1529                     1) // __kmpc_atomic_cmplx4_sub_rev
1530 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1531                     1) // __kmpc_atomic_cmplx4_div_rev
1532 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1533                     1) // __kmpc_atomic_cmplx8_sub_rev
1534 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1535                     1) // __kmpc_atomic_cmplx8_div_rev
1536 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1537                     1) // __kmpc_atomic_cmplx10_sub_rev
1538 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1539                     1) // __kmpc_atomic_cmplx10_div_rev
1540 #if KMP_HAVE_QUAD
1541 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1542                     1) // __kmpc_atomic_cmplx16_sub_rev
1543 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1544                     1) // __kmpc_atomic_cmplx16_div_rev
1545 #if (KMP_ARCH_X86) // *_a16 entries take kmp_cmplx128_a16_t operands
1546 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1547                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1548 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1549                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1550 #endif // KMP_ARCH_X86
1551 #endif // KMP_HAVE_QUAD
1552 
1553 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1554 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1555 
1556 /* ------------------------------------------------------------------------ */
1557 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
1558 /* Note: in order to reduce the total number of types combinations          */
1559 /*       it is supposed that compiler converts RHS to longest floating type,*/
1560 /*       that is _Quad, before call to any of these routines                */
1561 /* Conversion to _Quad will be done by the compiler during calculation,     */
1562 /*    conversion back to TYPE - before the assignment, like:                */
1563 /*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
1564 /* Performance penalty expected because of SW emulation use                 */
1565 /* ------------------------------------------------------------------------ */
1566 // Begins void __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>(.., TYPE *lhs, RTYPE rhs)
1567 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1568   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1569       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1570     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1571     KA_TRACE(100,                                                              \
1572              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1573               gtid));
1574 // -------------------------------------------------------------------------
1575 // Mixed-type entry: hands "OP=" to OP_GOMP_CRITICAL, then to OP_CRITICAL
1576 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1577                            GOMP_FLAG)                                          \
1578   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1579   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1580   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1581   }
1582 // -------------------------------------------------------------------------
1583 // ATOMIC_CMPXCHG_MIX: mixed-type entry (TYPE *lhs, RTYPE rhs) using OP_CMPXCHG
1584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1585 // -------------------------------------------------------------------------
1586 // X86 or X86_64: no alignment problems (LCK_ID and MASK are not used) =====
1587 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1588                            LCK_ID, MASK, GOMP_FLAG)                            \
1589   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1590   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1591   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1592   }
1593 // -------------------------------------------------------------------------
1594 #else
1595 // ------------------------------------------------------------------------
1596 // Other architectures (no unaligned accesses): lock if lhs & 0x##MASK != 0
1597 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1598                            LCK_ID, MASK, GOMP_FLAG)                            \
1599   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1600   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1601   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1602     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1603   } else {                                                                     \
1604     KMP_CHECK_GTID;                                                            \
1605     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1606   }                                                                            \
1607   }
1608 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1609 // -------------------------------------------------------------------------
1610 // Reversed ("*lhs = rhs OP *lhs") mixed-type entries, X86 / X86_64 only
1611 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1612 // ATOMIC_CMPXCHG_REV_MIX ignores LCK_ID and MASK; ATOMIC_CRITICAL_REV_FP locks
1613 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1614                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1615   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1616   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1617   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1618   }
1619 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1620                                LCK_ID, GOMP_FLAG)                              \
1621   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1622   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1623   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1624   }
1625 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1626 
1627 // RHS=float8: fixed1/2/4/8 (mul, div) and float4 (add, sub, mul, div) LHS
1628 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1629                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1630 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1631                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1632 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1633                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1634 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1635                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1636 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1637                    0) // __kmpc_atomic_fixed4_mul_float8
1638 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1639                    0) // __kmpc_atomic_fixed4_div_float8
1640 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1641                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1642 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1643                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1644 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1645                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1646 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1647                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1649                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1651                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1652 
1653 // RHS=float16 (_Quad "fp" entries; deprecated, to be removed when we are sure
1654 // the compiler does not use them). LHS: fixed1..fixed8(u), float4/8/10.
1655 #if KMP_HAVE_QUAD
1656 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1657                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1658 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1659                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1660 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1661                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1663                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1664 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1665                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1667                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1668 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1669                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1671                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1672
1673 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1674                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1675 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1676                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1677 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1678                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1680                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1681 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1682                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1684                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1685 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1686                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1688                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1689
1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1691                    0) // __kmpc_atomic_fixed4_add_fp
1692 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1693                    0) // __kmpc_atomic_fixed4u_add_fp
1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1695                    0) // __kmpc_atomic_fixed4_sub_fp
1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1697                    0) // __kmpc_atomic_fixed4u_sub_fp
1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1699                    0) // __kmpc_atomic_fixed4_mul_fp
1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1701                    0) // __kmpc_atomic_fixed4u_mul_fp
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1703                    0) // __kmpc_atomic_fixed4_div_fp
1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1705                    0) // __kmpc_atomic_fixed4u_div_fp
1706
1707 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1708                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1709 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1710                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1712                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1714                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1716                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1718                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1720                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1722                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1723
1724 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1725                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1726 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1727                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1729                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1731                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1732
1733 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1734                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1735 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1736                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1738                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1740                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1741
1742 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1743                    1) // __kmpc_atomic_float10_add_fp
1744 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1745                    1) // __kmpc_atomic_float10_sub_fp
1746 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1747                    1) // __kmpc_atomic_float10_mul_fp
1748 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1749                    1) // __kmpc_atomic_float10_div_fp
1750 
1751 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1752 // Reverse operations ("*lhs = rhs OP *lhs") with a _Quad right-hand side
1753 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1754                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1755 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1756                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1758                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1760                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1761
1762 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1763                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1764 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1765                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1767                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1769                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1770
1771 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1772                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1773 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1774                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1776                        0) // __kmpc_atomic_fixed4_div_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1778                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1779
1780 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1781                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1782 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1783                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1785                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1787                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1788
1789 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1790                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1791 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1792                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1793
1794 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1795                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1797                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1798
1799 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1800                        1) // __kmpc_atomic_float10_sub_rev_fp
1801 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1802                        1) // __kmpc_atomic_float10_div_rev_fp
1803 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1804 
1805 #endif // KMP_HAVE_QUAD
1806 // ATOMIC_CMPXCHG_CMPLX: mixed-type entry (TYPE *lhs, RTYPE rhs); three variants
1807 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1808 // ------------------------------------------------------------------------
1809 // X86 or X86_64: no alignment problems (LCK_ID and MASK are not used) =====
1810 #if USE_CMPXCHG_FIX
1811 // workaround for C78287 (complex(kind=4) data type): OP_CMPXCHG_WORKAROUND
1812 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1813                              LCK_ID, MASK, GOMP_FLAG)                          \
1814   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1815   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1816   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1817   }
1818 // end of the second part of the workaround for C78287
1819 #else // !USE_CMPXCHG_FIX: plain OP_CMPXCHG
1820 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1821                              LCK_ID, MASK, GOMP_FLAG)                          \
1822   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1823   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1824   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1825   }
1826 #endif // USE_CMPXCHG_FIX
1827 #else
1828 // ------------------------------------------------------------------------
1829 // Other architectures (no unaligned accesses): lock if lhs & 0x##MASK != 0
1830 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1831                              LCK_ID, MASK, GOMP_FLAG)                          \
1832   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1833   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1834   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1835     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1836   } else {                                                                     \
1837     KMP_CHECK_GTID;                                                            \
1838     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1839   }                                                                            \
1840   }
1841 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1842 
// Mixed-type updates of a kmp_cmplx32 left-hand side with a kmp_cmplx64
// right-hand side (add, sub, mul, div). Every entry passes BITS = 64,
// LCK_ID = 8c, MASK = 7 and GOMP_FLAG = KMP_ARCH_X86.
1843 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1844                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1845 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1846                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1847 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1848                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1849 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1850                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1851 
1852 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1853 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1854 
1855 // ------------------------------------------------------------------------
1856 // Atomic READ routines
1857 
1858 // ------------------------------------------------------------------------
1859 // Beginning of a READ definition (provides name, parameters, debug trace)
// Expands to the header and first statements of
//   RET_TYPE __kmpc_atomic_<TYPE_ID>_<OP_ID>(ident_t *id_ref, int gtid,
//                                            TYPE *loc)
// and leaves the function body open; the macro that uses it supplies the
// closing brace.
1860 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1861 //     fixed)
1862 //     OP_ID   - operation identifier (rd for the routines instantiated below)
1863 //     TYPE    - type of the object loc points to
//     RET_TYPE - return type of the generated routine
1864 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1865   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1866                                              TYPE *loc) {                      \
1867     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1868     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1869 
1870 // ------------------------------------------------------------------------
1871 // Atomic read of *loc using "compare_and_store_ret" routine
1872 //     TYPE    - operands' type
1873 //     BITS    - size in bits, used to distinguish low level calls
1874 //     OP      - operator (not referenced by this macro)
1875 // Note: temp_val introduced in order to force the compiler to read
1876 //       *loc only once (w/o it the compiler reads *loc twice)
1877 // TODO: check if it is still necessary
1878 // Return old value regardless of the result of "compare & swap" operation
// The comparand and the replacement passed to the call are the same bits
// (old_value.i_val), so a successful store writes back what was already there.
// The union reinterprets the TYPE value as a kmp_int<BITS> for the call and
// back again for the result. new_value is not declared here; the enclosing
// macro must declare it before expanding this one.
1879 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1880   {                                                                            \
1881     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1882     union f_i_union {                                                          \
1883       TYPE f_val;                                                              \
1884       kmp_int##BITS i_val;                                                     \
1885     };                                                                         \
1886     union f_i_union old_value;                                                 \
1887     temp_val = *loc;                                                           \
1888     old_value.f_val = temp_val;                                                \
1889     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1890         (kmp_int##BITS *)loc,                                                  \
1891         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1892         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1893     new_value = old_value.f_val;                                               \
1894     return new_value;                                                          \
1895   }
1896 
1897 // -------------------------------------------------------------------------
1898 // Read of *loc into new_value bound by critical section
1899 //     OP     - operator (not referenced by this macro)
1900 //     LCK_ID - lock identifier
1901 // Note: don't check gtid as it should always be valid
1902 // 1, 2-byte - expect valid parameter, other - check before this macro
// The macro does not return; the caller returns new_value after the lock has
// been released.
1903 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1904   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1905                                                                                \
1906   new_value = (*loc);                                                          \
1907                                                                                \
1908   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1909 
1910 // -------------------------------------------------------------------------
// GOMP-compatibility path for READ routines. When FLAG is nonzero and
// __kmp_atomic_mode == 2, the value is read under lock id 0 (ATOMIC_LOCK0)
// and returned immediately, so the regular read path is skipped.
// Expands to nothing when KMP_GOMP_COMPAT is not defined.
1911 #ifdef KMP_GOMP_COMPAT
1912 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1913   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1914     KMP_CHECK_GTID;                                                            \
1915     OP_CRITICAL_READ(OP, 0);                                                   \
1916     return new_value;                                                          \
1917   }
1918 #else
1919 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1920 #endif /* KMP_GOMP_COMPAT */
1921 
1922 // -------------------------------------------------------------------------
// READ routine for fixed-size integers. After the optional GOMP critical
// path, the returned value is the result of KMP_TEST_THEN_ADD<BITS>(loc, OP 0),
// i.e. the operand added is a signed zero.
//     BITS      - size in bits, selects the test-then-add flavour
//     OP        - sign placed in front of the 0 operand
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_READ
1923 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1924   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1925   TYPE new_value;                                                              \
1926   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1927   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1928   return new_value;                                                            \
1929   }
1930 // -------------------------------------------------------------------------
// READ routine built on compare-and-store: declares new_value, offers the
// GOMP critical path, then expands OP_CMPXCHG_READ, which contains the
// return statement. The trailing '}' closes the function opened by
// ATOMIC_BEGIN_READ.
1931 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1932   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1933   TYPE new_value;                                                              \
1934   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1935   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1936   }
1937 // ------------------------------------------------------------------------
1938 // READ routines for Extended types: long double, _Quad, complex flavours (use
1939 // critical section)
1940 //     TYPE_ID, OP_ID, TYPE - detailed above
1941 //     OP      - operator (only forwarded; the read itself does not use it)
1942 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL_READ
// The value is copied out of *loc while the lock is held and returned after
// the lock has been released.
1943 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1944   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1945   TYPE new_value;                                                              \
1946   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1947   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1948   return new_value;                                                            \
1949   }
1950 
1951 // ------------------------------------------------------------------------
1952 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1953 // value doesn't work.
1954 // Let's return the read value through the additional parameter.
1955 #if (KMP_OS_WINDOWS)
1956 
// Critical-section read for the workaround: copies *loc into *out while the
// lock selected by LCK_ID is held. OP is not referenced.
1957 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1958   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1959                                                                                \
1960   (*out) = (*loc);                                                             \
1961                                                                                \
1962   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1963 // ------------------------------------------------------------------------
// GOMP-compatibility path for the Windows* OS read workaround. When FLAG is
// nonzero and __kmp_atomic_mode == 2, *loc is copied into *out under lock id 0
// and the routine returns at once, mirroring OP_GOMP_CRITICAL_READ. Without
// the return the generated void routine would fall through and read *loc a
// second time under the type-specific lock.
// Expands to nothing when KMP_GOMP_COMPAT is not defined.
1964 #ifdef KMP_GOMP_COMPAT
1965 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1966   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1967     KMP_CHECK_GTID;                                                            \
1968     OP_CRITICAL_READ_WRK(OP, 0);                                               \
         return;                                                                    \
1969   }
1970 #else
1971 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1972 #endif /* KMP_GOMP_COMPAT */
1973 // ------------------------------------------------------------------------
// Beginning of a workaround READ definition: a void routine that takes the
// destination pointer out as an extra first parameter, ahead of id_ref, gtid
// and loc. The function body is left open for the macro that uses it.
1974 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1975   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1976                                          TYPE *loc) {                          \
1977     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1978     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1979 
1980 // ------------------------------------------------------------------------
// Workaround READ routine: GOMP critical path first, then a critical-section
// copy of *loc into *out under the lock selected by LCK_ID. The trailing '}'
// closes the function opened by ATOMIC_BEGIN_READ_WRK.
1981 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
1982   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
1983   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
1984   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
1985   }
1986 
1987 #endif // KMP_OS_WINDOWS
1988 
1989 // ------------------------------------------------------------------------
// Instantiations of the READ entry points. The fixed/float entries take
// (TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG); the ATOMIC_CRITICAL_READ
// entries take (TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG).
1990 //                  TYPE_ID,OP_ID, TYPE,   BITS, OP, GOMP_FLAG
1991 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1992 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1993                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1994 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
1995                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
1996 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
1997                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
1998 
1999 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2000 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2001                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2002 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2003                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2004 
2005 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2006                      1) // __kmpc_atomic_float10_rd
2007 #if KMP_HAVE_QUAD
2008 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2009                      1) // __kmpc_atomic_float16_rd
2010 #endif // KMP_HAVE_QUAD
2011 
2012 // Fix for CQ220361 on Windows* OS
// (on Windows* OS the cmplx4 read returns its value through the extra out
// parameter instead of a return value)
2013 #if (KMP_OS_WINDOWS)
2014 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2015                          1) // __kmpc_atomic_cmplx4_rd
2016 #else
2017 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2018                      1) // __kmpc_atomic_cmplx4_rd
2019 #endif // (KMP_OS_WINDOWS)
2020 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2021                      1) // __kmpc_atomic_cmplx8_rd
2022 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2023                      1) // __kmpc_atomic_cmplx10_rd
2024 #if KMP_HAVE_QUAD
2025 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2026                      1) // __kmpc_atomic_cmplx16_rd
// a16 variants (Quad_a16_t / kmp_cmplx128_a16_t operands), IA-32 builds only
2027 #if (KMP_ARCH_X86)
2028 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2029                      1) // __kmpc_atomic_float16_a16_rd
2030 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2031                      1) // __kmpc_atomic_cmplx16_a16_rd
2032 #endif // (KMP_ARCH_X86)
2033 #endif // KMP_HAVE_QUAD
2034 
2035 // ------------------------------------------------------------------------
2036 // Atomic WRITE routines
2037 
// WRITE routine for fixed-size integers: after the optional GOMP critical
// path it calls KMP_XCHG_FIXED<BITS>(lhs, rhs) and discards the call's result.
// The function header comes from ATOMIC_BEGIN (defined earlier in this file);
// the trailing '}' closes it.
//     BITS      - size in bits, selects the exchange flavour
//     OP        - assignment operator, forwarded to OP_GOMP_CRITICAL
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL
2038 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2039   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2040   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2041   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2042   }
2043 // ------------------------------------------------------------------------
// WRITE routine for floating-point operands: same shape as ATOMIC_XCHG_WR,
// but the exchange is done with KMP_XCHG_REAL<BITS>(lhs, rhs); its result is
// discarded.
2044 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2045   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2046   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2047   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2048   }
2049 
2050 // ------------------------------------------------------------------------
2051 // Write of rhs into *lhs using "compare_and_store" routine
2052 //     TYPE    - operands' type
2053 //     BITS    - size in bits, used to distinguish low level calls
2054 //     OP      - operator (not referenced by this macro)
2055 // Note: temp_val introduced in order to force the compiler to read
2056 //       *lhs only once (w/o it the compiler reads *lhs twice)
// The loop repeats until KMP_COMPARE_AND_STORE_ACQ<BITS> reports success
// (nonzero). Each failed attempt executes KMP_CPU_PAUSE() and re-reads *lhs
// as the new comparand; the value to store is always rhs.
2057 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2058   {                                                                            \
2059     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2060     TYPE old_value, new_value;                                                 \
2061     temp_val = *lhs;                                                           \
2062     old_value = temp_val;                                                      \
2063     new_value = rhs;                                                           \
2064     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2065         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2066         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2067       KMP_CPU_PAUSE();                                                         \
2068                                                                                \
2069       temp_val = *lhs;                                                         \
2070       old_value = temp_val;                                                    \
2071       new_value = rhs;                                                         \
2072     }                                                                          \
2073   }
2074 
2075 // -------------------------------------------------------------------------
// WRITE routine built on the compare-and-store loop of OP_CMPXCHG_WR. The
// instantiations in this file use it for the 8-byte writes when KMP_ARCH_X86
// is set.
2076 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2077   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2078   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2079   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2080   }
2081 
2082 // ------------------------------------------------------------------------
2083 // WRITE routines for Extended types: long double, _Quad, complex flavours (use
2084 // critical section)
2085 //     TYPE_ID, OP_ID, TYPE - detailed above
2086 //     OP      - operator (the instantiations below pass "=")
2087 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - forwarded to OP_GOMP_CRITICAL
// OP_CRITICAL and OP_GOMP_CRITICAL are defined earlier in this file.
2088 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2089   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2090   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2091   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2092   }
2093 // -------------------------------------------------------------------------
2094 
// Instantiations of the WRITE entry points. 1-, 2- and 4-byte operands use
// the exchange-based macros; 8-byte operands use the compare-and-store loop
// when KMP_ARCH_X86 is set and the exchange-based macros otherwise.
2095 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2096                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2097 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2098                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2099 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2100                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2101 #if (KMP_ARCH_X86)
2102 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2103                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2104 #else
2105 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2106                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2107 #endif // (KMP_ARCH_X86)
2108 
2109 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2110                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2111 #if (KMP_ARCH_X86)
2112 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2113                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2114 #else
2115 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2116                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2117 #endif // (KMP_ARCH_X86)
2118 
// Extended types: written under a lock (ATOMIC_CRITICAL_WR)
2119 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2120                    1) // __kmpc_atomic_float10_wr
2121 #if KMP_HAVE_QUAD
2122 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2123                    1) // __kmpc_atomic_float16_wr
2124 #endif // KMP_HAVE_QUAD
2125 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2126 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2127                    1) // __kmpc_atomic_cmplx8_wr
2128 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2129                    1) // __kmpc_atomic_cmplx10_wr
2130 #if KMP_HAVE_QUAD
2131 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2132                    1) // __kmpc_atomic_cmplx16_wr
// a16 variants (Quad_a16_t / kmp_cmplx128_a16_t operands), IA-32 builds only
2133 #if (KMP_ARCH_X86)
2134 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2135                    1) // __kmpc_atomic_float16_a16_wr
2136 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2137                    1) // __kmpc_atomic_cmplx16_a16_wr
2138 #endif // (KMP_ARCH_X86)
2139 #endif // KMP_HAVE_QUAD
2140 
2141 // ------------------------------------------------------------------------
2142 // Atomic CAPTURE routines
2143 
2144 // Beginning of a CAPTURE definition (provides name, parameters, debug trace)
2145 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2146 //     fixed)
2147 //     OP_ID   - operation identifier (add, sub, mul, ...)
2148 //     TYPE    - operands' type
//     RET_TYPE - return type of the generated routine
// The generated routine takes an extra int flag parameter. The capture macros
// in this file return the value after the operation when flag is nonzero and
// the value before it when flag is zero. The function body is left open for
// the macro that uses this one.
2149 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2150   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2151                                              TYPE *lhs, TYPE rhs, int flag) {  \
2152     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2153     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2154 
2155 // -------------------------------------------------------------------------
2156 // Capture operation on *lhs, rhs bound by critical section
2157 //     OP     - operator (it's supposed to contain an assignment)
2158 //     LCK_ID - lock identifier
2159 // Note: don't check gtid as it should always be valid
2160 // 1, 2-byte - expect valid parameter, other - check before this macro
// With flag nonzero new_value is read after "(*lhs) OP rhs" has been applied;
// with flag zero it is read before. The macro ends with "return new_value;",
// executed after the lock has been released. new_value must be declared by
// the enclosing macro.
2161 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2162   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2163                                                                                \
2164   if (flag) {                                                                  \
2165     (*lhs) OP rhs;                                                             \
2166     new_value = (*lhs);                                                        \
2167   } else {                                                                     \
2168     new_value = (*lhs);                                                        \
2169     (*lhs) OP rhs;                                                             \
2170   }                                                                            \
2171                                                                                \
2172   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2173   return new_value;
2174 
2175 // ------------------------------------------------------------------------
// GOMP-compatibility path for CAPTURE routines. When FLAG is nonzero and
// __kmp_atomic_mode == 2, the capture runs under lock id 0; OP is a bare
// operator here and is pasted with "=" before being handed on. The return is
// part of OP_CRITICAL_CPT. Expands to nothing when KMP_GOMP_COMPAT is not
// defined.
2176 #ifdef KMP_GOMP_COMPAT
2177 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)                                         \
2178   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2179     KMP_CHECK_GTID;                                                            \
2180     OP_CRITICAL_CPT(OP## =, 0);                                                \
2181   }
2182 #else
2183 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2184 #endif /* KMP_GOMP_COMPAT */
2185 
2186 // ------------------------------------------------------------------------
2187 // Capture operation on *lhs, rhs using "compare_and_store" routine
2188 //     TYPE    - operands' type
2189 //     BITS    - size in bits, used to distinguish low level calls
2190 //     OP      - operator
2191 // Note: temp_val introduced in order to force the compiler to read
2192 //       *lhs only once (w/o it the compiler reads *lhs twice)
// The loop recomputes "old_value OP rhs" from a fresh read of *lhs after every
// failed compare-and-store, pausing with KMP_CPU_PAUSE() in between. On
// success the macro returns new_value when flag is nonzero and old_value
// otherwise. The old_value/new_value declared here are local to the braces.
2193 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2194   {                                                                            \
2195     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2196     TYPE old_value, new_value;                                                 \
2197     temp_val = *lhs;                                                           \
2198     old_value = temp_val;                                                      \
2199     new_value = old_value OP rhs;                                              \
2200     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2201         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2202         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2203       KMP_CPU_PAUSE();                                                         \
2204                                                                                \
2205       temp_val = *lhs;                                                         \
2206       old_value = temp_val;                                                    \
2207       new_value = old_value OP rhs;                                            \
2208     }                                                                          \
2209     if (flag) {                                                                \
2210       return new_value;                                                        \
2211     } else                                                                     \
2212       return old_value;                                                        \
2213   }
2214 
2215 // -------------------------------------------------------------------------
// CAPTURE routine built on compare-and-store. The new_value declared here is
// the one the GOMP critical path assigns and returns; OP_CMPXCHG_CPT declares
// its own inside its braces and contains the return statements for the
// regular path.
2216 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2217   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2218   TYPE new_value;                                                              \
2219   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2220   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2221   }
2222 
2223 // -------------------------------------------------------------------------
// CAPTURE routine for integer add/sub: the value returned by
// KMP_TEST_THEN_ADD<BITS>(lhs, OP rhs) is kept in old_value. The routine
// returns "old_value OP rhs" when flag is nonzero and old_value otherwise.
// new_value is used only by the GOMP critical path.
2224 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2225   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2226   TYPE old_value, new_value;                                                   \
2227   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2228   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2229   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2230   if (flag) {                                                                  \
2231     return old_value OP rhs;                                                   \
2232   } else                                                                       \
2233     return old_value;                                                          \
2234   }
2235 // -------------------------------------------------------------------------
2236 
// add/sub CAPTURE entry points: 4- and 8-byte integers use test-then-add,
// float4/float8 use the compare-and-store loop.
// Arguments: TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2237 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2238                      0) // __kmpc_atomic_fixed4_add_cpt
2239 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2240                      0) // __kmpc_atomic_fixed4_sub_cpt
2241 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2242                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2243 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2244                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2245 
2246 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2247                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2248 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2249                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2250 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2251                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2252 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2253                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2254 
2255 // ------------------------------------------------------------------------
2256 // Entries definition for integer operands
2257 //     TYPE_ID - operands type and size (fixed4, float4)
2258 //     OP_ID   - operation identifier (add, sub, mul, ...)
2259 //     TYPE    - operand type
2260 //     BITS    - size in bits, used to distinguish low level calls
2261 //     OP      - operator (used in critical section)
2262 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2263 // ------------------------------------------------------------------------
2264 // Routines for ATOMIC integer operands, other operators
// (the float4/float8 mul and div capture routines close the list). The
// bitwise and/or/xor entries for fixed1, fixed2 and fixed4 pass GOMP_FLAG 0;
// every other entry passes KMP_ARCH_X86.
2265 // ------------------------------------------------------------------------
2266 //              TYPE_ID,OP_ID, TYPE,    BITS, OP,  GOMP_FLAG
2267 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2268                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2269 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2270                    0) // __kmpc_atomic_fixed1_andb_cpt
2271 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2272                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2273 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2274                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2275 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2276                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2277 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2278                    0) // __kmpc_atomic_fixed1_orb_cpt
2279 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2280                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2281 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2282                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2283 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2284                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2285 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2286                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2287 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2288                    0) // __kmpc_atomic_fixed1_xor_cpt
2289 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2290                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2291 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2292                    0) // __kmpc_atomic_fixed2_andb_cpt
2293 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2294                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2295 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2296                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2297 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2298                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2299 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2300                    0) // __kmpc_atomic_fixed2_orb_cpt
2301 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2302                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2303 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2304                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2305 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2306                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2307 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2308                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2309 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2310                    0) // __kmpc_atomic_fixed2_xor_cpt
2311 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2312                    0) // __kmpc_atomic_fixed4_andb_cpt
2313 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2314                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2315 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2316                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2317 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2318                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2319 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2320                    0) // __kmpc_atomic_fixed4_orb_cpt
2321 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2322                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2323 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2324                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2325 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2326                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2327 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2328                    0) // __kmpc_atomic_fixed4_xor_cpt
2329 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2330                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2331 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2332                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2333 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2334                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2335 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2336                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2337 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2338                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2339 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2340                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2341 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2342                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2343 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2344                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2346                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2347 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2348                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2349 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2350                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2351 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2352                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2353 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2354                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2355 //              TYPE_ID,OP_ID, TYPE,    BITS, OP,  GOMP_FLAG
2356 
2357 // CAPTURE routines for mixed types RHS=float16
2358 #if KMP_HAVE_QUAD
2359 
2360 // Beginning of a definition (provides name, parameters, debug trace)
2361 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2362 //     fixed)
2363 //     OP_ID   - operation identifier (add, sub, mul, ...)
2364 //     TYPE    - operands' type
//     RTYPE_ID - suffix appended to the routine name for the rhs type
//     RTYPE    - type of the rhs parameter
// Opens the body of
//   TYPE __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>(id_ref, gtid, lhs, rhs, flag)
// and leaves it open; the macro that uses this one supplies the closing brace.
2365 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2366   TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2367       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2368     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2369     KA_TRACE(100,                                                              \
2370              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2371               gtid));
2372 
2373 // -------------------------------------------------------------------------
// Mixed-type capture routine built on the compare-and-store helper.
// Opens the routine with ATOMIC_BEGIN_CPT_MIX, declares new_value, emits the
// GOMP-compatibility path (OP_GOMP_CRITICAL_CPT), then OP_CMPXCHG_CPT, and
// closes the routine. LCK_ID and MASK are accepted but not referenced in this
// macro's body.
2374 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2375                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2376   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2377   TYPE new_value;                                                              \
2378   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2379   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2380   }
2381 
2382 // -------------------------------------------------------------------------
// Mixed-type capture routine that uses a critical section instead of
// compare-and-store. `OP## =` pastes the operator and `=` into a compound
// assignment (e.g. `+` becomes `+=`) before handing it to OP_CRITICAL_CPT,
// with LCK_ID selecting the lock.
2383 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2384                                 LCK_ID, GOMP_FLAG)                             \
2385   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2386   TYPE new_value;                                                              \
2387   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2388   OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2389   }
2390 
// Capture routines with a _Quad right-hand side (entry points end in _fp).
// Argument order for ATOMIC_CMPXCHG_CPT_MIX:
//   TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, LCK_ID, MASK, GOMP_FLAG
// The comment after each instantiation names the generated entry point.

// 1-byte integers
2391 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2392                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2393 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2394                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2396                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2398                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2400                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2402                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2404                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2406                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2407 
// 2-byte integers
2408 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2409                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2410 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2411                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2413                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2415                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2417                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2419                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2421                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2423                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2424 
// 4-byte integers
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2426                        0) // __kmpc_atomic_fixed4_add_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2428                        0) // __kmpc_atomic_fixed4u_add_cpt_fp
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2430                        0) // __kmpc_atomic_fixed4_sub_cpt_fp
2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2432                        0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2434                        0) // __kmpc_atomic_fixed4_mul_cpt_fp
2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2436                        0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2438                        0) // __kmpc_atomic_fixed4_div_cpt_fp
2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2440                        0) // __kmpc_atomic_fixed4u_div_cpt_fp
2441 
// 8-byte integers
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2443                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2445                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2447                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2449                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2451                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2453                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2455                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2457                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2458 
// 4-byte reals
2459 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2460                        KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2462                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2464                        KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2466                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2467 
// 8-byte reals
2468 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2469                        KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2471                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2473                        KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2475                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2476 
// long double uses the critical-section form. Argument order for
// ATOMIC_CRITICAL_CPT_MIX:
//   TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, GOMP_FLAG
2477 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2478                         1) // __kmpc_atomic_float10_add_cpt_fp
2479 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2480                         1) // __kmpc_atomic_float10_sub_cpt_fp
2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2482                         1) // __kmpc_atomic_float10_mul_cpt_fp
2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2484                         1) // __kmpc_atomic_float10_div_cpt_fp
2485 
2486 #endif // KMP_HAVE_QUAD
2487 
2488 // ------------------------------------------------------------------------
2489 // Routines for C/C++ Reduction operators && and ||
2490 
// -------------------------------------------------------------------------
// Capture form of a logical (&&, ||) update, bound by a critical section.
//     OP     - assignment prefix that ends in the logical operator, e.g.
//              `= *lhs &&`; both `new_value OP rhs` and `(*lhs) OP rhs` must
//              form complete assignments
//     LCK_ID - lock identifier (pasted onto ATOMIC_LOCK)
// Expects gtid, flag, lhs, rhs and new_value in the enclosing scope.
//     flag != 0 - capture the value of *lhs after the update
//     flag == 0 - capture the value of *lhs before the update
// In both cases the result of the operation is stored to *lhs while the lock
// is held.
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    new_value OP rhs;                                                          \
    (*lhs) = new_value;                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2506 
2507 // ------------------------------------------------------------------------
// GOMP-compatibility path for the logical capture routines. When FLAG is
// non-zero and __kmp_atomic_mode == 2, checks gtid, runs OP_CRITICAL_L_CPT
// with lock id 0 and returns new_value from the enclosing routine.
// Expands to nothing when KMP_GOMP_COMPAT is not defined.
2508 #ifdef KMP_GOMP_COMPAT
2509 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2510   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2511     KMP_CHECK_GTID;                                                            \
2512     OP_CRITICAL_L_CPT(OP, 0);                                                  \
2513     return new_value;                                                          \
2514   }
2515 #else
2516 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2517 #endif /* KMP_GOMP_COMPAT */
2518 
2519 // ------------------------------------------------------------------------
2520 // Need separate macros for &&, || because there is no combined assignment
// Defines a logical capture routine: opens it with ATOMIC_BEGIN_CPT, declares
// new_value, emits the GOMP-compatibility path and then OP_CMPXCHG_CPT.
// The GOMP path receives `= *lhs OP`, so that appending rhs there yields a
// full assignment; OP_CMPXCHG_CPT receives the bare operator.
2521 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2522   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2523   TYPE new_value;                                                              \
2524   OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2525   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2526   }
2527 
// Logical AND / OR capture routines for 1-, 2-, 4- and 8-byte integers.
// Arguments: TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG.
2528 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2529                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2530 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2531                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2532 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2533                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2534 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2535                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2536 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2537                   0) // __kmpc_atomic_fixed4_andl_cpt
2538 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2539                   0) // __kmpc_atomic_fixed4_orl_cpt
2540 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2541                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2542 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2543                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2544 
2545 // -------------------------------------------------------------------------
2546 // Routines for Fortran operators that have no direct counterpart in C:
2547 // MAX, MIN, .EQV., .NEQV.
2548 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2549 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2550 
2551 // -------------------------------------------------------------------------
2552 // MIN and MAX need separate macros
2553 // OP - comparison that is true while *lhs still has to be replaced by rhs
// LCK_ID - lock identifier (pasted onto ATOMIC_LOCK)
// Re-tests the condition under the lock. If it still holds, stores rhs to
// *lhs and captures rhs (flag != 0) or the replaced value (flag == 0);
// otherwise captures the current *lhs. Always returns new_value from the
// enclosing routine. Expects gtid, flag, lhs, rhs, old_value and new_value in
// the enclosing scope.
2554 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2555   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2556                                                                                \
2557   if (*lhs OP rhs) { /* still need actions? */                                 \
2558     old_value = *lhs;                                                          \
2559     *lhs = rhs;                                                                \
2560     if (flag)                                                                  \
2561       new_value = rhs;                                                         \
2562     else                                                                       \
2563       new_value = old_value;                                                   \
2564   } else {                                                                     \
2565     new_value = *lhs;                                                          \
2566   }                                                                            \
2567   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2568   return new_value;
2569 
2570 // -------------------------------------------------------------------------
// GOMP-compatibility path for MIN/MAX capture. When FLAG is non-zero and
// __kmp_atomic_mode == 2, checks gtid and runs MIN_MAX_CRITSECT_CPT with lock
// id 0 (which returns from the enclosing routine).
// Expands to nothing when KMP_GOMP_COMPAT is not defined.
2571 #ifdef KMP_GOMP_COMPAT
2572 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2573   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2574     KMP_CHECK_GTID;                                                            \
2575     MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2576   }
2577 #else
2578 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2579 #endif /* KMP_GOMP_COMPAT */
2580 
// -------------------------------------------------------------------------
// Lock-free MIN/MAX capture using compare-and-store.
//     TYPE - operands' type
//     BITS - size in bits, used to select the low-level compare-and-store
//     OP   - comparison that is true while *lhs still has to be replaced by
//            rhs (`<` for max, `>` for min)
// Expects lhs, rhs, flag and old_value in the enclosing scope and always
// returns from the enclosing routine.
// The loop ends in one of two ways:
//   - the compare-and-store succeeded: rhs was stored and old_value (the
//     replaced value) still satisfies `old_value OP rhs`;
//   - the re-read value no longer satisfies the comparison (another thread
//     stored a value that makes the update unnecessary): nothing was stored.
// rhs is the value of *lhs after the operation only in the first case, so
// flag selects rhs only then; in every other case the observed value is
// returned.
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    /*TYPE old_value; */                                                       \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      KMP_CPU_PAUSE();                                                         \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
    if (flag && (old_value OP rhs)) /* rhs was stored: capture new value */    \
      return rhs;                                                              \
    else                                                                       \
      return old_value;                                                        \
  }
2602 
2603 // -------------------------------------------------------------------------
2604 // Critical-section variant of the MIN/MAX capture routine
// (instantiated below for the float16 types only; the 1- and 2-byte operands
// go through MIN_MAX_COMPXCHG_CPT).
// Tests the condition once without the lock. If an update looks necessary it
// takes the GOMP-compatibility path or MIN_MAX_CRITSECT_CPT with LCK_ID; the
// latter always returns. The trailing `return *lhs` therefore covers the case
// where the first test found nothing to do.
2605 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2606   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2607   TYPE new_value, old_value;                                                   \
2608   if (*lhs OP rhs) { /* need actions? */                                       \
2609     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2610     MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2611   }                                                                            \
2612   return *lhs;                                                                 \
2613   }
2614 
// Compare-and-store variant of the MIN/MAX capture routine.
// Tests the condition once; if an update looks necessary it takes the
// GOMP-compatibility path or MIN_MAX_CMPXCHG_CPT, which returns from the
// routine. The trailing `return *lhs` covers the case where the first test
// found nothing to do.
2615 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2616   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2617   TYPE new_value, old_value;                                                   \
2618   if (*lhs OP rhs) {                                                           \
2619     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2620     MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2621   }                                                                            \
2622   return *lhs;                                                                 \
2623   }
2624 
// MIN/MAX capture routines. The operator passed is the comparison that is true
// while *lhs must be replaced: `<` for max, `>` for min.
2625 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2626                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2627 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2628                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2629 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2630                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2631 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2632                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2633 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2634                      0) // __kmpc_atomic_fixed4_max_cpt
2635 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2636                      0) // __kmpc_atomic_fixed4_min_cpt
2637 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2638                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2639 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2640                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2641 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2642                      KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2643 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2644                      KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2645 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2646                      KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2647 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2648                      KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2649 #if KMP_HAVE_QUAD
2650 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2651                      1) // __kmpc_atomic_float16_max_cpt
2652 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2653                      1) // __kmpc_atomic_float16_min_cpt
2654 #if (KMP_ARCH_X86)
2655 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2656                      1) // __kmpc_atomic_float16_max_a16_cpt
2657 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2658                      1) // __kmpc_atomic_float16_min_a16_cpt
2659 #endif // (KMP_ARCH_X86)
2660 #endif // KMP_HAVE_QUAD
2661 
2662 // ------------------------------------------------------------------------
// GOMP-compatibility path for the .EQV. capture routines. When FLAG is
// non-zero and __kmp_atomic_mode == 2, checks gtid and runs OP_CRITICAL_CPT
// with lock id 0. There is no explicit return here; presumably
// OP_CRITICAL_CPT (defined earlier in this file) returns from the routine --
// confirm against its definition.
// Expands to nothing when KMP_GOMP_COMPAT is not defined.
2663 #ifdef KMP_GOMP_COMPAT
2664 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2665   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2666     KMP_CHECK_GTID;                                                            \
2667     OP_CRITICAL_CPT(OP, 0);                                                    \
2668   }
2669 #else
2670 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2671 #endif /* KMP_GOMP_COMPAT */
2672 // ------------------------------------------------------------------------
// Defines an .EQV. capture routine. The GOMP-compatibility path is given the
// fixed assignment prefix `^= ~`; the compare-and-store path is given the OP
// argument (instantiated below as `^~`).
2673 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2674   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2675   TYPE new_value;                                                              \
2676   OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */              \
2677   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2678   }
2679 
2680 // ------------------------------------------------------------------------
2681 
// .NEQV. is instantiated with the `^` operator through the generic
// ATOMIC_CMPXCHG_CPT macro; .EQV. is instantiated with `^~` through
// ATOMIC_CMPX_EQV_CPT.
2682 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2683                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2684 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2685                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2686 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2687                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2688 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2689                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2690 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2691                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2692 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2693                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2694 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2695                     KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2696 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2697                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2698 
2699 // ------------------------------------------------------------------------
2700 // Routines for Extended types: long double, _Quad, complex flavours (use
2701 // critical section)
2702 //     TYPE_ID, OP_ID, TYPE - detailed above
2703 //     OP      - operator
2704 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - forwarded to the GOMP-compatibility path
// `OP## =` pastes the operator and `=` into a compound assignment (e.g. `+`
// becomes `+=`) before handing it to OP_CRITICAL_CPT.
2705 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2706   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2707   TYPE new_value;                                                              \
2708   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2709   OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2710   }
2711 
2712 // ------------------------------------------------------------------------
2713 // Workaround for cmplx4. Regular routines with return value don't work
2714 // on Win_32e. Let's return captured values through the additional parameter.
//     OP     - compound assignment operator applied as `(*lhs) OP rhs`
//     LCK_ID - lock identifier (pasted onto ATOMIC_LOCK)
// Under the lock: flag != 0 updates *lhs and then copies it to *out;
// flag == 0 copies the old *lhs to *out and then updates *lhs. Ends with a
// bare `return`, so the enclosing routine must return void.
2715 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2716   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2717                                                                                \
2718   if (flag) {                                                                  \
2719     (*lhs) OP rhs;                                                             \
2720     (*out) = (*lhs);                                                           \
2721   } else {                                                                     \
2722     (*out) = (*lhs);                                                           \
2723     (*lhs) OP rhs;                                                             \
2724   }                                                                            \
2725                                                                                \
2726   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2727   return;
2728 // ------------------------------------------------------------------------
2729 
// GOMP-compatibility path for the cmplx4 workaround routines. When FLAG is
// non-zero and __kmp_atomic_mode == 2, checks gtid and runs
// OP_CRITICAL_CPT_WRK with `OP=` and lock id 0 (which returns from the
// enclosing routine). Expands to nothing when KMP_GOMP_COMPAT is not defined.
2730 #ifdef KMP_GOMP_COMPAT
2731 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2732   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2733     KMP_CHECK_GTID;                                                            \
2734     OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2735   }
2736 #else
2737 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2738 #endif /* KMP_GOMP_COMPAT */
2739 // ------------------------------------------------------------------------
2740 
// Opens the body of
//   void __kmpc_atomic_<TYPE_ID>_<OP_ID>(id_ref, gtid, lhs, rhs, out, flag)
// where the captured value is delivered through `out` instead of a return
// value. The macro that uses this one supplies the closing brace.
2741 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2742   void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2743                                          TYPE rhs, TYPE *out, int flag) {      \
2744     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2745     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2746 // ------------------------------------------------------------------------
2747 
// Defines a void capture routine (captured value written through `out`):
// GOMP-compatibility path first, then the critical-section update with LCK_ID.
2747 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2748   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2749   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2750   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2751   }
2752 // The end of workaround for cmplx4
2754 
2755 /* ------------------------------------------------------------------------- */
2756 // routines for long double type
2757 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2758                     1) // __kmpc_atomic_float10_add_cpt
2759 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2760                     1) // __kmpc_atomic_float10_sub_cpt
2761 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2762                     1) // __kmpc_atomic_float10_mul_cpt
2763 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2764                     1) // __kmpc_atomic_float10_div_cpt
2765 #if KMP_HAVE_QUAD
2766 // routines for _Quad type
2767 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2768                     1) // __kmpc_atomic_float16_add_cpt
2769 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2770                     1) // __kmpc_atomic_float16_sub_cpt
2771 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2772                     1) // __kmpc_atomic_float16_mul_cpt
2773 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2774                     1) // __kmpc_atomic_float16_div_cpt
2775 #if (KMP_ARCH_X86)
2776 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2777                     1) // __kmpc_atomic_float16_add_a16_cpt
2778 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2779                     1) // __kmpc_atomic_float16_sub_a16_cpt
2780 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2781                     1) // __kmpc_atomic_float16_mul_a16_cpt
2782 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2783                     1) // __kmpc_atomic_float16_div_a16_cpt
2784 #endif // (KMP_ARCH_X86)
2785 #endif // KMP_HAVE_QUAD
2786 
2786 // routines for complex types
2787 
2788 // cmplx4 routines to return void
// (the captured value is written through the extra `out` parameter, see
// ATOMIC_BEGIN_WRK)
2789 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2790                         1) // __kmpc_atomic_cmplx4_add_cpt
2791 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2792                         1) // __kmpc_atomic_cmplx4_sub_cpt
2793 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2794                         1) // __kmpc_atomic_cmplx4_mul_cpt
2795 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2796                         1) // __kmpc_atomic_cmplx4_div_cpt
2797 
// The remaining complex flavours return the captured value.
2798 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2799                     1) // __kmpc_atomic_cmplx8_add_cpt
2800 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2801                     1) // __kmpc_atomic_cmplx8_sub_cpt
2802 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2803                     1) // __kmpc_atomic_cmplx8_mul_cpt
2804 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2805                     1) // __kmpc_atomic_cmplx8_div_cpt
2806 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2807                     1) // __kmpc_atomic_cmplx10_add_cpt
2808 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2809                     1) // __kmpc_atomic_cmplx10_sub_cpt
2810 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2811                     1) // __kmpc_atomic_cmplx10_mul_cpt
2812 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2813                     1) // __kmpc_atomic_cmplx10_div_cpt
2814 #if KMP_HAVE_QUAD
2815 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2816                     1) // __kmpc_atomic_cmplx16_add_cpt
2817 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2818                     1) // __kmpc_atomic_cmplx16_sub_cpt
2819 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2820                     1) // __kmpc_atomic_cmplx16_mul_cpt
2821 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2822                     1) // __kmpc_atomic_cmplx16_div_cpt
2823 #if (KMP_ARCH_X86)
2824 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2825                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2826 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2827                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2828 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2829                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2830 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2831                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2832 #endif // (KMP_ARCH_X86)
2833 #endif // KMP_HAVE_QUAD
2835 
2836 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
2837 // binop x; v = x; }  for non-commutative operations.
2838 // Supported only on IA-32 architecture and Intel(R) 64
2839 
2840 // -------------------------------------------------------------------------
2841 // Operation on *lhs, rhs bound by critical section
2842 //     OP     - operator (it's supposed to contain an assignment)
2843 //     LCK_ID - lock identifier
2844 // Note: don't check gtid as it should always be valid
2845 // 1, 2-byte - expect valid parameter, other - check before this macro
2846 #define OP_CRITICAL_CPT_REV(OP, LCK_ID)                                        \
2847   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2848                                                                                \
2849   if (flag) {                                                                  \
2850     /*temp_val = (*lhs);*/                                                     \
2851     (*lhs) = (rhs)OP(*lhs);                                                    \
2852     new_value = (*lhs);                                                        \
2853   } else {                                                                     \
2854     new_value = (*lhs);                                                        \
2855     (*lhs) = (rhs)OP(*lhs);                                                    \
2856   }                                                                            \
2857   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2858   return new_value;
2859 
2860 // ------------------------------------------------------------------------
// GOMP-compatibility path for the reversed capture operations.
//     OP   - binary operator, forwarded to OP_CRITICAL_CPT_REV
//     FLAG - non-zero to enable this path for the routine being defined
// With KMP_GOMP_COMPAT defined, the update is done under lock id 0 when FLAG
// is set and __kmp_atomic_mode == 2; OP_CRITICAL_CPT_REV ends in a return, so
// the code following the expansion is skipped in that case. Without
// KMP_GOMP_COMPAT the macro expands to nothing.
2861 #ifdef KMP_GOMP_COMPAT
2862 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)                                     \
2863   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2864     KMP_CHECK_GTID;                                                            \
2865     OP_CRITICAL_CPT_REV(OP, 0);                                                \
2866   }
2867 #else
2868 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2869 #endif /* KMP_GOMP_COMPAT */
2870 
2871 // ------------------------------------------------------------------------
2872 // Reversed capture operation on *lhs, rhs using "compare_and_store" routine
2873 //     TYPE    - operands' type
2874 //     BITS    - size in bits, used to distinguish low level calls
2875 //     OP      - binary operator, applied as rhs OP old_value
// Expects lhs, rhs and flag to be in scope. The new value is recomputed from
// a fresh read of *lhs and the store is retried, with KMP_CPU_PAUSE() in
// between, until KMP_COMPARE_AND_STORE_ACQ##BITS reports success. The macro
// then returns new_value from the enclosing function when flag is set and
// old_value otherwise.
2876 // Note: temp_val introduced in order to force the compiler to read
2877 //       *lhs only once (w/o it the compiler reads *lhs twice)
2878 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2879   {                                                                            \
2880     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2881     TYPE old_value, new_value;                                                 \
2882     temp_val = *lhs;                                                           \
2883     old_value = temp_val;                                                      \
2884     new_value = rhs OP old_value;                                              \
2885     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2886         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2887         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2888       KMP_CPU_PAUSE();                                                         \
2889                                                                                \
2890       temp_val = *lhs;                                                         \
2891       old_value = temp_val;                                                    \
2892       new_value = rhs OP old_value;                                            \
2893     }                                                                          \
2894     if (flag) {                                                                \
2895       return new_value;                                                        \
2896     } else                                                                     \
2897       return old_value;                                                        \
2898   }
2899 
2900 // -------------------------------------------------------------------------
// Defines a reversed-capture entry point implemented with a
// compare-and-store loop.
//     TYPE_ID, OP_ID - forwarded to ATOMIC_BEGIN_CPT, which opens the function
//     TYPE           - operands' type
//     BITS           - size in bits, used to distinguish low level calls
//     OP             - binary operator, applied as rhs OP *lhs
//     GOMP_FLAG      - non-zero to enable the GOMP-compatible path
// The outer new_value is only consumed by OP_GOMP_CRITICAL_CPT_REV, which
// expands to nothing when KMP_GOMP_COMPAT is not defined
// (OP_CMPXCHG_CPT_REV declares its own locals). The void cast keeps that
// configuration free of unused-variable warnings.
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }
2907 
// Instantiations of ATOMIC_CMPXCHG_CPT_REV for the 1-, 2-, 4- and 8-byte
// integer types and for float4/float8. Every one of them passes KMP_ARCH_X86
// as GOMP_FLAG; the trailing comment on each names the generated routine.
2908 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2909                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2910 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2911                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2912 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2913                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2915                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2917                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2919                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2921                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2923                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2925                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2927                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2929                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2931                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2933                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2935                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2937                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2939                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2941                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2943                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2945                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2947                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2949                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2951                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2953                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2955                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2957                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2959                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2961                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2963                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2964 //              TYPE_ID,OP_ID, TYPE, BITS,    OP,  GOMP_FLAG
2965 
2966 // ------------------------------------------------------------------------
2967 // Routines for Extended types: long double, _Quad, complex flavours (use
2968 // critical section)
2969 //     TYPE_ID, OP_ID, TYPE - detailed above
2970 //     OP      - binary operator, applied as rhs OP *lhs
2971 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
// The function opened by ATOMIC_BEGIN_CPT is completed with a local
// new_value, the optional GOMP path, and the locked update performed by
// OP_CRITICAL_CPT_REV, which also supplies the return statement.
2972 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2973   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2974   TYPE new_value;                                                              \
2975   /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
2976   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
2977   OP_CRITICAL_CPT_REV(OP, LCK_ID)                                              \
2978   }
2979 
2980 /* ------------------------------------------------------------------------- */
2981 // reversed capture routines (sub, div) for long double type
2982 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2983                         1) // __kmpc_atomic_float10_sub_cpt_rev
2984 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2985                         1) // __kmpc_atomic_float10_div_cpt_rev
2986 #if KMP_HAVE_QUAD
2987 // reversed capture routines (sub, div) for _Quad type; the "_a16" variants
// operate on Quad_a16_t and are built only when KMP_ARCH_X86 is set
2988 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2989                         1) // __kmpc_atomic_float16_sub_cpt_rev
2990 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2991                         1) // __kmpc_atomic_float16_div_cpt_rev
2992 #if (KMP_ARCH_X86)
2993 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
2994                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
2995 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
2996                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
2997 #endif // (KMP_ARCH_X86)
2998 #endif // KMP_HAVE_QUAD
2999 
3000 // routines for complex types
3001 
3002 // ------------------------------------------------------------------------
3003 // Workaround for cmplx4. Regular routines with return value don't work
3004 // on Win_32e. Let's return captured values through the additional parameter.
//     OP     - binary operator, applied as rhs OP *lhs
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to name the lock
// Expects lhs, rhs, out, flag and gtid to be in scope. While the lock is
// held, *out receives the updated *lhs when flag is set and the previous
// *lhs otherwise; the macro then returns without a value.
3005 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3006   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3007                                                                                \
3008   if (flag) {                                                                  \
3009     (*lhs) = (rhs)OP(*lhs);                                                    \
3010     (*out) = (*lhs);                                                           \
3011   } else {                                                                     \
3012     (*out) = (*lhs);                                                           \
3013     (*lhs) = (rhs)OP(*lhs);                                                    \
3014   }                                                                            \
3015                                                                                \
3016   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3017   return;
3018 // ------------------------------------------------------------------------
3019 
// GOMP-compatibility path for the void-returning (cmplx4 workaround)
// reversed capture routines.
//     OP   - binary operator, forwarded to OP_CRITICAL_CPT_REV_WRK
//     FLAG - non-zero to enable this path for the routine being defined
// With KMP_GOMP_COMPAT defined, the update is done under lock id 0 when FLAG
// is set and __kmp_atomic_mode == 2, and the routine returns from inside
// OP_CRITICAL_CPT_REV_WRK. Without KMP_GOMP_COMPAT the macro expands to
// nothing.
3020 #ifdef KMP_GOMP_COMPAT
3021 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3022   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3023     KMP_CHECK_GTID;                                                            \
3024     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3025   }
3026 #else
3027 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3028 #endif /* KMP_GOMP_COMPAT */
3029 // ------------------------------------------------------------------------
3030 
// Defines a void-returning reversed capture routine that hands the captured
// value back through *out (cmplx4 workaround).
//     TYPE_ID, OP_ID, TYPE - forwarded to ATOMIC_BEGIN_WRK, which opens the
//                            function (presumably declaring the out
//                            parameter used below - confirm at its definition)
//     OP        - binary operator, applied as rhs OP *lhs
//     LCK_ID    - lock identifier for the regular (non-GOMP) path
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
3031 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3032                                     GOMP_FLAG)                                 \
3033   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3034   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3035   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3036   }
3037 // The end of workaround for cmplx4
3038 
// Reversed capture routines (sub, div) for the complex types. cmplx4 goes
// through the _WRK macro and therefore returns void; cmplx8, cmplx10 and
// cmplx16 use the value-returning ATOMIC_CRITICAL_CPT_REV.
3039 // !!! TODO: check if we need to return void for cmplx4 routines
3040 // cmplx4 routines to return void
3041 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3042                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3043 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3044                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
3045 
3046 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3047                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3048 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3049                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3050 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3051                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3052 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3053                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3054 #if KMP_HAVE_QUAD
3055 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3056                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3057 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3058                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3059 #if (KMP_ARCH_X86)
3060 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3061                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3062 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3063                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3064 #endif // (KMP_ARCH_X86)
3065 #endif // KMP_HAVE_QUAD
3066 
3067 // Capture reverse for mixed type: RHS=float16
3068 #if KMP_HAVE_QUAD
3069 
3070 // Beginning of a definition (provides name, parameters, debug trace)
3071 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
3072 //     fixed)
3073 //     OP_ID   - operation identifier (add, sub, mul, ...)
3074 //     TYPE    - operands' type
3075 // -------------------------------------------------------------------------
// Defines a mixed-type reversed-capture routine implemented with a
// compare-and-store loop.
//     TYPE_ID, OP_ID, RTYPE_ID, RTYPE - forwarded to ATOMIC_BEGIN_CPT_MIX,
//                                       which opens the function
//     TYPE         - type of *lhs and of the captured value
//     BITS         - size of TYPE in bits, selects the low level call
//     OP           - binary operator, applied as rhs OP *lhs
//     LCK_ID, MASK - accepted but not referenced by this macro
//     GOMP_FLAG    - non-zero to enable the GOMP-compatible path
// The outer new_value is only consumed by OP_GOMP_CRITICAL_CPT_REV, which
// expands to nothing when KMP_GOMP_COMPAT is not defined
// (OP_CMPXCHG_CPT_REV declares its own locals). The void cast keeps that
// configuration free of unused-variable warnings.
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }
3083 
3084 // -------------------------------------------------------------------------
// Defines a mixed-type reversed-capture routine that performs the update
// inside a critical section.
//     TYPE_ID, OP_ID, RTYPE_ID, RTYPE - forwarded to ATOMIC_BEGIN_CPT_MIX,
//                                       which opens the function
//     TYPE      - type of *lhs and of the captured value
//     OP        - binary operator, applied as rhs OP *lhs
//     LCK_ID    - lock identifier for the regular (non-GOMP) path
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
3085 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3086                                     LCK_ID, GOMP_FLAG)                         \
3087   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3088   TYPE new_value;                                                              \
3089   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* OP is a bare operator */          \
3090   OP_CRITICAL_CPT_REV(OP, LCK_ID) /* OP is a bare operator */                  \
3091   }
3092 
// Mixed-type reversed capture routines (sub, div) with a _Quad right-hand
// side ("fp" suffix). The integer, float4 and float8 targets use the
// compare-and-store macro; the long double target uses the critical-section
// macro. Note that the 4-byte integer instantiations pass 0 as GOMP_FLAG
// while the others pass KMP_ARCH_X86.
3093 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3094                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3095 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3096                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3097 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3098                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3099 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3100                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3101 
3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3103                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3105                            1,
3106                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3107 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3108                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3109 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3110                            1,
3111                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3112 
3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3114                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3115 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3116                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3117 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3118                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3119 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3120                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3121 
3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3123                            7,
3124                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3125 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3126                            8i, 7,
3127                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3129                            7,
3130                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3132                            8i, 7,
3133                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3134 
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3136                            4r, 3,
3137                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3138 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3139                            4r, 3,
3140                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3141 
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3143                            8r, 7,
3144                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3145 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3146                            8r, 7,
3147                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3148 
3149 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3150                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3151 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3152                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3153 
3154 #endif // KMP_HAVE_QUAD
3155 
3156 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3157 
// Opens the definition of the capture-write (swap) routine for one type:
//   TYPE __kmpc_atomic_<TYPE_ID>_swp(ident_t *id_ref, int gtid, TYPE *lhs,
//                                   TYPE rhs)
// and emits the serial-initialization debug assertion and the trace message.
// The function body is left open; the macro that uses ATOMIC_BEGIN_SWP
// supplies the rest of the body and the closing brace.
3158 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3159   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3160                                      TYPE rhs) {                               \
3161     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3162     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3163 
// Swap performed inside a critical section.
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to name the lock
// Expects lhs, rhs, gtid and old_value to be in scope. While the lock is
// held, *lhs is saved in old_value and replaced by rhs; after releasing the
// lock the macro returns old_value from the enclosing function.
3164 #define CRITICAL_SWP(LCK_ID)                                                   \
3165   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3166                                                                                \
3167   old_value = (*lhs);                                                          \
3168   (*lhs) = rhs;                                                                \
3169                                                                                \
3170   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3171   return old_value;
3172 
3173 // ------------------------------------------------------------------------
// GOMP-compatibility path for the swap routines.
//     FLAG - non-zero to enable this path for the routine being defined
// With KMP_GOMP_COMPAT defined, the swap is done under lock id 0 when FLAG is
// set and __kmp_atomic_mode == 2, and the routine returns from inside
// CRITICAL_SWP. Without KMP_GOMP_COMPAT the macro expands to nothing.
3174 #ifdef KMP_GOMP_COMPAT
3175 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3176   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3177     KMP_CHECK_GTID;                                                            \
3178     CRITICAL_SWP(0);                                                           \
3179   }
3180 #else
3181 #define GOMP_CRITICAL_SWP(FLAG)
3182 #endif /* KMP_GOMP_COMPAT */
3183 
// Defines a swap routine built on KMP_XCHG_FIXED##BITS.
//     TYPE_ID   - type identifier used in the routine name
//     TYPE      - operands' type
//     BITS      - size in bits, selects the low level exchange call
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
// The routine returns whatever KMP_XCHG_FIXED##BITS(lhs, rhs) yields
// (presumably the previous contents of *lhs - confirm at its definition).
3184 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3185   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3186   TYPE old_value;                                                              \
3187   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3188   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3189   return old_value;                                                            \
3190   }
3191 // ------------------------------------------------------------------------
// Same as ATOMIC_XCHG_SWP, but the exchange goes through
// KMP_XCHG_REAL##BITS, the variant used here for the floating-point types.
//     TYPE_ID   - type identifier used in the routine name
//     TYPE      - operands' type
//     BITS      - size in bits, selects the low level exchange call
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
3192 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3193   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3194   TYPE old_value;                                                              \
3195   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3196   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3197   return old_value;                                                            \
3198   }
3199 
3200 // ------------------------------------------------------------------------
// Swap implemented with a compare-and-store loop.
//     TYPE - operands' type
//     BITS - size in bits, used to distinguish low level calls
// Expects lhs and rhs to be in scope. *lhs is read through the
// KMP_ATOMIC_VOLATILE temporary, and the store of rhs is retried, with
// KMP_CPU_PAUSE() in between, until KMP_COMPARE_AND_STORE_ACQ##BITS reports
// success. The macro then returns the last value read from *lhs.
3201 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3202   {                                                                            \
3203     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3204     TYPE old_value, new_value;                                                 \
3205     temp_val = *lhs;                                                           \
3206     old_value = temp_val;                                                      \
3207     new_value = rhs;                                                           \
3208     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3209         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3210         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3211       KMP_CPU_PAUSE();                                                         \
3212                                                                                \
3213       temp_val = *lhs;                                                         \
3214       old_value = temp_val;                                                    \
3215       new_value = rhs;                                                         \
3216     }                                                                          \
3217     return old_value;                                                          \
3218   }
3219 
3220 // -------------------------------------------------------------------------
// Defines a swap routine implemented with a compare-and-store loop.
//     TYPE_ID   - type identifier used in the routine name
//     TYPE      - operands' type
//     BITS      - size in bits, used to distinguish low level calls
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
// The outer old_value is only consumed by GOMP_CRITICAL_SWP, which expands to
// nothing when KMP_GOMP_COMPAT is not defined (CMPXCHG_SWP declares its own
// locals). The void cast keeps that configuration free of unused-variable
// warnings.
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  (void)old_value;                                                             \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CMPXCHG_SWP(TYPE, BITS)                                                      \
  }
3227 
// Swap routines for the basic types. The 1-, 2- and 4-byte integers and
// float4 always use the exchange-based macros. The 8-byte types use the
// compare-and-store loop when KMP_ARCH_X86 is set and the exchange-based
// macros otherwise.
3228 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3229 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3230 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3231 
3232 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3233                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3234 
3235 #if (KMP_ARCH_X86)
3236 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3237                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3238 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3239                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3240 #else
3241 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3242 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3243                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3244 #endif // (KMP_ARCH_X86)
3245 
3246 // ------------------------------------------------------------------------
3247 // Swap routines for Extended types: long double, _Quad, complex flavours (use
3248 // critical section)
//     TYPE_ID   - type identifier used in the routine name
//     TYPE      - operands' type
//     LCK_ID    - lock identifier for the regular (non-GOMP) path
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
// CRITICAL_SWP supplies the return statement of the generated routine.
3249 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3250   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3251   TYPE old_value;                                                              \
3252   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3253   CRITICAL_SWP(LCK_ID)                                                         \
3254   }
3255 
3256 // ------------------------------------------------------------------------
3257 // !!! TODO: check if we need to return void for cmplx4 routines
3258 // Workaround for cmplx4. Regular routines with return value don't work
3259 // on Win_32e. Let's return captured values through the additional parameter.
3260 
// Opens the definition of the void-returning swap routine (cmplx4
// workaround):
//   void __kmpc_atomic_<TYPE_ID>_swp(ident_t *id_ref, int gtid, TYPE *lhs,
//                                   TYPE rhs, TYPE *out)
// and emits the serial-initialization debug assertion and the trace message.
// The function body is left open for the macro that uses it to complete.
3261 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3262   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3263                                      TYPE rhs, TYPE *out) {                    \
3264     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3265     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3266 
// Swap performed inside a critical section, returning the old value through
// *out instead of a return value.
//     LCK_ID - lock identifier, pasted onto ATOMIC_LOCK to name the lock
// Expects lhs, rhs, out, gtid and tmp to be in scope. While the lock is held,
// *lhs is saved in tmp, replaced by rhs, and tmp is copied to *out; the macro
// then releases the lock and returns.
3267 #define CRITICAL_SWP_WRK(LCK_ID)                                               \
3268   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3269                                                                                \
3270   tmp = (*lhs);                                                                \
3271   (*lhs) = (rhs);                                                              \
3272   (*out) = tmp;                                                                \
3273   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3274   return;
3275 // ------------------------------------------------------------------------
3276 
// GOMP-compatibility path for the void-returning swap routine.
//     FLAG - non-zero to enable this path for the routine being defined
// With KMP_GOMP_COMPAT defined, the swap is done under lock id 0 when FLAG is
// set and __kmp_atomic_mode == 2, and the routine returns from inside
// CRITICAL_SWP_WRK. Without KMP_GOMP_COMPAT the macro expands to nothing.
3277 #ifdef KMP_GOMP_COMPAT
3278 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3279   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3280     KMP_CHECK_GTID;                                                            \
3281     CRITICAL_SWP_WRK(0);                                                       \
3282   }
3283 #else
3284 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3285 #endif /* KMP_GOMP_COMPAT */
3286 // ------------------------------------------------------------------------
3287 
// Defines the void-returning swap routine that reports the old value through
// *out (cmplx4 workaround).
//     TYPE_ID   - type identifier used in the routine name
//     TYPE      - operands' type
//     LCK_ID    - lock identifier for the regular (non-GOMP) path
//     GOMP_FLAG - non-zero to enable the GOMP-compatible path
// tmp is the scratch variable that CRITICAL_SWP_WRK expects to find in scope.
3288 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3289   ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3290   TYPE tmp;                                                                    \
3291   GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3292   CRITICAL_SWP_WRK(LCK_ID)                                                     \
3293   }
3294 // The end of workaround for cmplx4
3295 
// Swap routines for the extended types, all implemented with a critical
// section. cmplx4 uses the void-returning _WRK macro; the _Quad based types
// are built only when KMP_HAVE_QUAD is set, and their "_a16" variants only
// when KMP_ARCH_X86 is set as well.
3296 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3297 #if KMP_HAVE_QUAD
3298 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3299 #endif // KMP_HAVE_QUAD
3300 // cmplx4 routine to return void
3301 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3302 
3303 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
3304 // __kmpc_atomic_cmplx4_swp
3305 
3306 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3307 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3308 #if KMP_HAVE_QUAD
3309 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3310 #if (KMP_ARCH_X86)
3311 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3312                     1) // __kmpc_atomic_float16_a16_swp
3313 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3314                     1) // __kmpc_atomic_cmplx16_a16_swp
3315 #endif // (KMP_ARCH_X86)
3316 #endif // KMP_HAVE_QUAD
3317 
3318 // End of OpenMP 4.0 Capture
3319 
3320 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3321 
3322 #undef OP_CRITICAL
3323 
3324 /* ------------------------------------------------------------------------ */
3325 /* Generic atomic routines                                                  */
3326 
3327 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3328                      void (*f)(void *, void *, void *)) {
3329   KMP_DEBUG_ASSERT(__kmp_init_serial);
3330 
3331   if (
3332 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3333       FALSE /* must use lock */
3334 #else
3335       TRUE
3336 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3337       ) {
3338     kmp_int8 old_value, new_value;
3339 
3340     old_value = *(kmp_int8 *)lhs;
3341     (*f)(&new_value, &old_value, rhs);
3342 
3343     /* TODO: Should this be acquire or release? */
3344     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3345                                        *(kmp_int8 *)&new_value)) {
3346       KMP_CPU_PAUSE();
3347 
3348       old_value = *(kmp_int8 *)lhs;
3349       (*f)(&new_value, &old_value, rhs);
3350     }
3351 
3352     return;
3353   } else {
3354 // All 1-byte data is of integer data type.
3355 
3356 #ifdef KMP_GOMP_COMPAT
3357     if (__kmp_atomic_mode == 2) {
3358       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3359     } else
3360 #endif /* KMP_GOMP_COMPAT */
3361       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3362 
3363     (*f)(lhs, lhs, rhs);
3364 
3365 #ifdef KMP_GOMP_COMPAT
3366     if (__kmp_atomic_mode == 2) {
3367       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3368     } else
3369 #endif /* KMP_GOMP_COMPAT */
3370       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3371   }
3372 }
3373 
3374 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3375                      void (*f)(void *, void *, void *)) {
3376   if (
3377 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3378       FALSE /* must use lock */
3379 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3380       TRUE /* no alignment problems */
3381 #else
3382       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3383 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3384       ) {
3385     kmp_int16 old_value, new_value;
3386 
3387     old_value = *(kmp_int16 *)lhs;
3388     (*f)(&new_value, &old_value, rhs);
3389 
3390     /* TODO: Should this be acquire or release? */
3391     while (!KMP_COMPARE_AND_STORE_ACQ16(
3392         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3393       KMP_CPU_PAUSE();
3394 
3395       old_value = *(kmp_int16 *)lhs;
3396       (*f)(&new_value, &old_value, rhs);
3397     }
3398 
3399     return;
3400   } else {
3401 // All 2-byte data is of integer data type.
3402 
3403 #ifdef KMP_GOMP_COMPAT
3404     if (__kmp_atomic_mode == 2) {
3405       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3406     } else
3407 #endif /* KMP_GOMP_COMPAT */
3408       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3409 
3410     (*f)(lhs, lhs, rhs);
3411 
3412 #ifdef KMP_GOMP_COMPAT
3413     if (__kmp_atomic_mode == 2) {
3414       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3415     } else
3416 #endif /* KMP_GOMP_COMPAT */
3417       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3418   }
3419 }
3420 
3421 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3422                      void (*f)(void *, void *, void *)) {
3423   KMP_DEBUG_ASSERT(__kmp_init_serial);
3424 
3425   if (
3426 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3427 // Gomp compatibility is broken if this routine is called for floats.
3428 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3429       TRUE /* no alignment problems */
3430 #else
3431       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3432 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3433       ) {
3434     kmp_int32 old_value, new_value;
3435 
3436     old_value = *(kmp_int32 *)lhs;
3437     (*f)(&new_value, &old_value, rhs);
3438 
3439     /* TODO: Should this be acquire or release? */
3440     while (!KMP_COMPARE_AND_STORE_ACQ32(
3441         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3442       KMP_CPU_PAUSE();
3443 
3444       old_value = *(kmp_int32 *)lhs;
3445       (*f)(&new_value, &old_value, rhs);
3446     }
3447 
3448     return;
3449   } else {
3450 // Use __kmp_atomic_lock_4i for all 4-byte data,
3451 // even if it isn't of integer data type.
3452 
3453 #ifdef KMP_GOMP_COMPAT
3454     if (__kmp_atomic_mode == 2) {
3455       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3456     } else
3457 #endif /* KMP_GOMP_COMPAT */
3458       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3459 
3460     (*f)(lhs, lhs, rhs);
3461 
3462 #ifdef KMP_GOMP_COMPAT
3463     if (__kmp_atomic_mode == 2) {
3464       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3465     } else
3466 #endif /* KMP_GOMP_COMPAT */
3467       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3468   }
3469 }
3470 
3471 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3472                      void (*f)(void *, void *, void *)) {
3473   KMP_DEBUG_ASSERT(__kmp_init_serial);
3474   if (
3475 
3476 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3477       FALSE /* must use lock */
3478 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3479       TRUE /* no alignment problems */
3480 #else
3481       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3482 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3483       ) {
3484     kmp_int64 old_value, new_value;
3485 
3486     old_value = *(kmp_int64 *)lhs;
3487     (*f)(&new_value, &old_value, rhs);
3488     /* TODO: Should this be acquire or release? */
3489     while (!KMP_COMPARE_AND_STORE_ACQ64(
3490         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3491       KMP_CPU_PAUSE();
3492 
3493       old_value = *(kmp_int64 *)lhs;
3494       (*f)(&new_value, &old_value, rhs);
3495     }
3496 
3497     return;
3498   } else {
3499 // Use __kmp_atomic_lock_8i for all 8-byte data,
3500 // even if it isn't of integer data type.
3501 
3502 #ifdef KMP_GOMP_COMPAT
3503     if (__kmp_atomic_mode == 2) {
3504       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3505     } else
3506 #endif /* KMP_GOMP_COMPAT */
3507       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3508 
3509     (*f)(lhs, lhs, rhs);
3510 
3511 #ifdef KMP_GOMP_COMPAT
3512     if (__kmp_atomic_mode == 2) {
3513       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3514     } else
3515 #endif /* KMP_GOMP_COMPAT */
3516       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3517   }
3518 }
3519 
3520 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3521                       void (*f)(void *, void *, void *)) {
3522   KMP_DEBUG_ASSERT(__kmp_init_serial);
3523 
3524 #ifdef KMP_GOMP_COMPAT
3525   if (__kmp_atomic_mode == 2) {
3526     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3527   } else
3528 #endif /* KMP_GOMP_COMPAT */
3529     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3530 
3531   (*f)(lhs, lhs, rhs);
3532 
3533 #ifdef KMP_GOMP_COMPAT
3534   if (__kmp_atomic_mode == 2) {
3535     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3536   } else
3537 #endif /* KMP_GOMP_COMPAT */
3538     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3539 }
3540 
3541 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3542                       void (*f)(void *, void *, void *)) {
3543   KMP_DEBUG_ASSERT(__kmp_init_serial);
3544 
3545 #ifdef KMP_GOMP_COMPAT
3546   if (__kmp_atomic_mode == 2) {
3547     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3548   } else
3549 #endif /* KMP_GOMP_COMPAT */
3550     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3551 
3552   (*f)(lhs, lhs, rhs);
3553 
3554 #ifdef KMP_GOMP_COMPAT
3555   if (__kmp_atomic_mode == 2) {
3556     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3557   } else
3558 #endif /* KMP_GOMP_COMPAT */
3559     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3560 }
3561 
3562 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3563                       void (*f)(void *, void *, void *)) {
3564   KMP_DEBUG_ASSERT(__kmp_init_serial);
3565 
3566 #ifdef KMP_GOMP_COMPAT
3567   if (__kmp_atomic_mode == 2) {
3568     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3569   } else
3570 #endif /* KMP_GOMP_COMPAT */
3571     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3572 
3573   (*f)(lhs, lhs, rhs);
3574 
3575 #ifdef KMP_GOMP_COMPAT
3576   if (__kmp_atomic_mode == 2) {
3577     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3578   } else
3579 #endif /* KMP_GOMP_COMPAT */
3580     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3581 }
3582 
3583 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3584                       void (*f)(void *, void *, void *)) {
3585   KMP_DEBUG_ASSERT(__kmp_init_serial);
3586 
3587 #ifdef KMP_GOMP_COMPAT
3588   if (__kmp_atomic_mode == 2) {
3589     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3590   } else
3591 #endif /* KMP_GOMP_COMPAT */
3592     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3593 
3594   (*f)(lhs, lhs, rhs);
3595 
3596 #ifdef KMP_GOMP_COMPAT
3597   if (__kmp_atomic_mode == 2) {
3598     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3599   } else
3600 #endif /* KMP_GOMP_COMPAT */
3601     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3602 }
3603 
// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3607 void __kmpc_atomic_start(void) {
3608   int gtid = __kmp_entry_gtid();
3609   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3610   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3611 }
3612 
3613 void __kmpc_atomic_end(void) {
3614   int gtid = __kmp_get_gtid();
3615   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3616   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3617 }
3618 
3619 /*!
3620 @}
3621 */
3622 
3623 // end of file
3624