/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|-------------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
80-bit extended precision float (x87 `long double`) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param id_ref a pointer to the source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param id_ref a pointer to the source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
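
As an illustrative sketch only (actual code generation is compiler-specific), a
capture construct such as
@code
int v;
#pragma omp atomic capture
{ v = s; s += 5; }
@endcode
could, when not inlined, be lowered to a call like
`v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 0);`, where `loc` and
`gtid` stand for the source-location descriptor and global thread id supplied
by the compiler, and the final `0` requests the value *before* the update.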
111 112 The one set of exceptions to this is the `complex<float>` type where the value 113 is not returned, rather an extra argument pointer is passed. 114 115 They look like 116 @code 117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * 118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 119 @endcode 120 121 Read and Write Operations 122 ========================= 123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply 124 ensure that the value is read or written atomically, with no modification 125 performed. In many cases on IA-32 architecture these operations can be inlined 126 since the architecture guarantees that no tearing occurs on aligned objects 127 accessed with a single memory operation of up to 64 bits in size. 128 129 The general form of the read operations is 130 @code 131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 132 @endcode 133 134 For the write operations the form is 135 @code 136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs 137 ); 138 @endcode 139 140 Full list of functions 141 ====================== 142 This leads to the generation of 376 atomic functions, as follows. 143 144 Functions for integers 145 --------------------- 146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and 147 unsigned (where that matters). 148 @code 149 __kmpc_atomic_fixed1_add 150 __kmpc_atomic_fixed1_add_cpt 151 __kmpc_atomic_fixed1_add_fp 152 __kmpc_atomic_fixed1_andb 153 __kmpc_atomic_fixed1_andb_cpt 154 __kmpc_atomic_fixed1_andl 155 __kmpc_atomic_fixed1_andl_cpt 156 __kmpc_atomic_fixed1_div 157 __kmpc_atomic_fixed1_div_cpt 158 __kmpc_atomic_fixed1_div_cpt_rev 159 __kmpc_atomic_fixed1_div_float8 160 __kmpc_atomic_fixed1_div_fp 161 __kmpc_atomic_fixed1_div_rev 162 __kmpc_atomic_fixed1_eqv 163 __kmpc_atomic_fixed1_eqv_cpt 164 __kmpc_atomic_fixed1_max 165 __kmpc_atomic_fixed1_max_cpt 166 __kmpc_atomic_fixed1_min 167 __kmpc_atomic_fixed1_min_cpt 168 __kmpc_atomic_fixed1_mul 169 __kmpc_atomic_fixed1_mul_cpt 170 __kmpc_atomic_fixed1_mul_float8 171 __kmpc_atomic_fixed1_mul_fp 172 __kmpc_atomic_fixed1_neqv 173 __kmpc_atomic_fixed1_neqv_cpt 174 __kmpc_atomic_fixed1_orb 175 __kmpc_atomic_fixed1_orb_cpt 176 __kmpc_atomic_fixed1_orl 177 __kmpc_atomic_fixed1_orl_cpt 178 __kmpc_atomic_fixed1_rd 179 __kmpc_atomic_fixed1_shl 180 __kmpc_atomic_fixed1_shl_cpt 181 __kmpc_atomic_fixed1_shl_cpt_rev 182 __kmpc_atomic_fixed1_shl_rev 183 __kmpc_atomic_fixed1_shr 184 __kmpc_atomic_fixed1_shr_cpt 185 __kmpc_atomic_fixed1_shr_cpt_rev 186 __kmpc_atomic_fixed1_shr_rev 187 __kmpc_atomic_fixed1_sub 188 __kmpc_atomic_fixed1_sub_cpt 189 __kmpc_atomic_fixed1_sub_cpt_rev 190 __kmpc_atomic_fixed1_sub_fp 191 __kmpc_atomic_fixed1_sub_rev 192 __kmpc_atomic_fixed1_swp 193 __kmpc_atomic_fixed1_wr 194 __kmpc_atomic_fixed1_xor 195 __kmpc_atomic_fixed1_xor_cpt 196 __kmpc_atomic_fixed1u_add_fp 197 __kmpc_atomic_fixed1u_sub_fp 198 __kmpc_atomic_fixed1u_mul_fp 199 __kmpc_atomic_fixed1u_div 200 __kmpc_atomic_fixed1u_div_cpt 201 __kmpc_atomic_fixed1u_div_cpt_rev 202 __kmpc_atomic_fixed1u_div_fp 203 __kmpc_atomic_fixed1u_div_rev 204 __kmpc_atomic_fixed1u_shr 205 __kmpc_atomic_fixed1u_shr_cpt 206 __kmpc_atomic_fixed1u_shr_cpt_rev 207 __kmpc_atomic_fixed1u_shr_rev 208 __kmpc_atomic_fixed2_add 209 __kmpc_atomic_fixed2_add_cpt 210 __kmpc_atomic_fixed2_add_fp 211 __kmpc_atomic_fixed2_andb 212 __kmpc_atomic_fixed2_andb_cpt 213 __kmpc_atomic_fixed2_andl 214 __kmpc_atomic_fixed2_andl_cpt 215 
__kmpc_atomic_fixed2_div 216 __kmpc_atomic_fixed2_div_cpt 217 __kmpc_atomic_fixed2_div_cpt_rev 218 __kmpc_atomic_fixed2_div_float8 219 __kmpc_atomic_fixed2_div_fp 220 __kmpc_atomic_fixed2_div_rev 221 __kmpc_atomic_fixed2_eqv 222 __kmpc_atomic_fixed2_eqv_cpt 223 __kmpc_atomic_fixed2_max 224 __kmpc_atomic_fixed2_max_cpt 225 __kmpc_atomic_fixed2_min 226 __kmpc_atomic_fixed2_min_cpt 227 __kmpc_atomic_fixed2_mul 228 __kmpc_atomic_fixed2_mul_cpt 229 __kmpc_atomic_fixed2_mul_float8 230 __kmpc_atomic_fixed2_mul_fp 231 __kmpc_atomic_fixed2_neqv 232 __kmpc_atomic_fixed2_neqv_cpt 233 __kmpc_atomic_fixed2_orb 234 __kmpc_atomic_fixed2_orb_cpt 235 __kmpc_atomic_fixed2_orl 236 __kmpc_atomic_fixed2_orl_cpt 237 __kmpc_atomic_fixed2_rd 238 __kmpc_atomic_fixed2_shl 239 __kmpc_atomic_fixed2_shl_cpt 240 __kmpc_atomic_fixed2_shl_cpt_rev 241 __kmpc_atomic_fixed2_shl_rev 242 __kmpc_atomic_fixed2_shr 243 __kmpc_atomic_fixed2_shr_cpt 244 __kmpc_atomic_fixed2_shr_cpt_rev 245 __kmpc_atomic_fixed2_shr_rev 246 __kmpc_atomic_fixed2_sub 247 __kmpc_atomic_fixed2_sub_cpt 248 __kmpc_atomic_fixed2_sub_cpt_rev 249 __kmpc_atomic_fixed2_sub_fp 250 __kmpc_atomic_fixed2_sub_rev 251 __kmpc_atomic_fixed2_swp 252 __kmpc_atomic_fixed2_wr 253 __kmpc_atomic_fixed2_xor 254 __kmpc_atomic_fixed2_xor_cpt 255 __kmpc_atomic_fixed2u_add_fp 256 __kmpc_atomic_fixed2u_sub_fp 257 __kmpc_atomic_fixed2u_mul_fp 258 __kmpc_atomic_fixed2u_div 259 __kmpc_atomic_fixed2u_div_cpt 260 __kmpc_atomic_fixed2u_div_cpt_rev 261 __kmpc_atomic_fixed2u_div_fp 262 __kmpc_atomic_fixed2u_div_rev 263 __kmpc_atomic_fixed2u_shr 264 __kmpc_atomic_fixed2u_shr_cpt 265 __kmpc_atomic_fixed2u_shr_cpt_rev 266 __kmpc_atomic_fixed2u_shr_rev 267 __kmpc_atomic_fixed4_add 268 __kmpc_atomic_fixed4_add_cpt 269 __kmpc_atomic_fixed4_add_fp 270 __kmpc_atomic_fixed4_andb 271 __kmpc_atomic_fixed4_andb_cpt 272 __kmpc_atomic_fixed4_andl 273 __kmpc_atomic_fixed4_andl_cpt 274 __kmpc_atomic_fixed4_div 275 __kmpc_atomic_fixed4_div_cpt 276 __kmpc_atomic_fixed4_div_cpt_rev 277 __kmpc_atomic_fixed4_div_float8 278 __kmpc_atomic_fixed4_div_fp 279 __kmpc_atomic_fixed4_div_rev 280 __kmpc_atomic_fixed4_eqv 281 __kmpc_atomic_fixed4_eqv_cpt 282 __kmpc_atomic_fixed4_max 283 __kmpc_atomic_fixed4_max_cpt 284 __kmpc_atomic_fixed4_min 285 __kmpc_atomic_fixed4_min_cpt 286 __kmpc_atomic_fixed4_mul 287 __kmpc_atomic_fixed4_mul_cpt 288 __kmpc_atomic_fixed4_mul_float8 289 __kmpc_atomic_fixed4_mul_fp 290 __kmpc_atomic_fixed4_neqv 291 __kmpc_atomic_fixed4_neqv_cpt 292 __kmpc_atomic_fixed4_orb 293 __kmpc_atomic_fixed4_orb_cpt 294 __kmpc_atomic_fixed4_orl 295 __kmpc_atomic_fixed4_orl_cpt 296 __kmpc_atomic_fixed4_rd 297 __kmpc_atomic_fixed4_shl 298 __kmpc_atomic_fixed4_shl_cpt 299 __kmpc_atomic_fixed4_shl_cpt_rev 300 __kmpc_atomic_fixed4_shl_rev 301 __kmpc_atomic_fixed4_shr 302 __kmpc_atomic_fixed4_shr_cpt 303 __kmpc_atomic_fixed4_shr_cpt_rev 304 __kmpc_atomic_fixed4_shr_rev 305 __kmpc_atomic_fixed4_sub 306 __kmpc_atomic_fixed4_sub_cpt 307 __kmpc_atomic_fixed4_sub_cpt_rev 308 __kmpc_atomic_fixed4_sub_fp 309 __kmpc_atomic_fixed4_sub_rev 310 __kmpc_atomic_fixed4_swp 311 __kmpc_atomic_fixed4_wr 312 __kmpc_atomic_fixed4_xor 313 __kmpc_atomic_fixed4_xor_cpt 314 __kmpc_atomic_fixed4u_add_fp 315 __kmpc_atomic_fixed4u_sub_fp 316 __kmpc_atomic_fixed4u_mul_fp 317 __kmpc_atomic_fixed4u_div 318 __kmpc_atomic_fixed4u_div_cpt 319 __kmpc_atomic_fixed4u_div_cpt_rev 320 __kmpc_atomic_fixed4u_div_fp 321 __kmpc_atomic_fixed4u_div_rev 322 __kmpc_atomic_fixed4u_shr 323 __kmpc_atomic_fixed4u_shr_cpt 324 __kmpc_atomic_fixed4u_shr_cpt_rev 325 
__kmpc_atomic_fixed4u_shr_rev 326 __kmpc_atomic_fixed8_add 327 __kmpc_atomic_fixed8_add_cpt 328 __kmpc_atomic_fixed8_add_fp 329 __kmpc_atomic_fixed8_andb 330 __kmpc_atomic_fixed8_andb_cpt 331 __kmpc_atomic_fixed8_andl 332 __kmpc_atomic_fixed8_andl_cpt 333 __kmpc_atomic_fixed8_div 334 __kmpc_atomic_fixed8_div_cpt 335 __kmpc_atomic_fixed8_div_cpt_rev 336 __kmpc_atomic_fixed8_div_float8 337 __kmpc_atomic_fixed8_div_fp 338 __kmpc_atomic_fixed8_div_rev 339 __kmpc_atomic_fixed8_eqv 340 __kmpc_atomic_fixed8_eqv_cpt 341 __kmpc_atomic_fixed8_max 342 __kmpc_atomic_fixed8_max_cpt 343 __kmpc_atomic_fixed8_min 344 __kmpc_atomic_fixed8_min_cpt 345 __kmpc_atomic_fixed8_mul 346 __kmpc_atomic_fixed8_mul_cpt 347 __kmpc_atomic_fixed8_mul_float8 348 __kmpc_atomic_fixed8_mul_fp 349 __kmpc_atomic_fixed8_neqv 350 __kmpc_atomic_fixed8_neqv_cpt 351 __kmpc_atomic_fixed8_orb 352 __kmpc_atomic_fixed8_orb_cpt 353 __kmpc_atomic_fixed8_orl 354 __kmpc_atomic_fixed8_orl_cpt 355 __kmpc_atomic_fixed8_rd 356 __kmpc_atomic_fixed8_shl 357 __kmpc_atomic_fixed8_shl_cpt 358 __kmpc_atomic_fixed8_shl_cpt_rev 359 __kmpc_atomic_fixed8_shl_rev 360 __kmpc_atomic_fixed8_shr 361 __kmpc_atomic_fixed8_shr_cpt 362 __kmpc_atomic_fixed8_shr_cpt_rev 363 __kmpc_atomic_fixed8_shr_rev 364 __kmpc_atomic_fixed8_sub 365 __kmpc_atomic_fixed8_sub_cpt 366 __kmpc_atomic_fixed8_sub_cpt_rev 367 __kmpc_atomic_fixed8_sub_fp 368 __kmpc_atomic_fixed8_sub_rev 369 __kmpc_atomic_fixed8_swp 370 __kmpc_atomic_fixed8_wr 371 __kmpc_atomic_fixed8_xor 372 __kmpc_atomic_fixed8_xor_cpt 373 __kmpc_atomic_fixed8u_add_fp 374 __kmpc_atomic_fixed8u_sub_fp 375 __kmpc_atomic_fixed8u_mul_fp 376 __kmpc_atomic_fixed8u_div 377 __kmpc_atomic_fixed8u_div_cpt 378 __kmpc_atomic_fixed8u_div_cpt_rev 379 __kmpc_atomic_fixed8u_div_fp 380 __kmpc_atomic_fixed8u_div_rev 381 __kmpc_atomic_fixed8u_shr 382 __kmpc_atomic_fixed8u_shr_cpt 383 __kmpc_atomic_fixed8u_shr_cpt_rev 384 __kmpc_atomic_fixed8u_shr_rev 385 @endcode 386 387 Functions for floating point 388 ---------------------------- 389 There are versions here for floating point numbers of size 4, 8, 10 and 16 390 bytes. (Ten byte floats are used by X87, but are now rare). 
391 @code 392 __kmpc_atomic_float4_add 393 __kmpc_atomic_float4_add_cpt 394 __kmpc_atomic_float4_add_float8 395 __kmpc_atomic_float4_add_fp 396 __kmpc_atomic_float4_div 397 __kmpc_atomic_float4_div_cpt 398 __kmpc_atomic_float4_div_cpt_rev 399 __kmpc_atomic_float4_div_float8 400 __kmpc_atomic_float4_div_fp 401 __kmpc_atomic_float4_div_rev 402 __kmpc_atomic_float4_max 403 __kmpc_atomic_float4_max_cpt 404 __kmpc_atomic_float4_min 405 __kmpc_atomic_float4_min_cpt 406 __kmpc_atomic_float4_mul 407 __kmpc_atomic_float4_mul_cpt 408 __kmpc_atomic_float4_mul_float8 409 __kmpc_atomic_float4_mul_fp 410 __kmpc_atomic_float4_rd 411 __kmpc_atomic_float4_sub 412 __kmpc_atomic_float4_sub_cpt 413 __kmpc_atomic_float4_sub_cpt_rev 414 __kmpc_atomic_float4_sub_float8 415 __kmpc_atomic_float4_sub_fp 416 __kmpc_atomic_float4_sub_rev 417 __kmpc_atomic_float4_swp 418 __kmpc_atomic_float4_wr 419 __kmpc_atomic_float8_add 420 __kmpc_atomic_float8_add_cpt 421 __kmpc_atomic_float8_add_fp 422 __kmpc_atomic_float8_div 423 __kmpc_atomic_float8_div_cpt 424 __kmpc_atomic_float8_div_cpt_rev 425 __kmpc_atomic_float8_div_fp 426 __kmpc_atomic_float8_div_rev 427 __kmpc_atomic_float8_max 428 __kmpc_atomic_float8_max_cpt 429 __kmpc_atomic_float8_min 430 __kmpc_atomic_float8_min_cpt 431 __kmpc_atomic_float8_mul 432 __kmpc_atomic_float8_mul_cpt 433 __kmpc_atomic_float8_mul_fp 434 __kmpc_atomic_float8_rd 435 __kmpc_atomic_float8_sub 436 __kmpc_atomic_float8_sub_cpt 437 __kmpc_atomic_float8_sub_cpt_rev 438 __kmpc_atomic_float8_sub_fp 439 __kmpc_atomic_float8_sub_rev 440 __kmpc_atomic_float8_swp 441 __kmpc_atomic_float8_wr 442 __kmpc_atomic_float10_add 443 __kmpc_atomic_float10_add_cpt 444 __kmpc_atomic_float10_add_fp 445 __kmpc_atomic_float10_div 446 __kmpc_atomic_float10_div_cpt 447 __kmpc_atomic_float10_div_cpt_rev 448 __kmpc_atomic_float10_div_fp 449 __kmpc_atomic_float10_div_rev 450 __kmpc_atomic_float10_mul 451 __kmpc_atomic_float10_mul_cpt 452 __kmpc_atomic_float10_mul_fp 453 __kmpc_atomic_float10_rd 454 __kmpc_atomic_float10_sub 455 __kmpc_atomic_float10_sub_cpt 456 __kmpc_atomic_float10_sub_cpt_rev 457 __kmpc_atomic_float10_sub_fp 458 __kmpc_atomic_float10_sub_rev 459 __kmpc_atomic_float10_swp 460 __kmpc_atomic_float10_wr 461 __kmpc_atomic_float16_add 462 __kmpc_atomic_float16_add_cpt 463 __kmpc_atomic_float16_div 464 __kmpc_atomic_float16_div_cpt 465 __kmpc_atomic_float16_div_cpt_rev 466 __kmpc_atomic_float16_div_rev 467 __kmpc_atomic_float16_max 468 __kmpc_atomic_float16_max_cpt 469 __kmpc_atomic_float16_min 470 __kmpc_atomic_float16_min_cpt 471 __kmpc_atomic_float16_mul 472 __kmpc_atomic_float16_mul_cpt 473 __kmpc_atomic_float16_rd 474 __kmpc_atomic_float16_sub 475 __kmpc_atomic_float16_sub_cpt 476 __kmpc_atomic_float16_sub_cpt_rev 477 __kmpc_atomic_float16_sub_rev 478 __kmpc_atomic_float16_swp 479 __kmpc_atomic_float16_wr 480 @endcode 481 482 Functions for Complex types 483 --------------------------- 484 Functions for complex types whose component floating point variables are of size 485 4,8,10 or 16 bytes. The names here are based on the size of the component float, 486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an 487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. 
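
For example, following the general update form above (and using the
`kmp_cmplx64` typedef that the implementation below uses for a
double-precision complex), the prototype of that routine is
@code
void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs,
kmp_cmplx64 rhs );
@endcode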
488 489 @code 490 __kmpc_atomic_cmplx4_add 491 __kmpc_atomic_cmplx4_add_cmplx8 492 __kmpc_atomic_cmplx4_add_cpt 493 __kmpc_atomic_cmplx4_div 494 __kmpc_atomic_cmplx4_div_cmplx8 495 __kmpc_atomic_cmplx4_div_cpt 496 __kmpc_atomic_cmplx4_div_cpt_rev 497 __kmpc_atomic_cmplx4_div_rev 498 __kmpc_atomic_cmplx4_mul 499 __kmpc_atomic_cmplx4_mul_cmplx8 500 __kmpc_atomic_cmplx4_mul_cpt 501 __kmpc_atomic_cmplx4_rd 502 __kmpc_atomic_cmplx4_sub 503 __kmpc_atomic_cmplx4_sub_cmplx8 504 __kmpc_atomic_cmplx4_sub_cpt 505 __kmpc_atomic_cmplx4_sub_cpt_rev 506 __kmpc_atomic_cmplx4_sub_rev 507 __kmpc_atomic_cmplx4_swp 508 __kmpc_atomic_cmplx4_wr 509 __kmpc_atomic_cmplx8_add 510 __kmpc_atomic_cmplx8_add_cpt 511 __kmpc_atomic_cmplx8_div 512 __kmpc_atomic_cmplx8_div_cpt 513 __kmpc_atomic_cmplx8_div_cpt_rev 514 __kmpc_atomic_cmplx8_div_rev 515 __kmpc_atomic_cmplx8_mul 516 __kmpc_atomic_cmplx8_mul_cpt 517 __kmpc_atomic_cmplx8_rd 518 __kmpc_atomic_cmplx8_sub 519 __kmpc_atomic_cmplx8_sub_cpt 520 __kmpc_atomic_cmplx8_sub_cpt_rev 521 __kmpc_atomic_cmplx8_sub_rev 522 __kmpc_atomic_cmplx8_swp 523 __kmpc_atomic_cmplx8_wr 524 __kmpc_atomic_cmplx10_add 525 __kmpc_atomic_cmplx10_add_cpt 526 __kmpc_atomic_cmplx10_div 527 __kmpc_atomic_cmplx10_div_cpt 528 __kmpc_atomic_cmplx10_div_cpt_rev 529 __kmpc_atomic_cmplx10_div_rev 530 __kmpc_atomic_cmplx10_mul 531 __kmpc_atomic_cmplx10_mul_cpt 532 __kmpc_atomic_cmplx10_rd 533 __kmpc_atomic_cmplx10_sub 534 __kmpc_atomic_cmplx10_sub_cpt 535 __kmpc_atomic_cmplx10_sub_cpt_rev 536 __kmpc_atomic_cmplx10_sub_rev 537 __kmpc_atomic_cmplx10_swp 538 __kmpc_atomic_cmplx10_wr 539 __kmpc_atomic_cmplx16_add 540 __kmpc_atomic_cmplx16_add_cpt 541 __kmpc_atomic_cmplx16_div 542 __kmpc_atomic_cmplx16_div_cpt 543 __kmpc_atomic_cmplx16_div_cpt_rev 544 __kmpc_atomic_cmplx16_div_rev 545 __kmpc_atomic_cmplx16_mul 546 __kmpc_atomic_cmplx16_mul_cpt 547 __kmpc_atomic_cmplx16_rd 548 __kmpc_atomic_cmplx16_sub 549 __kmpc_atomic_cmplx16_sub_cpt 550 __kmpc_atomic_cmplx16_sub_cpt_rev 551 __kmpc_atomic_cmplx16_swp 552 __kmpc_atomic_cmplx16_wr 553 @endcode 554 */ 555 556 /*! 
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for 8-byte complex (float complex)
// data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language.
*/
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                              \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                                \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) OP(rhs);                                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs));                                        \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
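
// Illustrative sketch only (not literal generated code): for an update routine
// built with OP_UPDATE_GOMP_CRITICAL(TYPE, +, FLAG) below, a non-zero FLAG in
// a KMP_GOMP_COMPAT build effectively prepends
//
//   if (__kmp_atomic_mode == 2) {        // GOMP compatibility mode
//     if (gtid == KMP_GTID_UNKNOWN)
//       gtid = __kmp_entry_gtid();       // KMP_CHECK_GTID
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); // generic lock 0
//     (*lhs) = (TYPE)((*lhs) + ((TYPE)rhs));
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }
//
// before falling through to the lock-free (fetch-and-add / cmpxchg) path.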
759 760 #ifdef KMP_GOMP_COMPAT 761 #define OP_GOMP_CRITICAL(OP, FLAG) \ 762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 763 KMP_CHECK_GTID; \ 764 OP_CRITICAL(OP, 0); \ 765 return; \ 766 } 767 768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \ 769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 770 KMP_CHECK_GTID; \ 771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \ 772 return; \ 773 } 774 #else 775 #define OP_GOMP_CRITICAL(OP, FLAG) 776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) 777 #endif /* KMP_GOMP_COMPAT */ 778 779 #if KMP_MIC 780 #define KMP_DO_PAUSE _mm_delay_32(1) 781 #else 782 #define KMP_DO_PAUSE 783 #endif /* KMP_MIC */ 784 785 // ------------------------------------------------------------------------ 786 // Operation on *lhs, rhs using "compare_and_store" routine 787 // TYPE - operands' type 788 // BITS - size in bits, used to distinguish low level calls 789 // OP - operator 790 #define OP_CMPXCHG(TYPE, BITS, OP) \ 791 { \ 792 TYPE old_value, new_value; \ 793 old_value = *(TYPE volatile *)lhs; \ 794 new_value = (TYPE)(old_value OP((TYPE)rhs)); \ 795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 798 KMP_DO_PAUSE; \ 799 \ 800 old_value = *(TYPE volatile *)lhs; \ 801 new_value = (TYPE)(old_value OP((TYPE)rhs)); \ 802 } \ 803 } 804 805 #if USE_CMPXCHG_FIX 806 // 2007-06-25: 807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile 809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the 810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of 811 // the workaround. 812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 813 { \ 814 struct _sss { \ 815 TYPE cmp; \ 816 kmp_int##BITS *vvv; \ 817 }; \ 818 struct _sss old_value, new_value; \ 819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 826 KMP_DO_PAUSE; \ 827 \ 828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 830 } \ 831 } 832 // end of the first part of the workaround for C78287 833 #endif // USE_CMPXCHG_FIX 834 835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64 836 // Undo explicit type casts to get MSVC ARM64 to build. 
Uses 837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG 838 #undef OP_CMPXCHG 839 #define OP_CMPXCHG(TYPE, BITS, OP) \ 840 { \ 841 struct _sss { \ 842 TYPE cmp; \ 843 kmp_int##BITS *vvv; \ 844 }; \ 845 struct _sss old_value, new_value; \ 846 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 847 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 848 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 849 new_value.cmp = old_value.cmp OP rhs; \ 850 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 851 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 852 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 853 KMP_DO_PAUSE; \ 854 \ 855 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 856 new_value.cmp = old_value.cmp OP rhs; \ 857 } \ 858 } 859 860 #undef OP_UPDATE_CRITICAL 861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ 862 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 863 (*lhs) = (*lhs)OP rhs; \ 864 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 865 866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64 867 868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 869 870 // ------------------------------------------------------------------------ 871 // X86 or X86_64: no alignment problems ==================================== 872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 873 GOMP_FLAG) \ 874 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 875 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 876 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 877 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 878 } 879 // ------------------------------------------------------------------------- 880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 881 GOMP_FLAG) \ 882 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 883 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 884 OP_CMPXCHG(TYPE, BITS, OP) \ 885 } 886 #if USE_CMPXCHG_FIX 887 // ------------------------------------------------------------------------- 888 // workaround for C78287 (complex(kind=4) data type) 889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 890 MASK, GOMP_FLAG) \ 891 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 892 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 893 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 894 } 895 // end of the second part of the workaround for C78287 896 #endif // USE_CMPXCHG_FIX 897 898 #else 899 // ------------------------------------------------------------------------- 900 // Code for other architectures that don't handle unaligned accesses. 
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 902 GOMP_FLAG) \ 903 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 904 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 905 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 906 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 907 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 908 } else { \ 909 KMP_CHECK_GTID; \ 910 OP_UPDATE_CRITICAL(TYPE, OP, \ 911 LCK_ID) /* unaligned address - use critical */ \ 912 } \ 913 } 914 // ------------------------------------------------------------------------- 915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 916 GOMP_FLAG) \ 917 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 918 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 919 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 920 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 921 } else { \ 922 KMP_CHECK_GTID; \ 923 OP_UPDATE_CRITICAL(TYPE, OP, \ 924 LCK_ID) /* unaligned address - use critical */ \ 925 } \ 926 } 927 #if USE_CMPXCHG_FIX 928 // ------------------------------------------------------------------------- 929 // workaround for C78287 (complex(kind=4) data type) 930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 931 MASK, GOMP_FLAG) \ 932 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 933 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 934 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 935 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 936 } else { \ 937 KMP_CHECK_GTID; \ 938 OP_UPDATE_CRITICAL(TYPE, OP, \ 939 LCK_ID) /* unaligned address - use critical */ \ 940 } \ 941 } 942 // end of the second part of the workaround for C78287 943 #endif // USE_CMPXCHG_FIX 944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 945 946 // Routines for ATOMIC 4-byte operands addition and subtraction 947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 948 0) // __kmpc_atomic_fixed4_add 949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 950 0) // __kmpc_atomic_fixed4_sub 951 952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, 953 KMP_ARCH_X86) // __kmpc_atomic_float4_add 954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, 955 KMP_ARCH_X86) // __kmpc_atomic_float4_sub 956 957 // Routines for ATOMIC 8-byte operands addition and subtraction 958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, 959 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add 960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, 961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub 962 963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, 964 KMP_ARCH_X86) // __kmpc_atomic_float8_add 965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, 966 KMP_ARCH_X86) // __kmpc_atomic_float8_sub 967 968 // ------------------------------------------------------------------------ 969 // Entries definition for integer operands 970 // TYPE_ID - operands type and size (fixed4, float4) 971 // OP_ID - operation identifier (add, sub, mul, ...) 
972 // TYPE - operand type 973 // BITS - size in bits, used to distinguish low level calls 974 // OP - operator (used in critical section) 975 // LCK_ID - lock identifier, used to possibly distinguish lock variable 976 // MASK - used for alignment check 977 978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG 979 // ------------------------------------------------------------------------ 980 // Routines for ATOMIC integer operands, other operators 981 // ------------------------------------------------------------------------ 982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, 984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add 985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, 986 0) // __kmpc_atomic_fixed1_andb 987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, 988 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div 989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, 990 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div 991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, 992 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul 993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, 994 0) // __kmpc_atomic_fixed1_orb 995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, 996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl 997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, 998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr 999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, 1000 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr 1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, 1002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub 1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, 1004 0) // __kmpc_atomic_fixed1_xor 1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, 1006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add 1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, 1008 0) // __kmpc_atomic_fixed2_andb 1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, 1010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div 1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, 1012 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div 1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, 1014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul 1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, 1016 0) // __kmpc_atomic_fixed2_orb 1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, 1018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl 1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, 1020 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr 1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, 1022 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr 1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, 1024 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub 1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, 1026 0) // __kmpc_atomic_fixed2_xor 1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, 1028 0) // __kmpc_atomic_fixed4_andb 1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, 1030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div 1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, 1032 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div 1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, 1034 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul 1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, 1036 0) // __kmpc_atomic_fixed4_orb 1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, 1038 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl 1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, 1040 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr 1041 
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, 1042 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr 1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, 1044 0) // __kmpc_atomic_fixed4_xor 1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, 1046 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb 1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, 1048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div 1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, 1050 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div 1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, 1052 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul 1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, 1054 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb 1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, 1056 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl 1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, 1058 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr 1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, 1060 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr 1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, 1062 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor 1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, 1064 KMP_ARCH_X86) // __kmpc_atomic_float4_div 1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, 1066 KMP_ARCH_X86) // __kmpc_atomic_float4_mul 1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, 1068 KMP_ARCH_X86) // __kmpc_atomic_float8_div 1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, 1070 KMP_ARCH_X86) // __kmpc_atomic_float8_mul 1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 1072 1073 /* ------------------------------------------------------------------------ */ 1074 /* Routines for C/C++ Reduction operators && and || */ 1075 1076 // ------------------------------------------------------------------------ 1077 // Need separate macros for &&, || because there is no combined assignment 1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used 1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1080 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1081 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1082 OP_CRITICAL(= *lhs OP, LCK_ID) \ 1083 } 1084 1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1086 1087 // ------------------------------------------------------------------------ 1088 // X86 or X86_64: no alignment problems =================================== 1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1090 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1091 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1092 OP_CMPXCHG(TYPE, BITS, OP) \ 1093 } 1094 1095 #else 1096 // ------------------------------------------------------------------------ 1097 // Code for other architectures that don't handle unaligned accesses. 
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1099 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1100 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1101 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1102 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1103 } else { \ 1104 KMP_CHECK_GTID; \ 1105 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ 1106 } \ 1107 } 1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1109 1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, 1111 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl 1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, 1113 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl 1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, 1115 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl 1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, 1117 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl 1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 1119 0) // __kmpc_atomic_fixed4_andl 1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 1121 0) // __kmpc_atomic_fixed4_orl 1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, 1123 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl 1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, 1125 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl 1126 1127 /* ------------------------------------------------------------------------- */ 1128 /* Routines for Fortran operators that matched no one in C: */ 1129 /* MAX, MIN, .EQV., .NEQV. */ 1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ 1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ 1132 1133 // ------------------------------------------------------------------------- 1134 // MIN and MAX need separate macros 1135 // OP - operator to check if we need any actions? 1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \ 1137 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1138 \ 1139 if (*lhs OP rhs) { /* still need actions? */ \ 1140 *lhs = rhs; \ 1141 } \ 1142 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1143 1144 // ------------------------------------------------------------------------- 1145 #ifdef KMP_GOMP_COMPAT 1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ 1147 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1148 KMP_CHECK_GTID; \ 1149 MIN_MAX_CRITSECT(OP, 0); \ 1150 return; \ 1151 } 1152 #else 1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) 1154 #endif /* KMP_GOMP_COMPAT */ 1155 1156 // ------------------------------------------------------------------------- 1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1158 { \ 1159 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1160 TYPE old_value; \ 1161 temp_val = *lhs; \ 1162 old_value = temp_val; \ 1163 while (old_value OP rhs && /* still need actions? */ \ 1164 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1165 (kmp_int##BITS *)lhs, \ 1166 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1167 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 1168 temp_val = *lhs; \ 1169 old_value = temp_val; \ 1170 } \ 1171 } 1172 1173 // ------------------------------------------------------------------------- 1174 // 1-byte, 2-byte operands - use critical section 1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1176 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1177 if (*lhs OP rhs) { /* need actions? 
*/ \ 1178 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1179 MIN_MAX_CRITSECT(OP, LCK_ID) \ 1180 } \ 1181 } 1182 1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1184 1185 // ------------------------------------------------------------------------- 1186 // X86 or X86_64: no alignment problems ==================================== 1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1188 GOMP_FLAG) \ 1189 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1190 if (*lhs OP rhs) { \ 1191 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1192 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1193 } \ 1194 } 1195 1196 #else 1197 // ------------------------------------------------------------------------- 1198 // Code for other architectures that don't handle unaligned accesses. 1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1200 GOMP_FLAG) \ 1201 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1202 if (*lhs OP rhs) { \ 1203 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1204 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1205 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1206 } else { \ 1207 KMP_CHECK_GTID; \ 1208 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ 1209 } \ 1210 } \ 1211 } 1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1213 1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, 1215 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max 1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, 1217 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min 1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, 1219 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max 1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, 1221 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min 1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 1223 0) // __kmpc_atomic_fixed4_max 1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, 1225 0) // __kmpc_atomic_fixed4_min 1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, 1227 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max 1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, 1229 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min 1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, 1231 KMP_ARCH_X86) // __kmpc_atomic_float4_max 1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, 1233 KMP_ARCH_X86) // __kmpc_atomic_float4_min 1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, 1235 KMP_ARCH_X86) // __kmpc_atomic_float8_max 1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, 1237 KMP_ARCH_X86) // __kmpc_atomic_float8_min 1238 #if KMP_HAVE_QUAD 1239 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1240 1) // __kmpc_atomic_float16_max 1241 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, 1242 1) // __kmpc_atomic_float16_min 1243 #if (KMP_ARCH_X86) 1244 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, 1245 1) // __kmpc_atomic_float16_max_a16 1246 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, 1247 1) // __kmpc_atomic_float16_min_a16 1248 #endif // (KMP_ARCH_X86) 1249 #endif // KMP_HAVE_QUAD 1250 // ------------------------------------------------------------------------ 1251 // Need separate macros for .EQV. 
because of the need of complement (~) 1252 // OP ignored for critical sections, ^=~ used instead 1253 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1254 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1255 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1256 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \ 1257 } 1258 1259 // ------------------------------------------------------------------------ 1260 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1261 // ------------------------------------------------------------------------ 1262 // X86 or X86_64: no alignment problems =================================== 1263 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1264 GOMP_FLAG) \ 1265 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1266 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1267 OP_CMPXCHG(TYPE, BITS, OP) \ 1268 } 1269 // ------------------------------------------------------------------------ 1270 #else 1271 // ------------------------------------------------------------------------ 1272 // Code for other architectures that don't handle unaligned accesses. 1273 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1274 GOMP_FLAG) \ 1275 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1276 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \ 1277 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1278 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1279 } else { \ 1280 KMP_CHECK_GTID; \ 1281 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \ 1282 } \ 1283 } 1284 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1285 1286 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, 1287 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv 1288 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, 1289 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv 1290 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, 1291 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv 1292 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, 1293 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv 1294 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, 1295 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv 1296 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, 1297 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv 1298 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, 1299 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv 1300 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, 1301 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv 1302 1303 // ------------------------------------------------------------------------ 1304 // Routines for Extended types: long double, _Quad, complex flavours (use 1305 // critical section) 1306 // TYPE_ID, OP_ID, TYPE - detailed above 1307 // OP - operator 1308 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1309 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1310 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1311 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 1312 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ 1313 } 1314 1315 /* ------------------------------------------------------------------------- */ 1316 // routines for long double type 1317 ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1318 1) // __kmpc_atomic_float10_add 1319 ATOMIC_CRITICAL(float10, sub, long double, -, 10r, 1320 1) // __kmpc_atomic_float10_sub 1321 ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1322 1) // __kmpc_atomic_float10_mul 1323 ATOMIC_CRITICAL(float10, div, long 
double, /, 10r, 1324 1) // __kmpc_atomic_float10_div 1325 #if KMP_HAVE_QUAD 1326 // routines for _Quad type 1327 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, 1328 1) // __kmpc_atomic_float16_add 1329 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, 1330 1) // __kmpc_atomic_float16_sub 1331 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, 1332 1) // __kmpc_atomic_float16_mul 1333 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, 1334 1) // __kmpc_atomic_float16_div 1335 #if (KMP_ARCH_X86) 1336 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, 1337 1) // __kmpc_atomic_float16_add_a16 1338 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, 1339 1) // __kmpc_atomic_float16_sub_a16 1340 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, 1341 1) // __kmpc_atomic_float16_mul_a16 1342 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, 1343 1) // __kmpc_atomic_float16_div_a16 1344 #endif // (KMP_ARCH_X86) 1345 #endif // KMP_HAVE_QUAD 1346 // routines for complex types 1347 1348 #if USE_CMPXCHG_FIX 1349 // workaround for C78287 (complex(kind=4) data type) 1350 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1351 1) // __kmpc_atomic_cmplx4_add 1352 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1353 1) // __kmpc_atomic_cmplx4_sub 1354 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1355 1) // __kmpc_atomic_cmplx4_mul 1356 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1357 1) // __kmpc_atomic_cmplx4_div 1358 // end of the workaround for C78287 1359 #else 1360 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add 1361 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub 1362 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul 1363 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div 1364 #endif // USE_CMPXCHG_FIX 1365 1366 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add 1367 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub 1368 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul 1369 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div 1370 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1371 1) // __kmpc_atomic_cmplx10_add 1372 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, 1373 1) // __kmpc_atomic_cmplx10_sub 1374 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1375 1) // __kmpc_atomic_cmplx10_mul 1376 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1377 1) // __kmpc_atomic_cmplx10_div 1378 #if KMP_HAVE_QUAD 1379 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1380 1) // __kmpc_atomic_cmplx16_add 1381 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, 1382 1) // __kmpc_atomic_cmplx16_sub 1383 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, 1384 1) // __kmpc_atomic_cmplx16_mul 1385 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, 1386 1) // __kmpc_atomic_cmplx16_div 1387 #if (KMP_ARCH_X86) 1388 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1389 1) // __kmpc_atomic_cmplx16_add_a16 1390 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1391 1) // __kmpc_atomic_cmplx16_sub_a16 1392 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1393 1) // __kmpc_atomic_cmplx16_mul_a16 1394 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1395 1) // __kmpc_atomic_cmplx16_div_a16 1396 #endif // (KMP_ARCH_X86) 1397 #endif // 
KMP_HAVE_QUAD 1398 1399 // OpenMP 4.0: x = expr binop x for non-commutative operations. 1400 // Supported only on IA-32 architecture and Intel(R) 64 1401 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1402 1403 // ------------------------------------------------------------------------ 1404 // Operation on *lhs, rhs bound by critical section 1405 // OP - operator (it's supposed to contain an assignment) 1406 // LCK_ID - lock identifier 1407 // Note: don't check gtid as it should always be valid 1408 // 1, 2-byte - expect valid parameter, other - check before this macro 1409 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1410 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1411 \ 1412 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 1413 \ 1414 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1415 1416 #ifdef KMP_GOMP_COMPAT 1417 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \ 1418 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1419 KMP_CHECK_GTID; \ 1420 OP_CRITICAL_REV(TYPE, OP, 0); \ 1421 return; \ 1422 } 1423 1424 #else 1425 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) 1426 #endif /* KMP_GOMP_COMPAT */ 1427 1428 // Beginning of a definition (provides name, parameters, gebug trace) 1429 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1430 // fixed) 1431 // OP_ID - operation identifier (add, sub, mul, ...) 1432 // TYPE - operands' type 1433 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1434 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \ 1435 TYPE *lhs, TYPE rhs) { \ 1436 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1437 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid)); 1438 1439 // ------------------------------------------------------------------------ 1440 // Operation on *lhs, rhs using "compare_and_store" routine 1441 // TYPE - operands' type 1442 // BITS - size in bits, used to distinguish low level calls 1443 // OP - operator 1444 // Note: temp_val introduced in order to force the compiler to read 1445 // *lhs only once (w/o it the compiler reads *lhs twice) 1446 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1447 { \ 1448 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1449 TYPE old_value, new_value; \ 1450 temp_val = *lhs; \ 1451 old_value = temp_val; \ 1452 new_value = (TYPE)(rhs OP old_value); \ 1453 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1454 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1455 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 1456 KMP_DO_PAUSE; \ 1457 \ 1458 temp_val = *lhs; \ 1459 old_value = temp_val; \ 1460 new_value = (TYPE)(rhs OP old_value); \ 1461 } \ 1462 } 1463 1464 // ------------------------------------------------------------------------- 1465 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \ 1466 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1467 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1468 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1469 } 1470 1471 // ------------------------------------------------------------------------ 1472 // Entries definition for integer operands 1473 // TYPE_ID - operands type and size (fixed4, float4) 1474 // OP_ID - operation identifier (add, sub, mul, ...) 
1475 // TYPE - operand type 1476 // BITS - size in bits, used to distinguish low level calls 1477 // OP - operator (used in critical section) 1478 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1479 1480 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG 1481 // ------------------------------------------------------------------------ 1482 // Routines for ATOMIC integer operands, other operators 1483 // ------------------------------------------------------------------------ 1484 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG 1485 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, 1486 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev 1487 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, 1488 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev 1489 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, 1490 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev 1491 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, 1492 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev 1493 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, 1494 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev 1495 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, 1496 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev 1497 1498 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, 1499 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev 1500 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, 1501 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev 1502 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, 1503 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev 1504 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, 1505 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev 1506 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1507 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev 1508 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, 1509 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev 1510 1511 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, 1512 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev 1513 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, 1514 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev 1515 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, 1516 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev 1517 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, 1518 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev 1519 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, 1520 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev 1521 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, 1522 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev 1523 1524 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, 1525 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev 1526 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, 1527 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev 1528 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, 1529 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev 1530 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, 1531 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev 1532 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, 1533 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev 1534 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, 1535 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev 1536 1537 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, 1538 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev 1539 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, 1540 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev 1541 1542 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, 1543 KMP_ARCH_X86) // 
__kmpc_atomic_float8_div_rev 1544 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1545 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1546 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1547 1548 // ------------------------------------------------------------------------ 1549 // Routines for Extended types: long double, _Quad, complex flavours (use 1550 // critical section) 1551 // TYPE_ID, OP_ID, TYPE - detailed above 1552 // OP - operator 1553 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1554 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1555 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1556 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1557 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1558 } 1559 1560 /* ------------------------------------------------------------------------- */ 1561 // routines for long double type 1562 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1563 1) // __kmpc_atomic_float10_sub_rev 1564 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1565 1) // __kmpc_atomic_float10_div_rev 1566 #if KMP_HAVE_QUAD 1567 // routines for _Quad type 1568 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1569 1) // __kmpc_atomic_float16_sub_rev 1570 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1571 1) // __kmpc_atomic_float16_div_rev 1572 #if (KMP_ARCH_X86) 1573 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1574 1) // __kmpc_atomic_float16_sub_a16_rev 1575 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1576 1) // __kmpc_atomic_float16_div_a16_rev 1577 #endif // KMP_ARCH_X86 1578 #endif // KMP_HAVE_QUAD 1579 1580 // routines for complex types 1581 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1582 1) // __kmpc_atomic_cmplx4_sub_rev 1583 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1584 1) // __kmpc_atomic_cmplx4_div_rev 1585 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1586 1) // __kmpc_atomic_cmplx8_sub_rev 1587 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1588 1) // __kmpc_atomic_cmplx8_div_rev 1589 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1590 1) // __kmpc_atomic_cmplx10_sub_rev 1591 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1592 1) // __kmpc_atomic_cmplx10_div_rev 1593 #if KMP_HAVE_QUAD 1594 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1595 1) // __kmpc_atomic_cmplx16_sub_rev 1596 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1597 1) // __kmpc_atomic_cmplx16_div_rev 1598 #if (KMP_ARCH_X86) 1599 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1600 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1601 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1602 1) // __kmpc_atomic_cmplx16_div_a16_rev 1603 #endif // KMP_ARCH_X86 1604 #endif // KMP_HAVE_QUAD 1605 1606 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1607 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
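
// For illustration only: a rough, hand-written sketch of what the
// ATOMIC_CMPXCHG_REV expansion above produces for
// __kmpc_atomic_float8_sub_rev (debug assert, trace, and the GOMP
// compatibility path are omitted).  The generated routine atomically
// performs *lhs = rhs - *lhs via a compare-and-store retry loop:
//
//   void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid,
//                                     kmp_real64 *lhs, kmp_real64 rhs) {
//     kmp_real64 old_value, new_value;
//     old_value = *lhs;
//     new_value = rhs - old_value;
//     while (!KMP_COMPARE_AND_STORE_ACQ64(
//         (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//         *VOLATILE_CAST(kmp_int64 *) & new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *lhs;
//       new_value = rhs - old_value;
//     }
//   }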

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: to reduce the total number of type combinations, it is assumed      */
/* that the compiler converts RHS to the longest floating type, that is      */
/* _Quad, before calling any of these routines.                              */
/* The conversion to _Quad is done by the compiler during the calculation,   */
/* and the conversion back to TYPE happens before the assignment, like:      */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of software emulation.          */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1651 LCK_ID, MASK, GOMP_FLAG) \ 1652 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1653 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1654 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1655 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1656 } else { \ 1657 KMP_CHECK_GTID; \ 1658 OP_UPDATE_CRITICAL(TYPE, OP, \ 1659 LCK_ID) /* unaligned address - use critical */ \ 1660 } \ 1661 } 1662 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1663 1664 // ------------------------------------------------------------------------- 1665 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1666 // ------------------------------------------------------------------------- 1667 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1668 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1669 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1670 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1671 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1672 } 1673 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1674 LCK_ID, GOMP_FLAG) \ 1675 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1676 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1677 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1678 } 1679 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1680 1681 // RHS=float8 1682 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1683 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1684 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1685 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1687 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1688 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1689 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1691 0) // __kmpc_atomic_fixed4_mul_float8 1692 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1693 0) // __kmpc_atomic_fixed4_div_float8 1694 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1695 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1696 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1697 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1698 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1699 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1700 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1701 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1702 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1703 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1704 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1705 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1706 1707 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1708 // use them) 1709 #if KMP_HAVE_QUAD 1710 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1711 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1712 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1713 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1714 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1715 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1716 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1717 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 
1718 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, 1719 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1720 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1721 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1722 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1723 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1724 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1725 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1726 1727 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1728 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1729 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1730 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1731 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1732 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1733 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1734 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1735 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1736 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1737 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1738 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1739 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1740 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1741 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1742 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1743 1744 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1745 0) // __kmpc_atomic_fixed4_add_fp 1746 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1747 0) // __kmpc_atomic_fixed4u_add_fp 1748 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1749 0) // __kmpc_atomic_fixed4_sub_fp 1750 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1751 0) // __kmpc_atomic_fixed4u_sub_fp 1752 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1753 0) // __kmpc_atomic_fixed4_mul_fp 1754 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1755 0) // __kmpc_atomic_fixed4u_mul_fp 1756 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1757 0) // __kmpc_atomic_fixed4_div_fp 1758 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1759 0) // __kmpc_atomic_fixed4u_div_fp 1760 1761 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1762 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1763 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1764 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1765 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1766 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1767 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1768 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1769 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1770 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1771 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1772 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1773 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1774 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1775 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1776 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1777 1778 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1779 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1780 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 
4r, 3, 1781 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp 1782 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1783 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1784 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1785 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1786 1787 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1788 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1789 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1790 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1791 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1792 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1793 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1794 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1795 1796 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1797 1) // __kmpc_atomic_float10_add_fp 1798 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1799 1) // __kmpc_atomic_float10_sub_fp 1800 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1801 1) // __kmpc_atomic_float10_mul_fp 1802 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1803 1) // __kmpc_atomic_float10_div_fp 1804 1805 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1806 // Reverse operations 1807 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1808 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1809 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1810 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1811 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1812 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1813 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1814 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1815 1816 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1817 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1818 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1819 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1820 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1821 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1822 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1823 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1824 1825 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1826 0) // __kmpc_atomic_fixed4_sub_rev_fp 1827 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1828 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1829 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1830 0) // __kmpc_atomic_fixed4_div_rev_fp 1831 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1832 0) // __kmpc_atomic_fixed4u_div_rev_fp 1833 1834 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1835 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1836 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1837 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1838 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1839 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1840 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1841 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1842 1843 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1844 KMP_ARCH_X86) // 
// __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines
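
// For illustration only (not generated code, variable names are
// hypothetical): given a shared double x and a private double v, a compiler
// that does not inline
//   #pragma omp atomic read
//   v = x;
// may emit a call roughly like
//   v = __kmpc_atomic_float8_rd(id_ref, gtid, &x);
// where id_ref and gtid are the usual source-location pointer and global
// thread id.  The cmpxchg-based readers defined below obtain the value by
// issuing a compare-and-store of the location with its own current
// contents; the low-level primitive returns the old value, which is the
// atomic read result.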
// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *loc using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }

// -------------------------------------------------------------------------
// Operation on *loc bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  new_value = (*loc); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
2001 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 2002 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 2003 return new_value; \ 2004 } 2005 2006 // ------------------------------------------------------------------------ 2007 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 2008 // value doesn't work. 2009 // Let's return the read value through the additional parameter. 2010 #if (KMP_OS_WINDOWS) 2011 2012 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 2013 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2014 \ 2015 (*out) = (*loc); \ 2016 \ 2017 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2018 // ------------------------------------------------------------------------ 2019 #ifdef KMP_GOMP_COMPAT 2020 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 2021 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2022 KMP_CHECK_GTID; \ 2023 OP_CRITICAL_READ_WRK(OP, 0); \ 2024 } 2025 #else 2026 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 2027 #endif /* KMP_GOMP_COMPAT */ 2028 // ------------------------------------------------------------------------ 2029 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 2030 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 2031 TYPE *loc) { \ 2032 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2033 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2034 2035 // ------------------------------------------------------------------------ 2036 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2037 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 2038 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 2039 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 2040 } 2041 2042 #endif // KMP_OS_WINDOWS 2043 2044 // ------------------------------------------------------------------------ 2045 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2046 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 2047 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 2048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 2049 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 2050 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 2051 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 2052 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 2053 2054 // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic 2055 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2056 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2057 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2058 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2059 2060 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2061 1) // __kmpc_atomic_float10_rd 2062 #if KMP_HAVE_QUAD 2063 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2064 1) // __kmpc_atomic_float16_rd 2065 #endif // KMP_HAVE_QUAD 2066 2067 // Fix for CQ220361 on Windows* OS 2068 #if (KMP_OS_WINDOWS) 2069 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2070 1) // __kmpc_atomic_cmplx4_rd 2071 #else 2072 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2073 1) // __kmpc_atomic_cmplx4_rd 2074 #endif // (KMP_OS_WINDOWS) 2075 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2076 1) // __kmpc_atomic_cmplx8_rd 2077 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2078 1) // __kmpc_atomic_cmplx10_rd 2079 #if KMP_HAVE_QUAD 2080 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2081 1) // __kmpc_atomic_cmplx16_rd 2082 #if (KMP_ARCH_X86) 2083 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2084 1) // __kmpc_atomic_float16_a16_rd 2085 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2086 1) // __kmpc_atomic_cmplx16_a16_rd 2087 #endif // (KMP_ARCH_X86) 2088 #endif // KMP_HAVE_QUAD 2089 2090 // ------------------------------------------------------------------------ 2091 // Atomic WRITE routines 2092 2093 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2094 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2095 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2096 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2097 } 2098 // ------------------------------------------------------------------------ 2099 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2100 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2101 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2102 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2103 } 2104 2105 // ------------------------------------------------------------------------ 2106 // Operation on *lhs, rhs using "compare_and_store" routine 2107 // TYPE - operands' type 2108 // BITS - size in bits, used to distinguish low level calls 2109 // OP - operator 2110 // Note: temp_val introduced in order to force the compiler to read 2111 // *lhs only once (w/o it the compiler reads *lhs twice) 2112 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2113 { \ 2114 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2115 TYPE old_value, new_value; \ 2116 temp_val = *lhs; \ 2117 old_value = temp_val; \ 2118 new_value = rhs; \ 2119 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2120 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2121 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2122 temp_val = *lhs; \ 2123 old_value = temp_val; \ 2124 new_value = rhs; \ 2125 } \ 2126 } 2127 2128 // ------------------------------------------------------------------------- 2129 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2130 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2131 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2132 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2133 } 2134 2135 // ------------------------------------------------------------------------ 2136 // Routines for Extended types: long double, _Quad, complex flavours (use 2137 // critical section) 2138 // TYPE_ID, OP_ID, TYPE - detailed above 2139 // OP - operator 2140 // LCK_ID - lock identifier, used to possibly distinguish lock 
// variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic CAPTURE routines
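
// For illustration only: the capture entry points generated below take the
// update operands plus a 'flag' selecting which value is handed back
// (non-zero: the value after the update; zero: the value before it).  A
// rough, hand-written equivalent of __kmpc_atomic_fixed4_add_cpt, which is
// produced below by ATOMIC_FIXED_ADD_CPT, would be:
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     // atomically: old = *lhs; *lhs = old + rhs;
//     kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, rhs);
//     return flag ? old_value + rhs // capture the value *after* the update
//                 : old_value;      // capture the value *before* the update
//   }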
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) OP rhs; \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(old_value OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(old_value OP rhs); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE old_value, new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for
subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2296 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2297 if (flag) { \ 2298 return old_value OP rhs; \ 2299 } else \ 2300 return old_value; \ 2301 } 2302 // ------------------------------------------------------------------------- 2303 2304 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2305 0) // __kmpc_atomic_fixed4_add_cpt 2306 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2307 0) // __kmpc_atomic_fixed4_sub_cpt 2308 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2309 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2310 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2311 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2312 2313 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2314 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2315 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2316 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2317 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2318 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2319 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2320 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2321 2322 // ------------------------------------------------------------------------ 2323 // Entries definition for integer operands 2324 // TYPE_ID - operands type and size (fixed4, float4) 2325 // OP_ID - operation identifier (add, sub, mul, ...) 2326 // TYPE - operand type 2327 // BITS - size in bits, used to distinguish low level calls 2328 // OP - operator (used in critical section) 2329 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2330 // ------------------------------------------------------------------------ 2331 // Routines for ATOMIC integer operands, other operators 2332 // ------------------------------------------------------------------------ 2333 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2334 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2335 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2336 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2337 0) // __kmpc_atomic_fixed1_andb_cpt 2338 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2339 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2340 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2341 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2342 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2343 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2344 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2345 0) // __kmpc_atomic_fixed1_orb_cpt 2346 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2347 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2348 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2349 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2350 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2351 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2352 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2353 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2354 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2355 0) // __kmpc_atomic_fixed1_xor_cpt 2356 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2357 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2358 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2359 0) // __kmpc_atomic_fixed2_andb_cpt 2360 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2361 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2362 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2363 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2364 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 2365 KMP_ARCH_X86) // 
__kmpc_atomic_fixed2_mul_cpt 2366 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 2367 0) // __kmpc_atomic_fixed2_orb_cpt 2368 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, 2369 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 2370 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, 2371 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt 2372 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, 2373 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt 2374 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, 2375 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt 2376 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 2377 0) // __kmpc_atomic_fixed2_xor_cpt 2378 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 2379 0) // __kmpc_atomic_fixed4_andb_cpt 2380 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, 2381 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt 2382 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, 2383 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt 2384 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, 2385 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt 2386 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 2387 0) // __kmpc_atomic_fixed4_orb_cpt 2388 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, 2389 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt 2390 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, 2391 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt 2392 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, 2393 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt 2394 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 2395 0) // __kmpc_atomic_fixed4_xor_cpt 2396 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, 2397 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt 2398 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, 2399 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt 2400 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, 2401 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt 2402 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, 2403 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt 2404 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, 2405 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt 2406 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, 2407 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt 2408 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, 2409 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt 2410 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, 2411 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt 2412 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, 2413 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt 2414 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, 2415 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt 2416 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, 2417 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt 2418 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, 2419 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt 2420 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, 2421 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt 2422 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2423 2424 // CAPTURE routines for mixed types RHS=float16 2425 #if KMP_HAVE_QUAD 2426 2427 // Beginning of a definition (provides name, parameters, gebug trace) 2428 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2429 // fixed) 2430 // OP_ID - operation identifier (add, sub, mul, ...) 
2431 // TYPE - operands' type 2432 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2433 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2434 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2435 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2436 KA_TRACE(100, \ 2437 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2438 gtid)); 2439 2440 // ------------------------------------------------------------------------- 2441 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2442 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2443 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2444 TYPE new_value; \ 2445 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 2446 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2447 } 2448 2449 // ------------------------------------------------------------------------- 2450 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2451 LCK_ID, GOMP_FLAG) \ 2452 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2453 TYPE new_value; \ 2454 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 2455 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \ 2456 } 2457 2458 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2459 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2460 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2461 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2462 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2463 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2464 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2465 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2466 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2467 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2469 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2471 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2473 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2474 2475 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2476 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2477 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2478 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2479 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2480 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2481 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2482 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2483 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2484 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2486 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2488 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2490 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2491 2492 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2493 0) // __kmpc_atomic_fixed4_add_cpt_fp 2494 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2495 0) // 
__kmpc_atomic_fixed4u_add_cpt_fp 2496 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2497 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2498 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2499 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2500 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2501 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2503 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2505 0) // __kmpc_atomic_fixed4_div_cpt_fp 2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2507 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2508 2509 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2510 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2511 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2512 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2513 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2514 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2515 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2516 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2517 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2518 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2520 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2522 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2524 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2525 2526 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2527 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2528 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2529 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2530 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2531 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2532 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2533 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2534 2535 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2536 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2537 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2538 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2539 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2540 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2541 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2542 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2543 2544 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2545 1) // __kmpc_atomic_float10_add_cpt_fp 2546 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2547 1) // __kmpc_atomic_float10_sub_cpt_fp 2548 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2549 1) // __kmpc_atomic_float10_mul_cpt_fp 2550 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2551 1) // __kmpc_atomic_float10_div_cpt_fp 2552 2553 #endif // KMP_HAVE_QUAD 2554 2555 // 
------------------------------------------------------------------------ 2556 // Routines for C/C++ Reduction operators && and || 2557 2558 // ------------------------------------------------------------------------- 2559 // Operation on *lhs, rhs bound by critical section 2560 // OP - operator (it's supposed to contain an assignment) 2561 // LCK_ID - lock identifier 2562 // Note: don't check gtid as it should always be valid 2563 // 1, 2-byte - expect valid parameter, other - check before this macro 2564 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ 2565 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2566 \ 2567 if (flag) { \ 2568 new_value OP rhs; \ 2569 (*lhs) = new_value; \ 2570 } else { \ 2571 new_value = (*lhs); \ 2572 (*lhs) OP rhs; \ 2573 } \ 2574 \ 2575 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2576 2577 // ------------------------------------------------------------------------ 2578 #ifdef KMP_GOMP_COMPAT 2579 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ 2580 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2581 KMP_CHECK_GTID; \ 2582 OP_CRITICAL_L_CPT(OP, 0); \ 2583 return new_value; \ 2584 } 2585 #else 2586 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) 2587 #endif /* KMP_GOMP_COMPAT */ 2588 2589 // ------------------------------------------------------------------------ 2590 // Need separate macros for &&, || because there is no combined assignment 2591 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2592 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2593 TYPE new_value; \ 2594 (void)new_value; \ 2595 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ 2596 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2597 } 2598 2599 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, 2600 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt 2601 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, 2602 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt 2603 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, 2604 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt 2605 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, 2606 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt 2607 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 2608 0) // __kmpc_atomic_fixed4_andl_cpt 2609 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 2610 0) // __kmpc_atomic_fixed4_orl_cpt 2611 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, 2612 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt 2613 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, 2614 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt 2615 2616 // ------------------------------------------------------------------------- 2617 // Routines for Fortran operators that matched no one in C: 2618 // MAX, MIN, .EQV., .NEQV. 2619 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2620 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2621 2622 // ------------------------------------------------------------------------- 2623 // MIN and MAX need separate macros 2624 // OP - operator to check if we need any actions? 2625 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2626 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2627 \ 2628 if (*lhs OP rhs) { /* still need actions? 
*/ \ 2629 old_value = *lhs; \ 2630 *lhs = rhs; \ 2631 if (flag) \ 2632 new_value = rhs; \ 2633 else \ 2634 new_value = old_value; \ 2635 } else { \ 2636 new_value = *lhs; \ 2637 } \ 2638 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2639 return new_value; 2640 2641 // ------------------------------------------------------------------------- 2642 #ifdef KMP_GOMP_COMPAT 2643 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ 2644 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2645 KMP_CHECK_GTID; \ 2646 MIN_MAX_CRITSECT_CPT(OP, 0); \ 2647 } 2648 #else 2649 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) 2650 #endif /* KMP_GOMP_COMPAT */ 2651 2652 // ------------------------------------------------------------------------- 2653 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2654 { \ 2655 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2656 /*TYPE old_value; */ \ 2657 temp_val = *lhs; \ 2658 old_value = temp_val; \ 2659 while (old_value OP rhs && /* still need actions? */ \ 2660 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2661 (kmp_int##BITS *)lhs, \ 2662 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2663 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 2664 temp_val = *lhs; \ 2665 old_value = temp_val; \ 2666 } \ 2667 if (flag) \ 2668 return rhs; \ 2669 else \ 2670 return old_value; \ 2671 } 2672 2673 // ------------------------------------------------------------------------- 2674 // 1-byte, 2-byte operands - use critical section 2675 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2676 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2677 TYPE new_value, old_value; \ 2678 if (*lhs OP rhs) { /* need actions? */ \ 2679 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2680 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2681 } \ 2682 return *lhs; \ 2683 } 2684 2685 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2686 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2687 TYPE new_value, old_value; \ 2688 (void)new_value; \ 2689 if (*lhs OP rhs) { \ 2690 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2691 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2692 } \ 2693 return *lhs; \ 2694 } 2695 2696 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, 2697 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt 2698 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, 2699 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt 2700 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, 2701 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt 2702 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, 2703 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt 2704 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 2705 0) // __kmpc_atomic_fixed4_max_cpt 2706 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 2707 0) // __kmpc_atomic_fixed4_min_cpt 2708 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, 2709 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt 2710 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, 2711 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt 2712 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, 2713 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt 2714 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, 2715 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt 2716 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, 2717 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt 2718 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, 2719 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt 2720 #if KMP_HAVE_QUAD 2721 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 2722 1) // __kmpc_atomic_float16_max_cpt 
2723 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2724 1) // __kmpc_atomic_float16_min_cpt
2725 #if (KMP_ARCH_X86)
2726 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2727 1) // __kmpc_atomic_float16_max_a16_cpt
2728 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2729 1) // __kmpc_atomic_float16_min_a16_cpt
2730 #endif // (KMP_ARCH_X86)
2731 #endif // KMP_HAVE_QUAD
2732 2733 // ------------------------------------------------------------------------
2734 #ifdef KMP_GOMP_COMPAT
2735 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2736 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2737 KMP_CHECK_GTID; \
2738 OP_CRITICAL_CPT(OP, 0); \
2739 }
2740 #else
2741 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2742 #endif /* KMP_GOMP_COMPAT */
2743 // ------------------------------------------------------------------------
2744 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2745 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2746 TYPE new_value; \
2747 (void)new_value; \
2748 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2749 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2750 }
2751 2752 // ------------------------------------------------------------------------
2753 2754 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2755 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2756 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2757 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2758 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2759 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2760 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2761 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2762 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2763 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2764 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2765 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2766 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2767 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2768 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2769 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2770 2771 // ------------------------------------------------------------------------
2772 // Routines for Extended types: long double, _Quad, complex flavours (use
2773 // critical section)
2774 // TYPE_ID, OP_ID, TYPE - detailed above
2775 // OP - operator
2776 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2777 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2778 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2779 TYPE new_value; \
2780 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2781 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2782 }
2783 2784 // ------------------------------------------------------------------------
2785 // Workaround for cmplx4. Regular routines with return value don't work
2786 // on Win_32e. Let's return captured values through the additional parameter.
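// A minimal usage sketch (hypothetical caller, illustration only; loc and
// gtid stand for the usual source-location and global-thread-id arguments)
// for the cmplx4 capture entry points generated below, e.g.
// __kmpc_atomic_cmplx4_add_cpt:
//
//   kmp_cmplx32 x, rhs, v;
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, rhs, &v, 1); // v = (x += rhs)
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, rhs, &v, 0); // v = x; x += rhs
//
// OP_CRITICAL_CPT_WRK below implements exactly this: depending on flag it
// copies *lhs into *out either after or before the update is applied.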
2787 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2788 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2789 \ 2790 if (flag) { \ 2791 (*lhs) OP rhs; \ 2792 (*out) = (*lhs); \ 2793 } else { \ 2794 (*out) = (*lhs); \ 2795 (*lhs) OP rhs; \ 2796 } \ 2797 \ 2798 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2799 return; 2800 // ------------------------------------------------------------------------ 2801 2802 #ifdef KMP_GOMP_COMPAT 2803 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2804 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2805 KMP_CHECK_GTID; \ 2806 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2807 } 2808 #else 2809 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2810 #endif /* KMP_GOMP_COMPAT */ 2811 // ------------------------------------------------------------------------ 2812 2813 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2814 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2815 TYPE rhs, TYPE *out, int flag) { \ 2816 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2817 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2818 // ------------------------------------------------------------------------ 2819 2820 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2821 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2822 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2823 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2824 } 2825 // The end of workaround for cmplx4 2826 2827 /* ------------------------------------------------------------------------- */ 2828 // routines for long double type 2829 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2830 1) // __kmpc_atomic_float10_add_cpt 2831 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2832 1) // __kmpc_atomic_float10_sub_cpt 2833 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2834 1) // __kmpc_atomic_float10_mul_cpt 2835 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2836 1) // __kmpc_atomic_float10_div_cpt 2837 #if KMP_HAVE_QUAD 2838 // routines for _Quad type 2839 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2840 1) // __kmpc_atomic_float16_add_cpt 2841 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2842 1) // __kmpc_atomic_float16_sub_cpt 2843 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2844 1) // __kmpc_atomic_float16_mul_cpt 2845 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2846 1) // __kmpc_atomic_float16_div_cpt 2847 #if (KMP_ARCH_X86) 2848 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2849 1) // __kmpc_atomic_float16_add_a16_cpt 2850 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2851 1) // __kmpc_atomic_float16_sub_a16_cpt 2852 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2853 1) // __kmpc_atomic_float16_mul_a16_cpt 2854 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2855 1) // __kmpc_atomic_float16_div_a16_cpt 2856 #endif // (KMP_ARCH_X86) 2857 #endif // KMP_HAVE_QUAD 2858 2859 // routines for complex types 2860 2861 // cmplx4 routines to return void 2862 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2863 1) // __kmpc_atomic_cmplx4_add_cpt 2864 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2865 1) // __kmpc_atomic_cmplx4_sub_cpt 2866 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2867 1) // __kmpc_atomic_cmplx4_mul_cpt 2868 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2869 1) // __kmpc_atomic_cmplx4_div_cpt 2870 2871 ATOMIC_CRITICAL_CPT(cmplx8, 
add_cpt, kmp_cmplx64, +, 16c, 2872 1) // __kmpc_atomic_cmplx8_add_cpt 2873 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 2874 1) // __kmpc_atomic_cmplx8_sub_cpt 2875 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 2876 1) // __kmpc_atomic_cmplx8_mul_cpt 2877 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 2878 1) // __kmpc_atomic_cmplx8_div_cpt 2879 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 2880 1) // __kmpc_atomic_cmplx10_add_cpt 2881 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 2882 1) // __kmpc_atomic_cmplx10_sub_cpt 2883 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 2884 1) // __kmpc_atomic_cmplx10_mul_cpt 2885 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 2886 1) // __kmpc_atomic_cmplx10_div_cpt 2887 #if KMP_HAVE_QUAD 2888 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 2889 1) // __kmpc_atomic_cmplx16_add_cpt 2890 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 2891 1) // __kmpc_atomic_cmplx16_sub_cpt 2892 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 2893 1) // __kmpc_atomic_cmplx16_mul_cpt 2894 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 2895 1) // __kmpc_atomic_cmplx16_div_cpt 2896 #if (KMP_ARCH_X86) 2897 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 2898 1) // __kmpc_atomic_cmplx16_add_a16_cpt 2899 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 2900 1) // __kmpc_atomic_cmplx16_sub_a16_cpt 2901 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 2902 1) // __kmpc_atomic_cmplx16_mul_a16_cpt 2903 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 2904 1) // __kmpc_atomic_cmplx16_div_a16_cpt 2905 #endif // (KMP_ARCH_X86) 2906 #endif // KMP_HAVE_QUAD 2907 2908 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr 2909 // binop x; v = x; } for non-commutative operations. 
2910 // Supported only on IA-32 architecture and Intel(R) 64 2911 2912 // ------------------------------------------------------------------------- 2913 // Operation on *lhs, rhs bound by critical section 2914 // OP - operator (it's supposed to contain an assignment) 2915 // LCK_ID - lock identifier 2916 // Note: don't check gtid as it should always be valid 2917 // 1, 2-byte - expect valid parameter, other - check before this macro 2918 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 2919 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2920 \ 2921 if (flag) { \ 2922 /*temp_val = (*lhs);*/ \ 2923 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2924 new_value = (*lhs); \ 2925 } else { \ 2926 new_value = (*lhs); \ 2927 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2928 } \ 2929 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2930 return new_value; 2931 2932 // ------------------------------------------------------------------------ 2933 #ifdef KMP_GOMP_COMPAT 2934 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \ 2935 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2936 KMP_CHECK_GTID; \ 2937 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \ 2938 } 2939 #else 2940 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) 2941 #endif /* KMP_GOMP_COMPAT */ 2942 2943 // ------------------------------------------------------------------------ 2944 // Operation on *lhs, rhs using "compare_and_store" routine 2945 // TYPE - operands' type 2946 // BITS - size in bits, used to distinguish low level calls 2947 // OP - operator 2948 // Note: temp_val introduced in order to force the compiler to read 2949 // *lhs only once (w/o it the compiler reads *lhs twice) 2950 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2951 { \ 2952 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2953 TYPE old_value, new_value; \ 2954 temp_val = *lhs; \ 2955 old_value = temp_val; \ 2956 new_value = (TYPE)(rhs OP old_value); \ 2957 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2958 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2959 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2960 temp_val = *lhs; \ 2961 old_value = temp_val; \ 2962 new_value = (TYPE)(rhs OP old_value); \ 2963 } \ 2964 if (flag) { \ 2965 return new_value; \ 2966 } else \ 2967 return old_value; \ 2968 } 2969 2970 // ------------------------------------------------------------------------- 2971 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2972 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2973 TYPE new_value; \ 2974 (void)new_value; \ 2975 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 2976 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2977 } 2978 2979 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2980 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2981 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2982 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2983 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2985 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 2986 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 2987 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 2988 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 2989 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 2990 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 2991 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 2992 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 2993 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, 2994 
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev 2995 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 2996 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 2997 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 2998 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 2999 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 3000 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 3001 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 3002 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 3003 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 3004 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 3005 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 3006 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 3007 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 3008 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 3009 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 3010 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 3011 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 3012 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 3013 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 3014 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 3015 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 3016 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 3017 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 3018 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 3019 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 3020 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 3021 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 3022 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 3023 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 3024 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 3025 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 3026 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 3027 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 3028 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 3029 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 3030 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 3031 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 3032 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 3033 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 3034 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 3035 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 3036 3037 // ------------------------------------------------------------------------ 3038 // Routines for Extended types: long double, _Quad, complex flavours (use 3039 // critical section) 3040 // TYPE_ID, OP_ID, TYPE - detailed above 3041 // OP - operator 3042 // LCK_ID - lock identifier, used to possibly distinguish lock variable 3043 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 3044 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 3045 TYPE new_value; \ 3046 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 3047 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 3048 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 3049 } 3050 3051 /* ------------------------------------------------------------------------- */ 3052 // routines for long double type 3053 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 3054 1) // __kmpc_atomic_float10_sub_cpt_rev 3055 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, 3056 1) 
// __kmpc_atomic_float10_div_cpt_rev 3057 #if KMP_HAVE_QUAD 3058 // routines for _Quad type 3059 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 3060 1) // __kmpc_atomic_float16_sub_cpt_rev 3061 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 3062 1) // __kmpc_atomic_float16_div_cpt_rev 3063 #if (KMP_ARCH_X86) 3064 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 3065 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 3066 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 3067 1) // __kmpc_atomic_float16_div_a16_cpt_rev 3068 #endif // (KMP_ARCH_X86) 3069 #endif // KMP_HAVE_QUAD 3070 3071 // routines for complex types 3072 3073 // ------------------------------------------------------------------------ 3074 // Workaround for cmplx4. Regular routines with return value don't work 3075 // on Win_32e. Let's return captured values through the additional parameter. 3076 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3077 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3078 \ 3079 if (flag) { \ 3080 (*lhs) = (rhs)OP(*lhs); \ 3081 (*out) = (*lhs); \ 3082 } else { \ 3083 (*out) = (*lhs); \ 3084 (*lhs) = (rhs)OP(*lhs); \ 3085 } \ 3086 \ 3087 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3088 return; 3089 // ------------------------------------------------------------------------ 3090 3091 #ifdef KMP_GOMP_COMPAT 3092 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3093 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3094 KMP_CHECK_GTID; \ 3095 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3096 } 3097 #else 3098 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3099 #endif /* KMP_GOMP_COMPAT */ 3100 // ------------------------------------------------------------------------ 3101 3102 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3103 GOMP_FLAG) \ 3104 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3105 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3106 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3107 } 3108 // The end of workaround for cmplx4 3109 3110 // !!! 
TODO: check if we need to return void for cmplx4 routines
3111 // cmplx4 routines to return void
3112 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3113 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3114 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3115 1) // __kmpc_atomic_cmplx4_div_cpt_rev
3116 3117 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3118 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3119 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3120 1) // __kmpc_atomic_cmplx8_div_cpt_rev
3121 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3122 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3123 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3124 1) // __kmpc_atomic_cmplx10_div_cpt_rev
3125 #if KMP_HAVE_QUAD
3126 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3127 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3128 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3129 1) // __kmpc_atomic_cmplx16_div_cpt_rev
3130 #if (KMP_ARCH_X86)
3131 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3132 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3133 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3134 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3135 #endif // (KMP_ARCH_X86)
3136 #endif // KMP_HAVE_QUAD
3137 3138 // Capture reverse for mixed type: RHS=float16
3139 #if KMP_HAVE_QUAD
3140 3141 // Beginning of a definition (provides name, parameters, debug trace)
3142 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
3143 // fixed)
3144 // OP_ID - operation identifier (add, sub, mul, ...)
3145 // TYPE - operands' type
3146 // -------------------------------------------------------------------------
3147 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3148 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3149 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3150 TYPE new_value; \
3151 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3152 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3153 }
3154 3155 // -------------------------------------------------------------------------
3156 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3157 LCK_ID, GOMP_FLAG) \
3158 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3159 TYPE new_value; \
3160 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3161 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3162 }
3163 3164 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3165 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3166 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3167 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3168 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3169 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3170 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3171 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3172 3173 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3174 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3175 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3176 1,
3177 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, 3179
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp 3180 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3181 1, 3182 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3183 3184 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3185 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3187 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3189 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3190 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3191 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3192 3193 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3194 7, 3195 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3197 8i, 7, 3198 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3199 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3200 7, 3201 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3203 8i, 7, 3204 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3205 3206 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3207 4r, 3, 3208 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3209 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3210 4r, 3, 3211 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3212 3213 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3214 8r, 7, 3215 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3216 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3217 8r, 7, 3218 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3219 3220 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3221 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3222 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3223 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3224 3225 #endif // KMP_HAVE_QUAD 3226 3227 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3228 3229 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3230 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3231 TYPE rhs) { \ 3232 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3233 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3234 3235 #define CRITICAL_SWP(LCK_ID) \ 3236 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3237 \ 3238 old_value = (*lhs); \ 3239 (*lhs) = rhs; \ 3240 \ 3241 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3242 return old_value; 3243 3244 // ------------------------------------------------------------------------ 3245 #ifdef KMP_GOMP_COMPAT 3246 #define GOMP_CRITICAL_SWP(FLAG) \ 3247 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3248 KMP_CHECK_GTID; \ 3249 CRITICAL_SWP(0); \ 3250 } 3251 #else 3252 #define GOMP_CRITICAL_SWP(FLAG) 3253 #endif /* KMP_GOMP_COMPAT */ 3254 3255 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3256 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3257 TYPE old_value; \ 3258 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3259 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3260 return old_value; \ 3261 } 3262 // 
------------------------------------------------------------------------ 3263 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3264 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3265 TYPE old_value; \ 3266 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3267 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3268 return old_value; \ 3269 } 3270 3271 // ------------------------------------------------------------------------ 3272 #define CMPXCHG_SWP(TYPE, BITS) \ 3273 { \ 3274 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3275 TYPE old_value, new_value; \ 3276 temp_val = *lhs; \ 3277 old_value = temp_val; \ 3278 new_value = rhs; \ 3279 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3280 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3281 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3282 temp_val = *lhs; \ 3283 old_value = temp_val; \ 3284 new_value = rhs; \ 3285 } \ 3286 return old_value; \ 3287 } 3288 3289 // ------------------------------------------------------------------------- 3290 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3291 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3292 TYPE old_value; \ 3293 (void)old_value; \ 3294 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3295 CMPXCHG_SWP(TYPE, BITS) \ 3296 } 3297 3298 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3299 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3300 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3301 3302 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3303 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3304 3305 #if (KMP_ARCH_X86) 3306 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3307 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3308 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3309 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3310 #else 3311 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3312 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3313 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3314 #endif // (KMP_ARCH_X86) 3315 3316 // ------------------------------------------------------------------------ 3317 // Routines for Extended types: long double, _Quad, complex flavours (use 3318 // critical section) 3319 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3320 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3321 TYPE old_value; \ 3322 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3323 CRITICAL_SWP(LCK_ID) \ 3324 } 3325 3326 // ------------------------------------------------------------------------ 3327 // !!! TODO: check if we need to return void for cmplx4 routines 3328 // Workaround for cmplx4. Regular routines with return value don't work 3329 // on Win_32e. Let's return captured values through the additional parameter. 
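// Illustrative lowering sketch (hypothetical caller; loc and gtid are the
// usual source-location and global-thread-id arguments). A capture-write such
// as
//   #pragma omp atomic capture
//   { v = x; x = expr; }
// can be lowered to the scalar swap entry points above, e.g. for kmp_int32
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
// whereas for kmp_cmplx32 the void variant defined below returns the old
// value through the extra out parameter instead:
//   __kmpc_atomic_cmplx4_swp(&loc, gtid, &x, expr, &v);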
3330 3331 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3332 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3333 TYPE rhs, TYPE *out) { \ 3334 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3335 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3336 3337 #define CRITICAL_SWP_WRK(LCK_ID) \ 3338 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3339 \ 3340 tmp = (*lhs); \ 3341 (*lhs) = (rhs); \ 3342 (*out) = tmp; \ 3343 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3344 return; 3345 // ------------------------------------------------------------------------ 3346 3347 #ifdef KMP_GOMP_COMPAT 3348 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3349 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3350 KMP_CHECK_GTID; \ 3351 CRITICAL_SWP_WRK(0); \ 3352 } 3353 #else 3354 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3355 #endif /* KMP_GOMP_COMPAT */ 3356 // ------------------------------------------------------------------------ 3357 3358 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3359 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3360 TYPE tmp; \ 3361 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3362 CRITICAL_SWP_WRK(LCK_ID) \ 3363 } 3364 // The end of workaround for cmplx4 3365 3366 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3367 #if KMP_HAVE_QUAD 3368 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3369 #endif // KMP_HAVE_QUAD 3370 // cmplx4 routine to return void 3371 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3372 3373 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3374 // __kmpc_atomic_cmplx4_swp 3375 3376 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3377 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3378 #if KMP_HAVE_QUAD 3379 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3380 #if (KMP_ARCH_X86) 3381 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3382 1) // __kmpc_atomic_float16_a16_swp 3383 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3384 1) // __kmpc_atomic_cmplx16_a16_swp 3385 #endif // (KMP_ARCH_X86) 3386 #endif // KMP_HAVE_QUAD 3387 3388 // End of OpenMP 4.0 Capture 3389 3390 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3391 3392 #undef OP_CRITICAL 3393 3394 /* ------------------------------------------------------------------------ */ 3395 /* Generic atomic routines */ 3396 3397 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3398 void (*f)(void *, void *, void *)) { 3399 KMP_DEBUG_ASSERT(__kmp_init_serial); 3400 3401 if ( 3402 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3403 FALSE /* must use lock */ 3404 #else 3405 TRUE 3406 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3407 ) { 3408 kmp_int8 old_value, new_value; 3409 3410 old_value = *(kmp_int8 *)lhs; 3411 (*f)(&new_value, &old_value, rhs); 3412 3413 /* TODO: Should this be acquire or release? */ 3414 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3415 *(kmp_int8 *)&new_value)) { 3416 KMP_CPU_PAUSE(); 3417 3418 old_value = *(kmp_int8 *)lhs; 3419 (*f)(&new_value, &old_value, rhs); 3420 } 3421 3422 return; 3423 } else { 3424 // All 1-byte data is of integer data type. 
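// Fall back to a lock. In GOMP-compatibility mode (__kmp_atomic_mode == 2)
// the single global __kmp_atomic_lock is taken, so the update also excludes
// regions entered through __kmpc_atomic_start(); otherwise the finer-grained
// 1-byte lock __kmp_atomic_lock_1i is sufficient.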
3425 3426 #ifdef KMP_GOMP_COMPAT 3427 if (__kmp_atomic_mode == 2) { 3428 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3429 } else 3430 #endif /* KMP_GOMP_COMPAT */ 3431 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3432 3433 (*f)(lhs, lhs, rhs); 3434 3435 #ifdef KMP_GOMP_COMPAT 3436 if (__kmp_atomic_mode == 2) { 3437 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3438 } else 3439 #endif /* KMP_GOMP_COMPAT */ 3440 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3441 } 3442 } 3443 3444 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3445 void (*f)(void *, void *, void *)) { 3446 if ( 3447 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3448 FALSE /* must use lock */ 3449 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3450 TRUE /* no alignment problems */ 3451 #else 3452 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3453 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3454 ) { 3455 kmp_int16 old_value, new_value; 3456 3457 old_value = *(kmp_int16 *)lhs; 3458 (*f)(&new_value, &old_value, rhs); 3459 3460 /* TODO: Should this be acquire or release? */ 3461 while (!KMP_COMPARE_AND_STORE_ACQ16( 3462 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3463 KMP_CPU_PAUSE(); 3464 3465 old_value = *(kmp_int16 *)lhs; 3466 (*f)(&new_value, &old_value, rhs); 3467 } 3468 3469 return; 3470 } else { 3471 // All 2-byte data is of integer data type. 3472 3473 #ifdef KMP_GOMP_COMPAT 3474 if (__kmp_atomic_mode == 2) { 3475 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3476 } else 3477 #endif /* KMP_GOMP_COMPAT */ 3478 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3479 3480 (*f)(lhs, lhs, rhs); 3481 3482 #ifdef KMP_GOMP_COMPAT 3483 if (__kmp_atomic_mode == 2) { 3484 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3485 } else 3486 #endif /* KMP_GOMP_COMPAT */ 3487 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3488 } 3489 } 3490 3491 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3492 void (*f)(void *, void *, void *)) { 3493 KMP_DEBUG_ASSERT(__kmp_init_serial); 3494 3495 if ( 3496 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3497 // Gomp compatibility is broken if this routine is called for floats. 3498 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3499 TRUE /* no alignment problems */ 3500 #else 3501 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3502 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3503 ) { 3504 kmp_int32 old_value, new_value; 3505 3506 old_value = *(kmp_int32 *)lhs; 3507 (*f)(&new_value, &old_value, rhs); 3508 3509 /* TODO: Should this be acquire or release? */ 3510 while (!KMP_COMPARE_AND_STORE_ACQ32( 3511 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3512 KMP_CPU_PAUSE(); 3513 3514 old_value = *(kmp_int32 *)lhs; 3515 (*f)(&new_value, &old_value, rhs); 3516 } 3517 3518 return; 3519 } else { 3520 // Use __kmp_atomic_lock_4i for all 4-byte data, 3521 // even if it isn't of integer data type. 
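// (The kmp_int32 casts in the lock-free branch above only reinterpret the bit
// pattern for the compare-and-swap; the callback (*f) always works on the raw
// 4-byte storage, so float operands need no conversion on either path.)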
3522 3523 #ifdef KMP_GOMP_COMPAT 3524 if (__kmp_atomic_mode == 2) { 3525 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3526 } else 3527 #endif /* KMP_GOMP_COMPAT */ 3528 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3529 3530 (*f)(lhs, lhs, rhs); 3531 3532 #ifdef KMP_GOMP_COMPAT 3533 if (__kmp_atomic_mode == 2) { 3534 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3535 } else 3536 #endif /* KMP_GOMP_COMPAT */ 3537 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3538 } 3539 } 3540 3541 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3542 void (*f)(void *, void *, void *)) { 3543 KMP_DEBUG_ASSERT(__kmp_init_serial); 3544 if ( 3545 3546 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3547 FALSE /* must use lock */ 3548 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3549 TRUE /* no alignment problems */ 3550 #else 3551 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3552 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3553 ) { 3554 kmp_int64 old_value, new_value; 3555 3556 old_value = *(kmp_int64 *)lhs; 3557 (*f)(&new_value, &old_value, rhs); 3558 /* TODO: Should this be acquire or release? */ 3559 while (!KMP_COMPARE_AND_STORE_ACQ64( 3560 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3561 KMP_CPU_PAUSE(); 3562 3563 old_value = *(kmp_int64 *)lhs; 3564 (*f)(&new_value, &old_value, rhs); 3565 } 3566 3567 return; 3568 } else { 3569 // Use __kmp_atomic_lock_8i for all 8-byte data, 3570 // even if it isn't of integer data type. 3571 3572 #ifdef KMP_GOMP_COMPAT 3573 if (__kmp_atomic_mode == 2) { 3574 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3575 } else 3576 #endif /* KMP_GOMP_COMPAT */ 3577 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3578 3579 (*f)(lhs, lhs, rhs); 3580 3581 #ifdef KMP_GOMP_COMPAT 3582 if (__kmp_atomic_mode == 2) { 3583 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3584 } else 3585 #endif /* KMP_GOMP_COMPAT */ 3586 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3587 } 3588 } 3589 3590 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3591 void (*f)(void *, void *, void *)) { 3592 KMP_DEBUG_ASSERT(__kmp_init_serial); 3593 3594 #ifdef KMP_GOMP_COMPAT 3595 if (__kmp_atomic_mode == 2) { 3596 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3597 } else 3598 #endif /* KMP_GOMP_COMPAT */ 3599 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3600 3601 (*f)(lhs, lhs, rhs); 3602 3603 #ifdef KMP_GOMP_COMPAT 3604 if (__kmp_atomic_mode == 2) { 3605 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3606 } else 3607 #endif /* KMP_GOMP_COMPAT */ 3608 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3609 } 3610 3611 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3612 void (*f)(void *, void *, void *)) { 3613 KMP_DEBUG_ASSERT(__kmp_init_serial); 3614 3615 #ifdef KMP_GOMP_COMPAT 3616 if (__kmp_atomic_mode == 2) { 3617 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3618 } else 3619 #endif /* KMP_GOMP_COMPAT */ 3620 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3621 3622 (*f)(lhs, lhs, rhs); 3623 3624 #ifdef KMP_GOMP_COMPAT 3625 if (__kmp_atomic_mode == 2) { 3626 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3627 } else 3628 #endif /* KMP_GOMP_COMPAT */ 3629 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3630 } 3631 3632 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3633 void (*f)(void *, void *, void *)) { 3634 KMP_DEBUG_ASSERT(__kmp_init_serial); 
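// There is no lock-free path for 20-byte operands (e.g. kmp_cmplx80); the
// update is always serialized, either on the 20-byte lock below or on the
// single GOMP-compatible lock when __kmp_atomic_mode == 2.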
3635 3636 #ifdef KMP_GOMP_COMPAT 3637 if (__kmp_atomic_mode == 2) { 3638 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3639 } else 3640 #endif /* KMP_GOMP_COMPAT */ 3641 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3642 3643 (*f)(lhs, lhs, rhs); 3644 3645 #ifdef KMP_GOMP_COMPAT 3646 if (__kmp_atomic_mode == 2) { 3647 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3648 } else 3649 #endif /* KMP_GOMP_COMPAT */ 3650 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3651 } 3652 3653 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3654 void (*f)(void *, void *, void *)) { 3655 KMP_DEBUG_ASSERT(__kmp_init_serial); 3656 3657 #ifdef KMP_GOMP_COMPAT 3658 if (__kmp_atomic_mode == 2) { 3659 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3660 } else 3661 #endif /* KMP_GOMP_COMPAT */ 3662 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3663 3664 (*f)(lhs, lhs, rhs); 3665 3666 #ifdef KMP_GOMP_COMPAT 3667 if (__kmp_atomic_mode == 2) { 3668 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3669 } else 3670 #endif /* KMP_GOMP_COMPAT */ 3671 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3672 } 3673 3674 // AC: same two routines as GOMP_atomic_start/end, but will be called by our 3675 // compiler; duplicated in order to not use 3-party names in pure Intel code 3676 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3677 void __kmpc_atomic_start(void) { 3678 int gtid = __kmp_entry_gtid(); 3679 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3680 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3681 } 3682 3683 void __kmpc_atomic_end(void) { 3684 int gtid = __kmp_get_gtid(); 3685 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3686 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3687 } 3688 3689 /*! 3690 @} 3691 */ 3692 3693 // end of file 3694