/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of atomic
operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However, the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|---------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
long double (80-bit x87 extended float) | `float10`
_Quad      | `float16`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
complex<_Quad>   | `cmplx16`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
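As an example of how a compiler might use these entrypoints when it chooses
not to inline: the capture below could be lowered to a call of
`__kmpc_atomic_fixed4_add_cpt` (a sketch only; the `ident_t` argument and the
global thread id are supplied by the compiler and runtime, and appear here as
placeholders `&loc` and `gtid`):
@code
int s = 0, v;
#pragma omp atomic capture
{ s += 5; v = s; }
// roughly equivalent to:
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 1); // flag==1: capture *after*
@endcode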
111 112 The one set of exceptions to this is the `complex<float>` type where the value 113 is not returned, rather an extra argument pointer is passed. 114 115 They look like 116 @code 117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * 118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 119 @endcode 120 121 Read and Write Operations 122 ========================= 123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply 124 ensure that the value is read or written atomically, with no modification 125 performed. In many cases on IA-32 architecture these operations can be inlined 126 since the architecture guarantees that no tearing occurs on aligned objects 127 accessed with a single memory operation of up to 64 bits in size. 128 129 The general form of the read operations is 130 @code 131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 132 @endcode 133 134 For the write operations the form is 135 @code 136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs 137 ); 138 @endcode 139 140 Full list of functions 141 ====================== 142 This leads to the generation of 376 atomic functions, as follows. 143 144 Functions for integers 145 --------------------- 146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and 147 unsigned (where that matters). 148 @code 149 __kmpc_atomic_fixed1_add 150 __kmpc_atomic_fixed1_add_cpt 151 __kmpc_atomic_fixed1_add_fp 152 __kmpc_atomic_fixed1_andb 153 __kmpc_atomic_fixed1_andb_cpt 154 __kmpc_atomic_fixed1_andl 155 __kmpc_atomic_fixed1_andl_cpt 156 __kmpc_atomic_fixed1_div 157 __kmpc_atomic_fixed1_div_cpt 158 __kmpc_atomic_fixed1_div_cpt_rev 159 __kmpc_atomic_fixed1_div_float8 160 __kmpc_atomic_fixed1_div_fp 161 __kmpc_atomic_fixed1_div_rev 162 __kmpc_atomic_fixed1_eqv 163 __kmpc_atomic_fixed1_eqv_cpt 164 __kmpc_atomic_fixed1_max 165 __kmpc_atomic_fixed1_max_cpt 166 __kmpc_atomic_fixed1_min 167 __kmpc_atomic_fixed1_min_cpt 168 __kmpc_atomic_fixed1_mul 169 __kmpc_atomic_fixed1_mul_cpt 170 __kmpc_atomic_fixed1_mul_float8 171 __kmpc_atomic_fixed1_mul_fp 172 __kmpc_atomic_fixed1_neqv 173 __kmpc_atomic_fixed1_neqv_cpt 174 __kmpc_atomic_fixed1_orb 175 __kmpc_atomic_fixed1_orb_cpt 176 __kmpc_atomic_fixed1_orl 177 __kmpc_atomic_fixed1_orl_cpt 178 __kmpc_atomic_fixed1_rd 179 __kmpc_atomic_fixed1_shl 180 __kmpc_atomic_fixed1_shl_cpt 181 __kmpc_atomic_fixed1_shl_cpt_rev 182 __kmpc_atomic_fixed1_shl_rev 183 __kmpc_atomic_fixed1_shr 184 __kmpc_atomic_fixed1_shr_cpt 185 __kmpc_atomic_fixed1_shr_cpt_rev 186 __kmpc_atomic_fixed1_shr_rev 187 __kmpc_atomic_fixed1_sub 188 __kmpc_atomic_fixed1_sub_cpt 189 __kmpc_atomic_fixed1_sub_cpt_rev 190 __kmpc_atomic_fixed1_sub_fp 191 __kmpc_atomic_fixed1_sub_rev 192 __kmpc_atomic_fixed1_swp 193 __kmpc_atomic_fixed1_wr 194 __kmpc_atomic_fixed1_xor 195 __kmpc_atomic_fixed1_xor_cpt 196 __kmpc_atomic_fixed1u_add_fp 197 __kmpc_atomic_fixed1u_sub_fp 198 __kmpc_atomic_fixed1u_mul_fp 199 __kmpc_atomic_fixed1u_div 200 __kmpc_atomic_fixed1u_div_cpt 201 __kmpc_atomic_fixed1u_div_cpt_rev 202 __kmpc_atomic_fixed1u_div_fp 203 __kmpc_atomic_fixed1u_div_rev 204 __kmpc_atomic_fixed1u_shr 205 __kmpc_atomic_fixed1u_shr_cpt 206 __kmpc_atomic_fixed1u_shr_cpt_rev 207 __kmpc_atomic_fixed1u_shr_rev 208 __kmpc_atomic_fixed2_add 209 __kmpc_atomic_fixed2_add_cpt 210 __kmpc_atomic_fixed2_add_fp 211 __kmpc_atomic_fixed2_andb 212 __kmpc_atomic_fixed2_andb_cpt 213 __kmpc_atomic_fixed2_andl 214 __kmpc_atomic_fixed2_andl_cpt 215 
__kmpc_atomic_fixed2_div 216 __kmpc_atomic_fixed2_div_cpt 217 __kmpc_atomic_fixed2_div_cpt_rev 218 __kmpc_atomic_fixed2_div_float8 219 __kmpc_atomic_fixed2_div_fp 220 __kmpc_atomic_fixed2_div_rev 221 __kmpc_atomic_fixed2_eqv 222 __kmpc_atomic_fixed2_eqv_cpt 223 __kmpc_atomic_fixed2_max 224 __kmpc_atomic_fixed2_max_cpt 225 __kmpc_atomic_fixed2_min 226 __kmpc_atomic_fixed2_min_cpt 227 __kmpc_atomic_fixed2_mul 228 __kmpc_atomic_fixed2_mul_cpt 229 __kmpc_atomic_fixed2_mul_float8 230 __kmpc_atomic_fixed2_mul_fp 231 __kmpc_atomic_fixed2_neqv 232 __kmpc_atomic_fixed2_neqv_cpt 233 __kmpc_atomic_fixed2_orb 234 __kmpc_atomic_fixed2_orb_cpt 235 __kmpc_atomic_fixed2_orl 236 __kmpc_atomic_fixed2_orl_cpt 237 __kmpc_atomic_fixed2_rd 238 __kmpc_atomic_fixed2_shl 239 __kmpc_atomic_fixed2_shl_cpt 240 __kmpc_atomic_fixed2_shl_cpt_rev 241 __kmpc_atomic_fixed2_shl_rev 242 __kmpc_atomic_fixed2_shr 243 __kmpc_atomic_fixed2_shr_cpt 244 __kmpc_atomic_fixed2_shr_cpt_rev 245 __kmpc_atomic_fixed2_shr_rev 246 __kmpc_atomic_fixed2_sub 247 __kmpc_atomic_fixed2_sub_cpt 248 __kmpc_atomic_fixed2_sub_cpt_rev 249 __kmpc_atomic_fixed2_sub_fp 250 __kmpc_atomic_fixed2_sub_rev 251 __kmpc_atomic_fixed2_swp 252 __kmpc_atomic_fixed2_wr 253 __kmpc_atomic_fixed2_xor 254 __kmpc_atomic_fixed2_xor_cpt 255 __kmpc_atomic_fixed2u_add_fp 256 __kmpc_atomic_fixed2u_sub_fp 257 __kmpc_atomic_fixed2u_mul_fp 258 __kmpc_atomic_fixed2u_div 259 __kmpc_atomic_fixed2u_div_cpt 260 __kmpc_atomic_fixed2u_div_cpt_rev 261 __kmpc_atomic_fixed2u_div_fp 262 __kmpc_atomic_fixed2u_div_rev 263 __kmpc_atomic_fixed2u_shr 264 __kmpc_atomic_fixed2u_shr_cpt 265 __kmpc_atomic_fixed2u_shr_cpt_rev 266 __kmpc_atomic_fixed2u_shr_rev 267 __kmpc_atomic_fixed4_add 268 __kmpc_atomic_fixed4_add_cpt 269 __kmpc_atomic_fixed4_add_fp 270 __kmpc_atomic_fixed4_andb 271 __kmpc_atomic_fixed4_andb_cpt 272 __kmpc_atomic_fixed4_andl 273 __kmpc_atomic_fixed4_andl_cpt 274 __kmpc_atomic_fixed4_div 275 __kmpc_atomic_fixed4_div_cpt 276 __kmpc_atomic_fixed4_div_cpt_rev 277 __kmpc_atomic_fixed4_div_float8 278 __kmpc_atomic_fixed4_div_fp 279 __kmpc_atomic_fixed4_div_rev 280 __kmpc_atomic_fixed4_eqv 281 __kmpc_atomic_fixed4_eqv_cpt 282 __kmpc_atomic_fixed4_max 283 __kmpc_atomic_fixed4_max_cpt 284 __kmpc_atomic_fixed4_min 285 __kmpc_atomic_fixed4_min_cpt 286 __kmpc_atomic_fixed4_mul 287 __kmpc_atomic_fixed4_mul_cpt 288 __kmpc_atomic_fixed4_mul_float8 289 __kmpc_atomic_fixed4_mul_fp 290 __kmpc_atomic_fixed4_neqv 291 __kmpc_atomic_fixed4_neqv_cpt 292 __kmpc_atomic_fixed4_orb 293 __kmpc_atomic_fixed4_orb_cpt 294 __kmpc_atomic_fixed4_orl 295 __kmpc_atomic_fixed4_orl_cpt 296 __kmpc_atomic_fixed4_rd 297 __kmpc_atomic_fixed4_shl 298 __kmpc_atomic_fixed4_shl_cpt 299 __kmpc_atomic_fixed4_shl_cpt_rev 300 __kmpc_atomic_fixed4_shl_rev 301 __kmpc_atomic_fixed4_shr 302 __kmpc_atomic_fixed4_shr_cpt 303 __kmpc_atomic_fixed4_shr_cpt_rev 304 __kmpc_atomic_fixed4_shr_rev 305 __kmpc_atomic_fixed4_sub 306 __kmpc_atomic_fixed4_sub_cpt 307 __kmpc_atomic_fixed4_sub_cpt_rev 308 __kmpc_atomic_fixed4_sub_fp 309 __kmpc_atomic_fixed4_sub_rev 310 __kmpc_atomic_fixed4_swp 311 __kmpc_atomic_fixed4_wr 312 __kmpc_atomic_fixed4_xor 313 __kmpc_atomic_fixed4_xor_cpt 314 __kmpc_atomic_fixed4u_add_fp 315 __kmpc_atomic_fixed4u_sub_fp 316 __kmpc_atomic_fixed4u_mul_fp 317 __kmpc_atomic_fixed4u_div 318 __kmpc_atomic_fixed4u_div_cpt 319 __kmpc_atomic_fixed4u_div_cpt_rev 320 __kmpc_atomic_fixed4u_div_fp 321 __kmpc_atomic_fixed4u_div_rev 322 __kmpc_atomic_fixed4u_shr 323 __kmpc_atomic_fixed4u_shr_cpt 324 __kmpc_atomic_fixed4u_shr_cpt_rev 325 
__kmpc_atomic_fixed4u_shr_rev 326 __kmpc_atomic_fixed8_add 327 __kmpc_atomic_fixed8_add_cpt 328 __kmpc_atomic_fixed8_add_fp 329 __kmpc_atomic_fixed8_andb 330 __kmpc_atomic_fixed8_andb_cpt 331 __kmpc_atomic_fixed8_andl 332 __kmpc_atomic_fixed8_andl_cpt 333 __kmpc_atomic_fixed8_div 334 __kmpc_atomic_fixed8_div_cpt 335 __kmpc_atomic_fixed8_div_cpt_rev 336 __kmpc_atomic_fixed8_div_float8 337 __kmpc_atomic_fixed8_div_fp 338 __kmpc_atomic_fixed8_div_rev 339 __kmpc_atomic_fixed8_eqv 340 __kmpc_atomic_fixed8_eqv_cpt 341 __kmpc_atomic_fixed8_max 342 __kmpc_atomic_fixed8_max_cpt 343 __kmpc_atomic_fixed8_min 344 __kmpc_atomic_fixed8_min_cpt 345 __kmpc_atomic_fixed8_mul 346 __kmpc_atomic_fixed8_mul_cpt 347 __kmpc_atomic_fixed8_mul_float8 348 __kmpc_atomic_fixed8_mul_fp 349 __kmpc_atomic_fixed8_neqv 350 __kmpc_atomic_fixed8_neqv_cpt 351 __kmpc_atomic_fixed8_orb 352 __kmpc_atomic_fixed8_orb_cpt 353 __kmpc_atomic_fixed8_orl 354 __kmpc_atomic_fixed8_orl_cpt 355 __kmpc_atomic_fixed8_rd 356 __kmpc_atomic_fixed8_shl 357 __kmpc_atomic_fixed8_shl_cpt 358 __kmpc_atomic_fixed8_shl_cpt_rev 359 __kmpc_atomic_fixed8_shl_rev 360 __kmpc_atomic_fixed8_shr 361 __kmpc_atomic_fixed8_shr_cpt 362 __kmpc_atomic_fixed8_shr_cpt_rev 363 __kmpc_atomic_fixed8_shr_rev 364 __kmpc_atomic_fixed8_sub 365 __kmpc_atomic_fixed8_sub_cpt 366 __kmpc_atomic_fixed8_sub_cpt_rev 367 __kmpc_atomic_fixed8_sub_fp 368 __kmpc_atomic_fixed8_sub_rev 369 __kmpc_atomic_fixed8_swp 370 __kmpc_atomic_fixed8_wr 371 __kmpc_atomic_fixed8_xor 372 __kmpc_atomic_fixed8_xor_cpt 373 __kmpc_atomic_fixed8u_add_fp 374 __kmpc_atomic_fixed8u_sub_fp 375 __kmpc_atomic_fixed8u_mul_fp 376 __kmpc_atomic_fixed8u_div 377 __kmpc_atomic_fixed8u_div_cpt 378 __kmpc_atomic_fixed8u_div_cpt_rev 379 __kmpc_atomic_fixed8u_div_fp 380 __kmpc_atomic_fixed8u_div_rev 381 __kmpc_atomic_fixed8u_shr 382 __kmpc_atomic_fixed8u_shr_cpt 383 __kmpc_atomic_fixed8u_shr_cpt_rev 384 __kmpc_atomic_fixed8u_shr_rev 385 @endcode 386 387 Functions for floating point 388 ---------------------------- 389 There are versions here for floating point numbers of size 4, 8, 10 and 16 390 bytes. (Ten byte floats are used by X87, but are now rare). 
391 @code 392 __kmpc_atomic_float4_add 393 __kmpc_atomic_float4_add_cpt 394 __kmpc_atomic_float4_add_float8 395 __kmpc_atomic_float4_add_fp 396 __kmpc_atomic_float4_div 397 __kmpc_atomic_float4_div_cpt 398 __kmpc_atomic_float4_div_cpt_rev 399 __kmpc_atomic_float4_div_float8 400 __kmpc_atomic_float4_div_fp 401 __kmpc_atomic_float4_div_rev 402 __kmpc_atomic_float4_max 403 __kmpc_atomic_float4_max_cpt 404 __kmpc_atomic_float4_min 405 __kmpc_atomic_float4_min_cpt 406 __kmpc_atomic_float4_mul 407 __kmpc_atomic_float4_mul_cpt 408 __kmpc_atomic_float4_mul_float8 409 __kmpc_atomic_float4_mul_fp 410 __kmpc_atomic_float4_rd 411 __kmpc_atomic_float4_sub 412 __kmpc_atomic_float4_sub_cpt 413 __kmpc_atomic_float4_sub_cpt_rev 414 __kmpc_atomic_float4_sub_float8 415 __kmpc_atomic_float4_sub_fp 416 __kmpc_atomic_float4_sub_rev 417 __kmpc_atomic_float4_swp 418 __kmpc_atomic_float4_wr 419 __kmpc_atomic_float8_add 420 __kmpc_atomic_float8_add_cpt 421 __kmpc_atomic_float8_add_fp 422 __kmpc_atomic_float8_div 423 __kmpc_atomic_float8_div_cpt 424 __kmpc_atomic_float8_div_cpt_rev 425 __kmpc_atomic_float8_div_fp 426 __kmpc_atomic_float8_div_rev 427 __kmpc_atomic_float8_max 428 __kmpc_atomic_float8_max_cpt 429 __kmpc_atomic_float8_min 430 __kmpc_atomic_float8_min_cpt 431 __kmpc_atomic_float8_mul 432 __kmpc_atomic_float8_mul_cpt 433 __kmpc_atomic_float8_mul_fp 434 __kmpc_atomic_float8_rd 435 __kmpc_atomic_float8_sub 436 __kmpc_atomic_float8_sub_cpt 437 __kmpc_atomic_float8_sub_cpt_rev 438 __kmpc_atomic_float8_sub_fp 439 __kmpc_atomic_float8_sub_rev 440 __kmpc_atomic_float8_swp 441 __kmpc_atomic_float8_wr 442 __kmpc_atomic_float10_add 443 __kmpc_atomic_float10_add_cpt 444 __kmpc_atomic_float10_add_fp 445 __kmpc_atomic_float10_div 446 __kmpc_atomic_float10_div_cpt 447 __kmpc_atomic_float10_div_cpt_rev 448 __kmpc_atomic_float10_div_fp 449 __kmpc_atomic_float10_div_rev 450 __kmpc_atomic_float10_mul 451 __kmpc_atomic_float10_mul_cpt 452 __kmpc_atomic_float10_mul_fp 453 __kmpc_atomic_float10_rd 454 __kmpc_atomic_float10_sub 455 __kmpc_atomic_float10_sub_cpt 456 __kmpc_atomic_float10_sub_cpt_rev 457 __kmpc_atomic_float10_sub_fp 458 __kmpc_atomic_float10_sub_rev 459 __kmpc_atomic_float10_swp 460 __kmpc_atomic_float10_wr 461 __kmpc_atomic_float16_add 462 __kmpc_atomic_float16_add_cpt 463 __kmpc_atomic_float16_div 464 __kmpc_atomic_float16_div_cpt 465 __kmpc_atomic_float16_div_cpt_rev 466 __kmpc_atomic_float16_div_rev 467 __kmpc_atomic_float16_max 468 __kmpc_atomic_float16_max_cpt 469 __kmpc_atomic_float16_min 470 __kmpc_atomic_float16_min_cpt 471 __kmpc_atomic_float16_mul 472 __kmpc_atomic_float16_mul_cpt 473 __kmpc_atomic_float16_rd 474 __kmpc_atomic_float16_sub 475 __kmpc_atomic_float16_sub_cpt 476 __kmpc_atomic_float16_sub_cpt_rev 477 __kmpc_atomic_float16_sub_rev 478 __kmpc_atomic_float16_swp 479 __kmpc_atomic_float16_wr 480 @endcode 481 482 Functions for Complex types 483 --------------------------- 484 Functions for complex types whose component floating point variables are of size 485 4,8,10 or 16 bytes. The names here are based on the size of the component float, 486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an 487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. 
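To make the naming concrete, the plain update entrypoints for the two smallest
complex encodings have the following shapes (a sketch derived from the general
update form above; `kmp_cmplx32` and `kmp_cmplx64` are the runtime's
`complex<float>` and `complex<double>` types):
@code
void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs,
kmp_cmplx32 rhs );
void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs,
kmp_cmplx64 rhs );
@endcode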
488 489 @code 490 __kmpc_atomic_cmplx4_add 491 __kmpc_atomic_cmplx4_add_cmplx8 492 __kmpc_atomic_cmplx4_add_cpt 493 __kmpc_atomic_cmplx4_div 494 __kmpc_atomic_cmplx4_div_cmplx8 495 __kmpc_atomic_cmplx4_div_cpt 496 __kmpc_atomic_cmplx4_div_cpt_rev 497 __kmpc_atomic_cmplx4_div_rev 498 __kmpc_atomic_cmplx4_mul 499 __kmpc_atomic_cmplx4_mul_cmplx8 500 __kmpc_atomic_cmplx4_mul_cpt 501 __kmpc_atomic_cmplx4_rd 502 __kmpc_atomic_cmplx4_sub 503 __kmpc_atomic_cmplx4_sub_cmplx8 504 __kmpc_atomic_cmplx4_sub_cpt 505 __kmpc_atomic_cmplx4_sub_cpt_rev 506 __kmpc_atomic_cmplx4_sub_rev 507 __kmpc_atomic_cmplx4_swp 508 __kmpc_atomic_cmplx4_wr 509 __kmpc_atomic_cmplx8_add 510 __kmpc_atomic_cmplx8_add_cpt 511 __kmpc_atomic_cmplx8_div 512 __kmpc_atomic_cmplx8_div_cpt 513 __kmpc_atomic_cmplx8_div_cpt_rev 514 __kmpc_atomic_cmplx8_div_rev 515 __kmpc_atomic_cmplx8_mul 516 __kmpc_atomic_cmplx8_mul_cpt 517 __kmpc_atomic_cmplx8_rd 518 __kmpc_atomic_cmplx8_sub 519 __kmpc_atomic_cmplx8_sub_cpt 520 __kmpc_atomic_cmplx8_sub_cpt_rev 521 __kmpc_atomic_cmplx8_sub_rev 522 __kmpc_atomic_cmplx8_swp 523 __kmpc_atomic_cmplx8_wr 524 __kmpc_atomic_cmplx10_add 525 __kmpc_atomic_cmplx10_add_cpt 526 __kmpc_atomic_cmplx10_div 527 __kmpc_atomic_cmplx10_div_cpt 528 __kmpc_atomic_cmplx10_div_cpt_rev 529 __kmpc_atomic_cmplx10_div_rev 530 __kmpc_atomic_cmplx10_mul 531 __kmpc_atomic_cmplx10_mul_cpt 532 __kmpc_atomic_cmplx10_rd 533 __kmpc_atomic_cmplx10_sub 534 __kmpc_atomic_cmplx10_sub_cpt 535 __kmpc_atomic_cmplx10_sub_cpt_rev 536 __kmpc_atomic_cmplx10_sub_rev 537 __kmpc_atomic_cmplx10_swp 538 __kmpc_atomic_cmplx10_wr 539 __kmpc_atomic_cmplx16_add 540 __kmpc_atomic_cmplx16_add_cpt 541 __kmpc_atomic_cmplx16_div 542 __kmpc_atomic_cmplx16_div_cpt 543 __kmpc_atomic_cmplx16_div_cpt_rev 544 __kmpc_atomic_cmplx16_div_rev 545 __kmpc_atomic_cmplx16_mul 546 __kmpc_atomic_cmplx16_mul_cpt 547 __kmpc_atomic_cmplx16_rd 548 __kmpc_atomic_cmplx16_sub 549 __kmpc_atomic_cmplx16_sub_cpt 550 __kmpc_atomic_cmplx16_sub_cpt_rev 551 __kmpc_atomic_cmplx16_swp 552 __kmpc_atomic_cmplx16_wr 553 @endcode 554 */ 555 556 /*! 
557 @ingroup ATOMIC_OPS 558 @{ 559 */ 560 561 /* 562 * Global vars 563 */ 564 565 #ifndef KMP_GOMP_COMPAT 566 int __kmp_atomic_mode = 1; // Intel perf 567 #else 568 int __kmp_atomic_mode = 2; // GOMP compatibility 569 #endif /* KMP_GOMP_COMPAT */ 570 571 KMP_ALIGN(128) 572 573 // Control access to all user coded atomics in Gnu compat mode 574 kmp_atomic_lock_t __kmp_atomic_lock; 575 // Control access to all user coded atomics for 1-byte fixed data types 576 kmp_atomic_lock_t __kmp_atomic_lock_1i; 577 // Control access to all user coded atomics for 2-byte fixed data types 578 kmp_atomic_lock_t __kmp_atomic_lock_2i; 579 // Control access to all user coded atomics for 4-byte fixed data types 580 kmp_atomic_lock_t __kmp_atomic_lock_4i; 581 // Control access to all user coded atomics for kmp_real32 data type 582 kmp_atomic_lock_t __kmp_atomic_lock_4r; 583 // Control access to all user coded atomics for 8-byte fixed data types 584 kmp_atomic_lock_t __kmp_atomic_lock_8i; 585 // Control access to all user coded atomics for kmp_real64 data type 586 kmp_atomic_lock_t __kmp_atomic_lock_8r; 587 // Control access to all user coded atomics for complex byte data type 588 kmp_atomic_lock_t __kmp_atomic_lock_8c; 589 // Control access to all user coded atomics for long double data type 590 kmp_atomic_lock_t __kmp_atomic_lock_10r; 591 // Control access to all user coded atomics for _Quad data type 592 kmp_atomic_lock_t __kmp_atomic_lock_16r; 593 // Control access to all user coded atomics for double complex data type 594 kmp_atomic_lock_t __kmp_atomic_lock_16c; 595 // Control access to all user coded atomics for long double complex type 596 kmp_atomic_lock_t __kmp_atomic_lock_20c; 597 // Control access to all user coded atomics for _Quad complex data type 598 kmp_atomic_lock_t __kmp_atomic_lock_32c; 599 600 /* 2007-03-02: 601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug 602 on *_32 and *_32e. This is just a temporary workaround for the problem. It 603 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines 604 in assembler language. 
*/
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                              \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                                \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) OP(rhs);                                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (TYPE)((*lhs)OP rhs);                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
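// For illustration only: expanding ATOMIC_BEGIN(float10, add, long double,
// void) followed by OP_UPDATE_CRITICAL(long double, +, 10r) yields, roughly,
// the routine sketched below (the real generating macro also emits the
// GOMP-compatibility path defined next):
//
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) = (long double)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }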
759 760 #ifdef KMP_GOMP_COMPAT 761 #define OP_GOMP_CRITICAL(OP, FLAG) \ 762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 763 KMP_CHECK_GTID; \ 764 OP_CRITICAL(OP, 0); \ 765 return; \ 766 } 767 768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \ 769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 770 KMP_CHECK_GTID; \ 771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \ 772 return; \ 773 } 774 #else 775 #define OP_GOMP_CRITICAL(OP, FLAG) 776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) 777 #endif /* KMP_GOMP_COMPAT */ 778 779 #if KMP_MIC 780 #define KMP_DO_PAUSE _mm_delay_32(1) 781 #else 782 #define KMP_DO_PAUSE 783 #endif /* KMP_MIC */ 784 785 // ------------------------------------------------------------------------ 786 // Operation on *lhs, rhs using "compare_and_store" routine 787 // TYPE - operands' type 788 // BITS - size in bits, used to distinguish low level calls 789 // OP - operator 790 #define OP_CMPXCHG(TYPE, BITS, OP) \ 791 { \ 792 TYPE old_value, new_value; \ 793 old_value = *(TYPE volatile *)lhs; \ 794 new_value = (TYPE)(old_value OP rhs); \ 795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 798 KMP_DO_PAUSE; \ 799 \ 800 old_value = *(TYPE volatile *)lhs; \ 801 new_value = (TYPE)(old_value OP rhs); \ 802 } \ 803 } 804 805 #if USE_CMPXCHG_FIX 806 // 2007-06-25: 807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile 809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the 810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of 811 // the workaround. 812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 813 { \ 814 struct _sss { \ 815 TYPE cmp; \ 816 kmp_int##BITS *vvv; \ 817 }; \ 818 struct _sss old_value, new_value; \ 819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 826 KMP_DO_PAUSE; \ 827 \ 828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 830 } \ 831 } 832 // end of the first part of the workaround for C78287 833 #endif // USE_CMPXCHG_FIX 834 835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64 836 // Undo explicit type casts to get MSVC ARM64 to build. 
Uses 837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG 838 #undef OP_CMPXCHG 839 #define OP_CMPXCHG(TYPE, BITS, OP) \ 840 { \ 841 struct _sss { \ 842 TYPE cmp; \ 843 kmp_int##BITS *vvv; \ 844 }; \ 845 struct _sss old_value, new_value; \ 846 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 847 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 848 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 849 new_value.cmp = old_value.cmp OP rhs; \ 850 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 851 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 852 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 853 KMP_DO_PAUSE; \ 854 \ 855 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 856 new_value.cmp = old_value.cmp OP rhs; \ 857 } \ 858 } 859 860 #undef OP_UPDATE_CRITICAL 861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ 862 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 863 (*lhs) = (*lhs)OP rhs; \ 864 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 865 866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64 867 868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 869 870 // ------------------------------------------------------------------------ 871 // X86 or X86_64: no alignment problems ==================================== 872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 873 GOMP_FLAG) \ 874 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 875 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 876 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 877 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 878 } 879 // ------------------------------------------------------------------------- 880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 881 GOMP_FLAG) \ 882 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 883 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 884 OP_CMPXCHG(TYPE, BITS, OP) \ 885 } 886 #if USE_CMPXCHG_FIX 887 // ------------------------------------------------------------------------- 888 // workaround for C78287 (complex(kind=4) data type) 889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 890 MASK, GOMP_FLAG) \ 891 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 892 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 893 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 894 } 895 // end of the second part of the workaround for C78287 896 #endif // USE_CMPXCHG_FIX 897 898 #else 899 // ------------------------------------------------------------------------- 900 // Code for other architectures that don't handle unaligned accesses. 
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 902 GOMP_FLAG) \ 903 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 904 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 905 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 906 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 907 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 908 } else { \ 909 KMP_CHECK_GTID; \ 910 OP_UPDATE_CRITICAL(TYPE, OP, \ 911 LCK_ID) /* unaligned address - use critical */ \ 912 } \ 913 } 914 // ------------------------------------------------------------------------- 915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 916 GOMP_FLAG) \ 917 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 918 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 919 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 920 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 921 } else { \ 922 KMP_CHECK_GTID; \ 923 OP_UPDATE_CRITICAL(TYPE, OP, \ 924 LCK_ID) /* unaligned address - use critical */ \ 925 } \ 926 } 927 #if USE_CMPXCHG_FIX 928 // ------------------------------------------------------------------------- 929 // workaround for C78287 (complex(kind=4) data type) 930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 931 MASK, GOMP_FLAG) \ 932 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 933 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 934 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 935 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 936 } else { \ 937 KMP_CHECK_GTID; \ 938 OP_UPDATE_CRITICAL(TYPE, OP, \ 939 LCK_ID) /* unaligned address - use critical */ \ 940 } \ 941 } 942 // end of the second part of the workaround for C78287 943 #endif // USE_CMPXCHG_FIX 944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 945 946 // Routines for ATOMIC 4-byte operands addition and subtraction 947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 948 0) // __kmpc_atomic_fixed4_add 949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 950 0) // __kmpc_atomic_fixed4_sub 951 952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, 953 KMP_ARCH_X86) // __kmpc_atomic_float4_add 954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, 955 KMP_ARCH_X86) // __kmpc_atomic_float4_sub 956 957 // Routines for ATOMIC 8-byte operands addition and subtraction 958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, 959 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add 960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, 961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub 962 963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, 964 KMP_ARCH_X86) // __kmpc_atomic_float8_add 965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, 966 KMP_ARCH_X86) // __kmpc_atomic_float8_sub 967 968 // ------------------------------------------------------------------------ 969 // Entries definition for integer operands 970 // TYPE_ID - operands type and size (fixed4, float4) 971 // OP_ID - operation identifier (add, sub, mul, ...) 
972 // TYPE - operand type 973 // BITS - size in bits, used to distinguish low level calls 974 // OP - operator (used in critical section) 975 // LCK_ID - lock identifier, used to possibly distinguish lock variable 976 // MASK - used for alignment check 977 978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG 979 // ------------------------------------------------------------------------ 980 // Routines for ATOMIC integer operands, other operators 981 // ------------------------------------------------------------------------ 982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, 984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add 985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, 986 0) // __kmpc_atomic_fixed1_andb 987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, 988 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div 989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, 990 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div 991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, 992 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul 993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, 994 0) // __kmpc_atomic_fixed1_orb 995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, 996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl 997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, 998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr 999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, 1000 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr 1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, 1002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub 1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, 1004 0) // __kmpc_atomic_fixed1_xor 1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, 1006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add 1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, 1008 0) // __kmpc_atomic_fixed2_andb 1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, 1010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div 1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, 1012 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div 1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, 1014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul 1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, 1016 0) // __kmpc_atomic_fixed2_orb 1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, 1018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl 1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, 1020 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr 1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, 1022 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr 1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, 1024 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub 1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, 1026 0) // __kmpc_atomic_fixed2_xor 1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, 1028 0) // __kmpc_atomic_fixed4_andb 1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, 1030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div 1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, 1032 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div 1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, 1034 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul 1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, 1036 0) // __kmpc_atomic_fixed4_orb 1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, 1038 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl 1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, 1040 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr 1041 
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, 1042 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr 1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, 1044 0) // __kmpc_atomic_fixed4_xor 1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, 1046 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb 1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, 1048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div 1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, 1050 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div 1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, 1052 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul 1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, 1054 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb 1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, 1056 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl 1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, 1058 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr 1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, 1060 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr 1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, 1062 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor 1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, 1064 KMP_ARCH_X86) // __kmpc_atomic_float4_div 1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, 1066 KMP_ARCH_X86) // __kmpc_atomic_float4_mul 1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, 1068 KMP_ARCH_X86) // __kmpc_atomic_float8_div 1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, 1070 KMP_ARCH_X86) // __kmpc_atomic_float8_mul 1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 1072 1073 /* ------------------------------------------------------------------------ */ 1074 /* Routines for C/C++ Reduction operators && and || */ 1075 1076 // ------------------------------------------------------------------------ 1077 // Need separate macros for &&, || because there is no combined assignment 1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used 1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1080 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1081 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1082 OP_CRITICAL(= *lhs OP, LCK_ID) \ 1083 } 1084 1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1086 1087 // ------------------------------------------------------------------------ 1088 // X86 or X86_64: no alignment problems =================================== 1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1090 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1091 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1092 OP_CMPXCHG(TYPE, BITS, OP) \ 1093 } 1094 1095 #else 1096 // ------------------------------------------------------------------------ 1097 // Code for other architectures that don't handle unaligned accesses. 
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1099 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1100 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1101 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1102 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1103 } else { \ 1104 KMP_CHECK_GTID; \ 1105 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ 1106 } \ 1107 } 1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1109 1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, 1111 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl 1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, 1113 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl 1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, 1115 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl 1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, 1117 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl 1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 1119 0) // __kmpc_atomic_fixed4_andl 1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 1121 0) // __kmpc_atomic_fixed4_orl 1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, 1123 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl 1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, 1125 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl 1126 1127 /* ------------------------------------------------------------------------- */ 1128 /* Routines for Fortran operators that matched no one in C: */ 1129 /* MAX, MIN, .EQV., .NEQV. */ 1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ 1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ 1132 1133 // ------------------------------------------------------------------------- 1134 // MIN and MAX need separate macros 1135 // OP - operator to check if we need any actions? 1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \ 1137 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1138 \ 1139 if (*lhs OP rhs) { /* still need actions? */ \ 1140 *lhs = rhs; \ 1141 } \ 1142 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1143 1144 // ------------------------------------------------------------------------- 1145 #ifdef KMP_GOMP_COMPAT 1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ 1147 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1148 KMP_CHECK_GTID; \ 1149 MIN_MAX_CRITSECT(OP, 0); \ 1150 return; \ 1151 } 1152 #else 1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) 1154 #endif /* KMP_GOMP_COMPAT */ 1155 1156 // ------------------------------------------------------------------------- 1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1158 { \ 1159 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1160 TYPE old_value; \ 1161 temp_val = *lhs; \ 1162 old_value = temp_val; \ 1163 while (old_value OP rhs && /* still need actions? */ \ 1164 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1165 (kmp_int##BITS *)lhs, \ 1166 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1167 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 1168 temp_val = *lhs; \ 1169 old_value = temp_val; \ 1170 } \ 1171 } 1172 1173 // ------------------------------------------------------------------------- 1174 // 1-byte, 2-byte operands - use critical section 1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1176 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1177 if (*lhs OP rhs) { /* need actions? 
*/ \ 1178 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1179 MIN_MAX_CRITSECT(OP, LCK_ID) \ 1180 } \ 1181 } 1182 1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1184 1185 // ------------------------------------------------------------------------- 1186 // X86 or X86_64: no alignment problems ==================================== 1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1188 GOMP_FLAG) \ 1189 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1190 if (*lhs OP rhs) { \ 1191 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1192 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1193 } \ 1194 } 1195 1196 #else 1197 // ------------------------------------------------------------------------- 1198 // Code for other architectures that don't handle unaligned accesses. 1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1200 GOMP_FLAG) \ 1201 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1202 if (*lhs OP rhs) { \ 1203 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1204 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1205 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1206 } else { \ 1207 KMP_CHECK_GTID; \ 1208 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ 1209 } \ 1210 } \ 1211 } 1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1213 1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, 1215 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max 1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, 1217 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min 1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, 1219 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max 1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, 1221 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min 1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 1223 0) // __kmpc_atomic_fixed4_max 1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, 1225 0) // __kmpc_atomic_fixed4_min 1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, 1227 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max 1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, 1229 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min 1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, 1231 KMP_ARCH_X86) // __kmpc_atomic_float4_max 1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, 1233 KMP_ARCH_X86) // __kmpc_atomic_float4_min 1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, 1235 KMP_ARCH_X86) // __kmpc_atomic_float8_max 1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, 1237 KMP_ARCH_X86) // __kmpc_atomic_float8_min 1238 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1239 MIN_MAX_CRITICAL(float10, max, long double, <, 10r, 1240 1) // __kmpc_atomic_float10_max 1241 MIN_MAX_CRITICAL(float10, min, long double, >, 10r, 1242 1) // __kmpc_atomic_float10_min 1243 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1244 #if KMP_HAVE_QUAD 1245 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1246 1) // __kmpc_atomic_float16_max 1247 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, 1248 1) // __kmpc_atomic_float16_min 1249 #if (KMP_ARCH_X86) 1250 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, 1251 1) // __kmpc_atomic_float16_max_a16 1252 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, 1253 1) // __kmpc_atomic_float16_min_a16 1254 #endif // (KMP_ARCH_X86) 1255 #endif // KMP_HAVE_QUAD 1256 // ------------------------------------------------------------------------ 1257 // Need separate macros for .EQV. 
because of the need of complement (~) 1258 // OP ignored for critical sections, ^=~ used instead 1259 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1260 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1261 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1262 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \ 1263 } 1264 1265 // ------------------------------------------------------------------------ 1266 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1267 // ------------------------------------------------------------------------ 1268 // X86 or X86_64: no alignment problems =================================== 1269 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1270 GOMP_FLAG) \ 1271 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1272 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1273 OP_CMPXCHG(TYPE, BITS, OP) \ 1274 } 1275 // ------------------------------------------------------------------------ 1276 #else 1277 // ------------------------------------------------------------------------ 1278 // Code for other architectures that don't handle unaligned accesses. 1279 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1280 GOMP_FLAG) \ 1281 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1282 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \ 1283 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1284 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1285 } else { \ 1286 KMP_CHECK_GTID; \ 1287 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \ 1288 } \ 1289 } 1290 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1291 1292 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, 1293 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv 1294 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, 1295 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv 1296 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, 1297 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv 1298 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, 1299 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv 1300 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, 1301 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv 1302 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, 1303 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv 1304 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, 1305 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv 1306 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, 1307 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv 1308 1309 // ------------------------------------------------------------------------ 1310 // Routines for Extended types: long double, _Quad, complex flavours (use 1311 // critical section) 1312 // TYPE_ID, OP_ID, TYPE - detailed above 1313 // OP - operator 1314 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1315 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1316 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1317 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 1318 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ 1319 } 1320 1321 /* ------------------------------------------------------------------------- */ 1322 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1323 // routines for long double type 1324 ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1325 1) // __kmpc_atomic_float10_add 1326 ATOMIC_CRITICAL(float10, sub, long double, -, 10r, 1327 1) // __kmpc_atomic_float10_sub 1328 ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1329 1) // 
__kmpc_atomic_float10_mul 1330 ATOMIC_CRITICAL(float10, div, long double, /, 10r, 1331 1) // __kmpc_atomic_float10_div 1332 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1333 #if KMP_HAVE_QUAD 1334 // routines for _Quad type 1335 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, 1336 1) // __kmpc_atomic_float16_add 1337 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, 1338 1) // __kmpc_atomic_float16_sub 1339 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, 1340 1) // __kmpc_atomic_float16_mul 1341 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, 1342 1) // __kmpc_atomic_float16_div 1343 #if (KMP_ARCH_X86) 1344 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, 1345 1) // __kmpc_atomic_float16_add_a16 1346 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, 1347 1) // __kmpc_atomic_float16_sub_a16 1348 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, 1349 1) // __kmpc_atomic_float16_mul_a16 1350 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, 1351 1) // __kmpc_atomic_float16_div_a16 1352 #endif // (KMP_ARCH_X86) 1353 #endif // KMP_HAVE_QUAD 1354 // routines for complex types 1355 1356 #if USE_CMPXCHG_FIX 1357 // workaround for C78287 (complex(kind=4) data type) 1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1359 1) // __kmpc_atomic_cmplx4_add 1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1361 1) // __kmpc_atomic_cmplx4_sub 1362 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1363 1) // __kmpc_atomic_cmplx4_mul 1364 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1365 1) // __kmpc_atomic_cmplx4_div 1366 // end of the workaround for C78287 1367 #else 1368 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add 1369 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub 1370 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul 1371 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div 1372 #endif // USE_CMPXCHG_FIX 1373 1374 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add 1375 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub 1376 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul 1377 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div 1378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1379 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1380 1) // __kmpc_atomic_cmplx10_add 1381 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, 1382 1) // __kmpc_atomic_cmplx10_sub 1383 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1384 1) // __kmpc_atomic_cmplx10_mul 1385 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1386 1) // __kmpc_atomic_cmplx10_div 1387 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1388 #if KMP_HAVE_QUAD 1389 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1390 1) // __kmpc_atomic_cmplx16_add 1391 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, 1392 1) // __kmpc_atomic_cmplx16_sub 1393 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, 1394 1) // __kmpc_atomic_cmplx16_mul 1395 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, 1396 1) // __kmpc_atomic_cmplx16_div 1397 #if (KMP_ARCH_X86) 1398 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1399 1) // __kmpc_atomic_cmplx16_add_a16 1400 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1401 1) // __kmpc_atomic_cmplx16_sub_a16 1402 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, 
*, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_REV(TYPE, OP, 0);                                              \
    return;                                                                    \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
                                                   TYPE *lhs, TYPE rhs) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = (TYPE)(rhs OP old_value);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = (TYPE)(rhs OP old_value);                                    \
    }                                                                          \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
1485 // TYPE - operand type 1486 // BITS - size in bits, used to distinguish low level calls 1487 // OP - operator (used in critical section) 1488 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1489 1490 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG 1491 // ------------------------------------------------------------------------ 1492 // Routines for ATOMIC integer operands, other operators 1493 // ------------------------------------------------------------------------ 1494 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG 1495 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, 1496 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev 1497 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, 1498 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev 1499 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, 1500 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev 1501 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, 1502 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev 1503 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, 1504 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev 1505 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, 1506 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev 1507 1508 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, 1509 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev 1510 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, 1511 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev 1512 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, 1513 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev 1514 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, 1515 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev 1516 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1517 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev 1518 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, 1519 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev 1520 1521 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, 1522 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev 1523 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, 1524 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev 1525 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, 1526 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev 1527 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, 1528 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev 1529 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, 1530 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev 1531 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, 1532 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev 1533 1534 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, 1535 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev 1536 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, 1537 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev 1538 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, 1539 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev 1540 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, 1541 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev 1542 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, 1543 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev 1544 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, 1545 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev 1546 1547 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, 1548 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev 1549 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, 1550 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev 1551 1552 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, 1553 KMP_ARCH_X86) // 
__kmpc_atomic_float8_div_rev 1554 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1555 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1556 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1557 1558 // ------------------------------------------------------------------------ 1559 // Routines for Extended types: long double, _Quad, complex flavours (use 1560 // critical section) 1561 // TYPE_ID, OP_ID, TYPE - detailed above 1562 // OP - operator 1563 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1564 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1565 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1566 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1567 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1568 } 1569 1570 /* ------------------------------------------------------------------------- */ 1571 // routines for long double type 1572 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1573 1) // __kmpc_atomic_float10_sub_rev 1574 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1575 1) // __kmpc_atomic_float10_div_rev 1576 #if KMP_HAVE_QUAD 1577 // routines for _Quad type 1578 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1579 1) // __kmpc_atomic_float16_sub_rev 1580 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1581 1) // __kmpc_atomic_float16_div_rev 1582 #if (KMP_ARCH_X86) 1583 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1584 1) // __kmpc_atomic_float16_sub_a16_rev 1585 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1586 1) // __kmpc_atomic_float16_div_a16_rev 1587 #endif // KMP_ARCH_X86 1588 #endif // KMP_HAVE_QUAD 1589 1590 // routines for complex types 1591 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1592 1) // __kmpc_atomic_cmplx4_sub_rev 1593 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1594 1) // __kmpc_atomic_cmplx4_div_rev 1595 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1596 1) // __kmpc_atomic_cmplx8_sub_rev 1597 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1598 1) // __kmpc_atomic_cmplx8_div_rev 1599 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1600 1) // __kmpc_atomic_cmplx10_sub_rev 1601 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1602 1) // __kmpc_atomic_cmplx10_div_rev 1603 #if KMP_HAVE_QUAD 1604 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1605 1) // __kmpc_atomic_cmplx16_sub_rev 1606 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1607 1) // __kmpc_atomic_cmplx16_div_rev 1608 #if (KMP_ARCH_X86) 1609 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1610 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1611 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1612 1) // __kmpc_atomic_cmplx16_div_a16_rev 1613 #endif // KMP_ARCH_X86 1614 #endif // KMP_HAVE_QUAD 1615 1616 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1617 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
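
// For illustration only (not part of the runtime): the "_rev" entry points
// generated above implement the OpenMP 4.0 form "x = expr binop x" for
// non-commutative operators. For example, a compiler could lower
//     int x;
//     #pragma omp atomic
//     x = 10 - x;
// to a call roughly like the following, where `loc` and `gtid` stand in for
// the source location and global thread id the compiler already has:
//
//   __kmpc_atomic_fixed4_sub_rev(&loc, gtid, &x, 10); // *lhs = 10 - *lhs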

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: in order to reduce the total number of type combinations, it is     */
/* assumed that the compiler converts RHS to the longest floating type,      */
/* that is _Quad, before calling any of these routines.                      */
/* Conversion to _Quad is done by the compiler during the calculation, and   */
/* conversion back to TYPE happens before the assignment, like:              */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of SW emulation use.            */
/* ------------------------------------------------------------------------ */
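
// For illustration only (not part of the runtime): a rough sketch of how a
// compiler might lower a mixed-precision update such as
//     float f;
//     double q;
//     #pragma omp atomic
//     f += q;
// to one of the "_fp" entry points generated below. The names `loc` and
// `gtid` are placeholders for the source location and thread id the compiler
// already has; the key point is that the RHS is passed as _Quad and the
// runtime performs *lhs = (float)((_Quad)(*lhs) + rhs) atomically:
//
//   __kmpc_atomic_float4_add_fp(&loc, gtid, &f, (_Quad)q);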

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
// use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_fp

// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  new_value = (*loc); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
  return new_value; \
  }

// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
// value doesn't work.
// Let's return the read value through the additional parameter.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*out) = (*loc); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
  }

#endif // KMP_OS_WINDOWS

// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_float8_rd

// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
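
// For illustration only (not part of the runtime): the read entry points
// generated above return the value of a location atomically, e.g. for
//     long long v, tmp;
//     #pragma omp atomic read
//     tmp = v;
// a compiler could emit something like the following, with `loc` and `gtid`
// as placeholders for the source location and thread id it already has:
//
//   tmp = __kmpc_atomic_fixed8_rd(&loc, gtid, &v);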

// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_FIXED##BITS(lhs, rhs); \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_REAL##BITS(lhs, rhs); \
  }

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  OP_CMPXCHG_WR(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, 1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, 1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, 1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, 1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, 1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, 1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
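
// For illustration only (not part of the runtime): the write entry points
// generated above store a value atomically with no read-back, e.g. for
//     double d;
//     #pragma omp atomic write
//     d = 1.0;
// a compiler could emit roughly the following, with `loc` and `gtid` as
// placeholders:
//
//   __kmpc_atomic_float8_wr(&loc, gtid, &d, 1.0);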

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) OP rhs; \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(old_value OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(old_value OP rhs); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }
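
// For illustration only (not part of the runtime): the "_cpt" entry points
// generated below from the macros above both update the location and return
// either the old or the new value, selected by the trailing `flag` argument
// (0 - capture the value before the update, 1 - after it). For example,
//     int x, v;
//     #pragma omp atomic capture
//     { v = x; x += 5; }     // capture the value *before* the update
// could be lowered to the following, with `loc` and `gtid` as placeholders:
//
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 5, 0);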

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE old_value, new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  if (flag) { \
    return old_value OP rhs; \
  } else \
    return old_value; \
  }
// -------------------------------------------------------------------------

ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    new_value OP rhs; \
    (*lhs) = new_value; \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_L_CPT(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
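
// For illustration only (not part of the runtime): the logical capture entry
// points above atomically perform *lhs = *lhs && rhs (resp. ||) and return
// the captured value; as with the other "_cpt" routines, the trailing flag
// selects which value is captured (0 - before the update, 1 - after it).
// A sketch of a direct call, with `loc` and `gtid` as placeholders:
//
//   kmp_int32 x = 3, v;
//   v = __kmpc_atomic_fixed4_andl_cpt(&loc, gtid, &x, 0, 1); // x becomes 0, v == 0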

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no exact match in C:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator, used to check whether any action is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  (void)new_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }
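
// For illustration only (not part of the runtime): the MIN/MAX capture
// routines generated below only store rhs when it actually improves on the
// current value (the OP argument is the "still need actions?" test), and
// return either the old value or rhs according to `flag`. A sketch of a
// direct call, with `loc` and `gtid` as placeholders:
//
//   kmp_int32 x = 3;
//   kmp_int32 v = __kmpc_atomic_fixed4_max_cpt(&loc, gtid, &x, 10, 1);
//   // x is now 10; with flag == 1 the captured value v is also 10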
MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
                     1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
2801 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2802 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2803 \ 2804 if (flag) { \ 2805 (*lhs) OP rhs; \ 2806 (*out) = (*lhs); \ 2807 } else { \ 2808 (*out) = (*lhs); \ 2809 (*lhs) OP rhs; \ 2810 } \ 2811 \ 2812 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2813 return; 2814 // ------------------------------------------------------------------------ 2815 2816 #ifdef KMP_GOMP_COMPAT 2817 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2818 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2819 KMP_CHECK_GTID; \ 2820 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2821 } 2822 #else 2823 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2824 #endif /* KMP_GOMP_COMPAT */ 2825 // ------------------------------------------------------------------------ 2826 2827 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2828 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2829 TYPE rhs, TYPE *out, int flag) { \ 2830 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2831 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2832 // ------------------------------------------------------------------------ 2833 2834 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2835 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2836 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2837 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2838 } 2839 // The end of workaround for cmplx4 2840 2841 /* ------------------------------------------------------------------------- */ 2842 // routines for long double type 2843 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2844 1) // __kmpc_atomic_float10_add_cpt 2845 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2846 1) // __kmpc_atomic_float10_sub_cpt 2847 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2848 1) // __kmpc_atomic_float10_mul_cpt 2849 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2850 1) // __kmpc_atomic_float10_div_cpt 2851 #if KMP_HAVE_QUAD 2852 // routines for _Quad type 2853 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2854 1) // __kmpc_atomic_float16_add_cpt 2855 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2856 1) // __kmpc_atomic_float16_sub_cpt 2857 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2858 1) // __kmpc_atomic_float16_mul_cpt 2859 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2860 1) // __kmpc_atomic_float16_div_cpt 2861 #if (KMP_ARCH_X86) 2862 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2863 1) // __kmpc_atomic_float16_add_a16_cpt 2864 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2865 1) // __kmpc_atomic_float16_sub_a16_cpt 2866 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2867 1) // __kmpc_atomic_float16_mul_a16_cpt 2868 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2869 1) // __kmpc_atomic_float16_div_a16_cpt 2870 #endif // (KMP_ARCH_X86) 2871 #endif // KMP_HAVE_QUAD 2872 2873 // routines for complex types 2874 2875 // cmplx4 routines to return void 2876 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2877 1) // __kmpc_atomic_cmplx4_add_cpt 2878 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2879 1) // __kmpc_atomic_cmplx4_sub_cpt 2880 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2881 1) // __kmpc_atomic_cmplx4_mul_cpt 2882 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2883 1) // __kmpc_atomic_cmplx4_div_cpt 2884 2885 ATOMIC_CRITICAL_CPT(cmplx8, 
add_cpt, kmp_cmplx64, +, 16c, 2886 1) // __kmpc_atomic_cmplx8_add_cpt 2887 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 2888 1) // __kmpc_atomic_cmplx8_sub_cpt 2889 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 2890 1) // __kmpc_atomic_cmplx8_mul_cpt 2891 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 2892 1) // __kmpc_atomic_cmplx8_div_cpt 2893 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 2894 1) // __kmpc_atomic_cmplx10_add_cpt 2895 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 2896 1) // __kmpc_atomic_cmplx10_sub_cpt 2897 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 2898 1) // __kmpc_atomic_cmplx10_mul_cpt 2899 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 2900 1) // __kmpc_atomic_cmplx10_div_cpt 2901 #if KMP_HAVE_QUAD 2902 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 2903 1) // __kmpc_atomic_cmplx16_add_cpt 2904 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 2905 1) // __kmpc_atomic_cmplx16_sub_cpt 2906 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 2907 1) // __kmpc_atomic_cmplx16_mul_cpt 2908 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 2909 1) // __kmpc_atomic_cmplx16_div_cpt 2910 #if (KMP_ARCH_X86) 2911 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 2912 1) // __kmpc_atomic_cmplx16_add_a16_cpt 2913 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 2914 1) // __kmpc_atomic_cmplx16_sub_a16_cpt 2915 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 2916 1) // __kmpc_atomic_cmplx16_mul_a16_cpt 2917 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 2918 1) // __kmpc_atomic_cmplx16_div_a16_cpt 2919 #endif // (KMP_ARCH_X86) 2920 #endif // KMP_HAVE_QUAD 2921 2922 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr 2923 // binop x; v = x; } for non-commutative operations. 
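//
// Illustration only (a sketch, not a prescribed lowering), assuming the usual
// id_ref/gtid arguments are available: a reversed capture such as
//   double v, x, expr;
//   #pragma omp atomic capture
//   { v = x; x = expr / x; }
// could be served by the entry point generated below as
//   v = __kmpc_atomic_float8_div_cpt_rev(id_ref, gtid, &x, expr,
//                                        0 /* capture value before update */);
// while flag == 1 captures the value after the update, matching the second
// form above.
//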
2924 // Supported only on IA-32 architecture and Intel(R) 64 2925 2926 // ------------------------------------------------------------------------- 2927 // Operation on *lhs, rhs bound by critical section 2928 // OP - operator (it's supposed to contain an assignment) 2929 // LCK_ID - lock identifier 2930 // Note: don't check gtid as it should always be valid 2931 // 1, 2-byte - expect valid parameter, other - check before this macro 2932 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 2933 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2934 \ 2935 if (flag) { \ 2936 /*temp_val = (*lhs);*/ \ 2937 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2938 new_value = (*lhs); \ 2939 } else { \ 2940 new_value = (*lhs); \ 2941 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2942 } \ 2943 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2944 return new_value; 2945 2946 // ------------------------------------------------------------------------ 2947 #ifdef KMP_GOMP_COMPAT 2948 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \ 2949 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2950 KMP_CHECK_GTID; \ 2951 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \ 2952 } 2953 #else 2954 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) 2955 #endif /* KMP_GOMP_COMPAT */ 2956 2957 // ------------------------------------------------------------------------ 2958 // Operation on *lhs, rhs using "compare_and_store" routine 2959 // TYPE - operands' type 2960 // BITS - size in bits, used to distinguish low level calls 2961 // OP - operator 2962 // Note: temp_val introduced in order to force the compiler to read 2963 // *lhs only once (w/o it the compiler reads *lhs twice) 2964 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2965 { \ 2966 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2967 TYPE old_value, new_value; \ 2968 temp_val = *lhs; \ 2969 old_value = temp_val; \ 2970 new_value = (TYPE)(rhs OP old_value); \ 2971 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2972 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2973 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2974 temp_val = *lhs; \ 2975 old_value = temp_val; \ 2976 new_value = (TYPE)(rhs OP old_value); \ 2977 } \ 2978 if (flag) { \ 2979 return new_value; \ 2980 } else \ 2981 return old_value; \ 2982 } 2983 2984 // ------------------------------------------------------------------------- 2985 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2986 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2987 TYPE new_value; \ 2988 (void)new_value; \ 2989 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 2990 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2991 } 2992 2993 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2994 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2995 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2996 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2997 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2999 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 3000 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 3001 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 3002 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 3003 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 3004 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 3005 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 3006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 3007 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, 3008 
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev 3009 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 3010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 3011 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 3012 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 3013 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 3014 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 3015 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 3016 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 3017 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 3018 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 3019 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 3020 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 3021 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 3022 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 3023 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 3024 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 3025 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 3026 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 3027 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 3028 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 3029 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 3030 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 3031 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 3032 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 3033 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 3034 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 3035 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 3036 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 3037 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 3038 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 3039 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 3040 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 3041 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 3042 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 3043 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 3044 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 3045 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 3046 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 3047 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 3048 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 3049 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 3050 3051 // ------------------------------------------------------------------------ 3052 // Routines for Extended types: long double, _Quad, complex flavours (use 3053 // critical section) 3054 // TYPE_ID, OP_ID, TYPE - detailed above 3055 // OP - operator 3056 // LCK_ID - lock identifier, used to possibly distinguish lock variable 3057 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 3058 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 3059 TYPE new_value; \ 3060 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 3061 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 3062 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 3063 } 3064 3065 /* ------------------------------------------------------------------------- */ 3066 // routines for long double type 3067 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 3068 1) // __kmpc_atomic_float10_sub_cpt_rev 3069 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, 3070 1) 
// __kmpc_atomic_float10_div_cpt_rev 3071 #if KMP_HAVE_QUAD 3072 // routines for _Quad type 3073 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 3074 1) // __kmpc_atomic_float16_sub_cpt_rev 3075 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 3076 1) // __kmpc_atomic_float16_div_cpt_rev 3077 #if (KMP_ARCH_X86) 3078 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 3079 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 3080 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 3081 1) // __kmpc_atomic_float16_div_a16_cpt_rev 3082 #endif // (KMP_ARCH_X86) 3083 #endif // KMP_HAVE_QUAD 3084 3085 // routines for complex types 3086 3087 // ------------------------------------------------------------------------ 3088 // Workaround for cmplx4. Regular routines with return value don't work 3089 // on Win_32e. Let's return captured values through the additional parameter. 3090 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3091 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3092 \ 3093 if (flag) { \ 3094 (*lhs) = (rhs)OP(*lhs); \ 3095 (*out) = (*lhs); \ 3096 } else { \ 3097 (*out) = (*lhs); \ 3098 (*lhs) = (rhs)OP(*lhs); \ 3099 } \ 3100 \ 3101 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3102 return; 3103 // ------------------------------------------------------------------------ 3104 3105 #ifdef KMP_GOMP_COMPAT 3106 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3107 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3108 KMP_CHECK_GTID; \ 3109 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3110 } 3111 #else 3112 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3113 #endif /* KMP_GOMP_COMPAT */ 3114 // ------------------------------------------------------------------------ 3115 3116 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3117 GOMP_FLAG) \ 3118 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3119 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3120 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3121 } 3122 // The end of workaround for cmplx4 3123 3124 // !!! 
TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp 3194 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3195 1, 3196 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3197 3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3199 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3200 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3201 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3203 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3205 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3206 3207 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3208 7, 3209 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3210 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3211 8i, 7, 3212 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3213 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3214 7, 3215 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3216 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3217 8i, 7, 3218 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3219 3220 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3221 4r, 3, 3222 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3223 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3224 4r, 3, 3225 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3226 3227 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3228 8r, 7, 3229 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3230 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3231 8r, 7, 3232 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3233 3234 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3235 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3236 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3237 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3238 3239 #endif // KMP_HAVE_QUAD 3240 3241 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3242 3243 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3244 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3245 TYPE rhs) { \ 3246 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3247 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3248 3249 #define CRITICAL_SWP(LCK_ID) \ 3250 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3251 \ 3252 old_value = (*lhs); \ 3253 (*lhs) = rhs; \ 3254 \ 3255 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3256 return old_value; 3257 3258 // ------------------------------------------------------------------------ 3259 #ifdef KMP_GOMP_COMPAT 3260 #define GOMP_CRITICAL_SWP(FLAG) \ 3261 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3262 KMP_CHECK_GTID; \ 3263 CRITICAL_SWP(0); \ 3264 } 3265 #else 3266 #define GOMP_CRITICAL_SWP(FLAG) 3267 #endif /* KMP_GOMP_COMPAT */ 3268 3269 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3270 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3271 TYPE old_value; \ 3272 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3273 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3274 return old_value; \ 3275 } 3276 // 
------------------------------------------------------------------------ 3277 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3278 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3279 TYPE old_value; \ 3280 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3281 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3282 return old_value; \ 3283 } 3284 3285 // ------------------------------------------------------------------------ 3286 #define CMPXCHG_SWP(TYPE, BITS) \ 3287 { \ 3288 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3289 TYPE old_value, new_value; \ 3290 temp_val = *lhs; \ 3291 old_value = temp_val; \ 3292 new_value = rhs; \ 3293 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3294 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3295 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3296 temp_val = *lhs; \ 3297 old_value = temp_val; \ 3298 new_value = rhs; \ 3299 } \ 3300 return old_value; \ 3301 } 3302 3303 // ------------------------------------------------------------------------- 3304 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3305 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3306 TYPE old_value; \ 3307 (void)old_value; \ 3308 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3309 CMPXCHG_SWP(TYPE, BITS) \ 3310 } 3311 3312 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3313 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3314 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3315 3316 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3317 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3318 3319 #if (KMP_ARCH_X86) 3320 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3321 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3322 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3323 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3324 #else 3325 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3326 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3327 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3328 #endif // (KMP_ARCH_X86) 3329 3330 // ------------------------------------------------------------------------ 3331 // Routines for Extended types: long double, _Quad, complex flavours (use 3332 // critical section) 3333 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3334 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3335 TYPE old_value; \ 3336 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3337 CRITICAL_SWP(LCK_ID) \ 3338 } 3339 3340 // ------------------------------------------------------------------------ 3341 // !!! TODO: check if we need to return void for cmplx4 routines 3342 // Workaround for cmplx4. Regular routines with return value don't work 3343 // on Win_32e. Let's return captured values through the additional parameter. 
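//
// Minimal usage sketch (assuming the usual id_ref/gtid arguments; not a
// prescribed lowering): since the captured kmp_cmplx32 is not returned by
// value here, it comes back through the extra 'out' parameter, e.g.
//   kmp_cmplx32 v, x, rhs;
//   // atomically performs { v = x; x = rhs; }:
//   // __kmpc_atomic_cmplx4_swp(id_ref, gtid, &x, rhs, &v);
//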
3344 3345 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3346 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3347 TYPE rhs, TYPE *out) { \ 3348 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3349 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3350 3351 #define CRITICAL_SWP_WRK(LCK_ID) \ 3352 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3353 \ 3354 tmp = (*lhs); \ 3355 (*lhs) = (rhs); \ 3356 (*out) = tmp; \ 3357 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3358 return; 3359 // ------------------------------------------------------------------------ 3360 3361 #ifdef KMP_GOMP_COMPAT 3362 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3363 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3364 KMP_CHECK_GTID; \ 3365 CRITICAL_SWP_WRK(0); \ 3366 } 3367 #else 3368 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3369 #endif /* KMP_GOMP_COMPAT */ 3370 // ------------------------------------------------------------------------ 3371 3372 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3373 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3374 TYPE tmp; \ 3375 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3376 CRITICAL_SWP_WRK(LCK_ID) \ 3377 } 3378 // The end of workaround for cmplx4 3379 3380 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3381 #if KMP_HAVE_QUAD 3382 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3383 #endif // KMP_HAVE_QUAD 3384 // cmplx4 routine to return void 3385 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3386 3387 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3388 // __kmpc_atomic_cmplx4_swp 3389 3390 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3391 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3392 #if KMP_HAVE_QUAD 3393 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3394 #if (KMP_ARCH_X86) 3395 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3396 1) // __kmpc_atomic_float16_a16_swp 3397 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3398 1) // __kmpc_atomic_cmplx16_a16_swp 3399 #endif // (KMP_ARCH_X86) 3400 #endif // KMP_HAVE_QUAD 3401 3402 // End of OpenMP 4.0 Capture 3403 3404 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3405 3406 #undef OP_CRITICAL 3407 3408 /* ------------------------------------------------------------------------ */ 3409 /* Generic atomic routines */ 3410 3411 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3412 void (*f)(void *, void *, void *)) { 3413 KMP_DEBUG_ASSERT(__kmp_init_serial); 3414 3415 if ( 3416 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3417 FALSE /* must use lock */ 3418 #else 3419 TRUE 3420 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3421 ) { 3422 kmp_int8 old_value, new_value; 3423 3424 old_value = *(kmp_int8 *)lhs; 3425 (*f)(&new_value, &old_value, rhs); 3426 3427 /* TODO: Should this be acquire or release? */ 3428 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3429 *(kmp_int8 *)&new_value)) { 3430 KMP_CPU_PAUSE(); 3431 3432 old_value = *(kmp_int8 *)lhs; 3433 (*f)(&new_value, &old_value, rhs); 3434 } 3435 3436 return; 3437 } else { 3438 // All 1-byte data is of integer data type. 
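    // When the lock-free compare-and-store path above is not used, the update
    // is serialized on an atomic lock and the caller-supplied routine 'f'
    // combines the operands directly into place: (*f)(lhs, lhs, rhs) below
    // behaves like *lhs = combine(*lhs, *rhs).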
3439 3440 #ifdef KMP_GOMP_COMPAT 3441 if (__kmp_atomic_mode == 2) { 3442 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3443 } else 3444 #endif /* KMP_GOMP_COMPAT */ 3445 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3446 3447 (*f)(lhs, lhs, rhs); 3448 3449 #ifdef KMP_GOMP_COMPAT 3450 if (__kmp_atomic_mode == 2) { 3451 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3452 } else 3453 #endif /* KMP_GOMP_COMPAT */ 3454 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3455 } 3456 } 3457 3458 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3459 void (*f)(void *, void *, void *)) { 3460 if ( 3461 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3462 FALSE /* must use lock */ 3463 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3464 TRUE /* no alignment problems */ 3465 #else 3466 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3467 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3468 ) { 3469 kmp_int16 old_value, new_value; 3470 3471 old_value = *(kmp_int16 *)lhs; 3472 (*f)(&new_value, &old_value, rhs); 3473 3474 /* TODO: Should this be acquire or release? */ 3475 while (!KMP_COMPARE_AND_STORE_ACQ16( 3476 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3477 KMP_CPU_PAUSE(); 3478 3479 old_value = *(kmp_int16 *)lhs; 3480 (*f)(&new_value, &old_value, rhs); 3481 } 3482 3483 return; 3484 } else { 3485 // All 2-byte data is of integer data type. 3486 3487 #ifdef KMP_GOMP_COMPAT 3488 if (__kmp_atomic_mode == 2) { 3489 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3490 } else 3491 #endif /* KMP_GOMP_COMPAT */ 3492 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3493 3494 (*f)(lhs, lhs, rhs); 3495 3496 #ifdef KMP_GOMP_COMPAT 3497 if (__kmp_atomic_mode == 2) { 3498 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3499 } else 3500 #endif /* KMP_GOMP_COMPAT */ 3501 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3502 } 3503 } 3504 3505 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3506 void (*f)(void *, void *, void *)) { 3507 KMP_DEBUG_ASSERT(__kmp_init_serial); 3508 3509 if ( 3510 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3511 // Gomp compatibility is broken if this routine is called for floats. 3512 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3513 TRUE /* no alignment problems */ 3514 #else 3515 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3516 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3517 ) { 3518 kmp_int32 old_value, new_value; 3519 3520 old_value = *(kmp_int32 *)lhs; 3521 (*f)(&new_value, &old_value, rhs); 3522 3523 /* TODO: Should this be acquire or release? */ 3524 while (!KMP_COMPARE_AND_STORE_ACQ32( 3525 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3526 KMP_CPU_PAUSE(); 3527 3528 old_value = *(kmp_int32 *)lhs; 3529 (*f)(&new_value, &old_value, rhs); 3530 } 3531 3532 return; 3533 } else { 3534 // Use __kmp_atomic_lock_4i for all 4-byte data, 3535 // even if it isn't of integer data type. 
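    // Note: under KMP_GOMP_COMPAT with __kmp_atomic_mode == 2 the single
    // global __kmp_atomic_lock is taken instead of the size-specific lock,
    // so these updates are also mutually exclusive with regions bracketed by
    // __kmpc_atomic_start()/__kmpc_atomic_end().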
3536 3537 #ifdef KMP_GOMP_COMPAT 3538 if (__kmp_atomic_mode == 2) { 3539 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3540 } else 3541 #endif /* KMP_GOMP_COMPAT */ 3542 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3543 3544 (*f)(lhs, lhs, rhs); 3545 3546 #ifdef KMP_GOMP_COMPAT 3547 if (__kmp_atomic_mode == 2) { 3548 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3549 } else 3550 #endif /* KMP_GOMP_COMPAT */ 3551 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3552 } 3553 } 3554 3555 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3556 void (*f)(void *, void *, void *)) { 3557 KMP_DEBUG_ASSERT(__kmp_init_serial); 3558 if ( 3559 3560 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3561 FALSE /* must use lock */ 3562 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3563 TRUE /* no alignment problems */ 3564 #else 3565 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3566 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3567 ) { 3568 kmp_int64 old_value, new_value; 3569 3570 old_value = *(kmp_int64 *)lhs; 3571 (*f)(&new_value, &old_value, rhs); 3572 /* TODO: Should this be acquire or release? */ 3573 while (!KMP_COMPARE_AND_STORE_ACQ64( 3574 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3575 KMP_CPU_PAUSE(); 3576 3577 old_value = *(kmp_int64 *)lhs; 3578 (*f)(&new_value, &old_value, rhs); 3579 } 3580 3581 return; 3582 } else { 3583 // Use __kmp_atomic_lock_8i for all 8-byte data, 3584 // even if it isn't of integer data type. 3585 3586 #ifdef KMP_GOMP_COMPAT 3587 if (__kmp_atomic_mode == 2) { 3588 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3589 } else 3590 #endif /* KMP_GOMP_COMPAT */ 3591 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3592 3593 (*f)(lhs, lhs, rhs); 3594 3595 #ifdef KMP_GOMP_COMPAT 3596 if (__kmp_atomic_mode == 2) { 3597 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3598 } else 3599 #endif /* KMP_GOMP_COMPAT */ 3600 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3601 } 3602 } 3603 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3604 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3605 void (*f)(void *, void *, void *)) { 3606 KMP_DEBUG_ASSERT(__kmp_init_serial); 3607 3608 #ifdef KMP_GOMP_COMPAT 3609 if (__kmp_atomic_mode == 2) { 3610 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3611 } else 3612 #endif /* KMP_GOMP_COMPAT */ 3613 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3614 3615 (*f)(lhs, lhs, rhs); 3616 3617 #ifdef KMP_GOMP_COMPAT 3618 if (__kmp_atomic_mode == 2) { 3619 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3620 } else 3621 #endif /* KMP_GOMP_COMPAT */ 3622 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3623 } 3624 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3625 3626 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3627 void (*f)(void *, void *, void *)) { 3628 KMP_DEBUG_ASSERT(__kmp_init_serial); 3629 3630 #ifdef KMP_GOMP_COMPAT 3631 if (__kmp_atomic_mode == 2) { 3632 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3633 } else 3634 #endif /* KMP_GOMP_COMPAT */ 3635 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3636 3637 (*f)(lhs, lhs, rhs); 3638 3639 #ifdef KMP_GOMP_COMPAT 3640 if (__kmp_atomic_mode == 2) { 3641 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3642 } else 3643 #endif /* KMP_GOMP_COMPAT */ 3644 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3645 } 3646 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3647 void __kmpc_atomic_20(ident_t 
*id_ref, int gtid, void *lhs, void *rhs, 3648 void (*f)(void *, void *, void *)) { 3649 KMP_DEBUG_ASSERT(__kmp_init_serial); 3650 3651 #ifdef KMP_GOMP_COMPAT 3652 if (__kmp_atomic_mode == 2) { 3653 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3654 } else 3655 #endif /* KMP_GOMP_COMPAT */ 3656 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3657 3658 (*f)(lhs, lhs, rhs); 3659 3660 #ifdef KMP_GOMP_COMPAT 3661 if (__kmp_atomic_mode == 2) { 3662 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3663 } else 3664 #endif /* KMP_GOMP_COMPAT */ 3665 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3666 } 3667 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3668 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3669 void (*f)(void *, void *, void *)) { 3670 KMP_DEBUG_ASSERT(__kmp_init_serial); 3671 3672 #ifdef KMP_GOMP_COMPAT 3673 if (__kmp_atomic_mode == 2) { 3674 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3675 } else 3676 #endif /* KMP_GOMP_COMPAT */ 3677 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3678 3679 (*f)(lhs, lhs, rhs); 3680 3681 #ifdef KMP_GOMP_COMPAT 3682 if (__kmp_atomic_mode == 2) { 3683 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3684 } else 3685 #endif /* KMP_GOMP_COMPAT */ 3686 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3687 } 3688 3689 // AC: same two routines as GOMP_atomic_start/end, but will be called by our 3690 // compiler; duplicated in order to not use 3-party names in pure Intel code 3691 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3692 void __kmpc_atomic_start(void) { 3693 int gtid = __kmp_entry_gtid(); 3694 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3695 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3696 } 3697 3698 void __kmpc_atomic_end(void) { 3699 int gtid = __kmp_get_gtid(); 3700 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3701 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3702 } 3703 3704 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3705 3706 // OpenMP 5.1 compare and swap 3707 3708 /*! 3709 @param loc Source code location 3710 @param gtid Global thread id 3711 @param x Memory location to operate on 3712 @param e Expected value 3713 @param d Desired value 3714 @return Result of comparison 3715 3716 Implements Compare And Swap atomic operation. 3717 3718 Sample code: 3719 #pragma omp atomic compare update capture 3720 { r = x == e; if(r) { x = d; } } 3721 */ 3722 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) { 3723 return KMP_COMPARE_AND_STORE_ACQ8(x, e, d); 3724 } 3725 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e, 3726 short d) { 3727 return KMP_COMPARE_AND_STORE_ACQ16(x, e, d); 3728 } 3729 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, 3730 kmp_int32 d) { 3731 return KMP_COMPARE_AND_STORE_ACQ32(x, e, d); 3732 } 3733 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e, 3734 kmp_int64 d) { 3735 return KMP_COMPARE_AND_STORE_ACQ64(x, e, d); 3736 } 3737 3738 /*! 3739 @param loc Source code location 3740 @param gtid Global thread id 3741 @param x Memory location to operate on 3742 @param e Expected value 3743 @param d Desired value 3744 @return Old value of x 3745 3746 Implements Compare And Swap atomic operation. 
Sample code:
#pragma omp atomic compare update capture
{ v = x; if (x == e) { x = d; } }
*/
char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
}
short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
}
kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
}
kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
}

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@param pv Captured value location
@return Result of comparison

Implements Compare And Swap + Capture atomic operation.

v gets the old value of x if the comparison failed; it is left untouched
otherwise.
Sample code:
#pragma omp atomic compare update capture
{ r = x == e; if(r) { x = d; } else { v = x; } }
*/
bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                  char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@param pv Captured value location
@return Old value of x

Implements Compare And Swap + Capture atomic operation.

v gets the new value of x.
Sample code:
#pragma omp atomic compare update capture
{ if (x == e) { x = d; }; v = x; }
*/
char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                 char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ?
d : old; 3849 return old; 3850 } 3851 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x, 3852 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) { 3853 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d); 3854 KMP_ASSERT(pv != NULL); 3855 *pv = old == e ? d : old; 3856 return old; 3857 } 3858 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x, 3859 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) { 3860 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d); 3861 KMP_ASSERT(pv != NULL); 3862 *pv = old == e ? d : old; 3863 return old; 3864 } 3865 3866 // End OpenMP 5.1 compare + capture 3867 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3868 3869 /*! 3870 @} 3871 */ 3872 3873 // end of file 3874