/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|-------------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
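
As a purely illustrative example (the exact lowering is compiler dependent), a
capture such as
@code
int v;
#pragma omp atomic capture
v = ++s;
@endcode
could be turned, by a compiler that chooses not to inline the operation, into a
call resembling
@code
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 1, 1); // flag==1: value after
@endcode
where `loc` and `gtid` stand for the source location and global thread id that
the compiler already has available.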

The one exception to this is the `complex<float>` type, where the value is not
returned; instead an extra argument pointer is passed.

They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
@endcode

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare.)
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.
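
As an illustration only (using the complex types defined by this runtime),
adding a double-precision complex value through the runtime rather than inline
would look like
@code
kmp_cmplx64 c, z; // complex<double>: two 8-byte components, 16 bytes overall
__kmpc_atomic_cmplx8_add(&loc, gtid, &c, z);
@endcode
where `loc` and `gtid` are again placeholders for the source location and
global thread id.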

@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex (8-byte) data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) OP(rhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
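
// Illustration (sketch only, not generated code): when GOMP_FLAG != 0 and
// __kmp_atomic_mode == 2, the OP_GOMP_CRITICAL macro defined below makes the
// entry point fall back to the single generic lock, roughly:
//
//   if (__kmp_atomic_mode == 2) {                 // GOMP compatibility mode
//     if (gtid == KMP_GTID_UNKNOWN)
//       gtid = __kmp_entry_gtid();                // KMP_CHECK_GTID
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); // lock id 0
//     (*lhs) += (rhs);                            // e.g. OP == +=
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }
//   // otherwise fall through to the lock-free / size-specific path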

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      old_value = *(TYPE volatile *)lhs; \
      new_value = old_value OP rhs; \
    } \
  }

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub
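
// Illustration (approximate expansion, x86 path, GOMP branch omitted): the
// entry generated by ATOMIC_CMPXCHG(float4, add, ...) above behaves roughly
// like
//
//   void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid, kmp_real32 *lhs,
//                                 kmp_real32 rhs) {
//     kmp_real32 old_value = *(kmp_real32 volatile *)lhs;
//     kmp_real32 new_value = old_value + rhs;
//     while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                         *(kmp_int32 *)&old_value,
//                                         *(kmp_int32 *)&new_value)) {
//       KMP_DO_PAUSE; // back off, re-read, recompute, retry
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + rhs;
//     }
//   }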

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, 0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, 0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, 0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, 0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, 0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, 0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                          */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no direct C counterpart:          */
/*   MAX, MIN, .EQV., .NEQV.                                                   */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}             */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}    */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether the update is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
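
// Illustration (sketch): for __kmpc_atomic_fixed4_max the MIN_MAX_CMPXCHG loop
// above only attempts the store while the update is still needed, i.e. while
// (*lhs < rhs) still holds; if another thread installs a value >= rhs first,
// the routine returns without writing anything:
//
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs &&                       // still need the update?
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(kmp_int32 *)&old_value,
//                                       *(kmp_int32 *)&rhs)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;                             // re-read and retry
//   }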
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~):
// x .EQV. rhs is ~(x ^ rhs), so the atomic update is *lhs ^= ~rhs.
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r, 1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r, 1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, 1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, 1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, 1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, 1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, 1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, 1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, 1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, 1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, 1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, 1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, 1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, 1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (rhs)OP(*lhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
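
// Illustration (sketch): a _rev entry computes rhs OP *lhs instead of
// *lhs OP rhs, which is what a statement like
//
//   #pragma omp atomic
//   x = 2.0 / x;
//
// needs; a compiler that does not inline it could emit something like
//   __kmpc_atomic_float8_div_rev(&loc, gtid, &x, 2.0);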

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_div_rev 1491 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1492 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1493 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1494 1495 // ------------------------------------------------------------------------ 1496 // Routines for Extended types: long double, _Quad, complex flavours (use 1497 // critical section) 1498 // TYPE_ID, OP_ID, TYPE - detailed above 1499 // OP - operator 1500 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1501 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1502 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1503 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1504 OP_CRITICAL_REV(OP, LCK_ID) \ 1505 } 1506 1507 /* ------------------------------------------------------------------------- */ 1508 // routines for long double type 1509 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1510 1) // __kmpc_atomic_float10_sub_rev 1511 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1512 1) // __kmpc_atomic_float10_div_rev 1513 #if KMP_HAVE_QUAD 1514 // routines for _Quad type 1515 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1516 1) // __kmpc_atomic_float16_sub_rev 1517 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1518 1) // __kmpc_atomic_float16_div_rev 1519 #if (KMP_ARCH_X86) 1520 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1521 1) // __kmpc_atomic_float16_sub_a16_rev 1522 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1523 1) // __kmpc_atomic_float16_div_a16_rev 1524 #endif // KMP_ARCH_X86 1525 #endif // KMP_HAVE_QUAD 1526 1527 // routines for complex types 1528 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1529 1) // __kmpc_atomic_cmplx4_sub_rev 1530 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1531 1) // __kmpc_atomic_cmplx4_div_rev 1532 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1533 1) // __kmpc_atomic_cmplx8_sub_rev 1534 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1535 1) // __kmpc_atomic_cmplx8_div_rev 1536 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1537 1) // __kmpc_atomic_cmplx10_sub_rev 1538 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1539 1) // __kmpc_atomic_cmplx10_div_rev 1540 #if KMP_HAVE_QUAD 1541 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1542 1) // __kmpc_atomic_cmplx16_sub_rev 1543 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1544 1) // __kmpc_atomic_cmplx16_div_rev 1545 #if (KMP_ARCH_X86) 1546 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1547 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1548 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1549 1) // __kmpc_atomic_cmplx16_div_a16_rev 1550 #endif // KMP_ARCH_X86 1551 #endif // KMP_HAVE_QUAD 1552 1553 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1554 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
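
// ------------------------------------------------------------------------
// Illustrative sketch (not compiled): given the macros above, an entry such
// as __kmpc_atomic_fixed4_sub_rev is roughly equivalent to the routine
// below -- a compare-and-store loop that installs the reversed update
// rhs - (*lhs). The debug assert, KA_TRACE call and the GOMP-compatibility
// path present in the real expansion are omitted here for brevity.
#if 0
void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs,
                                  kmp_int32 rhs) {
  kmp_int32 old_value, new_value;
  old_value = *lhs; // read the target once
  new_value = rhs - old_value; // reversed operand order: rhs OP (*lhs)
  while (!KMP_COMPARE_AND_STORE_ACQ32(
      (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
      *VOLATILE_CAST(kmp_int32 *) & new_value)) {
    KMP_DO_PAUSE; // another thread updated *lhs first: re-read and retry
    old_value = *lhs;
    new_value = rhs - old_value;
  }
}
#endif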
1555 1556 /* ------------------------------------------------------------------------ */ 1557 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */ 1558 /* Note: in order to reduce the total number of types combinations */ 1559 /* it is supposed that compiler converts RHS to longest floating type,*/ 1560 /* that is _Quad, before call to any of these routines */ 1561 /* Conversion to _Quad will be done by the compiler during calculation, */ 1562 /* conversion back to TYPE - before the assignment, like: */ 1563 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */ 1564 /* Performance penalty expected because of SW emulation use */ 1565 /* ------------------------------------------------------------------------ */ 1566 1567 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1568 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 1569 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \ 1570 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1571 KA_TRACE(100, \ 1572 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 1573 gtid)); 1574 1575 // ------------------------------------------------------------------------- 1576 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \ 1577 GOMP_FLAG) \ 1578 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1579 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \ 1580 OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \ 1581 } 1582 1583 // ------------------------------------------------------------------------- 1584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1585 // ------------------------------------------------------------------------- 1586 // X86 or X86_64: no alignment problems ==================================== 1587 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1588 LCK_ID, MASK, GOMP_FLAG) \ 1589 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1590 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1591 OP_CMPXCHG(TYPE, BITS, OP) \ 1592 } 1593 // ------------------------------------------------------------------------- 1594 #else 1595 // ------------------------------------------------------------------------ 1596 // Code for other architectures that don't handle unaligned accesses. 
1597 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1598 LCK_ID, MASK, GOMP_FLAG) \ 1599 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1600 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1601 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1602 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1603 } else { \ 1604 KMP_CHECK_GTID; \ 1605 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1606 } \ 1607 } 1608 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1609 1610 // ------------------------------------------------------------------------- 1611 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1612 // ------------------------------------------------------------------------- 1613 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1614 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1615 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1616 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1617 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1618 } 1619 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1620 LCK_ID, GOMP_FLAG) \ 1621 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1622 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1623 OP_CRITICAL_REV(OP, LCK_ID) \ 1624 } 1625 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1626 1627 // RHS=float8 1628 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1629 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1630 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1631 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1632 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1633 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1634 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1635 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1636 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1637 0) // __kmpc_atomic_fixed4_mul_float8 1638 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1639 0) // __kmpc_atomic_fixed4_div_float8 1640 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1641 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1642 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1643 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1644 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1645 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1646 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1647 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1649 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1651 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1652 1653 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1654 // use them) 1655 #if KMP_HAVE_QUAD 1656 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1657 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1658 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1659 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1660 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1661 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1663 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 1664 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 
8, *, fp, _Quad, 1i, 0, 1665 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1667 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1668 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1669 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1671 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1672 1673 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1674 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1675 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1676 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1677 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1678 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1680 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1681 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1682 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1684 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1685 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1686 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1688 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1689 1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1691 0) // __kmpc_atomic_fixed4_add_fp 1692 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1693 0) // __kmpc_atomic_fixed4u_add_fp 1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1695 0) // __kmpc_atomic_fixed4_sub_fp 1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1697 0) // __kmpc_atomic_fixed4u_sub_fp 1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1699 0) // __kmpc_atomic_fixed4_mul_fp 1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1701 0) // __kmpc_atomic_fixed4u_mul_fp 1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1703 0) // __kmpc_atomic_fixed4_div_fp 1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1705 0) // __kmpc_atomic_fixed4u_div_fp 1706 1707 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1708 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1709 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1710 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1712 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1714 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1716 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1718 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1720 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1722 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1723 1724 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1725 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1726 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, 1727 KMP_ARCH_X86) // 
__kmpc_atomic_float4_sub_fp 1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1729 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1731 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1732 1733 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1734 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1735 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1736 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1738 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1740 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1741 1742 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1743 1) // __kmpc_atomic_float10_add_fp 1744 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1745 1) // __kmpc_atomic_float10_sub_fp 1746 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1747 1) // __kmpc_atomic_float10_mul_fp 1748 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1749 1) // __kmpc_atomic_float10_div_fp 1750 1751 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1752 // Reverse operations 1753 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1754 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1755 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1756 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1758 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1760 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1761 1762 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1763 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1764 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1765 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1767 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1769 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1770 1771 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1772 0) // __kmpc_atomic_fixed4_sub_rev_fp 1773 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1774 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1776 0) // __kmpc_atomic_fixed4_div_rev_fp 1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1778 0) // __kmpc_atomic_fixed4u_div_rev_fp 1779 1780 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1781 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1782 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1783 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1785 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1787 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1788 1789 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1790 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp 
1791 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, 1792 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp 1793 1794 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, 1795 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp 1796 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, 1797 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp 1798 1799 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, 1800 1) // __kmpc_atomic_float10_sub_rev_fp 1801 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1802 1) // __kmpc_atomic_float10_div_rev_fp 1803 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1804 1805 #endif // KMP_HAVE_QUAD 1806 1807 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1808 // ------------------------------------------------------------------------ 1809 // X86 or X86_64: no alignment problems ==================================== 1810 #if USE_CMPXCHG_FIX 1811 // workaround for C78287 (complex(kind=4) data type) 1812 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1813 LCK_ID, MASK, GOMP_FLAG) \ 1814 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1815 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1816 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 1817 } 1818 // end of the second part of the workaround for C78287 1819 #else 1820 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1821 LCK_ID, MASK, GOMP_FLAG) \ 1822 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1823 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1824 OP_CMPXCHG(TYPE, BITS, OP) \ 1825 } 1826 #endif // USE_CMPXCHG_FIX 1827 #else 1828 // ------------------------------------------------------------------------ 1829 // Code for other architectures that don't handle unaligned accesses. 1830 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1831 LCK_ID, MASK, GOMP_FLAG) \ 1832 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1833 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1834 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1835 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1836 } else { \ 1837 KMP_CHECK_GTID; \ 1838 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1839 } \ 1840 } 1841 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1842 1843 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 1844 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 1845 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 1846 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8 1847 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 1848 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8 1849 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 1850 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 1851 1852 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 1853 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1854 1855 // ------------------------------------------------------------------------ 1856 // Atomic READ routines 1857 1858 // ------------------------------------------------------------------------ 1859 // Beginning of a definition (provides name, parameters, gebug trace) 1860 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1861 // fixed) 1862 // OP_ID - operation identifier (add, sub, mul, ...) 
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                     \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,       \
                                             TYPE *loc) {                     \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP)                                       \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    union f_i_union {                                                         \
      TYPE f_val;                                                             \
      kmp_int##BITS i_val;                                                    \
    };                                                                        \
    union f_i_union old_value;                                                \
    temp_val = *loc;                                                          \
    old_value.f_val = temp_val;                                               \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                        \
        (kmp_int##BITS *)loc,                                                 \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                    \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                   \
    new_value = old_value.f_val;                                              \
    return new_value;                                                         \
  }

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  new_value = (*loc);                                                         \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_READ(OP, 0);                                                  \
    return new_value;                                                         \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                               \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                    \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                             \
  return new_value;                                                           \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                               \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                    \
  OP_CMPXCHG_READ(TYPE, BITS, OP)                                             \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)     \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                               \
  TYPE new_value;                                                             \
1946 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 1947 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 1948 return new_value; \ 1949 } 1950 1951 // ------------------------------------------------------------------------ 1952 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 1953 // value doesn't work. 1954 // Let's return the read value through the additional parameter. 1955 #if (KMP_OS_WINDOWS) 1956 1957 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 1958 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1959 \ 1960 (*out) = (*loc); \ 1961 \ 1962 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1963 // ------------------------------------------------------------------------ 1964 #ifdef KMP_GOMP_COMPAT 1965 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 1966 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1967 KMP_CHECK_GTID; \ 1968 OP_CRITICAL_READ_WRK(OP, 0); \ 1969 } 1970 #else 1971 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 1972 #endif /* KMP_GOMP_COMPAT */ 1973 // ------------------------------------------------------------------------ 1974 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1975 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 1976 TYPE *loc) { \ 1977 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1978 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1979 1980 // ------------------------------------------------------------------------ 1981 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1982 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1983 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 1984 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 1985 } 1986 1987 #endif // KMP_OS_WINDOWS 1988 1989 // ------------------------------------------------------------------------ 1990 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1991 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 1992 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 1993 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 1994 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 1995 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 1996 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 1997 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 1998 1999 // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic 2000 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2001 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2002 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2003 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2004 2005 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2006 1) // __kmpc_atomic_float10_rd 2007 #if KMP_HAVE_QUAD 2008 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2009 1) // __kmpc_atomic_float16_rd 2010 #endif // KMP_HAVE_QUAD 2011 2012 // Fix for CQ220361 on Windows* OS 2013 #if (KMP_OS_WINDOWS) 2014 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2015 1) // __kmpc_atomic_cmplx4_rd 2016 #else 2017 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2018 1) // __kmpc_atomic_cmplx4_rd 2019 #endif // (KMP_OS_WINDOWS) 2020 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2021 1) // __kmpc_atomic_cmplx8_rd 2022 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2023 1) // __kmpc_atomic_cmplx10_rd 2024 #if KMP_HAVE_QUAD 2025 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2026 1) // __kmpc_atomic_cmplx16_rd 2027 #if (KMP_ARCH_X86) 2028 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2029 1) // __kmpc_atomic_float16_a16_rd 2030 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2031 1) // __kmpc_atomic_cmplx16_a16_rd 2032 #endif // (KMP_ARCH_X86) 2033 #endif // KMP_HAVE_QUAD 2034 2035 // ------------------------------------------------------------------------ 2036 // Atomic WRITE routines 2037 2038 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2039 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2040 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2041 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2042 } 2043 // ------------------------------------------------------------------------ 2044 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2045 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2046 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2047 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2048 } 2049 2050 // ------------------------------------------------------------------------ 2051 // Operation on *lhs, rhs using "compare_and_store" routine 2052 // TYPE - operands' type 2053 // BITS - size in bits, used to distinguish low level calls 2054 // OP - operator 2055 // Note: temp_val introduced in order to force the compiler to read 2056 // *lhs only once (w/o it the compiler reads *lhs twice) 2057 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2058 { \ 2059 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2060 TYPE old_value, new_value; \ 2061 temp_val = *lhs; \ 2062 old_value = temp_val; \ 2063 new_value = rhs; \ 2064 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2065 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2066 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2067 KMP_CPU_PAUSE(); \ 2068 \ 2069 temp_val = *lhs; \ 2070 old_value = temp_val; \ 2071 new_value = rhs; \ 2072 } \ 2073 } 2074 2075 // ------------------------------------------------------------------------- 2076 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2077 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2078 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2079 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2080 } 2081 2082 // ------------------------------------------------------------------------ 2083 // Routines for Extended types: long double, _Quad, complex flavours (use 2084 // critical section) 2085 // TYPE_ID, OP_ID, TYPE - detailed above 2086 // OP - operator 2087 // LCK_ID - lock identifier, used to 
possibly distinguish lock variable 2088 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2089 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2090 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ 2091 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ 2092 } 2093 // ------------------------------------------------------------------------- 2094 2095 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, 2096 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr 2097 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, 2098 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr 2099 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, 2100 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr 2101 #if (KMP_ARCH_X86) 2102 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, 2103 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2104 #else 2105 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, 2106 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2107 #endif // (KMP_ARCH_X86) 2108 2109 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, 2110 KMP_ARCH_X86) // __kmpc_atomic_float4_wr 2111 #if (KMP_ARCH_X86) 2112 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, 2113 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2114 #else 2115 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, 2116 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2117 #endif // (KMP_ARCH_X86) 2118 2119 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, 2120 1) // __kmpc_atomic_float10_wr 2121 #if KMP_HAVE_QUAD 2122 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, 2123 1) // __kmpc_atomic_float16_wr 2124 #endif // KMP_HAVE_QUAD 2125 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr 2126 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, 2127 1) // __kmpc_atomic_cmplx8_wr 2128 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, 2129 1) // __kmpc_atomic_cmplx10_wr 2130 #if KMP_HAVE_QUAD 2131 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, 2132 1) // __kmpc_atomic_cmplx16_wr 2133 #if (KMP_ARCH_X86) 2134 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, 2135 1) // __kmpc_atomic_float16_a16_wr 2136 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 2137 1) // __kmpc_atomic_cmplx16_a16_wr 2138 #endif // (KMP_ARCH_X86) 2139 #endif // KMP_HAVE_QUAD 2140 2141 // ------------------------------------------------------------------------ 2142 // Atomic CAPTURE routines 2143 2144 // Beginning of a definition (provides name, parameters, gebug trace) 2145 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2146 // fixed) 2147 // OP_ID - operation identifier (add, sub, mul, ...) 
2148 // TYPE - operands' type 2149 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 2150 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 2151 TYPE *lhs, TYPE rhs, int flag) { \ 2152 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2153 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2154 2155 // ------------------------------------------------------------------------- 2156 // Operation on *lhs, rhs bound by critical section 2157 // OP - operator (it's supposed to contain an assignment) 2158 // LCK_ID - lock identifier 2159 // Note: don't check gtid as it should always be valid 2160 // 1, 2-byte - expect valid parameter, other - check before this macro 2161 #define OP_CRITICAL_CPT(OP, LCK_ID) \ 2162 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2163 \ 2164 if (flag) { \ 2165 (*lhs) OP rhs; \ 2166 new_value = (*lhs); \ 2167 } else { \ 2168 new_value = (*lhs); \ 2169 (*lhs) OP rhs; \ 2170 } \ 2171 \ 2172 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2173 return new_value; 2174 2175 // ------------------------------------------------------------------------ 2176 #ifdef KMP_GOMP_COMPAT 2177 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \ 2178 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2179 KMP_CHECK_GTID; \ 2180 OP_CRITICAL_CPT(OP## =, 0); \ 2181 } 2182 #else 2183 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) 2184 #endif /* KMP_GOMP_COMPAT */ 2185 2186 // ------------------------------------------------------------------------ 2187 // Operation on *lhs, rhs using "compare_and_store" routine 2188 // TYPE - operands' type 2189 // BITS - size in bits, used to distinguish low level calls 2190 // OP - operator 2191 // Note: temp_val introduced in order to force the compiler to read 2192 // *lhs only once (w/o it the compiler reads *lhs twice) 2193 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2194 { \ 2195 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2196 TYPE old_value, new_value; \ 2197 temp_val = *lhs; \ 2198 old_value = temp_val; \ 2199 new_value = old_value OP rhs; \ 2200 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2201 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2202 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2203 KMP_CPU_PAUSE(); \ 2204 \ 2205 temp_val = *lhs; \ 2206 old_value = temp_val; \ 2207 new_value = old_value OP rhs; \ 2208 } \ 2209 if (flag) { \ 2210 return new_value; \ 2211 } else \ 2212 return old_value; \ 2213 } 2214 2215 // ------------------------------------------------------------------------- 2216 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2217 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2218 TYPE new_value; \ 2219 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2220 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2221 } 2222 2223 // ------------------------------------------------------------------------- 2224 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2225 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2226 TYPE old_value, new_value; \ 2227 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2228 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2229 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2230 if (flag) { \ 2231 return old_value OP rhs; \ 2232 } else \ 2233 return old_value; \ 2234 } 2235 // ------------------------------------------------------------------------- 2236 2237 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2238 0) // __kmpc_atomic_fixed4_add_cpt 2239 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2240 0) // 
__kmpc_atomic_fixed4_sub_cpt 2241 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2242 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2243 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2244 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2245 2246 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2247 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2248 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2249 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2250 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2251 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2252 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2253 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2254 2255 // ------------------------------------------------------------------------ 2256 // Entries definition for integer operands 2257 // TYPE_ID - operands type and size (fixed4, float4) 2258 // OP_ID - operation identifier (add, sub, mul, ...) 2259 // TYPE - operand type 2260 // BITS - size in bits, used to distinguish low level calls 2261 // OP - operator (used in critical section) 2262 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2263 // ------------------------------------------------------------------------ 2264 // Routines for ATOMIC integer operands, other operators 2265 // ------------------------------------------------------------------------ 2266 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2267 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2268 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2269 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2270 0) // __kmpc_atomic_fixed1_andb_cpt 2271 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2272 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2273 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2274 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2275 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2276 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2277 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2278 0) // __kmpc_atomic_fixed1_orb_cpt 2279 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2280 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2281 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2282 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2283 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2284 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2285 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2286 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2287 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2288 0) // __kmpc_atomic_fixed1_xor_cpt 2289 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2290 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2291 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2292 0) // __kmpc_atomic_fixed2_andb_cpt 2293 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2294 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2295 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2296 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2297 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 2298 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt 2299 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 2300 0) // __kmpc_atomic_fixed2_orb_cpt 2301 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, 2302 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 2303 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, 2304 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt 2305 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, 2306 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt 
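
// Illustrative usage sketch (hypothetical caller, for documentation only):
// for
//   int x, v;
//   #pragma omp atomic capture
//   v = x /= 2;
// a compiler that does not inline the sequence can call, e.g.,
//   v = __kmpc_atomic_fixed4_div_cpt(&loc, gtid, &x, 2, 1);
// where the last argument selects the captured value: 1 returns the value
// after the update, 0 the value before it (see OP_CMPXCHG_CPT above).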
2307 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, 2308 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt 2309 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 2310 0) // __kmpc_atomic_fixed2_xor_cpt 2311 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 2312 0) // __kmpc_atomic_fixed4_andb_cpt 2313 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, 2314 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt 2315 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, 2316 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt 2317 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, 2318 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt 2319 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 2320 0) // __kmpc_atomic_fixed4_orb_cpt 2321 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, 2322 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt 2323 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, 2324 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt 2325 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, 2326 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt 2327 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 2328 0) // __kmpc_atomic_fixed4_xor_cpt 2329 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, 2330 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt 2331 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, 2332 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt 2333 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, 2334 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt 2335 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, 2336 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt 2337 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, 2338 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt 2339 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, 2340 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt 2341 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, 2342 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt 2343 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, 2344 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt 2345 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, 2346 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt 2347 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, 2348 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt 2349 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, 2350 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt 2351 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, 2352 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt 2353 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, 2354 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt 2355 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2356 2357 // CAPTURE routines for mixed types RHS=float16 2358 #if KMP_HAVE_QUAD 2359 2360 // Beginning of a definition (provides name, parameters, gebug trace) 2361 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2362 // fixed) 2363 // OP_ID - operation identifier (add, sub, mul, ...) 
2364 // TYPE - operands' type 2365 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2366 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2367 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2368 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2369 KA_TRACE(100, \ 2370 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2371 gtid)); 2372 2373 // ------------------------------------------------------------------------- 2374 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2375 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2376 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2377 TYPE new_value; \ 2378 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2379 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2380 } 2381 2382 // ------------------------------------------------------------------------- 2383 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2384 LCK_ID, GOMP_FLAG) \ 2385 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2386 TYPE new_value; \ 2387 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ 2388 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ 2389 } 2390 2391 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2392 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2393 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2394 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2396 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2398 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2400 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2402 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2404 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2406 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2407 2408 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2409 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2410 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2411 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2413 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2415 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2417 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2419 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2421 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2423 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2424 2425 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2426 0) // __kmpc_atomic_fixed4_add_cpt_fp 2427 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2428 0) // __kmpc_atomic_fixed4u_add_cpt_fp 
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2430 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2432 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2434 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2436 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2438 0) // __kmpc_atomic_fixed4_div_cpt_fp 2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2440 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2441 2442 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2443 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2444 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2445 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2447 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2449 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2451 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2453 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2455 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2457 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2458 2459 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2460 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2461 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2462 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2464 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2466 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2467 2468 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2469 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2470 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2471 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2473 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2475 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2476 2477 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2478 1) // __kmpc_atomic_float10_add_cpt_fp 2479 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2480 1) // __kmpc_atomic_float10_sub_cpt_fp 2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2482 1) // __kmpc_atomic_float10_mul_cpt_fp 2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2484 1) // __kmpc_atomic_float10_div_cpt_fp 2485 2486 #endif // KMP_HAVE_QUAD 2487 2488 // 
------------------------------------------------------------------------ 2489 // Routines for C/C++ Reduction operators && and || 2490 2491 // ------------------------------------------------------------------------- 2492 // Operation on *lhs, rhs bound by critical section 2493 // OP - operator (it's supposed to contain an assignment) 2494 // LCK_ID - lock identifier 2495 // Note: don't check gtid as it should always be valid 2496 // 1, 2-byte - expect valid parameter, other - check before this macro 2497 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ 2498 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2499 \ 2500 if (flag) { \ 2501 new_value OP rhs; \ 2502 } else \ 2503 new_value = (*lhs); \ 2504 \ 2505 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2506 2507 // ------------------------------------------------------------------------ 2508 #ifdef KMP_GOMP_COMPAT 2509 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ 2510 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2511 KMP_CHECK_GTID; \ 2512 OP_CRITICAL_L_CPT(OP, 0); \ 2513 return new_value; \ 2514 } 2515 #else 2516 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) 2517 #endif /* KMP_GOMP_COMPAT */ 2518 2519 // ------------------------------------------------------------------------ 2520 // Need separate macros for &&, || because there is no combined assignment 2521 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2522 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2523 TYPE new_value; \ 2524 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ 2525 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2526 } 2527 2528 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, 2529 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt 2530 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, 2531 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt 2532 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, 2533 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt 2534 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, 2535 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt 2536 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 2537 0) // __kmpc_atomic_fixed4_andl_cpt 2538 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 2539 0) // __kmpc_atomic_fixed4_orl_cpt 2540 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, 2541 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt 2542 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, 2543 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt 2544 2545 // ------------------------------------------------------------------------- 2546 // Routines for Fortran operators that matched no one in C: 2547 // MAX, MIN, .EQV., .NEQV. 2548 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2549 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2550 2551 // ------------------------------------------------------------------------- 2552 // MIN and MAX need separate macros 2553 // OP - operator to check if we need any actions? 2554 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2555 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2556 \ 2557 if (*lhs OP rhs) { /* still need actions? 
*/ \ 2558 old_value = *lhs; \ 2559 *lhs = rhs; \ 2560 if (flag) \ 2561 new_value = rhs; \ 2562 else \ 2563 new_value = old_value; \ 2564 } else { \ 2565 new_value = *lhs; \ 2566 } \ 2567 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2568 return new_value; 2569 2570 // ------------------------------------------------------------------------- 2571 #ifdef KMP_GOMP_COMPAT 2572 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ 2573 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2574 KMP_CHECK_GTID; \ 2575 MIN_MAX_CRITSECT_CPT(OP, 0); \ 2576 } 2577 #else 2578 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) 2579 #endif /* KMP_GOMP_COMPAT */ 2580 2581 // ------------------------------------------------------------------------- 2582 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2583 { \ 2584 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2585 /*TYPE old_value; */ \ 2586 temp_val = *lhs; \ 2587 old_value = temp_val; \ 2588 while (old_value OP rhs && /* still need actions? */ \ 2589 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2590 (kmp_int##BITS *)lhs, \ 2591 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2592 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 2593 KMP_CPU_PAUSE(); \ 2594 temp_val = *lhs; \ 2595 old_value = temp_val; \ 2596 } \ 2597 if (flag) \ 2598 return rhs; \ 2599 else \ 2600 return old_value; \ 2601 } 2602 2603 // ------------------------------------------------------------------------- 2604 // 1-byte, 2-byte operands - use critical section 2605 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2606 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2607 TYPE new_value, old_value; \ 2608 if (*lhs OP rhs) { /* need actions? */ \ 2609 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2610 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2611 } \ 2612 return *lhs; \ 2613 } 2614 2615 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2616 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2617 TYPE new_value, old_value; \ 2618 if (*lhs OP rhs) { \ 2619 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2620 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2621 } \ 2622 return *lhs; \ 2623 } 2624 2625 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, 2626 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt 2627 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, 2628 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt 2629 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, 2630 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt 2631 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, 2632 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt 2633 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 2634 0) // __kmpc_atomic_fixed4_max_cpt 2635 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 2636 0) // __kmpc_atomic_fixed4_min_cpt 2637 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, 2638 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt 2639 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, 2640 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt 2641 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, 2642 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt 2643 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, 2644 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt 2645 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, 2646 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt 2647 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, 2648 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt 2649 #if KMP_HAVE_QUAD 2650 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 2651 1) // __kmpc_atomic_float16_max_cpt 
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_CPT(OP, 0);                                                   \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */             \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                              \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                   \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                       \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
2715 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2716 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2717 \ 2718 if (flag) { \ 2719 (*lhs) OP rhs; \ 2720 (*out) = (*lhs); \ 2721 } else { \ 2722 (*out) = (*lhs); \ 2723 (*lhs) OP rhs; \ 2724 } \ 2725 \ 2726 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2727 return; 2728 // ------------------------------------------------------------------------ 2729 2730 #ifdef KMP_GOMP_COMPAT 2731 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2732 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2733 KMP_CHECK_GTID; \ 2734 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2735 } 2736 #else 2737 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2738 #endif /* KMP_GOMP_COMPAT */ 2739 // ------------------------------------------------------------------------ 2740 2741 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2742 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2743 TYPE rhs, TYPE *out, int flag) { \ 2744 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2745 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2746 // ------------------------------------------------------------------------ 2747 2748 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2749 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2750 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2751 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2752 } 2753 // The end of workaround for cmplx4 2754 2755 /* ------------------------------------------------------------------------- */ 2756 // routines for long double type 2757 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2758 1) // __kmpc_atomic_float10_add_cpt 2759 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2760 1) // __kmpc_atomic_float10_sub_cpt 2761 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2762 1) // __kmpc_atomic_float10_mul_cpt 2763 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2764 1) // __kmpc_atomic_float10_div_cpt 2765 #if KMP_HAVE_QUAD 2766 // routines for _Quad type 2767 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2768 1) // __kmpc_atomic_float16_add_cpt 2769 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2770 1) // __kmpc_atomic_float16_sub_cpt 2771 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2772 1) // __kmpc_atomic_float16_mul_cpt 2773 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2774 1) // __kmpc_atomic_float16_div_cpt 2775 #if (KMP_ARCH_X86) 2776 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2777 1) // __kmpc_atomic_float16_add_a16_cpt 2778 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2779 1) // __kmpc_atomic_float16_sub_a16_cpt 2780 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2781 1) // __kmpc_atomic_float16_mul_a16_cpt 2782 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2783 1) // __kmpc_atomic_float16_div_a16_cpt 2784 #endif // (KMP_ARCH_X86) 2785 #endif // KMP_HAVE_QUAD 2786 2787 // routines for complex types 2788 2789 // cmplx4 routines to return void 2790 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2791 1) // __kmpc_atomic_cmplx4_add_cpt 2792 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2793 1) // __kmpc_atomic_cmplx4_sub_cpt 2794 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2795 1) // __kmpc_atomic_cmplx4_mul_cpt 2796 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2797 1) // __kmpc_atomic_cmplx4_div_cpt 2798 2799 ATOMIC_CRITICAL_CPT(cmplx8, 
                    add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: capture forms with reversed operands, i.e. v = x = expr binop x;
// { v = x; x = expr binop x; } and { x = expr binop x; v = x; }
// for non-commutative operations.
2838 // Supported only on IA-32 architecture and Intel(R) 64 2839 2840 // ------------------------------------------------------------------------- 2841 // Operation on *lhs, rhs bound by critical section 2842 // OP - operator (it's supposed to contain an assignment) 2843 // LCK_ID - lock identifier 2844 // Note: don't check gtid as it should always be valid 2845 // 1, 2-byte - expect valid parameter, other - check before this macro 2846 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \ 2847 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2848 \ 2849 if (flag) { \ 2850 /*temp_val = (*lhs);*/ \ 2851 (*lhs) = (rhs)OP(*lhs); \ 2852 new_value = (*lhs); \ 2853 } else { \ 2854 new_value = (*lhs); \ 2855 (*lhs) = (rhs)OP(*lhs); \ 2856 } \ 2857 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2858 return new_value; 2859 2860 // ------------------------------------------------------------------------ 2861 #ifdef KMP_GOMP_COMPAT 2862 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \ 2863 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2864 KMP_CHECK_GTID; \ 2865 OP_CRITICAL_CPT_REV(OP, 0); \ 2866 } 2867 #else 2868 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) 2869 #endif /* KMP_GOMP_COMPAT */ 2870 2871 // ------------------------------------------------------------------------ 2872 // Operation on *lhs, rhs using "compare_and_store" routine 2873 // TYPE - operands' type 2874 // BITS - size in bits, used to distinguish low level calls 2875 // OP - operator 2876 // Note: temp_val introduced in order to force the compiler to read 2877 // *lhs only once (w/o it the compiler reads *lhs twice) 2878 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2879 { \ 2880 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2881 TYPE old_value, new_value; \ 2882 temp_val = *lhs; \ 2883 old_value = temp_val; \ 2884 new_value = rhs OP old_value; \ 2885 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2886 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2887 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2888 KMP_CPU_PAUSE(); \ 2889 \ 2890 temp_val = *lhs; \ 2891 old_value = temp_val; \ 2892 new_value = rhs OP old_value; \ 2893 } \ 2894 if (flag) { \ 2895 return new_value; \ 2896 } else \ 2897 return old_value; \ 2898 } 2899 2900 // ------------------------------------------------------------------------- 2901 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2902 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2903 TYPE new_value; \ 2904 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 2905 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2906 } 2907 2908 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2909 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2910 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2911 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2912 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2913 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 2915 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 2917 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 2919 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 2920 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 2921 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 2922 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, 2923 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev 2924 
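// Illustrative note: for the *_cpt_rev routines the operand order is
// reversed, i.e. the update performed is *lhs = rhs OP *lhs. Omitting the
// GOMP-compatibility path, a routine generated from ATOMIC_CMPXCHG_CPT_REV --
// __kmpc_atomic_fixed1_sub_cpt_rev, for example -- behaves roughly like:
// @code
// kmp_int8 __kmpc_atomic_fixed1_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                           kmp_int8 *lhs, kmp_int8 rhs,
//                                           int flag) {
//   kmp_int8 old_value = *lhs;
//   kmp_int8 new_value = rhs - old_value; // reversed: rhs OP *lhs
//   while (!KMP_COMPARE_AND_STORE_ACQ8(lhs, old_value, new_value)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;
//     new_value = rhs - old_value;
//   }
//   return flag ? new_value : old_value; // value after / before the operation
// }
// @endcode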
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 2925 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 2927 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 2929 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 2931 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 2932 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 2933 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 2934 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 2935 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 2936 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 2937 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 2939 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 2941 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 2943 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 2944 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 2945 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 2946 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 2947 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 2948 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 2949 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 2951 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 2953 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 2955 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 2956 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 2957 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 2958 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 2959 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 2960 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 2961 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 2962 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 2963 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 2964 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2965 2966 // ------------------------------------------------------------------------ 2967 // Routines for Extended types: long double, _Quad, complex flavours (use 2968 // critical section) 2969 // TYPE_ID, OP_ID, TYPE - detailed above 2970 // OP - operator 2971 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2972 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2973 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2974 TYPE new_value; \ 2975 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 2976 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 2977 OP_CRITICAL_CPT_REV(OP, LCK_ID) \ 2978 } 2979 2980 /* ------------------------------------------------------------------------- */ 2981 // routines for long double type 2982 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 2983 1) // __kmpc_atomic_float10_sub_cpt_rev 2984 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, 2985 1) // __kmpc_atomic_float10_div_cpt_rev 2986 #if KMP_HAVE_QUAD 2987 // 
routines for _Quad type 2988 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 2989 1) // __kmpc_atomic_float16_sub_cpt_rev 2990 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 2991 1) // __kmpc_atomic_float16_div_cpt_rev 2992 #if (KMP_ARCH_X86) 2993 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 2994 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 2995 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 2996 1) // __kmpc_atomic_float16_div_a16_cpt_rev 2997 #endif // (KMP_ARCH_X86) 2998 #endif // KMP_HAVE_QUAD 2999 3000 // routines for complex types 3001 3002 // ------------------------------------------------------------------------ 3003 // Workaround for cmplx4. Regular routines with return value don't work 3004 // on Win_32e. Let's return captured values through the additional parameter. 3005 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3006 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3007 \ 3008 if (flag) { \ 3009 (*lhs) = (rhs)OP(*lhs); \ 3010 (*out) = (*lhs); \ 3011 } else { \ 3012 (*out) = (*lhs); \ 3013 (*lhs) = (rhs)OP(*lhs); \ 3014 } \ 3015 \ 3016 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3017 return; 3018 // ------------------------------------------------------------------------ 3019 3020 #ifdef KMP_GOMP_COMPAT 3021 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3022 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3023 KMP_CHECK_GTID; \ 3024 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3025 } 3026 #else 3027 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3028 #endif /* KMP_GOMP_COMPAT */ 3029 // ------------------------------------------------------------------------ 3030 3031 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3032 GOMP_FLAG) \ 3033 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3034 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3035 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3036 } 3037 // The end of workaround for cmplx4 3038 3039 // !!! 
TODO: check if we need to return void for cmplx4 routines 3040 // cmplx4 routines to return void 3041 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 3042 1) // __kmpc_atomic_cmplx4_sub_cpt_rev 3043 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 3044 1) // __kmpc_atomic_cmplx4_div_cpt_rev 3045 3046 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 3047 1) // __kmpc_atomic_cmplx8_sub_cpt_rev 3048 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 3049 1) // __kmpc_atomic_cmplx8_div_cpt_rev 3050 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 3051 1) // __kmpc_atomic_cmplx10_sub_cpt_rev 3052 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 3053 1) // __kmpc_atomic_cmplx10_div_cpt_rev 3054 #if KMP_HAVE_QUAD 3055 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 3056 1) // __kmpc_atomic_cmplx16_sub_cpt_rev 3057 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 3058 1) // __kmpc_atomic_cmplx16_div_cpt_rev 3059 #if (KMP_ARCH_X86) 3060 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 3061 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev 3062 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 3063 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev 3064 #endif // (KMP_ARCH_X86) 3065 #endif // KMP_HAVE_QUAD 3066 3067 // Capture reverse for mixed type: RHS=float16 3068 #if KMP_HAVE_QUAD 3069 3070 // Beginning of a definition (provides name, parameters, gebug trace) 3071 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 3072 // fixed) 3073 // OP_ID - operation identifier (add, sub, mul, ...) 3074 // TYPE - operands' type 3075 // ------------------------------------------------------------------------- 3076 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 3077 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 3078 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 3079 TYPE new_value; \ 3080 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 3081 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 3082 } 3083 3084 // ------------------------------------------------------------------------- 3085 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 3086 LCK_ID, GOMP_FLAG) \ 3087 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 3088 TYPE new_value; \ 3089 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \ 3090 OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \ 3091 } 3092 3093 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, 3094 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp 3095 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, 3096 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp 3097 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, 3098 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp 3099 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, 3100 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp 3101 3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, 3103 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp 3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 3105 1, 3106 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp 3107 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, 3108 KMP_ARCH_X86) // 
__kmpc_atomic_fixed2_div_cpt_rev_fp 3109 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3110 1, 3111 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3112 3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3114 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3115 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3116 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3117 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3118 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3119 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3120 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3121 3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3123 7, 3124 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3125 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3126 8i, 7, 3127 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3129 7, 3130 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3132 8i, 7, 3133 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3134 3135 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3136 4r, 3, 3137 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3138 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3139 4r, 3, 3140 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3141 3142 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3143 8r, 7, 3144 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3145 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3146 8r, 7, 3147 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3148 3149 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3150 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3151 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3152 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3153 3154 #endif // KMP_HAVE_QUAD 3155 3156 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3157 3158 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3159 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3160 TYPE rhs) { \ 3161 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3162 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3163 3164 #define CRITICAL_SWP(LCK_ID) \ 3165 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3166 \ 3167 old_value = (*lhs); \ 3168 (*lhs) = rhs; \ 3169 \ 3170 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3171 return old_value; 3172 3173 // ------------------------------------------------------------------------ 3174 #ifdef KMP_GOMP_COMPAT 3175 #define GOMP_CRITICAL_SWP(FLAG) \ 3176 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3177 KMP_CHECK_GTID; \ 3178 CRITICAL_SWP(0); \ 3179 } 3180 #else 3181 #define GOMP_CRITICAL_SWP(FLAG) 3182 #endif /* KMP_GOMP_COMPAT */ 3183 3184 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3185 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3186 TYPE old_value; \ 3187 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3188 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3189 return old_value; \ 3190 } 3191 // ------------------------------------------------------------------------ 3192 #define 
ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3193 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3194 TYPE old_value; \ 3195 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3196 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3197 return old_value; \ 3198 } 3199 3200 // ------------------------------------------------------------------------ 3201 #define CMPXCHG_SWP(TYPE, BITS) \ 3202 { \ 3203 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3204 TYPE old_value, new_value; \ 3205 temp_val = *lhs; \ 3206 old_value = temp_val; \ 3207 new_value = rhs; \ 3208 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3209 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3210 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3211 KMP_CPU_PAUSE(); \ 3212 \ 3213 temp_val = *lhs; \ 3214 old_value = temp_val; \ 3215 new_value = rhs; \ 3216 } \ 3217 return old_value; \ 3218 } 3219 3220 // ------------------------------------------------------------------------- 3221 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3222 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3223 TYPE old_value; \ 3224 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3225 CMPXCHG_SWP(TYPE, BITS) \ 3226 } 3227 3228 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3229 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3230 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3231 3232 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3233 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3234 3235 #if (KMP_ARCH_X86) 3236 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3237 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3238 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3239 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3240 #else 3241 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3242 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3243 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3244 #endif // (KMP_ARCH_X86) 3245 3246 // ------------------------------------------------------------------------ 3247 // Routines for Extended types: long double, _Quad, complex flavours (use 3248 // critical section) 3249 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3250 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3251 TYPE old_value; \ 3252 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3253 CRITICAL_SWP(LCK_ID) \ 3254 } 3255 3256 // ------------------------------------------------------------------------ 3257 // !!! TODO: check if we need to return void for cmplx4 routines 3258 // Workaround for cmplx4. Regular routines with return value don't work 3259 // on Win_32e. Let's return captured values through the additional parameter. 
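// For all other types the _swp routines defined above return the previous
// value directly, e.g. (illustrative usage only; x and new_val are
// placeholders):
// @code
//   kmp_real64 prev = __kmpc_atomic_float8_swp(id_ref, gtid, &x, new_val);
// @endcode
// For cmplx4 the previous value is instead written through the extra *out
// parameter of the _WRK variant defined below.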
3260 3261 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3262 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3263 TYPE rhs, TYPE *out) { \ 3264 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3265 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3266 3267 #define CRITICAL_SWP_WRK(LCK_ID) \ 3268 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3269 \ 3270 tmp = (*lhs); \ 3271 (*lhs) = (rhs); \ 3272 (*out) = tmp; \ 3273 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3274 return; 3275 // ------------------------------------------------------------------------ 3276 3277 #ifdef KMP_GOMP_COMPAT 3278 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3279 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3280 KMP_CHECK_GTID; \ 3281 CRITICAL_SWP_WRK(0); \ 3282 } 3283 #else 3284 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3285 #endif /* KMP_GOMP_COMPAT */ 3286 // ------------------------------------------------------------------------ 3287 3288 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3289 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3290 TYPE tmp; \ 3291 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3292 CRITICAL_SWP_WRK(LCK_ID) \ 3293 } 3294 // The end of workaround for cmplx4 3295 3296 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3297 #if KMP_HAVE_QUAD 3298 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3299 #endif // KMP_HAVE_QUAD 3300 // cmplx4 routine to return void 3301 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3302 3303 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3304 // __kmpc_atomic_cmplx4_swp 3305 3306 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3307 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3308 #if KMP_HAVE_QUAD 3309 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3310 #if (KMP_ARCH_X86) 3311 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3312 1) // __kmpc_atomic_float16_a16_swp 3313 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3314 1) // __kmpc_atomic_cmplx16_a16_swp 3315 #endif // (KMP_ARCH_X86) 3316 #endif // KMP_HAVE_QUAD 3317 3318 // End of OpenMP 4.0 Capture 3319 3320 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3321 3322 #undef OP_CRITICAL 3323 3324 /* ------------------------------------------------------------------------ */ 3325 /* Generic atomic routines */ 3326 3327 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3328 void (*f)(void *, void *, void *)) { 3329 KMP_DEBUG_ASSERT(__kmp_init_serial); 3330 3331 if ( 3332 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3333 FALSE /* must use lock */ 3334 #else 3335 TRUE 3336 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3337 ) { 3338 kmp_int8 old_value, new_value; 3339 3340 old_value = *(kmp_int8 *)lhs; 3341 (*f)(&new_value, &old_value, rhs); 3342 3343 /* TODO: Should this be acquire or release? */ 3344 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3345 *(kmp_int8 *)&new_value)) { 3346 KMP_CPU_PAUSE(); 3347 3348 old_value = *(kmp_int8 *)lhs; 3349 (*f)(&new_value, &old_value, rhs); 3350 } 3351 3352 return; 3353 } else { 3354 // All 1-byte data is of integer data type. 
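    // Lock-based fallback: under GOMP compatibility (__kmp_atomic_mode == 2)
    // all sizes serialize on the single __kmp_atomic_lock; otherwise the
    // size-specific __kmp_atomic_lock_1i is used. The user-supplied callback
    // then applies the operation in place via (*f)(lhs, lhs, rhs).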
3355 3356 #ifdef KMP_GOMP_COMPAT 3357 if (__kmp_atomic_mode == 2) { 3358 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3359 } else 3360 #endif /* KMP_GOMP_COMPAT */ 3361 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3362 3363 (*f)(lhs, lhs, rhs); 3364 3365 #ifdef KMP_GOMP_COMPAT 3366 if (__kmp_atomic_mode == 2) { 3367 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3368 } else 3369 #endif /* KMP_GOMP_COMPAT */ 3370 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3371 } 3372 } 3373 3374 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3375 void (*f)(void *, void *, void *)) { 3376 if ( 3377 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3378 FALSE /* must use lock */ 3379 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3380 TRUE /* no alignment problems */ 3381 #else 3382 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3383 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3384 ) { 3385 kmp_int16 old_value, new_value; 3386 3387 old_value = *(kmp_int16 *)lhs; 3388 (*f)(&new_value, &old_value, rhs); 3389 3390 /* TODO: Should this be acquire or release? */ 3391 while (!KMP_COMPARE_AND_STORE_ACQ16( 3392 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3393 KMP_CPU_PAUSE(); 3394 3395 old_value = *(kmp_int16 *)lhs; 3396 (*f)(&new_value, &old_value, rhs); 3397 } 3398 3399 return; 3400 } else { 3401 // All 2-byte data is of integer data type. 3402 3403 #ifdef KMP_GOMP_COMPAT 3404 if (__kmp_atomic_mode == 2) { 3405 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3406 } else 3407 #endif /* KMP_GOMP_COMPAT */ 3408 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3409 3410 (*f)(lhs, lhs, rhs); 3411 3412 #ifdef KMP_GOMP_COMPAT 3413 if (__kmp_atomic_mode == 2) { 3414 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3415 } else 3416 #endif /* KMP_GOMP_COMPAT */ 3417 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3418 } 3419 } 3420 3421 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3422 void (*f)(void *, void *, void *)) { 3423 KMP_DEBUG_ASSERT(__kmp_init_serial); 3424 3425 if ( 3426 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3427 // Gomp compatibility is broken if this routine is called for floats. 3428 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3429 TRUE /* no alignment problems */ 3430 #else 3431 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3432 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3433 ) { 3434 kmp_int32 old_value, new_value; 3435 3436 old_value = *(kmp_int32 *)lhs; 3437 (*f)(&new_value, &old_value, rhs); 3438 3439 /* TODO: Should this be acquire or release? */ 3440 while (!KMP_COMPARE_AND_STORE_ACQ32( 3441 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3442 KMP_CPU_PAUSE(); 3443 3444 old_value = *(kmp_int32 *)lhs; 3445 (*f)(&new_value, &old_value, rhs); 3446 } 3447 3448 return; 3449 } else { 3450 // Use __kmp_atomic_lock_4i for all 4-byte data, 3451 // even if it isn't of integer data type. 
3452 3453 #ifdef KMP_GOMP_COMPAT 3454 if (__kmp_atomic_mode == 2) { 3455 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3456 } else 3457 #endif /* KMP_GOMP_COMPAT */ 3458 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3459 3460 (*f)(lhs, lhs, rhs); 3461 3462 #ifdef KMP_GOMP_COMPAT 3463 if (__kmp_atomic_mode == 2) { 3464 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3465 } else 3466 #endif /* KMP_GOMP_COMPAT */ 3467 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3468 } 3469 } 3470 3471 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3472 void (*f)(void *, void *, void *)) { 3473 KMP_DEBUG_ASSERT(__kmp_init_serial); 3474 if ( 3475 3476 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3477 FALSE /* must use lock */ 3478 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3479 TRUE /* no alignment problems */ 3480 #else 3481 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3482 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3483 ) { 3484 kmp_int64 old_value, new_value; 3485 3486 old_value = *(kmp_int64 *)lhs; 3487 (*f)(&new_value, &old_value, rhs); 3488 /* TODO: Should this be acquire or release? */ 3489 while (!KMP_COMPARE_AND_STORE_ACQ64( 3490 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3491 KMP_CPU_PAUSE(); 3492 3493 old_value = *(kmp_int64 *)lhs; 3494 (*f)(&new_value, &old_value, rhs); 3495 } 3496 3497 return; 3498 } else { 3499 // Use __kmp_atomic_lock_8i for all 8-byte data, 3500 // even if it isn't of integer data type. 3501 3502 #ifdef KMP_GOMP_COMPAT 3503 if (__kmp_atomic_mode == 2) { 3504 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3505 } else 3506 #endif /* KMP_GOMP_COMPAT */ 3507 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3508 3509 (*f)(lhs, lhs, rhs); 3510 3511 #ifdef KMP_GOMP_COMPAT 3512 if (__kmp_atomic_mode == 2) { 3513 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3514 } else 3515 #endif /* KMP_GOMP_COMPAT */ 3516 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3517 } 3518 } 3519 3520 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3521 void (*f)(void *, void *, void *)) { 3522 KMP_DEBUG_ASSERT(__kmp_init_serial); 3523 3524 #ifdef KMP_GOMP_COMPAT 3525 if (__kmp_atomic_mode == 2) { 3526 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3527 } else 3528 #endif /* KMP_GOMP_COMPAT */ 3529 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3530 3531 (*f)(lhs, lhs, rhs); 3532 3533 #ifdef KMP_GOMP_COMPAT 3534 if (__kmp_atomic_mode == 2) { 3535 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3536 } else 3537 #endif /* KMP_GOMP_COMPAT */ 3538 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3539 } 3540 3541 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3542 void (*f)(void *, void *, void *)) { 3543 KMP_DEBUG_ASSERT(__kmp_init_serial); 3544 3545 #ifdef KMP_GOMP_COMPAT 3546 if (__kmp_atomic_mode == 2) { 3547 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3548 } else 3549 #endif /* KMP_GOMP_COMPAT */ 3550 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3551 3552 (*f)(lhs, lhs, rhs); 3553 3554 #ifdef KMP_GOMP_COMPAT 3555 if (__kmp_atomic_mode == 2) { 3556 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3557 } else 3558 #endif /* KMP_GOMP_COMPAT */ 3559 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3560 } 3561 3562 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3563 void (*f)(void *, void *, void *)) { 3564 KMP_DEBUG_ASSERT(__kmp_init_serial); 
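  // 20-byte operands (the cmplx10 flavour) have no native atomic support, so
  // this routine always takes the lock path: __kmp_atomic_lock_20c, or the
  // single __kmp_atomic_lock under GOMP compatibility.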
3565 3566 #ifdef KMP_GOMP_COMPAT 3567 if (__kmp_atomic_mode == 2) { 3568 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3569 } else 3570 #endif /* KMP_GOMP_COMPAT */ 3571 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3572 3573 (*f)(lhs, lhs, rhs); 3574 3575 #ifdef KMP_GOMP_COMPAT 3576 if (__kmp_atomic_mode == 2) { 3577 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3578 } else 3579 #endif /* KMP_GOMP_COMPAT */ 3580 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3581 } 3582 3583 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3584 void (*f)(void *, void *, void *)) { 3585 KMP_DEBUG_ASSERT(__kmp_init_serial); 3586 3587 #ifdef KMP_GOMP_COMPAT 3588 if (__kmp_atomic_mode == 2) { 3589 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3590 } else 3591 #endif /* KMP_GOMP_COMPAT */ 3592 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3593 3594 (*f)(lhs, lhs, rhs); 3595 3596 #ifdef KMP_GOMP_COMPAT 3597 if (__kmp_atomic_mode == 2) { 3598 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3599 } else 3600 #endif /* KMP_GOMP_COMPAT */ 3601 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3602 } 3603 3604 // AC: same two routines as GOMP_atomic_start/end, but will be called by our 3605 // compiler; duplicated in order to not use 3-party names in pure Intel code 3606 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3607 void __kmpc_atomic_start(void) { 3608 int gtid = __kmp_entry_gtid(); 3609 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3610 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3611 } 3612 3613 void __kmpc_atomic_end(void) { 3614 int gtid = __kmp_get_gtid(); 3615 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3616 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3617 } 3618 3619 /*! 3620 @} 3621 */ 3622 3623 // end of file 3624