/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}
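
// Illustrative note (added commentary): the loop above relies on kmp_uint32
// subtraction being well defined modulo 2^32. For example, with
// x = 0xFFFFFFFD and y = 0xFFFFFFFB, incrementing both three times wraps x
// through 0x00000000 while y reaches 0xFFFFFFFE, yet x - y is still 2.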

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  kmp_uint64 time;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  do {
#if !KMP_HAVE_UMWAIT
    __kmp_spin_backoff(&backoff);
#else
    if (!__kmp_tpause_enabled)
      __kmp_spin_backoff(&backoff);
#endif
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}
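
// Illustrative sketch (added commentary, not part of the runtime): the nested
// TAS routines above back omp_set_nest_lock()/omp_unset_nest_lock(). Assuming
// an initialized nested lock l and a thread with gtid g, the expected depth
// bookkeeping is:
//
//   __kmp_init_nested_tas_lock(&l);       // depth_locked == 0
//   __kmp_acquire_nested_tas_lock(&l, g); // KMP_LOCK_ACQUIRED_FIRST, depth 1
//   __kmp_acquire_nested_tas_lock(&l, g); // KMP_LOCK_ACQUIRED_NEXT,  depth 2
//   __kmp_release_nested_tas_lock(&l, g); // KMP_LOCK_STILL_HELD,     depth 1
//   __kmp_release_nested_tas_lock(&l, g); // KMP_LOCK_RELEASED,       depth 0
//   __kmp_destroy_nested_tas_lock(&l);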

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    long rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%ld errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
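
// Illustrative note (added commentary): next_ticket - now_serving, computed
// above as `distance`, approximates how many threads are still queued at
// release time. It only decides whether the releasing thread should yield;
// correctness relies solely on the release-store to now_serving.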

static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
*/
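
/* Worked example (illustrative only; added commentary following the transition
   table above). Threads A, B, C have gtids a, b, c, stored as gtid+1:

     initial                (head,tail) = ( 0 , 0 )   free, empty queue
     A acquires             ( 0 , 0 ) -> (-1 , 0 )    A holds, queue empty
     B contends, enqueues   (-1 , 0 ) -> (b+1,b+1)    B spins on its th_spin_here
     C contends, enqueues   (b+1,b+1) -> (b+1,c+1)    C linked via B's th_next_waiting
     A releases, wakes B    (b+1,c+1) -> (c+1,c+1)    B now owns, C still queued
     B releases, wakes C    (c+1,c+1) -> (-1 , 0 )    C now owns, queue empty
     C releases             (-1 , 0 ) -> ( 0 , 0 )    free again
*/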

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;

#if OMPT_SUPPORT
  ompt_state_t prev_state = ompt_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes to
     the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
     not premature. If the releasing thread sets spin_here_p to FALSE before
     this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

    switch (head) {

    case -1: {
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                accidentally; this assignment prevents us from entering the
                if ( t > 0 ) condition in the enqueued case below, which is not
                necessary for this state transition */

      /* try (-1,0)->(tid,tid) */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: {
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        enqueued = FALSE;
      } else {
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
    }
#endif

    if (enqueued) {
      if (tail > 0) {
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      KMP_MB();
      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
      // Synchronize writes to both runtime thread structures
      // and writes in user code.
      KMP_MB();

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD_OVERSUB();

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
  return retval;
}

static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_queuing_lock(lck, gtid);

  lck->lk.owner_id = gtid + 1;
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  kmp_int32 head;
#ifdef KMP_DEBUG
  kmp_info_t *this_thr;
#endif

  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
#ifdef KMP_DEBUG
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
#endif

  head = *head_id_p;

  if (head == 0) { /* nobody on queue, nobody holding */
    /* try (0,0)->(-1,0) */
    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
      KA_TRACE(1000,
               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
      KMP_FSYNC_ACQUIRED(lck);
      return TRUE;
    }
  }

  KA_TRACE(1000,
           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
  return FALSE;
}

static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                               kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_queuing_lock(lck, gtid);

  if (retval) {
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;

  KA_TRACE(1000,
           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
#if KMP_DEBUG || DEBUG_QUEUING_LOCKS
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
#endif
  KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "rel ent");

  if (this_thr->th.th_spin_here)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_FSYNC_RELEASING(lck);

  while (1) {
    kmp_int32 dequeued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
    tail = *tail_id_p;
    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
    if (head == 0)
      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
    KMP_DEBUG_ASSERT(head !=
                     0); /* holding the lock, head must be -1 or queue head */

    if (head == -1) { /* nobody on queue */
      /* try (-1,0)->(0,0) */
      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
        KA_TRACE(
            1000,
            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
#endif

#if OMPT_SUPPORT
        /* nothing to do - no other thread is trying to shift blame */
#endif
        return KMP_LOCK_RELEASED;
      }
      dequeued = FALSE;
    } else {
      KMP_MB();
      tail = *tail_id_p;
      if (head == tail) { /* only one thread on the queue */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0);

        /* try (h,h)->(-1,0) */
        dequeued = KMP_COMPARE_AND_STORE_REL64(
            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
            KMP_PACK_64(-1, 0));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
#endif

      } else {
        volatile kmp_int32 *waiting_id_p;
        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
        KMP_DEBUG_ASSERT(head_thr != NULL);
        waiting_id_p = &head_thr->th.th_next_waiting;

        /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0 || tail <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0 && tail > 0);

        /* try (h,t)->(h',t) or (t,t) */
        KMP_MB();
        /* make sure enqueuing thread has time to update next waiting thread
         * field */
        *head_id_p =
            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
        dequeued = TRUE;
      }
    }

    if (dequeued) {
      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
      KMP_DEBUG_ASSERT(head_thr != NULL);

      /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
      if (head <= 0 || tail <= 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
      KMP_DEBUG_ASSERT(head > 0 && tail > 0);

      /* For clean code only. Thread not released until next statement prevents
         race with acquire code. */
      head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
#endif

      KMP_MB();
      /* reset spin value */
      head_thr->th.th_spin_here = FALSE;

      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
                      "dequeuing\n",
                      lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "rel exit 2");
#endif
      return KMP_LOCK_RELEASED;
    }
    /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
       threads */

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "rel retry");
#endif

  } /* while */
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  lck->lk.owner_id = 0;
  return __kmp_release_queuing_lock(lck, gtid);
}

void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0; // no thread owns the lock.
  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
  lck->lk.initialized = lck;

  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.initialized = NULL;
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0;
  lck->lk.depth_locked = -1;
}

static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_queuing_lock(lck);
}

// nested queuing locks

int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
    KMP_MB();
    lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int
__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_queuing_lock(lck, gtid);
}

int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_queuing_lock(lck, gtid);
}

int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    KMP_MB();
    lck->lk.owner_id = 0;
    __kmp_release_queuing_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_queuing_lock(lck, gtid);
}

void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock(lck);
  lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_queuing_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
                                            const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
                                         kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

#if KMP_USE_ADAPTIVE_LOCKS

/* RTM Adaptive locks */

#if KMP_HAVE_RTM_INTRINSICS
#include <immintrin.h>
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#else

// Values from the status register after failed speculation.
#define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
#define _XABORT_CONFLICT (1 << 2)
#define _XABORT_CAPACITY (1 << 3)
#define _XABORT_DEBUG (1 << 4)
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)

// Access to RTM instructions
/* A version of XBegin which returns -1 on speculation, and the value of EAX on
   an abort. This is the same definition as the compiler intrinsic that will be
   supported at some point. */
static __inline int _xbegin() {
  int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#else /* IA32 */
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#endif // KMP_ARCH_X86_64
#else
  /* Note that %eax must be noted as killed (clobbered), because the XSR is
     returned in %eax(%rax) on abort. Other register values are restored, so
     don't need to be killed.

     We must also mark 'res' as an input and an output, since otherwise
     'res=-1' may be dropped as being dead, whereas we do need the assignment on
     the successful (i.e., non-abort) path. */
  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
                   "   .long  1f-1b-6\n"
                   "    jmp   2f\n"
                   "1:  movl  %%eax,%0\n"
                   "2:"
                   : "+r"(res)::"memory", "%eax");
#endif // KMP_OS_WINDOWS
  return res;
}

/* Transaction end */
static __inline void _xend() {
#if KMP_OS_WINDOWS
  __asm {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
  }
#else
  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
#endif
}

/* This is a macro, the argument must be a single byte constant which can be
   evaluated by the inline assembler, since it is emitted as a byte into the
   assembly code. */
// clang-format off
#if KMP_OS_WINDOWS
#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
#else
#define _xabort(ARG)                                                           \
  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
#endif
// clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
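
// Illustrative usage sketch (added commentary; the real speculation path lives
// in the adaptive-lock routines further below). A typical attempt looks like:
//
//   kmp_uint32 status = _xbegin();
//   if (status == _XBEGIN_STARTED) {
//     // Speculating: run the critical section; _xend() commits it. A
//     // conflicting write to anything we read (e.g. the lock word) aborts us.
//     _xend();
//   } else if (status & SOFT_ABORT_MASK) {
//     // Transient abort (retry/conflict/explicit): retrying may succeed.
//   } else {
//     // Persistent abort (e.g. capacity): fall back to the real lock.
//   }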
1812 void __kmp_init_speculative_stats() { 1813 kmp_adaptive_lock_info_t *lck = &liveLocks; 1814 1815 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, 1816 sizeof(lck->stats)); 1817 lck->stats.next = lck; 1818 lck->stats.prev = lck; 1819 1820 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1821 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1822 1823 __kmp_init_bootstrap_lock(&chain_lock); 1824 } 1825 1826 // Insert the lock into the circular list 1827 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1828 __kmp_acquire_bootstrap_lock(&chain_lock); 1829 1830 lck->stats.next = liveLocks.stats.next; 1831 lck->stats.prev = &liveLocks; 1832 1833 liveLocks.stats.next = lck; 1834 lck->stats.next->stats.prev = lck; 1835 1836 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1837 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1838 1839 __kmp_release_bootstrap_lock(&chain_lock); 1840 } 1841 1842 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1843 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1844 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1845 1846 kmp_adaptive_lock_info_t *n = lck->stats.next; 1847 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1848 1849 n->stats.prev = p; 1850 p->stats.next = n; 1851 } 1852 1853 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1854 memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, 1855 sizeof(lck->stats)); 1856 __kmp_remember_lock(lck); 1857 } 1858 1859 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1860 kmp_adaptive_lock_info_t *lck) { 1861 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1862 1863 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1864 t->successfulSpeculations += s->successfulSpeculations; 1865 t->hardFailedSpeculations += s->hardFailedSpeculations; 1866 t->softFailedSpeculations += s->softFailedSpeculations; 1867 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1868 t->lemmingYields += s->lemmingYields; 1869 } 1870 1871 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1872 __kmp_acquire_bootstrap_lock(&chain_lock); 1873 1874 __kmp_add_stats(&destroyedStats, lck); 1875 __kmp_forget_lock(lck); 1876 1877 __kmp_release_bootstrap_lock(&chain_lock); 1878 } 1879 1880 static float percent(kmp_uint32 count, kmp_uint32 total) { 1881 return (total == 0) ? 
0.0 : (100.0 * count) / total; 1882 } 1883 1884 void __kmp_print_speculative_stats() { 1885 kmp_adaptive_lock_statistics_t total = destroyedStats; 1886 kmp_adaptive_lock_info_t *lck; 1887 1888 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1889 __kmp_add_stats(&total, lck); 1890 } 1891 kmp_adaptive_lock_statistics_t *t = &total; 1892 kmp_uint32 totalSections = 1893 t->nonSpeculativeAcquires + t->successfulSpeculations; 1894 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1895 t->hardFailedSpeculations + 1896 t->softFailedSpeculations; 1897 if (totalSections <= 0) 1898 return; 1899 1900 kmp_safe_raii_file_t statsFile; 1901 if (strcmp(__kmp_speculative_statsfile, "-") == 0) { 1902 statsFile.set_stdout(); 1903 } else { 1904 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1905 char buffer[buffLen]; 1906 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1907 (kmp_int32)getpid()); 1908 statsFile.open(buffer, "w"); 1909 } 1910 1911 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1912 fprintf(statsFile, 1913 " Lock parameters: \n" 1914 " max_soft_retries : %10d\n" 1915 " max_badness : %10d\n", 1916 __kmp_adaptive_backoff_params.max_soft_retries, 1917 __kmp_adaptive_backoff_params.max_badness); 1918 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1919 t->nonSpeculativeAcquireAttempts); 1920 fprintf(statsFile, " Total critical sections : %10d\n", 1921 totalSections); 1922 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1923 t->successfulSpeculations, 1924 percent(t->successfulSpeculations, totalSections)); 1925 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1926 t->nonSpeculativeAcquires, 1927 percent(t->nonSpeculativeAcquires, totalSections)); 1928 fprintf(statsFile, " Lemming yields : %10d\n\n", 1929 t->lemmingYields); 1930 1931 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 1932 totalSpeculations); 1933 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 1934 t->successfulSpeculations, 1935 percent(t->successfulSpeculations, totalSpeculations)); 1936 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 1937 t->softFailedSpeculations, 1938 percent(t->softFailedSpeculations, totalSpeculations)); 1939 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 1940 t->hardFailedSpeculations, 1941 percent(t->hardFailedSpeculations, totalSpeculations)); 1942 } 1943 1944 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 1945 #else 1946 #define KMP_INC_STAT(lck, stat) 1947 1948 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 1949 1950 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 1951 // It is enough to check that the head_id is zero. 1952 // We don't also need to check the tail. 1953 bool res = lck->lk.head_id == 0; 1954 1955 // We need a fence here, since we must ensure that no memory operations 1956 // from later in this thread float above that read. 1957 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX 1958 _mm_mfence(); 1959 #else 1960 __sync_synchronize(); 1961 #endif 1962 1963 return res; 1964 } 1965 1966 // Functions for manipulating the badness 1967 static __inline void 1968 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 1969 // Reset the badness to zero so we eagerly try to speculate again 1970 lck->lk.adaptive.badness = 0; 1971 KMP_INC_STAT(lck, successfulSpeculations); 1972 } 1973 1974 // Create a bit mask with one more set bit. 
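// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime; guarded by #if 0): the
// "badness" value manipulated below is a low-order bit mask, and speculation
// is attempted only when (acquire_attempts & badness) == 0. With badness == 0
// every acquisition speculates; after repeated failures badness grows
// 1 -> 3 -> 7 -> ..., so speculation is retried only on every 2nd, 4th, 8th,
// ... attempt. This is a self-contained model of that arithmetic, with an
// invented stand-in for the RTM failure outcome.
#if 0
static void example_badness_gating() {
  unsigned badness = 0; // no failures yet: always speculate
  for (unsigned attempts = 1; attempts <= 16; ++attempts) {
    bool speculate = (attempts & badness) == 0;
    if (!speculate)
      continue; // too "bad" right now; would take the real lock instead
    bool failed = (attempts % 3 == 0); // stand-in for a failed speculation
    if (failed)
      badness = (badness << 1) | 1; // same step as __kmp_step_badness
    else
      badness = 0; // same reset as __kmp_update_badness_after_success
  }
}
#endif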
1975 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 1976 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 1977 if (newBadness > lck->lk.adaptive.max_badness) { 1978 return; 1979 } else { 1980 lck->lk.adaptive.badness = newBadness; 1981 } 1982 } 1983 1984 // Check whether speculation should be attempted. 1985 KMP_ATTRIBUTE_TARGET_RTM 1986 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 1987 kmp_int32 gtid) { 1988 kmp_uint32 badness = lck->lk.adaptive.badness; 1989 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 1990 int res = (attempts & badness) == 0; 1991 return res; 1992 } 1993 1994 // Attempt to acquire only the speculative lock. 1995 // Does not back off to the non-speculative lock. 1996 KMP_ATTRIBUTE_TARGET_RTM 1997 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 1998 kmp_int32 gtid) { 1999 int retries = lck->lk.adaptive.max_soft_retries; 2000 2001 // We don't explicitly count the start of speculation, rather we record the 2002 // results (success, hard fail, soft fail). The sum of all of those is the 2003 // total number of times we started speculation since all speculations must 2004 // end one of those ways. 2005 do { 2006 kmp_uint32 status = _xbegin(); 2007 // Switch this in to disable actual speculation but exercise at least some 2008 // of the rest of the code. Useful for debugging... 2009 // kmp_uint32 status = _XABORT_NESTED; 2010 2011 if (status == _XBEGIN_STARTED) { 2012 /* We have successfully started speculation. Check that no-one acquired 2013 the lock for real between when we last looked and now. This also gets 2014 the lock cache line into our read-set, which we need so that we'll 2015 abort if anyone later claims it for real. */ 2016 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2017 // Lock is now visibly acquired, so someone beat us to it. Abort the 2018 // transaction so we'll restart from _xbegin with the failure status. 2019 _xabort(0x01); 2020 KMP_ASSERT2(0, "should not get here"); 2021 } 2022 return 1; // Lock has been acquired (speculatively) 2023 } else { 2024 // We have aborted, update the statistics 2025 if (status & SOFT_ABORT_MASK) { 2026 KMP_INC_STAT(lck, softFailedSpeculations); 2027 // and loop round to retry. 2028 } else { 2029 KMP_INC_STAT(lck, hardFailedSpeculations); 2030 // Give up if we had a hard failure. 2031 break; 2032 } 2033 } 2034 } while (retries--); // Loop while we have retries, and didn't fail hard. 2035 2036 // Either we had a hard failure or we didn't succeed softly after 2037 // the full set of attempts, so back off the badness. 2038 __kmp_step_badness(lck); 2039 return 0; 2040 } 2041 2042 // Attempt to acquire the speculative lock, or back off to the non-speculative 2043 // one if the speculative lock cannot be acquired. 2044 // We can succeed speculatively, non-speculatively, or fail. 2045 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2046 // First try to acquire the lock speculatively 2047 if (__kmp_should_speculate(lck, gtid) && 2048 __kmp_test_adaptive_lock_only(lck, gtid)) 2049 return 1; 2050 2051 // Speculative acquisition failed, so try to acquire it non-speculatively. 2052 // Count the non-speculative acquire attempt 2053 lck->lk.adaptive.acquire_attempts++; 2054 2055 // Use base, non-speculative lock. 
  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
    KMP_INC_STAT(lck, nonSpeculativeAcquires);
    return 1; // Lock is acquired (non-speculatively)
  } else {
    return 0; // Failed to acquire the lock, it's already visibly locked.
  }
}

static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }

  int retval = __kmp_test_adaptive_lock(lck, gtid);

  if (retval) {
    lck->lk.qlk.owner_id = gtid + 1;
  }
  return retval;
}

// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to see
// if it is free, and, if not, pause without attempting to acquire it until it
// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively
// until the queue of waiting threads has cleared), we don't get into a state
// where we can never acquire the lock speculatively (because we force the queue
// to clear by preventing new arrivals from entering the queue). This does mean
// that when we're trying to break the lemming effect, the lock is no longer
// fair. However, OpenMP makes no guarantee that its locks are fair, so this
// isn't a real problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will also see the same result for
      // __kmp_should_speculate, so will be doing the same if they try to
      // claim the lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        KMP_YIELD(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt.
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
}

static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_adaptive_lock(lck, gtid);

  lck->lk.qlk.owner_id = gtid + 1;
}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
2142 // (Or the user's code is buggy and they're releasing without locking; 2143 // if we had XTEST we'd be able to check that case...) 2144 _xend(); // Exit speculation 2145 __kmp_update_badness_after_success(lck); 2146 } else { // Since the lock *is* visibly locked we're not speculating, 2147 // so should use the underlying lock's release scheme. 2148 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2149 } 2150 return KMP_LOCK_RELEASED; 2151 } 2152 2153 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2154 kmp_int32 gtid) { 2155 char const *const func = "omp_unset_lock"; 2156 KMP_MB(); /* in case another processor initialized lock */ 2157 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2158 KMP_FATAL(LockIsUninitialized, func); 2159 } 2160 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2161 KMP_FATAL(LockUnsettingFree, func); 2162 } 2163 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2164 KMP_FATAL(LockUnsettingSetByAnother, func); 2165 } 2166 lck->lk.qlk.owner_id = 0; 2167 __kmp_release_adaptive_lock(lck, gtid); 2168 return KMP_LOCK_RELEASED; 2169 } 2170 2171 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2172 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2173 lck->lk.adaptive.badness = 0; 2174 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2175 lck->lk.adaptive.max_soft_retries = 2176 __kmp_adaptive_backoff_params.max_soft_retries; 2177 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2178 #if KMP_DEBUG_ADAPTIVE_LOCKS 2179 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2180 #endif 2181 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2182 } 2183 2184 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2185 #if KMP_DEBUG_ADAPTIVE_LOCKS 2186 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2187 #endif 2188 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2189 // Nothing needed for the speculative part. 2190 } 2191 2192 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2193 char const *const func = "omp_destroy_lock"; 2194 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2195 KMP_FATAL(LockIsUninitialized, func); 2196 } 2197 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2198 KMP_FATAL(LockStillOwned, func); 2199 } 2200 __kmp_destroy_adaptive_lock(lck); 2201 } 2202 2203 #endif // KMP_USE_ADAPTIVE_LOCKS 2204 2205 /* ------------------------------------------------------------------------ */ 2206 /* DRDPA ticket locks */ 2207 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2208 2209 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2210 return lck->lk.owner_id - 1; 2211 } 2212 2213 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2214 return lck->lk.depth_locked != -1; 2215 } 2216 2217 __forceinline static int 2218 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2219 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket); 2220 kmp_uint64 mask = lck->lk.mask; // atomic load 2221 std::atomic<kmp_uint64> *polls = lck->lk.polls; 2222 2223 #ifdef USE_LOCK_PROFILE 2224 if (polls[ticket & mask] != ticket) 2225 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2226 /* else __kmp_printf( "." );*/ 2227 #endif /* USE_LOCK_PROFILE */ 2228 2229 // Now spin-wait, but reload the polls pointer and mask, in case the 2230 // polling area has been reconfigured. 
Unless it is reconfigured, the
  // reloads stay in L1 cache and are cheap.
  //
  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
  // The current implementation of KMP_WAIT doesn't allow for mask
  // and poll to be re-read every spin iteration.
  kmp_uint32 spins;
  kmp_uint64 time;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  while (polls[ticket & mask] < ticket) { // atomic load
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
    // Re-read the mask and the poll pointer from the lock structure.
    //
    // Make certain that "mask" is read before "polls" !!!
    //
    // If another thread reconfigures the polling area and updates these
    // values, and we get the new value of mask and the old polls pointer, we
    // could access memory beyond the end of the old polling area.
    mask = lck->lk.mask; // atomic load
    polls = lck->lk.polls; // atomic load
  }

  // Critical section starts here
  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
                  ticket, lck));
  lck->lk.now_serving = ticket; // non-volatile store

  // Deallocate a garbage polling area if we know that we are the last
  // thread that could possibly access it.
  //
  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
  // ticket.
  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
    __kmp_free(lck->lk.old_polls);
    lck->lk.old_polls = NULL;
    lck->lk.cleanup_ticket = 0;
  }

  // Check to see if we should reconfigure the polling area.
  // If there is still a garbage polling area to be deallocated from a
  // previous reconfiguration, let a later thread reconfigure it.
  if (lck->lk.old_polls == NULL) {
    bool reconfigure = false;
    std::atomic<kmp_uint64> *old_polls = polls;
    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);

    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      // We are in oversubscription mode. Contract the polling area
      // down to a single location, if that hasn't been done already.
      if (num_polls > 1) {
        reconfigure = true;
        num_polls = TCR_4(lck->lk.num_polls);
        mask = 0;
        num_polls = 1;
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        polls[0] = ticket;
      }
    } else {
      // We are in under/fully subscribed mode. Check the number of
      // threads waiting on the lock. The size of the polling area
      // should be at least the number of threads waiting.
      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
      if (num_waiting > num_polls) {
        kmp_uint32 old_num_polls = num_polls;
        reconfigure = true;
        do {
          mask = (mask << 1) | 1;
          num_polls *= 2;
        } while (num_polls <= num_waiting);

        // Allocate the new polling area, and copy the relevant portion
        // of the old polling area to the new area. __kmp_allocate()
        // zeroes the memory it allocates, and most of the old area is
        // just zero padding, so we only copy the release counters.
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        kmp_uint32 i;
        for (i = 0; i < old_num_polls; i++) {
          polls[i].store(old_polls[i]);
        }
      }
    }

    if (reconfigure) {
      // Now write the updated fields back to the lock structure.
2320 // 2321 // Make certain that "polls" is written before "mask" !!! 2322 // 2323 // If another thread picks up the new value of mask and the old polls 2324 // pointer , it could access memory beyond the end of the old polling 2325 // area. 2326 // 2327 // On x86, we need memory fences. 2328 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2329 "lock %p to %d polls\n", 2330 ticket, lck, num_polls)); 2331 2332 lck->lk.old_polls = old_polls; 2333 lck->lk.polls = polls; // atomic store 2334 2335 KMP_MB(); 2336 2337 lck->lk.num_polls = num_polls; 2338 lck->lk.mask = mask; // atomic store 2339 2340 KMP_MB(); 2341 2342 // Only after the new polling area and mask have been flushed 2343 // to main memory can we update the cleanup ticket field. 2344 // 2345 // volatile load / non-volatile store 2346 lck->lk.cleanup_ticket = lck->lk.next_ticket; 2347 } 2348 } 2349 return KMP_LOCK_ACQUIRED_FIRST; 2350 } 2351 2352 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2353 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2354 return retval; 2355 } 2356 2357 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2358 kmp_int32 gtid) { 2359 char const *const func = "omp_set_lock"; 2360 if (lck->lk.initialized != lck) { 2361 KMP_FATAL(LockIsUninitialized, func); 2362 } 2363 if (__kmp_is_drdpa_lock_nestable(lck)) { 2364 KMP_FATAL(LockNestableUsedAsSimple, func); 2365 } 2366 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2367 KMP_FATAL(LockIsAlreadyOwned, func); 2368 } 2369 2370 __kmp_acquire_drdpa_lock(lck, gtid); 2371 2372 lck->lk.owner_id = gtid + 1; 2373 return KMP_LOCK_ACQUIRED_FIRST; 2374 } 2375 2376 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2377 // First get a ticket, then read the polls pointer and the mask. 2378 // The polls pointer must be read before the mask!!! (See above) 2379 kmp_uint64 ticket = lck->lk.next_ticket; // atomic load 2380 std::atomic<kmp_uint64> *polls = lck->lk.polls; 2381 kmp_uint64 mask = lck->lk.mask; // atomic load 2382 if (polls[ticket & mask] == ticket) { 2383 kmp_uint64 next_ticket = ticket + 1; 2384 if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket, 2385 next_ticket)) { 2386 KMP_FSYNC_ACQUIRED(lck); 2387 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2388 ticket, lck)); 2389 lck->lk.now_serving = ticket; // non-volatile store 2390 2391 // Since no threads are waiting, there is no possibility that we would 2392 // want to reconfigure the polling area. We might have the cleanup ticket 2393 // value (which says that it is now safe to deallocate old_polls), but 2394 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2395 // routine isn't supposed to block, and we would risk blocks if we called 2396 // __kmp_free() to do the deallocation. 
2397 return TRUE; 2398 } 2399 } 2400 return FALSE; 2401 } 2402 2403 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2404 kmp_int32 gtid) { 2405 char const *const func = "omp_test_lock"; 2406 if (lck->lk.initialized != lck) { 2407 KMP_FATAL(LockIsUninitialized, func); 2408 } 2409 if (__kmp_is_drdpa_lock_nestable(lck)) { 2410 KMP_FATAL(LockNestableUsedAsSimple, func); 2411 } 2412 2413 int retval = __kmp_test_drdpa_lock(lck, gtid); 2414 2415 if (retval) { 2416 lck->lk.owner_id = gtid + 1; 2417 } 2418 return retval; 2419 } 2420 2421 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2422 // Read the ticket value from the lock data struct, then the polls pointer and 2423 // the mask. The polls pointer must be read before the mask!!! (See above) 2424 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load 2425 std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load 2426 kmp_uint64 mask = lck->lk.mask; // atomic load 2427 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2428 ticket - 1, lck)); 2429 KMP_FSYNC_RELEASING(lck); 2430 polls[ticket & mask] = ticket; // atomic store 2431 return KMP_LOCK_RELEASED; 2432 } 2433 2434 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2435 kmp_int32 gtid) { 2436 char const *const func = "omp_unset_lock"; 2437 KMP_MB(); /* in case another processor initialized lock */ 2438 if (lck->lk.initialized != lck) { 2439 KMP_FATAL(LockIsUninitialized, func); 2440 } 2441 if (__kmp_is_drdpa_lock_nestable(lck)) { 2442 KMP_FATAL(LockNestableUsedAsSimple, func); 2443 } 2444 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2445 KMP_FATAL(LockUnsettingFree, func); 2446 } 2447 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2448 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2449 KMP_FATAL(LockUnsettingSetByAnother, func); 2450 } 2451 lck->lk.owner_id = 0; 2452 return __kmp_release_drdpa_lock(lck, gtid); 2453 } 2454 2455 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2456 lck->lk.location = NULL; 2457 lck->lk.mask = 0; 2458 lck->lk.num_polls = 1; 2459 lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate( 2460 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2461 lck->lk.cleanup_ticket = 0; 2462 lck->lk.old_polls = NULL; 2463 lck->lk.next_ticket = 0; 2464 lck->lk.now_serving = 0; 2465 lck->lk.owner_id = 0; // no thread owns the lock. 2466 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2467 lck->lk.initialized = lck; 2468 2469 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2470 } 2471 2472 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2473 lck->lk.initialized = NULL; 2474 lck->lk.location = NULL; 2475 if (lck->lk.polls.load() != NULL) { 2476 __kmp_free(lck->lk.polls.load()); 2477 lck->lk.polls = NULL; 2478 } 2479 if (lck->lk.old_polls != NULL) { 2480 __kmp_free(lck->lk.old_polls); 2481 lck->lk.old_polls = NULL; 2482 } 2483 lck->lk.mask = 0; 2484 lck->lk.num_polls = 0; 2485 lck->lk.cleanup_ticket = 0; 2486 lck->lk.next_ticket = 0; 2487 lck->lk.now_serving = 0; 2488 lck->lk.owner_id = 0; 2489 lck->lk.depth_locked = -1; 2490 } 2491 2492 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2493 char const *const func = "omp_destroy_lock"; 2494 if (lck->lk.initialized != lck) { 2495 KMP_FATAL(LockIsUninitialized, func); 2496 } 2497 if (__kmp_is_drdpa_lock_nestable(lck)) { 2498 KMP_FATAL(LockNestableUsedAsSimple, func); 2499 } 2500 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2501 KMP_FATAL(LockStillOwned, func); 2502 } 2503 __kmp_destroy_drdpa_lock(lck); 2504 } 2505 2506 // nested drdpa ticket locks 2507 2508 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2509 KMP_DEBUG_ASSERT(gtid >= 0); 2510 2511 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2512 lck->lk.depth_locked += 1; 2513 return KMP_LOCK_ACQUIRED_NEXT; 2514 } else { 2515 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2516 KMP_MB(); 2517 lck->lk.depth_locked = 1; 2518 KMP_MB(); 2519 lck->lk.owner_id = gtid + 1; 2520 return KMP_LOCK_ACQUIRED_FIRST; 2521 } 2522 } 2523 2524 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2525 kmp_int32 gtid) { 2526 char const *const func = "omp_set_nest_lock"; 2527 if (lck->lk.initialized != lck) { 2528 KMP_FATAL(LockIsUninitialized, func); 2529 } 2530 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2531 KMP_FATAL(LockSimpleUsedAsNestable, func); 2532 } 2533 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2534 } 2535 2536 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2537 int retval; 2538 2539 KMP_DEBUG_ASSERT(gtid >= 0); 2540 2541 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2542 retval = ++lck->lk.depth_locked; 2543 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2544 retval = 0; 2545 } else { 2546 KMP_MB(); 2547 retval = lck->lk.depth_locked = 1; 2548 KMP_MB(); 2549 lck->lk.owner_id = gtid + 1; 2550 } 2551 return retval; 2552 } 2553 2554 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2555 kmp_int32 gtid) { 2556 char const *const func = "omp_test_nest_lock"; 2557 if (lck->lk.initialized != lck) { 2558 KMP_FATAL(LockIsUninitialized, func); 2559 } 2560 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2561 KMP_FATAL(LockSimpleUsedAsNestable, func); 2562 } 2563 return __kmp_test_nested_drdpa_lock(lck, gtid); 2564 } 2565 2566 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2567 KMP_DEBUG_ASSERT(gtid >= 0); 2568 2569 KMP_MB(); 2570 if (--(lck->lk.depth_locked) == 0) { 2571 KMP_MB(); 2572 lck->lk.owner_id = 0; 2573 __kmp_release_drdpa_lock(lck, gtid); 2574 return KMP_LOCK_RELEASED; 2575 } 2576 return KMP_LOCK_STILL_HELD; 2577 } 2578 2579 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2580 kmp_int32 gtid) { 2581 char const *const func = "omp_unset_nest_lock"; 2582 KMP_MB(); /* in case another processor initialized lock */ 2583 if (lck->lk.initialized != lck) { 2584 
KMP_FATAL(LockIsUninitialized, func); 2585 } 2586 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2587 KMP_FATAL(LockSimpleUsedAsNestable, func); 2588 } 2589 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2590 KMP_FATAL(LockUnsettingFree, func); 2591 } 2592 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2593 KMP_FATAL(LockUnsettingSetByAnother, func); 2594 } 2595 return __kmp_release_nested_drdpa_lock(lck, gtid); 2596 } 2597 2598 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2599 __kmp_init_drdpa_lock(lck); 2600 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2601 } 2602 2603 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2604 __kmp_destroy_drdpa_lock(lck); 2605 lck->lk.depth_locked = 0; 2606 } 2607 2608 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2609 char const *const func = "omp_destroy_nest_lock"; 2610 if (lck->lk.initialized != lck) { 2611 KMP_FATAL(LockIsUninitialized, func); 2612 } 2613 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2614 KMP_FATAL(LockSimpleUsedAsNestable, func); 2615 } 2616 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2617 KMP_FATAL(LockStillOwned, func); 2618 } 2619 __kmp_destroy_nested_drdpa_lock(lck); 2620 } 2621 2622 // access functions to fields which don't exist for all lock kinds. 2623 2624 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2625 return lck->lk.location; 2626 } 2627 2628 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2629 const ident_t *loc) { 2630 lck->lk.location = loc; 2631 } 2632 2633 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2634 return lck->lk.flags; 2635 } 2636 2637 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2638 kmp_lock_flags_t flags) { 2639 lck->lk.flags = flags; 2640 } 2641 2642 // Time stamp counter 2643 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2644 #define __kmp_tsc() __kmp_hardware_timestamp() 2645 // Runtime's default backoff parameters 2646 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2647 #else 2648 // Use nanoseconds for other platforms 2649 extern kmp_uint64 __kmp_now_nsec(); 2650 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2651 #define __kmp_tsc() __kmp_now_nsec() 2652 #endif 2653 2654 // A useful predicate for dealing with timestamps that may wrap. 2655 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2656 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2657 // Times where going clockwise is less distance than going anti-clockwise 2658 // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), 2659 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2660 // signed(b) = 0 captures the actual difference 2661 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2662 return ((kmp_int64)b - (kmp_int64)a) > 0; 2663 } 2664 2665 // Truncated binary exponential backoff function 2666 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2667 // We could flatten this loop, but making it a nested loop gives better result 2668 kmp_uint32 i; 2669 for (i = boff->step; i > 0; i--) { 2670 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2671 #if KMP_HAVE_UMWAIT 2672 if (__kmp_umwait_enabled) { 2673 __kmp_tpause(0, boff->min_tick); 2674 } else { 2675 #endif 2676 do { 2677 KMP_CPU_PAUSE(); 2678 } while (before(__kmp_tsc(), goal)); 2679 #if KMP_HAVE_UMWAIT 2680 } 2681 #endif 2682 } 2683 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2684 } 2685 2686 #if KMP_USE_DYNAMIC_LOCK 2687 2688 // Direct lock initializers. It simply writes a tag to the low 8 bits of the 2689 // lock word. 2690 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2691 kmp_dyna_lockseq_t seq) { 2692 TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq)); 2693 KA_TRACE( 2694 20, 2695 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2696 } 2697 2698 #if KMP_USE_TSX 2699 2700 // HLE lock functions - imported from the testbed runtime. 2701 #define HLE_ACQUIRE ".byte 0xf2;" 2702 #define HLE_RELEASE ".byte 0xf3;" 2703 2704 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2705 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2706 return v; 2707 } 2708 2709 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2710 2711 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) { 2712 TCW_4(*lck, 0); 2713 } 2714 2715 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2716 // Use gtid for KMP_LOCK_BUSY if necessary 2717 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2718 int delay = 1; 2719 do { 2720 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2721 for (int i = delay; i != 0; --i) 2722 KMP_CPU_PAUSE(); 2723 delay = ((delay << 1) | 1) & 7; 2724 } 2725 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2726 } 2727 } 2728 2729 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2730 kmp_int32 gtid) { 2731 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2732 } 2733 2734 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2735 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2736 : "=m"(*lck) 2737 : "r"(KMP_LOCK_FREE(hle)) 2738 : "memory"); 2739 return KMP_LOCK_RELEASED; 2740 } 2741 2742 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2743 kmp_int32 gtid) { 2744 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2745 } 2746 2747 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2748 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2749 } 2750 2751 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2752 kmp_int32 gtid) { 2753 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2754 } 2755 2756 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) { 2757 __kmp_init_queuing_lock(lck); 2758 } 2759 2760 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) { 2761 __kmp_destroy_queuing_lock(lck); 2762 } 2763 2764 static void 2765 
__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 2766 __kmp_destroy_queuing_lock_with_checks(lck); 2767 } 2768 2769 KMP_ATTRIBUTE_TARGET_RTM 2770 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck, 2771 kmp_int32 gtid) { 2772 unsigned retries = 3, status; 2773 do { 2774 status = _xbegin(); 2775 if (status == _XBEGIN_STARTED) { 2776 if (__kmp_is_unlocked_queuing_lock(lck)) 2777 return; 2778 _xabort(0xff); 2779 } 2780 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2781 // Wait until lock becomes free 2782 while (!__kmp_is_unlocked_queuing_lock(lck)) { 2783 KMP_YIELD(TRUE); 2784 } 2785 } else if (!(status & _XABORT_RETRY)) 2786 break; 2787 } while (retries--); 2788 2789 // Fall-back non-speculative lock (xchg) 2790 __kmp_acquire_queuing_lock(lck, gtid); 2791 } 2792 2793 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 2794 kmp_int32 gtid) { 2795 __kmp_acquire_rtm_queuing_lock(lck, gtid); 2796 } 2797 2798 KMP_ATTRIBUTE_TARGET_RTM 2799 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck, 2800 kmp_int32 gtid) { 2801 if (__kmp_is_unlocked_queuing_lock(lck)) { 2802 // Releasing from speculation 2803 _xend(); 2804 } else { 2805 // Releasing from a real lock 2806 __kmp_release_queuing_lock(lck, gtid); 2807 } 2808 return KMP_LOCK_RELEASED; 2809 } 2810 2811 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 2812 kmp_int32 gtid) { 2813 return __kmp_release_rtm_queuing_lock(lck, gtid); 2814 } 2815 2816 KMP_ATTRIBUTE_TARGET_RTM 2817 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck, 2818 kmp_int32 gtid) { 2819 unsigned retries = 3, status; 2820 do { 2821 status = _xbegin(); 2822 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2823 return 1; 2824 } 2825 if (!(status & _XABORT_RETRY)) 2826 break; 2827 } while (retries--); 2828 2829 return __kmp_test_queuing_lock(lck, gtid); 2830 } 2831 2832 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 2833 kmp_int32 gtid) { 2834 return __kmp_test_rtm_queuing_lock(lck, gtid); 2835 } 2836 2837 // Reuse kmp_tas_lock_t for TSX lock which use RTM with fall-back spin lock. 
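// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime; guarded by #if 0): the RTM
// paths above abort with an explicit code (0xff) when they observe the lock
// already held inside a transaction. On abort that code reappears in bits
// 31:24 of the status word, which is exactly what _XABORT_CODE() extracts, so
// the caller can tell "lock was busy, wait for it to drain" apart from other
// abort causes. The example_* name below is invented for illustration.
#if 0
static bool example_aborted_because_lock_was_busy(unsigned status) {
  // _XABORT_EXPLICIT is set only for aborts raised by _xabort(imm8);
  // the imm8 value itself travels in the top byte of the status word.
  return (status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff;
}
#endif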
2838 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t; 2839 2840 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) { 2841 KMP_ATOMIC_ST_REL(&lck->lk.poll, 0); 2842 } 2843 2844 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) { 2845 __kmp_destroy_rtm_spin_lock(lck); 2846 } 2847 2848 KMP_ATTRIBUTE_TARGET_RTM 2849 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, 2850 kmp_int32 gtid) { 2851 unsigned retries = 3, status; 2852 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin); 2853 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin); 2854 do { 2855 status = _xbegin(); 2856 if (status == _XBEGIN_STARTED) { 2857 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) 2858 return KMP_LOCK_ACQUIRED_FIRST; 2859 _xabort(0xff); 2860 } 2861 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2862 // Wait until lock becomes free 2863 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) { 2864 KMP_YIELD(TRUE); 2865 } 2866 } else if (!(status & _XABORT_RETRY)) 2867 break; 2868 } while (retries--); 2869 2870 // Fall-back spin lock 2871 KMP_FSYNC_PREPARE(lck); 2872 kmp_backoff_t backoff = __kmp_spin_backoff_params; 2873 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free || 2874 !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) { 2875 __kmp_spin_backoff(&backoff); 2876 } 2877 KMP_FSYNC_ACQUIRED(lck); 2878 return KMP_LOCK_ACQUIRED_FIRST; 2879 } 2880 2881 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck, 2882 kmp_int32 gtid) { 2883 return __kmp_acquire_rtm_spin_lock(lck, gtid); 2884 } 2885 2886 KMP_ATTRIBUTE_TARGET_RTM 2887 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, 2888 kmp_int32 gtid) { 2889 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) { 2890 // Releasing from speculation 2891 _xend(); 2892 } else { 2893 // Releasing from a real lock 2894 KMP_FSYNC_RELEASING(lck); 2895 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin)); 2896 } 2897 return KMP_LOCK_RELEASED; 2898 } 2899 2900 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck, 2901 kmp_int32 gtid) { 2902 return __kmp_release_rtm_spin_lock(lck, gtid); 2903 } 2904 2905 KMP_ATTRIBUTE_TARGET_RTM 2906 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) { 2907 unsigned retries = 3, status; 2908 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin); 2909 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin); 2910 do { 2911 status = _xbegin(); 2912 if (status == _XBEGIN_STARTED && 2913 KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) { 2914 return TRUE; 2915 } 2916 if (!(status & _XABORT_RETRY)) 2917 break; 2918 } while (retries--); 2919 2920 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free && 2921 __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) { 2922 KMP_FSYNC_ACQUIRED(lck); 2923 return TRUE; 2924 } 2925 return FALSE; 2926 } 2927 2928 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck, 2929 kmp_int32 gtid) { 2930 return __kmp_test_rtm_spin_lock(lck, gtid); 2931 } 2932 2933 #endif // KMP_USE_TSX 2934 2935 // Entry functions for indirect locks (first element of direct lock jump tables) 2936 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2937 kmp_dyna_lockseq_t tag); 2938 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2939 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2940 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2941 static int 
__kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2942 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2943 kmp_int32); 2944 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2945 kmp_int32); 2946 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2947 kmp_int32); 2948 2949 // Lock function definitions for the union parameter type 2950 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) 2951 2952 #define expand1(lk, op) \ 2953 static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \ 2954 __kmp_##op##_##lk##_##lock(&lock->lk); \ 2955 } 2956 #define expand2(lk, op) \ 2957 static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \ 2958 kmp_int32 gtid) { \ 2959 return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \ 2960 } 2961 #define expand3(lk, op) \ 2962 static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \ 2963 kmp_lock_flags_t flags) { \ 2964 __kmp_set_##lk##_lock_flags(&lock->lk, flags); \ 2965 } 2966 #define expand4(lk, op) \ 2967 static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \ 2968 const ident_t *loc) { \ 2969 __kmp_set_##lk##_lock_location(&lock->lk, loc); \ 2970 } 2971 2972 KMP_FOREACH_LOCK_KIND(expand1, init) 2973 KMP_FOREACH_LOCK_KIND(expand1, init_nested) 2974 KMP_FOREACH_LOCK_KIND(expand1, destroy) 2975 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested) 2976 KMP_FOREACH_LOCK_KIND(expand2, acquire) 2977 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested) 2978 KMP_FOREACH_LOCK_KIND(expand2, release) 2979 KMP_FOREACH_LOCK_KIND(expand2, release_nested) 2980 KMP_FOREACH_LOCK_KIND(expand2, test) 2981 KMP_FOREACH_LOCK_KIND(expand2, test_nested) 2982 KMP_FOREACH_LOCK_KIND(expand3, ) 2983 KMP_FOREACH_LOCK_KIND(expand4, ) 2984 2985 #undef expand1 2986 #undef expand2 2987 #undef expand3 2988 #undef expand4 2989 2990 // Jump tables for the indirect lock functions 2991 // Only fill in the odd entries, that avoids the need to shift out the low bit 2992 2993 // init functions 2994 #define expand(l, op) 0, __kmp_init_direct_lock, 2995 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2996 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2997 #undef expand 2998 2999 // destroy functions 3000 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 3001 static void (*direct_destroy[])(kmp_dyna_lock_t *) = { 3002 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 3003 #undef expand 3004 #define expand(l, op) \ 3005 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks, 3006 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = { 3007 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 3008 #undef expand 3009 3010 // set/acquire functions 3011 #define expand(l, op) \ 3012 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3013 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 3014 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 3015 #undef expand 3016 #define expand(l, op) \ 3017 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3018 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 3019 __kmp_set_indirect_lock_with_checks, 0, 3020 KMP_FOREACH_D_LOCK(expand, acquire)}; 3021 #undef expand 3022 3023 // unset/release and test functions 3024 #define expand(l, op) \ 3025 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3026 static int 
(*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_unset_indirect_lock_with_checks, 0,
    KMP_FOREACH_D_LOCK(expand, release)};
static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;

// Jump tables for the indirect lock functions
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, init)};
#undef expand

#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
static void (*indirect_destroy[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand
#define expand(l, op)                                                          \
  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand

// set/acquire functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_set[])(kmp_user_lock_p,
                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand

// unset/release and test functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test[])(kmp_user_lock_p,
                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, test)};
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;

// Lock index table.
kmp_indirect_lock_table_t __kmp_i_lock_table;

// Size of indirect locks.
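// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime; guarded by #if 0): a
// miniature version of the dispatch scheme the tables above appear to use.
// The tables interleave null entries so that a direct lock's odd tag can
// index them without shifting out its low bit, while indirect locks keep an
// even lock word and funnel through slot 0 (the indirect entry function).
// The example_* names, the toy table, and the valid tags (3 and 5) are all
// invented for illustration only.
#if 0
typedef void (*example_fn_t)(void);
static void example_indirect_entry(void) {}
static void example_direct_kind_a(void) {} // toy direct lock with tag 3
static void example_direct_kind_b(void) {} // toy direct lock with tag 5

static example_fn_t example_table[] = {
    example_indirect_entry, 0, // slot 0: all indirect locks enter here
    0, example_direct_kind_a,  // slot 3: first direct lock kind
    0, example_direct_kind_b}; // slot 5: second direct lock kind

static void example_dispatch(unsigned lock_word) {
  if (lock_word & 1)
    example_table[lock_word & 7](); // odd word: tag indexes the table directly
  else
    example_table[0](); // even word: an indirect index, use the entry function
}
#endif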
3100 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 3101 3102 // Jump tables for lock accessor/modifier. 3103 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3104 const ident_t *) = {0}; 3105 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3106 kmp_lock_flags_t) = {0}; 3107 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( 3108 kmp_user_lock_p) = {0}; 3109 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( 3110 kmp_user_lock_p) = {0}; 3111 3112 // Use different lock pools for different lock types. 3113 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; 3114 3115 // User lock allocator for dynamically dispatched indirect locks. Every entry of 3116 // the indirect lock table holds the address and type of the allocated indirect 3117 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is 3118 // full. A destroyed indirect lock object is returned to the reusable pool of 3119 // locks, unique to each lock type. 3120 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, 3121 kmp_int32 gtid, 3122 kmp_indirect_locktag_t tag) { 3123 kmp_indirect_lock_t *lck; 3124 kmp_lock_index_t idx, table_idx; 3125 3126 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3127 3128 if (__kmp_indirect_lock_pool[tag] != NULL) { 3129 // Reuse the allocated and destroyed lock object 3130 lck = __kmp_indirect_lock_pool[tag]; 3131 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3132 idx = lck->lock->pool.index; 3133 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3134 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", 3135 lck)); 3136 } else { 3137 kmp_uint32 row, col; 3138 kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table; 3139 idx = 0; 3140 // Find location in list of lock tables to put new lock 3141 while (1) { 3142 table_idx = lock_table->next; // index within this table 3143 idx += lock_table->next; // global index within list of tables 3144 if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) { 3145 row = table_idx / KMP_I_LOCK_CHUNK; 3146 col = table_idx % KMP_I_LOCK_CHUNK; 3147 // Allocate a new row of locks if necessary 3148 if (!lock_table->table[row]) { 3149 lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate( 3150 sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK); 3151 } 3152 break; 3153 } 3154 // Allocate a new lock table if necessary with double the capacity 3155 if (!lock_table->next_table) { 3156 kmp_indirect_lock_table_t *next_table = 3157 (kmp_indirect_lock_table_t *)__kmp_allocate( 3158 sizeof(kmp_indirect_lock_table_t)); 3159 next_table->table = (kmp_indirect_lock_t **)__kmp_allocate( 3160 sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs); 3161 next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs; 3162 next_table->next = 0; 3163 next_table->next_table = nullptr; 3164 lock_table->next_table = next_table; 3165 } 3166 lock_table = lock_table->next_table; 3167 KMP_ASSERT(lock_table); 3168 } 3169 lock_table->next++; 3170 3171 lck = &lock_table->table[row][col]; 3172 // Allocate a new base lock object 3173 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3174 KA_TRACE(20, 3175 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3176 } 3177 3178 __kmp_release_lock(&__kmp_global_lock, gtid); 3179 3180 lck->type = tag; 3181 3182 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3183 *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) = 3184 idx << 1; // 
indirect lock word must be even 3185 } else { 3186 *((kmp_indirect_lock_t **)user_lock) = lck; 3187 } 3188 3189 return lck; 3190 } 3191 3192 // User lock lookup for dynamically dispatched locks. 3193 static __forceinline kmp_indirect_lock_t * 3194 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3195 if (__kmp_env_consistency_check) { 3196 kmp_indirect_lock_t *lck = NULL; 3197 if (user_lock == NULL) { 3198 KMP_FATAL(LockIsUninitialized, func); 3199 } 3200 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3201 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3202 lck = __kmp_get_i_lock(idx); 3203 } else { 3204 lck = *((kmp_indirect_lock_t **)user_lock); 3205 } 3206 if (lck == NULL) { 3207 KMP_FATAL(LockIsUninitialized, func); 3208 } 3209 return lck; 3210 } else { 3211 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3212 return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock)); 3213 } else { 3214 return *((kmp_indirect_lock_t **)user_lock); 3215 } 3216 } 3217 } 3218 3219 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3220 kmp_dyna_lockseq_t seq) { 3221 #if KMP_USE_ADAPTIVE_LOCKS 3222 if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) { 3223 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3224 seq = lockseq_queuing; 3225 } 3226 #endif 3227 #if KMP_USE_TSX 3228 if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) { 3229 seq = lockseq_queuing; 3230 } 3231 #endif 3232 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3233 kmp_indirect_lock_t *l = 3234 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3235 KMP_I_LOCK_FUNC(l, init)(l->lock); 3236 KA_TRACE( 3237 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3238 seq)); 3239 } 3240 3241 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3242 kmp_uint32 gtid = __kmp_entry_gtid(); 3243 kmp_indirect_lock_t *l = 3244 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3245 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3246 kmp_indirect_locktag_t tag = l->type; 3247 3248 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3249 3250 // Use the base lock's space to keep the pool chain. 
3251 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3252 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3253 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3254 } 3255 __kmp_indirect_lock_pool[tag] = l; 3256 3257 __kmp_release_lock(&__kmp_global_lock, gtid); 3258 } 3259 3260 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3261 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3262 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3263 } 3264 3265 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3266 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3267 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3268 } 3269 3270 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3271 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3272 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3273 } 3274 3275 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3276 kmp_int32 gtid) { 3277 kmp_indirect_lock_t *l = 3278 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3279 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3280 } 3281 3282 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3283 kmp_int32 gtid) { 3284 kmp_indirect_lock_t *l = 3285 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3286 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3287 } 3288 3289 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3290 kmp_int32 gtid) { 3291 kmp_indirect_lock_t *l = 3292 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3293 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3294 } 3295 3296 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3297 3298 // This is used only in kmp_error.cpp when consistency checking is on. 3299 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3300 switch (seq) { 3301 case lockseq_tas: 3302 case lockseq_nested_tas: 3303 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3304 #if KMP_USE_FUTEX 3305 case lockseq_futex: 3306 case lockseq_nested_futex: 3307 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3308 #endif 3309 case lockseq_ticket: 3310 case lockseq_nested_ticket: 3311 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3312 case lockseq_queuing: 3313 case lockseq_nested_queuing: 3314 #if KMP_USE_ADAPTIVE_LOCKS 3315 case lockseq_adaptive: 3316 #endif 3317 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3318 case lockseq_drdpa: 3319 case lockseq_nested_drdpa: 3320 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3321 default: 3322 return 0; 3323 } 3324 } 3325 3326 // Initializes data for dynamic user locks. 
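// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime; guarded by #if 0): the
// destroy path above threads destroyed locks onto a per-type free list and
// reuses the dead lock's own storage (l->lock->pool.next / pool.index) as the
// list links, so the pool costs no extra memory. The real code serializes
// pool updates with __kmp_global_lock; locking is omitted in this miniature
// model, and the example_* names are invented.
#if 0
struct example_node {
  union {
    long payload;       // live object state while the lock is in use
    example_node *next; // reused as the pool link once it is destroyed
  };
};

static example_node *example_pool = nullptr;

static void example_release_to_pool(example_node *n) {
  n->next = example_pool; // storage is dead, safe to overwrite with the link
  example_pool = n;
}

static example_node *example_take_from_pool() {
  example_node *n = example_pool;
  if (n != nullptr)
    example_pool = n->next;
  return n; // may be nullptr: caller allocates a fresh node in that case
}
#endif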
3327 void __kmp_init_dynamic_user_locks() { 3328 // Initialize jump table for the lock functions 3329 if (__kmp_env_consistency_check) { 3330 __kmp_direct_set = direct_set_check; 3331 __kmp_direct_unset = direct_unset_check; 3332 __kmp_direct_test = direct_test_check; 3333 __kmp_direct_destroy = direct_destroy_check; 3334 __kmp_indirect_set = indirect_set_check; 3335 __kmp_indirect_unset = indirect_unset_check; 3336 __kmp_indirect_test = indirect_test_check; 3337 __kmp_indirect_destroy = indirect_destroy_check; 3338 } else { 3339 __kmp_direct_set = direct_set; 3340 __kmp_direct_unset = direct_unset; 3341 __kmp_direct_test = direct_test; 3342 __kmp_direct_destroy = direct_destroy; 3343 __kmp_indirect_set = indirect_set; 3344 __kmp_indirect_unset = indirect_unset; 3345 __kmp_indirect_test = indirect_test; 3346 __kmp_indirect_destroy = indirect_destroy; 3347 } 3348 // If the user locks have already been initialized, then return. Allow the 3349 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3350 // new lock tables if they have already been allocated. 3351 if (__kmp_init_user_locks) 3352 return; 3353 3354 // Initialize lock index table 3355 __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS; 3356 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate( 3357 sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS); 3358 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3359 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3360 __kmp_i_lock_table.next = 0; 3361 __kmp_i_lock_table.next_table = nullptr; 3362 3363 // Indirect lock size 3364 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3365 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3366 #if KMP_USE_ADAPTIVE_LOCKS 3367 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3368 #endif 3369 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3370 #if KMP_USE_TSX 3371 __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t); 3372 #endif 3373 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3374 #if KMP_USE_FUTEX 3375 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3376 #endif 3377 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3378 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3379 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3380 3381 // Initialize lock accessor/modifier 3382 #define fill_jumps(table, expand, sep) \ 3383 { \ 3384 table[locktag##sep##ticket] = expand(ticket); \ 3385 table[locktag##sep##queuing] = expand(queuing); \ 3386 table[locktag##sep##drdpa] = expand(drdpa); \ 3387 } 3388 3389 #if KMP_USE_ADAPTIVE_LOCKS 3390 #define fill_table(table, expand) \ 3391 { \ 3392 fill_jumps(table, expand, _); \ 3393 table[locktag_adaptive] = expand(queuing); \ 3394 fill_jumps(table, expand, _nested_); \ 3395 } 3396 #else 3397 #define fill_table(table, expand) \ 3398 { \ 3399 fill_jumps(table, expand, _); \ 3400 fill_jumps(table, expand, _nested_); \ 3401 } 3402 #endif // KMP_USE_ADAPTIVE_LOCKS 3403 3404 #define expand(l) \ 3405 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3406 fill_table(__kmp_indirect_set_location, expand); 3407 #undef expand 3408 #define expand(l) \ 3409 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3410 fill_table(__kmp_indirect_set_flags, expand); 3411 

  // Indirect lock size
  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
#if KMP_USE_ADAPTIVE_LOCKS
  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);

  // Initialize lock accessor/modifier
#define fill_jumps(table, expand, sep)                                         \
  {                                                                            \
    table[locktag##sep##ticket] = expand(ticket);                              \
    table[locktag##sep##queuing] = expand(queuing);                            \
    table[locktag##sep##drdpa] = expand(drdpa);                                \
  }

#if KMP_USE_ADAPTIVE_LOCKS
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    table[locktag_adaptive] = expand(queuing);                                 \
    fill_jumps(table, expand, _nested_);                                       \
  }
#else
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    fill_jumps(table, expand, _nested_);                                       \
  }
#endif // KMP_USE_ADAPTIVE_LOCKS

#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l)                                                              \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l)                                                              \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  int k;

  // Clean up locks in the pools first (they were already destroyed before
  // going into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
  while (ptr) {
    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
      if (!ptr->table[row])
        continue;
      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
        kmp_indirect_lock_t *l = &ptr->table[row][col];
        if (l->lock) {
          // Locks not destroyed explicitly need to be destroyed here.
          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
                        "from table\n",
                        l));
          __kmp_free(l->lock);
        }
      }
      __kmp_free(ptr->table[row]);
    }
    kmp_indirect_lock_table_t *next_table = ptr->next_table;
    if (ptr != &__kmp_i_lock_table)
      __kmp_free(ptr);
    ptr = next_table;
  }

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.self;
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

#if KMP_USE_ADAPTIVE_LOCKS
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif
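
// Editorial note: __kmp_is_ticket_lock_initialized() above and
// __kmp_is_drdpa_lock_initialized() below rely on the same convention:
// initializing a lock stores the lock's own address in a designated field, so
// "is this lock initialized?" reduces to a pointer comparison. A minimal,
// self-contained sketch of the idiom (illustrative only; the names are
// hypothetical and not part of the runtime):
#if 0
struct sketch_lock {
  struct sketch_lock *initialized; // == this lock's own address once init ran
  int owner;
};

static void sketch_lock_init(struct sketch_lock *lck) {
  lck->owner = -1;
  lck->initialized = lck; // self-pointer marks the lock as initialized
}

static int sketch_lock_is_initialized(struct sketch_lock *lck) {
  // Uninitialized or recycled memory is very unlikely to hold the object's
  // own address here, so this check distinguishes live locks from stale ones.
  return lck == lck->initialized;
}
#endif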

static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    // The adaptive lock reuses the queuing lock's accessors here (see also
    // __kmp_get_user_lock_owner() above, which maps lockseq_adaptive to the
    // queuing owner query).
    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
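
// Editorial note: __kmp_lock_table_insert() below grows the table by doubling,
// but it cannot free the old (possibly still referenced) array immediately.
// Instead it parks the old array's address in element [0] of the new array,
// forming a chain that __kmp_cleanup_user_locks() walks and frees at shutdown.
// A minimal, self-contained sketch of that pattern follows (illustrative only;
// the names are hypothetical and not part of the runtime):
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct sketch_table {
  void **slots; // slots[0] chains to the previous (smaller) slots array
  size_t used; // slot 0 is reserved, so used starts at 1
  size_t allocated;
} sketch_table_t;

static size_t sketch_table_insert(sketch_table_t *t, void *item) {
  if (t->used >= t->allocated) {
    size_t size = t->allocated ? t->allocated * 2 : 16;
    void **bigger = (void **)malloc(size * sizeof(void *));
    if (t->allocated != 0) // copy existing entries, skipping reserved slot 0
      memcpy(bigger + 1, t->slots + 1, (t->used - 1) * sizeof(void *));
    bigger[0] = (void *)t->slots; // keep the old array reachable for cleanup
    t->slots = bigger;
    t->allocated = size;
  }
  t->slots[t->used] = item;
  return t->used++;
}

static void sketch_table_cleanup(sketch_table_t *t) {
  // Frees only the table arrays; the stored items are owned elsewhere.
  void **p = t->slots;
  while (p != NULL) { // walk the chain of superseded arrays
    void **prev = (void **)p[0];
    free(p);
    p = prev;
  }
  t->slots = NULL;
  t->used = 1;
  t->allocated = 0;
}

// Usage: sketch_table_t t = {NULL, /*used=*/1, /*allocated=*/0};
//        size_t idx = sketch_table_insert(&t, some_ptr);
//        ... at shutdown: sketch_table_cleanup(&t);
#endif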

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first
    // element of the new table. All the tables are organized into a list and
    // can be freed when the library shuts down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert the lock in the table so that it can be freed in __kmp_cleanup,
    // and so the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up a lock from the pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}
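
// Editorial note: __kmp_user_lock_allocate() above and __kmp_user_lock_free()
// below implement a simple LIFO free list: freed locks are pushed onto
// __kmp_lock_pool and reused before any new memory is requested, with the
// pool link overlaying the lock state in a union. A minimal, self-contained
// sketch of that reuse pattern follows (illustrative only; hypothetical names,
// no locking, not part of the runtime):
#if 0
#include <stdlib.h>

typedef union sketch_user_lock {
  struct {
    union sketch_user_lock *next; // valid only while the lock sits in the pool
  } pool;
  struct {
    int owner; // stand-in for the real lock state
  } lk;
} sketch_user_lock_t;

static sketch_user_lock_t *sketch_lock_pool = NULL;

static sketch_user_lock_t *sketch_lock_allocate(void) {
  if (sketch_lock_pool == NULL) // pool empty: fall back to fresh memory
    return (sketch_user_lock_t *)malloc(sizeof(sketch_user_lock_t));
  sketch_user_lock_t *lck = sketch_lock_pool; // reuse the most recently freed
  sketch_lock_pool = sketch_lock_pool->pool.next;
  return lck;
}

static void sketch_lock_free(sketch_user_lock_t *lck) {
  lck->pool.next = sketch_lock_pool; // push onto the free list
  sketch_lock_pool = lck;
}
#endif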

// Put the lock's memory back in the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about locks in the pool--we will free
  // them when iterating through the lock table (it includes all the locks,
  // dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through the lock table and free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen, since the
  // "location" field of real user locks overlaps the "index" field of
  // "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident_t *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is initialized
      // AND it is NOT a critical section (the user is not responsible for
      // destroying criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Clean up the lock's internal dynamic resources (particularly for
      // drdpa locks).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete the lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free the buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK