/*
 * kmp_lock.h -- lock header file
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_LOCK_H
#define KMP_LOCK_H

#include <limits.h> // CHAR_BIT
#include <stddef.h> // offsetof

#include "kmp_debug.h"
#include "kmp_os.h"

#ifdef __cplusplus
#include <atomic>

extern "C" {
#endif // __cplusplus

// ----------------------------------------------------------------------------
// Have to copy these definitions from kmp.h because kmp.h cannot be included
// due to circular dependencies. Will undef these at end of file.

#define KMP_PAD(type, sz) \
  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
#define KMP_GTID_DNE (-2)

// Forward declaration of ident and ident_t

struct ident;
typedef struct ident ident_t;

// End of copied code.
// ----------------------------------------------------------------------------

// We need to know the size of the area we can assume that the compiler(s)
// allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
// compiler always allocates a pointer-sized area, as does Visual Studio.
//
// gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
// Intel archs. It allocates at least 8 bytes for nested locks (more on
// recent versions), but we are bounded by the pointer-sized chunks that
// the Intel compiler allocates.

#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT)
#define OMP_LOCK_T_SIZE sizeof(int)
#define OMP_NEST_LOCK_T_SIZE sizeof(void *)
#else
#define OMP_LOCK_T_SIZE sizeof(void *)
#define OMP_NEST_LOCK_T_SIZE sizeof(void *)
#endif

// The Intel compiler allocates a 32-byte chunk for a critical section.
// Both gcc and Visual Studio only allocate enough space for a pointer.
// Sometimes we know that the space was allocated by the Intel compiler.
#define OMP_CRITICAL_SIZE sizeof(void *)
#define INTEL_CRITICAL_SIZE 32

// lock flags
typedef kmp_uint32 kmp_lock_flags_t;

#define kmp_lf_critical_section 1

// When a lock table is used, the indices are of kmp_lock_index_t
typedef kmp_uint32 kmp_lock_index_t;

// When memory allocated for locks is on the lock pool (free list),
// it is treated as structs of this type.
struct kmp_lock_pool {
  union kmp_user_lock *next;
  kmp_lock_index_t index;
};

typedef struct kmp_lock_pool kmp_lock_pool_t;

extern void __kmp_validate_locks(void);

// ----------------------------------------------------------------------------
// There are 5 lock implementations:
// 1. Test and set locks.
// 2. futex locks (Linux* OS on x86 and
//    Intel(R) Many Integrated Core Architecture)
// 3. Ticket (Lamport bakery) locks.
// 4. Queuing locks (with separate spin fields).
// 5. DRDPA (Dynamically Reconfigurable Distributed Polling Area) locks
//
// and 3 lock purposes:
// 1. Bootstrap locks -- Used for a few locks available at library
//    startup-shutdown time.
//    These do not require non-negative global thread ID's.
// 2. Internal RTL locks -- Used everywhere else in the RTL
// 3. User locks (includes critical sections)
// ----------------------------------------------------------------------------
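
// A worked example of the KMP_PAD arithmetic copied from kmp.h above: it
// rounds sizeof(type) up to the next multiple of sz, e.g. for a 40-byte type
// and a 64-byte cache line,
//   KMP_PAD(T, 64) = 40 + (64 - ((40 - 1) % 64) - 1) = 40 + (64 - 39 - 1) = 64.
// It is used below to size the lk_pad arrays that pad the lock unions out to
// CACHE_LINE bytes.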

// ============================================================================
// Lock implementations.
//
// Test and set locks.
//
// Non-nested test and set locks differ from the other lock kinds (except
// futex) in that we use the memory allocated by the compiler for the lock,
// rather than a pointer to it.
//
// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
// bytes, so we have to use a lock table for nested locks, and avoid accessing
// the depth_locked field for non-nested locks.
//
// Information normally available to the tools, such as lock location, lock
// usage (normal lock vs. critical section), etc. is not available with test
// and set locks.
// ----------------------------------------------------------------------------

struct kmp_base_tas_lock {
  // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__
  // Flip the ordering of the high and low 32-bit member to be consistent
  // with the memory layout of the address in 64-bit big-endian.
  kmp_int32 depth_locked; // depth locked, for nested locks only
  std::atomic<kmp_int32> poll;
#else
  std::atomic<kmp_int32> poll;
  kmp_int32 depth_locked; // depth locked, for nested locks only
#endif
};

typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;

union kmp_tas_lock {
  kmp_base_tas_lock_t lk;
  kmp_lock_pool_t pool; // make certain struct is large enough
  double lk_align; // use worst case alignment; no cache line padding
};

typedef union kmp_tas_lock kmp_tas_lock_t;

// Static initializer for test and set lock variables. Usage:
//   kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
#define KMP_TAS_LOCK_INITIALIZER(lock) \
  { \
    { KMP_LOCK_FREE(tas), 0 } \
  }

extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck);
extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck);

extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck);
extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck);

#define KMP_LOCK_RELEASED 1
#define KMP_LOCK_STILL_HELD 0
#define KMP_LOCK_ACQUIRED_FIRST 1
#define KMP_LOCK_ACQUIRED_NEXT 0
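
// Example: an illustrative sketch (not used by the runtime itself) of driving
// the plain TAS lock API declared above directly. Real callers normally reach
// these routines through the user-lock or dynamic-lock dispatch layers defined
// later in this file, so the sketch is kept under "#if 0".
#if 0
static void example_tas_usage(kmp_int32 gtid) {
  kmp_tas_lock_t lock; // could also be KMP_TAS_LOCK_INITIALIZER(lock)
  __kmp_init_tas_lock(&lock);
  __kmp_acquire_tas_lock(&lock, gtid); // spins until the lock is obtained
  // ... critical section ...
  __kmp_release_tas_lock(&lock, gtid);
  __kmp_destroy_tas_lock(&lock);
}
#endif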
#ifndef KMP_USE_FUTEX
#define KMP_USE_FUTEX \
  (KMP_OS_LINUX && \
   (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
#endif
#if KMP_USE_FUTEX

// ----------------------------------------------------------------------------
// futex locks. futex locks are only available on Linux* OS.
//
// Like non-nested test and set locks, non-nested futex locks use the memory
// allocated by the compiler for the lock, rather than a pointer to it.
//
// Information normally available to the tools, such as lock location, lock
// usage (normal lock vs. critical section), etc. is not available with test
// and set locks. With non-nested futex locks, the lock owner is not even
// available.
// ----------------------------------------------------------------------------

struct kmp_base_futex_lock {
  volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked
  // 2*(gtid+1) of owning thread, 0 if unlocked
  kmp_int32 depth_locked; // depth locked, for nested locks only
};

typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;

union kmp_futex_lock {
  kmp_base_futex_lock_t lk;
  kmp_lock_pool_t pool; // make certain struct is large enough
  double lk_align; // use worst case alignment
  // no cache line padding
};

typedef union kmp_futex_lock kmp_futex_lock_t;

// Static initializer for futex lock variables. Usage:
//   kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
#define KMP_FUTEX_LOCK_INITIALIZER(lock) \
  { \
    { KMP_LOCK_FREE(futex), 0 } \
  }

extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck);
extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck);

extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck,
                                           kmp_int32 gtid);
extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck,
                                           kmp_int32 gtid);
extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck);
extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck);

#endif // KMP_USE_FUTEX

// ----------------------------------------------------------------------------
// Ticket locks.

#ifdef __cplusplus

#ifdef _MSC_VER
// MSVC won't allow use of std::atomic<> in a union since it has non-trivial
// copy constructor.

struct kmp_base_ticket_lock {
  // `initialized' must be the first entry in the lock data structure!
  std::atomic_bool initialized;
  volatile union kmp_ticket_lock *self; // points to the lock union
  ident_t const *location; // Source code location of omp_init_lock().
  std::atomic_uint
      next_ticket; // ticket number to give to next thread which acquires
  std::atomic_uint now_serving; // ticket number for thread which holds the lock
  std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked
  std::atomic_int depth_locked; // depth locked, for nested locks only
  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
};
#else
struct kmp_base_ticket_lock {
  // `initialized' must be the first entry in the lock data structure!
  std::atomic<bool> initialized;
  volatile union kmp_ticket_lock *self; // points to the lock union
  ident_t const *location; // Source code location of omp_init_lock().
  std::atomic<unsigned>
      next_ticket; // ticket number to give to next thread which acquires
  std::atomic<unsigned>
      now_serving; // ticket number for thread which holds the lock
  std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked
  std::atomic<int> depth_locked; // depth locked, for nested locks only
  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
};
#endif

#else // __cplusplus

struct kmp_base_ticket_lock;

#endif // !__cplusplus

typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;

union KMP_ALIGN_CACHE kmp_ticket_lock {
  kmp_base_ticket_lock_t
      lk; // This field must be first to allow static initializing.
  kmp_lock_pool_t pool;
  double lk_align; // use worst case alignment
  char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)];
};

typedef union kmp_ticket_lock kmp_ticket_lock_t;

// Static initializer for simple ticket lock variables. Usage:
//   kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
// Note the macro argument: it is needed so that the self field of the lock
// is properly initialized to point at the lock variable itself.
#define KMP_TICKET_LOCK_INITIALIZER(lock) \
  { \
    { true, &(lock), NULL, 0U, 0U, 0, -1 } \
  }

extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck,
                                             kmp_int32 gtid);
extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck);
extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck);

extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck,
                                            kmp_int32 gtid);
extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid);
extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck,
                                            kmp_int32 gtid);
extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck);
extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck);
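
// Example: an illustrative sketch (not used by the runtime) of a statically
// initialized ticket lock. The initializer needs the variable name so that
// the self field can point back at the lock; acquire and release then go
// through the routines declared above.
#if 0
static kmp_ticket_lock_t example_ticket_lock =
    KMP_TICKET_LOCK_INITIALIZER(example_ticket_lock);

static void example_ticket_usage(kmp_int32 gtid) {
  __kmp_acquire_ticket_lock(&example_ticket_lock, gtid);
  // ... critical section ...
  __kmp_release_ticket_lock(&example_ticket_lock, gtid);
}
#endif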

// ----------------------------------------------------------------------------
// Queuing locks.

#if KMP_USE_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_info;

typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;

#if KMP_DEBUG_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_statistics {
  /* So we can get stats from locks that haven't been destroyed. */
  kmp_adaptive_lock_info_t *next;
  kmp_adaptive_lock_info_t *prev;

  /* Other statistics */
  kmp_uint32 successfulSpeculations;
  kmp_uint32 hardFailedSpeculations;
  kmp_uint32 softFailedSpeculations;
  kmp_uint32 nonSpeculativeAcquires;
  kmp_uint32 nonSpeculativeAcquireAttempts;
  kmp_uint32 lemmingYields;
};

typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;

extern void __kmp_print_speculative_stats();
extern void __kmp_init_speculative_stats();

#endif // KMP_DEBUG_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_info {
  /* Values used for adaptivity.
     Although these are accessed from multiple threads we don't access them
     atomically, because if we miss updates it probably doesn't matter much.
     (It just affects our decision about whether to try speculation on the
     lock). */
  kmp_uint32 volatile badness;
  kmp_uint32 volatile acquire_attempts;
  /* Parameters of the lock. */
  kmp_uint32 max_badness;
  kmp_uint32 max_soft_retries;

#if KMP_DEBUG_ADAPTIVE_LOCKS
  kmp_adaptive_lock_statistics_t volatile stats;
#endif
};

#endif // KMP_USE_ADAPTIVE_LOCKS

struct kmp_base_queuing_lock {

  // `initialized' must be the first entry in the lock data structure!
  volatile union kmp_queuing_lock
      *initialized; // Points to the lock union if in initialized state.

  ident_t const *location; // Source code location of omp_init_lock().

  KMP_ALIGN(8) // tail_id must be 8-byte aligned!

  volatile kmp_int32
      tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
  // Must be no padding here since head/tail used in 8-byte CAS
  volatile kmp_int32
      head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
  // Decl order assumes little endian
  // bakery-style lock
  volatile kmp_uint32
      next_ticket; // ticket number to give to next thread which acquires
  volatile kmp_uint32
      now_serving; // ticket number for thread which holds the lock
  volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
  kmp_int32 depth_locked; // depth locked, for nested locks only

  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;

KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0);

union KMP_ALIGN_CACHE kmp_queuing_lock {
  kmp_base_queuing_lock_t
      lk; // This field must be first to allow static initializing.
  kmp_lock_pool_t pool;
  double lk_align; // use worst case alignment
  char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)];
};

typedef union kmp_queuing_lock kmp_queuing_lock_t;

extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck);
extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck);

extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck,
                                             kmp_int32 gtid);
extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid);
extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck,
                                             kmp_int32 gtid);
extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck);
extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck);

#if KMP_USE_ADAPTIVE_LOCKS

// ----------------------------------------------------------------------------
// Adaptive locks.
struct kmp_base_adaptive_lock {
  kmp_base_queuing_lock qlk;
  KMP_ALIGN(CACHE_LINE)
  kmp_adaptive_lock_info_t
      adaptive; // Information for the speculative adaptive lock
};

typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;

union KMP_ALIGN_CACHE kmp_adaptive_lock {
  kmp_base_adaptive_lock_t lk;
  kmp_lock_pool_t pool;
  double lk_align;
  char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)];
};
typedef union kmp_adaptive_lock kmp_adaptive_lock_t;

#define GET_QLK_PTR(l) ((kmp_queuing_lock_t *)&(l)->lk.qlk)

#endif // KMP_USE_ADAPTIVE_LOCKS
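
// Illustrative sketch only: because qlk is the first member of
// kmp_base_adaptive_lock, an adaptive lock can be handed to the queuing-lock
// routines whenever speculation is not attempted or has failed, roughly:
#if 0
static void example_adaptive_fallback(kmp_adaptive_lock_t *lck,
                                      kmp_int32 gtid) {
  // speculative path elided; on failure, take the real lock underneath
  __kmp_acquire_queuing_lock(GET_QLK_PTR(lck), gtid);
  // ... critical section ...
  __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
}
#endif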

// ----------------------------------------------------------------------------
// DRDPA ticket locks.
struct kmp_base_drdpa_lock {
  // All of the fields on the first cache line are only written when
  // initializing or reconfiguring the lock. These are relatively rare
  // operations, so data from the first cache line will usually stay resident
  // in the cache of each thread trying to acquire the lock.
  //
  // initialized must be the first entry in the lock data structure!
  KMP_ALIGN_CACHE

  volatile union kmp_drdpa_lock
      *initialized; // points to the lock union if in initialized state
  ident_t const *location; // Source code location of omp_init_lock().
  std::atomic<std::atomic<kmp_uint64> *> polls;
  std::atomic<kmp_uint64> mask; // is 2**num_polls-1 for mod op
  kmp_uint64 cleanup_ticket; // thread with cleanup ticket
  std::atomic<kmp_uint64> *old_polls; // will deallocate old_polls
  kmp_uint32 num_polls; // must be power of 2

  // next_ticket needs to exist in a separate cache line, as it is
  // invalidated every time a thread takes a new ticket.
  KMP_ALIGN_CACHE

  std::atomic<kmp_uint64> next_ticket;

  // now_serving is used to store our ticket value while we hold the lock. It
  // has a slightly different meaning in the DRDPA ticket locks (where it is
  // written by the acquiring thread) than it does in the simple ticket locks
  // (where it is written by the releasing thread).
  //
  // Since now_serving is only read and written in the critical section,
  // it is non-volatile, but it needs to exist on a separate cache line,
  // as it is invalidated at every lock acquire.
  //
  // Likewise, the vars used for nested locks (owner_id and depth_locked) are
  // only written by the thread owning the lock, so they are put in this cache
  // line. owner_id is read by other threads, so it must be declared volatile.
  KMP_ALIGN_CACHE
  kmp_uint64 now_serving; // doesn't have to be volatile
  volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
  kmp_int32 depth_locked; // depth locked
  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;

union KMP_ALIGN_CACHE kmp_drdpa_lock {
  kmp_base_drdpa_lock_t
      lk; // This field must be first to allow static initializing.
  kmp_lock_pool_t pool;
  double lk_align; // use worst case alignment
  char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)];
};

typedef union kmp_drdpa_lock kmp_drdpa_lock_t;

extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck);
extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck);

extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
                                           kmp_int32 gtid);
extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
                                           kmp_int32 gtid);
extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
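
// Rough, illustrative shape of the DRDPA protocol implied by the fields above
// (the real implementation lives in kmp_lock.cpp): each acquiring thread takes
// a ticket and then spins on its own slot of the distributed polling area,
// which keeps waiters from all hammering a single cache line.
#if 0
static void example_drdpa_idea(kmp_base_drdpa_lock_t *lk) {
  kmp_uint64 ticket = lk->next_ticket++; // atomic increment in the real code
  kmp_uint64 mask = lk->mask;            // 2**num_polls - 1
  while (lk->polls.load()[ticket & mask].load() < ticket) {
    // spin/yield on this thread's own polling slot
  }
  lk->now_serving = ticket; // remember our ticket while holding the lock
}
#endif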

// ============================================================================
// Lock purposes.
// ============================================================================

// Bootstrap locks.
//
// Bootstrap locks -- very few locks used at library initialization time.
// Bootstrap locks are currently implemented as ticket locks.
// They could also be implemented as test and set locks, but cannot be
// implemented with other lock kinds as they require gtids which are not
// available at initialization time.

typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;

#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock) KMP_TICKET_LOCK_INITIALIZER((lock))
#define KMP_BOOTSTRAP_LOCK_INIT(lock) \
  kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock)

static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
  return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE);
}

static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
  return __kmp_test_ticket_lock(lck, KMP_GTID_DNE);
}

static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_release_ticket_lock(lck, KMP_GTID_DNE);
}

static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
}

// Internal RTL locks.
//
// Internal RTL locks are also implemented as ticket locks, for now.
//
// FIXME - We should go through and figure out which lock kind works best for
// each internal lock, and use the type declaration and function calls for
// that explicit lock kind (and get rid of this section).

typedef kmp_ticket_lock_t kmp_lock_t;

#define KMP_LOCK_INIT(lock) kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock)

static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) {
  return __kmp_acquire_ticket_lock(lck, gtid);
}

static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) {
  return __kmp_test_ticket_lock(lck, gtid);
}

static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) {
  __kmp_release_ticket_lock(lck, gtid);
}

static inline void __kmp_init_lock(kmp_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static inline void __kmp_destroy_lock(kmp_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
}

// User locks.
//
// Do not allocate objects of type union kmp_user_lock!!! This will waste space
// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of
// __kmp_user_lock_kind and allocate objects of the type of the appropriate
// union member, and cast their addresses to kmp_user_lock_p.
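
// For illustration, "allocate by kind" as prescribed above amounts to roughly
// the sketch below, using the lock-kind enumeration and kmp_user_lock_p
// declared just after it. The runtime's real helper is
// __kmp_user_lock_allocate(), declared later in this file; malloc merely
// stands in for the runtime allocator here.
#if 0
#include <stdlib.h> // malloc, for this sketch only
static kmp_user_lock_p example_allocate_by_kind(void) {
  size_t size;
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    size = sizeof(kmp_tas_lock_t);
    break;
  case lk_ticket:
    size = sizeof(kmp_ticket_lock_t);
    break;
  case lk_queuing:
    size = sizeof(kmp_queuing_lock_t);
    break;
  default: // drdpa and the remaining kinds handled analogously
    size = sizeof(union kmp_user_lock);
    break;
  }
  return (kmp_user_lock_p)malloc(size); // cast the address to kmp_user_lock_p
}
#endif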

enum kmp_lock_kind {
  lk_default = 0,
  lk_tas,
#if KMP_USE_FUTEX
  lk_futex,
#endif
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
  lk_hle,
  lk_rtm_queuing,
  lk_rtm_spin,
#endif
  lk_ticket,
  lk_queuing,
  lk_drdpa,
#if KMP_USE_ADAPTIVE_LOCKS
  lk_adaptive
#endif // KMP_USE_ADAPTIVE_LOCKS
};

typedef enum kmp_lock_kind kmp_lock_kind_t;

extern kmp_lock_kind_t __kmp_user_lock_kind;

union kmp_user_lock {
  kmp_tas_lock_t tas;
#if KMP_USE_FUTEX
  kmp_futex_lock_t futex;
#endif
  kmp_ticket_lock_t ticket;
  kmp_queuing_lock_t queuing;
  kmp_drdpa_lock_t drdpa;
#if KMP_USE_ADAPTIVE_LOCKS
  kmp_adaptive_lock_t adaptive;
#endif // KMP_USE_ADAPTIVE_LOCKS
  kmp_lock_pool_t pool;
};

typedef union kmp_user_lock *kmp_user_lock_p;

#if !KMP_USE_DYNAMIC_LOCK

extern size_t __kmp_base_user_lock_size;
extern size_t __kmp_user_lock_size;

extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck);

static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL);
  return (*__kmp_get_user_lock_owner_)(lck);
}

extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid);

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

#define __kmp_acquire_user_lock_with_checks(lck, gtid) \
  if (__kmp_user_lock_kind == lk_tas) { \
    if (__kmp_env_consistency_check) { \
      char const *const func = "omp_set_lock"; \
      if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && \
          lck->tas.lk.depth_locked != -1) { \
        KMP_FATAL(LockNestableUsedAsSimple, func); \
      } \
      if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) { \
        KMP_FATAL(LockIsAlreadyOwned, func); \
      } \
    } \
    if (lck->tas.lk.poll != 0 || \
        !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
      kmp_uint32 spins; \
      kmp_uint64 time; \
      KMP_FSYNC_PREPARE(lck); \
      KMP_INIT_YIELD(spins); \
      KMP_INIT_BACKOFF(time); \
      do { \
        KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
      } while ( \
          lck->tas.lk.poll != 0 || \
          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
    } \
    KMP_FSYNC_ACQUIRED(lck); \
  } else { \
    KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); \
    (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); \
  }

#else
static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck,
                                                      kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL);
  return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
}
#endif
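
// Note on the test-and-set fast paths above and below: with dynamic locks
// disabled, tas.lk.poll is 0 when the lock is free and gtid+1 when it is held,
// which is why ownership is checked as "poll - 1 == gtid" and the acquiring
// CAS installs gtid + 1 over 0.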

extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid);

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) {
  if (__kmp_user_lock_kind == lk_tas) {
    if (__kmp_env_consistency_check) {
      char const *const func = "omp_test_lock";
      if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
          lck->tas.lk.depth_locked != -1) {
        KMP_FATAL(LockNestableUsedAsSimple, func);
      }
    }
    return ((lck->tas.lk.poll == 0) &&
            __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
  } else {
    KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
    return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
  }
}
#else
static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
  return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
}
#endif

extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid);

static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck,
                                                       kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL);
  (*__kmp_release_user_lock_with_checks_)(lck, gtid);
}

extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck);

static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL);
  (*__kmp_init_user_lock_with_checks_)(lck);
}

// We need a non-checking version of destroy lock for when the RTL is
// doing the cleanup as it can't always tell if the lock is nested or not.
extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck);

static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL);
  (*__kmp_destroy_user_lock_)(lck);
}

extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck);

static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL);
  (*__kmp_destroy_user_lock_with_checks_)(lck);
}

extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                          kmp_int32 gtid);

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)

#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \
  if (__kmp_user_lock_kind == lk_tas) { \
    if (__kmp_env_consistency_check) { \
      char const *const func = "omp_set_nest_lock"; \
      if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && \
          lck->tas.lk.depth_locked == -1) { \
        KMP_FATAL(LockSimpleUsedAsNestable, func); \
      } \
    } \
    if (lck->tas.lk.poll - 1 == gtid) { \
      lck->tas.lk.depth_locked += 1; \
      *depth = KMP_LOCK_ACQUIRED_NEXT; \
    } else { \
      if ((lck->tas.lk.poll != 0) || \
          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
        kmp_uint32 spins; \
        kmp_uint64 time; \
        KMP_FSYNC_PREPARE(lck); \
        KMP_INIT_YIELD(spins); \
        KMP_INIT_BACKOFF(time); \
        do { \
          KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
        } while ( \
            (lck->tas.lk.poll != 0) || \
            !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
      } \
      lck->tas.lk.depth_locked = 1; \
      *depth = KMP_LOCK_ACQUIRED_FIRST; \
    } \
    KMP_FSYNC_ACQUIRED(lck); \
  } else { \
    KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); \
    *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); \
  }

#else
static inline void
__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid,
                                           int *depth) {
  KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL);
  *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);
}
#endif
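
// As the nested acquire path above shows, the first acquisition by a thread
// sets depth_locked to 1 and reports KMP_LOCK_ACQUIRED_FIRST, while a
// re-acquisition by the same gtid just bumps depth_locked and reports
// KMP_LOCK_ACQUIRED_NEXT; releases are expected to unwind the depth
// symmetrically.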

extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                       kmp_int32 gtid);

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
                                                          kmp_int32 gtid) {
  if (__kmp_user_lock_kind == lk_tas) {
    int retval;
    if (__kmp_env_consistency_check) {
      char const *const func = "omp_test_nest_lock";
      if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) &&
          lck->tas.lk.depth_locked == -1) {
        KMP_FATAL(LockSimpleUsedAsNestable, func);
      }
    }
    KMP_DEBUG_ASSERT(gtid >= 0);
    if (lck->tas.lk.poll - 1 ==
        gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
      return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
    }
    retval = ((lck->tas.lk.poll == 0) &&
              __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
    if (retval) {
      KMP_MB();
      lck->tas.lk.depth_locked = 1;
    }
    return retval;
  } else {
    KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
    return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
  }
}
#else
static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
                                                          kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
  return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
}
#endif

extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                          kmp_int32 gtid);

static inline int
__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
                                           kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL);
  return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
}

extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);

static inline void
__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL);
  (*__kmp_init_nested_user_lock_with_checks_)(lck);
}

extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);

static inline void
__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL);
  (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
}

// user lock functions which do not necessarily exist for all lock kinds.
//
// The "set" functions usually have wrapper routines that check for a NULL set
// function pointer and call it if non-NULL.
//
// In some cases, it makes sense to have a "get" wrapper function check for a
// NULL get function pointer and return NULL / invalid value / error code if
// the function pointer is NULL.
//
// In other cases, the calling code really should differentiate between an
// unimplemented function and one that is implemented but returning NULL /
// invalid value. If this is the case, no get function wrapper exists.
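//
// For example, the test and set (and futex) locks record no source location
// or flags, so the corresponding "get"/"set" pointers below may be left NULL
// for those lock kinds, in which case the wrappers simply return NULL or do
// nothing.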

extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);

// no set function; fields set during local allocation

extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);

static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
  if (__kmp_get_user_lock_location_ != NULL) {
    return (*__kmp_get_user_lock_location_)(lck);
  } else {
    return NULL;
  }
}

extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                             const ident_t *loc);

static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
                                                const ident_t *loc) {
  if (__kmp_set_user_lock_location_ != NULL) {
    (*__kmp_set_user_lock_location_)(lck, loc);
  }
}

extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);

extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                          kmp_lock_flags_t flags);

static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
                                             kmp_lock_flags_t flags) {
  if (__kmp_set_user_lock_flags_ != NULL) {
    (*__kmp_set_user_lock_flags_)(lck, flags);
  }
}

// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);

// Macros for binding user lock functions.
#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) \
  { \
    __kmp_acquire##nest##user_lock_with_checks_ = (int (*)( \
        kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix; \
    __kmp_release##nest##user_lock_with_checks_ = (int (*)( \
        kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix; \
    __kmp_test##nest##user_lock_with_checks_ = (int (*)( \
        kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix; \
    __kmp_init##nest##user_lock_with_checks_ = \
        (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix; \
    __kmp_destroy##nest##user_lock_with_checks_ = \
        (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix; \
  }

#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) \
  KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
#define KMP_BIND_NESTED_USER_LOCK(kind) \
  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) \
  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
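
// For reference, binding the ticket lock kind without consistency checks,
// KMP_BIND_USER_LOCK(ticket), expands roughly to:
#if 0
{
  __kmp_acquire_user_lock_with_checks_ =
      (int (*)(kmp_user_lock_p, kmp_int32))__kmp_acquire_ticket_lock;
  __kmp_release_user_lock_with_checks_ =
      (int (*)(kmp_user_lock_p, kmp_int32))__kmp_release_ticket_lock;
  __kmp_test_user_lock_with_checks_ =
      (int (*)(kmp_user_lock_p, kmp_int32))__kmp_test_ticket_lock;
  __kmp_init_user_lock_with_checks_ =
      (void (*)(kmp_user_lock_p))__kmp_init_ticket_lock;
  __kmp_destroy_user_lock_with_checks_ =
      (void (*)(kmp_user_lock_p))__kmp_destroy_ticket_lock;
}
#endif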

// User lock table & lock allocation
/* On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
   memory for the lock variable, which is not enough to store a pointer, so we
   have to use lock indexes instead of pointers and maintain a lock table to
   map indexes to pointers.

   Note: The first element of the table is not a pointer to a lock! It is a
   pointer to the previously allocated table (or NULL if it is the first
   table).

   Usage:

   if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
     Lock table is fully utilized. User locks are indexes, so the table is
     used on every user lock operation.
     Note: it may be the case (lin_32) that we don't need to use a lock
     table for regular locks, but do need the table for nested locks.
   }
   else {
     Lock table initialized but not actually used.
   }
*/

struct kmp_lock_table {
  kmp_lock_index_t used; // Number of used elements
  kmp_lock_index_t allocated; // Number of allocated elements
  kmp_user_lock_p *table; // Lock table.
};

typedef struct kmp_lock_table kmp_lock_table_t;

extern kmp_lock_table_t __kmp_user_lock_table;
extern kmp_user_lock_p __kmp_lock_pool;

struct kmp_block_of_locks {
  struct kmp_block_of_locks *next_block;
  void *locks;
};

typedef struct kmp_block_of_locks kmp_block_of_locks_t;

extern kmp_block_of_locks_t *__kmp_lock_blocks;
extern int __kmp_num_locks_in_block;

extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
                                                kmp_int32 gtid,
                                                kmp_lock_flags_t flags);
extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                                 kmp_user_lock_p lck);
extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
                                              char const *func);
extern void __kmp_cleanup_user_locks();

#define KMP_CHECK_USER_LOCK_INIT() \
  { \
    if (!TCR_4(__kmp_init_user_locks)) { \
      __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); \
      if (!TCR_4(__kmp_init_user_locks)) { \
        TCW_4(__kmp_init_user_locks, TRUE); \
      } \
      __kmp_release_bootstrap_lock(&__kmp_initz_lock); \
    } \
  }

#endif // KMP_USE_DYNAMIC_LOCK

#undef KMP_PAD
#undef KMP_GTID_DNE

#if KMP_USE_DYNAMIC_LOCK
// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
// breaking the current compatibility. Essential functionality of this new code
// is dynamic dispatch, but it also implements (or enables implementation of)
// hinted user locks and critical sections, which are part of OpenMP 4.5.
//
// Lock type can be decided at creation time (i.e., lock initialization), and
// subsequent lock function calls on the created lock object require type
// extraction and a call through a jump table using the extracted type. This
// type information is stored in two different ways depending on the size of
// the lock object, and we differentiate lock types by this size requirement -
// direct and indirect locks.
//
// Direct locks:
// A direct lock object fits into the space created by the compiler for an
// omp_lock_t object, and the TAS/Futex locks fall into this category. We use
// the low byte of the lock object as the storage for the lock type, and an
// appropriate bit operation is required to access the data meaningful to the
// lock algorithms. Also, to differentiate a direct lock from an indirect lock,
// 1 is written to the LSB of the lock object. The newly introduced "hle" lock
// is also a direct lock.
//
// Indirect locks:
// An indirect lock object requires more space than the compiler-generated
// space, and it should be allocated from the heap. Depending on the size of
// the compiler-generated space for the lock (i.e., size of omp_lock_t), this
// omp_lock_t object stores either the address of the heap-allocated indirect
// lock (void * fits in the object) or an index to the indirect lock table
// entry that holds the address. The Ticket/Queuing/DRDPA/Adaptive locks fall
// into this category, and the newly introduced "rtm" lock is also an indirect
// lock, implemented on top of the Queuing lock. When the omp_lock_t object
// holds an index (not a lock address), 0 is written to the LSB to
// differentiate the lock from a direct lock, and the remaining bits are the
// actual index into the indirect lock table.
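//
// Putting the two encodings side by side (an illustrative summary; see the
// tag/extract macros defined below):
//   direct lock:   the lock word itself holds
//                  (lock data << KMP_LOCK_SHIFT) | tag, where the direct tag
//                  is odd, i.e. the LSB is 1
//   indirect lock: the omp_lock_t object holds either (table index << 1),
//                  i.e. the LSB is 0, or the heap address of the lock when a
//                  void * fits in the object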

#include <stdint.h> // for uintptr_t

// Shortcuts
#define KMP_USE_INLINED_TAS \
  (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0

// List of lock definitions; all nested locks are indirect locks.
// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
// All nested locks are indirect lock types.
#if KMP_USE_TSX
#if KMP_USE_FUTEX
#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a)
#define KMP_FOREACH_I_LOCK(m, a) \
  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
      m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
          m(nested_queuing, a) m(nested_drdpa, a)
#else
#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) m(rtm_spin, a)
#define KMP_FOREACH_I_LOCK(m, a) \
  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
      m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \
          m(nested_drdpa, a)
#endif // KMP_USE_FUTEX
#define KMP_LAST_D_LOCK lockseq_rtm_spin
#else
#if KMP_USE_FUTEX
#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
#define KMP_FOREACH_I_LOCK(m, a) \
  m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a) \
      m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a)
#define KMP_LAST_D_LOCK lockseq_futex
#else
#define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
#define KMP_FOREACH_I_LOCK(m, a) \
  m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a) \
      m(nested_queuing, a) m(nested_drdpa, a)
#define KMP_LAST_D_LOCK lockseq_tas
#endif // KMP_USE_FUTEX
#endif // KMP_USE_TSX

// Information used in dynamic dispatch
#define KMP_LOCK_SHIFT \
  8 // number of low bits to be used as tag for direct locks
#define KMP_FIRST_D_LOCK lockseq_tas
#define KMP_FIRST_I_LOCK lockseq_ticket
#define KMP_LAST_I_LOCK lockseq_nested_drdpa
#define KMP_NUM_I_LOCKS \
  (locktag_nested_drdpa + 1) // number of indirect lock types

// Base type for dynamic locks.
typedef kmp_uint32 kmp_dyna_lock_t;

// Lock sequence that enumerates all lock kinds. Always make this enumeration
// consistent with kmp_lockseq_t in the include directory.
typedef enum {
  lockseq_indirect = 0,
#define expand_seq(l, a) lockseq_##l,
  KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0)
#undef expand_seq
} kmp_dyna_lockseq_t;

// Enumerates indirect lock tags.
typedef enum {
#define expand_tag(l, a) locktag_##l,
  KMP_FOREACH_I_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_indirect_locktag_t;

// Utility macros that extract information from lock sequences.
#define KMP_IS_D_LOCK(seq) \
  ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
#define KMP_IS_I_LOCK(seq) \
  ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK)
#define KMP_GET_D_TAG(seq) ((seq) << 1 | 1)

// Enumerates direct lock tags starting from indirect tag.
typedef enum {
#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
  KMP_FOREACH_D_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_direct_locktag_t;

// Indirect lock type
typedef struct {
  kmp_user_lock_p lock;
  kmp_indirect_locktag_t type;
} kmp_indirect_lock_t;

// Function tables for direct locks. Set/unset/test differentiate functions
// with/without consistency checking.
extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);

// Function tables for indirect locks. Set/unset/test differentiate functions
// with/without consistency checking.
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);

// Extracts direct lock tag from a user lock pointer
#define KMP_EXTRACT_D_TAG(l) \
  ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll & \
   ((1 << KMP_LOCK_SHIFT) - 1) & \
   -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1))

// Extracts indirect lock index from a user lock pointer
#define KMP_EXTRACT_I_INDEX(l) \
  ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1)

// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
// *) and op (operation type).
#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]

// Returns function pointer to the indirect lock function with l
// (kmp_indirect_lock_t *) and op (operation type).
#define KMP_I_LOCK_FUNC(l, op) \
  __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]

// Initializes a direct lock with the given lock pointer and lock sequence.
#define KMP_INIT_D_LOCK(l, seq) \
  __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)

// Initializes an indirect lock with the given lock pointer and lock sequence.
#define KMP_INIT_I_LOCK(l, seq) \
  __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)

// Returns "free" lock value for the given lock type.
#define KMP_LOCK_FREE(type) (locktag_##type)

// Returns "busy" lock value for the given lock type.
#define KMP_LOCK_BUSY(v, type) ((v) << KMP_LOCK_SHIFT | locktag_##type)

// Returns lock value after removing (shifting) lock tag.
#define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT)

// Initializes global states and data structures for managing dynamic user
// locks.
extern void __kmp_init_dynamic_user_locks();

// Allocates and returns an indirect lock with the given indirect lock tag.
extern kmp_indirect_lock_t *
__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);

// Cleans up global states and data structures for managing dynamic user locks.
extern void __kmp_cleanup_indirect_user_locks();
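
// Example: an illustrative sketch of the dynamic dispatch path for a direct
// lock. The tag stored in the low bits of the lock word selects the entry in
// the jump tables declared above; this is roughly how KMP_D_LOCK_FUNC is used
// elsewhere in the runtime.
#if 0
static void example_direct_dispatch(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  KMP_INIT_D_LOCK(lck, lockseq_tas); // initialize *lck as a TAS direct lock
  KMP_D_LOCK_FUNC(lck, set)(lck, gtid);   // acquire via __kmp_direct_set[tag]
  // ... critical section ...
  KMP_D_LOCK_FUNC(lck, unset)(lck, gtid); // release via __kmp_direct_unset[tag]
  KMP_D_LOCK_FUNC(lck, destroy)(lck);     // teardown via __kmp_direct_destroy[tag]
}
#endif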

// Default user lock sequence when not using hinted locks.
extern kmp_dyna_lockseq_t __kmp_user_lock_seq;

// Jump table for "set lock location", available only for indirect locks.
extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                            const ident_t *);
#define KMP_SET_I_LOCK_LOCATION(lck, loc) \
  { \
    if (__kmp_indirect_set_location[(lck)->type] != NULL) \
      __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
  }

// Jump table for "set lock flags", available only for indirect locks.
extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                         kmp_lock_flags_t);
#define KMP_SET_I_LOCK_FLAGS(lck, flag) \
  { \
    if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
      __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
  }

// Jump table for "get lock location", available only for indirect locks.
extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p);
#define KMP_GET_I_LOCK_LOCATION(lck) \
  (__kmp_indirect_get_location[(lck)->type] != NULL \
       ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
       : NULL)

// Jump table for "get lock flags", available only for indirect locks.
extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p);
#define KMP_GET_I_LOCK_FLAGS(lck) \
  (__kmp_indirect_get_flags[(lck)->type] != NULL \
       ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
       : NULL)

// number of kmp_indirect_lock_t objects to be allocated together
#define KMP_I_LOCK_CHUNK 1024
// Keep at a power of 2 since it is used in multiplication & division
KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0);
// number of row entries in the initial lock table
#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8

// Lock table for indirect locks.
typedef struct kmp_indirect_lock_table {
  kmp_indirect_lock_t **table; // blocks of indirect locks allocated
  kmp_uint32 nrow_ptrs; // number of *table pointer entries in table
  kmp_lock_index_t next; // index to the next lock to be allocated
  struct kmp_indirect_lock_table *next_table;
} kmp_indirect_lock_table_t;

extern kmp_indirect_lock_table_t __kmp_i_lock_table;

// Returns the indirect lock associated with the given index.
// Returns nullptr if no lock at given index
static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
  while (lock_table) {
    kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
    if (idx < max_locks) {
      kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
      kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
      if (!lock_table->table[row] || idx >= lock_table->next)
        break;
      return &lock_table->table[row][col];
    }
    idx -= max_locks;
    lock_table = lock_table->next_table;
  }
  return nullptr;
}

// Number of locks in a lock block, which is fixed to "1" now.
// TODO: No lock block implementation now. If we do support, we need to manage
// lock block data structure for each indirect lock type.
extern int __kmp_num_locks_in_block;

// Fast lock table lookup without consistency checking
#define KMP_LOOKUP_I_LOCK(l) \
  ((OMP_LOCK_T_SIZE < sizeof(void *)) \
       ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l)) \
       : *((kmp_indirect_lock_t **)(l)))
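
// Example: an illustrative sketch of going from a user's omp_lock_t storage to
// its indirect lock and dispatching through the indirect jump tables (compare
// KMP_LOOKUP_I_LOCK and KMP_I_LOCK_FUNC above).
#if 0
static void example_indirect_dispatch(void **user_lock, kmp_int32 gtid) {
  // user_lock is the compiler-allocated omp_lock_t storage; it holds either
  // the indirect lock's address or a table index, depending on its size.
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  KMP_I_LOCK_FUNC(ilk, set)(ilk->lock, gtid);   // acquire
  // ... critical section ...
  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, gtid); // release
}
#endif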

// Used once in kmp_error.cpp
extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);

#else // KMP_USE_DYNAMIC_LOCK

#define KMP_LOCK_BUSY(v, type) (v)
#define KMP_LOCK_FREE(type) 0
#define KMP_LOCK_STRIP(v) (v)

#endif // KMP_USE_DYNAMIC_LOCK

// data structure for using backoff within spin locks.
typedef struct {
  kmp_uint32 step; // current step
  kmp_uint32 max_backoff; // upper bound of outer delay loop
  kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
} kmp_backoff_t;

// Runtime's default backoff parameters
extern kmp_backoff_t __kmp_spin_backoff_params;

// Backoff function
extern void __kmp_spin_backoff(kmp_backoff_t *);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif /* KMP_LOCK_H */