1 /*
2  * kmp_lock.h -- lock header file
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_LOCK_H
14 #define KMP_LOCK_H
15 
16 #include <limits.h> // CHAR_BIT
17 #include <stddef.h> // offsetof
18 
19 #include "kmp_debug.h"
20 #include "kmp_os.h"
21 
22 #ifdef __cplusplus
23 #include <atomic>
24 
25 extern "C" {
26 #endif // __cplusplus
27 
28 // ----------------------------------------------------------------------------
29 // Have to copy these definitions from kmp.h because kmp.h cannot be included
30 // due to circular dependencies.  Will undef these at end of file.
31 
32 #define KMP_PAD(type, sz)                                                      \
33   (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
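// Illustrative arithmetic: KMP_PAD rounds sizeof(type) up to the next
// multiple of sz. Assuming sz == 64 (a common CACHE_LINE value, used here
// only for the example):
//   sizeof(type) == 40:  40 + (64 - (39 % 64) - 1) == 40 + 24 ==  64
//   sizeof(type) == 64:  64 + (64 - (63 % 64) - 1) == 64 +  0 ==  64
//   sizeof(type) == 65:  65 + (64 - (64 % 64) - 1) == 65 + 63 == 128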
34 #define KMP_GTID_DNE (-2)
35 
36 // Forward declaration of ident and ident_t
37 
38 struct ident;
39 typedef struct ident ident_t;
40 
41 // End of copied code.
42 // ----------------------------------------------------------------------------
43 
44 // We need to know the size of the area we can assume that the compiler(s)
45 // allocated for objects of type omp_lock_t and omp_nest_lock_t.  The Intel
46 // compiler always allocates a pointer-sized area, as does visual studio.
47 //
48 // gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
49 // Intel archs.  It allocates at least 8 bytes for a nested lock (more on
50 // recent versions), but we are bounded by the pointer-sized chunks that
51 // the Intel compiler allocates.
52 
53 #if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT)
54 #define OMP_LOCK_T_SIZE sizeof(int)
55 #define OMP_NEST_LOCK_T_SIZE sizeof(void *)
56 #else
57 #define OMP_LOCK_T_SIZE sizeof(void *)
58 #define OMP_NEST_LOCK_T_SIZE sizeof(void *)
59 #endif
60 
61 // The Intel compiler allocates a 32-byte chunk for a critical section.
62 // Both gcc and visual studio only allocate enough space for a pointer.
63 // Sometimes we know that the space was allocated by the Intel compiler.
64 #define OMP_CRITICAL_SIZE sizeof(void *)
65 #define INTEL_CRITICAL_SIZE 32
66 
67 // lock flags
68 typedef kmp_uint32 kmp_lock_flags_t;
69 
70 #define kmp_lf_critical_section 1
71 
72 // When a lock table is used, the indices are of kmp_lock_index_t
73 typedef kmp_uint32 kmp_lock_index_t;
74 
75 // When memory allocated for locks are on the lock pool (free list),
76 // it is treated as structs of this type.
77 struct kmp_lock_pool {
78   union kmp_user_lock *next;
79   kmp_lock_index_t index;
80 };
81 
82 typedef struct kmp_lock_pool kmp_lock_pool_t;
83 
84 extern void __kmp_validate_locks(void);
85 
86 // ----------------------------------------------------------------------------
87 //  There are 5 lock implementations:
88 //       1. Test and set locks.
89 //       2. futex locks (Linux* OS on x86 and
90 //          Intel(R) Many Integrated Core Architecture)
91 //       3. Ticket (Lamport bakery) locks.
92 //       4. Queuing locks (with separate spin fields).
93 //       5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
94 //
95 //   and 3 lock purposes:
96 //       1. Bootstrap locks -- Used for a few locks available at library
97 //       startup-shutdown time.
98 //          These do not require non-negative global thread IDs.
99 //       2. Internal RTL locks -- Used everywhere else in the RTL
100 //       3. User locks (includes critical sections)
101 // ----------------------------------------------------------------------------
102 
103 // ============================================================================
104 // Lock implementations.
105 //
106 // Test and set locks.
107 //
108 // Non-nested test and set locks differ from the other lock kinds (except
109 // futex) in that we use the memory allocated by the compiler for the lock,
110 // rather than a pointer to it.
111 //
112 // On lin32, lin_32e, and win_32, the space allocated may be as small as 4
113 // bytes, so we have to use a lock table for nested locks, and avoid accessing
114 // the depth_locked field for non-nested locks.
115 //
116 // Information normally available to the tools, such as lock location, lock
117 // usage (normal lock vs. critical section), etc. is not available with test and
118 // set locks.
119 // ----------------------------------------------------------------------------
120 
121 struct kmp_base_tas_lock {
122   // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
123 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__
124   // Flip the ordering of the high and low 32-bit members to be consistent
125   // with the memory layout of the address in 64-bit big-endian.
126   kmp_int32 depth_locked; // depth locked, for nested locks only
127   std::atomic<kmp_int32> poll;
128 #else
129   std::atomic<kmp_int32> poll;
130   kmp_int32 depth_locked; // depth locked, for nested locks only
131 #endif
132 };
133 
134 typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
135 
136 union kmp_tas_lock {
137   kmp_base_tas_lock_t lk;
138   kmp_lock_pool_t pool; // make certain struct is large enough
139   double lk_align; // use worst case alignment; no cache line padding
140 };
141 
142 typedef union kmp_tas_lock kmp_tas_lock_t;
143 
144 // Static initializer for test and set lock variables. Usage:
145 //    kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
146 #define KMP_TAS_LOCK_INITIALIZER(lock)                                         \
147   {                                                                            \
148     { KMP_LOCK_FREE(tas), 0 }                                                  \
149   }
150 
151 extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
152 extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
153 extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
154 extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck);
155 extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck);
156 
157 extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
158 extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
159 extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
160 extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck);
161 extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck);
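
// Illustrative usage sketch only (the real call sites live elsewhere in the
// RTL); `gtid` is assumed to be a valid non-negative global thread id:
//
//   kmp_tas_lock_t lck;
//   __kmp_init_tas_lock(&lck);
//   __kmp_acquire_tas_lock(&lck, gtid); // spins until the lock is obtained
//   /* ... critical work ... */
//   __kmp_release_tas_lock(&lck, gtid);
//   __kmp_destroy_tas_lock(&lck);
//
// The nested variants additionally track lk.depth_locked; re-acquisition by
// the owning thread returns KMP_LOCK_ACQUIRED_NEXT rather than blocking.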
162 
163 #define KMP_LOCK_RELEASED 1
164 #define KMP_LOCK_STILL_HELD 0
165 #define KMP_LOCK_ACQUIRED_FIRST 1
166 #define KMP_LOCK_ACQUIRED_NEXT 0
167 #ifndef KMP_USE_FUTEX
168 #define KMP_USE_FUTEX                                                          \
169   (KMP_OS_LINUX &&                                                             \
170    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
171 #endif
172 #if KMP_USE_FUTEX
173 
174 // ----------------------------------------------------------------------------
175 // futex locks.  futex locks are only available on Linux* OS.
176 //
177 // Like non-nested test and set lock, non-nested futex locks use the memory
178 // allocated by the compiler for the lock, rather than a pointer to it.
179 //
180 // Information normally available to the tools, such as lock location, lock
181 // usage (normal lock vs. critical section), etc. is not available with test and
182 // set locks. With non-nested futex locks, the lock owner is not even available.
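//
// The mechanism, illustrated only (the real algorithm is in kmp_lock.cpp):
// a contended acquire parks the thread in the kernel instead of spinning,
// and a release wakes one waiter, roughly:
//
//   // sleep while the lock word still holds the observed "locked" value
//   syscall(SYS_futex, &lck->lk.poll, FUTEX_WAIT, poll_val, NULL, NULL, 0);
//   ...
//   // on release, wake one thread blocked on the same address
//   syscall(SYS_futex, &lck->lk.poll, FUTEX_WAKE, 1, NULL, NULL, 0);
//
// (SYS_futex, FUTEX_WAIT and FUTEX_WAKE come from <sys/syscall.h> and
// <linux/futex.h>; they are shown here only to explain the idea.)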
183 // ----------------------------------------------------------------------------
184 
185 struct kmp_base_futex_lock {
186   volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked
187   // locked: 2*(gtid+1) of the owning thread; the low bit is set while
188   // waiters are blocked on the futex in the kernel
189   kmp_int32 depth_locked; // depth locked, for nested locks only
190 };
191 
192 typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
193 
194 union kmp_futex_lock {
195   kmp_base_futex_lock_t lk;
196   kmp_lock_pool_t pool; // make certain struct is large enough
197   double lk_align; // use worst case alignment
198   // no cache line padding
199 };
200 
201 typedef union kmp_futex_lock kmp_futex_lock_t;
202 
203 // Static initializer for futex lock variables. Usage:
204 //    kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
205 #define KMP_FUTEX_LOCK_INITIALIZER(lock)                                       \
206   {                                                                            \
207     { KMP_LOCK_FREE(futex), 0 }                                                \
208   }
209 
210 extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
211 extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
212 extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
213 extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck);
214 extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck);
215 
216 extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck,
217                                            kmp_int32 gtid);
218 extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
219 extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck,
220                                            kmp_int32 gtid);
221 extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck);
222 extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck);
223 
224 #endif // KMP_USE_FUTEX
225 
226 // ----------------------------------------------------------------------------
227 // Ticket locks.
228 
229 #ifdef __cplusplus
230 
231 #ifdef _MSC_VER
232 // MSVC won't allow use of std::atomic<> in a union since it has a
233 // non-trivial copy constructor.
234 
235 struct kmp_base_ticket_lock {
236   // `initialized' must be the first entry in the lock data structure!
237   std::atomic_bool initialized;
238   volatile union kmp_ticket_lock *self; // points to the lock union
239   ident_t const *location; // Source code location of omp_init_lock().
240   std::atomic_uint
241       next_ticket; // ticket number to give to next thread which acquires
242   std::atomic_uint now_serving; // ticket number for thread which holds the lock
243   std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked
244   std::atomic_int depth_locked; // depth locked, for nested locks only
245   kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
246 };
247 #else
248 struct kmp_base_ticket_lock {
249   // `initialized' must be the first entry in the lock data structure!
250   std::atomic<bool> initialized;
251   volatile union kmp_ticket_lock *self; // points to the lock union
252   ident_t const *location; // Source code location of omp_init_lock().
253   std::atomic<unsigned>
254       next_ticket; // ticket number to give to next thread which acquires
255   std::atomic<unsigned>
256       now_serving; // ticket number for thread which holds the lock
257   std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked
258   std::atomic<int> depth_locked; // depth locked, for nested locks only
259   kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
260 };
261 #endif
262 
263 #else // __cplusplus
264 
265 struct kmp_base_ticket_lock;
266 
267 #endif // !__cplusplus
268 
269 typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
270 
271 union KMP_ALIGN_CACHE kmp_ticket_lock {
272   kmp_base_ticket_lock_t
273       lk; // This field must be first to allow static initializing.
274   kmp_lock_pool_t pool;
275   double lk_align; // use worst case alignment
276   char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)];
277 };
278 
279 typedef union kmp_ticket_lock kmp_ticket_lock_t;
280 
281 // Static initializer for simple ticket lock variables. Usage:
282 //    kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
283 // Note the macro argument; it is needed to properly initialize the self field.
284 #define KMP_TICKET_LOCK_INITIALIZER(lock)                                      \
285   {                                                                            \
286     { true, &(lock), NULL, 0U, 0U, 0, -1 }                                     \
287   }
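
// The ticket (Lamport bakery) algorithm in a nutshell -- a simplified sketch
// of what the routines below do with next_ticket/now_serving (owner_id,
// consistency checks and yielding are omitted; see kmp_lock.cpp):
//
//   unsigned my_ticket = lck->lk.next_ticket.fetch_add(1); // take a number
//   while (lck->lk.now_serving.load() != my_ticket)
//     ; // wait for our number to come up (the real code yields/backs off)
//   /* ... critical section ... */
//   lck->lk.now_serving.store(my_ticket + 1);              // admit the next waiter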
288 
289 extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
290 extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
291 extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck,
292                                              kmp_int32 gtid);
293 extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
294 extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck);
295 extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck);
296 
297 extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck,
298                                             kmp_int32 gtid);
299 extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck,
300                                          kmp_int32 gtid);
301 extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck,
302                                             kmp_int32 gtid);
303 extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck);
304 extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck);
305 
306 // ----------------------------------------------------------------------------
307 // Queuing locks.
308 
309 #if KMP_USE_ADAPTIVE_LOCKS
310 
311 struct kmp_adaptive_lock_info;
312 
313 typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
314 
315 #if KMP_DEBUG_ADAPTIVE_LOCKS
316 
317 struct kmp_adaptive_lock_statistics {
318   /* So we can get stats from locks that haven't been destroyed. */
319   kmp_adaptive_lock_info_t *next;
320   kmp_adaptive_lock_info_t *prev;
321 
322   /* Other statistics */
323   kmp_uint32 successfulSpeculations;
324   kmp_uint32 hardFailedSpeculations;
325   kmp_uint32 softFailedSpeculations;
326   kmp_uint32 nonSpeculativeAcquires;
327   kmp_uint32 nonSpeculativeAcquireAttempts;
328   kmp_uint32 lemmingYields;
329 };
330 
331 typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
332 
333 extern void __kmp_print_speculative_stats();
334 extern void __kmp_init_speculative_stats();
335 
336 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
337 
338 struct kmp_adaptive_lock_info {
339   /* Values used for adaptivity.
340      Although these are accessed from multiple threads we don't access them
341      atomically, because if we miss updates it probably doesn't matter much. (It
342      just affects our decision about whether to try speculation on the lock). */
343   kmp_uint32 volatile badness;
344   kmp_uint32 volatile acquire_attempts;
345   /* Parameters of the lock. */
346   kmp_uint32 max_badness;
347   kmp_uint32 max_soft_retries;
348 
349 #if KMP_DEBUG_ADAPTIVE_LOCKS
350   kmp_adaptive_lock_statistics_t volatile stats;
351 #endif
352 };
353 
354 #endif // KMP_USE_ADAPTIVE_LOCKS
355 
356 struct kmp_base_queuing_lock {
357 
358   //  `initialized' must be the first entry in the lock data structure!
359   volatile union kmp_queuing_lock
360       *initialized; // Points to the lock union if in initialized state.
361 
362   ident_t const *location; // Source code location of omp_init_lock().
363 
364   KMP_ALIGN(8) // tail_id  must be 8-byte aligned!
365 
366   volatile kmp_int32
367       tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
368   // Must be no padding here since head/tail used in 8-byte CAS
369   volatile kmp_int32
370       head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
371   // Decl order assumes little endian
372   // bakery-style lock
373   volatile kmp_uint32
374       next_ticket; // ticket number to give to next thread which acquires
375   volatile kmp_uint32
376       now_serving; // ticket number for thread which holds the lock
377   volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
378   kmp_int32 depth_locked; // depth locked, for nested locks only
379 
380   kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
381 };
382 
383 typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
384 
385 KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0);
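
// The assert above backs the "no padding" comment inside the struct: the
// enqueue/dequeue paths may update the (tail_id, head_id) pair as a single
// 64-bit unit, conceptually (illustrative only, assuming the 64-bit CAS
// primitive from kmp_os.h):
//
//   volatile kmp_int64 *pair = (volatile kmp_int64 *)&lck->lk.tail_id;
//   KMP_COMPARE_AND_STORE_REL64(pair, old_tail_head, new_tail_head);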
386 
387 union KMP_ALIGN_CACHE kmp_queuing_lock {
388   kmp_base_queuing_lock_t
389       lk; // This field must be first to allow static initializing.
390   kmp_lock_pool_t pool;
391   double lk_align; // use worst case alignment
392   char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)];
393 };
394 
395 typedef union kmp_queuing_lock kmp_queuing_lock_t;
396 
397 extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
398 extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
399 extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
400 extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck);
401 extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck);
402 
403 extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck,
404                                              kmp_int32 gtid);
405 extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck,
406                                           kmp_int32 gtid);
407 extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck,
408                                              kmp_int32 gtid);
409 extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck);
410 extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck);
411 
412 #if KMP_USE_ADAPTIVE_LOCKS
413 
414 // ----------------------------------------------------------------------------
415 // Adaptive locks.
416 struct kmp_base_adaptive_lock {
417   kmp_base_queuing_lock qlk;
418   KMP_ALIGN(CACHE_LINE)
419   kmp_adaptive_lock_info_t
420       adaptive; // Information for the speculative adaptive lock
421 };
422 
423 typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
424 
425 union KMP_ALIGN_CACHE kmp_adaptive_lock {
426   kmp_base_adaptive_lock_t lk;
427   kmp_lock_pool_t pool;
428   double lk_align;
429   char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)];
430 };
431 typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
432 
433 #define GET_QLK_PTR(l) ((kmp_queuing_lock_t *)&(l)->lk.qlk)
434 
435 #endif // KMP_USE_ADAPTIVE_LOCKS
436 
437 // ----------------------------------------------------------------------------
438 // DRDPA ticket locks.
439 struct kmp_base_drdpa_lock {
440   // All of the fields on the first cache line are only written when
441   // initializing or reconfiguring the lock.  These are relatively rare
442   // operations, so data from the first cache line will usually stay resident in
443   // the cache of each thread trying to acquire the lock.
444   //
445   // initialized must be the first entry in the lock data structure!
446   KMP_ALIGN_CACHE
447 
448   volatile union kmp_drdpa_lock
449       *initialized; // points to the lock union if in initialized state
450   ident_t const *location; // Source code location of omp_init_lock().
451   std::atomic<std::atomic<kmp_uint64> *> polls;
452   std::atomic<kmp_uint64> mask; // is 2**num_polls-1 for mod op
453   kmp_uint64 cleanup_ticket; // thread with cleanup ticket
454   std::atomic<kmp_uint64> *old_polls; // will deallocate old_polls
455   kmp_uint32 num_polls; // must be power of 2
456 
457   // next_ticket needs to exist in a separate cache line, as it is
458   // invalidated every time a thread takes a new ticket.
459   KMP_ALIGN_CACHE
460 
461   std::atomic<kmp_uint64> next_ticket;
462 
463   // now_serving is used to store our ticket value while we hold the lock. It
464   // has a slightly different meaning in the DRDPA ticket locks (where it is
465   // written by the acquiring thread) than it does in the simple ticket locks
466   // (where it is written by the releasing thread).
467   //
468   // Since now_serving is only read and written in the critical section,
469   // it is non-volatile, but it needs to exist on a separate cache line,
470   // as it is invalidated at every lock acquire.
471   //
472   // Likewise, the vars used for nested locks (owner_id and depth_locked) are
473   // only written by the thread owning the lock, so they are put in this cache
474   // line.  owner_id is read by other threads, so it must be declared volatile.
475   KMP_ALIGN_CACHE
476   kmp_uint64 now_serving; // doesn't have to be volatile
477   volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
478   kmp_int32 depth_locked; // depth locked
479   kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
480 };
481 
482 typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
483 
484 union KMP_ALIGN_CACHE kmp_drdpa_lock {
485   kmp_base_drdpa_lock_t
486       lk; // This field must be first to allow static initializing.
487   kmp_lock_pool_t pool;
488   double lk_align; // use worst case alignment
489   char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)];
490 };
491 
492 typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
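
// Rough sketch of the DRDPA protocol implemented by the routines below
// (atomic loads, the reconfiguration path and bookkeeping are elided; see
// kmp_lock.cpp for the authoritative code):
//
//   kmp_uint64 my_ticket = lck->lk.next_ticket++;        // atomic fetch-add
//   while (lck->lk.polls[my_ticket & lck->lk.mask] < my_ticket)
//     ;                                                  // spin on "our" poll slot
//   lck->lk.now_serving = my_ticket;                     // remembered for release
//   /* ... critical section ... */
//   // hand the lock to the next ticket holder via its own poll slot
//   lck->lk.polls[(my_ticket + 1) & lck->lk.mask] = my_ticket + 1;
//
// Each waiter spins on its own poll entry (a distinct cache line), which is
// what distinguishes DRDPA from the simple ticket lock above.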
493 
494 extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
495 extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
496 extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
497 extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck);
498 extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck);
499 
500 extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
501                                            kmp_int32 gtid);
502 extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
503 extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
504                                            kmp_int32 gtid);
505 extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
506 extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
507 
508 // ============================================================================
509 // Lock purposes.
510 // ============================================================================
511 
512 // Bootstrap locks.
513 //
514 // Bootstrap locks -- very few locks used at library initialization time.
515 // Bootstrap locks are currently implemented as ticket locks.
516 // They could also be implemented as test and set lock, but cannot be
517 // implemented with other lock kinds as they require gtids which are not
518 // available at initialization time.
519 
520 typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
521 
522 #define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock) KMP_TICKET_LOCK_INITIALIZER((lock))
523 #define KMP_BOOTSTRAP_LOCK_INIT(lock)                                          \
524   kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock)
525 
526 static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
527   return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE);
528 }
529 
530 static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
531   return __kmp_test_ticket_lock(lck, KMP_GTID_DNE);
532 }
533 
534 static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
535   __kmp_release_ticket_lock(lck, KMP_GTID_DNE);
536 }
537 
538 static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
539   __kmp_init_ticket_lock(lck);
540 }
541 
542 static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
543   __kmp_destroy_ticket_lock(lck);
544 }
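
// Illustrative usage (the lock name below is hypothetical; real examples such
// as __kmp_initz_lock are defined in the RTL sources):
//
//   KMP_BOOTSTRAP_LOCK_INIT(__kmp_example_lock); // statically initialized
//   ...
//   __kmp_acquire_bootstrap_lock(&__kmp_example_lock);
//   /* ... startup/shutdown work; a gtid may not exist yet ... */
//   __kmp_release_bootstrap_lock(&__kmp_example_lock);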
545 
546 // Internal RTL locks.
547 //
548 // Internal RTL locks are also implemented as ticket locks, for now.
549 //
550 // FIXME - We should go through and figure out which lock kind works best for
551 // each internal lock, and use the type declaration and function calls for
552 // that explicit lock kind (and get rid of this section).
553 
554 typedef kmp_ticket_lock_t kmp_lock_t;
555 
556 #define KMP_LOCK_INIT(lock) kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock)
557 
558 static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) {
559   return __kmp_acquire_ticket_lock(lck, gtid);
560 }
561 
562 static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) {
563   return __kmp_test_ticket_lock(lck, gtid);
564 }
565 
566 static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) {
567   __kmp_release_ticket_lock(lck, gtid);
568 }
569 
570 static inline void __kmp_init_lock(kmp_lock_t *lck) {
571   __kmp_init_ticket_lock(lck);
572 }
573 
574 static inline void __kmp_destroy_lock(kmp_lock_t *lck) {
575   __kmp_destroy_ticket_lock(lck);
576 }
577 
578 // User locks.
579 //
580 // Do not allocate objects of type union kmp_user_lock!!! This will waste space
581 // unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of
582 // __kmp_user_lock_kind and allocate objects of the type of the appropriate
583 // union member, and cast their addresses to kmp_user_lock_p.
584 
585 enum kmp_lock_kind {
586   lk_default = 0,
587   lk_tas,
588 #if KMP_USE_FUTEX
589   lk_futex,
590 #endif
591 #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
592   lk_hle,
593   lk_rtm_queuing,
594   lk_rtm_spin,
595 #endif
596   lk_ticket,
597   lk_queuing,
598   lk_drdpa,
599 #if KMP_USE_ADAPTIVE_LOCKS
600   lk_adaptive
601 #endif // KMP_USE_ADAPTIVE_LOCKS
602 };
603 
604 typedef enum kmp_lock_kind kmp_lock_kind_t;
605 
606 extern kmp_lock_kind_t __kmp_user_lock_kind;
607 
608 union kmp_user_lock {
609   kmp_tas_lock_t tas;
610 #if KMP_USE_FUTEX
611   kmp_futex_lock_t futex;
612 #endif
613   kmp_ticket_lock_t ticket;
614   kmp_queuing_lock_t queuing;
615   kmp_drdpa_lock_t drdpa;
616 #if KMP_USE_ADAPTIVE_LOCKS
617   kmp_adaptive_lock_t adaptive;
618 #endif // KMP_USE_ADAPTIVE_LOCKS
619   kmp_lock_pool_t pool;
620 };
621 
622 typedef union kmp_user_lock *kmp_user_lock_p;
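
// Illustrative only -- per the note above, size the allocation from the
// selected lock kind rather than sizeof(union kmp_user_lock). The RTL records
// the chosen size in __kmp_user_lock_size (declared below for the
// non-dynamic-lock build); a hand-rolled equivalent would look roughly like:
//
//   size_t sz = 0;
//   switch (__kmp_user_lock_kind) {
//   case lk_tas:     sz = sizeof(kmp_tas_lock_t);     break;
//   case lk_ticket:  sz = sizeof(kmp_ticket_lock_t);  break;
//   case lk_queuing: sz = sizeof(kmp_queuing_lock_t); break;
//   case lk_drdpa:   sz = sizeof(kmp_drdpa_lock_t);   break;
//   default:         sz = sizeof(union kmp_user_lock); break; // fallback
//   }
//   kmp_user_lock_p lck = (kmp_user_lock_p)malloc(sz); // or the RTL allocator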
623 
624 #if !KMP_USE_DYNAMIC_LOCK
625 
626 extern size_t __kmp_base_user_lock_size;
627 extern size_t __kmp_user_lock_size;
628 
629 extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck);
630 
631 static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) {
632   KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL);
633   return (*__kmp_get_user_lock_owner_)(lck);
634 }
635 
636 extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
637                                                    kmp_int32 gtid);
638 
639 #if KMP_OS_LINUX &&                                                            \
640     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
641 
642 #define __kmp_acquire_user_lock_with_checks(lck, gtid)                         \
643   if (__kmp_user_lock_kind == lk_tas) {                                        \
644     if (__kmp_env_consistency_check) {                                         \
645       char const *const func = "omp_set_lock";                                 \
646       if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&                       \
647           lck->tas.lk.depth_locked != -1) {                                    \
648         KMP_FATAL(LockNestableUsedAsSimple, func);                             \
649       }                                                                        \
650       if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) {                     \
651         KMP_FATAL(LockIsAlreadyOwned, func);                                   \
652       }                                                                        \
653     }                                                                          \
654     if (lck->tas.lk.poll != 0 ||                                               \
655         !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {     \
656       kmp_uint32 spins;                                                        \
657       kmp_uint64 time;                                                         \
658       KMP_FSYNC_PREPARE(lck);                                                  \
659       KMP_INIT_YIELD(spins);                                                   \
660       KMP_INIT_BACKOFF(time);                                                  \
661       do {                                                                     \
662         KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);                              \
663       } while (                                                                \
664           lck->tas.lk.poll != 0 ||                                             \
665           !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));    \
666     }                                                                          \
667     KMP_FSYNC_ACQUIRED(lck);                                                   \
668   } else {                                                                     \
669     KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL);            \
670     (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);                        \
671   }
672 
673 #else
674 static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck,
675                                                       kmp_int32 gtid) {
676   KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL);
677   return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
678 }
679 #endif
680 
681 extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
682                                                 kmp_int32 gtid);
683 
684 #if KMP_OS_LINUX &&                                                            \
685     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
686 
687 #include "kmp_i18n.h" /* AC: KMP_FATAL definition */
688 extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
689 static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
690                                                    kmp_int32 gtid) {
691   if (__kmp_user_lock_kind == lk_tas) {
692     if (__kmp_env_consistency_check) {
693       char const *const func = "omp_test_lock";
694       if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
695           lck->tas.lk.depth_locked != -1) {
696         KMP_FATAL(LockNestableUsedAsSimple, func);
697       }
698     }
699     return ((lck->tas.lk.poll == 0) &&
700             __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
701   } else {
702     KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
703     return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
704   }
705 }
706 #else
707 static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
708                                                    kmp_int32 gtid) {
709   KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
710   return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
711 }
712 #endif
713 
714 extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
715                                                    kmp_int32 gtid);
716 
717 static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck,
718                                                        kmp_int32 gtid) {
719   KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL);
720   (*__kmp_release_user_lock_with_checks_)(lck, gtid);
721 }
722 
723 extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck);
724 
725 static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) {
726   KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL);
727   (*__kmp_init_user_lock_with_checks_)(lck);
728 }
729 
730 // We need a non-checking version of destroy lock for when the RTL is
731 // doing the cleanup as it can't always tell if the lock is nested or not.
732 extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck);
733 
734 static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) {
735   KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL);
736   (*__kmp_destroy_user_lock_)(lck);
737 }
738 
739 extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck);
740 
741 static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) {
742   KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL);
743   (*__kmp_destroy_user_lock_with_checks_)(lck);
744 }
745 
746 extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
747                                                           kmp_int32 gtid);
748 
749 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
750 
751 #define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth)           \
752   if (__kmp_user_lock_kind == lk_tas) {                                        \
753     if (__kmp_env_consistency_check) {                                         \
754       char const *const func = "omp_set_nest_lock";                            \
755       if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) &&                  \
756           lck->tas.lk.depth_locked == -1) {                                    \
757         KMP_FATAL(LockSimpleUsedAsNestable, func);                             \
758       }                                                                        \
759     }                                                                          \
760     if (lck->tas.lk.poll - 1 == gtid) {                                        \
761       lck->tas.lk.depth_locked += 1;                                           \
762       *depth = KMP_LOCK_ACQUIRED_NEXT;                                         \
763     } else {                                                                   \
764       if ((lck->tas.lk.poll != 0) ||                                           \
765           !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {   \
766         kmp_uint32 spins;                                                      \
767         kmp_uint64 time;                                                       \
768         KMP_FSYNC_PREPARE(lck);                                                \
769         KMP_INIT_YIELD(spins);                                                 \
770         KMP_INIT_BACKOFF(time);                                                \
771         do {                                                                   \
772           KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);                            \
773         } while (                                                              \
774             (lck->tas.lk.poll != 0) ||                                         \
775             !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));  \
776       }                                                                        \
777       lck->tas.lk.depth_locked = 1;                                            \
778       *depth = KMP_LOCK_ACQUIRED_FIRST;                                        \
779     }                                                                          \
780     KMP_FSYNC_ACQUIRED(lck);                                                   \
781   } else {                                                                     \
782     KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL);     \
783     *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);        \
784   }
785 
786 #else
787 static inline void
788 __kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid,
789                                            int *depth) {
790   KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL);
791   *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);
792 }
793 #endif
794 
795 extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
796                                                        kmp_int32 gtid);
797 
798 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
799 static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
800                                                           kmp_int32 gtid) {
801   if (__kmp_user_lock_kind == lk_tas) {
802     int retval;
803     if (__kmp_env_consistency_check) {
804       char const *const func = "omp_test_nest_lock";
805       if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) &&
806           lck->tas.lk.depth_locked == -1) {
807         KMP_FATAL(LockSimpleUsedAsNestable, func);
808       }
809     }
810     KMP_DEBUG_ASSERT(gtid >= 0);
811     if (lck->tas.lk.poll - 1 ==
812         gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
813       return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
814     }
815     retval = ((lck->tas.lk.poll == 0) &&
816               __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
817     if (retval) {
818       KMP_MB();
819       lck->tas.lk.depth_locked = 1;
820     }
821     return retval;
822   } else {
823     KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
824     return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
825   }
826 }
827 #else
828 static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
829                                                           kmp_int32 gtid) {
830   KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
831   return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
832 }
833 #endif
834 
835 extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
836                                                           kmp_int32 gtid);
837 
838 static inline int
839 __kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
840                                            kmp_int32 gtid) {
841   KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL);
842   return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
843 }
844 
845 extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
846 
847 static inline void
848 __kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
849   KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL);
850   (*__kmp_init_nested_user_lock_with_checks_)(lck);
851 }
852 
853 extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
854 
855 static inline void
856 __kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
857   KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL);
858   (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
859 }
860 
861 // user lock functions which do not necessarily exist for all lock kinds.
862 //
863 // The "set" functions usually have wrapper routines that check for a NULL set
864 // function pointer and call it if non-NULL.
865 //
866 // In some cases, it makes sense to have a "get" wrapper function check for a
867 // NULL get function pointer and return NULL / invalid value / error code if
868 // the function pointer is NULL.
869 //
870 // In other cases, the calling code really should differentiate between an
871 // unimplemented function and one that is implemented but returning NULL /
872 // invalid value.  If this is the case, no get function wrapper exists.
873 
874 extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
875 
876 // no set function; fields set during local allocation
877 
878 extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
879 
880 static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
881   if (__kmp_get_user_lock_location_ != NULL) {
882     return (*__kmp_get_user_lock_location_)(lck);
883   } else {
884     return NULL;
885   }
886 }
887 
888 extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
889                                              const ident_t *loc);
890 
891 static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
892                                                 const ident_t *loc) {
893   if (__kmp_set_user_lock_location_ != NULL) {
894     (*__kmp_set_user_lock_location_)(lck, loc);
895   }
896 }
897 
898 extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);
899 
900 extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
901                                           kmp_lock_flags_t flags);
902 
903 static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
904                                              kmp_lock_flags_t flags) {
905   if (__kmp_set_user_lock_flags_ != NULL) {
906     (*__kmp_set_user_lock_flags_)(lck, flags);
907   }
908 }
909 
910 // The function which sets up all of the vtbl pointers for kmp_user_lock_t.
911 extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
912 
913 // Macros for binding user lock functions.
914 #define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix)                        \
915   {                                                                            \
916     __kmp_acquire##nest##user_lock_with_checks_ = (int (*)(                    \
917         kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix;      \
918     __kmp_release##nest##user_lock_with_checks_ = (int (*)(                    \
919         kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix;      \
920     __kmp_test##nest##user_lock_with_checks_ = (int (*)(                       \
921         kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix;         \
922     __kmp_init##nest##user_lock_with_checks_ =                                 \
923         (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix;          \
924     __kmp_destroy##nest##user_lock_with_checks_ =                              \
925         (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix;       \
926   }
927 
928 #define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
929 #define KMP_BIND_USER_LOCK_WITH_CHECKS(kind)                                   \
930   KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
931 #define KMP_BIND_NESTED_USER_LOCK(kind)                                        \
932   KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
933 #define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind)                            \
934   KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
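
// For example, KMP_BIND_USER_LOCK(ticket) expands (in part) to:
//
//   __kmp_acquire_user_lock_with_checks_ =
//       (int (*)(kmp_user_lock_p, kmp_int32))__kmp_acquire_ticket_lock;
//
// i.e. the generic user-lock entry points are pointed at the ticket-lock
// routines declared earlier in this file.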
935 
936 // User lock table & lock allocation
937 /* On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
938    memory for a lock variable, which is not enough to store a pointer, so we
939    have to use lock indexes instead of pointers and maintain a lock table to
940    map indexes to pointers.
941 
942 
943    Note: The first element of the table is not a pointer to lock! It is a
944    pointer to previously allocated table (or NULL if it is the first table).
945 
946    Usage:
947 
948    if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
949      Lock table is fully utilized. User locks are indexes, so table is used on
950      user lock operation.
951      Note: it may be the case (lin_32) that we don't need to use a lock
952      table for regular locks, but do need the table for nested locks.
953    }
954    else {
955      Lock table initialized but not actually used.
956    }
957 */
958 
959 struct kmp_lock_table {
960   kmp_lock_index_t used; // Number of used elements
961   kmp_lock_index_t allocated; // Number of allocated elements
962   kmp_user_lock_p *table; // Lock table.
963 };
964 
965 typedef struct kmp_lock_table kmp_lock_table_t;
966 
967 extern kmp_lock_table_t __kmp_user_lock_table;
968 extern kmp_user_lock_p __kmp_lock_pool;
969 
970 struct kmp_block_of_locks {
971   struct kmp_block_of_locks *next_block;
972   void *locks;
973 };
974 
975 typedef struct kmp_block_of_locks kmp_block_of_locks_t;
976 
977 extern kmp_block_of_locks_t *__kmp_lock_blocks;
978 extern int __kmp_num_locks_in_block;
979 
980 extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
981                                                 kmp_int32 gtid,
982                                                 kmp_lock_flags_t flags);
983 extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
984                                  kmp_user_lock_p lck);
985 extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
986                                               char const *func);
987 extern void __kmp_cleanup_user_locks();
988 
989 #define KMP_CHECK_USER_LOCK_INIT()                                             \
990   {                                                                            \
991     if (!TCR_4(__kmp_init_user_locks)) {                                       \
992       __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);                         \
993       if (!TCR_4(__kmp_init_user_locks)) {                                     \
994         TCW_4(__kmp_init_user_locks, TRUE);                                    \
995       }                                                                        \
996       __kmp_release_bootstrap_lock(&__kmp_initz_lock);                         \
997     }                                                                          \
998   }
999 
1000 #endif // KMP_USE_DYNAMIC_LOCK
1001 
1002 #undef KMP_PAD
1003 #undef KMP_GTID_DNE
1004 
1005 #if KMP_USE_DYNAMIC_LOCK
1006 // KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
1007 // breaking existing compatibility. The essential functionality of this code is
1008 // dynamic dispatch, but it also implements (or enables implementation of)
1009 // hinted user locks and critical sections, which are part of OpenMP 4.5.
1010 //
1011 // The lock type is decided at creation time (i.e., lock initialization), and
1012 // each subsequent lock function call on the created lock object requires type
1013 // extraction and a call through a jump table using the extracted type. This
1014 // type information is stored in two different ways depending on the size of
1015 // the lock object, and we differentiate lock types by this size requirement:
1016 // direct and indirect locks.
1017 //
1018 // Direct locks:
1019 // A direct lock object fits into the space created by the compiler for an
1020 // omp_lock_t object, and the TAS/Futex locks fall into this category. We use
1021 // the low byte of the lock object as storage for the lock type, and appropriate
1022 // bit operations are required to access the data meaningful to the lock
1023 // algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is
1024 // written to the LSB of the lock object. The newly introduced "hle" lock is
1025 // also a direct lock.
1026 //
1027 // Indirect locks:
1028 // An indirect lock object requires more space than the compiler-generated
1029 // space, and it must be allocated from the heap. Depending on the size of the
1030 // compiler-generated space for the lock (i.e., the size of omp_lock_t), this
1031 // omp_lock_t object stores either the address of the heap-allocated indirect
1032 // lock (void * fits in the object) or an index to the indirect lock table
1033 // entry that holds the address. The Ticket/Queuing/DRDPA/Adaptive locks fall
1034 // into this category, and the newly introduced "rtm" lock is also an indirect
1035 // lock implemented on top of the Queuing lock. When the omp_lock_t object holds
1036 // an index (not a lock address), 0 is written to the LSB to differentiate the
1037 // lock from a direct lock, and the remaining bits are the actual index into the
1038 // indirect lock table.
1039 
1040 #include <stdint.h> // for uintptr_t
1041 
1042 // Shortcuts
1043 #define KMP_USE_INLINED_TAS                                                    \
1044   (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
1045 #define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
1046 
1047 // List of lock definitions; all nested locks are indirect locks.
1048 // The hle lock is an xchg lock prefixed with XACQUIRE/XRELEASE.
1050 #if KMP_USE_TSX
1051 #if KMP_USE_FUTEX
1052 #define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a)
1053 #define KMP_FOREACH_I_LOCK(m, a)                                               \
1054   m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a)      \
1055       m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)                  \
1056           m(nested_queuing, a) m(nested_drdpa, a)
1057 #else
1058 #define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) m(rtm_spin, a)
1059 #define KMP_FOREACH_I_LOCK(m, a)                                               \
1060   m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a)      \
1061       m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a)                \
1062           m(nested_drdpa, a)
1063 #endif // KMP_USE_FUTEX
1064 #define KMP_LAST_D_LOCK lockseq_rtm_spin
1065 #else
1066 #if KMP_USE_FUTEX
1067 #define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
1068 #define KMP_FOREACH_I_LOCK(m, a)                                               \
1069   m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a)   \
1070       m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a)
1071 #define KMP_LAST_D_LOCK lockseq_futex
1072 #else
1073 #define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
1074 #define KMP_FOREACH_I_LOCK(m, a)                                               \
1075   m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a)  \
1076       m(nested_queuing, a) m(nested_drdpa, a)
1077 #define KMP_LAST_D_LOCK lockseq_tas
1078 #endif // KMP_USE_FUTEX
1079 #endif // KMP_USE_TSX
1080 
1081 // Information used in dynamic dispatch
1082 #define KMP_LOCK_SHIFT                                                         \
1083   8 // number of low bits to be used as tag for direct locks
1084 #define KMP_FIRST_D_LOCK lockseq_tas
1085 #define KMP_FIRST_I_LOCK lockseq_ticket
1086 #define KMP_LAST_I_LOCK lockseq_nested_drdpa
1087 #define KMP_NUM_I_LOCKS                                                        \
1088   (locktag_nested_drdpa + 1) // number of indirect lock types
1089 
1090 // Base type for dynamic locks.
1091 typedef kmp_uint32 kmp_dyna_lock_t;
1092 
1093 // Lock sequence that enumerates all lock kinds. Always make this enumeration
1094 // consistent with kmp_lockseq_t in the include directory.
1095 typedef enum {
1096   lockseq_indirect = 0,
1097 #define expand_seq(l, a) lockseq_##l,
1098   KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0)
1099 #undef expand_seq
1100 } kmp_dyna_lockseq_t;
1101 
1102 // Enumerates indirect lock tags.
1103 typedef enum {
1104 #define expand_tag(l, a) locktag_##l,
1105   KMP_FOREACH_I_LOCK(expand_tag, 0)
1106 #undef expand_tag
1107 } kmp_indirect_locktag_t;
1108 
1109 // Utility macros that extract information from lock sequences.
1110 #define KMP_IS_D_LOCK(seq)                                                     \
1111   ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
1112 #define KMP_IS_I_LOCK(seq)                                                     \
1113   ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
1114 #define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK)
1115 #define KMP_GET_D_TAG(seq) ((seq) << 1 | 1)
1116 
1117 // Enumerates direct lock tags starting from indirect tag.
1118 typedef enum {
1119 #define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
1120   KMP_FOREACH_D_LOCK(expand_tag, 0)
1121 #undef expand_tag
1122 } kmp_direct_locktag_t;
1123 
1124 // Indirect lock type
1125 typedef struct {
1126   kmp_user_lock_p lock;
1127   kmp_indirect_locktag_t type;
1128 } kmp_indirect_lock_t;
1129 
1130 // Function tables for direct locks. Set/unset/test differentiate functions
1131 // with/without consistency checking.
1132 extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
1133 extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
1134 extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
1135 extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
1136 extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
1137 
1138 // Function tables for indirect locks. Set/unset/test differentiate functions
1139 // with/without consistency checking.
1140 extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
1141 extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
1142 extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
1143 extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
1144 extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
1145 
1146 // Extracts direct lock tag from a user lock pointer
1147 #define KMP_EXTRACT_D_TAG(l)                                                   \
1148   ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll &                       \
1149    ((1 << KMP_LOCK_SHIFT) - 1) &                                               \
1150    -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1))
1151 
1152 // Extracts indirect lock index from a user lock pointer
1153 #define KMP_EXTRACT_I_INDEX(l)                                                 \
1154   ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1)
1155 
1156 // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
1157 // *) and op (operation type).
1158 #define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
1159 
1160 // Returns function pointer to the indirect lock function with l
1161 // (kmp_indirect_lock_t *) and op (operation type).
1162 #define KMP_I_LOCK_FUNC(l, op)                                                 \
1163   __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
1164 
1165 // Initializes a direct lock with the given lock pointer and lock sequence.
1166 #define KMP_INIT_D_LOCK(l, seq)                                                \
1167   __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
1168 
1169 // Initializes an indirect lock with the given lock pointer and lock sequence.
1170 #define KMP_INIT_I_LOCK(l, seq)                                                \
1171   __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
1172 
1173 // Returns "free" lock value for the given lock type.
1174 #define KMP_LOCK_FREE(type) (locktag_##type)
1175 
1176 // Returns "busy" lock value for the given lock type.
1177 #define KMP_LOCK_BUSY(v, type) ((v) << KMP_LOCK_SHIFT | locktag_##type)
1178 
1179 // Returns lock value after removing (shifting) lock tag.
1180 #define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT)
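
// Worked example of the direct-lock encoding, derived from the macros and
// enums above: lockseq_tas == 1, so locktag_tas == KMP_GET_D_TAG(lockseq_tas)
// == 3. A free TAS lock therefore holds KMP_LOCK_FREE(tas) == 3 (odd value =>
// direct lock), a TAS lock held by gtid g holds
// KMP_LOCK_BUSY(g + 1, tas) == ((g + 1) << KMP_LOCK_SHIFT) | 3, and
// KMP_LOCK_STRIP() recovers g + 1 from that value.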
1181 
1182 // Initializes global states and data structures for managing dynamic user
1183 // locks.
1184 extern void __kmp_init_dynamic_user_locks();
1185 
1186 // Allocates and returns an indirect lock with the given indirect lock tag.
1187 extern kmp_indirect_lock_t *
1188 __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
1189 
1190 // Cleans up global states and data structures for managing dynamic user locks.
1191 extern void __kmp_cleanup_indirect_user_locks();
1192 
1193 // Default user lock sequence when not using hinted locks.
1194 extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
1195 
1196 // Jump table for "set lock location", available only for indirect locks.
1197 extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
1198                                                             const ident_t *);
1199 #define KMP_SET_I_LOCK_LOCATION(lck, loc)                                      \
1200   {                                                                            \
1201     if (__kmp_indirect_set_location[(lck)->type] != NULL)                      \
1202       __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc);              \
1203   }
1204 
1205 // Jump table for "set lock flags", available only for indirect locks.
1206 extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
1207                                                          kmp_lock_flags_t);
1208 #define KMP_SET_I_LOCK_FLAGS(lck, flag)                                        \
1209   {                                                                            \
1210     if (__kmp_indirect_set_flags[(lck)->type] != NULL)                         \
1211       __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag);                \
1212   }
1213 
1214 // Jump table for "get lock location", available only for indirect locks.
1215 extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
1216     kmp_user_lock_p);
1217 #define KMP_GET_I_LOCK_LOCATION(lck)                                           \
1218   (__kmp_indirect_get_location[(lck)->type] != NULL                            \
1219        ? __kmp_indirect_get_location[(lck)->type]((lck)->lock)                 \
1220        : NULL)
1221 
1222 // Jump table for "get lock flags", available only for indirect locks.
1223 extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
1224     kmp_user_lock_p);
1225 #define KMP_GET_I_LOCK_FLAGS(lck)                                              \
1226   (__kmp_indirect_get_flags[(lck)->type] != NULL                               \
1227        ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock)                    \
1228        : NULL)
1229 
1230 // number of kmp_indirect_lock_t objects to be allocated together
1231 #define KMP_I_LOCK_CHUNK 1024
1232 // Keep at a power of 2 since it is used in multiplication & division
1233 KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0);
1234 // number of row entries in the initial lock table
1235 #define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
1236 
1237 // Lock table for indirect locks.
1238 typedef struct kmp_indirect_lock_table {
1239   kmp_indirect_lock_t **table; // blocks of indirect locks allocated
1240   kmp_uint32 nrow_ptrs; // number of *table pointer entries in table
1241   kmp_lock_index_t next; // index to the next lock to be allocated
1242   struct kmp_indirect_lock_table *next_table;
1243 } kmp_indirect_lock_table_t;
1244 
1245 extern kmp_indirect_lock_table_t __kmp_i_lock_table;
1246 
1247 // Returns the indirect lock associated with the given index.
1248 // Returns nullptr if no lock at given index
1249 static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
1250   kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
1251   while (lock_table) {
1252     kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
1253     if (idx < max_locks) {
1254       kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
1255       kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
1256       if (!lock_table->table[row] || idx >= lock_table->next)
1257         break;
1258       return &lock_table->table[row][col];
1259     }
1260     idx -= max_locks;
1261     lock_table = lock_table->next_table;
1262   }
1263   return nullptr;
1264 }
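
// Worked example, assuming only the initial table is in use (nrow_ptrs ==
// KMP_I_LOCK_TABLE_INIT_NROW_PTRS == 8, so max_locks == 8 * 1024 == 8192):
// idx == 1500 maps to row == 1500 / 1024 == 1 and col == 1500 % 1024 == 476,
// i.e. &__kmp_i_lock_table.table[1][476]. An idx >= 8192 has 8192 subtracted
// and is looked up in *next_table instead.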
1265 
1266 // Number of locks in a lock block, which is fixed to "1" now.
1267 // TODO: There is no lock block implementation now. If we add support, we need
1268 // to manage a lock block data structure for each indirect lock type.
1269 extern int __kmp_num_locks_in_block;
1270 
1271 // Fast lock table lookup without consistency checking
1272 #define KMP_LOOKUP_I_LOCK(l)                                                   \
1273   ((OMP_LOCK_T_SIZE < sizeof(void *))                                          \
1274        ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l))                              \
1275        : *((kmp_indirect_lock_t **)(l)))
1276 
1277 // Used once in kmp_error.cpp
1278 extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
1279 
1280 #else // KMP_USE_DYNAMIC_LOCK
1281 
1282 #define KMP_LOCK_BUSY(v, type) (v)
1283 #define KMP_LOCK_FREE(type) 0
1284 #define KMP_LOCK_STRIP(v) (v)
1285 
1286 #endif // KMP_USE_DYNAMIC_LOCK
1287 
1288 // data structure for using backoff within spin locks.
1289 typedef struct {
1290   kmp_uint32 step; // current step
1291   kmp_uint32 max_backoff; // upper bound of outer delay loop
1292   kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
1293 } kmp_backoff_t;
1294 
1295 // Runtime's default backoff parameters
1296 extern kmp_backoff_t __kmp_spin_backoff_params;
1297 
1298 // Backoff function
1299 extern void __kmp_spin_backoff(kmp_backoff_t *);
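
// Illustrative spin-wait pattern (the delay policy itself is implemented in
// kmp_lock.cpp); try_acquire() stands in for whatever CAS the caller uses:
//
//   kmp_backoff_t backoff = __kmp_spin_backoff_params; // start from defaults
//   while (!try_acquire(lck))
//     __kmp_spin_backoff(&backoff); // delay grows up to max_backoff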
1300 
1301 #ifdef __cplusplus
1302 } // extern "C"
1303 #endif // __cplusplus
1304 
1305 #endif /* KMP_LOCK_H */
1306