xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <stddef.h>
14 #include <atomic>
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if KMP_USE_FUTEX
25 #include <sys/syscall.h>
26 #include <unistd.h>
27 // We should really include <futex.h>, but that causes compatibility problems
28 // on different Linux* OS distributions, which either require that you include
29 // <pci/types.h> or break when you try to include it. Since all we need is the
30 // two macros below (which are part of the kernel ABI, so they can't change),
31 // we just define the constants here and don't include <futex.h>.
32 #ifndef FUTEX_WAIT
33 #define FUTEX_WAIT 0
34 #endif
35 #ifndef FUTEX_WAKE
36 #define FUTEX_WAKE 1
37 #endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
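// The wrap-around check below matters because the ticket (bakery) locks later
// in this file compute queue lengths such as (next_ticket - now_serving) in
// unsigned arithmetic; correctness relies on that difference staying right
// when the counters wrap past 2^32.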
43 void __kmp_validate_locks(void) {
44   int i;
45   kmp_uint32 x, y;
46 
47   /* Check to make sure unsigned arithmetic wraps around properly */
48   x = ~((kmp_uint32)0) - 2;
49   y = x - 2;
50 
51   for (i = 0; i < 8; ++i, ++x, ++y) {
52     kmp_uint32 z = (x - y);
53     KMP_ASSERT(z == 2);
54   }
55 
56   KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57 }
58 
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
61 
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4-byte pointer on the IA-32 architecture. On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66 //
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69 
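// A sketch of the poll encoding assumed by the routines below (taking the
// non-dynamic-lock case, where KMP_LOCK_FREE(tas) is 0, KMP_LOCK_BUSY(v, tas)
// is just v, and KMP_LOCK_STRIP is the identity):
//
//   poll == 0         the lock is free
//   poll == gtid + 1  the lock is held by the thread with global tid gtid
//
// hence the strip-and-subtract-one in the owner query.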
70 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71   return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72 }
73 
74 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75   return lck->lk.depth_locked != -1;
76 }
77 
78 __forceinline static int
79 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80   KMP_MB();
81 
82 #ifdef USE_LOCK_PROFILE
83   kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84   if ((curr != 0) && (curr != gtid + 1))
85     __kmp_printf("LOCK CONTENTION: %p\n", lck);
86 /* else __kmp_printf( "." );*/
87 #endif /* USE_LOCK_PROFILE */
88 
89   kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90   kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91 
92   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93       __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94     KMP_FSYNC_ACQUIRED(lck);
95     return KMP_LOCK_ACQUIRED_FIRST;
96   }
97 
98   kmp_uint32 spins;
99   KMP_FSYNC_PREPARE(lck);
100   KMP_INIT_YIELD(spins);
101   kmp_backoff_t backoff = __kmp_spin_backoff_params;
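  // Contended path: back off and yield between attempts, and retry the CAS
  // only once the relaxed load has seen the lock become free again
  // (test-and-test-and-set).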
102   do {
103     __kmp_spin_backoff(&backoff);
104     KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
105   } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
106            !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
107   KMP_FSYNC_ACQUIRED(lck);
108   return KMP_LOCK_ACQUIRED_FIRST;
109 }
110 
111 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
112   int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
113   return retval;
114 }
115 
116 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
117                                               kmp_int32 gtid) {
118   char const *const func = "omp_set_lock";
119   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
120       __kmp_is_tas_lock_nestable(lck)) {
121     KMP_FATAL(LockNestableUsedAsSimple, func);
122   }
123   if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
124     KMP_FATAL(LockIsAlreadyOwned, func);
125   }
126   return __kmp_acquire_tas_lock(lck, gtid);
127 }
128 
129 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
130   kmp_int32 tas_free = KMP_LOCK_FREE(tas);
131   kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
132   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
133       __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
134     KMP_FSYNC_ACQUIRED(lck);
135     return TRUE;
136   }
137   return FALSE;
138 }
139 
140 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
141                                            kmp_int32 gtid) {
142   char const *const func = "omp_test_lock";
143   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
144       __kmp_is_tas_lock_nestable(lck)) {
145     KMP_FATAL(LockNestableUsedAsSimple, func);
146   }
147   return __kmp_test_tas_lock(lck, gtid);
148 }
149 
150 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
151   KMP_MB(); /* Flush all pending memory write invalidates.  */
152 
153   KMP_FSYNC_RELEASING(lck);
154   KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
155   KMP_MB(); /* Flush all pending memory write invalidates.  */
156 
157   KMP_YIELD_OVERSUB();
158   return KMP_LOCK_RELEASED;
159 }
160 
161 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
162                                               kmp_int32 gtid) {
163   char const *const func = "omp_unset_lock";
164   KMP_MB(); /* in case another processor initialized lock */
165   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
166       __kmp_is_tas_lock_nestable(lck)) {
167     KMP_FATAL(LockNestableUsedAsSimple, func);
168   }
169   if (__kmp_get_tas_lock_owner(lck) == -1) {
170     KMP_FATAL(LockUnsettingFree, func);
171   }
172   if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
173       (__kmp_get_tas_lock_owner(lck) != gtid)) {
174     KMP_FATAL(LockUnsettingSetByAnother, func);
175   }
176   return __kmp_release_tas_lock(lck, gtid);
177 }
178 
179 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
180   lck->lk.poll = KMP_LOCK_FREE(tas);
181 }
182 
183 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
184 
185 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
186   char const *const func = "omp_destroy_lock";
187   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
188       __kmp_is_tas_lock_nestable(lck)) {
189     KMP_FATAL(LockNestableUsedAsSimple, func);
190   }
191   if (__kmp_get_tas_lock_owner(lck) != -1) {
192     KMP_FATAL(LockStillOwned, func);
193   }
194   __kmp_destroy_tas_lock(lck);
195 }
196 
197 // nested test and set locks
198 
199 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
200   KMP_DEBUG_ASSERT(gtid >= 0);
201 
202   if (__kmp_get_tas_lock_owner(lck) == gtid) {
203     lck->lk.depth_locked += 1;
204     return KMP_LOCK_ACQUIRED_NEXT;
205   } else {
206     __kmp_acquire_tas_lock_timed_template(lck, gtid);
207     lck->lk.depth_locked = 1;
208     return KMP_LOCK_ACQUIRED_FIRST;
209   }
210 }
211 
212 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
213                                                      kmp_int32 gtid) {
214   char const *const func = "omp_set_nest_lock";
215   if (!__kmp_is_tas_lock_nestable(lck)) {
216     KMP_FATAL(LockSimpleUsedAsNestable, func);
217   }
218   return __kmp_acquire_nested_tas_lock(lck, gtid);
219 }
220 
221 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
222   int retval;
223 
224   KMP_DEBUG_ASSERT(gtid >= 0);
225 
226   if (__kmp_get_tas_lock_owner(lck) == gtid) {
227     retval = ++lck->lk.depth_locked;
228   } else if (!__kmp_test_tas_lock(lck, gtid)) {
229     retval = 0;
230   } else {
231     KMP_MB();
232     retval = lck->lk.depth_locked = 1;
233   }
234   return retval;
235 }
236 
237 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
238                                                   kmp_int32 gtid) {
239   char const *const func = "omp_test_nest_lock";
240   if (!__kmp_is_tas_lock_nestable(lck)) {
241     KMP_FATAL(LockSimpleUsedAsNestable, func);
242   }
243   return __kmp_test_nested_tas_lock(lck, gtid);
244 }
245 
246 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
247   KMP_DEBUG_ASSERT(gtid >= 0);
248 
249   KMP_MB();
250   if (--(lck->lk.depth_locked) == 0) {
251     __kmp_release_tas_lock(lck, gtid);
252     return KMP_LOCK_RELEASED;
253   }
254   return KMP_LOCK_STILL_HELD;
255 }
256 
257 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
258                                                      kmp_int32 gtid) {
259   char const *const func = "omp_unset_nest_lock";
260   KMP_MB(); /* in case another processor initialized lock */
261   if (!__kmp_is_tas_lock_nestable(lck)) {
262     KMP_FATAL(LockSimpleUsedAsNestable, func);
263   }
264   if (__kmp_get_tas_lock_owner(lck) == -1) {
265     KMP_FATAL(LockUnsettingFree, func);
266   }
267   if (__kmp_get_tas_lock_owner(lck) != gtid) {
268     KMP_FATAL(LockUnsettingSetByAnother, func);
269   }
270   return __kmp_release_nested_tas_lock(lck, gtid);
271 }
272 
273 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
274   __kmp_init_tas_lock(lck);
275   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
276 }
277 
278 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
279   __kmp_destroy_tas_lock(lck);
280   lck->lk.depth_locked = 0;
281 }
282 
283 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
284   char const *const func = "omp_destroy_nest_lock";
285   if (!__kmp_is_tas_lock_nestable(lck)) {
286     KMP_FATAL(LockSimpleUsedAsNestable, func);
287   }
288   if (__kmp_get_tas_lock_owner(lck) != -1) {
289     KMP_FATAL(LockStillOwned, func);
290   }
291   __kmp_destroy_nested_tas_lock(lck);
292 }
293 
294 #if KMP_USE_FUTEX
295 
296 /* ------------------------------------------------------------------------ */
297 /* futex locks */
298 
299 // futex locks are really just test and set locks, with a different method
300 // of handling contention.  They take the same amount of space as test and
301 // set locks, and are allocated the same way (i.e. use the area allocated by
302 // the compiler for non-nested locks / allocate nested locks on the heap).
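// A sketch of the poll encoding used below (ignoring the KMP_LOCK_BUSY/STRIP
// tagging): a free lock stores KMP_LOCK_FREE(futex); a held lock stores
// (gtid + 1) << 1 for the owning thread, and bit 0 is set once at least one
// waiter has gone (or is about to go) to sleep in futex_wait.  The owner query
// therefore shifts right by one and subtracts one, and a release only issues a
// futex_wake when bit 0 of the previous poll value was set.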
303 
304 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
305   return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
306 }
307 
308 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
309   return lck->lk.depth_locked != -1;
310 }
311 
312 __forceinline static int
313 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
314   kmp_int32 gtid_code = (gtid + 1) << 1;
315 
316   KMP_MB();
317 
318 #ifdef USE_LOCK_PROFILE
319   kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
320   if ((curr != 0) && (curr != gtid_code))
321     __kmp_printf("LOCK CONTENTION: %p\n", lck);
322 /* else __kmp_printf( "." );*/
323 #endif /* USE_LOCK_PROFILE */
324 
325   KMP_FSYNC_PREPARE(lck);
326   KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
327                   lck, lck->lk.poll, gtid));
328 
329   kmp_int32 poll_val;
330 
331   while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
332               &(lck->lk.poll), KMP_LOCK_FREE(futex),
333               KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
334 
335     kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
336     KA_TRACE(
337         1000,
338         ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
339          lck, gtid, poll_val, cond));
340 
341     // NOTE: do not write the condition for this branch as
342     //
343     // if ( poll_val & 1 == 0 )
344     //
345     // Because == binds tighter than &, that parses as poll_val & (1 == 0), which
346     // is always zero, so the block below would be skipped regardless of the LSB.
347     if (!cond) {
348       // Try to set the lsb in the poll to indicate to the owner
349       // thread that they need to wake this thread up.
350       if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
351                                        poll_val | KMP_LOCK_BUSY(1, futex))) {
352         KA_TRACE(
353             1000,
354             ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
355              lck, lck->lk.poll, gtid));
356         continue;
357       }
358       poll_val |= KMP_LOCK_BUSY(1, futex);
359 
360       KA_TRACE(1000,
361                ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
362                 lck->lk.poll, gtid));
363     }
364 
365     KA_TRACE(
366         1000,
367         ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
368          lck, gtid, poll_val));
369 
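    // FUTEX_WAIT puts this thread to sleep only if lck->lk.poll still equals
    // poll_val when the kernel re-checks it; otherwise the syscall returns
    // immediately with an error and we simply retry the acquire loop.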
370     long rc;
371     if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
372                       NULL, 0)) != 0) {
373       KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
374                       "failed (rc=%ld errno=%d)\n",
375                       lck, gtid, poll_val, rc, errno));
376       continue;
377     }
378 
379     KA_TRACE(1000,
380              ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
381               lck, gtid, poll_val));
382     // This thread has now done a successful futex wait call and has been placed
383     // on the OS futex queue.  We must now perform a futex wake call when releasing
384     // the lock, as we have no idea how many other threads are in the queue.
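    // Remember that a wake-up will now be needed: with the wait bit set in
    // gtid_code, a later successful CAS at the top of the loop installs a poll
    // value whose wait bit is also set, so our own eventual release will issue
    // a futex_wake for any remaining waiters.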
385     gtid_code |= 1;
386   }
387 
388   KMP_FSYNC_ACQUIRED(lck);
389   KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
390                   lck->lk.poll, gtid));
391   return KMP_LOCK_ACQUIRED_FIRST;
392 }
393 
394 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
395   int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
396   return retval;
397 }
398 
399 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
400                                                 kmp_int32 gtid) {
401   char const *const func = "omp_set_lock";
402   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
403       __kmp_is_futex_lock_nestable(lck)) {
404     KMP_FATAL(LockNestableUsedAsSimple, func);
405   }
406   if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
407     KMP_FATAL(LockIsAlreadyOwned, func);
408   }
409   return __kmp_acquire_futex_lock(lck, gtid);
410 }
411 
412 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
413   if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
414                                   KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
415     KMP_FSYNC_ACQUIRED(lck);
416     return TRUE;
417   }
418   return FALSE;
419 }
420 
421 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
422                                              kmp_int32 gtid) {
423   char const *const func = "omp_test_lock";
424   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
425       __kmp_is_futex_lock_nestable(lck)) {
426     KMP_FATAL(LockNestableUsedAsSimple, func);
427   }
428   return __kmp_test_futex_lock(lck, gtid);
429 }
430 
431 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
432   KMP_MB(); /* Flush all pending memory write invalidates.  */
433 
434   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
435                   lck, lck->lk.poll, gtid));
436 
437   KMP_FSYNC_RELEASING(lck);
438 
439   kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
440 
441   KA_TRACE(1000,
442            ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
443             lck, gtid, poll_val));
444 
445   if (KMP_LOCK_STRIP(poll_val) & 1) {
446     KA_TRACE(1000,
447              ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
448               lck, gtid));
449     syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
450             NULL, NULL, 0);
451   }
452 
453   KMP_MB(); /* Flush all pending memory write invalidates.  */
454 
455   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
456                   lck->lk.poll, gtid));
457 
458   KMP_YIELD_OVERSUB();
459   return KMP_LOCK_RELEASED;
460 }
461 
462 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
463                                                 kmp_int32 gtid) {
464   char const *const func = "omp_unset_lock";
465   KMP_MB(); /* in case another processor initialized lock */
466   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
467       __kmp_is_futex_lock_nestable(lck)) {
468     KMP_FATAL(LockNestableUsedAsSimple, func);
469   }
470   if (__kmp_get_futex_lock_owner(lck) == -1) {
471     KMP_FATAL(LockUnsettingFree, func);
472   }
473   if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
474       (__kmp_get_futex_lock_owner(lck) != gtid)) {
475     KMP_FATAL(LockUnsettingSetByAnother, func);
476   }
477   return __kmp_release_futex_lock(lck, gtid);
478 }
479 
480 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
481   TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
482 }
483 
484 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
485 
486 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
487   char const *const func = "omp_destroy_lock";
488   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
489       __kmp_is_futex_lock_nestable(lck)) {
490     KMP_FATAL(LockNestableUsedAsSimple, func);
491   }
492   if (__kmp_get_futex_lock_owner(lck) != -1) {
493     KMP_FATAL(LockStillOwned, func);
494   }
495   __kmp_destroy_futex_lock(lck);
496 }
497 
498 // nested futex locks
499 
500 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
501   KMP_DEBUG_ASSERT(gtid >= 0);
502 
503   if (__kmp_get_futex_lock_owner(lck) == gtid) {
504     lck->lk.depth_locked += 1;
505     return KMP_LOCK_ACQUIRED_NEXT;
506   } else {
507     __kmp_acquire_futex_lock_timed_template(lck, gtid);
508     lck->lk.depth_locked = 1;
509     return KMP_LOCK_ACQUIRED_FIRST;
510   }
511 }
512 
513 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
514                                                        kmp_int32 gtid) {
515   char const *const func = "omp_set_nest_lock";
516   if (!__kmp_is_futex_lock_nestable(lck)) {
517     KMP_FATAL(LockSimpleUsedAsNestable, func);
518   }
519   return __kmp_acquire_nested_futex_lock(lck, gtid);
520 }
521 
522 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
523   int retval;
524 
525   KMP_DEBUG_ASSERT(gtid >= 0);
526 
527   if (__kmp_get_futex_lock_owner(lck) == gtid) {
528     retval = ++lck->lk.depth_locked;
529   } else if (!__kmp_test_futex_lock(lck, gtid)) {
530     retval = 0;
531   } else {
532     KMP_MB();
533     retval = lck->lk.depth_locked = 1;
534   }
535   return retval;
536 }
537 
538 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
539                                                     kmp_int32 gtid) {
540   char const *const func = "omp_test_nest_lock";
541   if (!__kmp_is_futex_lock_nestable(lck)) {
542     KMP_FATAL(LockSimpleUsedAsNestable, func);
543   }
544   return __kmp_test_nested_futex_lock(lck, gtid);
545 }
546 
547 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
548   KMP_DEBUG_ASSERT(gtid >= 0);
549 
550   KMP_MB();
551   if (--(lck->lk.depth_locked) == 0) {
552     __kmp_release_futex_lock(lck, gtid);
553     return KMP_LOCK_RELEASED;
554   }
555   return KMP_LOCK_STILL_HELD;
556 }
557 
558 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
559                                                        kmp_int32 gtid) {
560   char const *const func = "omp_unset_nest_lock";
561   KMP_MB(); /* in case another processor initialized lock */
562   if (!__kmp_is_futex_lock_nestable(lck)) {
563     KMP_FATAL(LockSimpleUsedAsNestable, func);
564   }
565   if (__kmp_get_futex_lock_owner(lck) == -1) {
566     KMP_FATAL(LockUnsettingFree, func);
567   }
568   if (__kmp_get_futex_lock_owner(lck) != gtid) {
569     KMP_FATAL(LockUnsettingSetByAnother, func);
570   }
571   return __kmp_release_nested_futex_lock(lck, gtid);
572 }
573 
574 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
575   __kmp_init_futex_lock(lck);
576   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
577 }
578 
579 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
580   __kmp_destroy_futex_lock(lck);
581   lck->lk.depth_locked = 0;
582 }
583 
584 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
585   char const *const func = "omp_destroy_nest_lock";
586   if (!__kmp_is_futex_lock_nestable(lck)) {
587     KMP_FATAL(LockSimpleUsedAsNestable, func);
588   }
589   if (__kmp_get_futex_lock_owner(lck) != -1) {
590     KMP_FATAL(LockStillOwned, func);
591   }
592   __kmp_destroy_nested_futex_lock(lck);
593 }
594 
595 #endif // KMP_USE_FUTEX
596 
597 /* ------------------------------------------------------------------------ */
598 /* ticket (bakery) locks */
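// In brief: each acquiring thread atomically takes a ticket from next_ticket
// and spins until now_serving reaches that ticket; a release simply increments
// now_serving, so the lock is handed off in FIFO order.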
599 
600 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
601   return std::atomic_load_explicit(&lck->lk.owner_id,
602                                    std::memory_order_relaxed) -
603          1;
604 }
605 
606 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
607   return std::atomic_load_explicit(&lck->lk.depth_locked,
608                                    std::memory_order_relaxed) != -1;
609 }
610 
611 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
612   return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
613                                    std::memory_order_acquire) == my_ticket;
614 }
615 
616 __forceinline static int
617 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
618                                          kmp_int32 gtid) {
619   kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
620       &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
621 
622 #ifdef USE_LOCK_PROFILE
623   if (std::atomic_load_explicit(&lck->lk.now_serving,
624                                 std::memory_order_relaxed) != my_ticket)
625     __kmp_printf("LOCK CONTENTION: %p\n", lck);
626 /* else __kmp_printf( "." );*/
627 #endif /* USE_LOCK_PROFILE */
628 
629   if (std::atomic_load_explicit(&lck->lk.now_serving,
630                                 std::memory_order_acquire) == my_ticket) {
631     return KMP_LOCK_ACQUIRED_FIRST;
632   }
633   KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
634   return KMP_LOCK_ACQUIRED_FIRST;
635 }
636 
637 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
638   int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
639   return retval;
640 }
641 
642 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
643                                                  kmp_int32 gtid) {
644   char const *const func = "omp_set_lock";
645 
646   if (!std::atomic_load_explicit(&lck->lk.initialized,
647                                  std::memory_order_relaxed)) {
648     KMP_FATAL(LockIsUninitialized, func);
649   }
650   if (lck->lk.self != lck) {
651     KMP_FATAL(LockIsUninitialized, func);
652   }
653   if (__kmp_is_ticket_lock_nestable(lck)) {
654     KMP_FATAL(LockNestableUsedAsSimple, func);
655   }
656   if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
657     KMP_FATAL(LockIsAlreadyOwned, func);
658   }
659 
660   __kmp_acquire_ticket_lock(lck, gtid);
661 
662   std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
663                              std::memory_order_relaxed);
664   return KMP_LOCK_ACQUIRED_FIRST;
665 }
666 
667 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
668   kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
669                                                    std::memory_order_relaxed);
670 
671   if (std::atomic_load_explicit(&lck->lk.now_serving,
672                                 std::memory_order_relaxed) == my_ticket) {
673     kmp_uint32 next_ticket = my_ticket + 1;
674     if (std::atomic_compare_exchange_strong_explicit(
675             &lck->lk.next_ticket, &my_ticket, next_ticket,
676             std::memory_order_acquire, std::memory_order_acquire)) {
677       return TRUE;
678     }
679   }
680   return FALSE;
681 }
682 
683 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
684                                               kmp_int32 gtid) {
685   char const *const func = "omp_test_lock";
686 
687   if (!std::atomic_load_explicit(&lck->lk.initialized,
688                                  std::memory_order_relaxed)) {
689     KMP_FATAL(LockIsUninitialized, func);
690   }
691   if (lck->lk.self != lck) {
692     KMP_FATAL(LockIsUninitialized, func);
693   }
694   if (__kmp_is_ticket_lock_nestable(lck)) {
695     KMP_FATAL(LockNestableUsedAsSimple, func);
696   }
697 
698   int retval = __kmp_test_ticket_lock(lck, gtid);
699 
700   if (retval) {
701     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
702                                std::memory_order_relaxed);
703   }
704   return retval;
705 }
706 
707 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
708   kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
709                                                   std::memory_order_relaxed) -
710                         std::atomic_load_explicit(&lck->lk.now_serving,
711                                                   std::memory_order_relaxed);
712 
713   std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
714                                  std::memory_order_release);
715 
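  // 'distance' approximates how many threads currently hold or are waiting for
  // the lock; only yield when they outnumber the available processors.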
716   KMP_YIELD(distance >
717             (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
718   return KMP_LOCK_RELEASED;
719 }
720 
721 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722                                                  kmp_int32 gtid) {
723   char const *const func = "omp_unset_lock";
724 
725   if (!std::atomic_load_explicit(&lck->lk.initialized,
726                                  std::memory_order_relaxed)) {
727     KMP_FATAL(LockIsUninitialized, func);
728   }
729   if (lck->lk.self != lck) {
730     KMP_FATAL(LockIsUninitialized, func);
731   }
732   if (__kmp_is_ticket_lock_nestable(lck)) {
733     KMP_FATAL(LockNestableUsedAsSimple, func);
734   }
735   if (__kmp_get_ticket_lock_owner(lck) == -1) {
736     KMP_FATAL(LockUnsettingFree, func);
737   }
738   if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739       (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740     KMP_FATAL(LockUnsettingSetByAnother, func);
741   }
742   std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743   return __kmp_release_ticket_lock(lck, gtid);
744 }
745 
746 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
747   lck->lk.location = NULL;
748   lck->lk.self = lck;
749   std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750                              std::memory_order_relaxed);
751   std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752                              std::memory_order_relaxed);
753   std::atomic_store_explicit(
754       &lck->lk.owner_id, 0,
755       std::memory_order_relaxed); // no thread owns the lock.
756   std::atomic_store_explicit(
757       &lck->lk.depth_locked, -1,
758       std::memory_order_relaxed); // -1 => not a nested lock.
759   std::atomic_store_explicit(&lck->lk.initialized, true,
760                              std::memory_order_release);
761 }
762 
763 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
764   std::atomic_store_explicit(&lck->lk.initialized, false,
765                              std::memory_order_release);
766   lck->lk.self = NULL;
767   lck->lk.location = NULL;
768   std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769                              std::memory_order_relaxed);
770   std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771                              std::memory_order_relaxed);
772   std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773   std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774                              std::memory_order_relaxed);
775 }
776 
777 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778   char const *const func = "omp_destroy_lock";
779 
780   if (!std::atomic_load_explicit(&lck->lk.initialized,
781                                  std::memory_order_relaxed)) {
782     KMP_FATAL(LockIsUninitialized, func);
783   }
784   if (lck->lk.self != lck) {
785     KMP_FATAL(LockIsUninitialized, func);
786   }
787   if (__kmp_is_ticket_lock_nestable(lck)) {
788     KMP_FATAL(LockNestableUsedAsSimple, func);
789   }
790   if (__kmp_get_ticket_lock_owner(lck) != -1) {
791     KMP_FATAL(LockStillOwned, func);
792   }
793   __kmp_destroy_ticket_lock(lck);
794 }
795 
796 // nested ticket locks
797 
798 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799   KMP_DEBUG_ASSERT(gtid >= 0);
800 
801   if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802     std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803                                    std::memory_order_relaxed);
804     return KMP_LOCK_ACQUIRED_NEXT;
805   } else {
806     __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807     std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808                                std::memory_order_relaxed);
809     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810                                std::memory_order_relaxed);
811     return KMP_LOCK_ACQUIRED_FIRST;
812   }
813 }
814 
815 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816                                                         kmp_int32 gtid) {
817   char const *const func = "omp_set_nest_lock";
818 
819   if (!std::atomic_load_explicit(&lck->lk.initialized,
820                                  std::memory_order_relaxed)) {
821     KMP_FATAL(LockIsUninitialized, func);
822   }
823   if (lck->lk.self != lck) {
824     KMP_FATAL(LockIsUninitialized, func);
825   }
826   if (!__kmp_is_ticket_lock_nestable(lck)) {
827     KMP_FATAL(LockSimpleUsedAsNestable, func);
828   }
829   return __kmp_acquire_nested_ticket_lock(lck, gtid);
830 }
831 
832 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833   int retval;
834 
835   KMP_DEBUG_ASSERT(gtid >= 0);
836 
837   if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838     retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839                                             std::memory_order_relaxed) +
840              1;
841   } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842     retval = 0;
843   } else {
844     std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845                                std::memory_order_relaxed);
846     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847                                std::memory_order_relaxed);
848     retval = 1;
849   }
850   return retval;
851 }
852 
853 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854                                                      kmp_int32 gtid) {
855   char const *const func = "omp_test_nest_lock";
856 
857   if (!std::atomic_load_explicit(&lck->lk.initialized,
858                                  std::memory_order_relaxed)) {
859     KMP_FATAL(LockIsUninitialized, func);
860   }
861   if (lck->lk.self != lck) {
862     KMP_FATAL(LockIsUninitialized, func);
863   }
864   if (!__kmp_is_ticket_lock_nestable(lck)) {
865     KMP_FATAL(LockSimpleUsedAsNestable, func);
866   }
867   return __kmp_test_nested_ticket_lock(lck, gtid);
868 }
869 
870 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871   KMP_DEBUG_ASSERT(gtid >= 0);
872 
873   if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874                                       std::memory_order_relaxed) -
875        1) == 0) {
876     std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877     __kmp_release_ticket_lock(lck, gtid);
878     return KMP_LOCK_RELEASED;
879   }
880   return KMP_LOCK_STILL_HELD;
881 }
882 
883 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884                                                         kmp_int32 gtid) {
885   char const *const func = "omp_unset_nest_lock";
886 
887   if (!std::atomic_load_explicit(&lck->lk.initialized,
888                                  std::memory_order_relaxed)) {
889     KMP_FATAL(LockIsUninitialized, func);
890   }
891   if (lck->lk.self != lck) {
892     KMP_FATAL(LockIsUninitialized, func);
893   }
894   if (!__kmp_is_ticket_lock_nestable(lck)) {
895     KMP_FATAL(LockSimpleUsedAsNestable, func);
896   }
897   if (__kmp_get_ticket_lock_owner(lck) == -1) {
898     KMP_FATAL(LockUnsettingFree, func);
899   }
900   if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901     KMP_FATAL(LockUnsettingSetByAnother, func);
902   }
903   return __kmp_release_nested_ticket_lock(lck, gtid);
904 }
905 
906 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
907   __kmp_init_ticket_lock(lck);
908   std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909                              std::memory_order_relaxed);
910   // >= 0 for nestable locks, -1 for simple locks
911 }
912 
913 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914   __kmp_destroy_ticket_lock(lck);
915   std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916                              std::memory_order_relaxed);
917 }
918 
919 static void
920 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921   char const *const func = "omp_destroy_nest_lock";
922 
923   if (!std::atomic_load_explicit(&lck->lk.initialized,
924                                  std::memory_order_relaxed)) {
925     KMP_FATAL(LockIsUninitialized, func);
926   }
927   if (lck->lk.self != lck) {
928     KMP_FATAL(LockIsUninitialized, func);
929   }
930   if (!__kmp_is_ticket_lock_nestable(lck)) {
931     KMP_FATAL(LockSimpleUsedAsNestable, func);
932   }
933   if (__kmp_get_ticket_lock_owner(lck) != -1) {
934     KMP_FATAL(LockStillOwned, func);
935   }
936   __kmp_destroy_nested_ticket_lock(lck);
937 }
938 
939 // access functions to fields which don't exist for all lock kinds.
940 
941 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
942   return lck->lk.location;
943 }
944 
945 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
946                                            const ident_t *loc) {
947   lck->lk.location = loc;
948 }
949 
950 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
951   return lck->lk.flags;
952 }
953 
954 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
955                                         kmp_lock_flags_t flags) {
956   lck->lk.flags = flags;
957 }
958 
959 /* ------------------------------------------------------------------------ */
960 /* queuing locks */
961 
962 /* First the states
963    (head,tail) =              0, 0  means lock is unheld, nobody on queue
964                  UINT_MAX or -1, 0  means lock is held, nobody on queue
965                               h, h  means lock held or about to transition,
966                                     1 element on queue
967                               h, t  h <> t, means lock is held or about to
968                                     transition, >1 elements on queue
969 
970    Now the transitions
971       Acquire(0,0)  = -1 ,0
972       Release(0,0)  = Error
973       Acquire(-1,0) =  h ,h    h > 0
974       Release(-1,0) =  0 ,0
975       Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
976       Release(h,h)  = -1 ,0    h > 0
977       Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
978       Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
979 
980    And pictorially
981 
982            +-----+
983            | 0, 0|------- release -------> Error
984            +-----+
985              |  ^
986       acquire|  |release
987              |  |
988              |  |
989              v  |
990            +-----+
991            |-1, 0|
992            +-----+
993              |  ^
994       acquire|  |release
995              |  |
996              |  |
997              v  |
998            +-----+
999            | h, h|
1000            +-----+
1001              |  ^
1002       acquire|  |release
1003              |  |
1004              |  |
1005              v  |
1006            +-----+
1007            | h, t|----- acquire, release loopback ---+
1008            +-----+                                   |
1009                 ^                                    |
1010                 |                                    |
1011                 +------------------------------------+
1012  */
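
/* A worked example of the transitions above (ids stored as gtid+1, as in the
   code), with threads T2, T4 and T7 contending for the lock:

      T2 acquires:          ( 0, 0) -> (-1, 0)   T2 holds, queue empty
      T4 tries, enqueues:   (-1, 0) -> ( 5, 5)   T4 is the only waiter
      T7 tries, enqueues:   ( 5, 5) -> ( 5, 8)   queue is T4 -> T7
      T2 releases:          ( 5, 8) -> ( 8, 8)   T4 is dequeued and now holds
      T4 releases:          ( 8, 8) -> (-1, 0)   T7 is dequeued and now holds
      T7 releases:          (-1, 0) -> ( 0, 0)   the lock is free again
*/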
1013 
1014 #ifdef DEBUG_QUEUING_LOCKS
1015 
1016 /* Stuff for circular trace buffer */
1017 #define TRACE_BUF_ELE 1024
1018 static char traces[TRACE_BUF_ELE][128] = {0};
1019 static int tc = 0;
1020 #define TRACE_LOCK(X, Y)                                                       \
1021   KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022 #define TRACE_LOCK_T(X, Y, Z)                                                  \
1023   KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024 #define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
1025   KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
1026                Z, Q);
1027 
1028 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1029                                     kmp_queuing_lock_t *lck, kmp_int32 head_id,
1030                                     kmp_int32 tail_id) {
1031   kmp_int32 t, i;
1032 
1033   __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034 
1035   i = tc % TRACE_BUF_ELE;
1036   __kmp_printf_no_lock("%s\n", traces[i]);
1037   i = (i + 1) % TRACE_BUF_ELE;
1038   while (i != (tc % TRACE_BUF_ELE)) {
1039     __kmp_printf_no_lock("%s", traces[i]);
1040     i = (i + 1) % TRACE_BUF_ELE;
1041   }
1042   __kmp_printf_no_lock("\n");
1043 
1044   __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045                        "next_wait:%d, head_id:%d, tail_id:%d\n",
1046                        gtid + 1, this_thr->th.th_spin_here,
1047                        this_thr->th.th_next_waiting, head_id, tail_id);
1048 
1049   __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050 
1051   if (lck->lk.head_id >= 1) {
1052     t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053     while (t > 0) {
1054       __kmp_printf_no_lock("-> %d ", t);
1055       t = __kmp_threads[t - 1]->th.th_next_waiting;
1056     }
1057   }
1058   __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
1059   __kmp_printf_no_lock("\n\n");
1060 }
1061 
1062 #endif /* DEBUG_QUEUING_LOCKS */
1063 
1064 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1065   return TCR_4(lck->lk.owner_id) - 1;
1066 }
1067 
1068 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1069   return lck->lk.depth_locked != -1;
1070 }
1071 
1072 /* Acquire a lock using the queuing lock implementation */
1073 template <bool takeTime>
1074 /* [TLW] The unused template above is left behind because of what BEB believes
1075    is a potential compiler problem with __forceinline. */
1076 __forceinline static int
1077 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1078                                           kmp_int32 gtid) {
1079   kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1080   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1081   volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1082   volatile kmp_uint32 *spin_here_p;
1083 
1084 #if OMPT_SUPPORT
1085   ompt_state_t prev_state = ompt_state_undefined;
1086 #endif
1087 
1088   KA_TRACE(1000,
1089            ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1090 
1091   KMP_FSYNC_PREPARE(lck);
1092   KMP_DEBUG_ASSERT(this_thr != NULL);
1093   spin_here_p = &this_thr->th.th_spin_here;
1094 
1095 #ifdef DEBUG_QUEUING_LOCKS
1096   TRACE_LOCK(gtid + 1, "acq ent");
1097   if (*spin_here_p)
1098     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1099   if (this_thr->th.th_next_waiting != 0)
1100     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1101 #endif
1102   KMP_DEBUG_ASSERT(!*spin_here_p);
1103   KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1104 
1105   /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1106      head_id_p that may follow, not just in execution order, but also in
1107      visibility order. This way, when a releasing thread observes the changes to
1108      the queue by this thread, it can rightly assume that spin_here_p has
1109      already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1110      not premature.  If the releasing thread sets spin_here_p to FALSE before
1111      this thread sets it to TRUE, this thread will hang. */
1112   *spin_here_p = TRUE; /* before enqueuing to prevent race */
1113 
1114   while (1) {
1115     kmp_int32 enqueued;
1116     kmp_int32 head;
1117     kmp_int32 tail;
1118 
1119     head = *head_id_p;
1120 
1121     switch (head) {
1122 
1123     case -1: {
1124 #ifdef DEBUG_QUEUING_LOCKS
1125       tail = *tail_id_p;
1126       TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1127 #endif
1128       tail = 0; /* to make sure the next link, read asynchronously, is not set
1129                 accidentally; this assignment prevents us from entering the
1130                 if ( tail > 0 ) condition in the enqueued case below, which is
1131                 not necessary for this state transition */
1132 
1133       /* try (-1,0)->(tid,tid) */
1134       enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1135                                              KMP_PACK_64(-1, 0),
1136                                              KMP_PACK_64(gtid + 1, gtid + 1));
1137 #ifdef DEBUG_QUEUING_LOCKS
1138       if (enqueued)
1139         TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1140 #endif
1141     } break;
1142 
1143     default: {
1144       tail = *tail_id_p;
1145       KMP_DEBUG_ASSERT(tail != gtid + 1);
1146 
1147 #ifdef DEBUG_QUEUING_LOCKS
1148       TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1149 #endif
1150 
1151       if (tail == 0) {
1152         enqueued = FALSE;
1153       } else {
1154         /* try (h,t) or (h,h)->(h,tid) */
1155         enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1156 
1157 #ifdef DEBUG_QUEUING_LOCKS
1158         if (enqueued)
1159           TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1160 #endif
1161       }
1162     } break;
1163 
1164     case 0: /* empty queue */
1165     {
1166       kmp_int32 grabbed_lock;
1167 
1168 #ifdef DEBUG_QUEUING_LOCKS
1169       tail = *tail_id_p;
1170       TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1171 #endif
1172       /* try (0,0)->(-1,0) */
1173 
1174       /* only legal transition out of head = 0 is head = -1 with no change to
1175        * tail */
1176       grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1177 
1178       if (grabbed_lock) {
1179 
1180         *spin_here_p = FALSE;
1181 
1182         KA_TRACE(
1183             1000,
1184             ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1185              lck, gtid));
1186 #ifdef DEBUG_QUEUING_LOCKS
1187         TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1188 #endif
1189 
1190 #if OMPT_SUPPORT
1191         if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1192           /* change the state before clearing wait_id */
1193           this_thr->th.ompt_thread_info.state = prev_state;
1194           this_thr->th.ompt_thread_info.wait_id = 0;
1195         }
1196 #endif
1197 
1198         KMP_FSYNC_ACQUIRED(lck);
1199         return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1200       }
1201       enqueued = FALSE;
1202     } break;
1203     }
1204 
1205 #if OMPT_SUPPORT
1206     if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1207       /* this thread will spin; set wait_id before entering wait state */
1208       prev_state = this_thr->th.ompt_thread_info.state;
1209       this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1210       this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1211     }
1212 #endif
1213 
1214     if (enqueued) {
1215       if (tail > 0) {
1216         kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1217         KMP_ASSERT(tail_thr != NULL);
1218         tail_thr->th.th_next_waiting = gtid + 1;
1219         /* corresponding wait for this write in release code */
1220       }
1221       KA_TRACE(1000,
1222                ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1223                 lck, gtid));
1224 
1225       KMP_MB();
1226       // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1227       KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1228       // Synchronize writes to both runtime thread structures
1229       // and writes in user code.
1230       KMP_MB();
1231 
1232 #ifdef DEBUG_QUEUING_LOCKS
1233       TRACE_LOCK(gtid + 1, "acq spin");
1234 
1235       if (this_thr->th.th_next_waiting != 0)
1236         __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1237 #endif
1238       KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1239       KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1240                       "waiting on queue\n",
1241                       lck, gtid));
1242 
1243 #ifdef DEBUG_QUEUING_LOCKS
1244       TRACE_LOCK(gtid + 1, "acq exit 2");
1245 #endif
1246 
1247 #if OMPT_SUPPORT
1248       /* change the state before clearing wait_id */
1249       this_thr->th.ompt_thread_info.state = prev_state;
1250       this_thr->th.ompt_thread_info.wait_id = 0;
1251 #endif
1252 
1253       /* got lock, we were dequeued by the thread that released lock */
1254       return KMP_LOCK_ACQUIRED_FIRST;
1255     }
1256 
1257     /* Yield if number of threads > number of logical processors */
1258     /* ToDo: Not sure why this should only be in oversubscription case,
1259        maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1260     KMP_YIELD_OVERSUB();
1261 
1262 #ifdef DEBUG_QUEUING_LOCKS
1263     TRACE_LOCK(gtid + 1, "acq retry");
1264 #endif
1265   }
1266   KMP_ASSERT2(0, "should not get here");
1267   return KMP_LOCK_ACQUIRED_FIRST;
1268 }
1269 
1270 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271   KMP_DEBUG_ASSERT(gtid >= 0);
1272 
1273   int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274   return retval;
1275 }
1276 
1277 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278                                                   kmp_int32 gtid) {
1279   char const *const func = "omp_set_lock";
1280   if (lck->lk.initialized != lck) {
1281     KMP_FATAL(LockIsUninitialized, func);
1282   }
1283   if (__kmp_is_queuing_lock_nestable(lck)) {
1284     KMP_FATAL(LockNestableUsedAsSimple, func);
1285   }
1286   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287     KMP_FATAL(LockIsAlreadyOwned, func);
1288   }
1289 
1290   __kmp_acquire_queuing_lock(lck, gtid);
1291 
1292   lck->lk.owner_id = gtid + 1;
1293   return KMP_LOCK_ACQUIRED_FIRST;
1294 }
1295 
1296 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298   kmp_int32 head;
1299 #ifdef KMP_DEBUG
1300   kmp_info_t *this_thr;
1301 #endif
1302 
1303   KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304   KMP_DEBUG_ASSERT(gtid >= 0);
1305 #ifdef KMP_DEBUG
1306   this_thr = __kmp_thread_from_gtid(gtid);
1307   KMP_DEBUG_ASSERT(this_thr != NULL);
1308   KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309 #endif
1310 
1311   head = *head_id_p;
1312 
1313   if (head == 0) { /* nobody on queue, nobody holding */
1314     /* try (0,0)->(-1,0) */
1315     if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316       KA_TRACE(1000,
1317                ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318       KMP_FSYNC_ACQUIRED(lck);
1319       return TRUE;
1320     }
1321   }
1322 
1323   KA_TRACE(1000,
1324            ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325   return FALSE;
1326 }
1327 
1328 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329                                                kmp_int32 gtid) {
1330   char const *const func = "omp_test_lock";
1331   if (lck->lk.initialized != lck) {
1332     KMP_FATAL(LockIsUninitialized, func);
1333   }
1334   if (__kmp_is_queuing_lock_nestable(lck)) {
1335     KMP_FATAL(LockNestableUsedAsSimple, func);
1336   }
1337 
1338   int retval = __kmp_test_queuing_lock(lck, gtid);
1339 
1340   if (retval) {
1341     lck->lk.owner_id = gtid + 1;
1342   }
1343   return retval;
1344 }
1345 
1346 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1347   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1348   volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1349 
1350   KA_TRACE(1000,
1351            ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1352   KMP_DEBUG_ASSERT(gtid >= 0);
1353 #if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1354   kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1355 #endif
1356   KMP_DEBUG_ASSERT(this_thr != NULL);
1357 #ifdef DEBUG_QUEUING_LOCKS
1358   TRACE_LOCK(gtid + 1, "rel ent");
1359 
1360   if (this_thr->th.th_spin_here)
1361     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1362   if (this_thr->th.th_next_waiting != 0)
1363     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1364 #endif
1365   KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1366   KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1367 
1368   KMP_FSYNC_RELEASING(lck);
1369 
1370   while (1) {
1371     kmp_int32 dequeued;
1372     kmp_int32 head;
1373     kmp_int32 tail;
1374 
1375     head = *head_id_p;
1376 
1377 #ifdef DEBUG_QUEUING_LOCKS
1378     tail = *tail_id_p;
1379     TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1380     if (head == 0)
1381       __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1382 #endif
1383     KMP_DEBUG_ASSERT(head !=
1384                      0); /* holding the lock, head must be -1 or queue head */
1385 
1386     if (head == -1) { /* nobody on queue */
1387       /* try (-1,0)->(0,0) */
1388       if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1389         KA_TRACE(
1390             1000,
1391             ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1392              lck, gtid));
1393 #ifdef DEBUG_QUEUING_LOCKS
1394         TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1395 #endif
1396 
1397 #if OMPT_SUPPORT
1398 /* nothing to do - no other thread is trying to shift blame */
1399 #endif
1400         return KMP_LOCK_RELEASED;
1401       }
1402       dequeued = FALSE;
1403     } else {
1404       KMP_MB();
1405       tail = *tail_id_p;
1406       if (head == tail) { /* only one thread on the queue */
1407 #ifdef DEBUG_QUEUING_LOCKS
1408         if (head <= 0)
1409           __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1410 #endif
1411         KMP_DEBUG_ASSERT(head > 0);
1412 
1413         /* try (h,h)->(-1,0) */
1414         dequeued = KMP_COMPARE_AND_STORE_REL64(
1415             RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1416             KMP_PACK_64(-1, 0));
1417 #ifdef DEBUG_QUEUING_LOCKS
1418         TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1419 #endif
1420 
1421       } else {
1422         volatile kmp_int32 *waiting_id_p;
1423         kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1424         KMP_DEBUG_ASSERT(head_thr != NULL);
1425         waiting_id_p = &head_thr->th.th_next_waiting;
1426 
1427 /* Does this require synchronous reads? */
1428 #ifdef DEBUG_QUEUING_LOCKS
1429         if (head <= 0 || tail <= 0)
1430           __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1431 #endif
1432         KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1433 
1434         /* try (h,t)->(h',t) or (t,t) */
1435         KMP_MB();
1436         /* make sure enqueuing thread has time to update next waiting thread
1437          * field */
1438         *head_id_p =
1439             KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1440 #ifdef DEBUG_QUEUING_LOCKS
1441         TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1442 #endif
1443         dequeued = TRUE;
1444       }
1445     }
1446 
1447     if (dequeued) {
1448       kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1449       KMP_DEBUG_ASSERT(head_thr != NULL);
1450 
1451 /* Does this require synchronous reads? */
1452 #ifdef DEBUG_QUEUING_LOCKS
1453       if (head <= 0 || tail <= 0)
1454         __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1455 #endif
1456       KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1457 
1458       /* For clean code only. The waiting thread is not released until the
1459          th_spin_here store below, which prevents a race with the acquire code. */
1460       head_thr->th.th_next_waiting = 0;
1461 #ifdef DEBUG_QUEUING_LOCKS
1462       TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1463 #endif
1464 
1465       KMP_MB();
1466       /* reset spin value */
1467       head_thr->th.th_spin_here = FALSE;
1468 
1469       KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1470                       "dequeuing\n",
1471                       lck, gtid));
1472 #ifdef DEBUG_QUEUING_LOCKS
1473       TRACE_LOCK(gtid + 1, "rel exit 2");
1474 #endif
1475       return KMP_LOCK_RELEASED;
1476     }
1477     /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1478        threads */
1479 
1480 #ifdef DEBUG_QUEUING_LOCKS
1481     TRACE_LOCK(gtid + 1, "rel retry");
1482 #endif
1483 
1484   } /* while */
1485   KMP_ASSERT2(0, "should not get here");
1486   return KMP_LOCK_RELEASED;
1487 }
1488 
1489 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1490                                                   kmp_int32 gtid) {
1491   char const *const func = "omp_unset_lock";
1492   KMP_MB(); /* in case another processor initialized lock */
1493   if (lck->lk.initialized != lck) {
1494     KMP_FATAL(LockIsUninitialized, func);
1495   }
1496   if (__kmp_is_queuing_lock_nestable(lck)) {
1497     KMP_FATAL(LockNestableUsedAsSimple, func);
1498   }
1499   if (__kmp_get_queuing_lock_owner(lck) == -1) {
1500     KMP_FATAL(LockUnsettingFree, func);
1501   }
1502   if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1503     KMP_FATAL(LockUnsettingSetByAnother, func);
1504   }
1505   lck->lk.owner_id = 0;
1506   return __kmp_release_queuing_lock(lck, gtid);
1507 }
1508 
1509 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1510   lck->lk.location = NULL;
1511   lck->lk.head_id = 0;
1512   lck->lk.tail_id = 0;
1513   lck->lk.next_ticket = 0;
1514   lck->lk.now_serving = 0;
1515   lck->lk.owner_id = 0; // no thread owns the lock.
1516   lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1517   lck->lk.initialized = lck;
1518 
1519   KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1520 }
1521 
1522 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1523   lck->lk.initialized = NULL;
1524   lck->lk.location = NULL;
1525   lck->lk.head_id = 0;
1526   lck->lk.tail_id = 0;
1527   lck->lk.next_ticket = 0;
1528   lck->lk.now_serving = 0;
1529   lck->lk.owner_id = 0;
1530   lck->lk.depth_locked = -1;
1531 }
1532 
1533 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1534   char const *const func = "omp_destroy_lock";
1535   if (lck->lk.initialized != lck) {
1536     KMP_FATAL(LockIsUninitialized, func);
1537   }
1538   if (__kmp_is_queuing_lock_nestable(lck)) {
1539     KMP_FATAL(LockNestableUsedAsSimple, func);
1540   }
1541   if (__kmp_get_queuing_lock_owner(lck) != -1) {
1542     KMP_FATAL(LockStillOwned, func);
1543   }
1544   __kmp_destroy_queuing_lock(lck);
1545 }
1546 
1547 // nested queuing locks
1548 
1549 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1550   KMP_DEBUG_ASSERT(gtid >= 0);
1551 
1552   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1553     lck->lk.depth_locked += 1;
1554     return KMP_LOCK_ACQUIRED_NEXT;
1555   } else {
1556     __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1557     KMP_MB();
1558     lck->lk.depth_locked = 1;
1559     KMP_MB();
1560     lck->lk.owner_id = gtid + 1;
1561     return KMP_LOCK_ACQUIRED_FIRST;
1562   }
1563 }
1564 
1565 static int
1566 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1567                                               kmp_int32 gtid) {
1568   char const *const func = "omp_set_nest_lock";
1569   if (lck->lk.initialized != lck) {
1570     KMP_FATAL(LockIsUninitialized, func);
1571   }
1572   if (!__kmp_is_queuing_lock_nestable(lck)) {
1573     KMP_FATAL(LockSimpleUsedAsNestable, func);
1574   }
1575   return __kmp_acquire_nested_queuing_lock(lck, gtid);
1576 }
1577 
1578 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1579   int retval;
1580 
1581   KMP_DEBUG_ASSERT(gtid >= 0);
1582 
1583   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1584     retval = ++lck->lk.depth_locked;
1585   } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1586     retval = 0;
1587   } else {
1588     KMP_MB();
1589     retval = lck->lk.depth_locked = 1;
1590     KMP_MB();
1591     lck->lk.owner_id = gtid + 1;
1592   }
1593   return retval;
1594 }
1595 
1596 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1597                                                       kmp_int32 gtid) {
1598   char const *const func = "omp_test_nest_lock";
1599   if (lck->lk.initialized != lck) {
1600     KMP_FATAL(LockIsUninitialized, func);
1601   }
1602   if (!__kmp_is_queuing_lock_nestable(lck)) {
1603     KMP_FATAL(LockSimpleUsedAsNestable, func);
1604   }
1605   return __kmp_test_nested_queuing_lock(lck, gtid);
1606 }
1607 
1608 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1609   KMP_DEBUG_ASSERT(gtid >= 0);
1610 
1611   KMP_MB();
1612   if (--(lck->lk.depth_locked) == 0) {
1613     KMP_MB();
1614     lck->lk.owner_id = 0;
1615     __kmp_release_queuing_lock(lck, gtid);
1616     return KMP_LOCK_RELEASED;
1617   }
1618   return KMP_LOCK_STILL_HELD;
1619 }
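// A minimal, self-contained sketch of the re-entrancy scheme used by the
// nested queuing-lock functions above (illustrative only, not this runtime's
// code): remember the owner and a depth counter, and hand the underlying
// simple lock back only when the depth drops to zero. A plain test-and-set
// stands in for the queuing lock; 'gtid' plays the same role as above.
struct nested_lock_sketch_t {
  std::atomic<kmp_int32> base;  // 0 == free; the underlying "simple" lock
  std::atomic<kmp_int32> owner; // gtid of the current owner, -1 == none
  kmp_int32 depth;              // modified only while 'base' is held

  nested_lock_sketch_t() : base(0), owner(-1), depth(0) {}

  void acquire(kmp_int32 gtid) {
    if (owner.load(std::memory_order_relaxed) == gtid) {
      ++depth; // re-entrant acquire: no need to touch the base lock
      return;
    }
    kmp_int32 expected = 0;
    while (!base.compare_exchange_weak(expected, 1, std::memory_order_acquire,
                                       std::memory_order_relaxed)) {
      expected = 0;
      KMP_CPU_PAUSE();
    }
    depth = 1;
    owner.store(gtid, std::memory_order_relaxed);
  }

  int release(kmp_int32 gtid) {
    (void)gtid; // a "with_checks" variant would verify gtid == owner here
    if (--depth == 0) {
      owner.store(-1, std::memory_order_relaxed);
      base.store(0, std::memory_order_release);
      return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
  }
};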
1620 
1621 static int
1622 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1623                                               kmp_int32 gtid) {
1624   char const *const func = "omp_unset_nest_lock";
1625   KMP_MB(); /* in case another processor initialized lock */
1626   if (lck->lk.initialized != lck) {
1627     KMP_FATAL(LockIsUninitialized, func);
1628   }
1629   if (!__kmp_is_queuing_lock_nestable(lck)) {
1630     KMP_FATAL(LockSimpleUsedAsNestable, func);
1631   }
1632   if (__kmp_get_queuing_lock_owner(lck) == -1) {
1633     KMP_FATAL(LockUnsettingFree, func);
1634   }
1635   if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1636     KMP_FATAL(LockUnsettingSetByAnother, func);
1637   }
1638   return __kmp_release_nested_queuing_lock(lck, gtid);
1639 }
1640 
1641 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1642   __kmp_init_queuing_lock(lck);
1643   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1644 }
1645 
1646 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1647   __kmp_destroy_queuing_lock(lck);
1648   lck->lk.depth_locked = 0;
1649 }
1650 
1651 static void
1652 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1653   char const *const func = "omp_destroy_nest_lock";
1654   if (lck->lk.initialized != lck) {
1655     KMP_FATAL(LockIsUninitialized, func);
1656   }
1657   if (!__kmp_is_queuing_lock_nestable(lck)) {
1658     KMP_FATAL(LockSimpleUsedAsNestable, func);
1659   }
1660   if (__kmp_get_queuing_lock_owner(lck) != -1) {
1661     KMP_FATAL(LockStillOwned, func);
1662   }
1663   __kmp_destroy_nested_queuing_lock(lck);
1664 }
1665 
1666 // Access functions for fields that don't exist for all lock kinds.
1667 
1668 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1669   return lck->lk.location;
1670 }
1671 
1672 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1673                                             const ident_t *loc) {
1674   lck->lk.location = loc;
1675 }
1676 
1677 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1678   return lck->lk.flags;
1679 }
1680 
1681 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1682                                          kmp_lock_flags_t flags) {
1683   lck->lk.flags = flags;
1684 }
1685 
1686 #if KMP_USE_ADAPTIVE_LOCKS
1687 
1688 /* RTM Adaptive locks */
1689 
1690 #if KMP_HAVE_RTM_INTRINSICS
1691 #include <immintrin.h>
1692 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1693 
1694 #else
1695 
1696 // Values from the status register after failed speculation.
1697 #define _XBEGIN_STARTED (~0u)
1698 #define _XABORT_EXPLICIT (1 << 0)
1699 #define _XABORT_RETRY (1 << 1)
1700 #define _XABORT_CONFLICT (1 << 2)
1701 #define _XABORT_CAPACITY (1 << 3)
1702 #define _XABORT_DEBUG (1 << 4)
1703 #define _XABORT_NESTED (1 << 5)
1704 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1705 
1706 // Aborts for which it's worth trying again immediately
1707 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1708 
1709 #define STRINGIZE_INTERNAL(arg) #arg
1710 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1711 
1712 // Access to RTM instructions
1713 /* A version of _xbegin which returns -1 on speculation, and the value of EAX on
1714   an abort. This is the same definition as the compiler intrinsic that will be
1715   supported at some point. */
1716 static __inline int _xbegin() {
1717   int res = -1;
1718 
1719 #if KMP_OS_WINDOWS
1720 #if KMP_ARCH_X86_64
1721   _asm {
1722         _emit 0xC7
1723         _emit 0xF8
1724         _emit 2
1725         _emit 0
1726         _emit 0
1727         _emit 0
1728         jmp   L2
1729         mov   res, eax
1730     L2:
1731   }
1732 #else /* IA32 */
1733   _asm {
1734         _emit 0xC7
1735         _emit 0xF8
1736         _emit 2
1737         _emit 0
1738         _emit 0
1739         _emit 0
1740         jmp   L2
1741         mov   res, eax
1742     L2:
1743   }
1744 #endif // KMP_ARCH_X86_64
1745 #else
1746   /* Note that %eax must be noted as killed (clobbered), because the XSR is
1747      returned in %eax(%rax) on abort.  Other register values are restored, so
1748      don't need to be killed.
1749 
1750      We must also mark 'res' as an input and an output, since otherwise
1751      'res=-1' may be dropped as being dead, whereas we do need the assignment on
1752      the successful (i.e., non-abort) path. */
1753   __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
1754                    "   .long  1f-1b-6\n"
1755                    "    jmp   2f\n"
1756                    "1:  movl  %%eax,%0\n"
1757                    "2:"
1758                    : "+r"(res)::"memory", "%eax");
1759 #endif // KMP_OS_WINDOWS
1760   return res;
1761 }
1762 
1763 /* Transaction end */
1764 static __inline void _xend() {
1765 #if KMP_OS_WINDOWS
1766   __asm {
1767         _emit 0x0f
1768         _emit 0x01
1769         _emit 0xd5
1770   }
1771 #else
1772   __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1773 #endif
1774 }
1775 
1776 /* This is a macro; the argument must be a single-byte constant which can be
1777    evaluated by the inline assembler, since it is emitted as a byte into the
1778    assembly code. */
1779 // clang-format off
1780 #if KMP_OS_WINDOWS
1781 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1782 #else
1783 #define _xabort(ARG)                                                           \
1784   __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1785 #endif
1786 // clang-format on
1787 #endif // KMP_HAVE_RTM_INTRINSICS
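// Whichever branch above is taken, the speculation primitives have the same
// shape. A minimal, illustrative sketch (not the adaptive-lock code itself) of
// the canonical _xbegin()/_xabort() retry skeleton they are used for;
// 'lock_is_free' is a caller-supplied check, standing in for something like
// __kmp_is_unlocked_queuing_lock().
static int rtm_retry_sketch(bool (*lock_is_free)(void), int max_retries) {
  for (int i = 0; i <= max_retries; ++i) {
    unsigned status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      if (lock_is_free())
        return 1;    // still inside the transaction; caller commits with _xend()
      _xabort(0x01); // the real lock is held: abort explicitly
    }
    // Only retry aborts that are likely to be transient.
    if (!(status & SOFT_ABORT_MASK))
      break;
  }
  return 0; // caller falls back to the non-speculative lock
}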
1788 
1789 // Statistics are collected for testing purposes
1790 #if KMP_DEBUG_ADAPTIVE_LOCKS
1791 
1792 // We accumulate speculative lock statistics when the lock is destroyed. We
1793 // keep locks that haven't been destroyed in the liveLocks list so that we can
1794 // grab their statistics too.
1795 static kmp_adaptive_lock_statistics_t destroyedStats;
1796 
1797 // To hold the list of live locks.
1798 static kmp_adaptive_lock_info_t liveLocks;
1799 
1800 // A lock so we can safely update the list of locks.
1801 static kmp_bootstrap_lock_t chain_lock =
1802     KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1803 
1804 // Initialize the list of stats.
1805 void __kmp_init_speculative_stats() {
1806   kmp_adaptive_lock_info_t *lck = &liveLocks;
1807 
1808   memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1809          sizeof(lck->stats));
1810   lck->stats.next = lck;
1811   lck->stats.prev = lck;
1812 
1813   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1814   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1815 
1816   __kmp_init_bootstrap_lock(&chain_lock);
1817 }
1818 
1819 // Insert the lock into the circular list
1820 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1821   __kmp_acquire_bootstrap_lock(&chain_lock);
1822 
1823   lck->stats.next = liveLocks.stats.next;
1824   lck->stats.prev = &liveLocks;
1825 
1826   liveLocks.stats.next = lck;
1827   lck->stats.next->stats.prev = lck;
1828 
1829   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1830   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1831 
1832   __kmp_release_bootstrap_lock(&chain_lock);
1833 }
1834 
1835 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1836   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838 
1839   kmp_adaptive_lock_info_t *n = lck->stats.next;
1840   kmp_adaptive_lock_info_t *p = lck->stats.prev;
1841 
1842   n->stats.prev = p;
1843   p->stats.next = n;
1844 }
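// An illustrative sketch (not runtime code) of the intrusive circular list the
// statistics code above maintains: the sentinel ('liveLocks' above) points at
// itself when the list is empty, so insertion and removal need no NULL checks.
struct stats_list_node_sketch_t {
  stats_list_node_sketch_t *next, *prev;
};

static void sketch_list_init(stats_list_node_sketch_t *sentinel) {
  sentinel->next = sentinel->prev = sentinel;
}

static void sketch_list_insert(stats_list_node_sketch_t *sentinel,
                               stats_list_node_sketch_t *n) {
  n->next = sentinel->next;
  n->prev = sentinel;
  sentinel->next->prev = n;
  sentinel->next = n;
}

static void sketch_list_remove(stats_list_node_sketch_t *n) {
  n->next->prev = n->prev;
  n->prev->next = n->next;
}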
1845 
1846 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1847   memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1848          sizeof(lck->stats));
1849   __kmp_remember_lock(lck);
1850 }
1851 
1852 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1853                             kmp_adaptive_lock_info_t *lck) {
1854   kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1855 
1856   t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1857   t->successfulSpeculations += s->successfulSpeculations;
1858   t->hardFailedSpeculations += s->hardFailedSpeculations;
1859   t->softFailedSpeculations += s->softFailedSpeculations;
1860   t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1861   t->lemmingYields += s->lemmingYields;
1862 }
1863 
1864 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1865   __kmp_acquire_bootstrap_lock(&chain_lock);
1866 
1867   __kmp_add_stats(&destroyedStats, lck);
1868   __kmp_forget_lock(lck);
1869 
1870   __kmp_release_bootstrap_lock(&chain_lock);
1871 }
1872 
1873 static float percent(kmp_uint32 count, kmp_uint32 total) {
1874   return (total == 0) ? 0.0 : (100.0 * count) / total;
1875 }
1876 
1877 void __kmp_print_speculative_stats() {
1878   kmp_adaptive_lock_statistics_t total = destroyedStats;
1879   kmp_adaptive_lock_info_t *lck;
1880 
1881   for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1882     __kmp_add_stats(&total, lck);
1883   }
1884   kmp_adaptive_lock_statistics_t *t = &total;
1885   kmp_uint32 totalSections =
1886       t->nonSpeculativeAcquires + t->successfulSpeculations;
1887   kmp_uint32 totalSpeculations = t->successfulSpeculations +
1888                                  t->hardFailedSpeculations +
1889                                  t->softFailedSpeculations;
1890   if (totalSections <= 0)
1891     return;
1892 
1893   kmp_safe_raii_file_t statsFile;
1894   if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1895     statsFile.set_stdout();
1896   } else {
1897     size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1898     char buffer[buffLen];
1899     KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1900                  (kmp_int32)getpid());
1901     statsFile.open(buffer, "w");
1902   }
1903 
1904   fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1905   fprintf(statsFile,
1906           " Lock parameters: \n"
1907           "   max_soft_retries               : %10d\n"
1908           "   max_badness                    : %10d\n",
1909           __kmp_adaptive_backoff_params.max_soft_retries,
1910           __kmp_adaptive_backoff_params.max_badness);
1911   fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1912           t->nonSpeculativeAcquireAttempts);
1913   fprintf(statsFile, " Total critical sections          : %10d\n",
1914           totalSections);
1915   fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
1916           t->successfulSpeculations,
1917           percent(t->successfulSpeculations, totalSections));
1918   fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
1919           t->nonSpeculativeAcquires,
1920           percent(t->nonSpeculativeAcquires, totalSections));
1921   fprintf(statsFile, " Lemming yields                   : %10d\n\n",
1922           t->lemmingYields);
1923 
1924   fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
1925           totalSpeculations);
1926   fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
1927           t->successfulSpeculations,
1928           percent(t->successfulSpeculations, totalSpeculations));
1929   fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
1930           t->softFailedSpeculations,
1931           percent(t->softFailedSpeculations, totalSpeculations));
1932   fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
1933           t->hardFailedSpeculations,
1934           percent(t->hardFailedSpeculations, totalSpeculations));
1935 }
1936 
1937 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1938 #else
1939 #define KMP_INC_STAT(lck, stat)
1940 
1941 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1942 
1943 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1944   // It is enough to check that the head_id is zero.
1945   // We do not need to check the tail as well.
1946   bool res = lck->lk.head_id == 0;
1947 
1948 // We need a fence here, since we must ensure that no memory operations
1949 // from later in this thread float above that read.
1950 #if KMP_COMPILER_ICC
1951   _mm_mfence();
1952 #else
1953   __sync_synchronize();
1954 #endif
1955 
1956   return res;
1957 }
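// A hedged, portable restatement of the fence requirement above (illustrative
// only; the runtime keeps the compiler-specific fences). The point is that the
// transactional reads which follow must not be reordered before the read of
// head_id; a sequentially consistent fence (a full fence, e.g. mfence on x86)
// expresses that using only <atomic>, which this file already includes.
static inline bool queuing_lock_looks_free_sketch(
    const std::atomic<kmp_int32> *head_id) {
  bool res = head_id->load(std::memory_order_relaxed) == 0;
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return res;
}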
1958 
1959 // Functions for manipulating the badness
1960 static __inline void
1961 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1962   // Reset the badness to zero so we eagerly try to speculate again
1963   lck->lk.adaptive.badness = 0;
1964   KMP_INC_STAT(lck, successfulSpeculations);
1965 }
1966 
1967 // Create a bit mask with one more set bit.
1968 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1969   kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1970   if (newBadness > lck->lk.adaptive.max_badness) {
1971     return;
1972   } else {
1973     lck->lk.adaptive.badness = newBadness;
1974   }
1975 }
1976 
1977 // Check whether speculation should be attempted.
1978 KMP_ATTRIBUTE_TARGET_RTM
1979 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1980                                            kmp_int32 gtid) {
1981   kmp_uint32 badness = lck->lk.adaptive.badness;
1982   kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1983   int res = (attempts & badness) == 0;
1984   return res;
1985 }
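// A worked example of the gating test above: badness is a mask of low bits, so
// (attempts & badness) == 0 lets speculation through only on every
// (badness + 1)-th acquire attempt:
//   badness 0x0 -> speculate on every attempt
//   badness 0x1 -> attempts 0, 2, 4, ...  (every 2nd)
//   badness 0x3 -> attempts 0, 4, 8, ...  (every 4th)
//   badness 0x7 -> attempts 0, 8, 16, ... (every 8th)
// The same test in standalone form (illustrative only):
static inline bool should_speculate_sketch(kmp_uint32 attempts,
                                           kmp_uint32 badness) {
  return (attempts & badness) == 0;
}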
1986 
1987 // Attempt to acquire only the speculative lock.
1988 // Does not back off to the non-speculative lock.
1989 KMP_ATTRIBUTE_TARGET_RTM
1990 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1991                                          kmp_int32 gtid) {
1992   int retries = lck->lk.adaptive.max_soft_retries;
1993 
1994   // We don't explicitly count the start of speculation, rather we record the
1995   // results (success, hard fail, soft fail). The sum of all of those is the
1996   // total number of times we started speculation since all speculations must
1997   // end one of those ways.
1998   do {
1999     kmp_uint32 status = _xbegin();
2000     // Switch this in to disable actual speculation but exercise at least some
2001     // of the rest of the code. Useful for debugging...
2002     // kmp_uint32 status = _XABORT_NESTED;
2003 
2004     if (status == _XBEGIN_STARTED) {
2005       /* We have successfully started speculation. Check that no-one acquired
2006          the lock for real between when we last looked and now. This also gets
2007          the lock cache line into our read-set, which we need so that we'll
2008          abort if anyone later claims it for real. */
2009       if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2010         // Lock is now visibly acquired, so someone beat us to it. Abort the
2011         // transaction so we'll restart from _xbegin with the failure status.
2012         _xabort(0x01);
2013         KMP_ASSERT2(0, "should not get here");
2014       }
2015       return 1; // Lock has been acquired (speculatively)
2016     } else {
2017       // We have aborted, update the statistics
2018       if (status & SOFT_ABORT_MASK) {
2019         KMP_INC_STAT(lck, softFailedSpeculations);
2020         // and loop round to retry.
2021       } else {
2022         KMP_INC_STAT(lck, hardFailedSpeculations);
2023         // Give up if we had a hard failure.
2024         break;
2025       }
2026     }
2027   } while (retries--); // Loop while we have retries, and didn't fail hard.
2028 
2029   // Either we had a hard failure, or we didn't succeed softly after the
2030   // full set of attempts, so increase the badness (back off from speculating).
2031   __kmp_step_badness(lck);
2032   return 0;
2033 }
2034 
2035 // Attempt to acquire the speculative lock, or back off to the non-speculative
2036 // one if the speculative lock cannot be acquired.
2037 // We can succeed speculatively, non-speculatively, or fail.
2038 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2039   // First try to acquire the lock speculatively
2040   if (__kmp_should_speculate(lck, gtid) &&
2041       __kmp_test_adaptive_lock_only(lck, gtid))
2042     return 1;
2043 
2044   // Speculative acquisition failed, so try to acquire it non-speculatively.
2045   // Count the non-speculative acquire attempt
2046   lck->lk.adaptive.acquire_attempts++;
2047 
2048   // Use base, non-speculative lock.
2049   if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2050     KMP_INC_STAT(lck, nonSpeculativeAcquires);
2051     return 1; // Lock is acquired (non-speculatively)
2052   } else {
2053     return 0; // Failed to acquire the lock, it's already visibly locked.
2054   }
2055 }
2056 
2057 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2058                                                 kmp_int32 gtid) {
2059   char const *const func = "omp_test_lock";
2060   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2061     KMP_FATAL(LockIsUninitialized, func);
2062   }
2063 
2064   int retval = __kmp_test_adaptive_lock(lck, gtid);
2065 
2066   if (retval) {
2067     lck->lk.qlk.owner_id = gtid + 1;
2068   }
2069   return retval;
2070 }
2071 
2072 // Block until we can acquire a speculative, adaptive lock. We check whether we
2073 // should be trying to speculate. If we should be, we check the real lock to see
2074 // if it is free, and, if not, pause without attempting to acquire it until it
2075 // is. Then we try the speculative acquire. This means that although we suffer
2076 // from lemmings a little (because we can't acquire the lock speculatively
2077 // until the queue of waiting threads has cleared), we don't get into a state
2078 // where we can never acquire the lock speculatively (because we force the queue
2079 // to clear by preventing new arrivals from entering the queue). This does mean
2080 // that when we're trying to break lemmings, the lock is no longer fair. However,
2081 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2082 // problem.
2083 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2084                                         kmp_int32 gtid) {
2085   if (__kmp_should_speculate(lck, gtid)) {
2086     if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2087       if (__kmp_test_adaptive_lock_only(lck, gtid))
2088         return;
2089       // We tried speculation and failed, so give up.
2090     } else {
2091       // We can't try speculation until the lock is free, so we pause here
2092       // (without suspending on the queuing lock) to allow it to drain, then
2093       // try again. All other threads will also see the same result for
2094       // shouldSpeculate, so they will be doing the same if they try to claim
2095       // the lock from now on.
2096       while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2097         KMP_INC_STAT(lck, lemmingYields);
2098         KMP_YIELD(TRUE);
2099       }
2100 
2101       if (__kmp_test_adaptive_lock_only(lck, gtid))
2102         return;
2103     }
2104   }
2105 
2106   // Speculative acquisition failed, so acquire it non-speculatively.
2107   // Count the non-speculative acquire attempt
2108   lck->lk.adaptive.acquire_attempts++;
2109 
2110   __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2111   // We have acquired the base lock, so count that.
2112   KMP_INC_STAT(lck, nonSpeculativeAcquires);
2113 }
2114 
2115 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2116                                                     kmp_int32 gtid) {
2117   char const *const func = "omp_set_lock";
2118   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2119     KMP_FATAL(LockIsUninitialized, func);
2120   }
2121   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2122     KMP_FATAL(LockIsAlreadyOwned, func);
2123   }
2124 
2125   __kmp_acquire_adaptive_lock(lck, gtid);
2126 
2127   lck->lk.qlk.owner_id = gtid + 1;
2128 }
2129 
2130 KMP_ATTRIBUTE_TARGET_RTM
2131 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2132                                        kmp_int32 gtid) {
2133   if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2134           lck))) { // If the lock doesn't look claimed we must be speculating.
2135     // (Or the user's code is buggy and they're releasing without locking;
2136     // if we had XTEST we'd be able to check that case...)
2137     _xend(); // Exit speculation
2138     __kmp_update_badness_after_success(lck);
2139   } else { // Since the lock *is* visibly locked we're not speculating,
2140     // so should use the underlying lock's release scheme.
2141     __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2142   }
2143   return KMP_LOCK_RELEASED;
2144 }
2145 
2146 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2147                                                    kmp_int32 gtid) {
2148   char const *const func = "omp_unset_lock";
2149   KMP_MB(); /* in case another processor initialized lock */
2150   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2151     KMP_FATAL(LockIsUninitialized, func);
2152   }
2153   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2154     KMP_FATAL(LockUnsettingFree, func);
2155   }
2156   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2157     KMP_FATAL(LockUnsettingSetByAnother, func);
2158   }
2159   lck->lk.qlk.owner_id = 0;
2160   __kmp_release_adaptive_lock(lck, gtid);
2161   return KMP_LOCK_RELEASED;
2162 }
2163 
2164 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2165   __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2166   lck->lk.adaptive.badness = 0;
2167   lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2168   lck->lk.adaptive.max_soft_retries =
2169       __kmp_adaptive_backoff_params.max_soft_retries;
2170   lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2171 #if KMP_DEBUG_ADAPTIVE_LOCKS
2172   __kmp_zero_speculative_stats(&lck->lk.adaptive);
2173 #endif
2174   KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2175 }
2176 
2177 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2178 #if KMP_DEBUG_ADAPTIVE_LOCKS
2179   __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2180 #endif
2181   __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2182   // Nothing needed for the speculative part.
2183 }
2184 
2185 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2186   char const *const func = "omp_destroy_lock";
2187   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2188     KMP_FATAL(LockIsUninitialized, func);
2189   }
2190   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2191     KMP_FATAL(LockStillOwned, func);
2192   }
2193   __kmp_destroy_adaptive_lock(lck);
2194 }
2195 
2196 #endif // KMP_USE_ADAPTIVE_LOCKS
2197 
2198 /* ------------------------------------------------------------------------ */
2199 /* DRDPA ticket locks                                                */
2200 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2201 
2202 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2203   return lck->lk.owner_id - 1;
2204 }
2205 
2206 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2207   return lck->lk.depth_locked != -1;
2208 }
2209 
2210 __forceinline static int
2211 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2212   kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2213   kmp_uint64 mask = lck->lk.mask; // atomic load
2214   std::atomic<kmp_uint64> *polls = lck->lk.polls;
2215 
2216 #ifdef USE_LOCK_PROFILE
2217   if (polls[ticket & mask] != ticket)
2218     __kmp_printf("LOCK CONTENTION: %p\n", lck);
2219 /* else __kmp_printf( "." );*/
2220 #endif /* USE_LOCK_PROFILE */
2221 
2222   // Now spin-wait, but reload the polls pointer and mask, in case the
2223   // polling area has been reconfigured.  Unless it is reconfigured, the
2224   // reloads stay in L1 cache and are cheap.
2225   //
2226   // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2227   // The current implementation of KMP_WAIT doesn't allow for mask
2228   // and poll to be re-read every spin iteration.
2229   kmp_uint32 spins;
2230   KMP_FSYNC_PREPARE(lck);
2231   KMP_INIT_YIELD(spins);
2232   while (polls[ticket & mask] < ticket) { // atomic load
2233     KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2234     // Re-read the mask and the poll pointer from the lock structure.
2235     //
2236     // Make certain that "mask" is read before "polls" !!!
2237     //
2238     // If another thread reconfigures the polling area and updates these
2239     // values, and we read the new value of mask but the old polls pointer, we
2240     // could access memory beyond the end of the old polling area.
2241     mask = lck->lk.mask; // atomic load
2242     polls = lck->lk.polls; // atomic load
2243   }
2244 
2245   // Critical section starts here
2246   KMP_FSYNC_ACQUIRED(lck);
2247   KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2248                   ticket, lck));
2249   lck->lk.now_serving = ticket; // non-volatile store
2250 
2251   // Deallocate a garbage polling area if we know that we are the last
2252   // thread that could possibly access it.
2253   //
2254   // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2255   // ticket.
2256   if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2257     __kmp_free(lck->lk.old_polls);
2258     lck->lk.old_polls = NULL;
2259     lck->lk.cleanup_ticket = 0;
2260   }
2261 
2262   // Check to see if we should reconfigure the polling area.
2263   // If there is still a garbage polling area to be deallocated from a
2264   // previous reconfiguration, let a later thread reconfigure it.
2265   if (lck->lk.old_polls == NULL) {
2266     bool reconfigure = false;
2267     std::atomic<kmp_uint64> *old_polls = polls;
2268     kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2269 
2270     if (TCR_4(__kmp_nth) >
2271         (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2272       // We are in oversubscription mode.  Contract the polling area
2273       // down to a single location, if that hasn't been done already.
2274       if (num_polls > 1) {
2275         reconfigure = true;
2276         num_polls = TCR_4(lck->lk.num_polls);
2277         mask = 0;
2278         num_polls = 1;
2279         polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2280                                                           sizeof(*polls));
2281         polls[0] = ticket;
2282       }
2283     } else {
2284       // We are in under/fully subscribed mode.  Check the number of
2285       // threads waiting on the lock.  The size of the polling area
2286       // should be at least the number of threads waiting.
2287       kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2288       if (num_waiting > num_polls) {
2289         kmp_uint32 old_num_polls = num_polls;
2290         reconfigure = true;
2291         do {
2292           mask = (mask << 1) | 1;
2293           num_polls *= 2;
2294         } while (num_polls <= num_waiting);
2295 
2296         // Allocate the new polling area, and copy the relevant portion
2297         // of the old polling area to the new area.  __kmp_allocate()
2298         // zeroes the memory it allocates, and most of the old area is
2299         // just zero padding, so we only copy the release counters.
2300         polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2301                                                           sizeof(*polls));
2302         kmp_uint32 i;
2303         for (i = 0; i < old_num_polls; i++) {
2304           polls[i].store(old_polls[i]);
2305         }
2306       }
2307     }
2308 
2309     if (reconfigure) {
2310       // Now write the updated fields back to the lock structure.
2311       //
2312       // Make certain that "polls" is written before "mask" !!!
2313       //
2314       // If another thread picks up the new value of mask and the old polls
2315       // pointer, it could access memory beyond the end of the old polling
2316       // area.
2317       //
2318       // On x86, we need memory fences.
2319       KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2320                       "lock %p to %d polls\n",
2321                       ticket, lck, num_polls));
2322 
2323       lck->lk.old_polls = old_polls;
2324       lck->lk.polls = polls; // atomic store
2325 
2326       KMP_MB();
2327 
2328       lck->lk.num_polls = num_polls;
2329       lck->lk.mask = mask; // atomic store
2330 
2331       KMP_MB();
2332 
2333       // Only after the new polling area and mask have been flushed
2334       // to main memory can we update the cleanup ticket field.
2335       //
2336       // volatile load / non-volatile store
2337       lck->lk.cleanup_ticket = lck->lk.next_ticket;
2338     }
2339   }
2340   return KMP_LOCK_ACQUIRED_FIRST;
2341 }
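// A minimal, fixed-size sketch of the distributed polling idea implemented
// above (illustrative only: no reconfiguration, no old-area cleanup, 64-bit
// ticket wrap ignored). Each waiter spins on its own slot, polls[ticket & mask],
// so releasing a ticket writes one cache line instead of invalidating a single
// location that every waiter is spinning on.
struct drdpa_sketch_t {
  static const unsigned n_polls = 64; // must be a power of two
  std::atomic<kmp_uint64> next_ticket;
  std::atomic<kmp_uint64> polls[n_polls];
  kmp_uint64 now_serving; // written only by the current lock holder

  drdpa_sketch_t() : next_ticket(0), now_serving(0) {
    for (unsigned i = 0; i < n_polls; ++i)
      polls[i].store(0, std::memory_order_relaxed);
  }

  void acquire() {
    kmp_uint64 ticket = next_ticket.fetch_add(1);
    while (polls[ticket & (n_polls - 1)].load(std::memory_order_acquire) <
           ticket)
      KMP_CPU_PAUSE();
    now_serving = ticket; // critical section starts here
  }

  void release() {
    kmp_uint64 ticket = now_serving + 1;
    polls[ticket & (n_polls - 1)].store(ticket, std::memory_order_release);
  }
};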
2342 
2343 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2344   int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2345   return retval;
2346 }
2347 
2348 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2349                                                 kmp_int32 gtid) {
2350   char const *const func = "omp_set_lock";
2351   if (lck->lk.initialized != lck) {
2352     KMP_FATAL(LockIsUninitialized, func);
2353   }
2354   if (__kmp_is_drdpa_lock_nestable(lck)) {
2355     KMP_FATAL(LockNestableUsedAsSimple, func);
2356   }
2357   if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2358     KMP_FATAL(LockIsAlreadyOwned, func);
2359   }
2360 
2361   __kmp_acquire_drdpa_lock(lck, gtid);
2362 
2363   lck->lk.owner_id = gtid + 1;
2364   return KMP_LOCK_ACQUIRED_FIRST;
2365 }
2366 
2367 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2368   // First get a ticket, then read the polls pointer and the mask.
2369   // The polls pointer must be read before the mask!!! (See above)
2370   kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2371   std::atomic<kmp_uint64> *polls = lck->lk.polls;
2372   kmp_uint64 mask = lck->lk.mask; // atomic load
2373   if (polls[ticket & mask] == ticket) {
2374     kmp_uint64 next_ticket = ticket + 1;
2375     if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2376                                        next_ticket)) {
2377       KMP_FSYNC_ACQUIRED(lck);
2378       KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2379                       ticket, lck));
2380       lck->lk.now_serving = ticket; // non-volatile store
2381 
2382       // Since no threads are waiting, there is no possibility that we would
2383       // want to reconfigure the polling area.  We might have the cleanup ticket
2384       // value (which says that it is now safe to deallocate old_polls), but
2385       // we'll let a later thread which calls __kmp_acquire_lock do that - this
2386       // routine isn't supposed to block, and we would risk blocking if we called
2387       // __kmp_free() to do the deallocation.
2388       return TRUE;
2389     }
2390   }
2391   return FALSE;
2392 }
2393 
2394 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2395                                              kmp_int32 gtid) {
2396   char const *const func = "omp_test_lock";
2397   if (lck->lk.initialized != lck) {
2398     KMP_FATAL(LockIsUninitialized, func);
2399   }
2400   if (__kmp_is_drdpa_lock_nestable(lck)) {
2401     KMP_FATAL(LockNestableUsedAsSimple, func);
2402   }
2403 
2404   int retval = __kmp_test_drdpa_lock(lck, gtid);
2405 
2406   if (retval) {
2407     lck->lk.owner_id = gtid + 1;
2408   }
2409   return retval;
2410 }
2411 
2412 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2413   // Read the ticket value from the lock data struct, then the polls pointer and
2414   // the mask.  The polls pointer must be read before the mask!!! (See above)
2415   kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2416   std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2417   kmp_uint64 mask = lck->lk.mask; // atomic load
2418   KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2419                   ticket - 1, lck));
2420   KMP_FSYNC_RELEASING(lck);
2421   polls[ticket & mask] = ticket; // atomic store
2422   return KMP_LOCK_RELEASED;
2423 }
2424 
2425 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2426                                                 kmp_int32 gtid) {
2427   char const *const func = "omp_unset_lock";
2428   KMP_MB(); /* in case another processor initialized lock */
2429   if (lck->lk.initialized != lck) {
2430     KMP_FATAL(LockIsUninitialized, func);
2431   }
2432   if (__kmp_is_drdpa_lock_nestable(lck)) {
2433     KMP_FATAL(LockNestableUsedAsSimple, func);
2434   }
2435   if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2436     KMP_FATAL(LockUnsettingFree, func);
2437   }
2438   if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2439       (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2440     KMP_FATAL(LockUnsettingSetByAnother, func);
2441   }
2442   lck->lk.owner_id = 0;
2443   return __kmp_release_drdpa_lock(lck, gtid);
2444 }
2445 
2446 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2447   lck->lk.location = NULL;
2448   lck->lk.mask = 0;
2449   lck->lk.num_polls = 1;
2450   lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2451       lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2452   lck->lk.cleanup_ticket = 0;
2453   lck->lk.old_polls = NULL;
2454   lck->lk.next_ticket = 0;
2455   lck->lk.now_serving = 0;
2456   lck->lk.owner_id = 0; // no thread owns the lock.
2457   lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2458   lck->lk.initialized = lck;
2459 
2460   KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2461 }
2462 
2463 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2464   lck->lk.initialized = NULL;
2465   lck->lk.location = NULL;
2466   if (lck->lk.polls.load() != NULL) {
2467     __kmp_free(lck->lk.polls.load());
2468     lck->lk.polls = NULL;
2469   }
2470   if (lck->lk.old_polls != NULL) {
2471     __kmp_free(lck->lk.old_polls);
2472     lck->lk.old_polls = NULL;
2473   }
2474   lck->lk.mask = 0;
2475   lck->lk.num_polls = 0;
2476   lck->lk.cleanup_ticket = 0;
2477   lck->lk.next_ticket = 0;
2478   lck->lk.now_serving = 0;
2479   lck->lk.owner_id = 0;
2480   lck->lk.depth_locked = -1;
2481 }
2482 
2483 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2484   char const *const func = "omp_destroy_lock";
2485   if (lck->lk.initialized != lck) {
2486     KMP_FATAL(LockIsUninitialized, func);
2487   }
2488   if (__kmp_is_drdpa_lock_nestable(lck)) {
2489     KMP_FATAL(LockNestableUsedAsSimple, func);
2490   }
2491   if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2492     KMP_FATAL(LockStillOwned, func);
2493   }
2494   __kmp_destroy_drdpa_lock(lck);
2495 }
2496 
2497 // nested drdpa ticket locks
2498 
2499 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2500   KMP_DEBUG_ASSERT(gtid >= 0);
2501 
2502   if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2503     lck->lk.depth_locked += 1;
2504     return KMP_LOCK_ACQUIRED_NEXT;
2505   } else {
2506     __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2507     KMP_MB();
2508     lck->lk.depth_locked = 1;
2509     KMP_MB();
2510     lck->lk.owner_id = gtid + 1;
2511     return KMP_LOCK_ACQUIRED_FIRST;
2512   }
2513 }
2514 
2515 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2516                                                         kmp_int32 gtid) {
2517   char const *const func = "omp_set_nest_lock";
2518   if (lck->lk.initialized != lck) {
2519     KMP_FATAL(LockIsUninitialized, func);
2520   }
2521   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2522     KMP_FATAL(LockSimpleUsedAsNestable, func);
2523   }
2524   __kmp_acquire_nested_drdpa_lock(lck, gtid);
2525 }
2526 
2527 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2528   int retval;
2529 
2530   KMP_DEBUG_ASSERT(gtid >= 0);
2531 
2532   if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2533     retval = ++lck->lk.depth_locked;
2534   } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2535     retval = 0;
2536   } else {
2537     KMP_MB();
2538     retval = lck->lk.depth_locked = 1;
2539     KMP_MB();
2540     lck->lk.owner_id = gtid + 1;
2541   }
2542   return retval;
2543 }
2544 
2545 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2546                                                     kmp_int32 gtid) {
2547   char const *const func = "omp_test_nest_lock";
2548   if (lck->lk.initialized != lck) {
2549     KMP_FATAL(LockIsUninitialized, func);
2550   }
2551   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2552     KMP_FATAL(LockSimpleUsedAsNestable, func);
2553   }
2554   return __kmp_test_nested_drdpa_lock(lck, gtid);
2555 }
2556 
2557 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2558   KMP_DEBUG_ASSERT(gtid >= 0);
2559 
2560   KMP_MB();
2561   if (--(lck->lk.depth_locked) == 0) {
2562     KMP_MB();
2563     lck->lk.owner_id = 0;
2564     __kmp_release_drdpa_lock(lck, gtid);
2565     return KMP_LOCK_RELEASED;
2566   }
2567   return KMP_LOCK_STILL_HELD;
2568 }
2569 
2570 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2571                                                        kmp_int32 gtid) {
2572   char const *const func = "omp_unset_nest_lock";
2573   KMP_MB(); /* in case another processor initialized lock */
2574   if (lck->lk.initialized != lck) {
2575     KMP_FATAL(LockIsUninitialized, func);
2576   }
2577   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2578     KMP_FATAL(LockSimpleUsedAsNestable, func);
2579   }
2580   if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2581     KMP_FATAL(LockUnsettingFree, func);
2582   }
2583   if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2584     KMP_FATAL(LockUnsettingSetByAnother, func);
2585   }
2586   return __kmp_release_nested_drdpa_lock(lck, gtid);
2587 }
2588 
2589 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2590   __kmp_init_drdpa_lock(lck);
2591   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2592 }
2593 
2594 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2595   __kmp_destroy_drdpa_lock(lck);
2596   lck->lk.depth_locked = 0;
2597 }
2598 
2599 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2600   char const *const func = "omp_destroy_nest_lock";
2601   if (lck->lk.initialized != lck) {
2602     KMP_FATAL(LockIsUninitialized, func);
2603   }
2604   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2605     KMP_FATAL(LockSimpleUsedAsNestable, func);
2606   }
2607   if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2608     KMP_FATAL(LockStillOwned, func);
2609   }
2610   __kmp_destroy_nested_drdpa_lock(lck);
2611 }
2612 
2613 // Access functions for fields that don't exist for all lock kinds.
2614 
2615 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2616   return lck->lk.location;
2617 }
2618 
2619 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2620                                           const ident_t *loc) {
2621   lck->lk.location = loc;
2622 }
2623 
2624 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2625   return lck->lk.flags;
2626 }
2627 
2628 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2629                                        kmp_lock_flags_t flags) {
2630   lck->lk.flags = flags;
2631 }
2632 
2633 // Time stamp counter
2634 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2635 #define __kmp_tsc() __kmp_hardware_timestamp()
2636 // Runtime's default backoff parameters
2637 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2638 #else
2639 // Use nanoseconds for other platforms
2640 extern kmp_uint64 __kmp_now_nsec();
2641 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2642 #define __kmp_tsc() __kmp_now_nsec()
2643 #endif
2644 
2645 // A useful predicate for dealing with timestamps that may wrap.
2646 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2647 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2648 // Times where going clockwise is less distance than going anti-clockwise
2649 // are in the future, others are in the past. E.g. with a = MAX-1 and b = MAX+1
2650 // (= 0), a > b is true yet a has not reached b, whereas the signed difference,
2651 // signed(a) = -2 and signed(b) = 0, captures the actual ordering.
2652 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2653   return ((kmp_int64)b - (kmp_int64)a) > 0;
2654 }
2655 
2656 // Truncated binary exponential backoff function
2657 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2658   // We could flatten this loop, but making it a nested loop gives better results
2659   kmp_uint32 i;
2660   for (i = boff->step; i > 0; i--) {
2661     kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2662     do {
2663       KMP_CPU_PAUSE();
2664     } while (before(__kmp_tsc(), goal));
2665   }
2666   boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2667 }
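// A worked example of the step update above, assuming max_backoff is a power
// of two (both parameter sets above, 4096 and 256, are):
//   step: 1 -> 3 -> 7 -> 15 -> ... -> max_backoff - 1, then saturates there,
// so each call spins for roughly twice as many min_tick intervals as the
// previous one, truncated at max_backoff - 1 (hence "truncated binary
// exponential backoff").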
2668 
2669 #if KMP_USE_DYNAMIC_LOCK
2670 
2671 // Direct lock initializers. They simply write a tag to the low 8 bits of the
2672 // lock word.
2673 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2674                                    kmp_dyna_lockseq_t seq) {
2675   TCW_4(*lck, KMP_GET_D_TAG(seq));
2676   KA_TRACE(
2677       20,
2678       ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2679 }
2680 
2681 #if KMP_USE_TSX
2682 
2683 // HLE lock functions - imported from the testbed runtime.
2684 #define HLE_ACQUIRE ".byte 0xf2;"
2685 #define HLE_RELEASE ".byte 0xf3;"
2686 
2687 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2688   __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2689   return v;
2690 }
2691 
2692 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2693 
2694 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2695   TCW_4(*lck, 0);
2696 }
2697 
2698 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2699   // Use gtid for KMP_LOCK_BUSY if necessary
2700   if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2701     int delay = 1;
2702     do {
2703       while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2704         for (int i = delay; i != 0; --i)
2705           KMP_CPU_PAUSE();
2706         delay = ((delay << 1) | 1) & 7;
2707       }
2708     } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2709   }
2710 }
2711 
2712 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2713                                                kmp_int32 gtid) {
2714   __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2715 }
2716 
2717 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2718   __asm__ volatile(HLE_RELEASE "movl %1,%0"
2719                    : "=m"(*lck)
2720                    : "r"(KMP_LOCK_FREE(hle))
2721                    : "memory");
2722   return KMP_LOCK_RELEASED;
2723 }
2724 
2725 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2726                                               kmp_int32 gtid) {
2727   return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2728 }
2729 
2730 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2731   return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2732 }
2733 
2734 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2735                                            kmp_int32 gtid) {
2736   return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2737 }
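// The HLE_ACQUIRE/HLE_RELEASE strings above hand-encode the xacquire (0xf2)
// and xrelease (0xf3) instruction prefixes. As a hedged, illustrative
// alternative (not used by this file), the same elided test-and-set lock can
// be written with the GCC/Clang __atomic builtins' HLE hints, assuming the
// compiler provides __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE for the
// target; the hand-written prefixes above avoid that dependency.
static void hle_lock_sketch(volatile kmp_uint32 *lock_word) {
  while (__atomic_exchange_n(lock_word, 1U,
                             __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE) != 0U) {
    while (*lock_word != 0U)
      KMP_CPU_PAUSE(); // spin read-only so the eliding owner can still commit
  }
}

static void hle_unlock_sketch(volatile kmp_uint32 *lock_word) {
  __atomic_store_n(lock_word, 0U, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}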
2738 
2739 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2740   __kmp_init_queuing_lock(lck);
2741 }
2742 
2743 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2744   __kmp_destroy_queuing_lock(lck);
2745 }
2746 
2747 static void
2748 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2749   __kmp_destroy_queuing_lock_with_checks(lck);
2750 }
2751 
2752 KMP_ATTRIBUTE_TARGET_RTM
2753 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2754                                            kmp_int32 gtid) {
2755   unsigned retries = 3, status;
2756   do {
2757     status = _xbegin();
2758     if (status == _XBEGIN_STARTED) {
2759       if (__kmp_is_unlocked_queuing_lock(lck))
2760         return;
2761       _xabort(0xff);
2762     }
2763     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2764       // Wait until lock becomes free
2765       while (!__kmp_is_unlocked_queuing_lock(lck)) {
2766         KMP_YIELD(TRUE);
2767       }
2768     } else if (!(status & _XABORT_RETRY))
2769       break;
2770   } while (retries--);
2771 
2772   // Fall-back non-speculative lock (xchg)
2773   __kmp_acquire_queuing_lock(lck, gtid);
2774 }
2775 
2776 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2777                                                        kmp_int32 gtid) {
2778   __kmp_acquire_rtm_queuing_lock(lck, gtid);
2779 }
2780 
2781 KMP_ATTRIBUTE_TARGET_RTM
2782 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2783                                           kmp_int32 gtid) {
2784   if (__kmp_is_unlocked_queuing_lock(lck)) {
2785     // Releasing from speculation
2786     _xend();
2787   } else {
2788     // Releasing from a real lock
2789     __kmp_release_queuing_lock(lck, gtid);
2790   }
2791   return KMP_LOCK_RELEASED;
2792 }
2793 
2794 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2795                                                       kmp_int32 gtid) {
2796   return __kmp_release_rtm_queuing_lock(lck, gtid);
2797 }
2798 
2799 KMP_ATTRIBUTE_TARGET_RTM
2800 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2801                                        kmp_int32 gtid) {
2802   unsigned retries = 3, status;
2803   do {
2804     status = _xbegin();
2805     if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2806       return 1;
2807     }
2808     if (!(status & _XABORT_RETRY))
2809       break;
2810   } while (retries--);
2811 
2812   return __kmp_test_queuing_lock(lck, gtid);
2813 }
2814 
2815 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2816                                                    kmp_int32 gtid) {
2817   return __kmp_test_rtm_queuing_lock(lck, gtid);
2818 }
2819 
2820 // Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a spin-lock fallback.
2821 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2822 
2823 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2824   KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2825 }
2826 
2827 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2828   __kmp_destroy_rtm_spin_lock(lck);
2829 }
2830 
2831 KMP_ATTRIBUTE_TARGET_RTM
2832 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2833                                        kmp_int32 gtid) {
2834   unsigned retries = 3, status;
2835   kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2836   kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2837   do {
2838     status = _xbegin();
2839     if (status == _XBEGIN_STARTED) {
2840       if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2841         return KMP_LOCK_ACQUIRED_FIRST;
2842       _xabort(0xff);
2843     }
2844     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2845       // Wait until lock becomes free
2846       while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2847         KMP_YIELD(TRUE);
2848       }
2849     } else if (!(status & _XABORT_RETRY))
2850       break;
2851   } while (retries--);
2852 
2853   // Fall-back spin lock
2854   KMP_FSYNC_PREPARE(lck);
2855   kmp_backoff_t backoff = __kmp_spin_backoff_params;
2856   while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2857          !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2858     __kmp_spin_backoff(&backoff);
2859   }
2860   KMP_FSYNC_ACQUIRED(lck);
2861   return KMP_LOCK_ACQUIRED_FIRST;
2862 }
2863 
2864 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2865                                                    kmp_int32 gtid) {
2866   return __kmp_acquire_rtm_spin_lock(lck, gtid);
2867 }
2868 
2869 KMP_ATTRIBUTE_TARGET_RTM
2870 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2871                                        kmp_int32 gtid) {
2872   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2873     // Releasing from speculation
2874     _xend();
2875   } else {
2876     // Releasing from a real lock
2877     KMP_FSYNC_RELEASING(lck);
2878     KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2879   }
2880   return KMP_LOCK_RELEASED;
2881 }
2882 
2883 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2884                                                    kmp_int32 gtid) {
2885   return __kmp_release_rtm_spin_lock(lck, gtid);
2886 }
2887 
2888 KMP_ATTRIBUTE_TARGET_RTM
2889 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2890   unsigned retries = 3, status;
2891   kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2892   kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2893   do {
2894     status = _xbegin();
2895     if (status == _XBEGIN_STARTED &&
2896         KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2897       return TRUE;
2898     }
2899     if (!(status & _XABORT_RETRY))
2900       break;
2901   } while (retries--);
2902 
2903   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2904       __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2905     KMP_FSYNC_ACQUIRED(lck);
2906     return TRUE;
2907   }
2908   return FALSE;
2909 }
2910 
2911 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2912                                                 kmp_int32 gtid) {
2913   return __kmp_test_rtm_spin_lock(lck, gtid);
2914 }
2915 
2916 #endif // KMP_USE_TSX
2917 
2918 // Entry functions for indirect locks (first element of direct lock jump tables)
2919 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2920                                      kmp_dyna_lockseq_t tag);
2921 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2922 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2923 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2924 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2925 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2926                                                kmp_int32);
2927 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2928                                                  kmp_int32);
2929 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2930                                                 kmp_int32);
2931 
2932 // Lock function definitions for the union parameter type
2933 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2934 
2935 #define expand1(lk, op)                                                        \
2936   static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
2937     __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
2938   }
2939 #define expand2(lk, op)                                                        \
2940   static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
2941                                         kmp_int32 gtid) {                      \
2942     return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
2943   }
2944 #define expand3(lk, op)                                                        \
2945   static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
2946                                             kmp_lock_flags_t flags) {          \
2947     __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
2948   }
2949 #define expand4(lk, op)                                                        \
2950   static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
2951                                                const ident_t *loc) {           \
2952     __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
2953   }
2954 
2955 KMP_FOREACH_LOCK_KIND(expand1, init)
2956 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2957 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2958 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2959 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2960 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2961 KMP_FOREACH_LOCK_KIND(expand2, release)
2962 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2963 KMP_FOREACH_LOCK_KIND(expand2, test)
2964 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2965 KMP_FOREACH_LOCK_KIND(expand3, )
2966 KMP_FOREACH_LOCK_KIND(expand4, )
2967 
2968 #undef expand1
2969 #undef expand2
2970 #undef expand3
2971 #undef expand4
2972 
2973 // Jump tables for the direct lock functions
2974 // Only fill in the odd entries; that avoids the need to shift out the low bit
2975 
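// Illustrative sketch (not generated or compiled here) of how the odd-entry
// layout is meant to be consumed. Assuming the tag conventions and the
// KMP_EXTRACT_D_TAG helper from kmp_lock.h -- a direct lock word stores
// (lockseq << 1) | 1, which is always odd, while an indirect lock word is even
// and its extracted tag is 0 -- a dispatch site can index these tables with
// the tag itself, with no shift:
//
//   kmp_dyna_lock_t *lock = /* user lock word */;
//   kmp_uint32 tag = KMP_EXTRACT_D_TAG(lock); // 0 (indirect) or odd (direct)
//   __kmp_direct_set[tag](lock, gtid);        // [0] -> __kmp_set_indirect_lock
//                                             // odd -> per-kind direct function
//
// The interleaved zero entries are never indexed; they exist only as padding.
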
2976 // init functions
2977 #define expand(l, op) 0, __kmp_init_direct_lock,
2978 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2979     __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2980 #undef expand
2981 
2982 // destroy functions
2983 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2984 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2985     __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2986 #undef expand
2987 #define expand(l, op)                                                          \
2988   0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2989 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2990     __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2991 #undef expand
2992 
2993 // set/acquire functions
2994 #define expand(l, op)                                                          \
2995   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2996 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2997     __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2998 #undef expand
2999 #define expand(l, op)                                                          \
3000   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3001 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3002     __kmp_set_indirect_lock_with_checks, 0,
3003     KMP_FOREACH_D_LOCK(expand, acquire)};
3004 #undef expand
3005 
3006 // unset/release and test functions
3007 #define expand(l, op)                                                          \
3008   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3009 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3010     __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3011 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3012     __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3013 #undef expand
3014 #define expand(l, op)                                                          \
3015   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3016 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3017     __kmp_unset_indirect_lock_with_checks, 0,
3018     KMP_FOREACH_D_LOCK(expand, release)};
3019 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3020     __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3021 #undef expand
3022 
3023 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3024 void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3025 int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3026 int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3027 int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3028 
3029 // Jump tables for the indirect lock functions
3030 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3031 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3032     KMP_FOREACH_I_LOCK(expand, init)};
3033 #undef expand
3034 
3035 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3036 static void (*indirect_destroy[])(kmp_user_lock_p) = {
3037     KMP_FOREACH_I_LOCK(expand, destroy)};
3038 #undef expand
3039 #define expand(l, op)                                                          \
3040   (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3041 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3042     KMP_FOREACH_I_LOCK(expand, destroy)};
3043 #undef expand
3044 
3045 // set/acquire functions
3046 #define expand(l, op)                                                          \
3047   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3048 static int (*indirect_set[])(kmp_user_lock_p,
3049                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3050 #undef expand
3051 #define expand(l, op)                                                          \
3052   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3053 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3054     KMP_FOREACH_I_LOCK(expand, acquire)};
3055 #undef expand
3056 
3057 // unset/release and test functions
3058 #define expand(l, op)                                                          \
3059   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3060 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3061     KMP_FOREACH_I_LOCK(expand, release)};
3062 static int (*indirect_test[])(kmp_user_lock_p,
3063                               kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3064 #undef expand
3065 #define expand(l, op)                                                          \
3066   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3067 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3068     KMP_FOREACH_I_LOCK(expand, release)};
3069 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3070     KMP_FOREACH_I_LOCK(expand, test)};
3071 #undef expand
3072 
3073 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3074 void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3075 int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3076 int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3077 int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3078 
3079 // Lock index table.
3080 kmp_indirect_lock_table_t __kmp_i_lock_table;
3081 
3082 // Size of indirect locks.
3083 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3084 
3085 // Jump tables for lock accessor/modifier.
3086 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3087                                                      const ident_t *) = {0};
3088 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3089                                                   kmp_lock_flags_t) = {0};
3090 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3091     kmp_user_lock_p) = {0};
3092 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3093     kmp_user_lock_p) = {0};
3094 
3095 // Use different lock pools for different lock types.
3096 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3097 
3098 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3099 // the indirect lock table holds the address and type of the allocated indirect
3100 // lock (kmp_indirect_lock_t). When a table is full, a new table with double the
3101 // number of row pointers is chained to it. A destroyed indirect lock object is
3102 // returned to a reusable pool of locks that is unique to each lock type.
3103 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3104                                                   kmp_int32 gtid,
3105                                                   kmp_indirect_locktag_t tag) {
3106   kmp_indirect_lock_t *lck;
3107   kmp_lock_index_t idx, table_idx;
3108 
3109   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3110 
3111   if (__kmp_indirect_lock_pool[tag] != NULL) {
3112     // Reuse the allocated and destroyed lock object
3113     lck = __kmp_indirect_lock_pool[tag];
3114     if (OMP_LOCK_T_SIZE < sizeof(void *))
3115       idx = lck->lock->pool.index;
3116     __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3117     KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3118                   lck));
3119   } else {
3120     kmp_uint32 row, col;
3121     kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3122     idx = 0;
3123     // Find location in list of lock tables to put new lock
3124     while (1) {
3125       table_idx = lock_table->next; // index within this table
3126       idx += lock_table->next; // global index within list of tables
3127       if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3128         row = table_idx / KMP_I_LOCK_CHUNK;
3129         col = table_idx % KMP_I_LOCK_CHUNK;
3130         // Allocate a new row of locks if necessary
3131         if (!lock_table->table[row]) {
3132           lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3133               sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3134         }
3135         break;
3136       }
3137       // If necessary, allocate a new lock table with double the capacity
3138       if (!lock_table->next_table) {
3139         kmp_indirect_lock_table_t *next_table =
3140             (kmp_indirect_lock_table_t *)__kmp_allocate(
3141                 sizeof(kmp_indirect_lock_table_t));
3142         next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3143             sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3144         next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3145         next_table->next = 0;
3146         next_table->next_table = nullptr;
3147         lock_table->next_table = next_table;
3148       }
3149       lock_table = lock_table->next_table;
3150       KMP_ASSERT(lock_table);
3151     }
3152     lock_table->next++;
3153 
3154     lck = &lock_table->table[row][col];
3155     // Allocate a new base lock object
3156     lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3157     KA_TRACE(20,
3158              ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3159   }
3160 
3161   __kmp_release_lock(&__kmp_global_lock, gtid);
3162 
3163   lck->type = tag;
3164 
3165   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3166     *((kmp_lock_index_t *)user_lock) = idx
3167                                        << 1; // indirect lock word must be even
3168   } else {
3169     *((kmp_indirect_lock_t **)user_lock) = lck;
3170   }
3171 
3172   return lck;
3173 }
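
// Illustrative sketch (not part of the runtime) of the addressing used above.
// Within one table a slot decomposes into a row of KMP_I_LOCK_CHUNK locks and
// a column inside that row:
//
//   row = table_idx / KMP_I_LOCK_CHUNK;
//   col = table_idx % KMP_I_LOCK_CHUNK;
//   lck = &lock_table->table[row][col];
//
// When omp_lock_t cannot hold a pointer, what the user lock word receives is
// the *global* index (idx, the running sum across chained tables) shifted left
// by one so that it stays even, and the lookup path presumably reverses that:
//
//   *((kmp_lock_index_t *)user_lock) = idx << 1;             // publish
//   lck = __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));  // recover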
3174 
3175 // User lock lookup for dynamically dispatched locks.
3176 static __forceinline kmp_indirect_lock_t *
3177 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3178   if (__kmp_env_consistency_check) {
3179     kmp_indirect_lock_t *lck = NULL;
3180     if (user_lock == NULL) {
3181       KMP_FATAL(LockIsUninitialized, func);
3182     }
3183     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3184       kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3185       lck = __kmp_get_i_lock(idx);
3186     } else {
3187       lck = *((kmp_indirect_lock_t **)user_lock);
3188     }
3189     if (lck == NULL) {
3190       KMP_FATAL(LockIsUninitialized, func);
3191     }
3192     return lck;
3193   } else {
3194     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3195       return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3196     } else {
3197       return *((kmp_indirect_lock_t **)user_lock);
3198     }
3199   }
3200 }
3201 
3202 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3203                                      kmp_dyna_lockseq_t seq) {
3204 #if KMP_USE_ADAPTIVE_LOCKS
3205   if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3206     KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3207     seq = lockseq_queuing;
3208   }
3209 #endif
3210 #if KMP_USE_TSX
3211   if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3212     seq = lockseq_queuing;
3213   }
3214 #endif
3215   kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3216   kmp_indirect_lock_t *l =
3217       __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3218   KMP_I_LOCK_FUNC(l, init)(l->lock);
3219   KA_TRACE(
3220       20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3221            seq));
3222 }
3223 
3224 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3225   kmp_uint32 gtid = __kmp_entry_gtid();
3226   kmp_indirect_lock_t *l =
3227       __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3228   KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3229   kmp_indirect_locktag_t tag = l->type;
3230 
3231   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3232 
3233   // Use the base lock's space to keep the pool chain.
3234   l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3235   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3236     l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3237   }
3238   __kmp_indirect_lock_pool[tag] = l;
3239 
3240   __kmp_release_lock(&__kmp_global_lock, gtid);
3241 }
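
// Illustrative sketch (not part of the runtime): after the destroy above, the
// object is threaded onto a per-type free list that reuses the storage of the
// base lock itself, so no separate list nodes are allocated:
//
//   __kmp_indirect_lock_pool[tag] -> l -> (previous head) -> ... -> NULL
//
// Each link lives in l->lock->pool.next, and on small lock words pool.index
// remembers the table slot so __kmp_allocate_indirect_lock() can hand the same
// index back out when the object is reused.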
3242 
3243 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3244   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3245   return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3246 }
3247 
3248 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3249   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3250   return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3251 }
3252 
3253 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3254   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3255   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3256 }
3257 
3258 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3259                                                kmp_int32 gtid) {
3260   kmp_indirect_lock_t *l =
3261       __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3262   return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3263 }
3264 
3265 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3266                                                  kmp_int32 gtid) {
3267   kmp_indirect_lock_t *l =
3268       __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3269   return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3270 }
3271 
3272 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3273                                                 kmp_int32 gtid) {
3274   kmp_indirect_lock_t *l =
3275       __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3276   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3277 }
3278 
3279 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3280 
3281 // This is used only in kmp_error.cpp when consistency checking is on.
3282 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3283   switch (seq) {
3284   case lockseq_tas:
3285   case lockseq_nested_tas:
3286     return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3287 #if KMP_USE_FUTEX
3288   case lockseq_futex:
3289   case lockseq_nested_futex:
3290     return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3291 #endif
3292   case lockseq_ticket:
3293   case lockseq_nested_ticket:
3294     return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3295   case lockseq_queuing:
3296   case lockseq_nested_queuing:
3297 #if KMP_USE_ADAPTIVE_LOCKS
3298   case lockseq_adaptive:
3299 #endif
3300     return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3301   case lockseq_drdpa:
3302   case lockseq_nested_drdpa:
3303     return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3304   default:
3305     return 0;
3306   }
3307 }
3308 
3309 // Initializes data for dynamic user locks.
3310 void __kmp_init_dynamic_user_locks() {
3311   // Initialize jump table for the lock functions
3312   if (__kmp_env_consistency_check) {
3313     __kmp_direct_set = direct_set_check;
3314     __kmp_direct_unset = direct_unset_check;
3315     __kmp_direct_test = direct_test_check;
3316     __kmp_direct_destroy = direct_destroy_check;
3317     __kmp_indirect_set = indirect_set_check;
3318     __kmp_indirect_unset = indirect_unset_check;
3319     __kmp_indirect_test = indirect_test_check;
3320     __kmp_indirect_destroy = indirect_destroy_check;
3321   } else {
3322     __kmp_direct_set = direct_set;
3323     __kmp_direct_unset = direct_unset;
3324     __kmp_direct_test = direct_test;
3325     __kmp_direct_destroy = direct_destroy;
3326     __kmp_indirect_set = indirect_set;
3327     __kmp_indirect_unset = indirect_unset;
3328     __kmp_indirect_test = indirect_test;
3329     __kmp_indirect_destroy = indirect_destroy;
3330   }
3331   // If the user locks have already been initialized, then return. Allow the
3332   // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3333   // new lock tables if they have already been allocated.
3334   if (__kmp_init_user_locks)
3335     return;
3336 
3337   // Initialize lock index table
3338   __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
3339   __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
3340       sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
3341   *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3342       KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3343   __kmp_i_lock_table.next = 0;
3344   __kmp_i_lock_table.next_table = nullptr;
3345 
3346   // Indirect lock size
3347   __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3348   __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3349 #if KMP_USE_ADAPTIVE_LOCKS
3350   __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3351 #endif
3352   __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3353 #if KMP_USE_TSX
3354   __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3355 #endif
3356   __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3357 #if KMP_USE_FUTEX
3358   __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3359 #endif
3360   __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3361   __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3362   __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3363 
3364 // Initialize lock accessor/modifier
3365 #define fill_jumps(table, expand, sep)                                         \
3366   {                                                                            \
3367     table[locktag##sep##ticket] = expand(ticket);                              \
3368     table[locktag##sep##queuing] = expand(queuing);                            \
3369     table[locktag##sep##drdpa] = expand(drdpa);                                \
3370   }
3371 
3372 #if KMP_USE_ADAPTIVE_LOCKS
3373 #define fill_table(table, expand)                                              \
3374   {                                                                            \
3375     fill_jumps(table, expand, _);                                              \
3376     table[locktag_adaptive] = expand(queuing);                                 \
3377     fill_jumps(table, expand, _nested_);                                       \
3378   }
3379 #else
3380 #define fill_table(table, expand)                                              \
3381   {                                                                            \
3382     fill_jumps(table, expand, _);                                              \
3383     fill_jumps(table, expand, _nested_);                                       \
3384   }
3385 #endif // KMP_USE_ADAPTIVE_LOCKS
3386 
3387 #define expand(l)                                                              \
3388   (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3389   fill_table(__kmp_indirect_set_location, expand);
3390 #undef expand
3391 #define expand(l)                                                              \
3392   (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3393   fill_table(__kmp_indirect_set_flags, expand);
3394 #undef expand
3395 #define expand(l)                                                              \
3396   (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3397   fill_table(__kmp_indirect_get_location, expand);
3398 #undef expand
3399 #define expand(l)                                                              \
3400   (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3401   fill_table(__kmp_indirect_get_flags, expand);
3402 #undef expand
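
// For illustration only (commentary, not generated code), one entry produced
// by the fill_table/fill_jumps/expand machinery above is equivalent to an
// assignment such as:
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))
//           __kmp_set_ticket_lock_location;
//
// i.e. each accessor/modifier table maps a lock tag to the matching per-kind
// function, with adaptive locks reusing the queuing-lock accessors.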
3403 
3404   __kmp_init_user_locks = TRUE;
3405 }
3406 
3407 // Clean up the lock table.
3408 void __kmp_cleanup_indirect_user_locks() {
3409   int k;
3410 
3411   // Clean up locks in the pools first (they were already destroyed before going
3412   // into the pools).
3413   for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3414     kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3415     while (l != NULL) {
3416       kmp_indirect_lock_t *ll = l;
3417       l = (kmp_indirect_lock_t *)l->lock->pool.next;
3418       KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3419                     ll));
3420       __kmp_free(ll->lock);
3421       ll->lock = NULL;
3422     }
3423     __kmp_indirect_lock_pool[k] = NULL;
3424   }
3425   // Clean up the remaining undestroyed locks.
3426   kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
3427   while (ptr) {
3428     for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
3429       if (!ptr->table[row])
3430         continue;
3431       for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
3432         kmp_indirect_lock_t *l = &ptr->table[row][col];
3433         if (l->lock) {
3434           // Locks not destroyed explicitly need to be destroyed here.
3435           KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3436           KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3437                         "from table\n",
3438                         l));
3439           __kmp_free(l->lock);
3440         }
3441       }
3442       __kmp_free(ptr->table[row]);
3443     }
3444     kmp_indirect_lock_table_t *next_table = ptr->next_table;
3445     if (ptr != &__kmp_i_lock_table)
3446       __kmp_free(ptr);
3447     ptr = next_table;
3448   }
3449 
3450   __kmp_init_user_locks = FALSE;
3451 }
3452 
3453 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3454 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3455 
3456 #else // KMP_USE_DYNAMIC_LOCK
3457 
3458 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3459   __kmp_init_tas_lock(lck);
3460 }
3461 
3462 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3463   __kmp_init_nested_tas_lock(lck);
3464 }
3465 
3466 #if KMP_USE_FUTEX
3467 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3468   __kmp_init_futex_lock(lck);
3469 }
3470 
3471 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3472   __kmp_init_nested_futex_lock(lck);
3473 }
3474 #endif
3475 
3476 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3477   return lck == lck->lk.self;
3478 }
3479 
3480 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3481   __kmp_init_ticket_lock(lck);
3482 }
3483 
3484 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3485   __kmp_init_nested_ticket_lock(lck);
3486 }
3487 
3488 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3489   return lck == lck->lk.initialized;
3490 }
3491 
3492 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3493   __kmp_init_queuing_lock(lck);
3494 }
3495 
3496 static void
3497 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3498   __kmp_init_nested_queuing_lock(lck);
3499 }
3500 
3501 #if KMP_USE_ADAPTIVE_LOCKS
3502 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3503   __kmp_init_adaptive_lock(lck);
3504 }
3505 #endif
3506 
3507 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3508   return lck == lck->lk.initialized;
3509 }
3510 
3511 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3512   __kmp_init_drdpa_lock(lck);
3513 }
3514 
3515 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3516   __kmp_init_nested_drdpa_lock(lck);
3517 }
3518 
3519 /* User locks
3520  * They are implemented as a table of function pointers, which are set to the
3521  * lock functions of the appropriate kind once that kind has been determined. */
3522 
3523 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3524 
3525 size_t __kmp_base_user_lock_size = 0;
3526 size_t __kmp_user_lock_size = 0;
3527 
3528 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3529 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3530                                             kmp_int32 gtid) = NULL;
3531 
3532 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3533                                          kmp_int32 gtid) = NULL;
3534 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3535                                             kmp_int32 gtid) = NULL;
3536 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3537 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3538 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3539 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3540                                                    kmp_int32 gtid) = NULL;
3541 
3542 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3543                                                 kmp_int32 gtid) = NULL;
3544 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3545                                                    kmp_int32 gtid) = NULL;
3546 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3547 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3548 
3549 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3550 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3551 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3552                                       const ident_t *loc) = NULL;
3553 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3554 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3555                                    kmp_lock_flags_t flags) = NULL;
3556 
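// Illustrative sketch (not part of the runtime): once __kmp_set_user_lock_vptrs()
// below has bound these pointers for the chosen kind (selected e.g. through the
// KMP_LOCK_KIND setting), generic call sites go only through this table, so a
// lock obtained from __kmp_user_lock_allocate() would be driven roughly as:
//
//   kmp_user_lock_p lck = /* from __kmp_user_lock_allocate() */;
//   (*__kmp_init_user_lock_with_checks_)(lck);
//   (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
//   (*__kmp_release_user_lock_with_checks_)(lck, gtid);
//   (*__kmp_destroy_user_lock_with_checks_)(lck);
//
// Changing the lock kind re-points this table; no call site needs to change.
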
3557 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3558   switch (user_lock_kind) {
3559   case lk_default:
3560   default:
3561     KMP_ASSERT(0);
3562 
3563   case lk_tas: {
3564     __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3565     __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3566 
3567     __kmp_get_user_lock_owner_ =
3568         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3569 
3570     if (__kmp_env_consistency_check) {
3571       KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3572       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3573     } else {
3574       KMP_BIND_USER_LOCK(tas);
3575       KMP_BIND_NESTED_USER_LOCK(tas);
3576     }
3577 
3578     __kmp_destroy_user_lock_ =
3579         (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3580 
3581     __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3582 
3583     __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3584 
3585     __kmp_set_user_lock_location_ =
3586         (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3587 
3588     __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3589 
3590     __kmp_set_user_lock_flags_ =
3591         (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3592   } break;
3593 
3594 #if KMP_USE_FUTEX
3595 
3596   case lk_futex: {
3597     __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3598     __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3599 
3600     __kmp_get_user_lock_owner_ =
3601         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3602 
3603     if (__kmp_env_consistency_check) {
3604       KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3605       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3606     } else {
3607       KMP_BIND_USER_LOCK(futex);
3608       KMP_BIND_NESTED_USER_LOCK(futex);
3609     }
3610 
3611     __kmp_destroy_user_lock_ =
3612         (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3613 
3614     __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3615 
3616     __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3617 
3618     __kmp_set_user_lock_location_ =
3619         (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3620 
3621     __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3622 
3623     __kmp_set_user_lock_flags_ =
3624         (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3625   } break;
3626 
3627 #endif // KMP_USE_FUTEX
3628 
3629   case lk_ticket: {
3630     __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3631     __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3632 
3633     __kmp_get_user_lock_owner_ =
3634         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3635 
3636     if (__kmp_env_consistency_check) {
3637       KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3638       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3639     } else {
3640       KMP_BIND_USER_LOCK(ticket);
3641       KMP_BIND_NESTED_USER_LOCK(ticket);
3642     }
3643 
3644     __kmp_destroy_user_lock_ =
3645         (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3646 
3647     __kmp_is_user_lock_initialized_ =
3648         (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3649 
3650     __kmp_get_user_lock_location_ =
3651         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3652 
3653     __kmp_set_user_lock_location_ = (void (*)(
3654         kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3655 
3656     __kmp_get_user_lock_flags_ =
3657         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3658 
3659     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3660         &__kmp_set_ticket_lock_flags);
3661   } break;
3662 
3663   case lk_queuing: {
3664     __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3665     __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3666 
3667     __kmp_get_user_lock_owner_ =
3668         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3669 
3670     if (__kmp_env_consistency_check) {
3671       KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3672       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3673     } else {
3674       KMP_BIND_USER_LOCK(queuing);
3675       KMP_BIND_NESTED_USER_LOCK(queuing);
3676     }
3677 
3678     __kmp_destroy_user_lock_ =
3679         (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3680 
3681     __kmp_is_user_lock_initialized_ =
3682         (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3683 
3684     __kmp_get_user_lock_location_ =
3685         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3686 
3687     __kmp_set_user_lock_location_ = (void (*)(
3688         kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3689 
3690     __kmp_get_user_lock_flags_ =
3691         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3692 
3693     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3694         &__kmp_set_queuing_lock_flags);
3695   } break;
3696 
3697 #if KMP_USE_ADAPTIVE_LOCKS
3698   case lk_adaptive: {
3699     __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3700     __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3701 
3702     __kmp_get_user_lock_owner_ =
3703         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3704 
3705     if (__kmp_env_consistency_check) {
3706       KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3707     } else {
3708       KMP_BIND_USER_LOCK(adaptive);
3709     }
3710 
3711     __kmp_destroy_user_lock_ =
3712         (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3713 
3714     __kmp_is_user_lock_initialized_ =
3715         (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3716 
3717     __kmp_get_user_lock_location_ =
3718         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3719 
3720     __kmp_set_user_lock_location_ = (void (*)(
3721         kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3722 
3723     __kmp_get_user_lock_flags_ =
3724         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3725 
3726     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3727         &__kmp_set_queuing_lock_flags);
3728 
3729   } break;
3730 #endif // KMP_USE_ADAPTIVE_LOCKS
3731 
3732   case lk_drdpa: {
3733     __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3734     __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3735 
3736     __kmp_get_user_lock_owner_ =
3737         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3738 
3739     if (__kmp_env_consistency_check) {
3740       KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3741       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3742     } else {
3743       KMP_BIND_USER_LOCK(drdpa);
3744       KMP_BIND_NESTED_USER_LOCK(drdpa);
3745     }
3746 
3747     __kmp_destroy_user_lock_ =
3748         (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3749 
3750     __kmp_is_user_lock_initialized_ =
3751         (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3752 
3753     __kmp_get_user_lock_location_ =
3754         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3755 
3756     __kmp_set_user_lock_location_ = (void (*)(
3757         kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3758 
3759     __kmp_get_user_lock_flags_ =
3760         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3761 
3762     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3763         &__kmp_set_drdpa_lock_flags);
3764   } break;
3765   }
3766 }
3767 
3768 // ----------------------------------------------------------------------------
3769 // User lock table & lock allocation
3770 
3771 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3772 kmp_user_lock_p __kmp_lock_pool = NULL;
3773 
3774 // Lock block-allocation support.
3775 kmp_block_of_locks *__kmp_lock_blocks = NULL;
3776 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3777 
3778 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3779   // Assume that kmp_global_lock is held upon entry/exit.
3780   kmp_lock_index_t index;
3781   if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3782     kmp_lock_index_t size;
3783     kmp_user_lock_p *table;
3784     // Reallocate lock table.
3785     if (__kmp_user_lock_table.allocated == 0) {
3786       size = 1024;
3787     } else {
3788       size = __kmp_user_lock_table.allocated * 2;
3789     }
3790     table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3791     KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3792                sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3793     table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3794     // We cannot free the previous table now, since it may be in use by other
3795     // threads. So save the pointer to the previous table in the first
3796     // element of the new table. All the tables will be organized into a list,
3797     // and can be freed when the library shuts down.
3798     __kmp_user_lock_table.table = table;
3799     __kmp_user_lock_table.allocated = size;
3800   }
3801   KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3802                    __kmp_user_lock_table.allocated);
3803   index = __kmp_user_lock_table.used;
3804   __kmp_user_lock_table.table[index] = lck;
3805   ++__kmp_user_lock_table.used;
3806   return index;
3807 }
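
// Illustrative sketch (not part of the runtime) of the growth scheme above:
// after two reallocations the tables form a chain through element [0],
//
//   __kmp_user_lock_table.table -> [ old2 | lck | lck | ... ]   (4096 entries)
//                           old2 -> [ old1 | lck | lck | ... ]  (2048 entries)
//                           old1 -> [ NULL | lck | lck | ... ]  (1024 entries)
//
// and that chain is exactly what __kmp_cleanup_user_locks() walks when it
// frees the tables at shutdown.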
3808 
3809 static kmp_user_lock_p __kmp_lock_block_allocate() {
3810   // Assume that kmp_global_lock is held upon entry/exit.
3811   static int last_index = 0;
3812   if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3813     // Restart the index.
3814     last_index = 0;
3815     // Need to allocate a new block.
3816     KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3817     size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3818     char *buffer =
3819         (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3820     // Set up the new block.
3821     kmp_block_of_locks *new_block =
3822         (kmp_block_of_locks *)(&buffer[space_for_locks]);
3823     new_block->next_block = __kmp_lock_blocks;
3824     new_block->locks = (void *)buffer;
3825     // Publish the new block.
3826     KMP_MB();
3827     __kmp_lock_blocks = new_block;
3828   }
3829   kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3830       ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3831   last_index++;
3832   return ret;
3833 }
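
// Illustrative sketch (not part of the runtime) of one block allocated above:
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks header ]
//            ^ new_block->locks                  ^ new_block
//   (N = __kmp_num_locks_in_block, each lock __kmp_user_lock_size bytes)
//
// Because the header sits at the end of the same allocation,
// __kmp_cleanup_user_locks() frees only block_ptr->locks and never the header
// separately.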
3834 
3835 // Get memory for a lock. It may be freshly allocated memory or reused memory
3836 // from the lock pool.
3837 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3838                                          kmp_lock_flags_t flags) {
3839   kmp_user_lock_p lck;
3840   kmp_lock_index_t index;
3841   KMP_DEBUG_ASSERT(user_lock);
3842 
3843   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3844 
3845   if (__kmp_lock_pool == NULL) {
3846     // Lock pool is empty. Allocate new memory.
3847 
3848     if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3849       lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3850     } else {
3851       lck = __kmp_lock_block_allocate();
3852     }
3853 
3854     // Insert the lock into the table so that it can be freed in __kmp_cleanup
3855     // and the debugger has info on all allocated locks.
3856     index = __kmp_lock_table_insert(lck);
3857   } else {
3858     // Pick up lock from pool.
3859     lck = __kmp_lock_pool;
3860     index = __kmp_lock_pool->pool.index;
3861     __kmp_lock_pool = __kmp_lock_pool->pool.next;
3862   }
3863 
3864   // We could potentially differentiate between nested and regular locks
3865   // here, and do the lock table lookup for regular locks only.
3866   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3867     *((kmp_lock_index_t *)user_lock) = index;
3868   } else {
3869     *((kmp_user_lock_p *)user_lock) = lck;
3870   }
3871 
3872   // Mark the lock if it is a critical-section lock.
3873   __kmp_set_user_lock_flags(lck, flags);
3874 
3875   __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper
3876 
3877   return lck;
3878 }
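
// Illustrative sketch (not part of the runtime) of the intended pairing with
// __kmp_user_lock_free() below:
//
//   __kmp_user_lock_allocate() --> lock in use --> __kmp_user_lock_free()
//     (fresh memory or pool head)                  (pushed onto __kmp_lock_pool)
//
// The lock's slot in __kmp_user_lock_table is kept across this round trip, so
// the debugger and the final cleanup can still see it while it sits in the pool.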
3879 
3880 // Return the lock's memory to the pool for reuse.
3881 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3882                           kmp_user_lock_p lck) {
3883   KMP_DEBUG_ASSERT(user_lock != NULL);
3884   KMP_DEBUG_ASSERT(lck != NULL);
3885 
3886   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3887 
3888   lck->pool.next = __kmp_lock_pool;
3889   __kmp_lock_pool = lck;
3890   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3891     kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3892     KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3893     lck->pool.index = index;
3894   }
3895 
3896   __kmp_release_lock(&__kmp_global_lock, gtid);
3897 }
3898 
3899 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3900   kmp_user_lock_p lck = NULL;
3901 
3902   if (__kmp_env_consistency_check) {
3903     if (user_lock == NULL) {
3904       KMP_FATAL(LockIsUninitialized, func);
3905     }
3906   }
3907 
3908   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3909     kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3910     if (__kmp_env_consistency_check) {
3911       if (!(0 < index && index < __kmp_user_lock_table.used)) {
3912         KMP_FATAL(LockIsUninitialized, func);
3913       }
3914     }
3915     KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3916     KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3917     lck = __kmp_user_lock_table.table[index];
3918   } else {
3919     lck = *((kmp_user_lock_p *)user_lock);
3920   }
3921 
3922   if (__kmp_env_consistency_check) {
3923     if (lck == NULL) {
3924       KMP_FATAL(LockIsUninitialized, func);
3925     }
3926   }
3927 
3928   return lck;
3929 }
3930 
3931 void __kmp_cleanup_user_locks(void) {
3932   // Reset the lock pool. Don't worry about locks in the pool -- we will free
3933   // them while iterating the lock table (it includes all locks, dead or alive).
3934   __kmp_lock_pool = NULL;
3935 
3936 #define IS_CRITICAL(lck)                                                       \
3937   ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
3938    ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3939 
3940   // Loop through lock table, free all locks.
3941   // Do not free item [0]; it is reserved for the lock tables list.
3942   //
3943   // FIXME - we are iterating through a list of (pointers to) objects of type
3944   // union kmp_user_lock, but we have no way of knowing whether the base type is
3945   // currently "pool" or whatever the global user lock type is.
3946   //
3947   // We are relying on the fact that for all of the user lock types
3948   // (except "tas"), the first field in the lock struct is the "initialized"
3949   // field, which is set to the address of the lock object itself when
3950   // the lock is initialized.  When the union is of type "pool", the
3951   // first field is a pointer to the next object in the free list, which
3952   // will not be the same address as the object itself.
3953   //
3954   // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3955   // for "pool" objects on the free list.  This must happen as the "location"
3956   // field of real user locks overlaps the "index" field of "pool" objects.
3957   //
3958   // It would be better to run through the free list, and remove all "pool"
3959   // objects from the lock table before executing this loop.  However,
3960   // "pool" objects do not always have their index field set (only on
3961   // lin_32e), and I don't want to search the lock table for the address
3962   // of every "pool" object on the free list.
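  //
  // For illustration (not part of the runtime): for the kinds that provide
  // __kmp_is_user_lock_initialized_, a live lock passes the check because its
  // first field points back at the lock itself,
  //
  //   lck->lk.initialized == lck        // real, initialized lock
  //
  // while an object on the free list has that same word overwritten by the
  // pool link,
  //
  //   lck->pool.next == <another lock or NULL>   // fails the check, as intended
  //
  // so pooled objects skip the destroy path and only have their memory freed.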
3963   while (__kmp_user_lock_table.used > 1) {
3964     const ident *loc;
3965 
3966     // Reduce __kmp_user_lock_table.used before freeing the lock,
3967     // so that the state of the locks stays consistent.
3968     kmp_user_lock_p lck =
3969         __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3970 
3971     if ((__kmp_is_user_lock_initialized_ != NULL) &&
3972         (*__kmp_is_user_lock_initialized_)(lck)) {
3973       // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3974       // it is NOT a critical section (user is not responsible for destroying
3975       // criticals) AND we know source location to report.
3976       if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3977           ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3978           (loc->psource != NULL)) {
3979         kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3980         KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3981         __kmp_str_loc_free(&str_loc);
3982       }
3983 
3984 #ifdef KMP_DEBUG
3985       if (IS_CRITICAL(lck)) {
3986         KA_TRACE(
3987             20,
3988             ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3989              lck, *(void **)lck));
3990       } else {
3991         KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3992                       *(void **)lck));
3993       }
3994 #endif // KMP_DEBUG
3995 
3996       // Cleanup internal lock dynamic resources (for drdpa locks particularly).
3997       __kmp_destroy_user_lock(lck);
3998     }
3999 
4000     // Free the lock if block allocation of locks is not used.
4001     if (__kmp_lock_blocks == NULL) {
4002       __kmp_free(lck);
4003     }
4004   }
4005 
4006 #undef IS_CRITICAL
4007 
4008   // delete lock table(s).
4009   kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4010   __kmp_user_lock_table.table = NULL;
4011   __kmp_user_lock_table.allocated = 0;
4012 
4013   while (table_ptr != NULL) {
4014     // In the first element we saved the pointer to the previous
4015     // (smaller) lock table.
4016     kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
4017     __kmp_free(table_ptr);
4018     table_ptr = next;
4019   }
4020 
4021   // Free buffers allocated for blocks of locks.
4022   kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4023   __kmp_lock_blocks = NULL;
4024 
4025   while (block_ptr != NULL) {
4026     kmp_block_of_locks_t *next = block_ptr->next_block;
4027     __kmp_free(block_ptr->locks);
4028     // *block_ptr itself was allocated at the end of the locks vector.
4029     block_ptr = next;
4030   }
4031 
4032   TCW_4(__kmp_init_user_locks, FALSE);
4033 }
4034 
4035 #endif // KMP_USE_DYNAMIC_LOCK
4036