xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <stddef.h>
14 #include <atomic>
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if KMP_USE_FUTEX
25 #include <sys/syscall.h>
26 #include <unistd.h>
27 // We should really include <futex.h>, but that causes compatibility problems on
28 // different Linux* OS distributions that either require that you include (or
29 // break when you try to include) <pci/types.h>. Since all we need is the two
30 // macros below (which are part of the kernel ABI, so can't change) we just
31 // define the constants here and don't include <futex.h>
32 #ifndef FUTEX_WAIT
33 #define FUTEX_WAIT 0
34 #endif
35 #ifndef FUTEX_WAKE
36 #define FUTEX_WAKE 1
37 #endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
43 void __kmp_validate_locks(void) {
44   int i;
45   kmp_uint32 x, y;
46 
47   /* Check to make sure unsigned arithmetic does wraps properly */
48   x = ~((kmp_uint32)0) - 2;
49   y = x - 2;
50 
51   for (i = 0; i < 8; ++i, ++x, ++y) {
52     kmp_uint32 z = (x - y);
53     KMP_ASSERT(z == 2);
54   }
55 
56   KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57 }
58 
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
61 
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66 //
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69 
70 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71   return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72 }
73 
74 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75   return lck->lk.depth_locked != -1;
76 }
77 
78 __forceinline static int
79 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80   KMP_MB();
81 
82 #ifdef USE_LOCK_PROFILE
83   kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84   if ((curr != 0) && (curr != gtid + 1))
85     __kmp_printf("LOCK CONTENTION: %p\n", lck);
86 /* else __kmp_printf( "." );*/
87 #endif /* USE_LOCK_PROFILE */
88 
89   kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90   kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91 
92   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93       __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94     KMP_FSYNC_ACQUIRED(lck);
95     return KMP_LOCK_ACQUIRED_FIRST;
96   }
97 
98   kmp_uint32 spins;
99   KMP_FSYNC_PREPARE(lck);
100   KMP_INIT_YIELD(spins);
101   kmp_backoff_t backoff = __kmp_spin_backoff_params;
102   do {
103     __kmp_spin_backoff(&backoff);
104     KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
105   } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
106            !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
107   KMP_FSYNC_ACQUIRED(lck);
108   return KMP_LOCK_ACQUIRED_FIRST;
109 }
110 
111 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
112   int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
113   return retval;
114 }
115 
116 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
117                                               kmp_int32 gtid) {
118   char const *const func = "omp_set_lock";
119   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
120       __kmp_is_tas_lock_nestable(lck)) {
121     KMP_FATAL(LockNestableUsedAsSimple, func);
122   }
123   if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
124     KMP_FATAL(LockIsAlreadyOwned, func);
125   }
126   return __kmp_acquire_tas_lock(lck, gtid);
127 }
128 
129 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
130   kmp_int32 tas_free = KMP_LOCK_FREE(tas);
131   kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
132   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
133       __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
134     KMP_FSYNC_ACQUIRED(lck);
135     return TRUE;
136   }
137   return FALSE;
138 }
139 
140 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
141                                            kmp_int32 gtid) {
142   char const *const func = "omp_test_lock";
143   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
144       __kmp_is_tas_lock_nestable(lck)) {
145     KMP_FATAL(LockNestableUsedAsSimple, func);
146   }
147   return __kmp_test_tas_lock(lck, gtid);
148 }
149 
150 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
151   KMP_MB(); /* Flush all pending memory write invalidates.  */
152 
153   KMP_FSYNC_RELEASING(lck);
154   KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
155   KMP_MB(); /* Flush all pending memory write invalidates.  */
156 
157   KMP_YIELD_OVERSUB();
158   return KMP_LOCK_RELEASED;
159 }
160 
161 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
162                                               kmp_int32 gtid) {
163   char const *const func = "omp_unset_lock";
164   KMP_MB(); /* in case another processor initialized lock */
165   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
166       __kmp_is_tas_lock_nestable(lck)) {
167     KMP_FATAL(LockNestableUsedAsSimple, func);
168   }
169   if (__kmp_get_tas_lock_owner(lck) == -1) {
170     KMP_FATAL(LockUnsettingFree, func);
171   }
172   if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
173       (__kmp_get_tas_lock_owner(lck) != gtid)) {
174     KMP_FATAL(LockUnsettingSetByAnother, func);
175   }
176   return __kmp_release_tas_lock(lck, gtid);
177 }
178 
179 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
180   lck->lk.poll = KMP_LOCK_FREE(tas);
181 }
182 
183 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
184 
185 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
186   char const *const func = "omp_destroy_lock";
187   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
188       __kmp_is_tas_lock_nestable(lck)) {
189     KMP_FATAL(LockNestableUsedAsSimple, func);
190   }
191   if (__kmp_get_tas_lock_owner(lck) != -1) {
192     KMP_FATAL(LockStillOwned, func);
193   }
194   __kmp_destroy_tas_lock(lck);
195 }
196 
197 // nested test and set locks
198 
199 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
200   KMP_DEBUG_ASSERT(gtid >= 0);
201 
202   if (__kmp_get_tas_lock_owner(lck) == gtid) {
203     lck->lk.depth_locked += 1;
204     return KMP_LOCK_ACQUIRED_NEXT;
205   } else {
206     __kmp_acquire_tas_lock_timed_template(lck, gtid);
207     lck->lk.depth_locked = 1;
208     return KMP_LOCK_ACQUIRED_FIRST;
209   }
210 }
211 
212 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
213                                                      kmp_int32 gtid) {
214   char const *const func = "omp_set_nest_lock";
215   if (!__kmp_is_tas_lock_nestable(lck)) {
216     KMP_FATAL(LockSimpleUsedAsNestable, func);
217   }
218   return __kmp_acquire_nested_tas_lock(lck, gtid);
219 }
220 
221 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
222   int retval;
223 
224   KMP_DEBUG_ASSERT(gtid >= 0);
225 
226   if (__kmp_get_tas_lock_owner(lck) == gtid) {
227     retval = ++lck->lk.depth_locked;
228   } else if (!__kmp_test_tas_lock(lck, gtid)) {
229     retval = 0;
230   } else {
231     KMP_MB();
232     retval = lck->lk.depth_locked = 1;
233   }
234   return retval;
235 }
236 
237 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
238                                                   kmp_int32 gtid) {
239   char const *const func = "omp_test_nest_lock";
240   if (!__kmp_is_tas_lock_nestable(lck)) {
241     KMP_FATAL(LockSimpleUsedAsNestable, func);
242   }
243   return __kmp_test_nested_tas_lock(lck, gtid);
244 }
245 
246 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
247   KMP_DEBUG_ASSERT(gtid >= 0);
248 
249   KMP_MB();
250   if (--(lck->lk.depth_locked) == 0) {
251     __kmp_release_tas_lock(lck, gtid);
252     return KMP_LOCK_RELEASED;
253   }
254   return KMP_LOCK_STILL_HELD;
255 }
256 
257 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
258                                                      kmp_int32 gtid) {
259   char const *const func = "omp_unset_nest_lock";
260   KMP_MB(); /* in case another processor initialized lock */
261   if (!__kmp_is_tas_lock_nestable(lck)) {
262     KMP_FATAL(LockSimpleUsedAsNestable, func);
263   }
264   if (__kmp_get_tas_lock_owner(lck) == -1) {
265     KMP_FATAL(LockUnsettingFree, func);
266   }
267   if (__kmp_get_tas_lock_owner(lck) != gtid) {
268     KMP_FATAL(LockUnsettingSetByAnother, func);
269   }
270   return __kmp_release_nested_tas_lock(lck, gtid);
271 }
272 
273 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
274   __kmp_init_tas_lock(lck);
275   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
276 }
277 
278 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
279   __kmp_destroy_tas_lock(lck);
280   lck->lk.depth_locked = 0;
281 }
282 
283 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
284   char const *const func = "omp_destroy_nest_lock";
285   if (!__kmp_is_tas_lock_nestable(lck)) {
286     KMP_FATAL(LockSimpleUsedAsNestable, func);
287   }
288   if (__kmp_get_tas_lock_owner(lck) != -1) {
289     KMP_FATAL(LockStillOwned, func);
290   }
291   __kmp_destroy_nested_tas_lock(lck);
292 }
293 
294 #if KMP_USE_FUTEX
295 
296 /* ------------------------------------------------------------------------ */
297 /* futex locks */
298 
299 // futex locks are really just test and set locks, with a different method
300 // of handling contention.  They take the same amount of space as test and
301 // set locks, and are allocated the same way (i.e. use the area allocated by
302 // the compiler for non-nested locks / allocate nested locks on the heap).
303 
304 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
305   return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
306 }
307 
308 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
309   return lck->lk.depth_locked != -1;
310 }
311 
312 __forceinline static int
313 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
314   kmp_int32 gtid_code = (gtid + 1) << 1;
315 
316   KMP_MB();
317 
318 #ifdef USE_LOCK_PROFILE
319   kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
320   if ((curr != 0) && (curr != gtid_code))
321     __kmp_printf("LOCK CONTENTION: %p\n", lck);
322 /* else __kmp_printf( "." );*/
323 #endif /* USE_LOCK_PROFILE */
324 
325   KMP_FSYNC_PREPARE(lck);
326   KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
327                   lck, lck->lk.poll, gtid));
328 
329   kmp_int32 poll_val;
330 
331   while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
332               &(lck->lk.poll), KMP_LOCK_FREE(futex),
333               KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
334 
335     kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
336     KA_TRACE(
337         1000,
338         ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
339          lck, gtid, poll_val, cond));
340 
341     // NOTE: if you try to use the following condition for this branch
342     //
343     // if ( poll_val & 1 == 0 )
344     //
345     // Then the 12.0 compiler has a bug where the following block will
346     // always be skipped, regardless of the value of the LSB of poll_val.
347     if (!cond) {
348       // Try to set the lsb in the poll to indicate to the owner
349       // thread that they need to wake this thread up.
350       if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
351                                        poll_val | KMP_LOCK_BUSY(1, futex))) {
352         KA_TRACE(
353             1000,
354             ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
355              lck, lck->lk.poll, gtid));
356         continue;
357       }
358       poll_val |= KMP_LOCK_BUSY(1, futex);
359 
360       KA_TRACE(1000,
361                ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
362                 lck->lk.poll, gtid));
363     }
364 
365     KA_TRACE(
366         1000,
367         ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
368          lck, gtid, poll_val));
369 
370     long rc;
371     if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
372                       NULL, 0)) != 0) {
373       KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
374                       "failed (rc=%ld errno=%d)\n",
375                       lck, gtid, poll_val, rc, errno));
376       continue;
377     }
378 
379     KA_TRACE(1000,
380              ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
381               lck, gtid, poll_val));
382     // This thread has now done a successful futex wait call and was entered on
383     // the OS futex queue.  We must now perform a futex wake call when releasing
384     // the lock, as we have no idea how many other threads are in the queue.
385     gtid_code |= 1;
386   }
387 
388   KMP_FSYNC_ACQUIRED(lck);
389   KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
390                   lck->lk.poll, gtid));
391   return KMP_LOCK_ACQUIRED_FIRST;
392 }
393 
394 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
395   int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
396   return retval;
397 }
398 
399 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
400                                                 kmp_int32 gtid) {
401   char const *const func = "omp_set_lock";
402   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
403       __kmp_is_futex_lock_nestable(lck)) {
404     KMP_FATAL(LockNestableUsedAsSimple, func);
405   }
406   if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
407     KMP_FATAL(LockIsAlreadyOwned, func);
408   }
409   return __kmp_acquire_futex_lock(lck, gtid);
410 }
411 
412 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
413   if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
414                                   KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
415     KMP_FSYNC_ACQUIRED(lck);
416     return TRUE;
417   }
418   return FALSE;
419 }
420 
421 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
422                                              kmp_int32 gtid) {
423   char const *const func = "omp_test_lock";
424   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
425       __kmp_is_futex_lock_nestable(lck)) {
426     KMP_FATAL(LockNestableUsedAsSimple, func);
427   }
428   return __kmp_test_futex_lock(lck, gtid);
429 }
430 
431 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
432   KMP_MB(); /* Flush all pending memory write invalidates.  */
433 
434   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
435                   lck, lck->lk.poll, gtid));
436 
437   KMP_FSYNC_RELEASING(lck);
438 
439   kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
440 
441   KA_TRACE(1000,
442            ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
443             lck, gtid, poll_val));
444 
445   if (KMP_LOCK_STRIP(poll_val) & 1) {
446     KA_TRACE(1000,
447              ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
448               lck, gtid));
449     syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
450             NULL, NULL, 0);
451   }
452 
453   KMP_MB(); /* Flush all pending memory write invalidates.  */
454 
455   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
456                   lck->lk.poll, gtid));
457 
458   KMP_YIELD_OVERSUB();
459   return KMP_LOCK_RELEASED;
460 }
461 
462 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
463                                                 kmp_int32 gtid) {
464   char const *const func = "omp_unset_lock";
465   KMP_MB(); /* in case another processor initialized lock */
466   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
467       __kmp_is_futex_lock_nestable(lck)) {
468     KMP_FATAL(LockNestableUsedAsSimple, func);
469   }
470   if (__kmp_get_futex_lock_owner(lck) == -1) {
471     KMP_FATAL(LockUnsettingFree, func);
472   }
473   if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
474       (__kmp_get_futex_lock_owner(lck) != gtid)) {
475     KMP_FATAL(LockUnsettingSetByAnother, func);
476   }
477   return __kmp_release_futex_lock(lck, gtid);
478 }
479 
480 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
481   TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
482 }
483 
484 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
485 
486 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
487   char const *const func = "omp_destroy_lock";
488   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
489       __kmp_is_futex_lock_nestable(lck)) {
490     KMP_FATAL(LockNestableUsedAsSimple, func);
491   }
492   if (__kmp_get_futex_lock_owner(lck) != -1) {
493     KMP_FATAL(LockStillOwned, func);
494   }
495   __kmp_destroy_futex_lock(lck);
496 }
497 
498 // nested futex locks
499 
500 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
501   KMP_DEBUG_ASSERT(gtid >= 0);
502 
503   if (__kmp_get_futex_lock_owner(lck) == gtid) {
504     lck->lk.depth_locked += 1;
505     return KMP_LOCK_ACQUIRED_NEXT;
506   } else {
507     __kmp_acquire_futex_lock_timed_template(lck, gtid);
508     lck->lk.depth_locked = 1;
509     return KMP_LOCK_ACQUIRED_FIRST;
510   }
511 }
512 
513 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
514                                                        kmp_int32 gtid) {
515   char const *const func = "omp_set_nest_lock";
516   if (!__kmp_is_futex_lock_nestable(lck)) {
517     KMP_FATAL(LockSimpleUsedAsNestable, func);
518   }
519   return __kmp_acquire_nested_futex_lock(lck, gtid);
520 }
521 
522 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
523   int retval;
524 
525   KMP_DEBUG_ASSERT(gtid >= 0);
526 
527   if (__kmp_get_futex_lock_owner(lck) == gtid) {
528     retval = ++lck->lk.depth_locked;
529   } else if (!__kmp_test_futex_lock(lck, gtid)) {
530     retval = 0;
531   } else {
532     KMP_MB();
533     retval = lck->lk.depth_locked = 1;
534   }
535   return retval;
536 }
537 
538 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
539                                                     kmp_int32 gtid) {
540   char const *const func = "omp_test_nest_lock";
541   if (!__kmp_is_futex_lock_nestable(lck)) {
542     KMP_FATAL(LockSimpleUsedAsNestable, func);
543   }
544   return __kmp_test_nested_futex_lock(lck, gtid);
545 }
546 
547 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
548   KMP_DEBUG_ASSERT(gtid >= 0);
549 
550   KMP_MB();
551   if (--(lck->lk.depth_locked) == 0) {
552     __kmp_release_futex_lock(lck, gtid);
553     return KMP_LOCK_RELEASED;
554   }
555   return KMP_LOCK_STILL_HELD;
556 }
557 
558 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
559                                                        kmp_int32 gtid) {
560   char const *const func = "omp_unset_nest_lock";
561   KMP_MB(); /* in case another processor initialized lock */
562   if (!__kmp_is_futex_lock_nestable(lck)) {
563     KMP_FATAL(LockSimpleUsedAsNestable, func);
564   }
565   if (__kmp_get_futex_lock_owner(lck) == -1) {
566     KMP_FATAL(LockUnsettingFree, func);
567   }
568   if (__kmp_get_futex_lock_owner(lck) != gtid) {
569     KMP_FATAL(LockUnsettingSetByAnother, func);
570   }
571   return __kmp_release_nested_futex_lock(lck, gtid);
572 }
573 
574 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
575   __kmp_init_futex_lock(lck);
576   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
577 }
578 
579 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
580   __kmp_destroy_futex_lock(lck);
581   lck->lk.depth_locked = 0;
582 }
583 
584 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
585   char const *const func = "omp_destroy_nest_lock";
586   if (!__kmp_is_futex_lock_nestable(lck)) {
587     KMP_FATAL(LockSimpleUsedAsNestable, func);
588   }
589   if (__kmp_get_futex_lock_owner(lck) != -1) {
590     KMP_FATAL(LockStillOwned, func);
591   }
592   __kmp_destroy_nested_futex_lock(lck);
593 }
594 
595 #endif // KMP_USE_FUTEX
596 
597 /* ------------------------------------------------------------------------ */
598 /* ticket (bakery) locks */
599 
600 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
601   return std::atomic_load_explicit(&lck->lk.owner_id,
602                                    std::memory_order_relaxed) -
603          1;
604 }
605 
606 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
607   return std::atomic_load_explicit(&lck->lk.depth_locked,
608                                    std::memory_order_relaxed) != -1;
609 }
610 
611 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
612   return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
613                                    std::memory_order_acquire) == my_ticket;
614 }
615 
616 __forceinline static int
617 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
618                                          kmp_int32 gtid) {
619   kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
620       &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
621 
622 #ifdef USE_LOCK_PROFILE
623   if (std::atomic_load_explicit(&lck->lk.now_serving,
624                                 std::memory_order_relaxed) != my_ticket)
625     __kmp_printf("LOCK CONTENTION: %p\n", lck);
626 /* else __kmp_printf( "." );*/
627 #endif /* USE_LOCK_PROFILE */
628 
629   if (std::atomic_load_explicit(&lck->lk.now_serving,
630                                 std::memory_order_acquire) == my_ticket) {
631     return KMP_LOCK_ACQUIRED_FIRST;
632   }
633   KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
634   return KMP_LOCK_ACQUIRED_FIRST;
635 }
636 
637 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
638   int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
639   return retval;
640 }
641 
642 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
643                                                  kmp_int32 gtid) {
644   char const *const func = "omp_set_lock";
645 
646   if (!std::atomic_load_explicit(&lck->lk.initialized,
647                                  std::memory_order_relaxed)) {
648     KMP_FATAL(LockIsUninitialized, func);
649   }
650   if (lck->lk.self != lck) {
651     KMP_FATAL(LockIsUninitialized, func);
652   }
653   if (__kmp_is_ticket_lock_nestable(lck)) {
654     KMP_FATAL(LockNestableUsedAsSimple, func);
655   }
656   if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
657     KMP_FATAL(LockIsAlreadyOwned, func);
658   }
659 
660   __kmp_acquire_ticket_lock(lck, gtid);
661 
662   std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
663                              std::memory_order_relaxed);
664   return KMP_LOCK_ACQUIRED_FIRST;
665 }
666 
667 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
668   kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
669                                                    std::memory_order_relaxed);
670 
671   if (std::atomic_load_explicit(&lck->lk.now_serving,
672                                 std::memory_order_relaxed) == my_ticket) {
673     kmp_uint32 next_ticket = my_ticket + 1;
674     if (std::atomic_compare_exchange_strong_explicit(
675             &lck->lk.next_ticket, &my_ticket, next_ticket,
676             std::memory_order_acquire, std::memory_order_acquire)) {
677       return TRUE;
678     }
679   }
680   return FALSE;
681 }
682 
683 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
684                                               kmp_int32 gtid) {
685   char const *const func = "omp_test_lock";
686 
687   if (!std::atomic_load_explicit(&lck->lk.initialized,
688                                  std::memory_order_relaxed)) {
689     KMP_FATAL(LockIsUninitialized, func);
690   }
691   if (lck->lk.self != lck) {
692     KMP_FATAL(LockIsUninitialized, func);
693   }
694   if (__kmp_is_ticket_lock_nestable(lck)) {
695     KMP_FATAL(LockNestableUsedAsSimple, func);
696   }
697 
698   int retval = __kmp_test_ticket_lock(lck, gtid);
699 
700   if (retval) {
701     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
702                                std::memory_order_relaxed);
703   }
704   return retval;
705 }
706 
707 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
708   kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
709                                                   std::memory_order_relaxed) -
710                         std::atomic_load_explicit(&lck->lk.now_serving,
711                                                   std::memory_order_relaxed);
712 
713   std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
714                                  std::memory_order_release);
715 
716   KMP_YIELD(distance >
717             (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
718   return KMP_LOCK_RELEASED;
719 }
720 
721 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722                                                  kmp_int32 gtid) {
723   char const *const func = "omp_unset_lock";
724 
725   if (!std::atomic_load_explicit(&lck->lk.initialized,
726                                  std::memory_order_relaxed)) {
727     KMP_FATAL(LockIsUninitialized, func);
728   }
729   if (lck->lk.self != lck) {
730     KMP_FATAL(LockIsUninitialized, func);
731   }
732   if (__kmp_is_ticket_lock_nestable(lck)) {
733     KMP_FATAL(LockNestableUsedAsSimple, func);
734   }
735   if (__kmp_get_ticket_lock_owner(lck) == -1) {
736     KMP_FATAL(LockUnsettingFree, func);
737   }
738   if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739       (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740     KMP_FATAL(LockUnsettingSetByAnother, func);
741   }
742   std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743   return __kmp_release_ticket_lock(lck, gtid);
744 }
745 
746 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
747   lck->lk.location = NULL;
748   lck->lk.self = lck;
749   std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750                              std::memory_order_relaxed);
751   std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752                              std::memory_order_relaxed);
753   std::atomic_store_explicit(
754       &lck->lk.owner_id, 0,
755       std::memory_order_relaxed); // no thread owns the lock.
756   std::atomic_store_explicit(
757       &lck->lk.depth_locked, -1,
758       std::memory_order_relaxed); // -1 => not a nested lock.
759   std::atomic_store_explicit(&lck->lk.initialized, true,
760                              std::memory_order_release);
761 }
762 
763 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
764   std::atomic_store_explicit(&lck->lk.initialized, false,
765                              std::memory_order_release);
766   lck->lk.self = NULL;
767   lck->lk.location = NULL;
768   std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769                              std::memory_order_relaxed);
770   std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771                              std::memory_order_relaxed);
772   std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773   std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774                              std::memory_order_relaxed);
775 }
776 
777 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778   char const *const func = "omp_destroy_lock";
779 
780   if (!std::atomic_load_explicit(&lck->lk.initialized,
781                                  std::memory_order_relaxed)) {
782     KMP_FATAL(LockIsUninitialized, func);
783   }
784   if (lck->lk.self != lck) {
785     KMP_FATAL(LockIsUninitialized, func);
786   }
787   if (__kmp_is_ticket_lock_nestable(lck)) {
788     KMP_FATAL(LockNestableUsedAsSimple, func);
789   }
790   if (__kmp_get_ticket_lock_owner(lck) != -1) {
791     KMP_FATAL(LockStillOwned, func);
792   }
793   __kmp_destroy_ticket_lock(lck);
794 }
795 
796 // nested ticket locks
797 
798 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799   KMP_DEBUG_ASSERT(gtid >= 0);
800 
801   if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802     std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803                                    std::memory_order_relaxed);
804     return KMP_LOCK_ACQUIRED_NEXT;
805   } else {
806     __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807     std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808                                std::memory_order_relaxed);
809     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810                                std::memory_order_relaxed);
811     return KMP_LOCK_ACQUIRED_FIRST;
812   }
813 }
814 
815 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816                                                         kmp_int32 gtid) {
817   char const *const func = "omp_set_nest_lock";
818 
819   if (!std::atomic_load_explicit(&lck->lk.initialized,
820                                  std::memory_order_relaxed)) {
821     KMP_FATAL(LockIsUninitialized, func);
822   }
823   if (lck->lk.self != lck) {
824     KMP_FATAL(LockIsUninitialized, func);
825   }
826   if (!__kmp_is_ticket_lock_nestable(lck)) {
827     KMP_FATAL(LockSimpleUsedAsNestable, func);
828   }
829   return __kmp_acquire_nested_ticket_lock(lck, gtid);
830 }
831 
832 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833   int retval;
834 
835   KMP_DEBUG_ASSERT(gtid >= 0);
836 
837   if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838     retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839                                             std::memory_order_relaxed) +
840              1;
841   } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842     retval = 0;
843   } else {
844     std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845                                std::memory_order_relaxed);
846     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847                                std::memory_order_relaxed);
848     retval = 1;
849   }
850   return retval;
851 }
852 
853 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854                                                      kmp_int32 gtid) {
855   char const *const func = "omp_test_nest_lock";
856 
857   if (!std::atomic_load_explicit(&lck->lk.initialized,
858                                  std::memory_order_relaxed)) {
859     KMP_FATAL(LockIsUninitialized, func);
860   }
861   if (lck->lk.self != lck) {
862     KMP_FATAL(LockIsUninitialized, func);
863   }
864   if (!__kmp_is_ticket_lock_nestable(lck)) {
865     KMP_FATAL(LockSimpleUsedAsNestable, func);
866   }
867   return __kmp_test_nested_ticket_lock(lck, gtid);
868 }
869 
870 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871   KMP_DEBUG_ASSERT(gtid >= 0);
872 
873   if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874                                       std::memory_order_relaxed) -
875        1) == 0) {
876     std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877     __kmp_release_ticket_lock(lck, gtid);
878     return KMP_LOCK_RELEASED;
879   }
880   return KMP_LOCK_STILL_HELD;
881 }
882 
883 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884                                                         kmp_int32 gtid) {
885   char const *const func = "omp_unset_nest_lock";
886 
887   if (!std::atomic_load_explicit(&lck->lk.initialized,
888                                  std::memory_order_relaxed)) {
889     KMP_FATAL(LockIsUninitialized, func);
890   }
891   if (lck->lk.self != lck) {
892     KMP_FATAL(LockIsUninitialized, func);
893   }
894   if (!__kmp_is_ticket_lock_nestable(lck)) {
895     KMP_FATAL(LockSimpleUsedAsNestable, func);
896   }
897   if (__kmp_get_ticket_lock_owner(lck) == -1) {
898     KMP_FATAL(LockUnsettingFree, func);
899   }
900   if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901     KMP_FATAL(LockUnsettingSetByAnother, func);
902   }
903   return __kmp_release_nested_ticket_lock(lck, gtid);
904 }
905 
906 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
907   __kmp_init_ticket_lock(lck);
908   std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909                              std::memory_order_relaxed);
910   // >= 0 for nestable locks, -1 for simple locks
911 }
912 
913 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914   __kmp_destroy_ticket_lock(lck);
915   std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916                              std::memory_order_relaxed);
917 }
918 
919 static void
920 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921   char const *const func = "omp_destroy_nest_lock";
922 
923   if (!std::atomic_load_explicit(&lck->lk.initialized,
924                                  std::memory_order_relaxed)) {
925     KMP_FATAL(LockIsUninitialized, func);
926   }
927   if (lck->lk.self != lck) {
928     KMP_FATAL(LockIsUninitialized, func);
929   }
930   if (!__kmp_is_ticket_lock_nestable(lck)) {
931     KMP_FATAL(LockSimpleUsedAsNestable, func);
932   }
933   if (__kmp_get_ticket_lock_owner(lck) != -1) {
934     KMP_FATAL(LockStillOwned, func);
935   }
936   __kmp_destroy_nested_ticket_lock(lck);
937 }
938 
939 // access functions to fields which don't exist for all lock kinds.
940 
941 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
942   return lck->lk.location;
943 }
944 
945 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
946                                            const ident_t *loc) {
947   lck->lk.location = loc;
948 }
949 
950 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
951   return lck->lk.flags;
952 }
953 
954 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
955                                         kmp_lock_flags_t flags) {
956   lck->lk.flags = flags;
957 }
958 
959 /* ------------------------------------------------------------------------ */
960 /* queuing locks */
961 
962 /* First the states
963    (head,tail) =              0, 0  means lock is unheld, nobody on queue
964                  UINT_MAX or -1, 0  means lock is held, nobody on queue
965                               h, h  means lock held or about to transition,
966                                     1 element on queue
967                               h, t  h <> t, means lock is held or about to
968                                     transition, >1 elements on queue
969 
970    Now the transitions
971       Acquire(0,0)  = -1 ,0
972       Release(0,0)  = Error
973       Acquire(-1,0) =  h ,h    h > 0
974       Release(-1,0) =  0 ,0
975       Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
976       Release(h,h)  = -1 ,0    h > 0
977       Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
978       Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
979 
980    And pictorially
981 
982            +-----+
983            | 0, 0|------- release -------> Error
984            +-----+
985              |  ^
986       acquire|  |release
987              |  |
988              |  |
989              v  |
990            +-----+
991            |-1, 0|
992            +-----+
993              |  ^
994       acquire|  |release
995              |  |
996              |  |
997              v  |
998            +-----+
999            | h, h|
1000            +-----+
1001              |  ^
1002       acquire|  |release
1003              |  |
1004              |  |
1005              v  |
1006            +-----+
1007            | h, t|----- acquire, release loopback ---+
1008            +-----+                                   |
1009                 ^                                    |
1010                 |                                    |
1011                 +------------------------------------+
1012  */
1013 
1014 #ifdef DEBUG_QUEUING_LOCKS
1015 
/* Stuff for circular trace buffer */
// Number of entries in the trace ring; each entry is a 128-byte text line.
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
// Running count of trace records written; the slot used is tc % TRACE_BUF_ELE.
// It is a plain int bumped with tc++ inside the macros below (no atomics).
static int tc = 0;
// Record "t<X> at <Y>" in the next slot. X is printed with %d, Y with %s.
// Note: the expansion already ends in ';'.
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
// As TRACE_LOCK, with an extra integer Z appended directly after the text Y.
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
// As TRACE_LOCK, followed by two integers printed as "Z,Q" (callers pass the
// head and tail ids).
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);
1027 
1028 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1029                                     kmp_queuing_lock_t *lck, kmp_int32 head_id,
1030                                     kmp_int32 tail_id) {
1031   kmp_int32 t, i;
1032 
1033   __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034 
1035   i = tc % TRACE_BUF_ELE;
1036   __kmp_printf_no_lock("%s\n", traces[i]);
1037   i = (i + 1) % TRACE_BUF_ELE;
1038   while (i != (tc % TRACE_BUF_ELE)) {
1039     __kmp_printf_no_lock("%s", traces[i]);
1040     i = (i + 1) % TRACE_BUF_ELE;
1041   }
1042   __kmp_printf_no_lock("\n");
1043 
1044   __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045                        "next_wait:%d, head_id:%d, tail_id:%d\n",
1046                        gtid + 1, this_thr->th.th_spin_here,
1047                        this_thr->th.th_next_waiting, head_id, tail_id);
1048 
1049   __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050 
1051   if (lck->lk.head_id >= 1) {
1052     t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053     while (t > 0) {
1054       __kmp_printf_no_lock("-> %d ", t);
1055       t = __kmp_threads[t - 1]->th.th_next_waiting;
1056     }
1057   }
1058   __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
1059   __kmp_printf_no_lock("\n\n");
1060 }
1061 
1062 #endif /* DEBUG_QUEUING_LOCKS */
1063 
1064 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1065   return TCR_4(lck->lk.owner_id) - 1;
1066 }
1067 
1068 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1069   return lck->lk.depth_locked != -1;
1070 }
1071 
/* Acquire a lock using the queuing lock implementation.

   Loops until the calling thread either grabs a free lock directly
   ((0,0)->(-1,0)) or manages to append itself to the waiter queue, in which
   case it then waits on its own th_spin_here flag until it becomes FALSE.
   Every return path yields KMP_LOCK_ACQUIRED_FIRST. Ids written into the lock
   words are gtid + 1. See the (head,tail) state table above for the
   transitions referenced in the comments below. */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;

#if OMPT_SUPPORT
  /* Remains ompt_state_undefined until this thread first fails to take the
     lock immediately; then holds the state to restore on exit. */
  ompt_state_t prev_state = ompt_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  /* On entry the thread must not already be spinning or linked in a queue. */
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes to
     the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
     not premature.  If the releasing thread sets spin_here_p to FALSE before
     this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    /* Snapshot of head decides which transition is attempted this round. */
    head = *head_id_p;

    switch (head) {

    case -1: { /* lock held, nobody on queue */
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                accidentally; this assignment prevents us from entering the
                if ( t > 0 ) condition in the enqueued case below, which is not
                necessary for this state transition */

      /* try (-1,0)->(tid,tid) */
      /* 64-bit compare-and-store starting at tail_id; this relies on the other
         id word sharing the same 8-byte unit (__kmp_validate_locks asserts that
         tail_id sits at an 8-byte-aligned offset). */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: { /* any head other than -1 or 0: a queue already exists */
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        /* head and tail were read separately and tail reads as empty; do not
           attempt to enqueue, go around the loop again. */
        enqueued = FALSE;
      } else {
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        /* Lock taken without queuing: undo the spin flag set before the loop. */
        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    /* Executed at most once per call: only while prev_state is still unset. */
    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
    }
#endif

    if (enqueued) {
      if (tail > 0) {
        /* Link ourselves behind the previous tail (tail holds its gtid + 1). */
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      KMP_MB();
      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
      // Synchronize writes to both runtime thread structures
      // and writes in user code.
      KMP_MB();

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      /* NOTE(review): unlike the no-queuing exit above, this restore is not
         guarded by ompt_enabled.enabled / prev_state; confirm that writing an
         unset prev_state here is intended. */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD_OVERSUB();

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  /* The loop above only exits via return. */
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}
1269 
1270 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271   KMP_DEBUG_ASSERT(gtid >= 0);
1272 
1273   int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274   return retval;
1275 }
1276 
1277 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278                                                   kmp_int32 gtid) {
1279   char const *const func = "omp_set_lock";
1280   if (lck->lk.initialized != lck) {
1281     KMP_FATAL(LockIsUninitialized, func);
1282   }
1283   if (__kmp_is_queuing_lock_nestable(lck)) {
1284     KMP_FATAL(LockNestableUsedAsSimple, func);
1285   }
1286   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287     KMP_FATAL(LockIsAlreadyOwned, func);
1288   }
1289 
1290   __kmp_acquire_queuing_lock(lck, gtid);
1291 
1292   lck->lk.owner_id = gtid + 1;
1293   return KMP_LOCK_ACQUIRED_FIRST;
1294 }
1295 
1296 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298   kmp_int32 head;
1299 #ifdef KMP_DEBUG
1300   kmp_info_t *this_thr;
1301 #endif
1302 
1303   KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304   KMP_DEBUG_ASSERT(gtid >= 0);
1305 #ifdef KMP_DEBUG
1306   this_thr = __kmp_thread_from_gtid(gtid);
1307   KMP_DEBUG_ASSERT(this_thr != NULL);
1308   KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309 #endif
1310 
1311   head = *head_id_p;
1312 
1313   if (head == 0) { /* nobody on queue, nobody holding */
1314     /* try (0,0)->(-1,0) */
1315     if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316       KA_TRACE(1000,
1317                ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318       KMP_FSYNC_ACQUIRED(lck);
1319       return TRUE;
1320     }
1321   }
1322 
1323   KA_TRACE(1000,
1324            ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325   return FALSE;
1326 }
1327 
1328 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329                                                kmp_int32 gtid) {
1330   char const *const func = "omp_test_lock";
1331   if (lck->lk.initialized != lck) {
1332     KMP_FATAL(LockIsUninitialized, func);
1333   }
1334   if (__kmp_is_queuing_lock_nestable(lck)) {
1335     KMP_FATAL(LockNestableUsedAsSimple, func);
1336   }
1337 
1338   int retval = __kmp_test_queuing_lock(lck, gtid);
1339 
1340   if (retval) {
1341     lck->lk.owner_id = gtid + 1;
1342   }
1343   return retval;
1344 }
1345 
/* Release a queuing lock held by thread gtid.

   Loops until one of three transitions succeeds (see the (head,tail) state
   table above):
     - head == -1:   (-1,0)->(0,0), lock becomes free, nobody to wake;
     - head == tail: (h,h)->(-1,0), the single waiter is dequeued;
     - otherwise:    head is advanced to the head waiter's th_next_waiting.
   When a waiter was dequeued, its th_next_waiting is cleared and then its
   th_spin_here is set to FALSE, which is the flag the acquire path waits on.
   Always returns KMP_LOCK_RELEASED. */
int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  kmp_info_t *this_thr;
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;

  KA_TRACE(1000,
           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "rel ent");

  if (this_thr->th.th_spin_here)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  /* The releasing thread must not itself be spinning or linked in a queue. */
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_FSYNC_RELEASING(lck);

  while (1) {
    kmp_int32 dequeued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
    tail = *tail_id_p;
    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
    if (head == 0)
      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
    KMP_DEBUG_ASSERT(head !=
                     0); /* holding the lock, head must be -1 or queue head */

    if (head == -1) { /* nobody on queue */
      /* try (-1,0)->(0,0) */
      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
        KA_TRACE(
            1000,
            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
#endif

#if OMPT_SUPPORT
/* nothing to do - no other thread is trying to shift blame */
#endif
        return KMP_LOCK_RELEASED;
      }
      /* Compare-and-store failed: head is no longer -1; retry from the top. */
      dequeued = FALSE;
    } else {
      KMP_MB();
      tail = *tail_id_p;
      if (head == tail) { /* only one thread on the queue */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0);

        /* try (h,h)->(-1,0) */
        /* 64-bit compare-and-store starting at tail_id, matching the 64-bit
           enqueue in the acquire path; it fails if the pair is no longer
           (h,h), in which case the loop retries. */
        dequeued = KMP_COMPARE_AND_STORE_REL64(
            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
            KMP_PACK_64(-1, 0));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
#endif

      } else {
        volatile kmp_int32 *waiting_id_p;
        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
        KMP_DEBUG_ASSERT(head_thr != NULL);
        waiting_id_p = &head_thr->th.th_next_waiting;

/* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0 || tail <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0 && tail > 0);

        /* try (h,t)->(h',t) or (t,t) */
        KMP_MB();
        /* make sure enqueuing thread has time to update next waiting thread
         * field */
        /* Waits for the head waiter's th_next_waiting to differ from 0 and
           stores KMP_WAIT's result as the new head (presumably the observed
           successor id; confirm against KMP_WAIT's definition). This is a
           plain store, not a compare-and-store. */
        *head_id_p =
            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
        dequeued = TRUE;
      }
    }

    if (dequeued) {
      /* head still holds the id (gtid + 1) of the waiter just removed. */
      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
      KMP_DEBUG_ASSERT(head_thr != NULL);

/* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
      if (head <= 0 || tail <= 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
      KMP_DEBUG_ASSERT(head > 0 && tail > 0);

      /* For clean code only. Thread not released until next statement prevents
         race with acquire code. */
      head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
#endif

      KMP_MB();
      /* reset spin value */
      /* This is the store the dequeued thread is waiting for in the acquire
         path; it hands the lock over to that thread. */
      head_thr->th.th_spin_here = FALSE;

      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
                      "dequeuing\n",
                      lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "rel exit 2");
#endif
      return KMP_LOCK_RELEASED;
    }
    /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
       threads */

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "rel retry");
#endif

  } /* while */
  /* The loop above only exits via return. */
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_RELEASED;
}
1487 
1488 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1489                                                   kmp_int32 gtid) {
1490   char const *const func = "omp_unset_lock";
1491   KMP_MB(); /* in case another processor initialized lock */
1492   if (lck->lk.initialized != lck) {
1493     KMP_FATAL(LockIsUninitialized, func);
1494   }
1495   if (__kmp_is_queuing_lock_nestable(lck)) {
1496     KMP_FATAL(LockNestableUsedAsSimple, func);
1497   }
1498   if (__kmp_get_queuing_lock_owner(lck) == -1) {
1499     KMP_FATAL(LockUnsettingFree, func);
1500   }
1501   if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1502     KMP_FATAL(LockUnsettingSetByAnother, func);
1503   }
1504   lck->lk.owner_id = 0;
1505   return __kmp_release_queuing_lock(lck, gtid);
1506 }
1507 
1508 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1509   lck->lk.location = NULL;
1510   lck->lk.head_id = 0;
1511   lck->lk.tail_id = 0;
1512   lck->lk.next_ticket = 0;
1513   lck->lk.now_serving = 0;
1514   lck->lk.owner_id = 0; // no thread owns the lock.
1515   lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1516   lck->lk.initialized = lck;
1517 
1518   KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1519 }
1520 
1521 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1522   lck->lk.initialized = NULL;
1523   lck->lk.location = NULL;
1524   lck->lk.head_id = 0;
1525   lck->lk.tail_id = 0;
1526   lck->lk.next_ticket = 0;
1527   lck->lk.now_serving = 0;
1528   lck->lk.owner_id = 0;
1529   lck->lk.depth_locked = -1;
1530 }
1531 
1532 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1533   char const *const func = "omp_destroy_lock";
1534   if (lck->lk.initialized != lck) {
1535     KMP_FATAL(LockIsUninitialized, func);
1536   }
1537   if (__kmp_is_queuing_lock_nestable(lck)) {
1538     KMP_FATAL(LockNestableUsedAsSimple, func);
1539   }
1540   if (__kmp_get_queuing_lock_owner(lck) != -1) {
1541     KMP_FATAL(LockStillOwned, func);
1542   }
1543   __kmp_destroy_queuing_lock(lck);
1544 }
1545 
1546 // nested queuing locks
1547 
1548 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1549   KMP_DEBUG_ASSERT(gtid >= 0);
1550 
1551   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1552     lck->lk.depth_locked += 1;
1553     return KMP_LOCK_ACQUIRED_NEXT;
1554   } else {
1555     __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1556     KMP_MB();
1557     lck->lk.depth_locked = 1;
1558     KMP_MB();
1559     lck->lk.owner_id = gtid + 1;
1560     return KMP_LOCK_ACQUIRED_FIRST;
1561   }
1562 }
1563 
1564 static int
1565 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1566                                               kmp_int32 gtid) {
1567   char const *const func = "omp_set_nest_lock";
1568   if (lck->lk.initialized != lck) {
1569     KMP_FATAL(LockIsUninitialized, func);
1570   }
1571   if (!__kmp_is_queuing_lock_nestable(lck)) {
1572     KMP_FATAL(LockSimpleUsedAsNestable, func);
1573   }
1574   return __kmp_acquire_nested_queuing_lock(lck, gtid);
1575 }
1576 
1577 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1578   int retval;
1579 
1580   KMP_DEBUG_ASSERT(gtid >= 0);
1581 
1582   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1583     retval = ++lck->lk.depth_locked;
1584   } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1585     retval = 0;
1586   } else {
1587     KMP_MB();
1588     retval = lck->lk.depth_locked = 1;
1589     KMP_MB();
1590     lck->lk.owner_id = gtid + 1;
1591   }
1592   return retval;
1593 }
1594 
1595 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1596                                                       kmp_int32 gtid) {
1597   char const *const func = "omp_test_nest_lock";
1598   if (lck->lk.initialized != lck) {
1599     KMP_FATAL(LockIsUninitialized, func);
1600   }
1601   if (!__kmp_is_queuing_lock_nestable(lck)) {
1602     KMP_FATAL(LockSimpleUsedAsNestable, func);
1603   }
1604   return __kmp_test_nested_queuing_lock(lck, gtid);
1605 }
1606 
1607 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1608   KMP_DEBUG_ASSERT(gtid >= 0);
1609 
1610   KMP_MB();
1611   if (--(lck->lk.depth_locked) == 0) {
1612     KMP_MB();
1613     lck->lk.owner_id = 0;
1614     __kmp_release_queuing_lock(lck, gtid);
1615     return KMP_LOCK_RELEASED;
1616   }
1617   return KMP_LOCK_STILL_HELD;
1618 }
1619 
1620 static int
1621 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1622                                               kmp_int32 gtid) {
1623   char const *const func = "omp_unset_nest_lock";
1624   KMP_MB(); /* in case another processor initialized lock */
1625   if (lck->lk.initialized != lck) {
1626     KMP_FATAL(LockIsUninitialized, func);
1627   }
1628   if (!__kmp_is_queuing_lock_nestable(lck)) {
1629     KMP_FATAL(LockSimpleUsedAsNestable, func);
1630   }
1631   if (__kmp_get_queuing_lock_owner(lck) == -1) {
1632     KMP_FATAL(LockUnsettingFree, func);
1633   }
1634   if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1635     KMP_FATAL(LockUnsettingSetByAnother, func);
1636   }
1637   return __kmp_release_nested_queuing_lock(lck, gtid);
1638 }
1639 
1640 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1641   __kmp_init_queuing_lock(lck);
1642   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1643 }
1644 
1645 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1646   __kmp_destroy_queuing_lock(lck);
1647   lck->lk.depth_locked = 0;
1648 }
1649 
1650 static void
1651 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1652   char const *const func = "omp_destroy_nest_lock";
1653   if (lck->lk.initialized != lck) {
1654     KMP_FATAL(LockIsUninitialized, func);
1655   }
1656   if (!__kmp_is_queuing_lock_nestable(lck)) {
1657     KMP_FATAL(LockSimpleUsedAsNestable, func);
1658   }
1659   if (__kmp_get_queuing_lock_owner(lck) != -1) {
1660     KMP_FATAL(LockStillOwned, func);
1661   }
1662   __kmp_destroy_nested_queuing_lock(lck);
1663 }
1664 
1665 // access functions to fields which don't exist for all lock kinds.
1666 
1667 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1668   return lck->lk.location;
1669 }
1670 
1671 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1672                                             const ident_t *loc) {
1673   lck->lk.location = loc;
1674 }
1675 
1676 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1677   return lck->lk.flags;
1678 }
1679 
1680 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1681                                          kmp_lock_flags_t flags) {
1682   lck->lk.flags = flags;
1683 }
1684 
1685 #if KMP_USE_ADAPTIVE_LOCKS
1686 
1687 /* RTM Adaptive locks */
1688 
1689 #if KMP_HAVE_RTM_INTRINSICS
1690 #include <immintrin.h>
1691 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1692 
1693 #else
1694 
// Values from the status register after failed speculation.
// These are defined here only on the branch where KMP_HAVE_RTM_INTRINSICS is
// false; the other branch includes <immintrin.h> instead.
// _XBEGIN_STARTED is all ones, matching the -1 that _xbegin() below returns
// when no abort status was written.
#define _XBEGIN_STARTED (~0u)
// Individual status bits (bit 0 through bit 5).
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
#define _XABORT_CONFLICT (1 << 2)
#define _XABORT_CAPACITY (1 << 3)
#define _XABORT_DEBUG (1 << 4)
#define _XABORT_NESTED (1 << 5)
// Extracts bits 31:24 of the status word as an unsigned char.
#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

// Two-level stringification: STRINGIZE expands its argument before
// STRINGIZE_INTERNAL turns it into a string literal (used by _xabort below).
#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1710 
1711 // Access to RTM instructions
/* A version of XBegin which returns -1 on speculation, and the value of EAX on
  an abort. This is the same definition as the compiler intrinsic that will be
  supported at some point. */
static __inline int _xbegin() {
  // Stays -1 unless the abort path below overwrites it with EAX.
  int res = -1;

#if KMP_OS_WINDOWS
  // The instruction is emitted as raw bytes (0xC7 0xF8 plus a 4-byte value of
  // 2) rather than by mnemonic. The x86_64 and IA32 blocks below are
  // identical. NOTE(review): the value 2 presumably is the offset that skips
  // the following `jmp L2` so an abort resumes at `mov res, eax` - confirm.
#if KMP_ARCH_X86_64
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#else /* IA32 */
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#endif // KMP_ARCH_X86_64
#else
  /* Note that %eax must be noted as killed (clobbered), because the XSR is
     returned in %eax(%rax) on abort.  Other register values are restored, so
     don't need to be killed.

     We must also mark 'res' as an input and an output, since otherwise
     'res=-1' may be dropped as being dead, whereas we do need the assignment on
     the successful (i.e., non-abort) path. */
  // Same byte sequence as above; the 4-byte operand is computed by the
  // assembler as 1f-1b-6. The normal path jumps over the movl to label 2; the
  // second label 1 is where %eax is copied into res.
  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
                   "   .long  1f-1b-6\n"
                   "    jmp   2f\n"
                   "1:  movl  %%eax,%0\n"
                   "2:"
                   : "+r"(res)::"memory", "%eax");
#endif // KMP_OS_WINDOWS
  return res;
}
1761 
/* Transaction end: emits the three raw bytes 0x0F 0x01 0xD5. The non-Windows
   variant additionally declares a "memory" clobber so the compiler does not
   move memory accesses across it. */
static __inline void _xend() {
#if KMP_OS_WINDOWS
  __asm {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
  }
#else
  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
#endif
}
1774 
/* This is a macro, the argument must be a single byte constant which can be
   evaluated by the inline assembler, since it is emitted as a byte into the
   assembly code.

   Both variants emit the bytes 0xC6 0xF8 followed by ARG. Note that the
   non-Windows expansion already ends in a semicolon, so writing
   `_xabort(x);` there yields an extra, harmless empty statement. */
// clang-format off
#if KMP_OS_WINDOWS
#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
#else
#define _xabort(ARG)                                                           \
  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
#endif
// clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
1787 
// Statistics are collected for testing purposes
1789 #if KMP_DEBUG_ADAPTIVE_LOCKS
1790 
// We accumulate speculative lock statistics when the lock is destroyed. We
// keep locks that haven't been destroyed in the liveLocks list so that we can
// grab their statistics too.
static kmp_adaptive_lock_statistics_t destroyedStats;

// To hold the list of live locks. This object is the sentinel head of a
// circular, doubly linked list: an empty list has it linked to itself.
static kmp_adaptive_lock_info_t liveLocks;

// A lock so we can safely update the list of locks.
static kmp_bootstrap_lock_t chain_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1802 
1803 // Initialize the list of stats.
1804 void __kmp_init_speculative_stats() {
1805   kmp_adaptive_lock_info_t *lck = &liveLocks;
1806 
1807   memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1808          sizeof(lck->stats));
1809   lck->stats.next = lck;
1810   lck->stats.prev = lck;
1811 
1812   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1813   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1814 
1815   __kmp_init_bootstrap_lock(&chain_lock);
1816 }
1817 
1818 // Insert the lock into the circular list
1819 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1820   __kmp_acquire_bootstrap_lock(&chain_lock);
1821 
1822   lck->stats.next = liveLocks.stats.next;
1823   lck->stats.prev = &liveLocks;
1824 
1825   liveLocks.stats.next = lck;
1826   lck->stats.next->stats.prev = lck;
1827 
1828   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1829   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1830 
1831   __kmp_release_bootstrap_lock(&chain_lock);
1832 }
1833 
1834 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1835   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1836   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1837 
1838   kmp_adaptive_lock_info_t *n = lck->stats.next;
1839   kmp_adaptive_lock_info_t *p = lck->stats.prev;
1840 
1841   n->stats.prev = p;
1842   p->stats.next = n;
1843 }
1844 
1845 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1846   memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1847          sizeof(lck->stats));
1848   __kmp_remember_lock(lck);
1849 }
1850 
1851 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1852                             kmp_adaptive_lock_info_t *lck) {
1853   kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1854 
1855   t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1856   t->successfulSpeculations += s->successfulSpeculations;
1857   t->hardFailedSpeculations += s->hardFailedSpeculations;
1858   t->softFailedSpeculations += s->softFailedSpeculations;
1859   t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1860   t->lemmingYields += s->lemmingYields;
1861 }
1862 
1863 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1864   __kmp_acquire_bootstrap_lock(&chain_lock);
1865 
1866   __kmp_add_stats(&destroyedStats, lck);
1867   __kmp_forget_lock(lck);
1868 
1869   __kmp_release_bootstrap_lock(&chain_lock);
1870 }
1871 
// Return `count` as a percentage of `total`, or 0 when `total` is zero. The
// arithmetic is carried out in double precision and narrowed on return.
static float percent(kmp_uint32 count, kmp_uint32 total) {
  if (total == 0) {
    return 0.0;
  }
  return (100.0 * count) / total;
}
1875 
1876 void __kmp_print_speculative_stats() {
1877   kmp_adaptive_lock_statistics_t total = destroyedStats;
1878   kmp_adaptive_lock_info_t *lck;
1879 
1880   for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1881     __kmp_add_stats(&total, lck);
1882   }
1883   kmp_adaptive_lock_statistics_t *t = &total;
1884   kmp_uint32 totalSections =
1885       t->nonSpeculativeAcquires + t->successfulSpeculations;
1886   kmp_uint32 totalSpeculations = t->successfulSpeculations +
1887                                  t->hardFailedSpeculations +
1888                                  t->softFailedSpeculations;
1889   if (totalSections <= 0)
1890     return;
1891 
1892   kmp_safe_raii_file_t statsFile;
1893   if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1894     statsFile.set_stdout();
1895   } else {
1896     size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1897     char buffer[buffLen];
1898     KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1899                  (kmp_int32)getpid());
1900     statsFile.open(buffer, "w");
1901   }
1902 
1903   fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1904   fprintf(statsFile,
1905           " Lock parameters: \n"
1906           "   max_soft_retries               : %10d\n"
1907           "   max_badness                    : %10d\n",
1908           __kmp_adaptive_backoff_params.max_soft_retries,
1909           __kmp_adaptive_backoff_params.max_badness);
1910   fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1911           t->nonSpeculativeAcquireAttempts);
1912   fprintf(statsFile, " Total critical sections          : %10d\n",
1913           totalSections);
1914   fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
1915           t->successfulSpeculations,
1916           percent(t->successfulSpeculations, totalSections));
1917   fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
1918           t->nonSpeculativeAcquires,
1919           percent(t->nonSpeculativeAcquires, totalSections));
1920   fprintf(statsFile, " Lemming yields                   : %10d\n\n",
1921           t->lemmingYields);
1922 
1923   fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
1924           totalSpeculations);
1925   fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
1926           t->successfulSpeculations,
1927           percent(t->successfulSpeculations, totalSpeculations));
1928   fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
1929           t->softFailedSpeculations,
1930           percent(t->softFailedSpeculations, totalSpeculations));
1931   fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
1932           t->hardFailedSpeculations,
1933           percent(t->hardFailedSpeculations, totalSpeculations));
1934 }
1935 
// Bump the named counter in the statistics record of adaptive lock `lck`
// (a plain post-increment).
#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
#else
// Statistics are compiled out: the macro expands to nothing.
#define KMP_INC_STAT(lck, stat)
1939 
1940 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1941 
1942 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1943   // It is enough to check that the head_id is zero.
1944   // We don't also need to check the tail.
1945   bool res = lck->lk.head_id == 0;
1946 
1947 // We need a fence here, since we must ensure that no memory operations
1948 // from later in this thread float above that read.
1949 #if KMP_COMPILER_ICC
1950   _mm_mfence();
1951 #else
1952   __sync_synchronize();
1953 #endif
1954 
1955   return res;
1956 }
1957 
1958 // Functions for manipulating the badness
1959 static __inline void
1960 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1961   // Reset the badness to zero so we eagerly try to speculate again
1962   lck->lk.adaptive.badness = 0;
1963   KMP_INC_STAT(lck, successfulSpeculations);
1964 }
1965 
1966 // Create a bit mask with one more set bit.
1967 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1968   kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1969   if (newBadness > lck->lk.adaptive.max_badness) {
1970     return;
1971   } else {
1972     lck->lk.adaptive.badness = newBadness;
1973   }
1974 }
1975 
1976 // Check whether speculation should be attempted.
1977 KMP_ATTRIBUTE_TARGET_RTM
1978 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1979                                            kmp_int32 gtid) {
1980   kmp_uint32 badness = lck->lk.adaptive.badness;
1981   kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1982   int res = (attempts & badness) == 0;
1983   return res;
1984 }
1985 
1986 // Attempt to acquire only the speculative lock.
1987 // Does not back off to the non-speculative lock.
1988 KMP_ATTRIBUTE_TARGET_RTM
1989 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1990                                          kmp_int32 gtid) {
1991   int retries = lck->lk.adaptive.max_soft_retries;
1992 
1993   // We don't explicitly count the start of speculation, rather we record the
1994   // results (success, hard fail, soft fail). The sum of all of those is the
1995   // total number of times we started speculation since all speculations must
1996   // end one of those ways.
1997   do {
1998     kmp_uint32 status = _xbegin();
1999     // Switch this in to disable actual speculation but exercise at least some
2000     // of the rest of the code. Useful for debugging...
2001     // kmp_uint32 status = _XABORT_NESTED;
2002 
2003     if (status == _XBEGIN_STARTED) {
2004       /* We have successfully started speculation. Check that no-one acquired
2005          the lock for real between when we last looked and now. This also gets
2006          the lock cache line into our read-set, which we need so that we'll
2007          abort if anyone later claims it for real. */
2008       if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2009         // Lock is now visibly acquired, so someone beat us to it. Abort the
2010         // transaction so we'll restart from _xbegin with the failure status.
2011         _xabort(0x01);
2012         KMP_ASSERT2(0, "should not get here");
2013       }
2014       return 1; // Lock has been acquired (speculatively)
2015     } else {
2016       // We have aborted, update the statistics
2017       if (status & SOFT_ABORT_MASK) {
2018         KMP_INC_STAT(lck, softFailedSpeculations);
2019         // and loop round to retry.
2020       } else {
2021         KMP_INC_STAT(lck, hardFailedSpeculations);
2022         // Give up if we had a hard failure.
2023         break;
2024       }
2025     }
2026   } while (retries--); // Loop while we have retries, and didn't fail hard.
2027 
2028   // Either we had a hard failure or we didn't succeed softly after
2029   // the full set of attempts, so back off the badness.
2030   __kmp_step_badness(lck);
2031   return 0;
2032 }
2033 
2034 // Attempt to acquire the speculative lock, or back off to the non-speculative
2035 // one if the speculative lock cannot be acquired.
2036 // We can succeed speculatively, non-speculatively, or fail.
2037 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2038   // First try to acquire the lock speculatively
2039   if (__kmp_should_speculate(lck, gtid) &&
2040       __kmp_test_adaptive_lock_only(lck, gtid))
2041     return 1;
2042 
2043   // Speculative acquisition failed, so try to acquire it non-speculatively.
2044   // Count the non-speculative acquire attempt
2045   lck->lk.adaptive.acquire_attempts++;
2046 
2047   // Use base, non-speculative lock.
2048   if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2049     KMP_INC_STAT(lck, nonSpeculativeAcquires);
2050     return 1; // Lock is acquired (non-speculatively)
2051   } else {
2052     return 0; // Failed to acquire the lock, it's already visibly locked.
2053   }
2054 }
2055 
2056 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2057                                                 kmp_int32 gtid) {
2058   char const *const func = "omp_test_lock";
2059   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2060     KMP_FATAL(LockIsUninitialized, func);
2061   }
2062 
2063   int retval = __kmp_test_adaptive_lock(lck, gtid);
2064 
2065   if (retval) {
2066     lck->lk.qlk.owner_id = gtid + 1;
2067   }
2068   return retval;
2069 }
2070 
2071 // Block until we can acquire a speculative, adaptive lock. We check whether we
2072 // should be trying to speculate. If we should be, we check the real lock to see
2073 // if it is free, and, if not, pause without attempting to acquire it until it
2074 // is. Then we try the speculative acquire. This means that although we suffer
2075 // from lemmings a little (because all we can't acquire the lock speculatively
2076 // until the queue of threads waiting has cleared), we don't get into a state
2077 // where we can never acquire the lock speculatively (because we force the queue
2078 // to clear by preventing new arrivals from entering the queue). This does mean
2079 // that when we're trying to break lemmings, the lock is no longer fair. However
2080 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2081 // problem.
2082 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2083                                         kmp_int32 gtid) {
2084   if (__kmp_should_speculate(lck, gtid)) {
2085     if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2086       if (__kmp_test_adaptive_lock_only(lck, gtid))
2087         return;
2088       // We tried speculation and failed, so give up.
2089     } else {
2090       // We can't try speculation until the lock is free, so we pause here
2091       // (without suspending on the queueing lock, to allow it to drain, then
2092       // try again. All other threads will also see the same result for
2093       // shouldSpeculate, so will be doing the same if they try to claim the
2094       // lock from now on.
2095       while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2096         KMP_INC_STAT(lck, lemmingYields);
2097         KMP_YIELD(TRUE);
2098       }
2099 
2100       if (__kmp_test_adaptive_lock_only(lck, gtid))
2101         return;
2102     }
2103   }
2104 
2105   // Speculative acquisition failed, so acquire it non-speculatively.
2106   // Count the non-speculative acquire attempt
2107   lck->lk.adaptive.acquire_attempts++;
2108 
2109   __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2110   // We have acquired the base lock, so count that.
2111   KMP_INC_STAT(lck, nonSpeculativeAcquires);
2112 }
2113 
2114 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2115                                                     kmp_int32 gtid) {
2116   char const *const func = "omp_set_lock";
2117   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2118     KMP_FATAL(LockIsUninitialized, func);
2119   }
2120   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2121     KMP_FATAL(LockIsAlreadyOwned, func);
2122   }
2123 
2124   __kmp_acquire_adaptive_lock(lck, gtid);
2125 
2126   lck->lk.qlk.owner_id = gtid + 1;
2127 }
2128 
2129 KMP_ATTRIBUTE_TARGET_RTM
2130 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2131                                        kmp_int32 gtid) {
2132   if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2133           lck))) { // If the lock doesn't look claimed we must be speculating.
2134     // (Or the user's code is buggy and they're releasing without locking;
2135     // if we had XTEST we'd be able to check that case...)
2136     _xend(); // Exit speculation
2137     __kmp_update_badness_after_success(lck);
2138   } else { // Since the lock *is* visibly locked we're not speculating,
2139     // so should use the underlying lock's release scheme.
2140     __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2141   }
2142   return KMP_LOCK_RELEASED;
2143 }
2144 
2145 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2146                                                    kmp_int32 gtid) {
2147   char const *const func = "omp_unset_lock";
2148   KMP_MB(); /* in case another processor initialized lock */
2149   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2150     KMP_FATAL(LockIsUninitialized, func);
2151   }
2152   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2153     KMP_FATAL(LockUnsettingFree, func);
2154   }
2155   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2156     KMP_FATAL(LockUnsettingSetByAnother, func);
2157   }
2158   lck->lk.qlk.owner_id = 0;
2159   __kmp_release_adaptive_lock(lck, gtid);
2160   return KMP_LOCK_RELEASED;
2161 }
2162 
2163 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2164   __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2165   lck->lk.adaptive.badness = 0;
2166   lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2167   lck->lk.adaptive.max_soft_retries =
2168       __kmp_adaptive_backoff_params.max_soft_retries;
2169   lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2170 #if KMP_DEBUG_ADAPTIVE_LOCKS
2171   __kmp_zero_speculative_stats(&lck->lk.adaptive);
2172 #endif
2173   KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2174 }
2175 
2176 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2177 #if KMP_DEBUG_ADAPTIVE_LOCKS
2178   __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2179 #endif
2180   __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2181   // Nothing needed for the speculative part.
2182 }
2183 
2184 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2185   char const *const func = "omp_destroy_lock";
2186   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2187     KMP_FATAL(LockIsUninitialized, func);
2188   }
2189   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2190     KMP_FATAL(LockStillOwned, func);
2191   }
2192   __kmp_destroy_adaptive_lock(lck);
2193 }
2194 
2195 #endif // KMP_USE_ADAPTIVE_LOCKS
2196 
2197 /* ------------------------------------------------------------------------ */
2198 /* DRDPA ticket locks                                                */
2199 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2200 
2201 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2202   return lck->lk.owner_id - 1;
2203 }
2204 
2205 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2206   return lck->lk.depth_locked != -1;
2207 }
2208 
// Acquire the DRDPA ticket lock `lck`, spinning until this thread's ticket is
// being served. Always returns KMP_LOCK_ACQUIRED_FIRST; `gtid` is not used in
// the body.
//
// Steps, in order:
//  1. Take the next ticket and spin on polls[ticket & mask] until the value
//     stored there reaches the ticket, reloading mask and polls on each
//     iteration because the polling area may be replaced meanwhile.
//  2. Record the ticket in now_serving.
//  3. Free a retired polling area once this ticket has reached the cleanup
//     ticket.
//  4. If no retired area is pending, possibly replace the polling area:
//     shrink it to a single slot when there are more threads than available
//     processors, or double it until it is larger than the number of waiting
//     threads.
__forceinline static int
__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
  kmp_uint64 mask = lck->lk.mask; // atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls;

#ifdef USE_LOCK_PROFILE
  if (polls[ticket & mask] != ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  // Now spin-wait, but reload the polls pointer and mask, in case the
  // polling area has been reconfigured.  Unless it is reconfigured, the
  // reloads stay in L1 cache and are cheap.
  //
  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
  // The current implementation of KMP_WAIT doesn't allow for mask
  // and poll to be re-read every spin iteration.
  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  while (polls[ticket & mask] < ticket) { // atomic load
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
    // Re-read the mask and the poll pointer from the lock structure.
    //
    // Make certain that "mask" is read before "polls" !!!
    //
    // If another thread reconfigures the polling area and updates their
    // values, and we get the new value of mask and the old polls pointer, we
    // could access memory beyond the end of the old polling area.
    mask = lck->lk.mask; // atomic load
    polls = lck->lk.polls; // atomic load
  }

  // Critical section starts here
  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
                  ticket, lck));
  lck->lk.now_serving = ticket; // non-volatile store

  // Deallocate a garbage polling area if we know that we are the last
  // thread that could possibly access it.
  //
  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
  // ticket.
  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
    __kmp_free(lck->lk.old_polls);
    lck->lk.old_polls = NULL;
    lck->lk.cleanup_ticket = 0;
  }

  // Check to see if we should reconfigure the polling area.
  // If there is still a garbage polling area to be deallocated from a
  // previous reconfiguration, let a later thread reconfigure it.
  if (lck->lk.old_polls == NULL) {
    bool reconfigure = false;
    std::atomic<kmp_uint64> *old_polls = polls;
    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);

    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      // We are in oversubscription mode.  Contract the polling area
      // down to a single location, if that hasn't been done already.
      if (num_polls > 1) {
        reconfigure = true;
        // NOTE(review): the reload below is overwritten two lines further
        // down (num_polls = 1) before it is used; it looks redundant.
        num_polls = TCR_4(lck->lk.num_polls);
        mask = 0;
        num_polls = 1;
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        // The single slot starts at our own ticket, the one being served.
        polls[0] = ticket;
      }
    } else {
      // We are in under/fully subscribed mode.  Check the number of
      // threads waiting on the lock.  The size of the polling area
      // should be at least the number of threads waiting.
      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
      if (num_waiting > num_polls) {
        kmp_uint32 old_num_polls = num_polls;
        reconfigure = true;
        // Double the area (and extend the mask by one bit) until it is
        // strictly larger than the number of waiters.
        do {
          mask = (mask << 1) | 1;
          num_polls *= 2;
        } while (num_polls <= num_waiting);

        // Allocate the new polling area, and copy the relevant portion
        // of the old polling area to the new area.  __kmp_allocate()
        // zeroes the memory it allocates, and most of the old area is
        // just zero padding, so we only copy the release counters.
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        kmp_uint32 i;
        for (i = 0; i < old_num_polls; i++) {
          polls[i].store(old_polls[i]);
        }
      }
    }

    if (reconfigure) {
      // Now write the updated fields back to the lock structure.
      //
      // Make certain that "polls" is written before "mask" !!!
      //
      // If another thread picks up the new value of mask and the old polls
      // pointer, it could access memory beyond the end of the old polling
      // area.
      //
      // On x86, we need memory fences.
      KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
                      "lock %p to %d polls\n",
                      ticket, lck, num_polls));

      // Keep the replaced area reachable so that a later acquirer can free it.
      lck->lk.old_polls = old_polls;
      lck->lk.polls = polls; // atomic store

      KMP_MB();

      lck->lk.num_polls = num_polls;
      lck->lk.mask = mask; // atomic store

      KMP_MB();

      // Only after the new polling area and mask have been flushed
      // to main memory can we update the cleanup ticket field.
      //
      // volatile load / non-volatile store
      lck->lk.cleanup_ticket = lck->lk.next_ticket;
    }
  }
  return KMP_LOCK_ACQUIRED_FIRST;
}
2341 
2342 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2343   int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2344   return retval;
2345 }
2346 
2347 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2348                                                 kmp_int32 gtid) {
2349   char const *const func = "omp_set_lock";
2350   if (lck->lk.initialized != lck) {
2351     KMP_FATAL(LockIsUninitialized, func);
2352   }
2353   if (__kmp_is_drdpa_lock_nestable(lck)) {
2354     KMP_FATAL(LockNestableUsedAsSimple, func);
2355   }
2356   if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2357     KMP_FATAL(LockIsAlreadyOwned, func);
2358   }
2359 
2360   __kmp_acquire_drdpa_lock(lck, gtid);
2361 
2362   lck->lk.owner_id = gtid + 1;
2363   return KMP_LOCK_ACQUIRED_FIRST;
2364 }
2365 
2366 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2367   // First get a ticket, then read the polls pointer and the mask.
2368   // The polls pointer must be read before the mask!!! (See above)
2369   kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2370   std::atomic<kmp_uint64> *polls = lck->lk.polls;
2371   kmp_uint64 mask = lck->lk.mask; // atomic load
2372   if (polls[ticket & mask] == ticket) {
2373     kmp_uint64 next_ticket = ticket + 1;
2374     if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2375                                        next_ticket)) {
2376       KMP_FSYNC_ACQUIRED(lck);
2377       KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2378                       ticket, lck));
2379       lck->lk.now_serving = ticket; // non-volatile store
2380 
2381       // Since no threads are waiting, there is no possibility that we would
2382       // want to reconfigure the polling area.  We might have the cleanup ticket
2383       // value (which says that it is now safe to deallocate old_polls), but
2384       // we'll let a later thread which calls __kmp_acquire_lock do that - this
2385       // routine isn't supposed to block, and we would risk blocks if we called
2386       // __kmp_free() to do the deallocation.
2387       return TRUE;
2388     }
2389   }
2390   return FALSE;
2391 }
2392 
2393 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2394                                              kmp_int32 gtid) {
2395   char const *const func = "omp_test_lock";
2396   if (lck->lk.initialized != lck) {
2397     KMP_FATAL(LockIsUninitialized, func);
2398   }
2399   if (__kmp_is_drdpa_lock_nestable(lck)) {
2400     KMP_FATAL(LockNestableUsedAsSimple, func);
2401   }
2402 
2403   int retval = __kmp_test_drdpa_lock(lck, gtid);
2404 
2405   if (retval) {
2406     lck->lk.owner_id = gtid + 1;
2407   }
2408   return retval;
2409 }
2410 
2411 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2412   // Read the ticket value from the lock data struct, then the polls pointer and
2413   // the mask.  The polls pointer must be read before the mask!!! (See above)
2414   kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2415   std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2416   kmp_uint64 mask = lck->lk.mask; // atomic load
2417   KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2418                   ticket - 1, lck));
2419   KMP_FSYNC_RELEASING(lck);
2420   polls[ticket & mask] = ticket; // atomic store
2421   return KMP_LOCK_RELEASED;
2422 }
2423 
2424 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2425                                                 kmp_int32 gtid) {
2426   char const *const func = "omp_unset_lock";
2427   KMP_MB(); /* in case another processor initialized lock */
2428   if (lck->lk.initialized != lck) {
2429     KMP_FATAL(LockIsUninitialized, func);
2430   }
2431   if (__kmp_is_drdpa_lock_nestable(lck)) {
2432     KMP_FATAL(LockNestableUsedAsSimple, func);
2433   }
2434   if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2435     KMP_FATAL(LockUnsettingFree, func);
2436   }
2437   if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2438       (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2439     KMP_FATAL(LockUnsettingSetByAnother, func);
2440   }
2441   lck->lk.owner_id = 0;
2442   return __kmp_release_drdpa_lock(lck, gtid);
2443 }
2444 
2445 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2446   lck->lk.location = NULL;
2447   lck->lk.mask = 0;
2448   lck->lk.num_polls = 1;
2449   lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2450       lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2451   lck->lk.cleanup_ticket = 0;
2452   lck->lk.old_polls = NULL;
2453   lck->lk.next_ticket = 0;
2454   lck->lk.now_serving = 0;
2455   lck->lk.owner_id = 0; // no thread owns the lock.
2456   lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2457   lck->lk.initialized = lck;
2458 
2459   KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2460 }
2461 
2462 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2463   lck->lk.initialized = NULL;
2464   lck->lk.location = NULL;
2465   if (lck->lk.polls.load() != NULL) {
2466     __kmp_free(lck->lk.polls.load());
2467     lck->lk.polls = NULL;
2468   }
2469   if (lck->lk.old_polls != NULL) {
2470     __kmp_free(lck->lk.old_polls);
2471     lck->lk.old_polls = NULL;
2472   }
2473   lck->lk.mask = 0;
2474   lck->lk.num_polls = 0;
2475   lck->lk.cleanup_ticket = 0;
2476   lck->lk.next_ticket = 0;
2477   lck->lk.now_serving = 0;
2478   lck->lk.owner_id = 0;
2479   lck->lk.depth_locked = -1;
2480 }
2481 
2482 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2483   char const *const func = "omp_destroy_lock";
2484   if (lck->lk.initialized != lck) {
2485     KMP_FATAL(LockIsUninitialized, func);
2486   }
2487   if (__kmp_is_drdpa_lock_nestable(lck)) {
2488     KMP_FATAL(LockNestableUsedAsSimple, func);
2489   }
2490   if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2491     KMP_FATAL(LockStillOwned, func);
2492   }
2493   __kmp_destroy_drdpa_lock(lck);
2494 }
2495 
2496 // nested drdpa ticket locks
2497 
2498 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2499   KMP_DEBUG_ASSERT(gtid >= 0);
2500 
2501   if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2502     lck->lk.depth_locked += 1;
2503     return KMP_LOCK_ACQUIRED_NEXT;
2504   } else {
2505     __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2506     KMP_MB();
2507     lck->lk.depth_locked = 1;
2508     KMP_MB();
2509     lck->lk.owner_id = gtid + 1;
2510     return KMP_LOCK_ACQUIRED_FIRST;
2511   }
2512 }
2513 
2514 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2515                                                         kmp_int32 gtid) {
2516   char const *const func = "omp_set_nest_lock";
2517   if (lck->lk.initialized != lck) {
2518     KMP_FATAL(LockIsUninitialized, func);
2519   }
2520   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2521     KMP_FATAL(LockSimpleUsedAsNestable, func);
2522   }
2523   __kmp_acquire_nested_drdpa_lock(lck, gtid);
2524 }
2525 
2526 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2527   int retval;
2528 
2529   KMP_DEBUG_ASSERT(gtid >= 0);
2530 
2531   if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2532     retval = ++lck->lk.depth_locked;
2533   } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2534     retval = 0;
2535   } else {
2536     KMP_MB();
2537     retval = lck->lk.depth_locked = 1;
2538     KMP_MB();
2539     lck->lk.owner_id = gtid + 1;
2540   }
2541   return retval;
2542 }
2543 
2544 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2545                                                     kmp_int32 gtid) {
2546   char const *const func = "omp_test_nest_lock";
2547   if (lck->lk.initialized != lck) {
2548     KMP_FATAL(LockIsUninitialized, func);
2549   }
2550   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2551     KMP_FATAL(LockSimpleUsedAsNestable, func);
2552   }
2553   return __kmp_test_nested_drdpa_lock(lck, gtid);
2554 }
2555 
2556 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2557   KMP_DEBUG_ASSERT(gtid >= 0);
2558 
2559   KMP_MB();
2560   if (--(lck->lk.depth_locked) == 0) {
2561     KMP_MB();
2562     lck->lk.owner_id = 0;
2563     __kmp_release_drdpa_lock(lck, gtid);
2564     return KMP_LOCK_RELEASED;
2565   }
2566   return KMP_LOCK_STILL_HELD;
2567 }
2568 
2569 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2570                                                        kmp_int32 gtid) {
2571   char const *const func = "omp_unset_nest_lock";
2572   KMP_MB(); /* in case another processor initialized lock */
2573   if (lck->lk.initialized != lck) {
2574     KMP_FATAL(LockIsUninitialized, func);
2575   }
2576   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2577     KMP_FATAL(LockSimpleUsedAsNestable, func);
2578   }
2579   if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2580     KMP_FATAL(LockUnsettingFree, func);
2581   }
2582   if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2583     KMP_FATAL(LockUnsettingSetByAnother, func);
2584   }
2585   return __kmp_release_nested_drdpa_lock(lck, gtid);
2586 }
2587 
2588 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2589   __kmp_init_drdpa_lock(lck);
2590   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2591 }
2592 
2593 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2594   __kmp_destroy_drdpa_lock(lck);
2595   lck->lk.depth_locked = 0;
2596 }
2597 
2598 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2599   char const *const func = "omp_destroy_nest_lock";
2600   if (lck->lk.initialized != lck) {
2601     KMP_FATAL(LockIsUninitialized, func);
2602   }
2603   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2604     KMP_FATAL(LockSimpleUsedAsNestable, func);
2605   }
2606   if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2607     KMP_FATAL(LockStillOwned, func);
2608   }
2609   __kmp_destroy_nested_drdpa_lock(lck);
2610 }
2611 
2612 // access functions to fields which don't exist for all lock kinds.
2613 
2614 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2615   return lck->lk.location;
2616 }
2617 
2618 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2619                                           const ident_t *loc) {
2620   lck->lk.location = loc;
2621 }
2622 
2623 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2624   return lck->lk.flags;
2625 }
2626 
2627 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2628                                        kmp_lock_flags_t flags) {
2629   lck->lk.flags = flags;
2630 }
2631 
2632 // Time stamp counter: __kmp_tsc() is the clock read by __kmp_spin_backoff,
// and __kmp_spin_backoff_params holds the default backoff settings matching
// that clock's unit.
// NOTE(review): the initializers are presumably {step, max_backoff, min_tick}
// (the three fields __kmp_spin_backoff reads) -- confirm against kmp_backoff_t.
2633 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2634 #define __kmp_tsc() __kmp_hardware_timestamp()
2635 // Runtime's default backoff parameters
2636 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2637 #else
2638 // Use nanoseconds for other platforms
2639 extern kmp_uint64 __kmp_now_nsec();
2640 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2641 #define __kmp_tsc() __kmp_now_nsec()
2642 #endif
2643 
2644 // A useful predicate for dealing with timestamps that may wrap.
2645 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2646 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2647 // Times where going clockwise is less distance than going anti-clockwise
2648 // are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
2649 // then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2650 // signed(b) = 0 captures the actual difference
2651 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2652   return ((kmp_int64)b - (kmp_int64)a) > 0;
2653 }
2654 
2655 // Truncated binary exponential backoff function
2656 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2657   // We could flatten this loop, but making it a nested loop gives better result
2658   kmp_uint32 i;
2659   for (i = boff->step; i > 0; i--) {
2660     kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2661     do {
2662       KMP_CPU_PAUSE();
2663     } while (before(__kmp_tsc(), goal));
2664   }
2665   boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2666 }
2667 
2668 #if KMP_USE_DYNAMIC_LOCK
2669 
2670 // Direct lock initializers. It simply writes a tag to the low 8 bits of the
2671 // lock word.
2672 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2673                                    kmp_dyna_lockseq_t seq) {
2674   TCW_4(*lck, KMP_GET_D_TAG(seq));
2675   KA_TRACE(
2676       20,
2677       ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2678 }
2679 
2680 #if KMP_USE_TSX
2681 
2682 // HLE lock functions - imported from the testbed runtime.
// Raw prefix bytes emitted in front of the instructions in the inline
// assembly below.
2683 #define HLE_ACQUIRE ".byte 0xf2;"
2684 #define HLE_RELEASE ".byte 0xf3;"
2685 
// Atomically exchanges v with *p using an HLE_ACQUIRE-prefixed xchg and
// returns the value that was stored in *p before the exchange.
2686 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2687   __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2688   return v;
2689 }
2690 
2691 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2692 
2693 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2694   TCW_4(*lck, 0);
2695 }
2696 
2697 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2698   // Use gtid for KMP_LOCK_BUSY if necessary
2699   if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2700     int delay = 1;
2701     do {
2702       while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2703         for (int i = delay; i != 0; --i)
2704           KMP_CPU_PAUSE();
2705         delay = ((delay << 1) | 1) & 7;
2706       }
2707     } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2708   }
2709 }
2710 
2711 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2712                                                kmp_int32 gtid) {
2713   __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2714 }
2715 
// Releases an HLE lock by storing KMP_LOCK_FREE(hle) into the lock word with
// an HLE_RELEASE-prefixed movl. gtid is not used. Always returns
// KMP_LOCK_RELEASED.
2716 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2717   __asm__ volatile(HLE_RELEASE "movl %1,%0"
2718                    : "=m"(*lck)
2719                    : "r"(KMP_LOCK_FREE(hle))
2720                    : "memory");
2721   return KMP_LOCK_RELEASED;
2722 }
2723 
2724 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2725                                               kmp_int32 gtid) {
2726   return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2727 }
2728 
2729 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2730   return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2731 }
2732 
2733 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2734                                            kmp_int32 gtid) {
2735   return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2736 }
2737 
2738 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2739   __kmp_init_queuing_lock(lck);
2740 }
2741 
2742 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2743   __kmp_destroy_queuing_lock(lck);
2744 }
2745 
2746 static void
2747 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2748   __kmp_destroy_queuing_lock_with_checks(lck);
2749 }
2750 
2751 KMP_ATTRIBUTE_TARGET_RTM
2752 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2753                                            kmp_int32 gtid) {
2754   unsigned retries = 3, status;
2755   do {
2756     status = _xbegin();
2757     if (status == _XBEGIN_STARTED) {
2758       if (__kmp_is_unlocked_queuing_lock(lck))
2759         return;
2760       _xabort(0xff);
2761     }
2762     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2763       // Wait until lock becomes free
2764       while (!__kmp_is_unlocked_queuing_lock(lck)) {
2765         KMP_YIELD(TRUE);
2766       }
2767     } else if (!(status & _XABORT_RETRY))
2768       break;
2769   } while (retries--);
2770 
2771   // Fall-back non-speculative lock (xchg)
2772   __kmp_acquire_queuing_lock(lck, gtid);
2773 }
2774 
2775 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2776                                                        kmp_int32 gtid) {
2777   __kmp_acquire_rtm_queuing_lock(lck, gtid);
2778 }
2779 
2780 KMP_ATTRIBUTE_TARGET_RTM
2781 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2782                                           kmp_int32 gtid) {
2783   if (__kmp_is_unlocked_queuing_lock(lck)) {
2784     // Releasing from speculation
2785     _xend();
2786   } else {
2787     // Releasing from a real lock
2788     __kmp_release_queuing_lock(lck, gtid);
2789   }
2790   return KMP_LOCK_RELEASED;
2791 }
2792 
2793 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2794                                                       kmp_int32 gtid) {
2795   return __kmp_release_rtm_queuing_lock(lck, gtid);
2796 }
2797 
2798 KMP_ATTRIBUTE_TARGET_RTM
2799 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800                                        kmp_int32 gtid) {
2801   unsigned retries = 3, status;
2802   do {
2803     status = _xbegin();
2804     if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2805       return 1;
2806     }
2807     if (!(status & _XABORT_RETRY))
2808       break;
2809   } while (retries--);
2810 
2811   return __kmp_test_queuing_lock(lck, gtid);
2812 }
2813 
2814 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2815                                                    kmp_int32 gtid) {
2816   return __kmp_test_rtm_queuing_lock(lck, gtid);
2817 }
2818 
2819 // Reuse kmp_tas_lock_t for the TSX lock that uses RTM with a fall-back spin
// lock; the functions below operate on its lk.poll word.
2820 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2821 
2822 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2823   KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2824 }
2825 
2826 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2827   __kmp_destroy_rtm_spin_lock(lck);
2828 }
2829 
2830 KMP_ATTRIBUTE_TARGET_RTM
2831 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2832                                        kmp_int32 gtid) {
2833   unsigned retries = 3, status;
2834   kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2835   kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2836   do {
2837     status = _xbegin();
2838     if (status == _XBEGIN_STARTED) {
2839       if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2840         return KMP_LOCK_ACQUIRED_FIRST;
2841       _xabort(0xff);
2842     }
2843     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2844       // Wait until lock becomes free
2845       while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2846         KMP_YIELD(TRUE);
2847       }
2848     } else if (!(status & _XABORT_RETRY))
2849       break;
2850   } while (retries--);
2851 
2852   // Fall-back spin lock
2853   KMP_FSYNC_PREPARE(lck);
2854   kmp_backoff_t backoff = __kmp_spin_backoff_params;
2855   while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2856          !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2857     __kmp_spin_backoff(&backoff);
2858   }
2859   KMP_FSYNC_ACQUIRED(lck);
2860   return KMP_LOCK_ACQUIRED_FIRST;
2861 }
2862 
2863 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2864                                                    kmp_int32 gtid) {
2865   return __kmp_acquire_rtm_spin_lock(lck, gtid);
2866 }
2867 
2868 KMP_ATTRIBUTE_TARGET_RTM
2869 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2870                                        kmp_int32 gtid) {
2871   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2872     // Releasing from speculation
2873     _xend();
2874   } else {
2875     // Releasing from a real lock
2876     KMP_FSYNC_RELEASING(lck);
2877     KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2878   }
2879   return KMP_LOCK_RELEASED;
2880 }
2881 
2882 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2883                                                    kmp_int32 gtid) {
2884   return __kmp_release_rtm_spin_lock(lck, gtid);
2885 }
2886 
2887 KMP_ATTRIBUTE_TARGET_RTM
2888 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2889   unsigned retries = 3, status;
2890   kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2891   kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2892   do {
2893     status = _xbegin();
2894     if (status == _XBEGIN_STARTED &&
2895         KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2896       return TRUE;
2897     }
2898     if (!(status & _XABORT_RETRY))
2899       break;
2900   } while (retries--);
2901 
2902   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2903       __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2904     KMP_FSYNC_ACQUIRED(lck);
2905     return TRUE;
2906   }
2907   return FALSE;
2908 }
2909 
2910 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2911                                                 kmp_int32 gtid) {
2912   return __kmp_test_rtm_spin_lock(lck, gtid);
2913 }
2914 
2915 #endif // KMP_USE_TSX
2916 
2917 // Entry functions for indirect locks (first element of direct lock jump tables)
// Declared here because the jump tables below reference them before their
// definitions appear further down in this file.
2918 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2919                                      kmp_dyna_lockseq_t tag);
2920 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2921 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2922 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2923 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2924 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2925                                                kmp_int32);
2926 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2927                                                  kmp_int32);
2928 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2929                                                 kmp_int32);
2930 
2931 // Lock function definitions for the union parameter type
// For each of the ticket, queuing and drdpa lock kinds, the macros below
// generate overloads that take a kmp_user_lock_p and forward to the typed
// function of the same name, passing the address of the matching union member
// (&lock->ticket, &lock->queuing, &lock->drdpa).
2932 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2933 
// expand1: void __kmp_<op>_<lk>_lock(kmp_user_lock_p)
2934 #define expand1(lk, op)                                                        \
2935   static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
2936     __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
2937   }
// expand2: int __kmp_<op>_<lk>_lock(kmp_user_lock_p, kmp_int32 gtid)
2938 #define expand2(lk, op)                                                        \
2939   static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
2940                                         kmp_int32 gtid) {                      \
2941     return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
2942   }
// expand3: void __kmp_set_<lk>_lock_flags(kmp_user_lock_p, flags); op is unused
2943 #define expand3(lk, op)                                                        \
2944   static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
2945                                             kmp_lock_flags_t flags) {          \
2946     __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
2947   }
// expand4: void __kmp_set_<lk>_lock_location(kmp_user_lock_p, loc); op is unused
2948 #define expand4(lk, op)                                                        \
2949   static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
2950                                                const ident_t *loc) {           \
2951     __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
2952   }
2953 
// Instantiate the wrappers for every operation and lock kind.
2954 KMP_FOREACH_LOCK_KIND(expand1, init)
2955 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2956 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2957 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2958 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2959 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2960 KMP_FOREACH_LOCK_KIND(expand2, release)
2961 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2962 KMP_FOREACH_LOCK_KIND(expand2, test)
2963 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2964 KMP_FOREACH_LOCK_KIND(expand3, )
2965 KMP_FOREACH_LOCK_KIND(expand4, )
2966 
// The generator macros are only needed for the instantiations above.
2967 #undef expand1
2968 #undef expand2
2969 #undef expand3
2970 #undef expand4
2971 
2972 // Jump tables for the direct lock functions
2973 // Only fill in the odd entries, that avoids the need to shift out the low bit
// Entry 0 of every direct table is the indirect-lock entry function; each
// direct lock kind then contributes a 0 placeholder followed by its function.
2974 
2975 // init functions
// Every direct lock kind shares __kmp_init_direct_lock as its initializer.
2976 #define expand(l, op) 0, __kmp_init_direct_lock,
2977 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2978     __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2979 #undef expand
2980 
2981 // destroy functions
// direct_destroy holds the unchecked __kmp_destroy_<l>_lock functions and
// direct_destroy_check the *_with_checks ones; both use
// __kmp_destroy_indirect_lock as entry 0. The per-kind functions are cast to
// the common kmp_dyna_lock_t* signature.
2982 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2983 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2984     __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2985 #undef expand
2986 #define expand(l, op)                                                          \
2987   0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2988 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2989     __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2990 #undef expand
2991 
2992 // set/acquire functions
// direct_set holds the unchecked __kmp_acquire_<l>_lock functions and
// direct_set_check the *_with_checks ones; entry 0 is the matching indirect
// set function.
2993 #define expand(l, op)                                                          \
2994   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2995 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2996     __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2997 #undef expand
2998 #define expand(l, op)                                                          \
2999   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3000 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3001     __kmp_set_indirect_lock_with_checks, 0,
3002     KMP_FOREACH_D_LOCK(expand, acquire)};
3003 #undef expand
3004 
3005 // unset/release and test functions
// One expand definition serves both the release and the test table; the
// operation name is supplied through the op argument. The first pair of
// tables is unchecked, the second pair uses the *_with_checks functions.
3006 #define expand(l, op)                                                          \
3007   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3008 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3009     __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3010 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3011     __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3012 #undef expand
3013 #define expand(l, op)                                                          \
3014   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3015 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3016     __kmp_unset_indirect_lock_with_checks, 0,
3017     KMP_FOREACH_D_LOCK(expand, release)};
3018 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019     __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3020 #undef expand
3021 
3022 // Exposes only one set of jump tables (*lock or *lock_with_checks).
// These start out null; __kmp_init_dynamic_user_locks points them at the
// checked or unchecked tables depending on __kmp_env_consistency_check.
3023 void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3024 int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3025 int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3026 int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3027 
3028 // Jump tables for the indirect lock functions
// One entry per indirect lock kind (KMP_FOREACH_I_LOCK), each cast to the
// common kmp_user_lock_p signature.
3029 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3030 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3031     KMP_FOREACH_I_LOCK(expand, init)};
3032 #undef expand
3033 
// destroy functions: unchecked table first, then the *_with_checks table.
3034 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3035 static void (*indirect_destroy[])(kmp_user_lock_p) = {
3036     KMP_FOREACH_I_LOCK(expand, destroy)};
3037 #undef expand
3038 #define expand(l, op)                                                          \
3039   (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3040 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3041     KMP_FOREACH_I_LOCK(expand, destroy)};
3042 #undef expand
3043 
3044 // set/acquire functions
// indirect_set holds the unchecked __kmp_acquire_<l>_lock functions and
// indirect_set_check the *_with_checks ones.
3045 #define expand(l, op)                                                          \
3046   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3047 static int (*indirect_set[])(kmp_user_lock_p,
3048                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3049 #undef expand
3050 #define expand(l, op)                                                          \
3051   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3052 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3053     KMP_FOREACH_I_LOCK(expand, acquire)};
3054 #undef expand
3055 
3056 // unset/release and test functions
// One expand definition serves both the release and the test table; the
// first pair of tables is unchecked, the second pair uses the *_with_checks
// functions.
3057 #define expand(l, op)                                                          \
3058   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3059 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3060     KMP_FOREACH_I_LOCK(expand, release)};
3061 static int (*indirect_test[])(kmp_user_lock_p,
3062                               kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3063 #undef expand
3064 #define expand(l, op)                                                          \
3065   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3066 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3067     KMP_FOREACH_I_LOCK(expand, release)};
3068 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3069     KMP_FOREACH_I_LOCK(expand, test)};
3070 #undef expand
3071 
3072 // Exposes only one set of jump tables (*lock or *lock_with_checks).
// These start out null; __kmp_init_dynamic_user_locks points them at the
// checked or unchecked tables depending on __kmp_env_consistency_check.
3073 void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3074 int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3075 int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3076 int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3077 
3078 // Lock index table.
// Grown and indexed by __kmp_allocate_indirect_lock (fields table, size, next).
3079 kmp_indirect_lock_table_t __kmp_i_lock_table;
3080 
3081 // Size of indirect locks.
// Indexed by lock tag; used as the allocation size of a new base lock object.
3082 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3083 
3084 // Jump tables for lock accessor/modifier.
// Indexed by lock tag; all entries start out null.
3085 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3086                                                      const ident_t *) = {0};
3087 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3088                                                   kmp_lock_flags_t) = {0};
3089 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3090     kmp_user_lock_p) = {0};
3091 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3092     kmp_user_lock_p) = {0};
3093 
3094 // Use different lock pools for different lock types.
// Each entry is the head of a list of destroyed locks of that tag, linked
// through the base lock's pool.next field.
3095 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3096 
3097 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3098 // the indirect lock table holds the address and type of the allocated indirect
3099 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3100 // full. A destroyed indirect lock object is returned to the reusable pool of
3101 // locks, unique to each lock type.
3102 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3103                                                   kmp_int32 gtid,
3104                                                   kmp_indirect_locktag_t tag) {
3105   kmp_indirect_lock_t *lck;
3106   kmp_lock_index_t idx;
3107 
3108   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3109 
3110   if (__kmp_indirect_lock_pool[tag] != NULL) {
3111     // Reuse the allocated and destroyed lock object
3112     lck = __kmp_indirect_lock_pool[tag];
3113     if (OMP_LOCK_T_SIZE < sizeof(void *))
3114       idx = lck->lock->pool.index;
3115     __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3116     KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3117                   lck));
3118   } else {
3119     idx = __kmp_i_lock_table.next;
3120     // Check capacity and double the size if it is full
3121     if (idx == __kmp_i_lock_table.size) {
3122       // Double up the space for block pointers
3123       int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3124       kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3125           2 * row * sizeof(kmp_indirect_lock_t *));
3126       KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3127                  row * sizeof(kmp_indirect_lock_t *));
3128       kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3129       __kmp_i_lock_table.table = new_table;
3130       __kmp_free(old_table);
3131       // Allocate new objects in the new blocks
3132       for (int i = row; i < 2 * row; ++i)
3133         *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3134             KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3135       __kmp_i_lock_table.size = 2 * idx;
3136     }
3137     __kmp_i_lock_table.next++;
3138     lck = KMP_GET_I_LOCK(idx);
3139     // Allocate a new base lock object
3140     lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3141     KA_TRACE(20,
3142              ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3143   }
3144 
3145   __kmp_release_lock(&__kmp_global_lock, gtid);
3146 
3147   lck->type = tag;
3148 
3149   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3150     *((kmp_lock_index_t *)user_lock) = idx
3151                                        << 1; // indirect lock word must be even
3152   } else {
3153     *((kmp_indirect_lock_t **)user_lock) = lck;
3154   }
3155 
3156   return lck;
3157 }
3158 
3159 // User lock lookup for dynamically dispatched locks.
3160 static __forceinline kmp_indirect_lock_t *
3161 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3162   if (__kmp_env_consistency_check) {
3163     kmp_indirect_lock_t *lck = NULL;
3164     if (user_lock == NULL) {
3165       KMP_FATAL(LockIsUninitialized, func);
3166     }
3167     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3168       kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3169       if (idx >= __kmp_i_lock_table.size) {
3170         KMP_FATAL(LockIsUninitialized, func);
3171       }
3172       lck = KMP_GET_I_LOCK(idx);
3173     } else {
3174       lck = *((kmp_indirect_lock_t **)user_lock);
3175     }
3176     if (lck == NULL) {
3177       KMP_FATAL(LockIsUninitialized, func);
3178     }
3179     return lck;
3180   } else {
3181     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3182       return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3183     } else {
3184       return *((kmp_indirect_lock_t **)user_lock);
3185     }
3186   }
3187 }
3188 
3189 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3190                                      kmp_dyna_lockseq_t seq) {
3191 #if KMP_USE_ADAPTIVE_LOCKS
3192   if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3193     KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3194     seq = lockseq_queuing;
3195   }
3196 #endif
3197 #if KMP_USE_TSX
3198   if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
3199     seq = lockseq_queuing;
3200   }
3201 #endif
3202   kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3203   kmp_indirect_lock_t *l =
3204       __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3205   KMP_I_LOCK_FUNC(l, init)(l->lock);
3206   KA_TRACE(
3207       20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3208            seq));
3209 }
3210 
3211 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3212   kmp_uint32 gtid = __kmp_entry_gtid();
3213   kmp_indirect_lock_t *l =
3214       __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3215   KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3216   kmp_indirect_locktag_t tag = l->type;
3217 
3218   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3219 
3220   // Use the base lock's space to keep the pool chain.
3221   l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3222   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3223     l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3224   }
3225   __kmp_indirect_lock_pool[tag] = l;
3226 
3227   __kmp_release_lock(&__kmp_global_lock, gtid);
3228 }
3229 
3230 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3231   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3232   return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3233 }
3234 
3235 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3236   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3237   return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3238 }
3239 
3240 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3241   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3242   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3243 }
3244 
3245 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3246                                                kmp_int32 gtid) {
3247   kmp_indirect_lock_t *l =
3248       __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3249   return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3250 }
3251 
3252 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3253                                                  kmp_int32 gtid) {
3254   kmp_indirect_lock_t *l =
3255       __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3256   return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3257 }
3258 
3259 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3260                                                 kmp_int32 gtid) {
3261   kmp_indirect_lock_t *l =
3262       __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3263   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3264 }
3265 
// Lock sequence for user locks; initialized to the queuing lock.
// NOTE(review): presumably the default kind used when the user does not
// request a specific one -- confirm against the callers.
3266 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3267 
3268 // This is used only in kmp_error.cpp when consistency checking is on.
3269 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3270   switch (seq) {
3271   case lockseq_tas:
3272   case lockseq_nested_tas:
3273     return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3274 #if KMP_USE_FUTEX
3275   case lockseq_futex:
3276   case lockseq_nested_futex:
3277     return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3278 #endif
3279   case lockseq_ticket:
3280   case lockseq_nested_ticket:
3281     return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3282   case lockseq_queuing:
3283   case lockseq_nested_queuing:
3284 #if KMP_USE_ADAPTIVE_LOCKS
3285   case lockseq_adaptive:
3286 #endif
3287     return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3288   case lockseq_drdpa:
3289   case lockseq_nested_drdpa:
3290     return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3291   default:
3292     return 0;
3293   }
3294 }
3295 
// Initializes data for dynamic user locks.
// The direct/indirect jump tables are (re)selected on every call according to
// __kmp_env_consistency_check. The remaining setup (index table allocation,
// per-tag lock sizes, location/flags accessor tables) runs only while
// __kmp_init_user_locks is false, and sets it to TRUE when done.
void __kmp_init_dynamic_user_locks() {
  // Initialize jump table for the lock functions
  if (__kmp_env_consistency_check) {
    __kmp_direct_set = direct_set_check;
    __kmp_direct_unset = direct_unset_check;
    __kmp_direct_test = direct_test_check;
    __kmp_direct_destroy = direct_destroy_check;
    __kmp_indirect_set = indirect_set_check;
    __kmp_indirect_unset = indirect_unset_check;
    __kmp_indirect_test = indirect_test_check;
    __kmp_indirect_destroy = indirect_destroy_check;
  } else {
    __kmp_direct_set = direct_set;
    __kmp_direct_unset = direct_unset;
    __kmp_direct_test = direct_test;
    __kmp_direct_destroy = direct_destroy;
    __kmp_indirect_set = indirect_set;
    __kmp_indirect_unset = indirect_unset;
    __kmp_indirect_test = indirect_test;
    __kmp_indirect_destroy = indirect_destroy;
  }
  // If the user locks have already been initialized, then return. Allow the
  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
  // new lock tables if they have already been allocated.
  if (__kmp_init_user_locks)
    return;

  // Initialize lock index table: a row array holding a single pointer, which
  // points at a first chunk of KMP_I_LOCK_CHUNK indirect lock entries.
  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
  __kmp_i_lock_table.table =
      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
  __kmp_i_lock_table.next = 0;

  // Indirect lock size, per lock tag. Nested tags use the size of the same
  // lock type as their plain counterpart; rtm_queuing uses the queuing size.
  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
#if KMP_USE_ADAPTIVE_LOCKS
  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);

// Initialize lock accessor/modifier
// fill_jumps assigns the ticket, queuing and drdpa entries of `table` for the
// tag family selected by `sep` ("_" for plain tags, "_nested_" for nested).
#define fill_jumps(table, expand, sep)                                         \
  {                                                                            \
    table[locktag##sep##ticket] = expand(ticket);                              \
    table[locktag##sep##queuing] = expand(queuing);                            \
    table[locktag##sep##drdpa] = expand(drdpa);                                \
  }

// fill_table fills both the plain and the nested tag families; when adaptive
// locks are enabled, the adaptive tag is given the queuing accessor.
#if KMP_USE_ADAPTIVE_LOCKS
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    table[locktag_adaptive] = expand(queuing);                                 \
    fill_jumps(table, expand, _nested_);                                       \
  }
#else
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    fill_jumps(table, expand, _nested_);                                       \
  }
#endif // KMP_USE_ADAPTIVE_LOCKS

// Each `expand` definition below casts the type-specific accessor to the
// generic signature stored in the corresponding table.
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l)                                                              \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l)                                                              \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}
3392 
3393 // Clean up the lock table.
3394 void __kmp_cleanup_indirect_user_locks() {
3395   kmp_lock_index_t i;
3396   int k;
3397 
3398   // Clean up locks in the pools first (they were already destroyed before going
3399   // into the pools).
3400   for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3401     kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3402     while (l != NULL) {
3403       kmp_indirect_lock_t *ll = l;
3404       l = (kmp_indirect_lock_t *)l->lock->pool.next;
3405       KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3406                     ll));
3407       __kmp_free(ll->lock);
3408       ll->lock = NULL;
3409     }
3410     __kmp_indirect_lock_pool[k] = NULL;
3411   }
3412   // Clean up the remaining undestroyed locks.
3413   for (i = 0; i < __kmp_i_lock_table.next; i++) {
3414     kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3415     if (l->lock != NULL) {
3416       // Locks not destroyed explicitly need to be destroyed here.
3417       KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3418       KA_TRACE(
3419           20,
3420           ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3421            l));
3422       __kmp_free(l->lock);
3423     }
3424   }
3425   // Free the table
3426   for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3427     __kmp_free(__kmp_i_lock_table.table[i]);
3428   __kmp_free(__kmp_i_lock_table.table);
3429 
3430   __kmp_init_user_locks = FALSE;
3431 }
3432 
// Definitions for the dynamic-lock build of the user lock kind selector
// (initially lk_default) and the locks-per-block setting (initially 1).
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3435 
3436 #else // KMP_USE_DYNAMIC_LOCK
3437 
// "With checks" initializer for TAS locks. Performs no validation of its own;
// it simply forwards to __kmp_init_tas_lock.
static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}
3441 
// "With checks" initializer for nested TAS locks. Performs no validation of
// its own; it simply forwards to __kmp_init_nested_tas_lock.
static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}
3445 
3446 #if KMP_USE_FUTEX
// "With checks" initializer for futex locks (only built when KMP_USE_FUTEX).
// Performs no validation of its own; it forwards to __kmp_init_futex_lock.
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}
3450 
// "With checks" initializer for nested futex locks (only built when
// KMP_USE_FUTEX). Performs no validation of its own; it forwards to
// __kmp_init_nested_futex_lock.
static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
3454 #endif
3455 
3456 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3457   return lck == lck->lk.self;
3458 }
3459 
// "With checks" initializer for ticket locks. Performs no validation of its
// own; it simply forwards to __kmp_init_ticket_lock.
static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}
3463 
// "With checks" initializer for nested ticket locks. Performs no validation of
// its own; it simply forwards to __kmp_init_nested_ticket_lock.
static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}
3467 
3468 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3469   return lck == lck->lk.initialized;
3470 }
3471 
// "With checks" initializer for queuing locks. Performs no validation of its
// own; it simply forwards to __kmp_init_queuing_lock.
static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}
3475 
// "With checks" initializer for nested queuing locks. Performs no validation
// of its own; it simply forwards to __kmp_init_nested_queuing_lock.
static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}
3480 
3481 #if KMP_USE_ADAPTIVE_LOCKS
// "With checks" initializer for adaptive locks (only built when
// KMP_USE_ADAPTIVE_LOCKS). Performs no validation of its own; it forwards to
// __kmp_init_adaptive_lock.
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
3485 #endif
3486 
3487 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3488   return lck == lck->lk.initialized;
3489 }
3490 
// "With checks" initializer for DRDPA locks. Performs no validation of its
// own; it simply forwards to __kmp_init_drdpa_lock.
static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}
3494 
// "With checks" initializer for nested DRDPA locks. Performs no validation of
// its own; it simply forwards to __kmp_init_nested_drdpa_lock.
static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}
3498 
/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

// Selected user lock kind; starts as lk_default.
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

// Sizes of the selected lock type (base struct and full lock object). Both are
// 0 until __kmp_set_user_lock_vptrs assigns them.
size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

// Function pointers for simple (non-nested) user locks. All start as NULL.
kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

// Function pointers for nested user locks. All start as NULL.
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

// Optional accessors. __kmp_set_user_lock_vptrs leaves these NULL for lock
// kinds that do not provide them (tas and futex).
int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;
3536 
// Binds the global user-lock sizes and function pointers to the implementation
// selected by `user_lock_kind`.
// For every supported kind this sets the base/full lock sizes, the owner
// query, the plain destroy routine, and (through the KMP_BIND_* macros, which
// are defined elsewhere) the remaining operation pointers, choosing the
// "with checks" bindings when __kmp_env_consistency_check is set.
// lk_default and unknown kinds hit KMP_ASSERT(0).
void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    // NOTE(review): there is no break here, so if KMP_ASSERT ever returned,
    // control would continue into the lk_tas case below.
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    // TAS locks provide no initialized/location/flags accessors.
    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    // Futex locks provide no initialized/location/flags accessors.
    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    // Adaptive locks reuse the queuing-lock owner, initialized, location and
    // flags accessors; only the bound operations and destroy are adaptive.
    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    // Only the non-nested bindings are made for adaptive locks.
    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}
3747 
// ----------------------------------------------------------------------------
// User lock table & lock allocation

// Global table of all allocated user locks. Slot 0 is reserved: it links to
// the previous (smaller) table, so real entries start at index 1.
// NOTE(review): the initializer is presumably {used, allocated, table} -
// confirm against the kmp_lock_table_t declaration.
kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
// Head of the free list of released user locks available for reuse.
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
// Head of the list of allocated lock blocks (most recent block first).
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3757 
3758 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3759   // Assume that kmp_global_lock is held upon entry/exit.
3760   kmp_lock_index_t index;
3761   if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3762     kmp_lock_index_t size;
3763     kmp_user_lock_p *table;
3764     // Reallocate lock table.
3765     if (__kmp_user_lock_table.allocated == 0) {
3766       size = 1024;
3767     } else {
3768       size = __kmp_user_lock_table.allocated * 2;
3769     }
3770     table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3771     KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3772                sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3773     table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3774     // We cannot free the previous table now, since it may be in use by other
3775     // threads. So save the pointer to the previous table in in the first
3776     // element of the new table. All the tables will be organized into a list,
3777     // and could be freed when library shutting down.
3778     __kmp_user_lock_table.table = table;
3779     __kmp_user_lock_table.allocated = size;
3780   }
3781   KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3782                    __kmp_user_lock_table.allocated);
3783   index = __kmp_user_lock_table.used;
3784   __kmp_user_lock_table.table[index] = lck;
3785   ++__kmp_user_lock_table.used;
3786   return index;
3787 }
3788 
3789 static kmp_user_lock_p __kmp_lock_block_allocate() {
3790   // Assume that kmp_global_lock is held upon entry/exit.
3791   static int last_index = 0;
3792   if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3793     // Restart the index.
3794     last_index = 0;
3795     // Need to allocate a new block.
3796     KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3797     size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3798     char *buffer =
3799         (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3800     // Set up the new block.
3801     kmp_block_of_locks *new_block =
3802         (kmp_block_of_locks *)(&buffer[space_for_locks]);
3803     new_block->next_block = __kmp_lock_blocks;
3804     new_block->locks = (void *)buffer;
3805     // Publish the new block.
3806     KMP_MB();
3807     __kmp_lock_blocks = new_block;
3808   }
3809   kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3810       ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3811   last_index++;
3812   return ret;
3813 }
3814 
3815 // Get memory for a lock. It may be freshly allocated memory or reused memory
3816 // from lock pool.
3817 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3818                                          kmp_lock_flags_t flags) {
3819   kmp_user_lock_p lck;
3820   kmp_lock_index_t index;
3821   KMP_DEBUG_ASSERT(user_lock);
3822 
3823   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3824 
3825   if (__kmp_lock_pool == NULL) {
3826     // Lock pool is empty. Allocate new memory.
3827 
3828     if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3829       lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3830     } else {
3831       lck = __kmp_lock_block_allocate();
3832     }
3833 
3834     // Insert lock in the table so that it can be freed in __kmp_cleanup,
3835     // and debugger has info on all allocated locks.
3836     index = __kmp_lock_table_insert(lck);
3837   } else {
3838     // Pick up lock from pool.
3839     lck = __kmp_lock_pool;
3840     index = __kmp_lock_pool->pool.index;
3841     __kmp_lock_pool = __kmp_lock_pool->pool.next;
3842   }
3843 
3844   // We could potentially differentiate between nested and regular locks
3845   // here, and do the lock table lookup for regular locks only.
3846   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3847     *((kmp_lock_index_t *)user_lock) = index;
3848   } else {
3849     *((kmp_user_lock_p *)user_lock) = lck;
3850   }
3851 
3852   // mark the lock if it is critical section lock.
3853   __kmp_set_user_lock_flags(lck, flags);
3854 
3855   __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper
3856 
3857   return lck;
3858 }
3859 
3860 // Put lock's memory to pool for reusing.
3861 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3862                           kmp_user_lock_p lck) {
3863   KMP_DEBUG_ASSERT(user_lock != NULL);
3864   KMP_DEBUG_ASSERT(lck != NULL);
3865 
3866   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3867 
3868   lck->pool.next = __kmp_lock_pool;
3869   __kmp_lock_pool = lck;
3870   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3871     kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3872     KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3873     lck->pool.index = index;
3874   }
3875 
3876   __kmp_release_lock(&__kmp_global_lock, gtid);
3877 }
3878 
3879 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3880   kmp_user_lock_p lck = NULL;
3881 
3882   if (__kmp_env_consistency_check) {
3883     if (user_lock == NULL) {
3884       KMP_FATAL(LockIsUninitialized, func);
3885     }
3886   }
3887 
3888   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3889     kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3890     if (__kmp_env_consistency_check) {
3891       if (!(0 < index && index < __kmp_user_lock_table.used)) {
3892         KMP_FATAL(LockIsUninitialized, func);
3893       }
3894     }
3895     KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3896     KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3897     lck = __kmp_user_lock_table.table[index];
3898   } else {
3899     lck = *((kmp_user_lock_p *)user_lock);
3900   }
3901 
3902   if (__kmp_env_consistency_check) {
3903     if (lck == NULL) {
3904       KMP_FATAL(LockIsUninitialized, func);
3905     }
3906   }
3907 
3908   return lck;
3909 }
3910 
// Tears down all user-lock bookkeeping: destroys every lock in the lock table
// that still looks initialized, frees individually allocated locks, frees the
// chain of lock tables and the lock blocks, and finally clears
// __kmp_init_user_locks.
void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about lock in the pool--we will free them when
  // iterating through lock table (it includes all the locks, dead or alive).
  __kmp_lock_pool = NULL;

// True when a flags accessor is bound and the lock carries the
// critical-section flag.
#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0], it is reserved for lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized.  When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list.  This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop.  However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // reduce __kmp_user_lock_table.used before freeing the lock,
    // so that state of locks is consistent
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    // Lock kinds without an "is initialized" accessor (it is NULL for them)
    // skip the destroy step entirely.
    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used. Block-allocated
    // locks live inside the block buffers, which are released further below.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    // Read the link first: the block header lives inside the buffer that is
    // about to be freed.
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}
4014 
4015 #endif // KMP_USE_DYNAMIC_LOCK
4016