xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_wait_release.h (revision f976241773df2260e6170317080761d1c5814fe5)
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
23 /*!
24 @defgroup WAIT_RELEASE Wait/Release operations
25 
26 The definitions and functions here implement the lowest level thread
27 synchronizations of suspending a thread and awaking it. They are used to build
28 higher level operations such as barriers and fork/join.
29 */
30 
31 /*!
32 @ingroup WAIT_RELEASE
33 @{
34 */
35 
36 /*!
37  * The flag_type describes the storage used for the flag.
38  */
enum flag_type {
  /** 32 bit flags */
  flag32 = 0,
  /** 64 bit flags */
  flag64 = 1,
  /** special 64-bit flag for on-core barrier (hierarchical) */
  flag_oncore = 2
};
44 
45 /*!
46  * Base class for wait/release volatile flag
47  */
48 template <typename P> class kmp_flag_native {
49   volatile P *loc;
50   flag_type t;
51 
52 public:
53   typedef P flag_t;
54   kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
55   volatile P *get() { return loc; }
56   void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
57   void set(volatile P *new_loc) { loc = new_loc; }
58   flag_type get_type() { return t; }
59   P load() { return *loc; }
60   void store(P val) { *loc = val; }
61 };
62 
63 /*!
64  * Base class for wait/release atomic flag
65  */
66 template <typename P> class kmp_flag {
67   std::atomic<P>
68       *loc; /**< Pointer to the flag storage that is modified by another thread
69              */
70   flag_type t; /**< "Type" of the flag in loc */
71 public:
72   typedef P flag_t;
73   kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
74   /*!
75    * @result the pointer to the actual flag
76    */
77   std::atomic<P> *get() { return loc; }
78   /*!
79    * @result void* pointer to the actual flag
80    */
81   void *get_void_p() { return RCAST(void *, loc); }
82   /*!
83    * @param new_loc in   set loc to point at new_loc
84    */
85   void set(std::atomic<P> *new_loc) { loc = new_loc; }
86   /*!
87    * @result the flag_type
88    */
89   flag_type get_type() { return t; }
90   /*!
91    * @result flag value
92    */
93   P load() { return loc->load(std::memory_order_acquire); }
94   /*!
95    * @param val the new flag value to be stored
96    */
97   void store(P val) { loc->store(val, std::memory_order_release); }
98   // Derived classes must provide the following:
99   /*
100   kmp_info_t * get_waiter(kmp_uint32 i);
101   kmp_uint32 get_num_waiters();
102   bool done_check();
103   bool done_check_val(P old_loc);
104   bool notdone_check();
105   P internal_release();
106   void suspend(int th_gtid);
107   void resume(int th_gtid);
108   P set_sleeping();
109   P unset_sleeping();
110   bool is_sleeping();
111   bool is_any_sleeping();
112   bool is_sleeping_val(P old_loc);
113   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
114                     int *thread_finished
115                     USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
116                     is_constrained);
117   */
118 };
119 
120 #if OMPT_SUPPORT
121 OMPT_NOINLINE
122 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
123                                      ompt_state_t ompt_state,
124                                      ompt_data_t *tId) {
125   int ds_tid = this_thr->th.th_info.ds.ds_tid;
126   if (ompt_state == ompt_state_wait_barrier_implicit) {
127     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
128 #if OMPT_OPTIONAL
129     void *codeptr = NULL;
130     if (ompt_enabled.ompt_callback_sync_region_wait) {
131       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
132           ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
133           codeptr);
134     }
135     if (ompt_enabled.ompt_callback_sync_region) {
136       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
137           ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
138           codeptr);
139     }
140 #endif
141     if (!KMP_MASTER_TID(ds_tid)) {
142       if (ompt_enabled.ompt_callback_implicit_task) {
143         ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
144             ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
145       }
146       // return to idle state
147       this_thr->th.ompt_thread_info.state = ompt_state_idle;
148     } else {
149       this_thr->th.ompt_thread_info.state = ompt_state_overhead;
150     }
151   }
152 }
153 #endif
154 
155 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
156    __kmp_wait_*  must make certain that another thread calls __kmp_release
157    to wake it back up to prevent deadlocks!
158 
159    NOTE: We may not belong to a team at this point.  */
160 template <class C, int final_spin, bool cancellable = false,
161           bool sleepable = true>
162 static inline bool
163 __kmp_wait_template(kmp_info_t *this_thr,
164                     C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
165 #if USE_ITT_BUILD && USE_ITT_NOTIFY
166   volatile void *spin = flag->get();
167 #endif
168   kmp_uint32 spins;
169   int th_gtid;
170   int tasks_completed = FALSE;
171   int oversubscribed;
172 #if !KMP_USE_MONITOR
173   kmp_uint64 poll_count;
174   kmp_uint64 hibernate_goal;
175 #else
176   kmp_uint32 hibernate;
177 #endif
178 
179   KMP_FSYNC_SPIN_INIT(spin, NULL);
180   if (flag->done_check()) {
181     KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
182     return false;
183   }
184   th_gtid = this_thr->th.th_info.ds.ds_gtid;
185   if (cancellable) {
186     kmp_team_t *team = this_thr->th.th_team;
187     if (team && team->t.t_cancel_request == cancel_parallel)
188       return true;
189   }
190 #if KMP_OS_UNIX
191   if (final_spin)
192     KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
193 #endif
194   KA_TRACE(20,
195            ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
196 #if KMP_STATS_ENABLED
197   stats_state_e thread_state = KMP_GET_THREAD_STATE();
198 #endif
199 
200 /* OMPT Behavior:
201 THIS function is called from
202   __kmp_barrier (2 times)  (implicit or explicit barrier in parallel regions)
203             these have join / fork behavior
204 
205        In these cases, we don't change the state or trigger events in THIS
206 function.
207        Events are triggered in the calling code (__kmp_barrier):
208 
209                 state := ompt_state_overhead
210             barrier-begin
211             barrier-wait-begin
212                 state := ompt_state_wait_barrier
213           call join-barrier-implementation (finally arrive here)
214           {}
215           call fork-barrier-implementation (finally arrive here)
216           {}
217                 state := ompt_state_overhead
218             barrier-wait-end
219             barrier-end
220                 state := ompt_state_work_parallel
221 
222 
223   __kmp_fork_barrier  (after thread creation, before executing implicit task)
224           call fork-barrier-implementation (finally arrive here)
225           {} // worker arrive here with state = ompt_state_idle
226 
227 
228   __kmp_join_barrier  (implicit barrier at end of parallel region)
229                 state := ompt_state_barrier_implicit
230             barrier-begin
231             barrier-wait-begin
232           call join-barrier-implementation (finally arrive here
233 final_spin=FALSE)
234           {
235           }
236   __kmp_fork_barrier  (implicit barrier at end of parallel region)
237           call fork-barrier-implementation (finally arrive here final_spin=TRUE)
238 
239        Worker after task-team is finished:
240             barrier-wait-end
241             barrier-end
242             implicit-task-end
243             idle-begin
244                 state := ompt_state_idle
245 
246        Before leaving, if state = ompt_state_idle
247             idle-end
248                 state := ompt_state_overhead
249 */
250 #if OMPT_SUPPORT
251   ompt_state_t ompt_entry_state;
252   ompt_data_t *tId;
253   if (ompt_enabled.enabled) {
254     ompt_entry_state = this_thr->th.ompt_thread_info.state;
255     if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
256         KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
257       ompt_lw_taskteam_t *team =
258           this_thr->th.th_team->t.ompt_serialized_team_info;
259       if (team) {
260         tId = &(team->ompt_task_info.task_data);
261       } else {
262         tId = OMPT_CUR_TASK_DATA(this_thr);
263       }
264     } else {
265       tId = &(this_thr->th.ompt_thread_info.task_data);
266     }
267     if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
268                        this_thr->th.th_task_team == NULL)) {
269       // implicit task is done. Either no taskqueue, or task-team finished
270       __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
271     }
272   }
273 #endif
274 
275   KMP_INIT_YIELD(spins); // Setup for waiting
276 
277   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
278       __kmp_pause_status == kmp_soft_paused) {
279 #if KMP_USE_MONITOR
280 // The worker threads cannot rely on the team struct existing at this point.
281 // Use the bt values cached in the thread struct instead.
282 #ifdef KMP_ADJUST_BLOCKTIME
283     if (__kmp_pause_status == kmp_soft_paused ||
284         (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
285       // Force immediate suspend if not set by user and more threads than
286       // available procs
287       hibernate = 0;
288     else
289       hibernate = this_thr->th.th_team_bt_intervals;
290 #else
291     hibernate = this_thr->th.th_team_bt_intervals;
292 #endif /* KMP_ADJUST_BLOCKTIME */
293 
294     /* If the blocktime is nonzero, we want to make sure that we spin wait for
295        the entirety of the specified #intervals, plus up to one interval more.
296        This increment make certain that this thread doesn't go to sleep too
297        soon.  */
298     if (hibernate != 0)
299       hibernate++;
300 
301     // Add in the current time value.
302     hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
303     KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
304                   th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
305                   hibernate - __kmp_global.g.g_time.dt.t_value));
306 #else
307     if (__kmp_pause_status == kmp_soft_paused) {
308       // Force immediate suspend
309       hibernate_goal = KMP_NOW();
310     } else
311       hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
312     poll_count = 0;
313 #endif // KMP_USE_MONITOR
314   }
315 
316   oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
317   KMP_MB();
318 
319   // Main wait spin loop
320   while (flag->notdone_check()) {
321     kmp_task_team_t *task_team = NULL;
322     if (__kmp_tasking_mode != tskm_immediate_exec) {
323       task_team = this_thr->th.th_task_team;
324       /* If the thread's task team pointer is NULL, it means one of 3 things:
325          1) A newly-created thread is first being released by
326          __kmp_fork_barrier(), and its task team has not been set up yet.
327          2) All tasks have been executed to completion.
328          3) Tasking is off for this region.  This could be because we are in a
329          serialized region (perhaps the outer one), or else tasking was manually
330          disabled (KMP_TASKING=0).  */
331       if (task_team != NULL) {
332         if (TCR_SYNC_4(task_team->tt.tt_active)) {
333           if (KMP_TASKING_ENABLED(task_team))
334             flag->execute_tasks(
335                 this_thr, th_gtid, final_spin,
336                 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
337           else
338             this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
339         } else {
340           KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
341 #if OMPT_SUPPORT
342           // task-team is done now, other cases should be catched above
343           if (final_spin && ompt_enabled.enabled)
344             __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
345 #endif
346           this_thr->th.th_task_team = NULL;
347           this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
348         }
349       } else {
350         this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
351       } // if
352     } // if
353 
354     KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
355     if (TCR_4(__kmp_global.g.g_done)) {
356       if (__kmp_global.g.g_abort)
357         __kmp_abort_thread();
358       break;
359     }
360 
361     // If we are oversubscribed, or have waited a bit (and
362     // KMP_LIBRARY=throughput), then yield
363     KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
364 
365 #if KMP_STATS_ENABLED
366     // Check if thread has been signalled to idle state
367     // This indicates that the logical "join-barrier" has finished
368     if (this_thr->th.th_stats->isIdle() &&
369         KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
370       KMP_SET_THREAD_STATE(IDLE);
371       KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
372     }
373 #endif
374     // Check if the barrier surrounding this wait loop has been cancelled
375     if (cancellable) {
376       kmp_team_t *team = this_thr->th.th_team;
377       if (team && team->t.t_cancel_request == cancel_parallel)
378         break;
379     }
380 
381     // Don't suspend if KMP_BLOCKTIME is set to "infinite"
382     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
383         __kmp_pause_status != kmp_soft_paused)
384       continue;
385 
386     // Don't suspend if there is a likelihood of new tasks being spawned.
387     if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
388       continue;
389 
390 #if KMP_USE_MONITOR
391     // If we have waited a bit more, fall asleep
392     if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
393       continue;
394 #else
395     if (KMP_BLOCKING(hibernate_goal, poll_count++))
396       continue;
397 #endif
398     // Don't suspend if wait loop designated non-sleepable
399     // in template parameters
400     if (!sleepable)
401       continue;
402 
403     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
404         __kmp_pause_status != kmp_soft_paused)
405       continue;
406 
407     KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
408 
409 #if KMP_OS_UNIX
410     if (final_spin)
411       KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
412 #endif
413     flag->suspend(th_gtid);
414 #if KMP_OS_UNIX
415     if (final_spin)
416       KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
417 #endif
418 
419     if (TCR_4(__kmp_global.g.g_done)) {
420       if (__kmp_global.g.g_abort)
421         __kmp_abort_thread();
422       break;
423     } else if (__kmp_tasking_mode != tskm_immediate_exec &&
424                this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
425       this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
426     }
427     // TODO: If thread is done with work and times out, disband/free
428   }
429 
430 #if OMPT_SUPPORT
431   ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
432   if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
433 #if OMPT_OPTIONAL
434     if (final_spin) {
435       __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
436       ompt_exit_state = this_thr->th.ompt_thread_info.state;
437     }
438 #endif
439     if (ompt_exit_state == ompt_state_idle) {
440       this_thr->th.ompt_thread_info.state = ompt_state_overhead;
441     }
442   }
443 #endif
444 #if KMP_STATS_ENABLED
445   // If we were put into idle state, pop that off the state stack
446   if (KMP_GET_THREAD_STATE() == IDLE) {
447     KMP_POP_PARTITIONED_TIMER();
448     KMP_SET_THREAD_STATE(thread_state);
449     this_thr->th.th_stats->resetIdleFlag();
450   }
451 #endif
452 
453 #if KMP_OS_UNIX
454   if (final_spin)
455     KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
456 #endif
457   KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
458   if (cancellable) {
459     kmp_team_t *team = this_thr->th.th_team;
460     if (team && team->t.t_cancel_request == cancel_parallel) {
461       if (tasks_completed) {
462         // undo the previous decrement of unfinished_threads so that the
463         // thread can decrement at the join barrier with no problem
464         kmp_task_team_t *task_team = this_thr->th.th_task_team;
465         std::atomic<kmp_int32> *unfinished_threads =
466             &(task_team->tt.tt_unfinished_threads);
467         KMP_ATOMIC_INC(unfinished_threads);
468       }
469       return true;
470     }
471   }
472   return false;
473 }
474 
/* Release any threads specified as waiting on the flag by releasing the flag
   and resume the waiting thread if indicated by the sleep bit(s). For every
   thread that waits in __kmp_wait_template, some other thread must call this
   function to wake up the potentially sleeping waiter and prevent deadlocks!  */
479 template <class C> static inline void __kmp_release_template(C *flag) {
480 #ifdef KMP_DEBUG
481   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
482 #endif
483   KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
484   KMP_DEBUG_ASSERT(flag->get());
485   KMP_FSYNC_RELEASING(flag->get_void_p());
486 
487   flag->internal_release();
488 
489   KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
490                  flag->load()));
491 
492   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
493     // Only need to check sleep stuff if infinite block time not set.
494     // Are *any* threads waiting on flag sleeping?
495     if (flag->is_any_sleeping()) {
496       for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
497         // if sleeping waiter exists at i, sets current_waiter to i inside flag
498         kmp_info_t *waiter = flag->get_waiter(i);
499         if (waiter) {
500           int wait_gtid = waiter->th.th_info.ds.ds_gtid;
501           // Wake up thread if needed
502           KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
503                         "flag(%p) set\n",
504                         gtid, wait_gtid, flag->get()));
505           flag->resume(wait_gtid); // unsets flag's current_waiter when done
506         }
507       }
508     }
509   }
510 }
511 
512 template <typename FlagType> struct flag_traits {};
513 
514 template <> struct flag_traits<kmp_uint32> {
515   typedef kmp_uint32 flag_t;
516   static const flag_type t = flag32;
517   static inline flag_t tcr(flag_t f) { return TCR_4(f); }
518   static inline flag_t test_then_add4(volatile flag_t *f) {
519     return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
520   }
521   static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
522     return KMP_TEST_THEN_OR32(f, v);
523   }
524   static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
525     return KMP_TEST_THEN_AND32(f, v);
526   }
527 };
528 
529 template <> struct flag_traits<kmp_uint64> {
530   typedef kmp_uint64 flag_t;
531   static const flag_type t = flag64;
532   static inline flag_t tcr(flag_t f) { return TCR_8(f); }
533   static inline flag_t test_then_add4(volatile flag_t *f) {
534     return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
535   }
536   static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
537     return KMP_TEST_THEN_OR64(f, v);
538   }
539   static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
540     return KMP_TEST_THEN_AND64(f, v);
541   }
542 };
543 
544 // Basic flag that does not use C11 Atomics
545 template <typename FlagType>
546 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
547   typedef flag_traits<FlagType> traits_type;
548   FlagType checker; /**< Value to compare flag to to check if flag has been
549                        released. */
550   kmp_info_t
551       *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
552   kmp_uint32
553       num_waiting_threads; /**< Number of threads sleeping on this thread. */
554 public:
555   kmp_basic_flag_native(volatile FlagType *p)
556       : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
557   kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
558       : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
559     waiting_threads[0] = thr;
560   }
561   kmp_basic_flag_native(volatile FlagType *p, FlagType c)
562       : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
563         num_waiting_threads(0) {}
564   /*!
565    * param i in   index into waiting_threads
566    * @result the thread that is waiting at index i
567    */
568   kmp_info_t *get_waiter(kmp_uint32 i) {
569     KMP_DEBUG_ASSERT(i < num_waiting_threads);
570     return waiting_threads[i];
571   }
572   /*!
573    * @result num_waiting_threads
574    */
575   kmp_uint32 get_num_waiters() { return num_waiting_threads; }
576   /*!
577    * @param thr in   the thread which is now waiting
578    *
579    * Insert a waiting thread at index 0.
580    */
581   void set_waiter(kmp_info_t *thr) {
582     waiting_threads[0] = thr;
583     num_waiting_threads = 1;
584   }
585   /*!
586    * @result true if the flag object has been released.
587    */
588   bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
589   /*!
590    * @param old_loc in   old value of flag
591    * @result true if the flag's old value indicates it was released.
592    */
593   bool done_check_val(FlagType old_loc) { return old_loc == checker; }
594   /*!
595    * @result true if the flag object is not yet released.
596    * Used in __kmp_wait_template like:
597    * @code
598    * while (flag.notdone_check()) { pause(); }
599    * @endcode
600    */
601   bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
602   /*!
603    * @result Actual flag value before release was applied.
604    * Trigger all waiting threads to run by modifying flag to release state.
605    */
606   void internal_release() {
607     (void)traits_type::test_then_add4((volatile FlagType *)this->get());
608   }
609   /*!
610    * @result Actual flag value before sleep bit(s) set.
611    * Notes that there is at least one thread sleeping on the flag by setting
612    * sleep bit(s).
613    */
614   FlagType set_sleeping() {
615     return traits_type::test_then_or((volatile FlagType *)this->get(),
616                                      KMP_BARRIER_SLEEP_STATE);
617   }
618   /*!
619    * @result Actual flag value before sleep bit(s) cleared.
620    * Notes that there are no longer threads sleeping on the flag by clearing
621    * sleep bit(s).
622    */
623   FlagType unset_sleeping() {
624     return traits_type::test_then_and((volatile FlagType *)this->get(),
625                                       ~KMP_BARRIER_SLEEP_STATE);
626   }
627   /*!
628    * @param old_loc in   old value of flag
629    * Test whether there are threads sleeping on the flag's old value in old_loc.
630    */
631   bool is_sleeping_val(FlagType old_loc) {
632     return old_loc & KMP_BARRIER_SLEEP_STATE;
633   }
634   /*!
635    * Test whether there are threads sleeping on the flag.
636    */
637   bool is_sleeping() { return is_sleeping_val(*(this->get())); }
638   bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
639   kmp_uint8 *get_stolen() { return NULL; }
640   enum barrier_type get_bt() { return bs_last_barrier; }
641 };
642 
643 template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
644   typedef flag_traits<FlagType> traits_type;
645   FlagType checker; /**< Value to compare flag to to check if flag has been
646                        released. */
647   kmp_info_t
648       *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
649   kmp_uint32
650       num_waiting_threads; /**< Number of threads sleeping on this thread. */
651 public:
652   kmp_basic_flag(std::atomic<FlagType> *p)
653       : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
654   kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
655       : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
656     waiting_threads[0] = thr;
657   }
658   kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
659       : kmp_flag<FlagType>(p, traits_type::t), checker(c),
660         num_waiting_threads(0) {}
661   /*!
662    * param i in   index into waiting_threads
663    * @result the thread that is waiting at index i
664    */
665   kmp_info_t *get_waiter(kmp_uint32 i) {
666     KMP_DEBUG_ASSERT(i < num_waiting_threads);
667     return waiting_threads[i];
668   }
669   /*!
670    * @result num_waiting_threads
671    */
672   kmp_uint32 get_num_waiters() { return num_waiting_threads; }
673   /*!
674    * @param thr in   the thread which is now waiting
675    *
676    * Insert a waiting thread at index 0.
677    */
678   void set_waiter(kmp_info_t *thr) {
679     waiting_threads[0] = thr;
680     num_waiting_threads = 1;
681   }
682   /*!
683    * @result true if the flag object has been released.
684    */
685   bool done_check() { return this->load() == checker; }
686   /*!
687    * @param old_loc in   old value of flag
688    * @result true if the flag's old value indicates it was released.
689    */
690   bool done_check_val(FlagType old_loc) { return old_loc == checker; }
691   /*!
692    * @result true if the flag object is not yet released.
693    * Used in __kmp_wait_template like:
694    * @code
695    * while (flag.notdone_check()) { pause(); }
696    * @endcode
697    */
698   bool notdone_check() { return this->load() != checker; }
699   /*!
700    * @result Actual flag value before release was applied.
701    * Trigger all waiting threads to run by modifying flag to release state.
702    */
703   void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
704   /*!
705    * @result Actual flag value before sleep bit(s) set.
706    * Notes that there is at least one thread sleeping on the flag by setting
707    * sleep bit(s).
708    */
709   FlagType set_sleeping() {
710     return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
711   }
712   /*!
713    * @result Actual flag value before sleep bit(s) cleared.
714    * Notes that there are no longer threads sleeping on the flag by clearing
715    * sleep bit(s).
716    */
717   FlagType unset_sleeping() {
718     return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
719   }
720   /*!
721    * @param old_loc in   old value of flag
722    * Test whether there are threads sleeping on the flag's old value in old_loc.
723    */
724   bool is_sleeping_val(FlagType old_loc) {
725     return old_loc & KMP_BARRIER_SLEEP_STATE;
726   }
727   /*!
728    * Test whether there are threads sleeping on the flag.
729    */
730   bool is_sleeping() { return is_sleeping_val(this->load()); }
731   bool is_any_sleeping() { return is_sleeping_val(this->load()); }
732   kmp_uint8 *get_stolen() { return NULL; }
733   enum barrier_type get_bt() { return bs_last_barrier; }
734 };
735 
736 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
737 public:
738   kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
739   kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
740       : kmp_basic_flag<kmp_uint32>(p, thr) {}
741   kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
742       : kmp_basic_flag<kmp_uint32>(p, c) {}
743   void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
744   void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
745   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
746                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
747                     kmp_int32 is_constrained) {
748     return __kmp_execute_tasks_32(
749         this_thr, gtid, this, final_spin,
750         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
751   }
752   void wait(kmp_info_t *this_thr,
753             int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
754     if (final_spin)
755       __kmp_wait_template<kmp_flag_32, TRUE>(
756           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
757     else
758       __kmp_wait_template<kmp_flag_32, FALSE>(
759           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
760   }
761   void release() { __kmp_release_template(this); }
762   flag_type get_ptr_type() { return flag32; }
763 };
764 
765 class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
766 public:
767   kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
768   kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
769       : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
770   kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
771       : kmp_basic_flag_native<kmp_uint64>(p, c) {}
772   void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
773   void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
774   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
775                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
776                     kmp_int32 is_constrained) {
777     return __kmp_execute_tasks_64(
778         this_thr, gtid, this, final_spin,
779         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
780   }
781   void wait(kmp_info_t *this_thr,
782             int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
783     if (final_spin)
784       __kmp_wait_template<kmp_flag_64, TRUE>(
785           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
786     else
787       __kmp_wait_template<kmp_flag_64, FALSE>(
788           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
789   }
790   bool wait_cancellable_nosleep(kmp_info_t *this_thr,
791                                 int final_spin
792                                     USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
793     bool retval = false;
794     if (final_spin)
795       retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
796           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
797     else
798       retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
799           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
800     return retval;
801   }
802   void release() { __kmp_release_template(this); }
803   flag_type get_ptr_type() { return flag64; }
804 };
805 
806 // Hierarchical 64-bit on-core barrier instantiation
807 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
808   kmp_uint64 checker;
809   kmp_info_t *waiting_threads[1];
810   kmp_uint32 num_waiting_threads;
811   kmp_uint32
812       offset; /**< Portion of flag that is of interest for an operation. */
813   bool flag_switch; /**< Indicates a switch in flag location. */
814   enum barrier_type bt; /**< Barrier type. */
815   kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
816                            location. */
817 #if USE_ITT_BUILD
818   void *
819       itt_sync_obj; /**< ITT object that must be passed to new flag location. */
820 #endif
821   unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
822     return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
823   }
824 
825 public:
826   kmp_flag_oncore(volatile kmp_uint64 *p)
827       : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
828         flag_switch(false) {}
829   kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
830       : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
831         offset(idx), flag_switch(false) {}
832   kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
833                   enum barrier_type bar_t,
834                   kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
835       : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
836         num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
837         this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
838   kmp_info_t *get_waiter(kmp_uint32 i) {
839     KMP_DEBUG_ASSERT(i < num_waiting_threads);
840     return waiting_threads[i];
841   }
842   kmp_uint32 get_num_waiters() { return num_waiting_threads; }
843   void set_waiter(kmp_info_t *thr) {
844     waiting_threads[0] = thr;
845     num_waiting_threads = 1;
846   }
847   bool done_check_val(kmp_uint64 old_loc) {
848     return byteref(&old_loc, offset) == checker;
849   }
850   bool done_check() { return done_check_val(*get()); }
851   bool notdone_check() {
852     // Calculate flag_switch
853     if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
854       flag_switch = true;
855     if (byteref(get(), offset) != 1 && !flag_switch)
856       return true;
857     else if (flag_switch) {
858       this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
859       kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
860                        (kmp_uint64)KMP_BARRIER_STATE_BUMP);
861       __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
862     }
863     return false;
864   }
865   void internal_release() {
866     // Other threads can write their own bytes simultaneously.
867     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
868       byteref(get(), offset) = 1;
869     } else {
870       kmp_uint64 mask = 0;
871       byteref(&mask, offset) = 1;
872       KMP_TEST_THEN_OR64(get(), mask);
873     }
874   }
875   kmp_uint64 set_sleeping() {
876     return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
877   }
878   kmp_uint64 unset_sleeping() {
879     return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
880   }
881   bool is_sleeping_val(kmp_uint64 old_loc) {
882     return old_loc & KMP_BARRIER_SLEEP_STATE;
883   }
884   bool is_sleeping() { return is_sleeping_val(*get()); }
885   bool is_any_sleeping() { return is_sleeping_val(*get()); }
886   void wait(kmp_info_t *this_thr, int final_spin) {
887     if (final_spin)
888       __kmp_wait_template<kmp_flag_oncore, TRUE>(
889           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
890     else
891       __kmp_wait_template<kmp_flag_oncore, FALSE>(
892           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
893   }
894   void release() { __kmp_release_template(this); }
895   void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
896   void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
897   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
898                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
899                     kmp_int32 is_constrained) {
900     return __kmp_execute_tasks_oncore(
901         this_thr, gtid, this, final_spin,
902         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
903   }
904   kmp_uint8 *get_stolen() { return NULL; }
905   enum barrier_type get_bt() { return bt; }
906   flag_type get_ptr_type() { return flag_oncore; }
907 };
908 
909 // Used to wake up threads, volatile void* flag is usually the th_sleep_loc
910 // associated with int gtid.
911 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
912   if (!flag)
913     return;
914 
915   switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
916   case flag32:
917     __kmp_resume_32(gtid, NULL);
918     break;
919   case flag64:
920     __kmp_resume_64(gtid, NULL);
921     break;
922   case flag_oncore:
923     __kmp_resume_oncore(gtid, NULL);
924     break;
925   }
926 }
927 
928 /*!
929 @}
930 */
931 
932 #endif // KMP_WAIT_RELEASE_H
933