/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and waking it. They are used to build
higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val    the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
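
/* A note on the memory ordering above: a waiter pairs an acquire load() with
   the releasing thread's release store(), so any writes made before the
   release are visible once the waiter observes the new flag value. A minimal
   sketch of that pairing with a plain std::atomic (the names `payload`, `go`
   and the constant GO are illustrative only, not part of this header):

   @code
   std::atomic<kmp_uint64> go(0);
   kmp_uint64 payload = 0;

   // releasing thread
   payload = 42;                            // publish data first
   go.store(GO, std::memory_order_release); // then release the flag

   // waiting thread
   while (go.load(std::memory_order_acquire) != GO)
     ; // spin (the real code yields and may sleep, see __kmp_wait_template)
   // payload == 42 is guaranteed to be visible after the acquire load
   @endcode
*/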

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, int final_spin, bool cancellable = false,
          bool sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  /* OMPT Behavior:
     THIS function is called from
       __kmp_barrier (2 times)  (implicit or explicit barrier in parallel
                                 regions) -- these have join / fork behavior

         In these cases, we don't change the state or trigger events in THIS
         function. Events are triggered in the calling code (__kmp_barrier):

             state := ompt_state_overhead
             barrier-begin
             barrier-wait-begin
             state := ompt_state_wait_barrier
             call join-barrier-implementation (finally arrive here)
             {}
             call fork-barrier-implementation (finally arrive here)
             {}
             state := ompt_state_overhead
             barrier-wait-end
             barrier-end
             state := ompt_state_work_parallel

       __kmp_fork_barrier (after thread creation, before executing implicit
                           task)
             call fork-barrier-implementation (finally arrive here)
             {} // worker arrives here with state = ompt_state_idle

       __kmp_join_barrier (implicit barrier at end of parallel region)
             state := ompt_state_barrier_implicit
             barrier-begin
             barrier-wait-begin
             call join-barrier-implementation (finally arrive here
                                               final_spin=FALSE)
             {}
       __kmp_fork_barrier (implicit barrier at end of parallel region)
             call fork-barrier-implementation (finally arrive here
                                               final_spin=TRUE)

       Worker after task-team is finished:
             barrier-wait-end
             barrier-end
             implicit-task-end
             idle-begin
             state := ompt_state_idle

     Before leaving, if state = ompt_state_idle
             idle-end
             state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

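  /* What follows sets up the sleep budget. With a finite KMP_BLOCKTIME (or a
     soft pause), the thread is only allowed to spin for the cached
     th_team_bt_intervals budget before it may suspend on the flag:
       - monitor builds count ticks of the global time counter; as a worked
         example, if th_team_bt_intervals is 4 and the current tick is 100,
         hibernate becomes 100 + 4 + 1 = 105 and the thread may sleep once the
         global tick reaches 105 (the +1 guarantees at least a full interval);
       - monitor-free builds compute an absolute deadline, hibernate_goal =
         KMP_NOW() + th_team_bt_intervals, checked later via KMP_BLOCKING().
     The numbers above are only an illustration of the arithmetic below, not
     values used by the runtime. */
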
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();
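
  /* The main wait loop below, in outline:
       1) If tasking is active, try to execute tasks while waiting.
       2) Check the flag and the global shutdown/abort state.
       3) Pause/yield, more aggressively when oversubscribed.
       4) Skip suspension while KMP_BLOCKTIME is "infinite", while new tasks
          look likely, or while the spin budget computed above has not run out.
       5) Otherwise suspend on the flag until __kmp_release_template wakes us.
  */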

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
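
/* A sketch of how the two templates above are meant to be paired. This is
   illustrative only; the real call sites live in the barrier and tasking code
   (e.g. kmp_barrier.cpp), and the b_go / KMP_BARRIER_STATE_BUMP usage shown
   here mirrors the fork/join barrier rather than quoting it verbatim:

   @code
   // Waiting side: a worker blocks on its own b_go flag until it is bumped.
   kmp_flag_64 go_flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
   go_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

   // Releasing side: the master bumps the worker's b_go and wakes it if asleep.
   kmp_flag_64 release_flag(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                            other_thr);
   release_flag.release();
   @endcode

   The waiter's checker value is the expected "go" state; internal_release()
   on the other side advances the flag by KMP_BARRIER_STATE_BUMP and, when
   KMP_BLOCKTIME is finite, resume() is called for any sleeping waiter. */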

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
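
/* A note on the flag value layout used by the basic flag classes below (as
   defined in kmp.h; the concrete numbers here are an illustration of that
   layout, not new definitions): the low bit of a go/arrived flag is the sleep
   bit (KMP_BARRIER_SLEEP_STATE), while the barrier state itself advances in
   steps of KMP_BARRIER_STATE_BUMP. That is why internal_release() adds 4
   rather than storing a value -- it bumps the state without disturbing the
   sleep bit -- and why set_sleeping()/unset_sleeping() use atomic OR/AND:

   @code
   // assuming KMP_BARRIER_SLEEP_STATE == 1 and KMP_BARRIER_STATE_BUMP == 4
   kmp_uint64 f = 8;                                // state 8, nobody asleep
   f |= KMP_BARRIER_SLEEP_STATE;                    // 9: a waiter went to sleep
   f += KMP_BARRIER_STATE_BUMP;                     // 13: released, sleep bit kept
   bool was_sleeping = f & KMP_BARRIER_SLEEP_STATE; // releaser must resume()
   @endcode
*/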

// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to, to check if flag has been
                       released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to, to check if flag has been
                       released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
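
/* In the on-core (hierarchical) barrier below, the threads of one core share
   a single 64-bit flag word and each waiter watches its own byte of it,
   selected by `offset`. Because the sleep bit is kept in the same word, with
   a finite KMP_BLOCKTIME the release ORs in a byte mask atomically rather
   than doing a plain byte store. A sketch of the byte view (values are
   illustrative only):

   @code
   kmp_uint64 word = 0;
   byteref(&word, 2) = 1; // release the waiter watching byte 2
   // word == 0x0000000000010000 on a little-endian target
   @endcode
*/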
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
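
/* Usage note on the wrapper below (illustrative, not a new entry point): it is
   what the runtime calls when it only has a thread's gtid and its cached
   th_sleep_loc, roughly

   @code
   __kmp_null_resume_wrapper(gtid, thr->th.th_sleep_loc);
   @endcode

   The cast to kmp_flag_64 * is only used to read the common get_type() field;
   the matching __kmp_resume_* routine is then invoked with a NULL flag so it
   re-reads the sleep location from the target thread itself. */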

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H