/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and waking it. They are used to build
higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
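
/* Illustrative sketch only (not part of the interface): __kmp_wait_template
   and __kmp_release_template below drive a derived flag roughly as follows.
   Names such as th_gtid and wait_gtid are placeholders for values the real
   callers already have.

     // waiting thread
     if (!flag->done_check()) {
       while (flag->notdone_check()) {
         // spin/yield, possibly flag->execute_tasks(...), and eventually
         flag->suspend(th_gtid); // block until resumed
       }
     }

     // releasing thread
     flag->internal_release();  // move the flag to its released state
     if (flag->is_any_sleeping())
       flag->resume(wait_gtid); // wake a sleeping waiter
*/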

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
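
/* A sketch of the pairing this warning describes, using the concrete
   kmp_flag_64 defined later in this file (the construction mirrors the one in
   kmp_flag_oncore::notdone_check; this_thr, bt, waiter_thr and itt_sync_obj
   are placeholders):

     // waiting thread
     kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                      (kmp_uint64)KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

     // some other thread must eventually release the same location, e.g.
     kmp_flag_64 go_flag(&waiter_thr->th.th_bar[bt].bb.b_go, waiter_thr);
     go_flag.release(); // bumps b_go and resumes the waiter if it is sleeping
*/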
template <class C, int final_spin, bool cancellable = false,
          bool sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  /* OMPT Behavior:
     THIS function is called from
       __kmp_barrier (2 times) (implicit or explicit barrier in parallel
                                regions)
             these have join / fork behavior

             In these cases, we don't change the state or trigger events in
             THIS function.
             Events are triggered in the calling code (__kmp_barrier):

                state := ompt_state_overhead
                barrier-begin
                barrier-wait-begin
                state := ompt_state_wait_barrier
                call join-barrier-implementation (finally arrive here)
                {}
                call fork-barrier-implementation (finally arrive here)
                {}
                state := ompt_state_overhead
                barrier-wait-end
                barrier-end
                state := ompt_state_work_parallel

       __kmp_fork_barrier (after thread creation, before executing implicit
                           task)
             call fork-barrier-implementation (finally arrive here)
             {} // worker arrive here with state = ompt_state_idle

       __kmp_join_barrier (implicit barrier at end of parallel region)
             state := ompt_state_barrier_implicit
             barrier-begin
             barrier-wait-begin
             call join-barrier-implementation (finally arrive here,
                                               final_spin=FALSE)
             {}
       __kmp_fork_barrier (implicit barrier at end of parallel region)
             call fork-barrier-implementation (finally arrive here,
                                               final_spin=TRUE)

       Worker after task-team is finished:
             barrier-wait-end
             barrier-end
             implicit-task-end
             idle-begin
             state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
             idle-end
             state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if indicated by the sleep bit(s). A
   thread that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check whether it
                       has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check whether it
                       has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
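
/* Sketch of how the sleep bit is meant to cooperate with done_check_val()
   (roughly what the platform-specific __kmp_suspend_template does before
   blocking; simplified and illustrative only, variable names are
   placeholders):

     FlagType old_spin = flag->set_sleeping(); // advertise intent to sleep
     if (flag->done_check_val(old_spin)) {
       // The flag was already released before the sleep bit was set:
       // back out and do not block.
       flag->unset_sleeping();
     } else {
       // Safe to block; __kmp_release_template() will see is_any_sleeping()
       // and resume() this thread.
     }
*/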

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
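  /* Layout sketch (illustrative, not normative): the 64-bit flag word is
     treated as an array of one-byte sub-flags, and 'offset' selects the byte
     this object operates on -- presumably one byte per thread of the on-core
     group in the hierarchical barrier. For example:

       kmp_uint64 word = 0;   // hypothetical shared flag word
       byteref(&word, 3) = 1; // the object constructed with offset == 3
                              // marks only its own byte
       // done_check_val(word) then compares exactly that byte with 'checker'

     Because several threads may update different bytes of the same word
     concurrently (and sleep bits may be present), internal_release() below
     falls back to an atomic OR of a mask that has only this byte set. */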
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H