/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and awaking it. They are used to build
higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_properties t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return (flag_type)(t.type); }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_properties t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return (flag_type)(t.type); }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void mwait(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
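
/* The atomic flag pairs a release store with an acquire load: store(val)
   publishes everything the releasing thread wrote before it, and the waiting
   thread's load() makes those writes visible once it observes the new flag
   value. A minimal, self-contained sketch of that pairing in plain C++ (the
   names here are illustrative, not runtime code):
   @code
   #include <atomic>
   #include <cassert>
   #include <thread>

   std::atomic<unsigned> go_flag{0};
   int payload = 0; // non-atomic data published via the flag

   void releaser() {
     payload = 42;                                // write data first
     go_flag.store(1, std::memory_order_release); // then publish
   }

   void waiter() {
     while (go_flag.load(std::memory_order_acquire) != 1)
       ; // spin until released
     assert(payload == 42); // acquire makes the payload write visible
   }

   int main() {
     std::thread t1(releaser), t2(waiter);
     t1.join();
     t2.join();
   }
   @endcode
*/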

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  /* OMPT Behavior:
     THIS function is called from
       __kmp_barrier (2 times) (implicit or explicit barrier in parallel
       regions)
             these have join / fork behavior

       In these cases, we don't change the state or trigger events in THIS
       function.
       Events are triggered in the calling code (__kmp_barrier):

             state := ompt_state_overhead
         barrier-begin
         barrier-wait-begin
             state := ompt_state_wait_barrier
         call join-barrier-implementation (finally arrive here)
         {}
         call fork-barrier-implementation (finally arrive here)
         {}
             state := ompt_state_overhead
         barrier-wait-end
         barrier-end
             state := ompt_state_work_parallel


       __kmp_fork_barrier (after thread creation, before executing implicit
       task)
         call fork-barrier-implementation (finally arrive here)
         {} // worker arrive here with state = ompt_state_idle


       __kmp_join_barrier (implicit barrier at end of parallel region)
             state := ompt_state_barrier_implicit
         barrier-begin
         barrier-wait-begin
         call join-barrier-implementation (finally arrive here
         final_spin=FALSE)
         {
         }
       __kmp_fork_barrier (implicit barrier at end of parallel region)
         call fork-barrier-implementation (finally arrive here final_spin=TRUE)

       Worker after task-team is finished:
         barrier-wait-end
         barrier-end
         implicit-task-end
         idle-begin
             state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
         idle-end
             state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
            __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
            serialized region (perhaps the outer one), or else tasking was
            manually disabled (KMP_TASKING=0).
      */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // For a hidden helper thread, a NULL task_team means the main thread has
    // not yet released the barrier. We cannot simply wait here: once the main
    // thread releases all child barriers, the hidden helper threads would
    // still be asleep, so follow-up setup such as task-team synchronization
    // would be skipped and this thread would be left without a task team.
    // Usually that is harmless, but there is a corner case: if the first task
    // encountered is an untied task, the check in __kmp_task_alloc uses the
    // task team pointer without checking for NULL and crashes.
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      // If there are still hidden helper tasks to be executed, the hidden
      // helper thread will not enter a waiting status.
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}
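
/* The wait loop above is a two-phase wait: an active spin with pause/yield
   until the blocktime-derived deadline passes, then a real suspension via
   flag->suspend() or flag->mwait(). A compressed, self-contained sketch of
   that structure in plain C++ (a condition variable standing in for the
   runtime's suspend path; all names illustrative):
   @code
   #include <atomic>
   #include <chrono>
   #include <condition_variable>
   #include <mutex>
   #include <thread>

   std::atomic<bool> done{false};
   std::mutex mx;
   std::condition_variable cv;

   void wait_two_phase(std::chrono::microseconds blocktime) {
     auto deadline = std::chrono::steady_clock::now() + blocktime;
     // Phase 1: spin actively, yielding, until released or blocktime expires.
     while (!done.load(std::memory_order_acquire)) {
       if (std::chrono::steady_clock::now() >= deadline)
         break;
       std::this_thread::yield();
     }
     // Phase 2: sleep; the predicate is re-checked under the lock so a
     // release between the spin and the wait cannot be lost.
     std::unique_lock<std::mutex> lk(mx);
     cv.wait(lk, [] { return done.load(std::memory_order_acquire); });
   }

   void release_waiters() {
     {
       std::lock_guard<std::mutex> lk(mx);
       done.store(true, std::memory_order_release);
     }
     cv.notify_all();
   }
   @endcode
*/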

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable causing the calling thread to wait in
// a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
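
/* __kmp_mwait_template announces the intent to sleep (set_sleeping) and then
   re-checks the flag before actually blocking, backing out with
   unset_sleeping() if the release already happened. A self-contained sketch
   of that back-out protocol with std::atomic (SLEEP_STATE and checker are
   assumed values, for illustration only):
   @code
   #include <atomic>
   #include <cstdint>

   constexpr uint64_t SLEEP_STATE = 1; // assumed low-bit sleep flag
   std::atomic<uint64_t> flag{0};
   const uint64_t checker = 4; // value the release is expected to produce

   // Returns true if it is safe to block: the releaser will see the sleep
   // bit and wake us. Returns false if the release already happened.
   bool try_sleep() {
     flag.fetch_or(SLEEP_STATE);                    // set_sleeping()
     if ((flag.load() & ~SLEEP_STATE) == checker) { // done_check()
       flag.fetch_and(~SLEEP_STATE); // unset_sleeping(): back out
       return false;
     }
     return true;
   }
   @endcode
*/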

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
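
/* flag_traits maps a flag word width to the matching atomic primitives so the
   flag templates below can stay width-agnostic. The same shape expressed with
   std::atomic instead of the runtime's KMP_TEST_THEN_* macros (illustrative
   only):
   @code
   #include <atomic>
   #include <cstdint>

   template <typename T> struct atomic_flag_traits {
     // Each operation returns the value the flag held *before* the update,
     // matching the test_then_* semantics above.
     static T test_then_add4(std::atomic<T> *f) { return f->fetch_add(4); }
     static T test_then_or(std::atomic<T> *f, T v) { return f->fetch_or(v); }
     static T test_then_and(std::atomic<T> *f, T v) { return f->fetch_and(v); }
   };

   // Usage: atomic_flag_traits<uint64_t>::test_then_or(&flag, mask);
   @endcode
*/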

// Basic flag that does not use C11 Atomics
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check whether the
                       flag has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
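
/* The release and sleep operations above share one word: internal_release()
   bumps the flag by 4 while set_sleeping()/unset_sleeping() toggle the sleep
   bit(s), and the Sleepable done_check() masks the sleep bit(s) out before
   comparing against checker. A self-contained sketch of that encoding,
   assuming the sleep bit is the low bit (which an add-by-4 bump leaves
   untouched):
   @code
   #include <atomic>
   #include <cassert>
   #include <cstdint>

   constexpr uint64_t SLEEP_STATE = 1; // assumed low-bit sleep flag
   std::atomic<uint64_t> flag{0};

   int main() {
     uint64_t old = flag.fetch_or(SLEEP_STATE); // set_sleeping()
     assert(!(old & SLEEP_STATE));              // nobody was sleeping before
     flag.fetch_add(4);                         // internal_release(): bump
     // done_check() for the Sleepable case: mask the sleep bit, then compare.
     assert((flag.load() & ~SLEEP_STATE) == 4);
     flag.fetch_and(~SLEEP_STATE); // unset_sleeping()
     return 0;
   }
   @endcode
*/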

template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check whether the
                       flag has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
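
/* End-to-end, the atomic flag works as a spin target: a waiter loops on
   notdone_check() until the word reaches checker, and the releaser gets it
   there with a single fetch_add. A runnable sketch of that round trip in
   plain C++ (deriving checker from the initial value purely for
   illustration):
   @code
   #include <atomic>
   #include <cstdint>
   #include <thread>

   std::atomic<uint64_t> flag{0};

   int main() {
     const uint64_t checker = flag.load() + 4; // expected released value
     std::thread waiter([checker] {
       while (flag.load(std::memory_order_acquire) != checker) // notdone_check
         std::this_thread::yield();
     });
     flag.fetch_add(4, std::memory_order_release); // internal_release()
     waiter.join();
     return 0;
   }
   @endcode
*/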

template <bool Cancellable, bool Sleepable>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

template <bool Cancellable, bool Sleepable>
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
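
/* Both wait() methods above bridge a runtime int to a compile-time template
   parameter: the final_spin branch is taken once, and each instantiation of
   __kmp_wait_template can fold away the untaken paths (the Cancellable and
   Sleepable checks compile out the same way). The idiom in isolation, with
   illustrative names:
   @code
   template <bool final_spin> bool wait_impl() {
     if (final_spin) {
       // code for the last spin before an implicit task may end
     } else {
       // code for an intermediate spin
     }
     return false;
   }

   bool wait(bool final_spin) {
     // One runtime branch selects the fully specialized body.
     return final_spin ? wait_impl<true>() : wait_impl<false>();
   }
   @endcode
*/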

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
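
  /* For the on-core flag, each child owns one byte of the shared 64-bit word:
     byteref(loc, offset) selects that byte, and done_check() compares it with
     checker. internal_release() below therefore must not clobber bytes that
     other children are writing concurrently; with a finite blocktime it uses
     an atomic OR of a single-byte mask. A self-contained sketch of that
     masked release with std::atomic (illustrative only; byte indexing as in
     byteref):
     @code
     #include <atomic>
     #include <cstdint>
     #include <cstring>

     // Set byte `offset` of the packed word to 1 without disturbing the
     // other bytes (or sleep bits) that may be updated concurrently.
     void release_byte(std::atomic<uint64_t> &word, size_t offset) {
       uint64_t mask = 0;
       unsigned char one = 1;
       std::memcpy(reinterpret_cast<unsigned char *>(&mask) + offset, &one, 1);
       word.fetch_or(mask, std::memory_order_release);
     }
     @endcode
  */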
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H