/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and waking it. They are used to build
higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_properties t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return (flag_type)(t.type); }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_properties t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return (flag_type)(t.type); }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void mwait(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point.  */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  /* OMPT Behavior:
     THIS function is called from
     __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
           these have join / fork behavior

     In these cases, we don't change the state or trigger events in THIS
     function.
     Events are triggered in the calling code (__kmp_barrier):

        state := ompt_state_overhead
        barrier-begin
        barrier-wait-begin
        state := ompt_state_wait_barrier
        call join-barrier-implementation (finally arrive here)
        {}
        call fork-barrier-implementation (finally arrive here)
        {}
        state := ompt_state_overhead
        barrier-wait-end
        barrier-end
        state := ompt_state_work_parallel


     __kmp_fork_barrier (after thread creation, before executing implicit task)
        call fork-barrier-implementation (finally arrive here)
        {} // worker arrive here with state = ompt_state_idle


     __kmp_join_barrier (implicit barrier at end of parallel region)
        state := ompt_state_barrier_implicit
        barrier-begin
        barrier-wait-begin
        call join-barrier-implementation (finally arrive here
                                          final_spin=FALSE)
        {
        }
        __kmp_fork_barrier (implicit barrier at end of parallel region)
        call fork-barrier-implementation (finally arrive here final_spin=TRUE)

     Worker after task-team is finished:
        barrier-wait-end
        barrier-end
        implicit-task-end
        idle-begin
        state := ompt_state_idle

     Before leaving, if state = ompt_state_idle
        idle-end
        state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon.  */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
    (void)poll_count;
#endif // KMP_USE_MONITOR
  }

  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
            __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
            serialized region (perhaps the outer one), or else tasking was
            manually disabled (KMP_TASKING=0).  */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // For a hidden helper thread, a NULL task_team means the main thread has
    // not yet released the barrier. We must not sleep here: if the main
    // thread releases all children barriers while hidden helper threads are
    // asleep, the follow-up setup (such as task-team synchronization) is
    // skipped and this thread is left without a task team. Usually that is
    // harmless. However, in the corner case where the first task encountered
    // is an untied task, the check in __kmp_task_alloc crashes because it
    // uses the task team pointer without checking it for nullptr.
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      // If there are still hidden helper tasks to be executed, the hidden
      // helper thread will not enter a waiting status.
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

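// Illustrative sketch of the wait/release pairing (not part of the runtime
// interface; the flag location and checker below are hypothetical but follow
// the fork/join barrier usage): a waiter and a releaser must operate on the
// same flag, otherwise the waiter can sleep forever.
//
//   kmp_flag_64<> flag(&thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
//                      (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   // waiting thread:
//   //   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
//   // releasing thread:
//   //   flag.release();  // bumps b_go and resumes the waiter if it sleeps
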
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable causing the calling thread to wait in
// a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks!  */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}

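// Worked example of the value protocol for the basic flags (illustrative;
// assumes the usual kmp.h encoding where KMP_BARRIER_SLEEP_STATE is bit 0 of
// the flag word and KMP_BARRIER_STATE_BUMP == 1 << KMP_BARRIER_BUMP_BIT == 4):
//
//   flag == 0   waiter spins: notdone_check() sees value != checker (checker == 4)
//   flag == 1   waiter sets the sleep bit via set_sleeping() and suspends
//   flag == 5   releaser's internal_release() adds the bump value; the sleep
//               bit is still set, so is_any_sleeping() is true and the
//               releaser resumes the recorded waiter, which clears the bit
//   flag == 4   done_check() now matches the checker and the wait returns
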
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

// Basic flag that does not use C11 Atomics
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check if flag has
                       been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag against to check if flag has
                       been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in
   * old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};

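// Illustrative usage sketch (the flag location and checker are hypothetical):
// callers choose the template arguments to match the kind of wait they need,
// e.g. a cancellable, non-sleepable spin on a 64-bit go flag:
//
//   kmp_flag_64<true, false> flag(&thr->th.th_bar[bs_plain_barrier].bb.b_go,
//                                 new_state);
//   bool cancelled = flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
//   // wait() returns true only when Cancellable is set and the enclosing
//   // parallel region was cancelled while spinning.
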
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
#if OMPD_SUPPORT
    int ret = __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_end();
    return ret;
#else
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

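// Illustrative note on the on-core layout (a sketch, not normative; the local
// flag below is hypothetical): the 64-bit flag is treated as per-thread bytes,
// so threads sharing a core each release their own byte via
// byteref(get(), offset) while a waiter only inspects the byte at its offset.
//
//   volatile kmp_uint64 core_flag = 0;
//   kmp_flag_oncore f(&core_flag, /*c=*/1, /*idx=*/3, bs_plain_barrier,
//                     this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
//   f.release();  // sets byte 3 of core_flag to 1 without touching the others
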
// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H