/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
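
As a purely illustrative sketch (not a statement about any particular compiler),
generated code typically obtains this number once and threads it through the
later runtime calls; `loc` below is a placeholder for the compiler-emitted
source location record:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
// gtid is then passed to subsequent __kmpc_* entry points, for example:
__kmpc_barrier(&loc, gtid);
@endcode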
93 94 If multiple non-OpenMP threads all enter an OpenMP construct then this 95 will be a unique thread identifier among all the threads created by 96 the OpenMP runtime (but the value cannot be defined in terms of 97 OpenMP thread ids returned by omp_get_thread_num()). 98 */ 99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 100 kmp_int32 gtid = __kmp_entry_gtid(); 101 102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 103 104 return gtid; 105 } 106 107 /*! 108 @ingroup THREAD_STATES 109 @param loc Source location information. 110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 111 112 This function can be called in any context. 113 It returns the total number of threads under the control of the OpenMP runtime. 114 That is not a number that can be determined by any OpenMP standard calls, since 115 the library may be called from more than one non-OpenMP thread, and this 116 reflects the total over all such calls. Similarly the runtime maintains 117 underlying threads even when they are not active (since the cost of creating 118 and destroying OS threads is high), this call counts all such threads even if 119 they are not waiting for work. 120 */ 121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 122 KC_TRACE(10, 123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 124 125 return TCR_4(__kmp_all_nth); 126 } 127 128 /*! 129 @ingroup THREAD_STATES 130 @param loc Source location information. 131 @return The thread number of the calling thread in the innermost active parallel 132 construct. 133 */ 134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 136 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 137 } 138 139 /*! 140 @ingroup THREAD_STATES 141 @param loc Source location information. 142 @return The number of threads in the innermost active parallel construct. 143 */ 144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 146 147 return __kmp_entry_thread()->th.th_team->t.t_nproc; 148 } 149 150 /*! 151 * @ingroup DEPRECATED 152 * @param loc location description 153 * 154 * This function need not be called. It always returns TRUE. 
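
If a compiler does choose to call it, one plausible guard pattern is the
following (an illustrative sketch only; `outlined_fn` and `x` are placeholders
for the compiler-outlined microtask and a shared variable):
@code
extern void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid, int *x);

if (__kmpc_ok_to_fork(&loc)) {
  __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &x);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  kmp_int32 btid = 0;
  __kmpc_serialized_parallel(&loc, gtid);
  outlined_fn(&gtid, &btid, &x); // run the region body on this thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode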
155 */ 156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 157 #ifndef KMP_DEBUG 158 159 return TRUE; 160 161 #else 162 163 const char *semi2; 164 const char *semi3; 165 int line_no; 166 167 if (__kmp_par_range == 0) { 168 return TRUE; 169 } 170 semi2 = loc->psource; 171 if (semi2 == NULL) { 172 return TRUE; 173 } 174 semi2 = strchr(semi2, ';'); 175 if (semi2 == NULL) { 176 return TRUE; 177 } 178 semi2 = strchr(semi2 + 1, ';'); 179 if (semi2 == NULL) { 180 return TRUE; 181 } 182 if (__kmp_par_range_filename[0]) { 183 const char *name = semi2 - 1; 184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 185 name--; 186 } 187 if ((*name == '/') || (*name == ';')) { 188 name++; 189 } 190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 191 return __kmp_par_range < 0; 192 } 193 } 194 semi3 = strchr(semi2 + 1, ';'); 195 if (__kmp_par_range_routine[0]) { 196 if ((semi3 != NULL) && (semi3 > semi2) && 197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 198 return __kmp_par_range < 0; 199 } 200 } 201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 203 return __kmp_par_range > 0; 204 } 205 return __kmp_par_range < 0; 206 } 207 return TRUE; 208 209 #endif /* KMP_DEBUG */ 210 } 211 212 /*! 213 @ingroup THREAD_STATES 214 @param loc Source location information. 215 @return 1 if this thread is executing inside an active parallel region, zero if 216 not. 217 */ 218 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 219 return __kmp_entry_thread()->th.th_root->r.r_active; 220 } 221 222 /*! 223 @ingroup PARALLEL 224 @param loc source location information 225 @param global_tid global thread number 226 @param num_threads number of threads requested for this parallel construct 227 228 Set the number of threads to be used by the next fork spawned by this thread. 229 This call is only required if the parallel construct has a `num_threads` clause. 230 */ 231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 232 kmp_int32 num_threads) { 233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 234 global_tid, num_threads)); 235 __kmp_assert_valid_gtid(global_tid); 236 __kmp_push_num_threads(loc, global_tid, num_threads); 237 } 238 239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 241 /* the num_threads are automatically popped */ 242 } 243 244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 245 kmp_int32 proc_bind) { 246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 247 proc_bind)); 248 __kmp_assert_valid_gtid(global_tid); 249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 250 } 251 252 /*! 253 @ingroup PARALLEL 254 @param loc source location information 255 @param argc total number of arguments in the ellipsis 256 @param microtask pointer to callback routine consisting of outlined parallel 257 construct 258 @param ... pointers to shared variables that aren't global 259 260 Do the actual fork and call the microtask in the relevant number of threads. 261 */ 262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
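
As an illustrative sketch only (not the output of any particular compiler),
where `outlined_teams_fn`, `loc` and `gtid` are placeholders for the outlined
teams region, the source location record and the global thread number:
@code
// Possible lowering of "#pragma omp teams num_teams(4:8) thread_limit(16)":
__kmpc_push_num_teams_51(&loc, gtid, /*num_teams_lb=*/4, /*num_teams_ub=*/8,
                         /*num_threads=*/16);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_fn);
@endcode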
368 */ 369 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, 370 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, 371 kmp_int32 num_threads) { 372 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" 373 " num_teams_ub=%d num_threads=%d\n", 374 global_tid, num_teams_lb, num_teams_ub, num_threads)); 375 __kmp_assert_valid_gtid(global_tid); 376 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub, 377 num_threads); 378 } 379 380 /*! 381 @ingroup PARALLEL 382 @param loc source location information 383 @param argc total number of arguments in the ellipsis 384 @param microtask pointer to callback routine consisting of outlined teams 385 construct 386 @param ... pointers to shared variables that aren't global 387 388 Do the actual fork and call the microtask in the relevant number of threads. 389 */ 390 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 391 ...) { 392 int gtid = __kmp_entry_gtid(); 393 kmp_info_t *this_thr = __kmp_threads[gtid]; 394 va_list ap; 395 va_start(ap, microtask); 396 397 #if KMP_STATS_ENABLED 398 KMP_COUNT_BLOCK(OMP_TEAMS); 399 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 400 if (previous_state == stats_state_e::SERIAL_REGION) { 401 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 402 } else { 403 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 404 } 405 #endif 406 407 // remember teams entry point and nesting level 408 this_thr->th.th_teams_microtask = microtask; 409 this_thr->th.th_teams_level = 410 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 411 412 #if OMPT_SUPPORT 413 kmp_team_t *parent_team = this_thr->th.th_team; 414 int tid = __kmp_tid_from_gtid(gtid); 415 if (ompt_enabled.enabled) { 416 parent_team->t.t_implicit_task_taskdata[tid] 417 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 418 } 419 OMPT_STORE_RETURN_ADDRESS(gtid); 420 #endif 421 422 // check if __kmpc_push_num_teams called, set default number of teams 423 // otherwise 424 if (this_thr->th.th_teams_size.nteams == 0) { 425 __kmp_push_num_teams(loc, gtid, 0, 0); 426 } 427 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 428 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 430 431 __kmp_fork_call( 432 loc, gtid, fork_context_intel, argc, 433 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 434 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 435 __kmp_join_call(loc, gtid 436 #if OMPT_SUPPORT 437 , 438 fork_context_intel 439 #endif 440 ); 441 442 // Pop current CG root off list 443 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 444 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 445 this_thr->th.th_cg_roots = tmp->up; 446 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 447 " to node %p. 
cg_nthreads was %d\n",
                this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
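
An illustrative pairing with __kmpc_serialized_parallel() (a sketch only;
`outlined_fn` stands for the compiler-outlined parallel body and `btid` for the
bound thread id, which is zero in a serialized region):
@code
__kmpc_serialized_parallel(&loc, gtid);
kmp_int32 btid = 0;
outlined_fn(&gtid, &btid /*, addresses of shared variables... */);
__kmpc_end_serialized_parallel(&loc, gtid);
@endcode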
508 */ 509 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 510 kmp_internal_control_t *top; 511 kmp_info_t *this_thr; 512 kmp_team_t *serial_team; 513 514 KC_TRACE(10, 515 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 516 517 /* skip all this code for autopar serialized loops since it results in 518 unacceptable overhead */ 519 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 520 return; 521 522 // Not autopar code 523 __kmp_assert_valid_gtid(global_tid); 524 if (!TCR_4(__kmp_init_parallel)) 525 __kmp_parallel_initialize(); 526 527 __kmp_resume_if_soft_paused(); 528 529 this_thr = __kmp_threads[global_tid]; 530 serial_team = this_thr->th.th_serial_team; 531 532 kmp_task_team_t *task_team = this_thr->th.th_task_team; 533 // we need to wait for the proxy tasks before finishing the thread 534 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 535 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 536 537 KMP_MB(); 538 KMP_DEBUG_ASSERT(serial_team); 539 KMP_ASSERT(serial_team->t.t_serialized); 540 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 541 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 542 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 543 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 544 545 #if OMPT_SUPPORT 546 if (ompt_enabled.enabled && 547 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 548 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 549 if (ompt_enabled.ompt_callback_implicit_task) { 550 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 551 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 552 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 553 } 554 555 // reset clear the task id only after unlinking the task 556 ompt_data_t *parent_task_data; 557 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 558 559 if (ompt_enabled.ompt_callback_parallel_end) { 560 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 561 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 562 ompt_parallel_invoker_program | ompt_parallel_team, 563 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 564 } 565 __ompt_lw_taskteam_unlink(this_thr); 566 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 567 } 568 #endif 569 570 /* If necessary, pop the internal control stack values and replace the team 571 * values */ 572 top = serial_team->t.t_control_stack_top; 573 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 574 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 575 serial_team->t.t_control_stack_top = top->next; 576 __kmp_free(top); 577 } 578 579 /* pop dispatch buffers stack */ 580 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 581 { 582 dispatch_private_info_t *disp_buffer = 583 serial_team->t.t_dispatch->th_disp_buffer; 584 serial_team->t.t_dispatch->th_disp_buffer = 585 serial_team->t.t_dispatch->th_disp_buffer->next; 586 __kmp_free(disp_buffer); 587 } 588 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 589 590 --serial_team->t.t_serialized; 591 if (serial_team->t.t_serialized == 0) { 592 593 /* return to the parallel section */ 594 595 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 596 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 597 __kmp_clear_x87_fpu_status_word(); 598 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 599 
__kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed here (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.flags.sse2) {
    // CPU cannot execute SSE2 instructions.
686 } else { 687 #if KMP_COMPILER_ICC 688 _mm_mfence(); 689 #elif KMP_COMPILER_MSVC 690 MemoryBarrier(); 691 #else 692 __sync_synchronize(); 693 #endif // KMP_COMPILER_ICC 694 } 695 #endif // KMP_MIC 696 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 697 KMP_ARCH_RISCV64) 698 // Nothing to see here move along 699 #elif KMP_ARCH_PPC64 700 // Nothing needed here (we have a real MB above). 701 #else 702 #error Unknown or unsupported architecture 703 #endif 704 705 #if OMPT_SUPPORT && OMPT_OPTIONAL 706 if (ompt_enabled.ompt_callback_flush) { 707 ompt_callbacks.ompt_callback(ompt_callback_flush)( 708 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 709 } 710 #endif 711 } 712 713 /* -------------------------------------------------------------------------- */ 714 /*! 715 @ingroup SYNCHRONIZATION 716 @param loc source location information 717 @param global_tid thread id. 718 719 Execute a barrier. 720 */ 721 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 722 KMP_COUNT_BLOCK(OMP_BARRIER); 723 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 724 __kmp_assert_valid_gtid(global_tid); 725 726 if (!TCR_4(__kmp_init_parallel)) 727 __kmp_parallel_initialize(); 728 729 __kmp_resume_if_soft_paused(); 730 731 if (__kmp_env_consistency_check) { 732 if (loc == 0) { 733 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 734 } 735 __kmp_check_barrier(global_tid, ct_barrier, loc); 736 } 737 738 #if OMPT_SUPPORT 739 ompt_frame_t *ompt_frame; 740 if (ompt_enabled.enabled) { 741 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 742 if (ompt_frame->enter_frame.ptr == NULL) 743 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 744 } 745 OMPT_STORE_RETURN_ADDRESS(global_tid); 746 #endif 747 __kmp_threads[global_tid]->th.th_ident = loc; 748 // TODO: explicit barrier_wait_id: 749 // this function is called when 'barrier' directive is present or 750 // implicit barrier at the end of a worksharing construct. 751 // 1) better to add a per-thread barrier counter to a thread data structure 752 // 2) set to 0 when a new team is created 753 // 4) no sync is required 754 755 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 756 #if OMPT_SUPPORT && OMPT_OPTIONAL 757 if (ompt_enabled.enabled) { 758 ompt_frame->enter_frame = ompt_data_none; 759 } 760 #endif 761 } 762 763 /* The BARRIER for a MASTER section is always explicit */ 764 /*! 765 @ingroup WORK_SHARING 766 @param loc source location information. 767 @param global_tid global thread number . 768 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
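
A typical lowering of a <tt>master</tt> construct (illustrative only; `loc` and
`gtid` are placeholders for the source location record and the global thread
number) brackets the guarded code with this call and __kmpc_end_master():
@code
if (__kmpc_master(&loc, gtid)) {
  // ... code executed only by the primary thread ...
  __kmpc_end_master(&loc, gtid);
}
// the master construct itself implies no barrier
@endcode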
769 */ 770 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 771 int status = 0; 772 773 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 774 __kmp_assert_valid_gtid(global_tid); 775 776 if (!TCR_4(__kmp_init_parallel)) 777 __kmp_parallel_initialize(); 778 779 __kmp_resume_if_soft_paused(); 780 781 if (KMP_MASTER_GTID(global_tid)) { 782 KMP_COUNT_BLOCK(OMP_MASTER); 783 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 784 status = 1; 785 } 786 787 #if OMPT_SUPPORT && OMPT_OPTIONAL 788 if (status) { 789 if (ompt_enabled.ompt_callback_masked) { 790 kmp_info_t *this_thr = __kmp_threads[global_tid]; 791 kmp_team_t *team = this_thr->th.th_team; 792 793 int tid = __kmp_tid_from_gtid(global_tid); 794 ompt_callbacks.ompt_callback(ompt_callback_masked)( 795 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 796 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 797 OMPT_GET_RETURN_ADDRESS(0)); 798 } 799 } 800 #endif 801 802 if (__kmp_env_consistency_check) { 803 #if KMP_USE_DYNAMIC_LOCK 804 if (status) 805 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 806 else 807 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 808 #else 809 if (status) 810 __kmp_push_sync(global_tid, ct_master, loc, NULL); 811 else 812 __kmp_check_sync(global_tid, ct_master, loc, NULL); 813 #endif 814 } 815 816 return status; 817 } 818 819 /*! 820 @ingroup WORK_SHARING 821 @param loc source location information. 822 @param global_tid global thread number . 823 824 Mark the end of a <tt>master</tt> region. This should only be called by the 825 thread that executes the <tt>master</tt> region. 826 */ 827 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 828 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 829 __kmp_assert_valid_gtid(global_tid); 830 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 831 KMP_POP_PARTITIONED_TIMER(); 832 833 #if OMPT_SUPPORT && OMPT_OPTIONAL 834 kmp_info_t *this_thr = __kmp_threads[global_tid]; 835 kmp_team_t *team = this_thr->th.th_team; 836 if (ompt_enabled.ompt_callback_masked) { 837 int tid = __kmp_tid_from_gtid(global_tid); 838 ompt_callbacks.ompt_callback(ompt_callback_masked)( 839 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 840 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 841 OMPT_GET_RETURN_ADDRESS(0)); 842 } 843 #endif 844 845 if (__kmp_env_consistency_check) { 846 if (KMP_MASTER_GTID(global_tid)) 847 __kmp_pop_sync(global_tid, ct_master, loc); 848 } 849 } 850 851 /*! 852 @ingroup WORK_SHARING 853 @param loc source location information. 854 @param global_tid global thread number. 855 @param filter result of evaluating filter clause on thread global_tid, or zero 856 if no filter clause present 857 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 
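
An illustrative lowering of <tt>masked</tt> with a <tt>filter</tt> clause
(sketch only; `filter_tid` is a placeholder for the evaluated filter
expression):
@code
if (__kmpc_masked(&loc, gtid, filter_tid)) {
  // ... code executed only by the thread whose team-local id equals filter_tid ...
  __kmpc_end_masked(&loc, gtid);
}
@endcode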
858 */ 859 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 860 int status = 0; 861 int tid; 862 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 863 __kmp_assert_valid_gtid(global_tid); 864 865 if (!TCR_4(__kmp_init_parallel)) 866 __kmp_parallel_initialize(); 867 868 __kmp_resume_if_soft_paused(); 869 870 tid = __kmp_tid_from_gtid(global_tid); 871 if (tid == filter) { 872 KMP_COUNT_BLOCK(OMP_MASKED); 873 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 874 status = 1; 875 } 876 877 #if OMPT_SUPPORT && OMPT_OPTIONAL 878 if (status) { 879 if (ompt_enabled.ompt_callback_masked) { 880 kmp_info_t *this_thr = __kmp_threads[global_tid]; 881 kmp_team_t *team = this_thr->th.th_team; 882 ompt_callbacks.ompt_callback(ompt_callback_masked)( 883 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 884 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 885 OMPT_GET_RETURN_ADDRESS(0)); 886 } 887 } 888 #endif 889 890 if (__kmp_env_consistency_check) { 891 #if KMP_USE_DYNAMIC_LOCK 892 if (status) 893 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 894 else 895 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 896 #else 897 if (status) 898 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 899 else 900 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 901 #endif 902 } 903 904 return status; 905 } 906 907 /*! 908 @ingroup WORK_SHARING 909 @param loc source location information. 910 @param global_tid global thread number . 911 912 Mark the end of a <tt>masked</tt> region. This should only be called by the 913 thread that executes the <tt>masked</tt> region. 914 */ 915 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 916 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 917 __kmp_assert_valid_gtid(global_tid); 918 KMP_POP_PARTITIONED_TIMER(); 919 920 #if OMPT_SUPPORT && OMPT_OPTIONAL 921 kmp_info_t *this_thr = __kmp_threads[global_tid]; 922 kmp_team_t *team = this_thr->th.th_team; 923 if (ompt_enabled.ompt_callback_masked) { 924 int tid = __kmp_tid_from_gtid(global_tid); 925 ompt_callbacks.ompt_callback(ompt_callback_masked)( 926 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 927 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 928 OMPT_GET_RETURN_ADDRESS(0)); 929 } 930 #endif 931 932 if (__kmp_env_consistency_check) { 933 __kmp_pop_sync(global_tid, ct_masked, loc); 934 } 935 } 936 937 /*! 938 @ingroup WORK_SHARING 939 @param loc source location information. 940 @param gtid global thread number. 941 942 Start execution of an <tt>ordered</tt> construct. 
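
An illustrative use inside the body of a loop chunk dispatched with an ordered
schedule (sketch only; `loc` and `gtid` are placeholders):
@code
__kmpc_ordered(&loc, gtid);
// ... statements that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode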
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
1055 void *idx; 1056 kmp_indirect_lock_t **lck; 1057 lck = (kmp_indirect_lock_t **)crit; 1058 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1059 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1060 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1061 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1062 KA_TRACE(20, 1063 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1064 #if USE_ITT_BUILD 1065 __kmp_itt_critical_creating(ilk->lock, loc); 1066 #endif 1067 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1068 if (status == 0) { 1069 #if USE_ITT_BUILD 1070 __kmp_itt_critical_destroyed(ilk->lock); 1071 #endif 1072 // We don't really need to destroy the unclaimed lock here since it will be 1073 // cleaned up at program exit. 1074 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1075 } 1076 KMP_DEBUG_ASSERT(*lck != NULL); 1077 } 1078 1079 // Fast-path acquire tas lock 1080 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1081 { \ 1082 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1083 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1084 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1085 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1086 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1087 kmp_uint32 spins; \ 1088 KMP_FSYNC_PREPARE(l); \ 1089 KMP_INIT_YIELD(spins); \ 1090 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1091 do { \ 1092 if (TCR_4(__kmp_nth) > \ 1093 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1094 KMP_YIELD(TRUE); \ 1095 } else { \ 1096 KMP_YIELD_SPIN(spins); \ 1097 } \ 1098 __kmp_spin_backoff(&backoff); \ 1099 } while ( \ 1100 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1101 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1102 } \ 1103 KMP_FSYNC_ACQUIRED(l); \ 1104 } 1105 1106 // Fast-path test tas lock 1107 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1108 { \ 1109 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1110 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1111 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1112 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1113 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1114 } 1115 1116 // Fast-path release tas lock 1117 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1118 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1119 1120 #if KMP_USE_FUTEX 1121 1122 #include <sys/syscall.h> 1123 #include <unistd.h> 1124 #ifndef FUTEX_WAIT 1125 #define FUTEX_WAIT 0 1126 #endif 1127 #ifndef FUTEX_WAKE 1128 #define FUTEX_WAKE 1 1129 #endif 1130 1131 // Fast-path acquire futex lock 1132 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1133 { \ 1134 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1135 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1136 KMP_MB(); \ 1137 KMP_FSYNC_PREPARE(ftx); \ 1138 kmp_int32 poll_val; \ 1139 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1140 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1141 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1142 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1143 if (!cond) { \ 1144 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1145 poll_val | \ 1146 KMP_LOCK_BUSY(1, futex))) { \ 1147 continue; \ 1148 } \ 1149 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1150 } \ 1151 kmp_int32 rc; \ 1152 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1153 NULL, NULL, 0)) != 0) { \ 1154 continue; \ 1155 } \ 1156 gtid_code |= 1; \ 1157 } \ 1158 KMP_FSYNC_ACQUIRED(ftx); \ 1159 } 1160 
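// Note on the futex fast paths (descriptive comment): the value stored in
// ftx->lk.poll encodes the owner as (gtid + 1) << 1, wrapped by
// KMP_LOCK_BUSY(..., futex). The low bit of that encoded owner value serves as
// a "waiters may be sleeping" flag: a contending acquirer sets it before
// calling futex(FUTEX_WAIT) and keeps it in gtid_code so the flag survives its
// own eventual acquisition, while the release path below exchanges the word
// back to KMP_LOCK_FREE(futex) and issues futex(FUTEX_WAKE) only if the flag
// was set.
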
1161 // Fast-path test futex lock 1162 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1163 { \ 1164 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1165 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1166 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1167 KMP_FSYNC_ACQUIRED(ftx); \ 1168 rc = TRUE; \ 1169 } else { \ 1170 rc = FALSE; \ 1171 } \ 1172 } 1173 1174 // Fast-path release futex lock 1175 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1176 { \ 1177 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1178 KMP_MB(); \ 1179 KMP_FSYNC_RELEASING(ftx); \ 1180 kmp_int32 poll_val = \ 1181 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1182 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1183 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1184 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1185 } \ 1186 KMP_MB(); \ 1187 KMP_YIELD_OVERSUB(); \ 1188 } 1189 1190 #endif // KMP_USE_FUTEX 1191 1192 #else // KMP_USE_DYNAMIC_LOCK 1193 1194 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1195 ident_t const *loc, 1196 kmp_int32 gtid) { 1197 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1198 1199 // Because of the double-check, the following load doesn't need to be volatile 1200 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1201 1202 if (lck == NULL) { 1203 void *idx; 1204 1205 // Allocate & initialize the lock. 1206 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1207 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1208 __kmp_init_user_lock_with_checks(lck); 1209 __kmp_set_user_lock_location(lck, loc); 1210 #if USE_ITT_BUILD 1211 __kmp_itt_critical_creating(lck); 1212 // __kmp_itt_critical_creating() should be called *before* the first usage 1213 // of underlying lock. It is the only place where we can guarantee it. There 1214 // are chances the lock will destroyed with no usage, but it is not a 1215 // problem, because this is not real event seen by user but rather setting 1216 // name for object (lock). See more details in kmp_itt.h. 1217 #endif /* USE_ITT_BUILD */ 1218 1219 // Use a cmpxchg instruction to slam the start of the critical section with 1220 // the lock pointer. If another thread beat us to it, deallocate the lock, 1221 // and use the lock that the other thread allocated. 1222 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1223 1224 if (status == 0) { 1225 // Deallocate the lock and reload the value. 1226 #if USE_ITT_BUILD 1227 __kmp_itt_critical_destroyed(lck); 1228 // Let ITT know the lock is destroyed and the same memory location may be reused 1229 // for another purpose. 1230 #endif /* USE_ITT_BUILD */ 1231 __kmp_destroy_user_lock_with_checks(lck); 1232 __kmp_user_lock_free(&idx, gtid, lck); 1233 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1234 KMP_DEBUG_ASSERT(lck != NULL); 1235 } 1236 } 1237 return lck; 1238 } 1239 1240 #endif // KMP_USE_DYNAMIC_LOCK 1241 1242 /*! 1243 @ingroup WORK_SHARING 1244 @param loc source location information. 1245 @param global_tid global thread number. 1246 @param crit identity of the critical section. This could be a pointer to a lock 1247 associated with the critical section, or some other suitably unique value. 1248 1249 Enter code protected by a `critical` construct. 1250 This function blocks until the executing thread can enter the critical section. 
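
As an illustrative sketch of how a named <tt>critical</tt> construct might be
lowered (the naming and linkage of the lock variable are implementation
specific; `crit_name_lock`, `loc` and `gtid` are placeholders):
@code
kmp_critical_name crit_name_lock; // zero-initialized word shared by all uses of this name
__kmpc_critical(&loc, gtid, &crit_name_lock);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &crit_name_lock);
@endcode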
1251 */ 1252 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1253 kmp_critical_name *crit) { 1254 #if KMP_USE_DYNAMIC_LOCK 1255 #if OMPT_SUPPORT && OMPT_OPTIONAL 1256 OMPT_STORE_RETURN_ADDRESS(global_tid); 1257 #endif // OMPT_SUPPORT 1258 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1259 #else 1260 KMP_COUNT_BLOCK(OMP_CRITICAL); 1261 #if OMPT_SUPPORT && OMPT_OPTIONAL 1262 ompt_state_t prev_state = ompt_state_undefined; 1263 ompt_thread_info_t ti; 1264 #endif 1265 kmp_user_lock_p lck; 1266 1267 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1268 __kmp_assert_valid_gtid(global_tid); 1269 1270 // TODO: add THR_OVHD_STATE 1271 1272 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1273 KMP_CHECK_USER_LOCK_INIT(); 1274 1275 if ((__kmp_user_lock_kind == lk_tas) && 1276 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1277 lck = (kmp_user_lock_p)crit; 1278 } 1279 #if KMP_USE_FUTEX 1280 else if ((__kmp_user_lock_kind == lk_futex) && 1281 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1282 lck = (kmp_user_lock_p)crit; 1283 } 1284 #endif 1285 else { // ticket, queuing or drdpa 1286 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1287 } 1288 1289 if (__kmp_env_consistency_check) 1290 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1291 1292 // since the critical directive binds to all threads, not just the current 1293 // team we have to check this even if we are in a serialized team. 1294 // also, even if we are the uber thread, we still have to conduct the lock, 1295 // as we have to contend with sibling threads. 1296 1297 #if USE_ITT_BUILD 1298 __kmp_itt_critical_acquiring(lck); 1299 #endif /* USE_ITT_BUILD */ 1300 #if OMPT_SUPPORT && OMPT_OPTIONAL 1301 OMPT_STORE_RETURN_ADDRESS(gtid); 1302 void *codeptr_ra = NULL; 1303 if (ompt_enabled.enabled) { 1304 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1305 /* OMPT state update */ 1306 prev_state = ti.state; 1307 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1308 ti.state = ompt_state_wait_critical; 1309 1310 /* OMPT event callback */ 1311 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1312 if (ompt_enabled.ompt_callback_mutex_acquire) { 1313 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1314 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1315 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1316 } 1317 } 1318 #endif 1319 // Value of 'crit' should be good for using as a critical_id of the critical 1320 // section directive. 
1321 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1322 1323 #if USE_ITT_BUILD 1324 __kmp_itt_critical_acquired(lck); 1325 #endif /* USE_ITT_BUILD */ 1326 #if OMPT_SUPPORT && OMPT_OPTIONAL 1327 if (ompt_enabled.enabled) { 1328 /* OMPT state update */ 1329 ti.state = prev_state; 1330 ti.wait_id = 0; 1331 1332 /* OMPT event callback */ 1333 if (ompt_enabled.ompt_callback_mutex_acquired) { 1334 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1335 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1336 } 1337 } 1338 #endif 1339 KMP_POP_PARTITIONED_TIMER(); 1340 1341 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1342 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1343 #endif // KMP_USE_DYNAMIC_LOCK 1344 } 1345 1346 #if KMP_USE_DYNAMIC_LOCK 1347 1348 // Converts the given hint to an internal lock implementation 1349 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1350 #if KMP_USE_TSX 1351 #define KMP_TSX_LOCK(seq) lockseq_##seq 1352 #else 1353 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1354 #endif 1355 1356 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1357 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) 1358 #else 1359 #define KMP_CPUINFO_RTM 0 1360 #endif 1361 1362 // Hints that do not require further logic 1363 if (hint & kmp_lock_hint_hle) 1364 return KMP_TSX_LOCK(hle); 1365 if (hint & kmp_lock_hint_rtm) 1366 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1367 if (hint & kmp_lock_hint_adaptive) 1368 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1369 1370 // Rule out conflicting hints first by returning the default lock 1371 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1372 return __kmp_user_lock_seq; 1373 if ((hint & omp_lock_hint_speculative) && 1374 (hint & omp_lock_hint_nonspeculative)) 1375 return __kmp_user_lock_seq; 1376 1377 // Do not even consider speculation when it appears to be contended 1378 if (hint & omp_lock_hint_contended) 1379 return lockseq_queuing; 1380 1381 // Uncontended lock without speculation 1382 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1383 return lockseq_tas; 1384 1385 // Use RTM lock for speculation 1386 if (hint & omp_lock_hint_speculative) 1387 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1388 1389 return __kmp_user_lock_seq; 1390 } 1391 1392 #if OMPT_SUPPORT && OMPT_OPTIONAL 1393 #if KMP_USE_DYNAMIC_LOCK 1394 static kmp_mutex_impl_t 1395 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1396 if (user_lock) { 1397 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1398 case 0: 1399 break; 1400 #if KMP_USE_FUTEX 1401 case locktag_futex: 1402 return kmp_mutex_impl_queuing; 1403 #endif 1404 case locktag_tas: 1405 return kmp_mutex_impl_spin; 1406 #if KMP_USE_TSX 1407 case locktag_hle: 1408 case locktag_rtm_spin: 1409 return kmp_mutex_impl_speculative; 1410 #endif 1411 default: 1412 return kmp_mutex_impl_none; 1413 } 1414 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1415 } 1416 KMP_ASSERT(ilock); 1417 switch (ilock->type) { 1418 #if KMP_USE_TSX 1419 case locktag_adaptive: 1420 case locktag_rtm_queuing: 1421 return kmp_mutex_impl_speculative; 1422 #endif 1423 case locktag_nested_tas: 1424 return kmp_mutex_impl_spin; 1425 #if KMP_USE_FUTEX 1426 case locktag_nested_futex: 1427 #endif 1428 case locktag_ticket: 1429 case locktag_queuing: 1430 case locktag_drdpa: 1431 case locktag_nested_ticket: 1432 case locktag_nested_queuing: 1433 case locktag_nested_drdpa: 1434 return kmp_mutex_impl_queuing; 1435 default: 1436 return kmp_mutex_impl_none; 1437 } 1438 } 1439 #else 1440 // For locks without dynamic binding 1441 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1442 switch (__kmp_user_lock_kind) { 1443 case lk_tas: 1444 return kmp_mutex_impl_spin; 1445 #if KMP_USE_FUTEX 1446 case lk_futex: 1447 #endif 1448 case lk_ticket: 1449 case lk_queuing: 1450 case lk_drdpa: 1451 return kmp_mutex_impl_queuing; 1452 #if KMP_USE_TSX 1453 case lk_hle: 1454 case lk_rtm_queuing: 1455 case lk_rtm_spin: 1456 case lk_adaptive: 1457 return kmp_mutex_impl_speculative; 1458 #endif 1459 default: 1460 return kmp_mutex_impl_none; 1461 } 1462 } 1463 #endif // KMP_USE_DYNAMIC_LOCK 1464 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1465 1466 /*! 1467 @ingroup WORK_SHARING 1468 @param loc source location information. 1469 @param global_tid global thread number. 1470 @param crit identity of the critical section. This could be a pointer to a lock 1471 associated with the critical section, or some other suitably unique value. 1472 @param hint the lock hint. 1473 1474 Enter code protected by a `critical` construct with a hint. The hint value is 1475 used to suggest a lock implementation. This function blocks until the executing 1476 thread can enter the critical section unless the hint suggests use of 1477 speculative execution and the hardware supports it. 1478 */ 1479 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1480 kmp_critical_name *crit, uint32_t hint) { 1481 KMP_COUNT_BLOCK(OMP_CRITICAL); 1482 kmp_user_lock_p lck; 1483 #if OMPT_SUPPORT && OMPT_OPTIONAL 1484 ompt_state_t prev_state = ompt_state_undefined; 1485 ompt_thread_info_t ti; 1486 // This is the case, if called from __kmpc_critical: 1487 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1488 if (!codeptr) 1489 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1490 #endif 1491 1492 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1493 __kmp_assert_valid_gtid(global_tid); 1494 1495 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1496 // Check if it is initialized. 
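  // (Descriptive note: the lock word in *crit is initialized lazily here. For
  // "direct" lock kinds the tag is installed in place with a compare-and-store;
  // for "indirect" kinds __kmp_init_indirect_csptr() installs a pointer to a
  // separately allocated lock object and itself tolerates losing that race.)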
1497 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1498 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); 1499 if (*lk == 0) { 1500 if (KMP_IS_D_LOCK(lockseq)) { 1501 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1502 KMP_GET_D_TAG(lockseq)); 1503 } else { 1504 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); 1505 } 1506 } 1507 // Branch for accessing the actual lock object and set operation. This 1508 // branching is inevitable since this lock initialization does not follow the 1509 // normal dispatch path (lock table is not used). 1510 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1511 lck = (kmp_user_lock_p)lk; 1512 if (__kmp_env_consistency_check) { 1513 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1514 __kmp_map_hint_to_lock(hint)); 1515 } 1516 #if USE_ITT_BUILD 1517 __kmp_itt_critical_acquiring(lck); 1518 #endif 1519 #if OMPT_SUPPORT && OMPT_OPTIONAL 1520 if (ompt_enabled.enabled) { 1521 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1522 /* OMPT state update */ 1523 prev_state = ti.state; 1524 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1525 ti.state = ompt_state_wait_critical; 1526 1527 /* OMPT event callback */ 1528 if (ompt_enabled.ompt_callback_mutex_acquire) { 1529 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1530 ompt_mutex_critical, (unsigned int)hint, 1531 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1532 codeptr); 1533 } 1534 } 1535 #endif 1536 #if KMP_USE_INLINED_TAS 1537 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { 1538 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1539 } else 1540 #elif KMP_USE_INLINED_FUTEX 1541 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { 1542 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1543 } else 1544 #endif 1545 { 1546 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1547 } 1548 } else { 1549 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1550 lck = ilk->lock; 1551 if (__kmp_env_consistency_check) { 1552 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1553 __kmp_map_hint_to_lock(hint)); 1554 } 1555 #if USE_ITT_BUILD 1556 __kmp_itt_critical_acquiring(lck); 1557 #endif 1558 #if OMPT_SUPPORT && OMPT_OPTIONAL 1559 if (ompt_enabled.enabled) { 1560 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1561 /* OMPT state update */ 1562 prev_state = ti.state; 1563 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1564 ti.state = ompt_state_wait_critical; 1565 1566 /* OMPT event callback */ 1567 if (ompt_enabled.ompt_callback_mutex_acquire) { 1568 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1569 ompt_mutex_critical, (unsigned int)hint, 1570 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1571 codeptr); 1572 } 1573 } 1574 #endif 1575 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1576 } 1577 KMP_POP_PARTITIONED_TIMER(); 1578 1579 #if USE_ITT_BUILD 1580 __kmp_itt_critical_acquired(lck); 1581 #endif /* USE_ITT_BUILD */ 1582 #if OMPT_SUPPORT && OMPT_OPTIONAL 1583 if (ompt_enabled.enabled) { 1584 /* OMPT state update */ 1585 ti.state = prev_state; 1586 ti.wait_id = 0; 1587 1588 /* OMPT event callback */ 1589 if (ompt_enabled.ompt_callback_mutex_acquired) { 1590 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1591 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1592 } 1593 } 1594 #endif 1595 1596 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1597 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1598 } // __kmpc_critical_with_hint 1599 1600 #endif // KMP_USE_DYNAMIC_LOCK 1601 1602 /*! 
1603 @ingroup WORK_SHARING 1604 @param loc source location information. 1605 @param global_tid global thread number . 1606 @param crit identity of the critical section. This could be a pointer to a lock 1607 associated with the critical section, or some other suitably unique value. 1608 1609 Leave a critical section, releasing any lock that was held during its execution. 1610 */ 1611 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1612 kmp_critical_name *crit) { 1613 kmp_user_lock_p lck; 1614 1615 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1616 1617 #if KMP_USE_DYNAMIC_LOCK 1618 int locktag = KMP_EXTRACT_D_TAG(crit); 1619 if (locktag) { 1620 lck = (kmp_user_lock_p)crit; 1621 KMP_ASSERT(lck != NULL); 1622 if (__kmp_env_consistency_check) { 1623 __kmp_pop_sync(global_tid, ct_critical, loc); 1624 } 1625 #if USE_ITT_BUILD 1626 __kmp_itt_critical_releasing(lck); 1627 #endif 1628 #if KMP_USE_INLINED_TAS 1629 if (locktag == locktag_tas && !__kmp_env_consistency_check) { 1630 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1631 } else 1632 #elif KMP_USE_INLINED_FUTEX 1633 if (locktag == locktag_futex && !__kmp_env_consistency_check) { 1634 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1635 } else 1636 #endif 1637 { 1638 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1639 } 1640 } else { 1641 kmp_indirect_lock_t *ilk = 1642 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1643 KMP_ASSERT(ilk != NULL); 1644 lck = ilk->lock; 1645 if (__kmp_env_consistency_check) { 1646 __kmp_pop_sync(global_tid, ct_critical, loc); 1647 } 1648 #if USE_ITT_BUILD 1649 __kmp_itt_critical_releasing(lck); 1650 #endif 1651 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1652 } 1653 1654 #else // KMP_USE_DYNAMIC_LOCK 1655 1656 if ((__kmp_user_lock_kind == lk_tas) && 1657 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1658 lck = (kmp_user_lock_p)crit; 1659 } 1660 #if KMP_USE_FUTEX 1661 else if ((__kmp_user_lock_kind == lk_futex) && 1662 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1663 lck = (kmp_user_lock_p)crit; 1664 } 1665 #endif 1666 else { // ticket, queuing or drdpa 1667 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1668 } 1669 1670 KMP_ASSERT(lck != NULL); 1671 1672 if (__kmp_env_consistency_check) 1673 __kmp_pop_sync(global_tid, ct_critical, loc); 1674 1675 #if USE_ITT_BUILD 1676 __kmp_itt_critical_releasing(lck); 1677 #endif /* USE_ITT_BUILD */ 1678 // Value of 'crit' should be good for using as a critical_id of the critical 1679 // section directive. 1680 __kmp_release_user_lock_with_checks(lck, global_tid); 1681 1682 #endif // KMP_USE_DYNAMIC_LOCK 1683 1684 #if OMPT_SUPPORT && OMPT_OPTIONAL 1685 /* OMPT release event triggers after lock is released; place here to trigger 1686 * for all #if branches */ 1687 OMPT_STORE_RETURN_ADDRESS(global_tid); 1688 if (ompt_enabled.ompt_callback_mutex_released) { 1689 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1690 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1691 OMPT_LOAD_RETURN_ADDRESS(0)); 1692 } 1693 #endif 1694 1695 KMP_POP_PARTITIONED_TIMER(); 1696 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1697 } 1698 1699 /*! 1700 @ingroup SYNCHRONIZATION 1701 @param loc source location information 1702 @param global_tid thread id. 1703 @return one if the thread should execute the master block, zero otherwise 1704 1705 Start execution of a combined barrier and master. The barrier is executed inside 1706 this function. 
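
An illustrative pattern (sketch only): every thread of the team makes this call,
the barrier is performed inside it, and only the master continues into the
guarded block; `loc` and `gtid` are placeholders.
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... master-only code ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the threads still waiting
}
@endcode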
1707 */
1708 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1709   int status;
1710   KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1711   __kmp_assert_valid_gtid(global_tid);
1712
1713   if (!TCR_4(__kmp_init_parallel))
1714     __kmp_parallel_initialize();
1715
1716   __kmp_resume_if_soft_paused();
1717
1718   if (__kmp_env_consistency_check)
1719     __kmp_check_barrier(global_tid, ct_barrier, loc);
1720
1721 #if OMPT_SUPPORT
1722   ompt_frame_t *ompt_frame;
1723   if (ompt_enabled.enabled) {
1724     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1725     if (ompt_frame->enter_frame.ptr == NULL)
1726       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1727   }
1728   OMPT_STORE_RETURN_ADDRESS(global_tid);
1729 #endif
1730 #if USE_ITT_NOTIFY
1731   __kmp_threads[global_tid]->th.th_ident = loc;
1732 #endif
1733   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1734 #if OMPT_SUPPORT && OMPT_OPTIONAL
1735   if (ompt_enabled.enabled) {
1736     ompt_frame->enter_frame = ompt_data_none;
1737   }
1738 #endif
1739
1740   return (status != 0) ? 0 : 1;
1741 }
1742
1743 /*!
1744 @ingroup SYNCHRONIZATION
1745 @param loc source location information
1746 @param global_tid thread id.
1747
1748 Complete the execution of a combined barrier and master. This function should
1749 only be called at the completion of the <tt>master</tt> code. Other threads will
1750 still be waiting at the barrier and this call releases them.
1751 */
1752 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1753   KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1754   __kmp_assert_valid_gtid(global_tid);
1755   __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1756 }
1757
1758 /*!
1759 @ingroup SYNCHRONIZATION
1760 @param loc source location information
1761 @param global_tid thread id.
1762 @return one if the thread should execute the master block, zero otherwise
1763
1764 Start execution of a combined barrier and master(nowait) construct.
1765 The barrier is executed inside this function.
1766 There is no equivalent "end" function, since the construct is nowait: the other threads are released by the barrier executed inside this function and do not wait for the master code to complete, so there is nothing for an "end" call to release.
1767 */
1768 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1769   kmp_int32 ret;
1770   KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1771   __kmp_assert_valid_gtid(global_tid);
1772
1773   if (!TCR_4(__kmp_init_parallel))
1774     __kmp_parallel_initialize();
1775
1776   __kmp_resume_if_soft_paused();
1777
1778   if (__kmp_env_consistency_check) {
1779     if (loc == 0) {
1780       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1781 } 1782 __kmp_check_barrier(global_tid, ct_barrier, loc); 1783 } 1784 1785 #if OMPT_SUPPORT 1786 ompt_frame_t *ompt_frame; 1787 if (ompt_enabled.enabled) { 1788 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1789 if (ompt_frame->enter_frame.ptr == NULL) 1790 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1791 } 1792 OMPT_STORE_RETURN_ADDRESS(global_tid); 1793 #endif 1794 #if USE_ITT_NOTIFY 1795 __kmp_threads[global_tid]->th.th_ident = loc; 1796 #endif 1797 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1798 #if OMPT_SUPPORT && OMPT_OPTIONAL 1799 if (ompt_enabled.enabled) { 1800 ompt_frame->enter_frame = ompt_data_none; 1801 } 1802 #endif 1803 1804 ret = __kmpc_master(loc, global_tid); 1805 1806 if (__kmp_env_consistency_check) { 1807 /* there's no __kmpc_end_master called; so the (stats) */ 1808 /* actions of __kmpc_end_master are done here */ 1809 if (ret) { 1810 /* only one thread should do the pop since only */ 1811 /* one did the push (see __kmpc_master()) */ 1812 __kmp_pop_sync(global_tid, ct_master, loc); 1813 } 1814 } 1815 1816 return (ret); 1817 } 1818 1819 /* The BARRIER for a SINGLE process section is always explicit */ 1820 /*! 1821 @ingroup WORK_SHARING 1822 @param loc source location information 1823 @param global_tid global thread number 1824 @return One if this thread should execute the single construct, zero otherwise. 1825 1826 Test whether to execute a <tt>single</tt> construct. 1827 There are no implicit barriers in the two "single" calls, rather the compiler 1828 should introduce an explicit barrier if it is required. 1829 */ 1830 1831 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1832 __kmp_assert_valid_gtid(global_tid); 1833 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1834 1835 if (rc) { 1836 // We are going to execute the single statement, so we should count it. 1837 KMP_COUNT_BLOCK(OMP_SINGLE); 1838 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1839 } 1840 1841 #if OMPT_SUPPORT && OMPT_OPTIONAL 1842 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1843 kmp_team_t *team = this_thr->th.th_team; 1844 int tid = __kmp_tid_from_gtid(global_tid); 1845 1846 if (ompt_enabled.enabled) { 1847 if (rc) { 1848 if (ompt_enabled.ompt_callback_work) { 1849 ompt_callbacks.ompt_callback(ompt_callback_work)( 1850 ompt_work_single_executor, ompt_scope_begin, 1851 &(team->t.ompt_team_info.parallel_data), 1852 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1853 1, OMPT_GET_RETURN_ADDRESS(0)); 1854 } 1855 } else { 1856 if (ompt_enabled.ompt_callback_work) { 1857 ompt_callbacks.ompt_callback(ompt_callback_work)( 1858 ompt_work_single_other, ompt_scope_begin, 1859 &(team->t.ompt_team_info.parallel_data), 1860 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1861 1, OMPT_GET_RETURN_ADDRESS(0)); 1862 ompt_callbacks.ompt_callback(ompt_callback_work)( 1863 ompt_work_single_other, ompt_scope_end, 1864 &(team->t.ompt_team_info.parallel_data), 1865 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1866 1, OMPT_GET_RETURN_ADDRESS(0)); 1867 } 1868 } 1869 } 1870 #endif 1871 1872 return rc; 1873 } 1874 1875 /*! 1876 @ingroup WORK_SHARING 1877 @param loc source location information 1878 @param global_tid global thread number 1879 1880 Mark the end of a <tt>single</tt> construct. This function should 1881 only be called by the thread that executed the block of code protected 1882 by the `single` construct. 
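As an illustration only (code generation is compiler specific; the names
<tt>loc</tt> and <tt>gtid</tt> are placeholders), a <tt>single</tt> construct
without a nowait clause might be lowered to:
@code
if (__kmpc_single(&loc, gtid)) {
  // body of the single region, executed by exactly one thread
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier; omitted for single nowait
@endcode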
1883 */ 1884 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1885 __kmp_assert_valid_gtid(global_tid); 1886 __kmp_exit_single(global_tid); 1887 KMP_POP_PARTITIONED_TIMER(); 1888 1889 #if OMPT_SUPPORT && OMPT_OPTIONAL 1890 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1891 kmp_team_t *team = this_thr->th.th_team; 1892 int tid = __kmp_tid_from_gtid(global_tid); 1893 1894 if (ompt_enabled.ompt_callback_work) { 1895 ompt_callbacks.ompt_callback(ompt_callback_work)( 1896 ompt_work_single_executor, ompt_scope_end, 1897 &(team->t.ompt_team_info.parallel_data), 1898 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1899 OMPT_GET_RETURN_ADDRESS(0)); 1900 } 1901 #endif 1902 } 1903 1904 /*! 1905 @ingroup WORK_SHARING 1906 @param loc Source location 1907 @param global_tid Global thread id 1908 1909 Mark the end of a statically scheduled loop. 1910 */ 1911 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1912 KMP_POP_PARTITIONED_TIMER(); 1913 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1914 1915 #if OMPT_SUPPORT && OMPT_OPTIONAL 1916 if (ompt_enabled.ompt_callback_work) { 1917 ompt_work_t ompt_work_type = ompt_work_loop; 1918 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1919 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1920 // Determine workshare type 1921 if (loc != NULL) { 1922 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1923 ompt_work_type = ompt_work_loop; 1924 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1925 ompt_work_type = ompt_work_sections; 1926 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1927 ompt_work_type = ompt_work_distribute; 1928 } else { 1929 // use default set above. 1930 // a warning about this case is provided in __kmpc_for_static_init 1931 } 1932 KMP_DEBUG_ASSERT(ompt_work_type); 1933 } 1934 ompt_callbacks.ompt_callback(ompt_callback_work)( 1935 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1936 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1937 } 1938 #endif 1939 if (__kmp_env_consistency_check) 1940 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1941 } 1942 1943 // User routines which take C-style arguments (call by value) 1944 // different from the Fortran equivalent routines 1945 1946 void ompc_set_num_threads(int arg) { 1947 // !!!!! TODO: check the per-task binding 1948 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1949 } 1950 1951 void ompc_set_dynamic(int flag) { 1952 kmp_info_t *thread; 1953 1954 /* For the thread-private implementation of the internal controls */ 1955 thread = __kmp_entry_thread(); 1956 1957 __kmp_save_internal_controls(thread); 1958 1959 set__dynamic(thread, flag ? true : false); 1960 } 1961 1962 void ompc_set_nested(int flag) { 1963 kmp_info_t *thread; 1964 1965 /* For the thread-private internal controls implementation */ 1966 thread = __kmp_entry_thread(); 1967 1968 __kmp_save_internal_controls(thread); 1969 1970 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1971 } 1972 1973 void ompc_set_max_active_levels(int max_active_levels) { 1974 /* TO DO */ 1975 /* we want per-task implementation of this internal control */ 1976 1977 /* For the per-thread internal controls implementation */ 1978 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1979 } 1980 1981 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1982 // !!!!! 
TODO: check the per-task binding 1983 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1984 } 1985 1986 int ompc_get_ancestor_thread_num(int level) { 1987 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1988 } 1989 1990 int ompc_get_team_size(int level) { 1991 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1992 } 1993 1994 /* OpenMP 5.0 Affinity Format API */ 1995 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 1996 if (!__kmp_init_serial) { 1997 __kmp_serial_initialize(); 1998 } 1999 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2000 format, KMP_STRLEN(format) + 1); 2001 } 2002 2003 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2004 size_t format_size; 2005 if (!__kmp_init_serial) { 2006 __kmp_serial_initialize(); 2007 } 2008 format_size = KMP_STRLEN(__kmp_affinity_format); 2009 if (buffer && size) { 2010 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2011 format_size + 1); 2012 } 2013 return format_size; 2014 } 2015 2016 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2017 int gtid; 2018 if (!TCR_4(__kmp_init_middle)) { 2019 __kmp_middle_initialize(); 2020 } 2021 __kmp_assign_root_init_mask(); 2022 gtid = __kmp_get_gtid(); 2023 __kmp_aux_display_affinity(gtid, format); 2024 } 2025 2026 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2027 char const *format) { 2028 int gtid; 2029 size_t num_required; 2030 kmp_str_buf_t capture_buf; 2031 if (!TCR_4(__kmp_init_middle)) { 2032 __kmp_middle_initialize(); 2033 } 2034 __kmp_assign_root_init_mask(); 2035 gtid = __kmp_get_gtid(); 2036 __kmp_str_buf_init(&capture_buf); 2037 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2038 if (buffer && buf_size) { 2039 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2040 capture_buf.used + 1); 2041 } 2042 __kmp_str_buf_free(&capture_buf); 2043 return num_required; 2044 } 2045 2046 void kmpc_set_stacksize(int arg) { 2047 // __kmp_aux_set_stacksize initializes the library if needed 2048 __kmp_aux_set_stacksize(arg); 2049 } 2050 2051 void kmpc_set_stacksize_s(size_t arg) { 2052 // __kmp_aux_set_stacksize initializes the library if needed 2053 __kmp_aux_set_stacksize(arg); 2054 } 2055 2056 void kmpc_set_blocktime(int arg) { 2057 int gtid, tid; 2058 kmp_info_t *thread; 2059 2060 gtid = __kmp_entry_gtid(); 2061 tid = __kmp_tid_from_gtid(gtid); 2062 thread = __kmp_thread_from_gtid(gtid); 2063 2064 __kmp_aux_set_blocktime(arg, thread, tid); 2065 } 2066 2067 void kmpc_set_library(int arg) { 2068 // __kmp_user_set_library initializes the library if needed 2069 __kmp_user_set_library((enum library_type)arg); 2070 } 2071 2072 void kmpc_set_defaults(char const *str) { 2073 // __kmp_aux_set_defaults initializes the library if needed 2074 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2075 } 2076 2077 void kmpc_set_disp_num_buffers(int arg) { 2078 // ignore after initialization because some teams have already 2079 // allocated dispatch buffers 2080 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2081 arg <= KMP_MAX_DISP_NUM_BUFF) { 2082 __kmp_dispatch_num_buffers = arg; 2083 } 2084 } 2085 2086 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2087 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2088 return -1; 2089 #else 2090 if (!TCR_4(__kmp_init_middle)) { 2091 __kmp_middle_initialize(); 2092 } 2093 __kmp_assign_root_init_mask(); 2094 return __kmp_aux_set_affinity_mask_proc(proc, 
mask); 2095 #endif 2096 } 2097 2098 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2099 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2100 return -1; 2101 #else 2102 if (!TCR_4(__kmp_init_middle)) { 2103 __kmp_middle_initialize(); 2104 } 2105 __kmp_assign_root_init_mask(); 2106 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2107 #endif 2108 } 2109 2110 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2111 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2112 return -1; 2113 #else 2114 if (!TCR_4(__kmp_init_middle)) { 2115 __kmp_middle_initialize(); 2116 } 2117 __kmp_assign_root_init_mask(); 2118 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2119 #endif 2120 } 2121 2122 /* -------------------------------------------------------------------------- */ 2123 /*! 2124 @ingroup THREADPRIVATE 2125 @param loc source location information 2126 @param gtid global thread number 2127 @param cpy_size size of the cpy_data buffer 2128 @param cpy_data pointer to data to be copied 2129 @param cpy_func helper function to call for copying data 2130 @param didit flag variable: 1=single thread; 0=not single thread 2131 2132 __kmpc_copyprivate implements the interface for the private data broadcast 2133 needed for the copyprivate clause associated with a single region in an 2134 OpenMP<sup>*</sup> program (both C and Fortran). 2135 All threads participating in the parallel region call this routine. 2136 One of the threads (called the single thread) should have the <tt>didit</tt> 2137 variable set to 1 and all other threads should have that variable set to 0. 2138 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2139 2140 The OpenMP specification forbids the use of nowait on the single region when a 2141 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2142 barrier internally to avoid race conditions, so the code generation for the 2143 single region should avoid generating a barrier after the call to @ref 2144 __kmpc_copyprivate. 2145 2146 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2147 The <tt>loc</tt> parameter is a pointer to source location information. 2148 2149 Internal implementation: The single thread will first copy its descriptor 2150 address (cpy_data) to a team-private location, then the other threads will each 2151 call the function pointed to by the parameter cpy_func, which carries out the 2152 copy by copying the data using the cpy_data buffer. 2153 2154 The cpy_func routine used for the copy and the contents of the data area defined 2155 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2156 to be done. For instance, the cpy_data buffer can hold the actual data to be 2157 copied or it may hold a list of pointers to the data. The cpy_func routine must 2158 interpret the cpy_data buffer appropriately. 2159 2160 The interface to cpy_func is as follows: 2161 @code 2162 void cpy_func( void *destination, void *source ) 2163 @endcode 2164 where void *destination is the cpy_data pointer for the thread being copied to 2165 and void *source is the cpy_data pointer for the thread being copied from. 
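As an illustration only (the helper and variable names below are hypothetical;
the real code is emitted by the compiler), broadcasting one scalar from the
single thread could look like:
@code
static void bcast_int(void *dst, void *src) {
  // every thread passed the address of its private copy as cpy_data
  *(int *)dst = *(int *)src;
}

int priv;                                    // private copy in every thread
kmp_int32 didit = __kmpc_single(&loc, gtid); // 1 only in the single thread
if (didit) {
  priv = 42; // value produced inside the single region
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(priv), &priv, bcast_int, didit);
@endcode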
2166 */ 2167 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2168 void *cpy_data, void (*cpy_func)(void *, void *), 2169 kmp_int32 didit) { 2170 void **data_ptr; 2171 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2172 __kmp_assert_valid_gtid(gtid); 2173 2174 KMP_MB(); 2175 2176 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2177 2178 if (__kmp_env_consistency_check) { 2179 if (loc == 0) { 2180 KMP_WARNING(ConstructIdentInvalid); 2181 } 2182 } 2183 2184 // ToDo: Optimize the following two barriers into some kind of split barrier 2185 2186 if (didit) 2187 *data_ptr = cpy_data; 2188 2189 #if OMPT_SUPPORT 2190 ompt_frame_t *ompt_frame; 2191 if (ompt_enabled.enabled) { 2192 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2193 if (ompt_frame->enter_frame.ptr == NULL) 2194 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2195 } 2196 OMPT_STORE_RETURN_ADDRESS(gtid); 2197 #endif 2198 /* This barrier is not a barrier region boundary */ 2199 #if USE_ITT_NOTIFY 2200 __kmp_threads[gtid]->th.th_ident = loc; 2201 #endif 2202 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2203 2204 if (!didit) 2205 (*cpy_func)(cpy_data, *data_ptr); 2206 2207 // Consider next barrier a user-visible barrier for barrier region boundaries 2208 // Nesting checks are already handled by the single construct checks 2209 { 2210 #if OMPT_SUPPORT 2211 OMPT_STORE_RETURN_ADDRESS(gtid); 2212 #endif 2213 #if USE_ITT_NOTIFY 2214 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2215 // tasks can overwrite the location) 2216 #endif 2217 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2218 #if OMPT_SUPPORT && OMPT_OPTIONAL 2219 if (ompt_enabled.enabled) { 2220 ompt_frame->enter_frame = ompt_data_none; 2221 } 2222 #endif 2223 } 2224 } 2225 2226 /* -------------------------------------------------------------------------- */ 2227 2228 #define INIT_LOCK __kmp_init_user_lock_with_checks 2229 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2230 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2231 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2232 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2233 #define ACQUIRE_NESTED_LOCK_TIMED \ 2234 __kmp_acquire_nested_user_lock_with_checks_timed 2235 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2236 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2237 #define TEST_LOCK __kmp_test_user_lock_with_checks 2238 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2239 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2240 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2241 2242 // TODO: Make check abort messages use location info & pass it into 2243 // with_checks routines 2244 2245 #if KMP_USE_DYNAMIC_LOCK 2246 2247 // internal lock initializer 2248 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2249 kmp_dyna_lockseq_t seq) { 2250 if (KMP_IS_D_LOCK(seq)) { 2251 KMP_INIT_D_LOCK(lock, seq); 2252 #if USE_ITT_BUILD 2253 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2254 #endif 2255 } else { 2256 KMP_INIT_I_LOCK(lock, seq); 2257 #if USE_ITT_BUILD 2258 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2259 __kmp_itt_lock_creating(ilk->lock, loc); 2260 #endif 2261 } 2262 } 2263 2264 // internal nest lock initializer 2265 static __forceinline void 2266 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2267 kmp_dyna_lockseq_t seq) { 2268 #if KMP_USE_TSX 2269 // Don't have nested lock implementation for speculative locks 2270 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2271 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2272 seq = __kmp_user_lock_seq; 2273 #endif 2274 switch (seq) { 2275 case lockseq_tas: 2276 seq = lockseq_nested_tas; 2277 break; 2278 #if KMP_USE_FUTEX 2279 case lockseq_futex: 2280 seq = lockseq_nested_futex; 2281 break; 2282 #endif 2283 case lockseq_ticket: 2284 seq = lockseq_nested_ticket; 2285 break; 2286 case lockseq_queuing: 2287 seq = lockseq_nested_queuing; 2288 break; 2289 case lockseq_drdpa: 2290 seq = lockseq_nested_drdpa; 2291 break; 2292 default: 2293 seq = lockseq_nested_queuing; 2294 } 2295 KMP_INIT_I_LOCK(lock, seq); 2296 #if USE_ITT_BUILD 2297 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2298 __kmp_itt_lock_creating(ilk->lock, loc); 2299 #endif 2300 } 2301 2302 /* initialize the lock with a hint */ 2303 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2304 uintptr_t hint) { 2305 KMP_DEBUG_ASSERT(__kmp_init_serial); 2306 if (__kmp_env_consistency_check && user_lock == NULL) { 2307 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2308 } 2309 2310 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2311 2312 #if OMPT_SUPPORT && OMPT_OPTIONAL 2313 // This is the case, if called from omp_init_lock_with_hint: 2314 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2315 if (!codeptr) 2316 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2317 if (ompt_enabled.ompt_callback_lock_init) { 2318 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2319 ompt_mutex_lock, (omp_lock_hint_t)hint, 2320 __ompt_get_mutex_impl_type(user_lock), 2321 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2322 } 2323 #endif 2324 } 2325 2326 /* initialize the lock with a hint */ 2327 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2328 void **user_lock, uintptr_t hint) { 2329 KMP_DEBUG_ASSERT(__kmp_init_serial); 2330 if (__kmp_env_consistency_check && user_lock == NULL) { 2331 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2332 } 2333 2334 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2335 2336 #if OMPT_SUPPORT && OMPT_OPTIONAL 2337 // This is the case, if called from omp_init_lock_with_hint: 2338 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2339 if (!codeptr) 2340 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2341 if (ompt_enabled.ompt_callback_lock_init) { 2342 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2343 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2344 __ompt_get_mutex_impl_type(user_lock), 2345 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2346 } 2347 #endif 2348 } 2349 2350 #endif // KMP_USE_DYNAMIC_LOCK 2351 2352 /* initialize the lock */ 2353 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2354 #if KMP_USE_DYNAMIC_LOCK 2355 2356 KMP_DEBUG_ASSERT(__kmp_init_serial); 2357 if (__kmp_env_consistency_check && user_lock == NULL) { 2358 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2359 } 2360 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2361 2362 #if OMPT_SUPPORT && OMPT_OPTIONAL 2363 // This is the case, if called from omp_init_lock_with_hint: 2364 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2365 if (!codeptr) 2366 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2367 if (ompt_enabled.ompt_callback_lock_init) { 2368 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2369 
ompt_mutex_lock, omp_lock_hint_none, 2370 __ompt_get_mutex_impl_type(user_lock), 2371 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2372 } 2373 #endif 2374 2375 #else // KMP_USE_DYNAMIC_LOCK 2376 2377 static char const *const func = "omp_init_lock"; 2378 kmp_user_lock_p lck; 2379 KMP_DEBUG_ASSERT(__kmp_init_serial); 2380 2381 if (__kmp_env_consistency_check) { 2382 if (user_lock == NULL) { 2383 KMP_FATAL(LockIsUninitialized, func); 2384 } 2385 } 2386 2387 KMP_CHECK_USER_LOCK_INIT(); 2388 2389 if ((__kmp_user_lock_kind == lk_tas) && 2390 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2391 lck = (kmp_user_lock_p)user_lock; 2392 } 2393 #if KMP_USE_FUTEX 2394 else if ((__kmp_user_lock_kind == lk_futex) && 2395 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2396 lck = (kmp_user_lock_p)user_lock; 2397 } 2398 #endif 2399 else { 2400 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2401 } 2402 INIT_LOCK(lck); 2403 __kmp_set_user_lock_location(lck, loc); 2404 2405 #if OMPT_SUPPORT && OMPT_OPTIONAL 2406 // This is the case, if called from omp_init_lock_with_hint: 2407 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2408 if (!codeptr) 2409 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2410 if (ompt_enabled.ompt_callback_lock_init) { 2411 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2412 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2413 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2414 } 2415 #endif 2416 2417 #if USE_ITT_BUILD 2418 __kmp_itt_lock_creating(lck); 2419 #endif /* USE_ITT_BUILD */ 2420 2421 #endif // KMP_USE_DYNAMIC_LOCK 2422 } // __kmpc_init_lock 2423 2424 /* initialize the lock */ 2425 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2426 #if KMP_USE_DYNAMIC_LOCK 2427 2428 KMP_DEBUG_ASSERT(__kmp_init_serial); 2429 if (__kmp_env_consistency_check && user_lock == NULL) { 2430 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2431 } 2432 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2433 2434 #if OMPT_SUPPORT && OMPT_OPTIONAL 2435 // This is the case, if called from omp_init_lock_with_hint: 2436 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2437 if (!codeptr) 2438 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2439 if (ompt_enabled.ompt_callback_lock_init) { 2440 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2441 ompt_mutex_nest_lock, omp_lock_hint_none, 2442 __ompt_get_mutex_impl_type(user_lock), 2443 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2444 } 2445 #endif 2446 2447 #else // KMP_USE_DYNAMIC_LOCK 2448 2449 static char const *const func = "omp_init_nest_lock"; 2450 kmp_user_lock_p lck; 2451 KMP_DEBUG_ASSERT(__kmp_init_serial); 2452 2453 if (__kmp_env_consistency_check) { 2454 if (user_lock == NULL) { 2455 KMP_FATAL(LockIsUninitialized, func); 2456 } 2457 } 2458 2459 KMP_CHECK_USER_LOCK_INIT(); 2460 2461 if ((__kmp_user_lock_kind == lk_tas) && 2462 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2463 OMP_NEST_LOCK_T_SIZE)) { 2464 lck = (kmp_user_lock_p)user_lock; 2465 } 2466 #if KMP_USE_FUTEX 2467 else if ((__kmp_user_lock_kind == lk_futex) && 2468 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2469 OMP_NEST_LOCK_T_SIZE)) { 2470 lck = (kmp_user_lock_p)user_lock; 2471 } 2472 #endif 2473 else { 2474 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2475 } 2476 2477 INIT_NESTED_LOCK(lck); 2478 __kmp_set_user_lock_location(lck, loc); 2479 2480 #if OMPT_SUPPORT && OMPT_OPTIONAL 2481 // This is the case, if called from omp_init_lock_with_hint: 2482 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2483 if (!codeptr) 2484 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2485 if (ompt_enabled.ompt_callback_lock_init) { 2486 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2487 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2488 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2489 } 2490 #endif 2491 2492 #if USE_ITT_BUILD 2493 __kmp_itt_lock_creating(lck); 2494 #endif /* USE_ITT_BUILD */ 2495 2496 #endif // KMP_USE_DYNAMIC_LOCK 2497 } // __kmpc_init_nest_lock 2498 2499 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2500 #if KMP_USE_DYNAMIC_LOCK 2501 2502 #if USE_ITT_BUILD 2503 kmp_user_lock_p lck; 2504 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2505 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2506 } else { 2507 lck = (kmp_user_lock_p)user_lock; 2508 } 2509 __kmp_itt_lock_destroyed(lck); 2510 #endif 2511 #if OMPT_SUPPORT && OMPT_OPTIONAL 2512 // This is the case, if called from omp_init_lock_with_hint: 2513 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2514 if (!codeptr) 2515 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2516 if (ompt_enabled.ompt_callback_lock_destroy) { 2517 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2518 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2519 } 2520 #endif 2521 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2522 #else 2523 kmp_user_lock_p lck; 2524 2525 if ((__kmp_user_lock_kind == lk_tas) && 2526 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2527 lck = (kmp_user_lock_p)user_lock; 2528 } 2529 #if KMP_USE_FUTEX 2530 else if ((__kmp_user_lock_kind == lk_futex) && 2531 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2532 lck = (kmp_user_lock_p)user_lock; 2533 } 2534 #endif 2535 else { 2536 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2537 } 2538 2539 #if OMPT_SUPPORT && OMPT_OPTIONAL 2540 // This is the case, if called from omp_init_lock_with_hint: 2541 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2542 if (!codeptr) 2543 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2544 if (ompt_enabled.ompt_callback_lock_destroy) { 2545 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2546 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2547 } 2548 #endif 2549 2550 #if USE_ITT_BUILD 2551 __kmp_itt_lock_destroyed(lck); 2552 #endif /* USE_ITT_BUILD */ 2553 DESTROY_LOCK(lck); 2554 2555 if ((__kmp_user_lock_kind == lk_tas) && 2556 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2557 ; 2558 } 2559 #if KMP_USE_FUTEX 2560 else if ((__kmp_user_lock_kind == lk_futex) && 2561 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2562 ; 2563 } 2564 #endif 2565 else { 2566 __kmp_user_lock_free(user_lock, gtid, lck); 2567 } 2568 #endif // KMP_USE_DYNAMIC_LOCK 2569 } // __kmpc_destroy_lock 2570 2571 /* destroy the lock */ 2572 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2573 #if KMP_USE_DYNAMIC_LOCK 2574 2575 #if USE_ITT_BUILD 2576 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2577 __kmp_itt_lock_destroyed(ilk->lock); 2578 #endif 2579 #if OMPT_SUPPORT && OMPT_OPTIONAL 2580 // This is the case, if called from omp_init_lock_with_hint: 2581 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2582 if (!codeptr) 2583 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2584 if (ompt_enabled.ompt_callback_lock_destroy) { 2585 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2586 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2587 } 2588 
#endif 2589 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2590 2591 #else // KMP_USE_DYNAMIC_LOCK 2592 2593 kmp_user_lock_p lck; 2594 2595 if ((__kmp_user_lock_kind == lk_tas) && 2596 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2597 OMP_NEST_LOCK_T_SIZE)) { 2598 lck = (kmp_user_lock_p)user_lock; 2599 } 2600 #if KMP_USE_FUTEX 2601 else if ((__kmp_user_lock_kind == lk_futex) && 2602 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2603 OMP_NEST_LOCK_T_SIZE)) { 2604 lck = (kmp_user_lock_p)user_lock; 2605 } 2606 #endif 2607 else { 2608 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2609 } 2610 2611 #if OMPT_SUPPORT && OMPT_OPTIONAL 2612 // This is the case, if called from omp_init_lock_with_hint: 2613 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2614 if (!codeptr) 2615 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2616 if (ompt_enabled.ompt_callback_lock_destroy) { 2617 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2618 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2619 } 2620 #endif 2621 2622 #if USE_ITT_BUILD 2623 __kmp_itt_lock_destroyed(lck); 2624 #endif /* USE_ITT_BUILD */ 2625 2626 DESTROY_NESTED_LOCK(lck); 2627 2628 if ((__kmp_user_lock_kind == lk_tas) && 2629 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2630 OMP_NEST_LOCK_T_SIZE)) { 2631 ; 2632 } 2633 #if KMP_USE_FUTEX 2634 else if ((__kmp_user_lock_kind == lk_futex) && 2635 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2636 OMP_NEST_LOCK_T_SIZE)) { 2637 ; 2638 } 2639 #endif 2640 else { 2641 __kmp_user_lock_free(user_lock, gtid, lck); 2642 } 2643 #endif // KMP_USE_DYNAMIC_LOCK 2644 } // __kmpc_destroy_nest_lock 2645 2646 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2647 KMP_COUNT_BLOCK(OMP_set_lock); 2648 #if KMP_USE_DYNAMIC_LOCK 2649 int tag = KMP_EXTRACT_D_TAG(user_lock); 2650 #if USE_ITT_BUILD 2651 __kmp_itt_lock_acquiring( 2652 (kmp_user_lock_p) 2653 user_lock); // itt function will get to the right lock object. 
2654 #endif 2655 #if OMPT_SUPPORT && OMPT_OPTIONAL 2656 // This is the case, if called from omp_init_lock_with_hint: 2657 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2658 if (!codeptr) 2659 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2660 if (ompt_enabled.ompt_callback_mutex_acquire) { 2661 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2662 ompt_mutex_lock, omp_lock_hint_none, 2663 __ompt_get_mutex_impl_type(user_lock), 2664 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2665 } 2666 #endif 2667 #if KMP_USE_INLINED_TAS 2668 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2669 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2670 } else 2671 #elif KMP_USE_INLINED_FUTEX 2672 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2673 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2674 } else 2675 #endif 2676 { 2677 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2678 } 2679 #if USE_ITT_BUILD 2680 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2681 #endif 2682 #if OMPT_SUPPORT && OMPT_OPTIONAL 2683 if (ompt_enabled.ompt_callback_mutex_acquired) { 2684 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2685 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2686 } 2687 #endif 2688 2689 #else // KMP_USE_DYNAMIC_LOCK 2690 2691 kmp_user_lock_p lck; 2692 2693 if ((__kmp_user_lock_kind == lk_tas) && 2694 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2695 lck = (kmp_user_lock_p)user_lock; 2696 } 2697 #if KMP_USE_FUTEX 2698 else if ((__kmp_user_lock_kind == lk_futex) && 2699 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2700 lck = (kmp_user_lock_p)user_lock; 2701 } 2702 #endif 2703 else { 2704 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2705 } 2706 2707 #if USE_ITT_BUILD 2708 __kmp_itt_lock_acquiring(lck); 2709 #endif /* USE_ITT_BUILD */ 2710 #if OMPT_SUPPORT && OMPT_OPTIONAL 2711 // This is the case, if called from omp_init_lock_with_hint: 2712 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2713 if (!codeptr) 2714 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2715 if (ompt_enabled.ompt_callback_mutex_acquire) { 2716 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2717 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2718 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2719 } 2720 #endif 2721 2722 ACQUIRE_LOCK(lck, gtid); 2723 2724 #if USE_ITT_BUILD 2725 __kmp_itt_lock_acquired(lck); 2726 #endif /* USE_ITT_BUILD */ 2727 2728 #if OMPT_SUPPORT && OMPT_OPTIONAL 2729 if (ompt_enabled.ompt_callback_mutex_acquired) { 2730 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2731 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2732 } 2733 #endif 2734 2735 #endif // KMP_USE_DYNAMIC_LOCK 2736 } 2737 2738 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2739 #if KMP_USE_DYNAMIC_LOCK 2740 2741 #if USE_ITT_BUILD 2742 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2743 #endif 2744 #if OMPT_SUPPORT && OMPT_OPTIONAL 2745 // This is the case, if called from omp_init_lock_with_hint: 2746 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2747 if (!codeptr) 2748 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2749 if (ompt_enabled.enabled) { 2750 if (ompt_enabled.ompt_callback_mutex_acquire) { 2751 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2752 ompt_mutex_nest_lock, omp_lock_hint_none, 2753 __ompt_get_mutex_impl_type(user_lock), 2754 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2755 } 2756 } 2757 #endif 2758 int acquire_status = 2759 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2760 (void)acquire_status; 2761 #if USE_ITT_BUILD 2762 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2763 #endif 2764 2765 #if OMPT_SUPPORT && OMPT_OPTIONAL 2766 if (ompt_enabled.enabled) { 2767 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2768 if (ompt_enabled.ompt_callback_mutex_acquired) { 2769 // lock_first 2770 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2771 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2772 codeptr); 2773 } 2774 } else { 2775 if (ompt_enabled.ompt_callback_nest_lock) { 2776 // lock_next 2777 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2778 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2779 } 2780 } 2781 } 2782 #endif 2783 2784 #else // KMP_USE_DYNAMIC_LOCK 2785 int acquire_status; 2786 kmp_user_lock_p lck; 2787 2788 if ((__kmp_user_lock_kind == lk_tas) && 2789 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2790 OMP_NEST_LOCK_T_SIZE)) { 2791 lck = (kmp_user_lock_p)user_lock; 2792 } 2793 #if KMP_USE_FUTEX 2794 else if ((__kmp_user_lock_kind == lk_futex) && 2795 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2796 OMP_NEST_LOCK_T_SIZE)) { 2797 lck = (kmp_user_lock_p)user_lock; 2798 } 2799 #endif 2800 else { 2801 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2802 } 2803 2804 #if USE_ITT_BUILD 2805 __kmp_itt_lock_acquiring(lck); 2806 #endif /* USE_ITT_BUILD */ 2807 #if OMPT_SUPPORT && OMPT_OPTIONAL 2808 // This is the case, if called from omp_init_lock_with_hint: 2809 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2810 if (!codeptr) 2811 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2812 if (ompt_enabled.enabled) { 2813 if (ompt_enabled.ompt_callback_mutex_acquire) { 2814 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2815 ompt_mutex_nest_lock, omp_lock_hint_none, 2816 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2817 codeptr); 2818 } 2819 } 2820 #endif 2821 2822 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2823 2824 #if USE_ITT_BUILD 2825 __kmp_itt_lock_acquired(lck); 2826 #endif /* USE_ITT_BUILD */ 2827 2828 #if OMPT_SUPPORT && OMPT_OPTIONAL 2829 if (ompt_enabled.enabled) { 2830 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2831 if (ompt_enabled.ompt_callback_mutex_acquired) { 2832 // lock_first 2833 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2834 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2835 } 2836 } else { 2837 if (ompt_enabled.ompt_callback_nest_lock) { 2838 // lock_next 2839 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2840 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2841 } 2842 } 2843 } 2844 #endif 2845 2846 #endif // KMP_USE_DYNAMIC_LOCK 2847 } 2848 2849 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2850 #if KMP_USE_DYNAMIC_LOCK 2851 2852 int tag = KMP_EXTRACT_D_TAG(user_lock); 2853 #if USE_ITT_BUILD 2854 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2855 #endif 2856 #if KMP_USE_INLINED_TAS 2857 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2858 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2859 } else 2860 #elif KMP_USE_INLINED_FUTEX 2861 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2862 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2863 } else 2864 #endif 2865 { 2866 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2867 } 2868 2869 #if OMPT_SUPPORT && OMPT_OPTIONAL 2870 // This is the case, if called from omp_init_lock_with_hint: 2871 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2872 if (!codeptr) 2873 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2874 if (ompt_enabled.ompt_callback_mutex_released) { 2875 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2876 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2877 } 2878 #endif 2879 2880 #else // KMP_USE_DYNAMIC_LOCK 2881 2882 kmp_user_lock_p lck; 2883 2884 /* Can't use serial interval since not block structured */ 2885 /* release the lock */ 2886 2887 if ((__kmp_user_lock_kind == lk_tas) && 2888 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2889 #if KMP_OS_LINUX && \ 2890 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2891 // "fast" path implemented to fix customer performance issue 2892 #if USE_ITT_BUILD 2893 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2894 #endif /* USE_ITT_BUILD */ 2895 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2896 KMP_MB(); 2897 2898 #if OMPT_SUPPORT && OMPT_OPTIONAL 2899 // This is the case, if called from omp_init_lock_with_hint: 2900 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2901 if (!codeptr) 2902 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2903 if (ompt_enabled.ompt_callback_mutex_released) { 2904 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2905 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2906 } 2907 #endif 2908 2909 return; 2910 #else 2911 lck = (kmp_user_lock_p)user_lock; 2912 #endif 2913 } 2914 #if KMP_USE_FUTEX 2915 else if ((__kmp_user_lock_kind == lk_futex) && 2916 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2917 lck = (kmp_user_lock_p)user_lock; 2918 } 2919 #endif 2920 else { 2921 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2922 } 2923 2924 #if USE_ITT_BUILD 2925 __kmp_itt_lock_releasing(lck); 2926 #endif /* USE_ITT_BUILD */ 2927 2928 RELEASE_LOCK(lck, gtid); 2929 2930 #if OMPT_SUPPORT && OMPT_OPTIONAL 2931 // This is the case, if called from omp_init_lock_with_hint: 2932 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2933 if (!codeptr) 2934 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2935 if (ompt_enabled.ompt_callback_mutex_released) { 2936 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2937 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2938 } 2939 #endif 2940 2941 #endif // KMP_USE_DYNAMIC_LOCK 2942 } 2943 2944 /* release the lock */ 2945 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2946 #if KMP_USE_DYNAMIC_LOCK 2947 2948 #if USE_ITT_BUILD 2949 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2950 #endif 2951 int release_status = 2952 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2953 (void)release_status; 2954 2955 #if OMPT_SUPPORT && OMPT_OPTIONAL 2956 // This is the case, if called from omp_init_lock_with_hint: 2957 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2958 if (!codeptr) 2959 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2960 if (ompt_enabled.enabled) { 2961 if (release_status == KMP_LOCK_RELEASED) { 2962 if (ompt_enabled.ompt_callback_mutex_released) { 2963 // release_lock_last 2964 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2965 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2966 codeptr); 2967 } 2968 } else if (ompt_enabled.ompt_callback_nest_lock) { 2969 // release_lock_prev 2970 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2971 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2972 } 2973 } 2974 #endif 2975 2976 #else // KMP_USE_DYNAMIC_LOCK 2977 2978 kmp_user_lock_p lck; 
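  // Select the lock object that backs user_lock: a TAS (or futex) lock stored
  // directly in the omp_nest_lock_t object when it fits, otherwise the lock
  // found via __kmp_lookup_user_lock. The TAS case below takes an inline
  // fast-release path on supported Linux targets and returns early.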
2979 2980 /* Can't use serial interval since not block structured */ 2981 2982 if ((__kmp_user_lock_kind == lk_tas) && 2983 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2984 OMP_NEST_LOCK_T_SIZE)) { 2985 #if KMP_OS_LINUX && \ 2986 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2987 // "fast" path implemented to fix customer performance issue 2988 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2989 #if USE_ITT_BUILD 2990 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2991 #endif /* USE_ITT_BUILD */ 2992 2993 #if OMPT_SUPPORT && OMPT_OPTIONAL 2994 int release_status = KMP_LOCK_STILL_HELD; 2995 #endif 2996 2997 if (--(tl->lk.depth_locked) == 0) { 2998 TCW_4(tl->lk.poll, 0); 2999 #if OMPT_SUPPORT && OMPT_OPTIONAL 3000 release_status = KMP_LOCK_RELEASED; 3001 #endif 3002 } 3003 KMP_MB(); 3004 3005 #if OMPT_SUPPORT && OMPT_OPTIONAL 3006 // This is the case, if called from omp_init_lock_with_hint: 3007 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3008 if (!codeptr) 3009 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3010 if (ompt_enabled.enabled) { 3011 if (release_status == KMP_LOCK_RELEASED) { 3012 if (ompt_enabled.ompt_callback_mutex_released) { 3013 // release_lock_last 3014 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3015 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3016 } 3017 } else if (ompt_enabled.ompt_callback_nest_lock) { 3018 // release_lock_previous 3019 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3020 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3021 } 3022 } 3023 #endif 3024 3025 return; 3026 #else 3027 lck = (kmp_user_lock_p)user_lock; 3028 #endif 3029 } 3030 #if KMP_USE_FUTEX 3031 else if ((__kmp_user_lock_kind == lk_futex) && 3032 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3033 OMP_NEST_LOCK_T_SIZE)) { 3034 lck = (kmp_user_lock_p)user_lock; 3035 } 3036 #endif 3037 else { 3038 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3039 } 3040 3041 #if USE_ITT_BUILD 3042 __kmp_itt_lock_releasing(lck); 3043 #endif /* USE_ITT_BUILD */ 3044 3045 int release_status; 3046 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3047 #if OMPT_SUPPORT && OMPT_OPTIONAL 3048 // This is the case, if called from omp_init_lock_with_hint: 3049 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3050 if (!codeptr) 3051 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3052 if (ompt_enabled.enabled) { 3053 if (release_status == KMP_LOCK_RELEASED) { 3054 if (ompt_enabled.ompt_callback_mutex_released) { 3055 // release_lock_last 3056 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3057 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3058 } 3059 } else if (ompt_enabled.ompt_callback_nest_lock) { 3060 // release_lock_previous 3061 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3062 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3063 } 3064 } 3065 #endif 3066 3067 #endif // KMP_USE_DYNAMIC_LOCK 3068 } 3069 3070 /* try to acquire the lock */ 3071 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3072 KMP_COUNT_BLOCK(OMP_test_lock); 3073 3074 #if KMP_USE_DYNAMIC_LOCK 3075 int rc; 3076 int tag = KMP_EXTRACT_D_TAG(user_lock); 3077 #if USE_ITT_BUILD 3078 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3079 #endif 3080 #if OMPT_SUPPORT && OMPT_OPTIONAL 3081 // This is the case, if called from omp_init_lock_with_hint: 3082 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3083 if (!codeptr) 3084 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 3085 if (ompt_enabled.ompt_callback_mutex_acquire) { 3086 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3087 ompt_mutex_lock, omp_lock_hint_none, 3088 __ompt_get_mutex_impl_type(user_lock), 3089 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3090 } 3091 #endif 3092 #if KMP_USE_INLINED_TAS 3093 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3094 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3095 } else 3096 #elif KMP_USE_INLINED_FUTEX 3097 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3098 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3099 } else 3100 #endif 3101 { 3102 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3103 } 3104 if (rc) { 3105 #if USE_ITT_BUILD 3106 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3107 #endif 3108 #if OMPT_SUPPORT && OMPT_OPTIONAL 3109 if (ompt_enabled.ompt_callback_mutex_acquired) { 3110 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3111 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3112 } 3113 #endif 3114 return FTN_TRUE; 3115 } else { 3116 #if USE_ITT_BUILD 3117 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3118 #endif 3119 return FTN_FALSE; 3120 } 3121 3122 #else // KMP_USE_DYNAMIC_LOCK 3123 3124 kmp_user_lock_p lck; 3125 int rc; 3126 3127 if ((__kmp_user_lock_kind == lk_tas) && 3128 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3129 lck = (kmp_user_lock_p)user_lock; 3130 } 3131 #if KMP_USE_FUTEX 3132 else if ((__kmp_user_lock_kind == lk_futex) && 3133 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3134 lck = (kmp_user_lock_p)user_lock; 3135 } 3136 #endif 3137 else { 3138 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3139 } 3140 3141 #if USE_ITT_BUILD 3142 __kmp_itt_lock_acquiring(lck); 3143 #endif /* USE_ITT_BUILD */ 3144 #if OMPT_SUPPORT && OMPT_OPTIONAL 3145 // This is the case, if called from omp_init_lock_with_hint: 3146 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3147 if (!codeptr) 3148 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3149 if (ompt_enabled.ompt_callback_mutex_acquire) { 3150 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3151 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3152 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3153 } 3154 #endif 3155 3156 rc = TEST_LOCK(lck, gtid); 3157 #if USE_ITT_BUILD 3158 if (rc) { 3159 __kmp_itt_lock_acquired(lck); 3160 } else { 3161 __kmp_itt_lock_cancelled(lck); 3162 } 3163 #endif /* USE_ITT_BUILD */ 3164 #if OMPT_SUPPORT && OMPT_OPTIONAL 3165 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3166 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3167 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3168 } 3169 #endif 3170 3171 return (rc ? 
FTN_TRUE : FTN_FALSE);
3172
3173   /* Can't use serial interval since not block structured */
3174
3175 #endif // KMP_USE_DYNAMIC_LOCK
3176 }
3177
3178 /* try to acquire the lock */
3179 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3180 #if KMP_USE_DYNAMIC_LOCK
3181   int rc;
3182 #if USE_ITT_BUILD
3183   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3184 #endif
3185 #if OMPT_SUPPORT && OMPT_OPTIONAL
3186   // This is the case, if called from omp_init_lock_with_hint:
3187   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3188   if (!codeptr)
3189     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3190   if (ompt_enabled.ompt_callback_mutex_acquire) {
3191     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3192         ompt_mutex_nest_lock, omp_lock_hint_none,
3193         __ompt_get_mutex_impl_type(user_lock),
3194         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3195   }
3196 #endif
3197   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3198 #if USE_ITT_BUILD
3199   if (rc) {
3200     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3201   } else {
3202     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3203   }
3204 #endif
3205 #if OMPT_SUPPORT && OMPT_OPTIONAL
3206   if (ompt_enabled.enabled && rc) {
3207     if (rc == 1) {
3208       if (ompt_enabled.ompt_callback_mutex_acquired) {
3209         // lock_first
3210         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3211             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3212             codeptr);
3213       }
3214     } else {
3215       if (ompt_enabled.ompt_callback_nest_lock) {
3216         // lock_next
3217         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3218             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3219       }
3220     }
3221   }
3222 #endif
3223   return rc;
3224
3225 #else // KMP_USE_DYNAMIC_LOCK
3226
3227   kmp_user_lock_p lck;
3228   int rc;
3229
3230   if ((__kmp_user_lock_kind == lk_tas) &&
3231       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3232        OMP_NEST_LOCK_T_SIZE)) {
3233     lck = (kmp_user_lock_p)user_lock;
3234   }
3235 #if KMP_USE_FUTEX
3236   else if ((__kmp_user_lock_kind == lk_futex) &&
3237            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3238             OMP_NEST_LOCK_T_SIZE)) {
3239     lck = (kmp_user_lock_p)user_lock;
3240   }
3241 #endif
3242   else {
3243     lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3244   }
3245
3246 #if USE_ITT_BUILD
3247   __kmp_itt_lock_acquiring(lck);
3248 #endif /* USE_ITT_BUILD */
3249
3250 #if OMPT_SUPPORT && OMPT_OPTIONAL
3251   // This is the case, if called from omp_init_lock_with_hint:
3252   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3253   if (!codeptr)
3254     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3255   if (ompt_enabled.enabled &&
3256       ompt_enabled.ompt_callback_mutex_acquire) {
3257     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3258         ompt_mutex_nest_lock, omp_lock_hint_none,
3259         __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3260         codeptr);
3261   }
3262 #endif
3263
3264   rc = TEST_NESTED_LOCK(lck, gtid);
3265 #if USE_ITT_BUILD
3266   if (rc) {
3267     __kmp_itt_lock_acquired(lck);
3268   } else {
3269     __kmp_itt_lock_cancelled(lck);
3270   }
3271 #endif /* USE_ITT_BUILD */
3272 #if OMPT_SUPPORT && OMPT_OPTIONAL
3273   if (ompt_enabled.enabled && rc) {
3274     if (rc == 1) {
3275       if (ompt_enabled.ompt_callback_mutex_acquired) {
3276         // lock_first
3277         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3278             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3279       }
3280     } else {
3281       if (ompt_enabled.ompt_callback_nest_lock) {
3282
// lock_next 3283 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3284 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3285 } 3286 } 3287 } 3288 #endif 3289 return rc; 3290 3291 /* Can't use serial interval since not block structured */ 3292 3293 #endif // KMP_USE_DYNAMIC_LOCK 3294 } 3295 3296 // Interface to fast scalable reduce methods routines 3297 3298 // keep the selected method in a thread local structure for cross-function 3299 // usage: will be used in __kmpc_end_reduce* functions; 3300 // another solution: to re-determine the method one more time in 3301 // __kmpc_end_reduce* functions (new prototype required then) 3302 // AT: which solution is better? 3303 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3304 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3305 3306 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3307 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3308 3309 // description of the packed_reduction_method variable: look at the macros in 3310 // kmp.h 3311 3312 // used in a critical section reduce block 3313 static __forceinline void 3314 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3315 kmp_critical_name *crit) { 3316 3317 // this lock was visible to a customer and to the threading profile tool as a 3318 // serial overhead span (although it's used for an internal purpose only) 3319 // why was it visible in previous implementation? 3320 // should we keep it visible in new reduce block? 3321 kmp_user_lock_p lck; 3322 3323 #if KMP_USE_DYNAMIC_LOCK 3324 3325 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3326 // Check if it is initialized. 3327 if (*lk == 0) { 3328 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3329 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3330 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3331 } else { 3332 __kmp_init_indirect_csptr(crit, loc, global_tid, 3333 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3334 } 3335 } 3336 // Branch for accessing the actual lock object and set operation. This 3337 // branching is inevitable since this lock initialization does not follow the 3338 // normal dispatch path (lock table is not used). 3339 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3340 lck = (kmp_user_lock_p)lk; 3341 KMP_DEBUG_ASSERT(lck != NULL); 3342 if (__kmp_env_consistency_check) { 3343 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3344 } 3345 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3346 } else { 3347 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3348 lck = ilk->lock; 3349 KMP_DEBUG_ASSERT(lck != NULL); 3350 if (__kmp_env_consistency_check) { 3351 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3352 } 3353 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3354 } 3355 3356 #else // KMP_USE_DYNAMIC_LOCK 3357 3358 // We know that the fast reduction code is only emitted by Intel compilers 3359 // with 32 byte critical sections. If there isn't enough space, then we 3360 // have to use a pointer. 
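  // When it fits, the lock object is stored directly in the kmp_critical_name
  // buffer provided by the compiler; otherwise __kmp_get_critical_section_ptr
  // supplies a separately allocated lock and the buffer holds a pointer to it.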
3361 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3362 lck = (kmp_user_lock_p)crit; 3363 } else { 3364 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3365 } 3366 KMP_DEBUG_ASSERT(lck != NULL); 3367 3368 if (__kmp_env_consistency_check) 3369 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3370 3371 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3372 3373 #endif // KMP_USE_DYNAMIC_LOCK 3374 } 3375 3376 // used in a critical section reduce block 3377 static __forceinline void 3378 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3379 kmp_critical_name *crit) { 3380 3381 kmp_user_lock_p lck; 3382 3383 #if KMP_USE_DYNAMIC_LOCK 3384 3385 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3386 lck = (kmp_user_lock_p)crit; 3387 if (__kmp_env_consistency_check) 3388 __kmp_pop_sync(global_tid, ct_critical, loc); 3389 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3390 } else { 3391 kmp_indirect_lock_t *ilk = 3392 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3393 if (__kmp_env_consistency_check) 3394 __kmp_pop_sync(global_tid, ct_critical, loc); 3395 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3396 } 3397 3398 #else // KMP_USE_DYNAMIC_LOCK 3399 3400 // We know that the fast reduction code is only emitted by Intel compilers 3401 // with 32 byte critical sections. If there isn't enough space, then we have 3402 // to use a pointer. 3403 if (__kmp_base_user_lock_size > 32) { 3404 lck = *((kmp_user_lock_p *)crit); 3405 KMP_ASSERT(lck != NULL); 3406 } else { 3407 lck = (kmp_user_lock_p)crit; 3408 } 3409 3410 if (__kmp_env_consistency_check) 3411 __kmp_pop_sync(global_tid, ct_critical, loc); 3412 3413 __kmp_release_user_lock_with_checks(lck, global_tid); 3414 3415 #endif // KMP_USE_DYNAMIC_LOCK 3416 } // __kmp_end_critical_section_reduce_block 3417 3418 static __forceinline int 3419 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3420 int *task_state) { 3421 kmp_team_t *team; 3422 3423 // Check if we are inside the teams construct? 3424 if (th->th.th_teams_microtask) { 3425 *team_p = team = th->th.th_team; 3426 if (team->t.t_level == th->th.th_teams_level) { 3427 // This is reduction at teams construct. 3428 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3429 // Let's swap teams temporarily for the reduction. 3430 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3431 th->th.th_team = team->t.t_parent; 3432 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3433 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3434 *task_state = th->th.th_task_state; 3435 th->th.th_task_state = 0; 3436 3437 return 1; 3438 } 3439 } 3440 return 0; 3441 } 3442 3443 static __forceinline void 3444 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3445 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3446 th->th.th_info.ds.ds_tid = 0; 3447 th->th.th_team = team; 3448 th->th.th_team_nproc = team->t.t_nproc; 3449 th->th.th_task_team = team->t.t_task_team[task_state]; 3450 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3451 } 3452 3453 /* 2.a.i. Reduce Block without a terminating barrier */ 3454 /*! 
3455 @ingroup SYNCHRONIZATION 3456 @param loc source location information 3457 @param global_tid global thread number 3458 @param num_vars number of items (variables) to be reduced 3459 @param reduce_size size of data in bytes to be reduced 3460 @param reduce_data pointer to data to be reduced 3461 @param reduce_func callback function providing reduction operation on two 3462 operands and returning result of reduction in lhs_data 3463 @param lck pointer to the unique lock data structure 3464 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3465 threads if atomic reduction needed 3466 3467 The nowait version is used for a reduce clause with the nowait argument. 3468 */ 3469 kmp_int32 3470 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3471 size_t reduce_size, void *reduce_data, 3472 void (*reduce_func)(void *lhs_data, void *rhs_data), 3473 kmp_critical_name *lck) { 3474 3475 KMP_COUNT_BLOCK(REDUCE_nowait); 3476 int retval = 0; 3477 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3478 kmp_info_t *th; 3479 kmp_team_t *team; 3480 int teams_swapped = 0, task_state; 3481 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3482 __kmp_assert_valid_gtid(global_tid); 3483 3484 // why do we need this initialization here at all? 3485 // Reduction clause can not be used as a stand-alone directive. 3486 3487 // do not call __kmp_serial_initialize(), it will be called by 3488 // __kmp_parallel_initialize() if needed 3489 // possible detection of false-positive race by the threadchecker ??? 3490 if (!TCR_4(__kmp_init_parallel)) 3491 __kmp_parallel_initialize(); 3492 3493 __kmp_resume_if_soft_paused(); 3494 3495 // check correctness of reduce block nesting 3496 #if KMP_USE_DYNAMIC_LOCK 3497 if (__kmp_env_consistency_check) 3498 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3499 #else 3500 if (__kmp_env_consistency_check) 3501 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3502 #endif 3503 3504 th = __kmp_thread_from_gtid(global_tid); 3505 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3506 3507 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3508 // the value should be kept in a variable 3509 // the variable should be either a construct-specific or thread-specific 3510 // property, not a team specific property 3511 // (a thread can reach the next reduce block on the next construct, reduce 3512 // method may differ on the next construct) 3513 // an ident_t "loc" parameter could be used as a construct-specific property 3514 // (what if loc == 0?) 
3515 // (if both construct-specific and team-specific variables were shared, 3516 // then unnecessary extra syncs would be needed) 3517 // a thread-specific variable is better regarding two issues above (next 3518 // construct and extra syncs) 3519 // a thread-specific "th_local.reduction_method" variable is used currently 3520 // each thread executes 'determine' and 'set' lines (no need to execute by one 3521 // thread, to avoid unnecessary extra syncs) 3522 3523 packed_reduction_method = __kmp_determine_reduction_method( 3524 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3525 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3526 3527 OMPT_REDUCTION_DECL(th, global_tid); 3528 if (packed_reduction_method == critical_reduce_block) { 3529 3530 OMPT_REDUCTION_BEGIN; 3531 3532 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3533 retval = 1; 3534 3535 } else if (packed_reduction_method == empty_reduce_block) { 3536 3537 OMPT_REDUCTION_BEGIN; 3538 3539 // usage: if team size == 1, no synchronization is required ( Intel 3540 // platforms only ) 3541 retval = 1; 3542 3543 } else if (packed_reduction_method == atomic_reduce_block) { 3544 3545 retval = 2; 3546 3547 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3548 // won't be called by the code gen) 3549 // (it's not ideal, because the checking block has been closed by 3550 // this 'pop', 3551 // but the atomic operation has not been executed yet; it will be executed 3552 // slightly later, literally on the next instruction) 3553 if (__kmp_env_consistency_check) 3554 __kmp_pop_sync(global_tid, ct_reduce, loc); 3555 3556 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3557 tree_reduce_block)) { 3558 3559 // AT: performance issue: a real barrier here 3560 // AT: (if primary thread is slow, other threads are blocked here waiting for 3561 // the primary thread to come and release them) 3562 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3563 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3564 // be confusing to a customer) 3565 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3566 // might go faster and be more in line with the sense of NOWAIT 3567 // AT: TO DO: do epcc test and compare times 3568 3569 // this barrier should be invisible to a customer and to the threading profile 3570 // tool (it's neither a terminating barrier nor customer's code, it's 3571 // used for an internal purpose) 3572 #if OMPT_SUPPORT 3573 // JP: can this barrier potentially lead to task scheduling? 3574 // JP: as long as there is a barrier in the implementation, OMPT should and 3575 // will provide the barrier events 3576 // so we set up the necessary frame/return addresses. 3577 ompt_frame_t *ompt_frame; 3578 if (ompt_enabled.enabled) { 3579 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3580 if (ompt_frame->enter_frame.ptr == NULL) 3581 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3582 } 3583 OMPT_STORE_RETURN_ADDRESS(global_tid); 3584 #endif 3585 #if USE_ITT_NOTIFY 3586 __kmp_threads[global_tid]->th.th_ident = loc; 3587 #endif 3588 retval = 3589 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3590 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3591 retval = (retval != 0) ?
(0) : (1); 3592 #if OMPT_SUPPORT && OMPT_OPTIONAL 3593 if (ompt_enabled.enabled) { 3594 ompt_frame->enter_frame = ompt_data_none; 3595 } 3596 #endif 3597 3598 // all other workers except primary thread should do this pop here 3599 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3600 if (__kmp_env_consistency_check) { 3601 if (retval == 0) { 3602 __kmp_pop_sync(global_tid, ct_reduce, loc); 3603 } 3604 } 3605 3606 } else { 3607 3608 // should never reach this block 3609 KMP_ASSERT(0); // "unexpected method" 3610 } 3611 if (teams_swapped) { 3612 __kmp_restore_swapped_teams(th, team, task_state); 3613 } 3614 KA_TRACE( 3615 10, 3616 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3617 global_tid, packed_reduction_method, retval)); 3618 3619 return retval; 3620 } 3621 3622 /*! 3623 @ingroup SYNCHRONIZATION 3624 @param loc source location information 3625 @param global_tid global thread id. 3626 @param lck pointer to the unique lock data structure 3627 3628 Finish the execution of a reduce nowait. 3629 */ 3630 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3631 kmp_critical_name *lck) { 3632 3633 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3634 3635 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3636 __kmp_assert_valid_gtid(global_tid); 3637 3638 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3639 3640 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3641 3642 if (packed_reduction_method == critical_reduce_block) { 3643 3644 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3645 OMPT_REDUCTION_END; 3646 3647 } else if (packed_reduction_method == empty_reduce_block) { 3648 3649 // usage: if team size == 1, no synchronization is required ( on Intel 3650 // platforms only ) 3651 3652 OMPT_REDUCTION_END; 3653 3654 } else if (packed_reduction_method == atomic_reduce_block) { 3655 3656 // neither primary thread nor other workers should get here 3657 // (code gen does not generate this call in case 2: atomic reduce block) 3658 // actually it's better to remove this elseif at all; 3659 // after removal this value will checked by the 'else' and will assert 3660 3661 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3662 tree_reduce_block)) { 3663 3664 // only primary thread gets here 3665 // OMPT: tree reduction is annotated in the barrier code 3666 3667 } else { 3668 3669 // should never reach this block 3670 KMP_ASSERT(0); // "unexpected method" 3671 } 3672 3673 if (__kmp_env_consistency_check) 3674 __kmp_pop_sync(global_tid, ct_reduce, loc); 3675 3676 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3677 global_tid, packed_reduction_method)); 3678 3679 return; 3680 } 3681 3682 /* 2.a.ii. Reduce Block with a terminating barrier */ 3683 3684 /*! 3685 @ingroup SYNCHRONIZATION 3686 @param loc source location information 3687 @param global_tid global thread number 3688 @param num_vars number of items (variables) to be reduced 3689 @param reduce_size size of data in bytes to be reduced 3690 @param reduce_data pointer to data to be reduced 3691 @param reduce_func callback function providing reduction operation on two 3692 operands and returning result of reduction in lhs_data 3693 @param lck pointer to the unique lock data structure 3694 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3695 threads if atomic reduction needed 3696 3697 A blocking reduce that includes an implicit barrier. 
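As a rough illustration of how the return value is intended to drive generated
code (a sketch, not the exact sequence any particular compiler emits;
priv_sum/shared_sum stand for a thread's private copy and the original
reduction variable, and loc, gtid and my_reduce_func are assumed to be set up
as for the other entry points):

@code
  // priv_sum, shared_sum, loc, gtid and my_reduce_func are illustrative names.
  static kmp_critical_name crit_name; // zero-initialized, shared by the team
  int ret = __kmpc_reduce(&loc, gtid, 1, sizeof(priv_sum), &priv_sum,
                          my_reduce_func, &crit_name);
  switch (ret) {
  case 1: // critical / serial / tree methods: combine directly, then finish
    shared_sum += priv_sum;
    __kmpc_end_reduce(&loc, gtid, &crit_name);
    break;
  case 2: // atomic method: every thread combines with an atomic update
          // (a real compiler would typically call a __kmpc_atomic_* entry)
    __atomic_fetch_add(&shared_sum, priv_sum, __ATOMIC_SEQ_CST);
    __kmpc_end_reduce(&loc, gtid, &crit_name);
    break;
  default: // 0: a tree-method worker; its data was already combined
    break;
  }
@endcode

The sketch assumes generated code also calls __kmpc_end_reduce() in the atomic
case, so that the plain barrier in that function's atomic branch supplies the
terminating synchronization. The nowait variant returns the same codes, but,
per the comments in __kmpc_reduce_nowait() above, generated code is not
expected to call __kmpc_end_reduce_nowait() after an atomic reduction.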
3698 */ 3699 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3700 size_t reduce_size, void *reduce_data, 3701 void (*reduce_func)(void *lhs_data, void *rhs_data), 3702 kmp_critical_name *lck) { 3703 KMP_COUNT_BLOCK(REDUCE_wait); 3704 int retval = 0; 3705 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3706 kmp_info_t *th; 3707 kmp_team_t *team; 3708 int teams_swapped = 0, task_state; 3709 3710 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3711 __kmp_assert_valid_gtid(global_tid); 3712 3713 // why do we need this initialization here at all? 3714 // Reduction clause can not be a stand-alone directive. 3715 3716 // do not call __kmp_serial_initialize(), it will be called by 3717 // __kmp_parallel_initialize() if needed 3718 // possible detection of false-positive race by the threadchecker ??? 3719 if (!TCR_4(__kmp_init_parallel)) 3720 __kmp_parallel_initialize(); 3721 3722 __kmp_resume_if_soft_paused(); 3723 3724 // check correctness of reduce block nesting 3725 #if KMP_USE_DYNAMIC_LOCK 3726 if (__kmp_env_consistency_check) 3727 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3728 #else 3729 if (__kmp_env_consistency_check) 3730 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3731 #endif 3732 3733 th = __kmp_thread_from_gtid(global_tid); 3734 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3735 3736 packed_reduction_method = __kmp_determine_reduction_method( 3737 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3738 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3739 3740 OMPT_REDUCTION_DECL(th, global_tid); 3741 3742 if (packed_reduction_method == critical_reduce_block) { 3743 3744 OMPT_REDUCTION_BEGIN; 3745 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3746 retval = 1; 3747 3748 } else if (packed_reduction_method == empty_reduce_block) { 3749 3750 OMPT_REDUCTION_BEGIN; 3751 // usage: if team size == 1, no synchronization is required ( Intel 3752 // platforms only ) 3753 retval = 1; 3754 3755 } else if (packed_reduction_method == atomic_reduce_block) { 3756 3757 retval = 2; 3758 3759 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3760 tree_reduce_block)) { 3761 3762 // case tree_reduce_block: 3763 // this barrier should be visible to a customer and to the threading profile 3764 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3765 #if OMPT_SUPPORT 3766 ompt_frame_t *ompt_frame; 3767 if (ompt_enabled.enabled) { 3768 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3769 if (ompt_frame->enter_frame.ptr == NULL) 3770 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3771 } 3772 OMPT_STORE_RETURN_ADDRESS(global_tid); 3773 #endif 3774 #if USE_ITT_NOTIFY 3775 __kmp_threads[global_tid]->th.th_ident = 3776 loc; // needed for correct notification of frames 3777 #endif 3778 retval = 3779 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3780 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3781 retval = (retval != 0) ? 
(0) : (1); 3782 #if OMPT_SUPPORT && OMPT_OPTIONAL 3783 if (ompt_enabled.enabled) { 3784 ompt_frame->enter_frame = ompt_data_none; 3785 } 3786 #endif 3787 3788 // all other workers except primary thread should do this pop here 3789 // (none of other workers except primary will enter __kmpc_end_reduce()) 3790 if (__kmp_env_consistency_check) { 3791 if (retval == 0) { // 0: all other workers; 1: primary thread 3792 __kmp_pop_sync(global_tid, ct_reduce, loc); 3793 } 3794 } 3795 3796 } else { 3797 3798 // should never reach this block 3799 KMP_ASSERT(0); // "unexpected method" 3800 } 3801 if (teams_swapped) { 3802 __kmp_restore_swapped_teams(th, team, task_state); 3803 } 3804 3805 KA_TRACE(10, 3806 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3807 global_tid, packed_reduction_method, retval)); 3808 return retval; 3809 } 3810 3811 /*! 3812 @ingroup SYNCHRONIZATION 3813 @param loc source location information 3814 @param global_tid global thread id. 3815 @param lck pointer to the unique lock data structure 3816 3817 Finish the execution of a blocking reduce. 3818 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3819 start function. 3820 */ 3821 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3822 kmp_critical_name *lck) { 3823 3824 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3825 kmp_info_t *th; 3826 kmp_team_t *team; 3827 int teams_swapped = 0, task_state; 3828 3829 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3830 __kmp_assert_valid_gtid(global_tid); 3831 3832 th = __kmp_thread_from_gtid(global_tid); 3833 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3834 3835 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3836 3837 // this barrier should be visible to a customer and to the threading profile 3838 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3839 OMPT_REDUCTION_DECL(th, global_tid); 3840 3841 if (packed_reduction_method == critical_reduce_block) { 3842 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3843 3844 OMPT_REDUCTION_END; 3845 3846 // TODO: implicit barrier: should be exposed 3847 #if OMPT_SUPPORT 3848 ompt_frame_t *ompt_frame; 3849 if (ompt_enabled.enabled) { 3850 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3851 if (ompt_frame->enter_frame.ptr == NULL) 3852 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3853 } 3854 OMPT_STORE_RETURN_ADDRESS(global_tid); 3855 #endif 3856 #if USE_ITT_NOTIFY 3857 __kmp_threads[global_tid]->th.th_ident = loc; 3858 #endif 3859 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3860 #if OMPT_SUPPORT && OMPT_OPTIONAL 3861 if (ompt_enabled.enabled) { 3862 ompt_frame->enter_frame = ompt_data_none; 3863 } 3864 #endif 3865 3866 } else if (packed_reduction_method == empty_reduce_block) { 3867 3868 OMPT_REDUCTION_END; 3869 3870 // usage: if team size==1, no synchronization is required (Intel platforms only) 3871 3872 // TODO: implicit barrier: should be exposed 3873 #if OMPT_SUPPORT 3874 ompt_frame_t *ompt_frame; 3875 if (ompt_enabled.enabled) { 3876 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3877 if (ompt_frame->enter_frame.ptr == NULL) 3878 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3879 } 3880 OMPT_STORE_RETURN_ADDRESS(global_tid); 3881 #endif 3882 #if USE_ITT_NOTIFY 3883 __kmp_threads[global_tid]->th.th_ident = loc; 3884 #endif 3885 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3886 #if OMPT_SUPPORT && OMPT_OPTIONAL 3887 if (ompt_enabled.enabled) { 3888 ompt_frame->enter_frame = ompt_data_none; 3889 } 3890 #endif 3891 3892 } else if (packed_reduction_method == atomic_reduce_block) { 3893 3894 #if OMPT_SUPPORT 3895 ompt_frame_t *ompt_frame; 3896 if (ompt_enabled.enabled) { 3897 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3898 if (ompt_frame->enter_frame.ptr == NULL) 3899 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3900 } 3901 OMPT_STORE_RETURN_ADDRESS(global_tid); 3902 #endif 3903 // TODO: implicit barrier: should be exposed 3904 #if USE_ITT_NOTIFY 3905 __kmp_threads[global_tid]->th.th_ident = loc; 3906 #endif 3907 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3908 #if OMPT_SUPPORT && OMPT_OPTIONAL 3909 if (ompt_enabled.enabled) { 3910 ompt_frame->enter_frame = ompt_data_none; 3911 } 3912 #endif 3913 3914 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3915 tree_reduce_block)) { 3916 3917 // only primary thread executes here (primary releases all other workers) 3918 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3919 global_tid); 3920 3921 } else { 3922 3923 // should never reach this block 3924 KMP_ASSERT(0); // "unexpected method" 3925 } 3926 if (teams_swapped) { 3927 __kmp_restore_swapped_teams(th, team, task_state); 3928 } 3929 3930 if (__kmp_env_consistency_check) 3931 __kmp_pop_sync(global_tid, ct_reduce, loc); 3932 3933 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3934 global_tid, packed_reduction_method)); 3935 3936 return; 3937 } 3938 3939 #undef __KMP_GET_REDUCTION_METHOD 3940 #undef __KMP_SET_REDUCTION_METHOD 3941 3942 /* end of interface to fast scalable reduce routines */ 3943 3944 kmp_uint64 __kmpc_get_taskid() { 3945 3946 kmp_int32 gtid; 3947 kmp_info_t *thread; 3948 3949 gtid = __kmp_get_gtid(); 3950 if (gtid < 0) { 3951 return 0; 3952 } 3953 thread = __kmp_thread_from_gtid(gtid); 3954 return thread->th.th_current_task->td_task_id; 3955 3956 } // __kmpc_get_taskid 3957 3958 kmp_uint64 __kmpc_get_parent_taskid() { 3959 3960 kmp_int32 gtid; 3961 kmp_info_t *thread; 3962 kmp_taskdata_t *parent_task; 3963 3964 gtid = __kmp_get_gtid(); 3965 if (gtid < 0) { 3966 return 0; 3967 } 3968 thread = __kmp_thread_from_gtid(gtid); 3969 parent_task = thread->th.th_current_task->td_parent; 3970 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3971 3972 } // __kmpc_get_parent_taskid 3973 3974 /*! 3975 @ingroup WORK_SHARING 3976 @param loc source location information. 3977 @param gtid global thread number. 3978 @param num_dims number of associated doacross loops. 3979 @param dims info on loops bounds. 3980 3981 Initialize doacross loop information. 3982 Expect compiler send us inclusive bounds, 3983 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
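As a sketch of how these entry points fit together for the one-dimensional
example above (illustrative only; loc and gtid are assumed as elsewhere, and
lb/ub stand for the bounds of this thread's chunk of the iteration space):

@code
  struct kmp_dim dims[1];
  dims[0].lo = 2; dims[0].up = 8; dims[0].st = 2; // (8 - 2) / 2 + 1 = 4 iterations
  __kmpc_doacross_init(&loc, gtid, 1, dims);
  for (kmp_int64 i = lb; i <= ub; i += 2) {
    kmp_int64 vec[1] = {i - 2};
    __kmpc_doacross_wait(&loc, gtid, vec); // ordered depend(sink: i - 2)
    // ... loop body ...
    vec[0] = i;
    __kmpc_doacross_post(&loc, gtid, vec); // ordered depend(source)
  }
  __kmpc_doacross_fini(&loc, gtid);
@endcode

The first iteration's sink value (i - 2 == 0) lies below dims[0].lo, so the
bounds checks in __kmpc_doacross_wait() simply return without waiting.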
3984 */ 3985 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3986 const struct kmp_dim *dims) { 3987 __kmp_assert_valid_gtid(gtid); 3988 int j, idx; 3989 kmp_int64 last, trace_count; 3990 kmp_info_t *th = __kmp_threads[gtid]; 3991 kmp_team_t *team = th->th.th_team; 3992 kmp_uint32 *flags; 3993 kmp_disp_t *pr_buf = th->th.th_dispatch; 3994 dispatch_shared_info_t *sh_buf; 3995 3996 KA_TRACE( 3997 20, 3998 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3999 gtid, num_dims, !team->t.t_serialized)); 4000 KMP_DEBUG_ASSERT(dims != NULL); 4001 KMP_DEBUG_ASSERT(num_dims > 0); 4002 4003 if (team->t.t_serialized) { 4004 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4005 return; // no dependencies if team is serialized 4006 } 4007 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4008 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4009 // the next loop 4010 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4011 4012 // Save bounds info into allocated private buffer 4013 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4014 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4015 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4016 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4017 pr_buf->th_doacross_info[0] = 4018 (kmp_int64)num_dims; // first element is number of dimensions 4019 // Save also address of num_done in order to access it later without knowing 4020 // the buffer index 4021 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4022 pr_buf->th_doacross_info[2] = dims[0].lo; 4023 pr_buf->th_doacross_info[3] = dims[0].up; 4024 pr_buf->th_doacross_info[4] = dims[0].st; 4025 last = 5; 4026 for (j = 1; j < num_dims; ++j) { 4027 kmp_int64 4028 range_length; // To keep ranges of all dimensions but the first dims[0] 4029 if (dims[j].st == 1) { // most common case 4030 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4031 range_length = dims[j].up - dims[j].lo + 1; 4032 } else { 4033 if (dims[j].st > 0) { 4034 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4035 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4036 } else { // negative increment 4037 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4038 range_length = 4039 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4040 } 4041 } 4042 pr_buf->th_doacross_info[last++] = range_length; 4043 pr_buf->th_doacross_info[last++] = dims[j].lo; 4044 pr_buf->th_doacross_info[last++] = dims[j].up; 4045 pr_buf->th_doacross_info[last++] = dims[j].st; 4046 } 4047 4048 // Compute total trip count. 4049 // Start with range of dims[0] which we don't need to keep in the buffer. 
4050 if (dims[0].st == 1) { // most common case 4051 trace_count = dims[0].up - dims[0].lo + 1; 4052 } else if (dims[0].st > 0) { 4053 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4054 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4055 } else { // negative increment 4056 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4057 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4058 } 4059 for (j = 1; j < num_dims; ++j) { 4060 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4061 } 4062 KMP_DEBUG_ASSERT(trace_count > 0); 4063 4064 // Check if shared buffer is not occupied by other loop (idx - 4065 // __kmp_dispatch_num_buffers) 4066 if (idx != sh_buf->doacross_buf_idx) { 4067 // Shared buffer is occupied, wait for it to be free 4068 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4069 __kmp_eq_4, NULL); 4070 } 4071 #if KMP_32_BIT_ARCH 4072 // Check if we are the first thread. After the CAS the first thread gets 0, 4073 // others get 1 if initialization is in progress, allocated pointer otherwise. 4074 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4075 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4076 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4077 #else 4078 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4079 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4080 #endif 4081 if (flags == NULL) { 4082 // we are the first thread, allocate the array of flags 4083 size_t size = 4084 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4085 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4086 KMP_MB(); 4087 sh_buf->doacross_flags = flags; 4088 } else if (flags == (kmp_uint32 *)1) { 4089 #if KMP_32_BIT_ARCH 4090 // initialization is still in progress, need to wait 4091 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4092 #else 4093 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4094 #endif 4095 KMP_YIELD(TRUE); 4096 KMP_MB(); 4097 } else { 4098 KMP_MB(); 4099 } 4100 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4101 pr_buf->th_doacross_flags = 4102 sh_buf->doacross_flags; // save private copy in order to not 4103 // touch shared buffer on each iteration 4104 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4105 } 4106 4107 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4108 __kmp_assert_valid_gtid(gtid); 4109 kmp_int64 shft; 4110 size_t num_dims, i; 4111 kmp_uint32 flag; 4112 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4113 kmp_info_t *th = __kmp_threads[gtid]; 4114 kmp_team_t *team = th->th.th_team; 4115 kmp_disp_t *pr_buf; 4116 kmp_int64 lo, up, st; 4117 4118 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4119 if (team->t.t_serialized) { 4120 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4121 return; // no dependencies if team is serialized 4122 } 4123 4124 // calculate sequential iteration number and check out-of-bounds condition 4125 pr_buf = th->th.th_dispatch; 4126 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4127 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4128 lo = pr_buf->th_doacross_info[2]; 4129 up = pr_buf->th_doacross_info[3]; 4130 st = pr_buf->th_doacross_info[4]; 4131 #if OMPT_SUPPORT && OMPT_OPTIONAL 4132 ompt_dependence_t deps[num_dims]; 4133 #endif 4134 if (st == 1) { // most common case 4135 if (vec[0] < lo || vec[0] > up) { 4136 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4137 "bounds [%lld,%lld]\n", 4138 gtid, vec[0], lo, up)); 4139 return; 4140 } 4141 iter_number = vec[0] - lo; 4142 } else if (st > 0) { 4143 if (vec[0] < lo || vec[0] > up) { 4144 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4145 "bounds [%lld,%lld]\n", 4146 gtid, vec[0], lo, up)); 4147 return; 4148 } 4149 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4150 } else { // negative increment 4151 if (vec[0] > lo || vec[0] < up) { 4152 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4153 "bounds [%lld,%lld]\n", 4154 gtid, vec[0], lo, up)); 4155 return; 4156 } 4157 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4158 } 4159 #if OMPT_SUPPORT && OMPT_OPTIONAL 4160 deps[0].variable.value = iter_number; 4161 deps[0].dependence_type = ompt_dependence_type_sink; 4162 #endif 4163 for (i = 1; i < num_dims; ++i) { 4164 kmp_int64 iter, ln; 4165 size_t j = i * 4; 4166 ln = pr_buf->th_doacross_info[j + 1]; 4167 lo = pr_buf->th_doacross_info[j + 2]; 4168 up = pr_buf->th_doacross_info[j + 3]; 4169 st = pr_buf->th_doacross_info[j + 4]; 4170 if (st == 1) { 4171 if (vec[i] < lo || vec[i] > up) { 4172 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4173 "bounds [%lld,%lld]\n", 4174 gtid, vec[i], lo, up)); 4175 return; 4176 } 4177 iter = vec[i] - lo; 4178 } else if (st > 0) { 4179 if (vec[i] < lo || vec[i] > up) { 4180 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4181 "bounds [%lld,%lld]\n", 4182 gtid, vec[i], lo, up)); 4183 return; 4184 } 4185 iter = (kmp_uint64)(vec[i] - lo) / st; 4186 } else { // st < 0 4187 if (vec[i] > lo || vec[i] < up) { 4188 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4189 "bounds [%lld,%lld]\n", 4190 gtid, vec[i], lo, up)); 4191 return; 4192 } 4193 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4194 } 4195 iter_number = iter + ln * iter_number; 4196 #if OMPT_SUPPORT && OMPT_OPTIONAL 4197 deps[i].variable.value = iter; 4198 deps[i].dependence_type = ompt_dependence_type_sink; 4199 #endif 4200 } 4201 shft = iter_number % 32; // use 32-bit granularity 4202 iter_number >>= 5; // divided by 32 4203 flag = 1 << shft; 4204 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4205 KMP_YIELD(TRUE); 4206 } 4207 KMP_MB(); 4208 #if OMPT_SUPPORT && OMPT_OPTIONAL 4209 if (ompt_enabled.ompt_callback_dependences) { 4210 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4211 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4212 } 4213 #endif 4214 KA_TRACE(20, 4215 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4216 gtid, (iter_number << 5) + shft)); 4217 } 4218 4219 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4220 __kmp_assert_valid_gtid(gtid); 4221 kmp_int64 shft; 4222 size_t num_dims, i; 4223 kmp_uint32 flag; 4224 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4225 kmp_info_t *th = __kmp_threads[gtid]; 4226 kmp_team_t *team = th->th.th_team; 4227 kmp_disp_t *pr_buf; 4228 kmp_int64 lo, st; 4229 4230 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4231 if (team->t.t_serialized) { 4232 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4233 return; // no dependencies if team is serialized 4234 } 4235 4236 // calculate sequential iteration number (same as in "wait" but no 4237 // out-of-bounds checks) 4238 pr_buf = th->th.th_dispatch; 4239 
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4240 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4241 lo = pr_buf->th_doacross_info[2]; 4242 st = pr_buf->th_doacross_info[4]; 4243 #if OMPT_SUPPORT && OMPT_OPTIONAL 4244 ompt_dependence_t deps[num_dims]; 4245 #endif 4246 if (st == 1) { // most common case 4247 iter_number = vec[0] - lo; 4248 } else if (st > 0) { 4249 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4250 } else { // negative increment 4251 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4252 } 4253 #if OMPT_SUPPORT && OMPT_OPTIONAL 4254 deps[0].variable.value = iter_number; 4255 deps[0].dependence_type = ompt_dependence_type_source; 4256 #endif 4257 for (i = 1; i < num_dims; ++i) { 4258 kmp_int64 iter, ln; 4259 size_t j = i * 4; 4260 ln = pr_buf->th_doacross_info[j + 1]; 4261 lo = pr_buf->th_doacross_info[j + 2]; 4262 st = pr_buf->th_doacross_info[j + 4]; 4263 if (st == 1) { 4264 iter = vec[i] - lo; 4265 } else if (st > 0) { 4266 iter = (kmp_uint64)(vec[i] - lo) / st; 4267 } else { // st < 0 4268 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4269 } 4270 iter_number = iter + ln * iter_number; 4271 #if OMPT_SUPPORT && OMPT_OPTIONAL 4272 deps[i].variable.value = iter; 4273 deps[i].dependence_type = ompt_dependence_type_source; 4274 #endif 4275 } 4276 #if OMPT_SUPPORT && OMPT_OPTIONAL 4277 if (ompt_enabled.ompt_callback_dependences) { 4278 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4279 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4280 } 4281 #endif 4282 shft = iter_number % 32; // use 32-bit granularity 4283 iter_number >>= 5; // divided by 32 4284 flag = 1 << shft; 4285 KMP_MB(); 4286 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4287 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4288 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4289 (iter_number << 5) + shft)); 4290 } 4291 4292 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4293 __kmp_assert_valid_gtid(gtid); 4294 kmp_int32 num_done; 4295 kmp_info_t *th = __kmp_threads[gtid]; 4296 kmp_team_t *team = th->th.th_team; 4297 kmp_disp_t *pr_buf = th->th.th_dispatch; 4298 4299 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4300 if (team->t.t_serialized) { 4301 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4302 return; // nothing to do 4303 } 4304 num_done = 4305 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4306 if (num_done == th->th.th_team_nproc) { 4307 // we are the last thread, need to free shared resources 4308 int idx = pr_buf->th_doacross_buf_idx - 1; 4309 dispatch_shared_info_t *sh_buf = 4310 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4311 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4312 (kmp_int64)&sh_buf->doacross_num_done); 4313 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4314 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4315 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4316 sh_buf->doacross_flags = NULL; 4317 sh_buf->doacross_num_done = 0; 4318 sh_buf->doacross_buf_idx += 4319 __kmp_dispatch_num_buffers; // free buffer for future re-use 4320 } 4321 // free private resources (need to keep buffer index forever) 4322 pr_buf->th_doacross_flags = NULL; 4323 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4324 pr_buf->th_doacross_info = NULL; 4325 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4326 } 4327 4328 /* OpenMP 5.1 Memory Management routines */ 4329 void 
*omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4330 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4331 } 4332 4333 void *omp_aligned_alloc(size_t align, size_t size, 4334 omp_allocator_handle_t allocator) { 4335 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4336 } 4337 4338 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4339 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4340 } 4341 4342 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4343 omp_allocator_handle_t allocator) { 4344 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4345 } 4346 4347 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4348 omp_allocator_handle_t free_allocator) { 4349 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4350 free_allocator); 4351 } 4352 4353 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4354 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4355 } 4356 /* end of OpenMP 5.1 Memory Management routines */ 4357 4358 int __kmpc_get_target_offload(void) { 4359 if (!__kmp_init_serial) { 4360 __kmp_serial_initialize(); 4361 } 4362 return __kmp_target_offload; 4363 } 4364 4365 int __kmpc_pause_resource(kmp_pause_status_t level) { 4366 if (!__kmp_init_serial) { 4367 return 1; // Can't pause if runtime is not initialized 4368 } 4369 return __kmp_pause_resource(level); 4370 } 4371 4372 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4373 if (!__kmp_init_serial) 4374 __kmp_serial_initialize(); 4375 4376 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4377 4378 #if OMPT_SUPPORT 4379 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4380 ompt_callbacks.ompt_callback(ompt_callback_error)( 4381 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4382 OMPT_GET_RETURN_ADDRESS(0)); 4383 } 4384 #endif // OMPT_SUPPORT 4385 4386 char *src_loc; 4387 if (loc && loc->psource) { 4388 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4389 src_loc = 4390 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4391 __kmp_str_loc_free(&str_loc); 4392 } else { 4393 src_loc = __kmp_str_format("unknown"); 4394 } 4395 4396 if (severity == severity_warning) 4397 KMP_WARNING(UserDirectedWarning, src_loc, message); 4398 else 4399 KMP_FATAL(UserDirectedError, src_loc, message); 4400 4401 __kmp_str_free(&src_loc); 4402 } 4403 4404 // Mark begin of scope directive. 4405 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4406 // reserved is for extension of scope directive and not used. 4407 #if OMPT_SUPPORT && OMPT_OPTIONAL 4408 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4409 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4410 int tid = __kmp_tid_from_gtid(gtid); 4411 ompt_callbacks.ompt_callback(ompt_callback_work)( 4412 ompt_work_scope, ompt_scope_begin, 4413 &(team->t.ompt_team_info.parallel_data), 4414 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4415 OMPT_GET_RETURN_ADDRESS(0)); 4416 } 4417 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4418 } 4419 4420 // Mark end of scope directive 4421 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4422 // reserved is for extension of scope directive and not used. 
4423 #if OMPT_SUPPORT && OMPT_OPTIONAL 4424 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4425 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4426 int tid = __kmp_tid_from_gtid(gtid); 4427 ompt_callbacks.ompt_callback(ompt_callback_work)( 4428 ompt_work_scope, ompt_scope_end, 4429 &(team->t.ompt_team_info.parallel_data), 4430 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4431 OMPT_GET_RETURN_ADDRESS(0)); 4432 } 4433 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4434 } 4435 4436 #ifdef KMP_USE_VERSION_SYMBOLS 4437 // For GOMP compatibility there are two versions of each omp_* API. 4438 // One is the plain C symbol and one is the Fortran symbol with an appended 4439 // underscore. When we implement a specific ompc_* version of an omp_* 4440 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4441 // instead of the Fortran versions in kmp_ftn_entry.h 4442 extern "C" { 4443 // Have to undef these from omp.h so they aren't translated into 4444 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4445 #ifdef omp_set_affinity_format 4446 #undef omp_set_affinity_format 4447 #endif 4448 #ifdef omp_get_affinity_format 4449 #undef omp_get_affinity_format 4450 #endif 4451 #ifdef omp_display_affinity 4452 #undef omp_display_affinity 4453 #endif 4454 #ifdef omp_capture_affinity 4455 #undef omp_capture_affinity 4456 #endif 4457 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4458 "OMP_5.0"); 4459 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4460 "OMP_5.0"); 4461 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4462 "OMP_5.0"); 4463 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4464 "OMP_5.0"); 4465 } // extern "C" 4466 #endif 4467
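// Usage sketch for the OpenMP 5.1 memory-management entry points above
// (illustrative only; omp_default_mem_alloc is the predefined allocator
// handle from omp.h, and buf/n are made-up names):
//
//   double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
//                                             omp_default_mem_alloc);
//   if (buf != NULL) {
//     // ... use buf ...
//     omp_free(buf, omp_default_mem_alloc);
//   }
//
// omp_alloc(), omp_calloc() and omp_realloc() follow the same pattern with
// the malloc/calloc/realloc-style argument conventions shown in their
// definitions above.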