/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
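
/*
 * Illustration only (not part of the runtime): a rough sketch of the calls a
 * compiler might emit for a parallel construct that carries `num_threads` and
 * `proc_bind` clauses. The push calls apply to the next fork issued by this
 * thread. The `loc_example` ident, its psource string, and the outlined
 * routine name are hypothetical placeholders; exact codegen is
 * compiler-specific.
 *
 * @code
 * // #pragma omp parallel num_threads(4) proc_bind(close)
 * //   ... body ...
 * static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;10;1;;"};
 * void lower_parallel_with_clauses(void) {
 *   kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 *   __kmpc_push_num_threads(&loc_example, gtid, 4);
 *   __kmpc_push_proc_bind(&loc_example, gtid, proc_bind_close);
 *   __kmpc_fork_call(&loc_example, 0, (kmpc_micro)outlined_body);
 * }
 * @endcode
 */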

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
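
/*
 * Illustration only (not part of the runtime): a minimal sketch of how a
 * compiler might outline a parallel region and hand it to __kmpc_fork_call.
 * The microtask receives the global and bound thread ids by pointer, followed
 * by the shared-variable pointers passed through the ellipsis. All names
 * below are hypothetical.
 *
 * @code
 * // #pragma omp parallel shared(x)
 * //   x[omp_get_thread_num()] = 1;
 * static void outlined_example(kmp_int32 *gtid, kmp_int32 *bound_tid, int *x) {
 *   x[*bound_tid] = 1; // body of the parallel region
 * }
 * void lower_parallel(int *x) {
 *   static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;1;1;;"};
 *   // argc == 1: one shared argument, forwarded to every microtask invocation
 *   __kmpc_fork_call(&loc_example, 1, (kmpc_micro)outlined_example, x);
 * }
 * @endcode
 */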

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
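
/*
 * Illustration only (not part of the runtime): a rough sketch, under the
 * assumptions noted below, of how a compiler might lower
 * `#pragma omp parallel if (cond)`. When the condition is false, the region is
 * executed serially between __kmpc_serialized_parallel and
 * __kmpc_end_serialized_parallel, with the outlined body invoked directly on
 * the encountering thread. `loc_example`, `outlined_body` and
 * `lower_parallel_if` are hypothetical names.
 *
 * @code
 * void lower_parallel_if(int cond) {
 *   static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;1;1;;"};
 *   if (cond) {
 *     __kmpc_fork_call(&loc_example, 0, (kmpc_micro)outlined_body);
 *   } else {
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 *     kmp_int32 bound_tid = 0;
 *     __kmpc_serialized_parallel(&loc_example, gtid);
 *     outlined_body(&gtid, &bound_tid); // run the region on this thread only
 *     __kmpc_end_serialized_parallel(&loc_example, gtid);
 *   }
 * }
 * @endcode
 */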

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed either (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.flags.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
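
/*
 * Illustration only (not part of the runtime): a minimal sketch, assuming a
 * hypothetical `loc_example` ident, of the calls a compiler might emit for
 * explicit flush and barrier directives.
 *
 * @code
 * void lower_flush_and_barrier(void) {
 *   static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;1;1;;"};
 *   kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
 *   __kmpc_flush(&loc_example);         // #pragma omp flush
 *   __kmpc_barrier(&loc_example, gtid); // #pragma omp barrier
 * }
 * @endcode
 */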

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
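
/*
 * Illustration only (not part of the runtime): a minimal sketch of how a
 * compiler might guard a `master` region with the calls above. The end call is
 * made only by the thread that took the branch; names are hypothetical.
 *
 * @code
 * // #pragma omp master
 * //   do_master_work();
 * void lower_master(kmp_int32 gtid, ident_t *loc) {
 *   if (__kmpc_master(loc, gtid)) {
 *     do_master_work();
 *     __kmpc_end_master(loc, gtid);
 *   }
 * }
 * @endcode
 */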

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}
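
/*
 * Illustration only (not part of the runtime): a minimal sketch of a `masked`
 * construct with a `filter` clause. The filter expression is evaluated by the
 * caller and passed in; names are hypothetical.
 *
 * @code
 * // #pragma omp masked filter(2)
 * //   do_masked_work();
 * void lower_masked(kmp_int32 gtid, ident_t *loc) {
 *   if (__kmpc_masked(loc, gtid, 2)) {
 *     do_masked_work();
 *     __kmpc_end_masked(loc, gtid);
 *   }
 * }
 * @endcode
 */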

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
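
/*
 * Illustration only (not part of the runtime): within a chunk of an ordered
 * loop, the body of the `ordered` region is bracketed by the two calls above.
 * The surrounding loop-dispatch calls are omitted here; names are
 * hypothetical.
 *
 * @code
 * // for each iteration handed out by the loop dispatcher:
 * //   #pragma omp ordered
 * //     emit_in_order(i);
 * void lower_ordered_body(kmp_int32 gtid, ident_t *loc, int i) {
 *   __kmpc_ordered(loc, gtid);
 *   emit_in_order(i); // executed in iteration order
 *   __kmpc_end_ordered(loc, gtid);
 * }
 * @endcode
 */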

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | \
                                             KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not real event seen by user but rather setting
// name for object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
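
/*
 * Illustration only (not part of the runtime): a minimal sketch, under the
 * assumption of a hypothetical lowering, of a `critical` construct with a
 * `hint` clause. The compiler passes the evaluated hint so the runtime can
 * pick a lock implementation via __kmp_map_hint_to_lock; the critical-name
 * object is a zero-initialized kmp_critical_name.
 *
 * @code
 * // #pragma omp critical (speculative_cs) hint(omp_sync_hint_speculative)
 * //   update_shared();
 * static kmp_critical_name crit_speculative_cs = {0};
 * void lower_critical_hint(kmp_int32 gtid, ident_t *loc) {
 *   __kmpc_critical_with_hint(loc, gtid, &crit_speculative_cs,
 *                             omp_sync_hint_speculative);
 *   update_shared();
 *   __kmpc_end_critical(loc, gtid, &crit_speculative_cs);
 * }
 * @endcode
 */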

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
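
/*
 * Illustration only (not part of the runtime): a rough sketch of how a plain
 * named `critical` construct pairs the enter and exit calls around the
 * protected code. All uses of the same construct name share one
 * kmp_critical_name object; names below are hypothetical.
 *
 * @code
 * // #pragma omp critical (update)
 * //   counter++;
 * static kmp_critical_name crit_update = {0};
 * void lower_critical(kmp_int32 gtid, ident_t *loc, int *counter) {
 *   __kmpc_critical(loc, gtid, &crit_update);
 *   (*counter)++; // protected region
 *   __kmpc_end_critical(loc, gtid, &crit_update);
 * }
 * @endcode
 */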
1708 */ 1709 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1710 int status; 1711 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1712 __kmp_assert_valid_gtid(global_tid); 1713 1714 if (!TCR_4(__kmp_init_parallel)) 1715 __kmp_parallel_initialize(); 1716 1717 __kmp_resume_if_soft_paused(); 1718 1719 if (__kmp_env_consistency_check) 1720 __kmp_check_barrier(global_tid, ct_barrier, loc); 1721 1722 #if OMPT_SUPPORT 1723 ompt_frame_t *ompt_frame; 1724 if (ompt_enabled.enabled) { 1725 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1726 if (ompt_frame->enter_frame.ptr == NULL) 1727 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1728 } 1729 OMPT_STORE_RETURN_ADDRESS(global_tid); 1730 #endif 1731 #if USE_ITT_NOTIFY 1732 __kmp_threads[global_tid]->th.th_ident = loc; 1733 #endif 1734 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1735 #if OMPT_SUPPORT && OMPT_OPTIONAL 1736 if (ompt_enabled.enabled) { 1737 ompt_frame->enter_frame = ompt_data_none; 1738 } 1739 #endif 1740 1741 return (status != 0) ? 0 : 1; 1742 } 1743 1744 /*! 1745 @ingroup SYNCHRONIZATION 1746 @param loc source location information 1747 @param global_tid thread id. 1748 1749 Complete the execution of a combined barrier and master. This function should 1750 only be called at the completion of the <tt>master</tt> code. Other threads will 1751 still be waiting at the barrier and this call releases them. 1752 */ 1753 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1754 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1755 __kmp_assert_valid_gtid(global_tid); 1756 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1757 } 1758 1759 /*! 1760 @ingroup SYNCHRONIZATION 1761 @param loc source location information 1762 @param global_tid thread id. 1763 @return one if the thread should execute the master block, zero otherwise 1764 1765 Start execution of a combined barrier and master(nowait) construct. 1766 The barrier is executed inside this function. 1767 There is no equivalent "end" function, since the construct has nowait semantics: the barrier is executed on entry, and no thread waits for the code executed by the selected thread, so no further runtime call is required. 1768 */ 1769 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1770 kmp_int32 ret; 1771 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1772 __kmp_assert_valid_gtid(global_tid); 1773 1774 if (!TCR_4(__kmp_init_parallel)) 1775 __kmp_parallel_initialize(); 1776 1777 __kmp_resume_if_soft_paused(); 1778 1779 if (__kmp_env_consistency_check) { 1780 if (loc == 0) { 1781 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1782 } 1783 __kmp_check_barrier(global_tid, ct_barrier, loc); 1784 } 1785 1786 #if OMPT_SUPPORT 1787 ompt_frame_t *ompt_frame; 1788 if (ompt_enabled.enabled) { 1789 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1790 if (ompt_frame->enter_frame.ptr == NULL) 1791 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1792 } 1793 OMPT_STORE_RETURN_ADDRESS(global_tid); 1794 #endif 1795 #if USE_ITT_NOTIFY 1796 __kmp_threads[global_tid]->th.th_ident = loc; 1797 #endif 1798 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1799 #if OMPT_SUPPORT && OMPT_OPTIONAL 1800 if (ompt_enabled.enabled) { 1801 ompt_frame->enter_frame = ompt_data_none; 1802 } 1803 #endif 1804 1805 ret = __kmpc_master(loc, global_tid); 1806 1807 if (__kmp_env_consistency_check) { 1808 /* there's no __kmpc_end_master called; so the (stats) */ 1809 /* actions of __kmpc_end_master are done here */ 1810 if (ret) { 1811 /* only one thread should do the pop since only */ 1812 /* one did the push (see __kmpc_master()) */ 1813 __kmp_pop_sync(global_tid, ct_master, loc); 1814 } 1815 } 1816 1817 return (ret); 1818 } 1819 1820 /* The BARRIER for a SINGLE process section is always explicit */ 1821 /*! 1822 @ingroup WORK_SHARING 1823 @param loc source location information 1824 @param global_tid global thread number 1825 @return One if this thread should execute the single construct, zero otherwise. 1826 1827 Test whether to execute a <tt>single</tt> construct. 1828 There are no implicit barriers in the two "single" calls, rather the compiler 1829 should introduce an explicit barrier if it is required. 1830 */ 1831 1832 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1833 __kmp_assert_valid_gtid(global_tid); 1834 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1835 1836 if (rc) { 1837 // We are going to execute the single statement, so we should count it. 1838 KMP_COUNT_BLOCK(OMP_SINGLE); 1839 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1840 } 1841 1842 #if OMPT_SUPPORT && OMPT_OPTIONAL 1843 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1844 kmp_team_t *team = this_thr->th.th_team; 1845 int tid = __kmp_tid_from_gtid(global_tid); 1846 1847 if (ompt_enabled.enabled) { 1848 if (rc) { 1849 if (ompt_enabled.ompt_callback_work) { 1850 ompt_callbacks.ompt_callback(ompt_callback_work)( 1851 ompt_work_single_executor, ompt_scope_begin, 1852 &(team->t.ompt_team_info.parallel_data), 1853 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1854 1, OMPT_GET_RETURN_ADDRESS(0)); 1855 } 1856 } else { 1857 if (ompt_enabled.ompt_callback_work) { 1858 ompt_callbacks.ompt_callback(ompt_callback_work)( 1859 ompt_work_single_other, ompt_scope_begin, 1860 &(team->t.ompt_team_info.parallel_data), 1861 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1862 1, OMPT_GET_RETURN_ADDRESS(0)); 1863 ompt_callbacks.ompt_callback(ompt_callback_work)( 1864 ompt_work_single_other, ompt_scope_end, 1865 &(team->t.ompt_team_info.parallel_data), 1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1867 1, OMPT_GET_RETURN_ADDRESS(0)); 1868 } 1869 } 1870 } 1871 #endif 1872 1873 return rc; 1874 } 1875 1876 /*! 1877 @ingroup WORK_SHARING 1878 @param loc source location information 1879 @param global_tid global thread number 1880 1881 Mark the end of a <tt>single</tt> construct. This function should 1882 only be called by the thread that executed the block of code protected 1883 by the `single` construct. 
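For illustration, a sketch of how the two calls pair up; since neither call implies a barrier, the compiler emits an explicit one (e.g. via @ref __kmpc_barrier) when the construct has no <tt>nowait</tt> clause:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
if (__kmpc_single(&loc, gtid)) {
  // body of the single construct, executed by exactly one thread
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // only when the construct is not nowait
@endcode
This is a sketch only; <tt>loc</tt> is assumed to be a compiler-generated source location descriptor.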
1884 */ 1885 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1886 __kmp_assert_valid_gtid(global_tid); 1887 __kmp_exit_single(global_tid); 1888 KMP_POP_PARTITIONED_TIMER(); 1889 1890 #if OMPT_SUPPORT && OMPT_OPTIONAL 1891 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1892 kmp_team_t *team = this_thr->th.th_team; 1893 int tid = __kmp_tid_from_gtid(global_tid); 1894 1895 if (ompt_enabled.ompt_callback_work) { 1896 ompt_callbacks.ompt_callback(ompt_callback_work)( 1897 ompt_work_single_executor, ompt_scope_end, 1898 &(team->t.ompt_team_info.parallel_data), 1899 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1900 OMPT_GET_RETURN_ADDRESS(0)); 1901 } 1902 #endif 1903 } 1904 1905 /*! 1906 @ingroup WORK_SHARING 1907 @param loc Source location 1908 @param global_tid Global thread id 1909 1910 Mark the end of a statically scheduled loop. 1911 */ 1912 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1913 KMP_POP_PARTITIONED_TIMER(); 1914 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1915 1916 #if OMPT_SUPPORT && OMPT_OPTIONAL 1917 if (ompt_enabled.ompt_callback_work) { 1918 ompt_work_t ompt_work_type = ompt_work_loop; 1919 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1920 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1921 // Determine workshare type 1922 if (loc != NULL) { 1923 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1924 ompt_work_type = ompt_work_loop; 1925 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1926 ompt_work_type = ompt_work_sections; 1927 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1928 ompt_work_type = ompt_work_distribute; 1929 } else { 1930 // use default set above. 1931 // a warning about this case is provided in __kmpc_for_static_init 1932 } 1933 KMP_DEBUG_ASSERT(ompt_work_type); 1934 } 1935 ompt_callbacks.ompt_callback(ompt_callback_work)( 1936 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1937 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1938 } 1939 #endif 1940 if (__kmp_env_consistency_check) 1941 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1942 } 1943 1944 // User routines which take C-style arguments (call by value) 1945 // different from the Fortran equivalent routines 1946 1947 void ompc_set_num_threads(int arg) { 1948 // !!!!! TODO: check the per-task binding 1949 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1950 } 1951 1952 void ompc_set_dynamic(int flag) { 1953 kmp_info_t *thread; 1954 1955 /* For the thread-private implementation of the internal controls */ 1956 thread = __kmp_entry_thread(); 1957 1958 __kmp_save_internal_controls(thread); 1959 1960 set__dynamic(thread, flag ? true : false); 1961 } 1962 1963 void ompc_set_nested(int flag) { 1964 kmp_info_t *thread; 1965 1966 /* For the thread-private internal controls implementation */ 1967 thread = __kmp_entry_thread(); 1968 1969 __kmp_save_internal_controls(thread); 1970 1971 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1972 } 1973 1974 void ompc_set_max_active_levels(int max_active_levels) { 1975 /* TO DO */ 1976 /* we want per-task implementation of this internal control */ 1977 1978 /* For the per-thread internal controls implementation */ 1979 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1980 } 1981 1982 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1983 // !!!!! 
TODO: check the per-task binding 1984 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1985 } 1986 1987 int ompc_get_ancestor_thread_num(int level) { 1988 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1989 } 1990 1991 int ompc_get_team_size(int level) { 1992 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1993 } 1994 1995 /* OpenMP 5.0 Affinity Format API */ 1996 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 1997 if (!__kmp_init_serial) { 1998 __kmp_serial_initialize(); 1999 } 2000 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2001 format, KMP_STRLEN(format) + 1); 2002 } 2003 2004 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2005 size_t format_size; 2006 if (!__kmp_init_serial) { 2007 __kmp_serial_initialize(); 2008 } 2009 format_size = KMP_STRLEN(__kmp_affinity_format); 2010 if (buffer && size) { 2011 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2012 format_size + 1); 2013 } 2014 return format_size; 2015 } 2016 2017 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2018 int gtid; 2019 if (!TCR_4(__kmp_init_middle)) { 2020 __kmp_middle_initialize(); 2021 } 2022 __kmp_assign_root_init_mask(); 2023 gtid = __kmp_get_gtid(); 2024 __kmp_aux_display_affinity(gtid, format); 2025 } 2026 2027 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2028 char const *format) { 2029 int gtid; 2030 size_t num_required; 2031 kmp_str_buf_t capture_buf; 2032 if (!TCR_4(__kmp_init_middle)) { 2033 __kmp_middle_initialize(); 2034 } 2035 __kmp_assign_root_init_mask(); 2036 gtid = __kmp_get_gtid(); 2037 __kmp_str_buf_init(&capture_buf); 2038 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2039 if (buffer && buf_size) { 2040 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2041 capture_buf.used + 1); 2042 } 2043 __kmp_str_buf_free(&capture_buf); 2044 return num_required; 2045 } 2046 2047 void kmpc_set_stacksize(int arg) { 2048 // __kmp_aux_set_stacksize initializes the library if needed 2049 __kmp_aux_set_stacksize(arg); 2050 } 2051 2052 void kmpc_set_stacksize_s(size_t arg) { 2053 // __kmp_aux_set_stacksize initializes the library if needed 2054 __kmp_aux_set_stacksize(arg); 2055 } 2056 2057 void kmpc_set_blocktime(int arg) { 2058 int gtid, tid; 2059 kmp_info_t *thread; 2060 2061 gtid = __kmp_entry_gtid(); 2062 tid = __kmp_tid_from_gtid(gtid); 2063 thread = __kmp_thread_from_gtid(gtid); 2064 2065 __kmp_aux_set_blocktime(arg, thread, tid); 2066 } 2067 2068 void kmpc_set_library(int arg) { 2069 // __kmp_user_set_library initializes the library if needed 2070 __kmp_user_set_library((enum library_type)arg); 2071 } 2072 2073 void kmpc_set_defaults(char const *str) { 2074 // __kmp_aux_set_defaults initializes the library if needed 2075 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2076 } 2077 2078 void kmpc_set_disp_num_buffers(int arg) { 2079 // ignore after initialization because some teams have already 2080 // allocated dispatch buffers 2081 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2082 arg <= KMP_MAX_DISP_NUM_BUFF) { 2083 __kmp_dispatch_num_buffers = arg; 2084 } 2085 } 2086 2087 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2088 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2089 return -1; 2090 #else 2091 if (!TCR_4(__kmp_init_middle)) { 2092 __kmp_middle_initialize(); 2093 } 2094 __kmp_assign_root_init_mask(); 2095 return __kmp_aux_set_affinity_mask_proc(proc, 
mask); 2096 #endif 2097 } 2098 2099 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2100 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2101 return -1; 2102 #else 2103 if (!TCR_4(__kmp_init_middle)) { 2104 __kmp_middle_initialize(); 2105 } 2106 __kmp_assign_root_init_mask(); 2107 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2108 #endif 2109 } 2110 2111 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2112 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2113 return -1; 2114 #else 2115 if (!TCR_4(__kmp_init_middle)) { 2116 __kmp_middle_initialize(); 2117 } 2118 __kmp_assign_root_init_mask(); 2119 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2120 #endif 2121 } 2122 2123 /* -------------------------------------------------------------------------- */ 2124 /*! 2125 @ingroup THREADPRIVATE 2126 @param loc source location information 2127 @param gtid global thread number 2128 @param cpy_size size of the cpy_data buffer 2129 @param cpy_data pointer to data to be copied 2130 @param cpy_func helper function to call for copying data 2131 @param didit flag variable: 1=single thread; 0=not single thread 2132 2133 __kmpc_copyprivate implements the interface for the private data broadcast 2134 needed for the copyprivate clause associated with a single region in an 2135 OpenMP<sup>*</sup> program (both C and Fortran). 2136 All threads participating in the parallel region call this routine. 2137 One of the threads (called the single thread) should have the <tt>didit</tt> 2138 variable set to 1 and all other threads should have that variable set to 0. 2139 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2140 2141 The OpenMP specification forbids the use of nowait on the single region when a 2142 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2143 barrier internally to avoid race conditions, so the code generation for the 2144 single region should avoid generating a barrier after the call to @ref 2145 __kmpc_copyprivate. 2146 2147 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2148 The <tt>loc</tt> parameter is a pointer to source location information. 2149 2150 Internal implementation: The single thread will first copy its descriptor 2151 address (cpy_data) to a team-private location, then the other threads will each 2152 call the function pointed to by the parameter cpy_func, which carries out the 2153 copy by copying the data using the cpy_data buffer. 2154 2155 The cpy_func routine used for the copy and the contents of the data area defined 2156 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2157 to be done. For instance, the cpy_data buffer can hold the actual data to be 2158 copied or it may hold a list of pointers to the data. The cpy_func routine must 2159 interpret the cpy_data buffer appropriately. 2160 2161 The interface to cpy_func is as follows: 2162 @code 2163 void cpy_func( void *destination, void *source ) 2164 @endcode 2165 where void *destination is the cpy_data pointer for the thread being copied to 2166 and void *source is the cpy_data pointer for the thread being copied from. 
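For illustration only, a compiler could lower <tt>single copyprivate(priv)</tt> for an <tt>int</tt> variable roughly as below. The helper name <tt>copy_int</tt>, the value produced, and the surrounding code are hypothetical, not part of the runtime interface:
@code
static void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source; // copy the broadcast value
}

// Executed by every thread in the team; priv is each thread's private copy.
int priv = 0;
kmp_int32 didit = __kmpc_single(&loc, gtid); // 1 only on the single thread
if (didit) {
  priv = 42; // the single thread produces the value to broadcast
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(priv), &priv, copy_int, didit);
// On return, every thread's priv holds the single thread's value.
@endcode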
2167 */ 2168 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2169 void *cpy_data, void (*cpy_func)(void *, void *), 2170 kmp_int32 didit) { 2171 void **data_ptr; 2172 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2173 __kmp_assert_valid_gtid(gtid); 2174 2175 KMP_MB(); 2176 2177 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2178 2179 if (__kmp_env_consistency_check) { 2180 if (loc == 0) { 2181 KMP_WARNING(ConstructIdentInvalid); 2182 } 2183 } 2184 2185 // ToDo: Optimize the following two barriers into some kind of split barrier 2186 2187 if (didit) 2188 *data_ptr = cpy_data; 2189 2190 #if OMPT_SUPPORT 2191 ompt_frame_t *ompt_frame; 2192 if (ompt_enabled.enabled) { 2193 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2194 if (ompt_frame->enter_frame.ptr == NULL) 2195 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2196 } 2197 OMPT_STORE_RETURN_ADDRESS(gtid); 2198 #endif 2199 /* This barrier is not a barrier region boundary */ 2200 #if USE_ITT_NOTIFY 2201 __kmp_threads[gtid]->th.th_ident = loc; 2202 #endif 2203 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2204 2205 if (!didit) 2206 (*cpy_func)(cpy_data, *data_ptr); 2207 2208 // Consider next barrier a user-visible barrier for barrier region boundaries 2209 // Nesting checks are already handled by the single construct checks 2210 { 2211 #if OMPT_SUPPORT 2212 OMPT_STORE_RETURN_ADDRESS(gtid); 2213 #endif 2214 #if USE_ITT_NOTIFY 2215 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2216 // tasks can overwrite the location) 2217 #endif 2218 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2219 #if OMPT_SUPPORT && OMPT_OPTIONAL 2220 if (ompt_enabled.enabled) { 2221 ompt_frame->enter_frame = ompt_data_none; 2222 } 2223 #endif 2224 } 2225 } 2226 2227 /* -------------------------------------------------------------------------- */ 2228 2229 #define INIT_LOCK __kmp_init_user_lock_with_checks 2230 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2231 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2232 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2233 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2234 #define ACQUIRE_NESTED_LOCK_TIMED \ 2235 __kmp_acquire_nested_user_lock_with_checks_timed 2236 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2237 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2238 #define TEST_LOCK __kmp_test_user_lock_with_checks 2239 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2240 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2241 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2242 2243 // TODO: Make check abort messages use location info & pass it into 2244 // with_checks routines 2245 2246 #if KMP_USE_DYNAMIC_LOCK 2247 2248 // internal lock initializer 2249 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2250 kmp_dyna_lockseq_t seq) { 2251 if (KMP_IS_D_LOCK(seq)) { 2252 KMP_INIT_D_LOCK(lock, seq); 2253 #if USE_ITT_BUILD 2254 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2255 #endif 2256 } else { 2257 KMP_INIT_I_LOCK(lock, seq); 2258 #if USE_ITT_BUILD 2259 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2260 __kmp_itt_lock_creating(ilk->lock, loc); 2261 #endif 2262 } 2263 } 2264 2265 // internal nest lock initializer 2266 static __forceinline void 2267 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2268 kmp_dyna_lockseq_t seq) { 2269 #if KMP_USE_TSX 2270 // Don't have nested lock implementation for speculative locks 2271 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2272 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2273 seq = __kmp_user_lock_seq; 2274 #endif 2275 switch (seq) { 2276 case lockseq_tas: 2277 seq = lockseq_nested_tas; 2278 break; 2279 #if KMP_USE_FUTEX 2280 case lockseq_futex: 2281 seq = lockseq_nested_futex; 2282 break; 2283 #endif 2284 case lockseq_ticket: 2285 seq = lockseq_nested_ticket; 2286 break; 2287 case lockseq_queuing: 2288 seq = lockseq_nested_queuing; 2289 break; 2290 case lockseq_drdpa: 2291 seq = lockseq_nested_drdpa; 2292 break; 2293 default: 2294 seq = lockseq_nested_queuing; 2295 } 2296 KMP_INIT_I_LOCK(lock, seq); 2297 #if USE_ITT_BUILD 2298 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2299 __kmp_itt_lock_creating(ilk->lock, loc); 2300 #endif 2301 } 2302 2303 /* initialize the lock with a hint */ 2304 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2305 uintptr_t hint) { 2306 KMP_DEBUG_ASSERT(__kmp_init_serial); 2307 if (__kmp_env_consistency_check && user_lock == NULL) { 2308 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2309 } 2310 2311 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2312 2313 #if OMPT_SUPPORT && OMPT_OPTIONAL 2314 // This is the case, if called from omp_init_lock_with_hint: 2315 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2316 if (!codeptr) 2317 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2318 if (ompt_enabled.ompt_callback_lock_init) { 2319 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2320 ompt_mutex_lock, (omp_lock_hint_t)hint, 2321 __ompt_get_mutex_impl_type(user_lock), 2322 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2323 } 2324 #endif 2325 } 2326 2327 /* initialize the lock with a hint */ 2328 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2329 void **user_lock, uintptr_t hint) { 2330 KMP_DEBUG_ASSERT(__kmp_init_serial); 2331 if (__kmp_env_consistency_check && user_lock == NULL) { 2332 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2333 } 2334 2335 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2336 2337 #if OMPT_SUPPORT && OMPT_OPTIONAL 2338 // This is the case, if called from omp_init_lock_with_hint: 2339 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2340 if (!codeptr) 2341 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2342 if (ompt_enabled.ompt_callback_lock_init) { 2343 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2344 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2345 __ompt_get_mutex_impl_type(user_lock), 2346 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2347 } 2348 #endif 2349 } 2350 2351 #endif // KMP_USE_DYNAMIC_LOCK 2352 2353 /* initialize the lock */ 2354 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2355 #if KMP_USE_DYNAMIC_LOCK 2356 2357 KMP_DEBUG_ASSERT(__kmp_init_serial); 2358 if (__kmp_env_consistency_check && user_lock == NULL) { 2359 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2360 } 2361 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2362 2363 #if OMPT_SUPPORT && OMPT_OPTIONAL 2364 // This is the case, if called from omp_init_lock_with_hint: 2365 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2366 if (!codeptr) 2367 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2368 if (ompt_enabled.ompt_callback_lock_init) { 2369 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2370 
ompt_mutex_lock, omp_lock_hint_none, 2371 __ompt_get_mutex_impl_type(user_lock), 2372 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2373 } 2374 #endif 2375 2376 #else // KMP_USE_DYNAMIC_LOCK 2377 2378 static char const *const func = "omp_init_lock"; 2379 kmp_user_lock_p lck; 2380 KMP_DEBUG_ASSERT(__kmp_init_serial); 2381 2382 if (__kmp_env_consistency_check) { 2383 if (user_lock == NULL) { 2384 KMP_FATAL(LockIsUninitialized, func); 2385 } 2386 } 2387 2388 KMP_CHECK_USER_LOCK_INIT(); 2389 2390 if ((__kmp_user_lock_kind == lk_tas) && 2391 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2392 lck = (kmp_user_lock_p)user_lock; 2393 } 2394 #if KMP_USE_FUTEX 2395 else if ((__kmp_user_lock_kind == lk_futex) && 2396 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2397 lck = (kmp_user_lock_p)user_lock; 2398 } 2399 #endif 2400 else { 2401 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2402 } 2403 INIT_LOCK(lck); 2404 __kmp_set_user_lock_location(lck, loc); 2405 2406 #if OMPT_SUPPORT && OMPT_OPTIONAL 2407 // This is the case, if called from omp_init_lock_with_hint: 2408 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2409 if (!codeptr) 2410 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2411 if (ompt_enabled.ompt_callback_lock_init) { 2412 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2413 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2414 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2415 } 2416 #endif 2417 2418 #if USE_ITT_BUILD 2419 __kmp_itt_lock_creating(lck); 2420 #endif /* USE_ITT_BUILD */ 2421 2422 #endif // KMP_USE_DYNAMIC_LOCK 2423 } // __kmpc_init_lock 2424 2425 /* initialize the lock */ 2426 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2427 #if KMP_USE_DYNAMIC_LOCK 2428 2429 KMP_DEBUG_ASSERT(__kmp_init_serial); 2430 if (__kmp_env_consistency_check && user_lock == NULL) { 2431 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2432 } 2433 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2434 2435 #if OMPT_SUPPORT && OMPT_OPTIONAL 2436 // This is the case, if called from omp_init_lock_with_hint: 2437 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2438 if (!codeptr) 2439 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2440 if (ompt_enabled.ompt_callback_lock_init) { 2441 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2442 ompt_mutex_nest_lock, omp_lock_hint_none, 2443 __ompt_get_mutex_impl_type(user_lock), 2444 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2445 } 2446 #endif 2447 2448 #else // KMP_USE_DYNAMIC_LOCK 2449 2450 static char const *const func = "omp_init_nest_lock"; 2451 kmp_user_lock_p lck; 2452 KMP_DEBUG_ASSERT(__kmp_init_serial); 2453 2454 if (__kmp_env_consistency_check) { 2455 if (user_lock == NULL) { 2456 KMP_FATAL(LockIsUninitialized, func); 2457 } 2458 } 2459 2460 KMP_CHECK_USER_LOCK_INIT(); 2461 2462 if ((__kmp_user_lock_kind == lk_tas) && 2463 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2464 OMP_NEST_LOCK_T_SIZE)) { 2465 lck = (kmp_user_lock_p)user_lock; 2466 } 2467 #if KMP_USE_FUTEX 2468 else if ((__kmp_user_lock_kind == lk_futex) && 2469 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2470 OMP_NEST_LOCK_T_SIZE)) { 2471 lck = (kmp_user_lock_p)user_lock; 2472 } 2473 #endif 2474 else { 2475 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2476 } 2477 2478 INIT_NESTED_LOCK(lck); 2479 __kmp_set_user_lock_location(lck, loc); 2480 2481 #if OMPT_SUPPORT && OMPT_OPTIONAL 2482 // This is the case, if called from omp_init_lock_with_hint: 2483 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2484 if (!codeptr) 2485 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2486 if (ompt_enabled.ompt_callback_lock_init) { 2487 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2488 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2489 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2490 } 2491 #endif 2492 2493 #if USE_ITT_BUILD 2494 __kmp_itt_lock_creating(lck); 2495 #endif /* USE_ITT_BUILD */ 2496 2497 #endif // KMP_USE_DYNAMIC_LOCK 2498 } // __kmpc_init_nest_lock 2499 2500 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2501 #if KMP_USE_DYNAMIC_LOCK 2502 2503 #if USE_ITT_BUILD 2504 kmp_user_lock_p lck; 2505 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2506 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2507 } else { 2508 lck = (kmp_user_lock_p)user_lock; 2509 } 2510 __kmp_itt_lock_destroyed(lck); 2511 #endif 2512 #if OMPT_SUPPORT && OMPT_OPTIONAL 2513 // This is the case, if called from omp_init_lock_with_hint: 2514 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2515 if (!codeptr) 2516 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2517 if (ompt_enabled.ompt_callback_lock_destroy) { 2518 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2519 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2520 } 2521 #endif 2522 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2523 #else 2524 kmp_user_lock_p lck; 2525 2526 if ((__kmp_user_lock_kind == lk_tas) && 2527 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2528 lck = (kmp_user_lock_p)user_lock; 2529 } 2530 #if KMP_USE_FUTEX 2531 else if ((__kmp_user_lock_kind == lk_futex) && 2532 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2533 lck = (kmp_user_lock_p)user_lock; 2534 } 2535 #endif 2536 else { 2537 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2538 } 2539 2540 #if OMPT_SUPPORT && OMPT_OPTIONAL 2541 // This is the case, if called from omp_init_lock_with_hint: 2542 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2543 if (!codeptr) 2544 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2545 if (ompt_enabled.ompt_callback_lock_destroy) { 2546 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2547 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2548 } 2549 #endif 2550 2551 #if USE_ITT_BUILD 2552 __kmp_itt_lock_destroyed(lck); 2553 #endif /* USE_ITT_BUILD */ 2554 DESTROY_LOCK(lck); 2555 2556 if ((__kmp_user_lock_kind == lk_tas) && 2557 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2558 ; 2559 } 2560 #if KMP_USE_FUTEX 2561 else if ((__kmp_user_lock_kind == lk_futex) && 2562 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2563 ; 2564 } 2565 #endif 2566 else { 2567 __kmp_user_lock_free(user_lock, gtid, lck); 2568 } 2569 #endif // KMP_USE_DYNAMIC_LOCK 2570 } // __kmpc_destroy_lock 2571 2572 /* destroy the lock */ 2573 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2574 #if KMP_USE_DYNAMIC_LOCK 2575 2576 #if USE_ITT_BUILD 2577 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2578 __kmp_itt_lock_destroyed(ilk->lock); 2579 #endif 2580 #if OMPT_SUPPORT && OMPT_OPTIONAL 2581 // This is the case, if called from omp_init_lock_with_hint: 2582 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2583 if (!codeptr) 2584 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2585 if (ompt_enabled.ompt_callback_lock_destroy) { 2586 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2587 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2588 } 2589 
#endif 2590 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2591 2592 #else // KMP_USE_DYNAMIC_LOCK 2593 2594 kmp_user_lock_p lck; 2595 2596 if ((__kmp_user_lock_kind == lk_tas) && 2597 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2598 OMP_NEST_LOCK_T_SIZE)) { 2599 lck = (kmp_user_lock_p)user_lock; 2600 } 2601 #if KMP_USE_FUTEX 2602 else if ((__kmp_user_lock_kind == lk_futex) && 2603 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2604 OMP_NEST_LOCK_T_SIZE)) { 2605 lck = (kmp_user_lock_p)user_lock; 2606 } 2607 #endif 2608 else { 2609 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2610 } 2611 2612 #if OMPT_SUPPORT && OMPT_OPTIONAL 2613 // This is the case, if called from omp_init_lock_with_hint: 2614 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2615 if (!codeptr) 2616 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2617 if (ompt_enabled.ompt_callback_lock_destroy) { 2618 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2619 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2620 } 2621 #endif 2622 2623 #if USE_ITT_BUILD 2624 __kmp_itt_lock_destroyed(lck); 2625 #endif /* USE_ITT_BUILD */ 2626 2627 DESTROY_NESTED_LOCK(lck); 2628 2629 if ((__kmp_user_lock_kind == lk_tas) && 2630 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2631 OMP_NEST_LOCK_T_SIZE)) { 2632 ; 2633 } 2634 #if KMP_USE_FUTEX 2635 else if ((__kmp_user_lock_kind == lk_futex) && 2636 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2637 OMP_NEST_LOCK_T_SIZE)) { 2638 ; 2639 } 2640 #endif 2641 else { 2642 __kmp_user_lock_free(user_lock, gtid, lck); 2643 } 2644 #endif // KMP_USE_DYNAMIC_LOCK 2645 } // __kmpc_destroy_nest_lock 2646 2647 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2648 KMP_COUNT_BLOCK(OMP_set_lock); 2649 #if KMP_USE_DYNAMIC_LOCK 2650 int tag = KMP_EXTRACT_D_TAG(user_lock); 2651 #if USE_ITT_BUILD 2652 __kmp_itt_lock_acquiring( 2653 (kmp_user_lock_p) 2654 user_lock); // itt function will get to the right lock object. 
2655 #endif 2656 #if OMPT_SUPPORT && OMPT_OPTIONAL 2657 // This is the case, if called from omp_init_lock_with_hint: 2658 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2659 if (!codeptr) 2660 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2661 if (ompt_enabled.ompt_callback_mutex_acquire) { 2662 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2663 ompt_mutex_lock, omp_lock_hint_none, 2664 __ompt_get_mutex_impl_type(user_lock), 2665 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2666 } 2667 #endif 2668 #if KMP_USE_INLINED_TAS 2669 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2670 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2671 } else 2672 #elif KMP_USE_INLINED_FUTEX 2673 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2674 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2675 } else 2676 #endif 2677 { 2678 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2679 } 2680 #if USE_ITT_BUILD 2681 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2682 #endif 2683 #if OMPT_SUPPORT && OMPT_OPTIONAL 2684 if (ompt_enabled.ompt_callback_mutex_acquired) { 2685 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2686 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2687 } 2688 #endif 2689 2690 #else // KMP_USE_DYNAMIC_LOCK 2691 2692 kmp_user_lock_p lck; 2693 2694 if ((__kmp_user_lock_kind == lk_tas) && 2695 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2696 lck = (kmp_user_lock_p)user_lock; 2697 } 2698 #if KMP_USE_FUTEX 2699 else if ((__kmp_user_lock_kind == lk_futex) && 2700 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2701 lck = (kmp_user_lock_p)user_lock; 2702 } 2703 #endif 2704 else { 2705 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2706 } 2707 2708 #if USE_ITT_BUILD 2709 __kmp_itt_lock_acquiring(lck); 2710 #endif /* USE_ITT_BUILD */ 2711 #if OMPT_SUPPORT && OMPT_OPTIONAL 2712 // This is the case, if called from omp_init_lock_with_hint: 2713 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2714 if (!codeptr) 2715 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2716 if (ompt_enabled.ompt_callback_mutex_acquire) { 2717 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2718 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2719 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2720 } 2721 #endif 2722 2723 ACQUIRE_LOCK(lck, gtid); 2724 2725 #if USE_ITT_BUILD 2726 __kmp_itt_lock_acquired(lck); 2727 #endif /* USE_ITT_BUILD */ 2728 2729 #if OMPT_SUPPORT && OMPT_OPTIONAL 2730 if (ompt_enabled.ompt_callback_mutex_acquired) { 2731 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2732 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2733 } 2734 #endif 2735 2736 #endif // KMP_USE_DYNAMIC_LOCK 2737 } 2738 2739 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2740 #if KMP_USE_DYNAMIC_LOCK 2741 2742 #if USE_ITT_BUILD 2743 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2744 #endif 2745 #if OMPT_SUPPORT && OMPT_OPTIONAL 2746 // This is the case, if called from omp_init_lock_with_hint: 2747 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2748 if (!codeptr) 2749 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2750 if (ompt_enabled.enabled) { 2751 if (ompt_enabled.ompt_callback_mutex_acquire) { 2752 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2753 ompt_mutex_nest_lock, omp_lock_hint_none, 2754 __ompt_get_mutex_impl_type(user_lock), 2755 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2756 } 2757 } 2758 #endif 2759 int acquire_status = 2760 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2761 (void)acquire_status; 2762 #if USE_ITT_BUILD 2763 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2764 #endif 2765 2766 #if OMPT_SUPPORT && OMPT_OPTIONAL 2767 if (ompt_enabled.enabled) { 2768 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2769 if (ompt_enabled.ompt_callback_mutex_acquired) { 2770 // lock_first 2771 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2772 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2773 codeptr); 2774 } 2775 } else { 2776 if (ompt_enabled.ompt_callback_nest_lock) { 2777 // lock_next 2778 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2779 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2780 } 2781 } 2782 } 2783 #endif 2784 2785 #else // KMP_USE_DYNAMIC_LOCK 2786 int acquire_status; 2787 kmp_user_lock_p lck; 2788 2789 if ((__kmp_user_lock_kind == lk_tas) && 2790 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2791 OMP_NEST_LOCK_T_SIZE)) { 2792 lck = (kmp_user_lock_p)user_lock; 2793 } 2794 #if KMP_USE_FUTEX 2795 else if ((__kmp_user_lock_kind == lk_futex) && 2796 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2797 OMP_NEST_LOCK_T_SIZE)) { 2798 lck = (kmp_user_lock_p)user_lock; 2799 } 2800 #endif 2801 else { 2802 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2803 } 2804 2805 #if USE_ITT_BUILD 2806 __kmp_itt_lock_acquiring(lck); 2807 #endif /* USE_ITT_BUILD */ 2808 #if OMPT_SUPPORT && OMPT_OPTIONAL 2809 // This is the case, if called from omp_init_lock_with_hint: 2810 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2811 if (!codeptr) 2812 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2813 if (ompt_enabled.enabled) { 2814 if (ompt_enabled.ompt_callback_mutex_acquire) { 2815 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2816 ompt_mutex_nest_lock, omp_lock_hint_none, 2817 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2818 codeptr); 2819 } 2820 } 2821 #endif 2822 2823 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2824 2825 #if USE_ITT_BUILD 2826 __kmp_itt_lock_acquired(lck); 2827 #endif /* USE_ITT_BUILD */ 2828 2829 #if OMPT_SUPPORT && OMPT_OPTIONAL 2830 if (ompt_enabled.enabled) { 2831 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2832 if (ompt_enabled.ompt_callback_mutex_acquired) { 2833 // lock_first 2834 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2835 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2836 } 2837 } else { 2838 if (ompt_enabled.ompt_callback_nest_lock) { 2839 // lock_next 2840 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2841 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2842 } 2843 } 2844 } 2845 #endif 2846 2847 #endif // KMP_USE_DYNAMIC_LOCK 2848 } 2849 2850 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2851 #if KMP_USE_DYNAMIC_LOCK 2852 2853 int tag = KMP_EXTRACT_D_TAG(user_lock); 2854 #if USE_ITT_BUILD 2855 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2856 #endif 2857 #if KMP_USE_INLINED_TAS 2858 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2859 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2860 } else 2861 #elif KMP_USE_INLINED_FUTEX 2862 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2863 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2864 } else 2865 #endif 2866 { 2867 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2868 } 2869 2870 #if OMPT_SUPPORT && OMPT_OPTIONAL 2871 // This is the case, if called from omp_init_lock_with_hint: 2872 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2873 if (!codeptr) 2874 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2875 if (ompt_enabled.ompt_callback_mutex_released) { 2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2877 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2878 } 2879 #endif 2880 2881 #else // KMP_USE_DYNAMIC_LOCK 2882 2883 kmp_user_lock_p lck; 2884 2885 /* Can't use serial interval since not block structured */ 2886 /* release the lock */ 2887 2888 if ((__kmp_user_lock_kind == lk_tas) && 2889 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2890 #if KMP_OS_LINUX && \ 2891 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2892 // "fast" path implemented to fix customer performance issue 2893 #if USE_ITT_BUILD 2894 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2895 #endif /* USE_ITT_BUILD */ 2896 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2897 KMP_MB(); 2898 2899 #if OMPT_SUPPORT && OMPT_OPTIONAL 2900 // This is the case, if called from omp_init_lock_with_hint: 2901 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2902 if (!codeptr) 2903 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2904 if (ompt_enabled.ompt_callback_mutex_released) { 2905 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2906 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2907 } 2908 #endif 2909 2910 return; 2911 #else 2912 lck = (kmp_user_lock_p)user_lock; 2913 #endif 2914 } 2915 #if KMP_USE_FUTEX 2916 else if ((__kmp_user_lock_kind == lk_futex) && 2917 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2918 lck = (kmp_user_lock_p)user_lock; 2919 } 2920 #endif 2921 else { 2922 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2923 } 2924 2925 #if USE_ITT_BUILD 2926 __kmp_itt_lock_releasing(lck); 2927 #endif /* USE_ITT_BUILD */ 2928 2929 RELEASE_LOCK(lck, gtid); 2930 2931 #if OMPT_SUPPORT && OMPT_OPTIONAL 2932 // This is the case, if called from omp_init_lock_with_hint: 2933 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2934 if (!codeptr) 2935 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2936 if (ompt_enabled.ompt_callback_mutex_released) { 2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2938 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2939 } 2940 #endif 2941 2942 #endif // KMP_USE_DYNAMIC_LOCK 2943 } 2944 2945 /* release the lock */ 2946 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2947 #if KMP_USE_DYNAMIC_LOCK 2948 2949 #if USE_ITT_BUILD 2950 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2951 #endif 2952 int release_status = 2953 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2954 (void)release_status; 2955 2956 #if OMPT_SUPPORT && OMPT_OPTIONAL 2957 // This is the case, if called from omp_init_lock_with_hint: 2958 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2959 if (!codeptr) 2960 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2961 if (ompt_enabled.enabled) { 2962 if (release_status == KMP_LOCK_RELEASED) { 2963 if (ompt_enabled.ompt_callback_mutex_released) { 2964 // release_lock_last 2965 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2966 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2967 codeptr); 2968 } 2969 } else if (ompt_enabled.ompt_callback_nest_lock) { 2970 // release_lock_prev 2971 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2972 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2973 } 2974 } 2975 #endif 2976 2977 #else // KMP_USE_DYNAMIC_LOCK 2978 2979 kmp_user_lock_p lck; 
2980 2981 /* Can't use serial interval since not block structured */ 2982 2983 if ((__kmp_user_lock_kind == lk_tas) && 2984 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2985 OMP_NEST_LOCK_T_SIZE)) { 2986 #if KMP_OS_LINUX && \ 2987 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2988 // "fast" path implemented to fix customer performance issue 2989 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2990 #if USE_ITT_BUILD 2991 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2992 #endif /* USE_ITT_BUILD */ 2993 2994 #if OMPT_SUPPORT && OMPT_OPTIONAL 2995 int release_status = KMP_LOCK_STILL_HELD; 2996 #endif 2997 2998 if (--(tl->lk.depth_locked) == 0) { 2999 TCW_4(tl->lk.poll, 0); 3000 #if OMPT_SUPPORT && OMPT_OPTIONAL 3001 release_status = KMP_LOCK_RELEASED; 3002 #endif 3003 } 3004 KMP_MB(); 3005 3006 #if OMPT_SUPPORT && OMPT_OPTIONAL 3007 // This is the case, if called from omp_init_lock_with_hint: 3008 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3009 if (!codeptr) 3010 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3011 if (ompt_enabled.enabled) { 3012 if (release_status == KMP_LOCK_RELEASED) { 3013 if (ompt_enabled.ompt_callback_mutex_released) { 3014 // release_lock_last 3015 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3016 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3017 } 3018 } else if (ompt_enabled.ompt_callback_nest_lock) { 3019 // release_lock_previous 3020 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3021 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3022 } 3023 } 3024 #endif 3025 3026 return; 3027 #else 3028 lck = (kmp_user_lock_p)user_lock; 3029 #endif 3030 } 3031 #if KMP_USE_FUTEX 3032 else if ((__kmp_user_lock_kind == lk_futex) && 3033 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3034 OMP_NEST_LOCK_T_SIZE)) { 3035 lck = (kmp_user_lock_p)user_lock; 3036 } 3037 #endif 3038 else { 3039 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3040 } 3041 3042 #if USE_ITT_BUILD 3043 __kmp_itt_lock_releasing(lck); 3044 #endif /* USE_ITT_BUILD */ 3045 3046 int release_status; 3047 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3048 #if OMPT_SUPPORT && OMPT_OPTIONAL 3049 // This is the case, if called from omp_init_lock_with_hint: 3050 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3051 if (!codeptr) 3052 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3053 if (ompt_enabled.enabled) { 3054 if (release_status == KMP_LOCK_RELEASED) { 3055 if (ompt_enabled.ompt_callback_mutex_released) { 3056 // release_lock_last 3057 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3058 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3059 } 3060 } else if (ompt_enabled.ompt_callback_nest_lock) { 3061 // release_lock_previous 3062 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3063 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3064 } 3065 } 3066 #endif 3067 3068 #endif // KMP_USE_DYNAMIC_LOCK 3069 } 3070 3071 /* try to acquire the lock */ 3072 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3073 KMP_COUNT_BLOCK(OMP_test_lock); 3074 3075 #if KMP_USE_DYNAMIC_LOCK 3076 int rc; 3077 int tag = KMP_EXTRACT_D_TAG(user_lock); 3078 #if USE_ITT_BUILD 3079 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3080 #endif 3081 #if OMPT_SUPPORT && OMPT_OPTIONAL 3082 // This is the case, if called from omp_init_lock_with_hint: 3083 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3084 if (!codeptr) 3085 codeptr =
OMPT_GET_RETURN_ADDRESS(0); 3086 if (ompt_enabled.ompt_callback_mutex_acquire) { 3087 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3088 ompt_mutex_lock, omp_lock_hint_none, 3089 __ompt_get_mutex_impl_type(user_lock), 3090 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3091 } 3092 #endif 3093 #if KMP_USE_INLINED_TAS 3094 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3095 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3096 } else 3097 #elif KMP_USE_INLINED_FUTEX 3098 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3099 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3100 } else 3101 #endif 3102 { 3103 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3104 } 3105 if (rc) { 3106 #if USE_ITT_BUILD 3107 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3108 #endif 3109 #if OMPT_SUPPORT && OMPT_OPTIONAL 3110 if (ompt_enabled.ompt_callback_mutex_acquired) { 3111 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3112 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3113 } 3114 #endif 3115 return FTN_TRUE; 3116 } else { 3117 #if USE_ITT_BUILD 3118 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3119 #endif 3120 return FTN_FALSE; 3121 } 3122 3123 #else // KMP_USE_DYNAMIC_LOCK 3124 3125 kmp_user_lock_p lck; 3126 int rc; 3127 3128 if ((__kmp_user_lock_kind == lk_tas) && 3129 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3130 lck = (kmp_user_lock_p)user_lock; 3131 } 3132 #if KMP_USE_FUTEX 3133 else if ((__kmp_user_lock_kind == lk_futex) && 3134 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3135 lck = (kmp_user_lock_p)user_lock; 3136 } 3137 #endif 3138 else { 3139 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3140 } 3141 3142 #if USE_ITT_BUILD 3143 __kmp_itt_lock_acquiring(lck); 3144 #endif /* USE_ITT_BUILD */ 3145 #if OMPT_SUPPORT && OMPT_OPTIONAL 3146 // This is the case, if called from omp_init_lock_with_hint: 3147 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3148 if (!codeptr) 3149 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3150 if (ompt_enabled.ompt_callback_mutex_acquire) { 3151 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3152 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3153 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3154 } 3155 #endif 3156 3157 rc = TEST_LOCK(lck, gtid); 3158 #if USE_ITT_BUILD 3159 if (rc) { 3160 __kmp_itt_lock_acquired(lck); 3161 } else { 3162 __kmp_itt_lock_cancelled(lck); 3163 } 3164 #endif /* USE_ITT_BUILD */ 3165 #if OMPT_SUPPORT && OMPT_OPTIONAL 3166 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3167 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3168 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3169 } 3170 #endif 3171 3172 return (rc ? 
FTN_TRUE : FTN_FALSE); 3173 3174 /* Can't use serial interval since not block structured */ 3175 3176 #endif // KMP_USE_DYNAMIC_LOCK 3177 } 3178 3179 /* try to acquire the lock */ 3180 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3181 #if KMP_USE_DYNAMIC_LOCK 3182 int rc; 3183 #if USE_ITT_BUILD 3184 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3185 #endif 3186 #if OMPT_SUPPORT && OMPT_OPTIONAL 3187 // This is the case, if called from omp_init_lock_with_hint: 3188 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3189 if (!codeptr) 3190 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3191 if (ompt_enabled.ompt_callback_mutex_acquire) { 3192 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3193 ompt_mutex_nest_lock, omp_lock_hint_none, 3194 __ompt_get_mutex_impl_type(user_lock), 3195 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3196 } 3197 #endif 3198 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3199 #if USE_ITT_BUILD 3200 if (rc) { 3201 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3202 } else { 3203 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3204 } 3205 #endif 3206 #if OMPT_SUPPORT && OMPT_OPTIONAL 3207 if (ompt_enabled.enabled && rc) { 3208 if (rc == 1) { 3209 if (ompt_enabled.ompt_callback_mutex_acquired) { 3210 // lock_first 3211 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3212 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3213 codeptr); 3214 } 3215 } else { 3216 if (ompt_enabled.ompt_callback_nest_lock) { 3217 // lock_next 3218 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3219 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3220 } 3221 } 3222 } 3223 #endif 3224 return rc; 3225 3226 #else // KMP_USE_DYNAMIC_LOCK 3227 3228 kmp_user_lock_p lck; 3229 int rc; 3230 3231 if ((__kmp_user_lock_kind == lk_tas) && 3232 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3233 OMP_NEST_LOCK_T_SIZE)) { 3234 lck = (kmp_user_lock_p)user_lock; 3235 } 3236 #if KMP_USE_FUTEX 3237 else if ((__kmp_user_lock_kind == lk_futex) && 3238 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3239 OMP_NEST_LOCK_T_SIZE)) { 3240 lck = (kmp_user_lock_p)user_lock; 3241 } 3242 #endif 3243 else { 3244 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3245 } 3246 3247 #if USE_ITT_BUILD 3248 __kmp_itt_lock_acquiring(lck); 3249 #endif /* USE_ITT_BUILD */ 3250 3251 #if OMPT_SUPPORT && OMPT_OPTIONAL 3252 // This is the case, if called from omp_init_lock_with_hint: 3253 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3254 if (!codeptr) 3255 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3256 if (ompt_enabled.enabled && 3257 ompt_enabled.ompt_callback_mutex_acquire) { 3258 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3259 ompt_mutex_nest_lock, omp_lock_hint_none, 3260 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3261 codeptr); 3262 } 3263 #endif 3264 3265 rc = TEST_NESTED_LOCK(lck, gtid); 3266 #if USE_ITT_BUILD 3267 if (rc) { 3268 __kmp_itt_lock_acquired(lck); 3269 } else { 3270 __kmp_itt_lock_cancelled(lck); 3271 } 3272 #endif /* USE_ITT_BUILD */ 3273 #if OMPT_SUPPORT && OMPT_OPTIONAL 3274 if (ompt_enabled.enabled && rc) { 3275 if (rc == 1) { 3276 if (ompt_enabled.ompt_callback_mutex_acquired) { 3277 // lock_first 3278 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3279 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3280 } 3281 } else { 3282 if (ompt_enabled.ompt_callback_nest_lock) { 3283
// lock_next 3284 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3285 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3286 } 3287 } 3288 } 3289 #endif 3290 return rc; 3291 3292 /* Can't use serial interval since not block structured */ 3293 3294 #endif // KMP_USE_DYNAMIC_LOCK 3295 } 3296 3297 // Interface to fast scalable reduce methods routines 3298 3299 // keep the selected method in a thread local structure for cross-function 3300 // usage: will be used in __kmpc_end_reduce* functions; 3301 // another solution: to re-determine the method one more time in 3302 // __kmpc_end_reduce* functions (new prototype required then) 3303 // AT: which solution is better? 3304 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3305 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3306 3307 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3308 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3309 3310 // description of the packed_reduction_method variable: look at the macros in 3311 // kmp.h 3312 3313 // used in a critical section reduce block 3314 static __forceinline void 3315 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3316 kmp_critical_name *crit) { 3317 3318 // this lock was visible to a customer and to the threading profile tool as a 3319 // serial overhead span (although it's used for an internal purpose only) 3320 // why was it visible in previous implementation? 3321 // should we keep it visible in new reduce block? 3322 kmp_user_lock_p lck; 3323 3324 #if KMP_USE_DYNAMIC_LOCK 3325 3326 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3327 // Check if it is initialized. 3328 if (*lk == 0) { 3329 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3330 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3331 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3332 } else { 3333 __kmp_init_indirect_csptr(crit, loc, global_tid, 3334 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3335 } 3336 } 3337 // Branch for accessing the actual lock object and set operation. This 3338 // branching is inevitable since this lock initialization does not follow the 3339 // normal dispatch path (lock table is not used). 3340 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3341 lck = (kmp_user_lock_p)lk; 3342 KMP_DEBUG_ASSERT(lck != NULL); 3343 if (__kmp_env_consistency_check) { 3344 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3345 } 3346 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3347 } else { 3348 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3349 lck = ilk->lock; 3350 KMP_DEBUG_ASSERT(lck != NULL); 3351 if (__kmp_env_consistency_check) { 3352 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3353 } 3354 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3355 } 3356 3357 #else // KMP_USE_DYNAMIC_LOCK 3358 3359 // We know that the fast reduction code is only emitted by Intel compilers 3360 // with 32 byte critical sections. If there isn't enough space, then we 3361 // have to use a pointer.
3362 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3363 lck = (kmp_user_lock_p)crit; 3364 } else { 3365 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3366 } 3367 KMP_DEBUG_ASSERT(lck != NULL); 3368 3369 if (__kmp_env_consistency_check) 3370 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3371 3372 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3373 3374 #endif // KMP_USE_DYNAMIC_LOCK 3375 } 3376 3377 // used in a critical section reduce block 3378 static __forceinline void 3379 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3380 kmp_critical_name *crit) { 3381 3382 kmp_user_lock_p lck; 3383 3384 #if KMP_USE_DYNAMIC_LOCK 3385 3386 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3387 lck = (kmp_user_lock_p)crit; 3388 if (__kmp_env_consistency_check) 3389 __kmp_pop_sync(global_tid, ct_critical, loc); 3390 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3391 } else { 3392 kmp_indirect_lock_t *ilk = 3393 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3394 if (__kmp_env_consistency_check) 3395 __kmp_pop_sync(global_tid, ct_critical, loc); 3396 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3397 } 3398 3399 #else // KMP_USE_DYNAMIC_LOCK 3400 3401 // We know that the fast reduction code is only emitted by Intel compilers 3402 // with 32 byte critical sections. If there isn't enough space, then we have 3403 // to use a pointer. 3404 if (__kmp_base_user_lock_size > 32) { 3405 lck = *((kmp_user_lock_p *)crit); 3406 KMP_ASSERT(lck != NULL); 3407 } else { 3408 lck = (kmp_user_lock_p)crit; 3409 } 3410 3411 if (__kmp_env_consistency_check) 3412 __kmp_pop_sync(global_tid, ct_critical, loc); 3413 3414 __kmp_release_user_lock_with_checks(lck, global_tid); 3415 3416 #endif // KMP_USE_DYNAMIC_LOCK 3417 } // __kmp_end_critical_section_reduce_block 3418 3419 static __forceinline int 3420 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3421 int *task_state) { 3422 kmp_team_t *team; 3423 3424 // Check if we are inside the teams construct? 3425 if (th->th.th_teams_microtask) { 3426 *team_p = team = th->th.th_team; 3427 if (team->t.t_level == th->th.th_teams_level) { 3428 // This is reduction at teams construct. 3429 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3430 // Let's swap teams temporarily for the reduction. 3431 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3432 th->th.th_team = team->t.t_parent; 3433 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3434 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3435 *task_state = th->th.th_task_state; 3436 th->th.th_task_state = 0; 3437 3438 return 1; 3439 } 3440 } 3441 return 0; 3442 } 3443 3444 static __forceinline void 3445 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3446 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3447 th->th.th_info.ds.ds_tid = 0; 3448 th->th.th_team = team; 3449 th->th.th_team_nproc = team->t.t_nproc; 3450 th->th.th_task_team = team->t.t_task_team[task_state]; 3451 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3452 } 3453 3454 /* 2.a.i. Reduce Block without a terminating barrier */ 3455 /*! 

/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // the packed_reduction_method value will be reused by the __kmp_end_reduce*
  // function, so the value should be kept in a variable
  // the variable should be either a construct-specific or a thread-specific
  // property, not a team-specific property
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding the two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes the 'determine' and 'set' lines (no need to have a
  // single thread do it, which would only add unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (this is not ideal: the checking block has already been closed by this
    // 'pop', but the atomic operation has not been executed yet; it will be
    // executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if the primary thread is slow, other threads are blocked here
    // waiting for the primary thread to come and release them)
    // AT: (it's not what a customer might expect when specifying the NOWAIT
    // clause)
    // AT: (specifying NOWAIT won't result in improved performance; it'll be
    // confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other
    // design) might go faster and be more in line with the sense of NOWAIT
    // AT: TO DO: run the EPCC test and compare times

    // this barrier should be invisible to a customer and to the threading
    // profile tool (it's neither a terminating barrier nor customer's code,
    // it's used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither the primary thread nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it would be better to remove this branch altogether;
    // after removal this value would be caught by the 'else' and would assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
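
// A minimal sketch (illustrative only, not the exact clang lowering) of the
// code a compiler might generate for
//   #pragma omp parallel for reduction(+ : sum) nowait
// using __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait(); 'reduce_fn', 'crit'
// and 'priv' are hypothetical names:
//
//   static kmp_critical_name crit;           // zero-initialized lock storage
//   static void reduce_fn(void *lhs, void *rhs) {
//     *(int *)lhs += *(int *)rhs;            // combine two partial results
//   }
//   ...
//   int priv = 0;                            // thread-private partial sum
//   ... the loop body accumulates into priv ...
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &priv, reduce_fn,
//                                &crit)) {
//   case 1: // critical/empty method, or tree method on the primary thread
//     sum += priv;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit);
//     break;
//   case 2: // atomic method: every thread combines atomically, no end call
//     __atomic_fetch_add(&sum, priv, __ATOMIC_RELAXED);
//     break;
//   default: // 0: tree method on non-primary threads, nothing left to do
//     break;
//   }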

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause cannot be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // case tree_reduce_block:
    // this barrier should be visible to a customer and to the threading
    // profile tool (it's a terminating barrier on constructs if NOWAIT is not
    // specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (no worker other than the primary thread will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: primary thread
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
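
// The blocking counterpart, sketched for
//   #pragma omp parallel for reduction(+ : sum)
// (illustrative only; same hypothetical names as the nowait sketch above).
// Unlike the nowait form, the atomic case also calls __kmpc_end_reduce() so
// that every thread reaches the terminating barrier:
//
//   switch (__kmpc_reduce(loc, gtid, 1, sizeof(int), &priv, reduce_fn, &crit)) {
//   case 1:
//     sum += priv;
//     __kmpc_end_reduce(loc, gtid, &crit);
//     break;
//   case 2:
//     __atomic_fetch_add(&sum, priv, __ATOMIC_RELAXED);
//     __kmpc_end_reduce(loc, gtid, &crit);
//     break;
//   default: // tree method, non-primary threads released in __kmp_barrier()
//     break;
//   }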

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT is not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread executes here (primary releases all other
    // workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for (i = 2; i < 9; i += 2) lo = 2, up = 8, st = 2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with the range of dims[0], which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check that the shared buffer is not occupied by another loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
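
// Worked example (illustrative): for the 1-D loop from the documentation
// above, for (i = 2; i < 9; i += 2), the compiler passes lo = 2, up = 8 and
// st = 2, so trace_count = (8 - 2) / 2 + 1 = 4 iterations, and the shared
// flags array is sized 4 / 8 + 8 = 8 bytes (one bit per iteration plus
// padding).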

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
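
// Worked example (illustrative): for a 2-D doacross nest the loop above
// linearizes the iteration vector as
//   iter_number = iter0 * range1 + iter1
// (iter_k and range_k being the normalized iteration and trip count of
// dimension k). The flag bit for that iteration lives in
//   th_doacross_flags[iter_number >> 5], mask 1 << (iter_number % 32);
// e.g. iter_number = 70 is bit 6 of word 2, which __kmpc_doacross_post()
// sets and this function spins on.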

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
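
// A minimal sketch (illustrative only, not the exact clang lowering) of how a
// compiler might map
//   #pragma omp for ordered(1)
//   for (i = 2; i < 9; i += 2) {
//     #pragma omp ordered depend(sink : i - 2)
//     ... use the result of iteration i - 2 ...
//     #pragma omp ordered depend(source)
//   }
// onto the doacross entry points above ('dim', 'sink' and 'src' are
// hypothetical locals; bounds are inclusive, as documented for
// __kmpc_doacross_init()):
//
//   struct kmp_dim dim;
//   dim.lo = 2; dim.up = 8; dim.st = 2;
//   __kmpc_doacross_init(loc, gtid, 1, &dim);
//   for (i = lb; i <= ub; i += 2) {             // this thread's chunk
//     kmp_int64 sink = i - 2, src = i;
//     __kmpc_doacross_wait(loc, gtid, &sink);   // depend(sink : i - 2)
//     ...
//     __kmpc_doacross_post(loc, gtid, &src);    // depend(source)
//   }
//   __kmpc_doacross_fini(loc, gtid);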

/* OpenMP 5.1 Memory Management routines */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
/* end of OpenMP 5.1 Memory Management routines */
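
// Example of the user-level API implemented above (illustrative):
//
//   int *buf = (int *)omp_alloc(1024 * sizeof(int), omp_default_mem_alloc);
//   ...
//   buf = (int *)omp_realloc(buf, 2048 * sizeof(int), omp_default_mem_alloc,
//                            omp_default_mem_alloc);
//   omp_free(buf, omp_default_mem_alloc);
//
// omp_default_mem_alloc is one of the predefined allocator handles declared
// in omp.h.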

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}

void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}

// Mark begin of scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark end of scope directive
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

#ifdef KMP_USE_VERSION_SYMBOLS
// For GOMP compatibility there are two versions of each omp_* API.
// One is the plain C symbol and one is the Fortran symbol with an appended
// underscore. When we implement a specific ompc_* version of an omp_*
// function, we want the plain GOMP versioned symbol to alias the ompc_*
// version instead of the Fortran versions in kmp_ftn_entry.h.
extern "C" {
// Have to undef these from omp.h so they aren't translated into
// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
} // extern "C"
#endif