/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes __kmpc_end()
  // a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (which can cause
  // library shutdown).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
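
As an illustrative sketch only (the outlined routine and variable names below
are hypothetical, and the exact lowering is compiler-specific), a construct
such as
@code
#pragma omp parallel shared(a, b)
{ body(a, b); }
@endcode
is typically compiled into an outlined microtask plus a call to this entry
point, roughly:
@code
void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a, int *b) {
  body(*a, *b);
}
// ...
__kmpc_fork_call(&loc, 2, (kmpc_micro)outlined, &a, &b);
@endcode
Here argc counts the pointers passed through the ellipsis, and each ellipsis
argument corresponds to one shared variable.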
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
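For example (illustrative only), a clause written with the OpenMP 5.1
lower-bound:upper-bound syntax such as
@code
#pragma omp teams num_teams(2:8)
@endcode
would be expected to reach this entry point with num_teams_lb=2 and
num_teams_ub=8.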
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams was called; set the default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
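
For illustration (a compiler-specific lowering, sketch only): when the `if`
clause of a parallel construct evaluates to false, e.g.
@code
#pragma omp parallel if (0)
{ body(); }
@endcode
the generated code typically brackets the serial execution of body() with
__kmpc_serialized_parallel() and __kmpc_end_serialized_parallel().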
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset (clear) the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
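
For illustration (a sketch of a typical, compiler-specific lowering):
@code
#pragma omp master
{ body(); }
@endcode
becomes
@code
if (__kmpc_master(&loc, gtid)) {
  body();
  __kmpc_end_master(&loc, gtid);
}
@endcode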
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
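
For illustration (sketch only; the lowering is compiler-specific), a construct
@code
#pragma omp masked filter(1)
{ body(); }
@endcode
is handled by evaluating the filter expression and passing the result here:
@code
if (__kmpc_masked(&loc, gtid, filter_value)) {
  body();
  __kmpc_end_masked(&loc, gtid);
}
@endcode
where filter_value holds the evaluated filter expression (1 in this sketch).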
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
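
For illustration (sketch only), an ordered region inside a loop such as
@code
#pragma omp for ordered
for (int i = 0; i < n; ++i) {
  compute(i);
  #pragma omp ordered
  { emit(i); }
}
@endcode
is lowered so that the body of the ordered region is bracketed by
__kmpc_ordered() and __kmpc_end_ordered().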
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

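// Note on the futex fast path above (a summary of the code, not a change in
// behavior): the lock word holds KMP_LOCK_FREE(futex) when free and
// (gtid + 1) << 1 when held; bit 0 is set once a waiter may be sleeping in
// FUTEX_WAIT. The release macro below only issues FUTEX_WAKE when that bit is
// set, so an uncontended acquire/release never enters the kernel.
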
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// is a chance the lock will be destroyed with no usage, but that is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
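
For illustration (a sketch of a typical, compiler-specific lowering), a named
critical construct
@code
#pragma omp critical (update)
{ body(); }
@endcode
is compiled using a zero-initialized kmp_critical_name object shared by all
uses of that name:
@code
static kmp_critical_name crit_update;
// ...
__kmpc_critical(&loc, gtid, &crit_update);
body();
__kmpc_end_critical(&loc, gtid, &crit_update);
@endcode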
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint
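
// Illustrative note (not generated code): a critical construct that carries a
// hint clause, e.g.
//
//   #pragma omp critical (update) hint(omp_sync_hint_speculative)
//
// is lowered to __kmpc_critical_with_hint() rather than __kmpc_critical(), so
// that __kmp_map_hint_to_lock() above can select a speculative (TSX) lock
// sequence when the hardware supports it, falling back to the default lock
// otherwise.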

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
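
A typical (compiler-specific) usage sketch:
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // executed by the master thread only, after all threads reached the barrier
  master_body();
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode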
1712 */
1713 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1714   int status;
1715   KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1716   __kmp_assert_valid_gtid(global_tid);
1717
1718   if (!TCR_4(__kmp_init_parallel))
1719     __kmp_parallel_initialize();
1720
1721   __kmp_resume_if_soft_paused();
1722
1723   if (__kmp_env_consistency_check)
1724     __kmp_check_barrier(global_tid, ct_barrier, loc);
1725
1726 #if OMPT_SUPPORT
1727   ompt_frame_t *ompt_frame;
1728   if (ompt_enabled.enabled) {
1729     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1730     if (ompt_frame->enter_frame.ptr == NULL)
1731       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1732   }
1733   OMPT_STORE_RETURN_ADDRESS(global_tid);
1734 #endif
1735 #if USE_ITT_NOTIFY
1736   __kmp_threads[global_tid]->th.th_ident = loc;
1737 #endif
1738   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1739 #if OMPT_SUPPORT && OMPT_OPTIONAL
1740   if (ompt_enabled.enabled) {
1741     ompt_frame->enter_frame = ompt_data_none;
1742   }
1743 #endif
1744
1745   return (status != 0) ? 0 : 1;
1746 }
1747
1748 /*!
1749 @ingroup SYNCHRONIZATION
1750 @param loc source location information
1751 @param global_tid thread id.
1752
1753 Complete the execution of a combined barrier and master. This function should
1754 only be called at the completion of the <tt>master</tt> code. Other threads will
1755 still be waiting at the barrier and this call releases them.
1756 */
1757 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1758   KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1759   __kmp_assert_valid_gtid(global_tid);
1760   __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1761 }
1762
1763 /*!
1764 @ingroup SYNCHRONIZATION
1765 @param loc source location information
1766 @param global_tid thread id.
1767 @return one if the thread should execute the master block, zero otherwise
1768
1769 Start execution of a combined barrier and master(nowait) construct.
1770 The barrier is executed inside this function.
1771 There is no equivalent "end" function, since with the nowait variant the other threads do not wait for the master block to finish, so there is nothing for an "end" call to release.
1772 */
1773 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1774   kmp_int32 ret;
1775   KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1776   __kmp_assert_valid_gtid(global_tid);
1777
1778   if (!TCR_4(__kmp_init_parallel))
1779     __kmp_parallel_initialize();
1780
1781   __kmp_resume_if_soft_paused();
1782
1783   if (__kmp_env_consistency_check) {
1784     if (loc == 0) {
1785       KMP_WARNING(ConstructIdentInvalid); // a null ident_t gives the consistency checker no source location to report to the user
1786 } 1787 __kmp_check_barrier(global_tid, ct_barrier, loc); 1788 } 1789 1790 #if OMPT_SUPPORT 1791 ompt_frame_t *ompt_frame; 1792 if (ompt_enabled.enabled) { 1793 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1794 if (ompt_frame->enter_frame.ptr == NULL) 1795 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1796 } 1797 OMPT_STORE_RETURN_ADDRESS(global_tid); 1798 #endif 1799 #if USE_ITT_NOTIFY 1800 __kmp_threads[global_tid]->th.th_ident = loc; 1801 #endif 1802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1803 #if OMPT_SUPPORT && OMPT_OPTIONAL 1804 if (ompt_enabled.enabled) { 1805 ompt_frame->enter_frame = ompt_data_none; 1806 } 1807 #endif 1808 1809 ret = __kmpc_master(loc, global_tid); 1810 1811 if (__kmp_env_consistency_check) { 1812 /* there's no __kmpc_end_master called; so the (stats) */ 1813 /* actions of __kmpc_end_master are done here */ 1814 if (ret) { 1815 /* only one thread should do the pop since only */ 1816 /* one did the push (see __kmpc_master()) */ 1817 __kmp_pop_sync(global_tid, ct_master, loc); 1818 } 1819 } 1820 1821 return (ret); 1822 } 1823 1824 /* The BARRIER for a SINGLE process section is always explicit */ 1825 /*! 1826 @ingroup WORK_SHARING 1827 @param loc source location information 1828 @param global_tid global thread number 1829 @return One if this thread should execute the single construct, zero otherwise. 1830 1831 Test whether to execute a <tt>single</tt> construct. 1832 There are no implicit barriers in the two "single" calls, rather the compiler 1833 should introduce an explicit barrier if it is required. 1834 */ 1835 1836 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1837 __kmp_assert_valid_gtid(global_tid); 1838 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1839 1840 if (rc) { 1841 // We are going to execute the single statement, so we should count it. 1842 KMP_COUNT_BLOCK(OMP_SINGLE); 1843 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1844 } 1845 1846 #if OMPT_SUPPORT && OMPT_OPTIONAL 1847 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1848 kmp_team_t *team = this_thr->th.th_team; 1849 int tid = __kmp_tid_from_gtid(global_tid); 1850 1851 if (ompt_enabled.enabled) { 1852 if (rc) { 1853 if (ompt_enabled.ompt_callback_work) { 1854 ompt_callbacks.ompt_callback(ompt_callback_work)( 1855 ompt_work_single_executor, ompt_scope_begin, 1856 &(team->t.ompt_team_info.parallel_data), 1857 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1858 1, OMPT_GET_RETURN_ADDRESS(0)); 1859 } 1860 } else { 1861 if (ompt_enabled.ompt_callback_work) { 1862 ompt_callbacks.ompt_callback(ompt_callback_work)( 1863 ompt_work_single_other, ompt_scope_begin, 1864 &(team->t.ompt_team_info.parallel_data), 1865 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1866 1, OMPT_GET_RETURN_ADDRESS(0)); 1867 ompt_callbacks.ompt_callback(ompt_callback_work)( 1868 ompt_work_single_other, ompt_scope_end, 1869 &(team->t.ompt_team_info.parallel_data), 1870 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1871 1, OMPT_GET_RETURN_ADDRESS(0)); 1872 } 1873 } 1874 } 1875 #endif 1876 1877 return rc; 1878 } 1879 1880 /*! 1881 @ingroup WORK_SHARING 1882 @param loc source location information 1883 @param global_tid global thread number 1884 1885 Mark the end of a <tt>single</tt> construct. This function should 1886 only be called by the thread that executed the block of code protected 1887 by the `single` construct. 
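For illustration only (assuming the compiler emits the explicit barrier mentioned
above; a 'nowait' clause would omit it, and loc/gtid are placeholder names), the
expected call sequence is roughly:
@code
if (__kmpc_single(&loc, gtid)) {
  // ... body of the single region, executed by one thread ...
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier, not implied by the two calls
@endcode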
1888 */ 1889 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1890 __kmp_assert_valid_gtid(global_tid); 1891 __kmp_exit_single(global_tid); 1892 KMP_POP_PARTITIONED_TIMER(); 1893 1894 #if OMPT_SUPPORT && OMPT_OPTIONAL 1895 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1896 kmp_team_t *team = this_thr->th.th_team; 1897 int tid = __kmp_tid_from_gtid(global_tid); 1898 1899 if (ompt_enabled.ompt_callback_work) { 1900 ompt_callbacks.ompt_callback(ompt_callback_work)( 1901 ompt_work_single_executor, ompt_scope_end, 1902 &(team->t.ompt_team_info.parallel_data), 1903 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1904 OMPT_GET_RETURN_ADDRESS(0)); 1905 } 1906 #endif 1907 } 1908 1909 /*! 1910 @ingroup WORK_SHARING 1911 @param loc Source location 1912 @param global_tid Global thread id 1913 1914 Mark the end of a statically scheduled loop. 1915 */ 1916 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1917 KMP_POP_PARTITIONED_TIMER(); 1918 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1919 1920 #if OMPT_SUPPORT && OMPT_OPTIONAL 1921 if (ompt_enabled.ompt_callback_work) { 1922 ompt_work_t ompt_work_type = ompt_work_loop; 1923 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1924 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1925 // Determine workshare type 1926 if (loc != NULL) { 1927 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1928 ompt_work_type = ompt_work_loop; 1929 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1930 ompt_work_type = ompt_work_sections; 1931 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1932 ompt_work_type = ompt_work_distribute; 1933 } else { 1934 // use default set above. 1935 // a warning about this case is provided in __kmpc_for_static_init 1936 } 1937 KMP_DEBUG_ASSERT(ompt_work_type); 1938 } 1939 ompt_callbacks.ompt_callback(ompt_callback_work)( 1940 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1941 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1942 } 1943 #endif 1944 if (__kmp_env_consistency_check) 1945 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1946 } 1947 1948 // User routines which take C-style arguments (call by value) 1949 // different from the Fortran equivalent routines 1950 1951 void ompc_set_num_threads(int arg) { 1952 // !!!!! TODO: check the per-task binding 1953 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1954 } 1955 1956 void ompc_set_dynamic(int flag) { 1957 kmp_info_t *thread; 1958 1959 /* For the thread-private implementation of the internal controls */ 1960 thread = __kmp_entry_thread(); 1961 1962 __kmp_save_internal_controls(thread); 1963 1964 set__dynamic(thread, flag ? true : false); 1965 } 1966 1967 void ompc_set_nested(int flag) { 1968 kmp_info_t *thread; 1969 1970 /* For the thread-private internal controls implementation */ 1971 thread = __kmp_entry_thread(); 1972 1973 __kmp_save_internal_controls(thread); 1974 1975 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1976 } 1977 1978 void ompc_set_max_active_levels(int max_active_levels) { 1979 /* TO DO */ 1980 /* we want per-task implementation of this internal control */ 1981 1982 /* For the per-thread internal controls implementation */ 1983 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1984 } 1985 1986 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1987 // !!!!! 
TODO: check the per-task binding 1988 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1989 } 1990 1991 int ompc_get_ancestor_thread_num(int level) { 1992 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1993 } 1994 1995 int ompc_get_team_size(int level) { 1996 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1997 } 1998 1999 /* OpenMP 5.0 Affinity Format API */ 2000 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 2001 if (!__kmp_init_serial) { 2002 __kmp_serial_initialize(); 2003 } 2004 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2005 format, KMP_STRLEN(format) + 1); 2006 } 2007 2008 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2009 size_t format_size; 2010 if (!__kmp_init_serial) { 2011 __kmp_serial_initialize(); 2012 } 2013 format_size = KMP_STRLEN(__kmp_affinity_format); 2014 if (buffer && size) { 2015 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2016 format_size + 1); 2017 } 2018 return format_size; 2019 } 2020 2021 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2022 int gtid; 2023 if (!TCR_4(__kmp_init_middle)) { 2024 __kmp_middle_initialize(); 2025 } 2026 __kmp_assign_root_init_mask(); 2027 gtid = __kmp_get_gtid(); 2028 __kmp_aux_display_affinity(gtid, format); 2029 } 2030 2031 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2032 char const *format) { 2033 int gtid; 2034 size_t num_required; 2035 kmp_str_buf_t capture_buf; 2036 if (!TCR_4(__kmp_init_middle)) { 2037 __kmp_middle_initialize(); 2038 } 2039 __kmp_assign_root_init_mask(); 2040 gtid = __kmp_get_gtid(); 2041 __kmp_str_buf_init(&capture_buf); 2042 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2043 if (buffer && buf_size) { 2044 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2045 capture_buf.used + 1); 2046 } 2047 __kmp_str_buf_free(&capture_buf); 2048 return num_required; 2049 } 2050 2051 void kmpc_set_stacksize(int arg) { 2052 // __kmp_aux_set_stacksize initializes the library if needed 2053 __kmp_aux_set_stacksize(arg); 2054 } 2055 2056 void kmpc_set_stacksize_s(size_t arg) { 2057 // __kmp_aux_set_stacksize initializes the library if needed 2058 __kmp_aux_set_stacksize(arg); 2059 } 2060 2061 void kmpc_set_blocktime(int arg) { 2062 int gtid, tid; 2063 kmp_info_t *thread; 2064 2065 gtid = __kmp_entry_gtid(); 2066 tid = __kmp_tid_from_gtid(gtid); 2067 thread = __kmp_thread_from_gtid(gtid); 2068 2069 __kmp_aux_set_blocktime(arg, thread, tid); 2070 } 2071 2072 void kmpc_set_library(int arg) { 2073 // __kmp_user_set_library initializes the library if needed 2074 __kmp_user_set_library((enum library_type)arg); 2075 } 2076 2077 void kmpc_set_defaults(char const *str) { 2078 // __kmp_aux_set_defaults initializes the library if needed 2079 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2080 } 2081 2082 void kmpc_set_disp_num_buffers(int arg) { 2083 // ignore after initialization because some teams have already 2084 // allocated dispatch buffers 2085 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2086 arg <= KMP_MAX_DISP_NUM_BUFF) { 2087 __kmp_dispatch_num_buffers = arg; 2088 } 2089 } 2090 2091 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2092 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2093 return -1; 2094 #else 2095 if (!TCR_4(__kmp_init_middle)) { 2096 __kmp_middle_initialize(); 2097 } 2098 __kmp_assign_root_init_mask(); 2099 return __kmp_aux_set_affinity_mask_proc(proc, 
mask); 2100 #endif 2101 } 2102 2103 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2104 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2105 return -1; 2106 #else 2107 if (!TCR_4(__kmp_init_middle)) { 2108 __kmp_middle_initialize(); 2109 } 2110 __kmp_assign_root_init_mask(); 2111 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2112 #endif 2113 } 2114 2115 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2116 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2117 return -1; 2118 #else 2119 if (!TCR_4(__kmp_init_middle)) { 2120 __kmp_middle_initialize(); 2121 } 2122 __kmp_assign_root_init_mask(); 2123 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2124 #endif 2125 } 2126 2127 /* -------------------------------------------------------------------------- */ 2128 /*! 2129 @ingroup THREADPRIVATE 2130 @param loc source location information 2131 @param gtid global thread number 2132 @param cpy_size size of the cpy_data buffer 2133 @param cpy_data pointer to data to be copied 2134 @param cpy_func helper function to call for copying data 2135 @param didit flag variable: 1=single thread; 0=not single thread 2136 2137 __kmpc_copyprivate implements the interface for the private data broadcast 2138 needed for the copyprivate clause associated with a single region in an 2139 OpenMP<sup>*</sup> program (both C and Fortran). 2140 All threads participating in the parallel region call this routine. 2141 One of the threads (called the single thread) should have the <tt>didit</tt> 2142 variable set to 1 and all other threads should have that variable set to 0. 2143 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2144 2145 The OpenMP specification forbids the use of nowait on the single region when a 2146 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2147 barrier internally to avoid race conditions, so the code generation for the 2148 single region should avoid generating a barrier after the call to @ref 2149 __kmpc_copyprivate. 2150 2151 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2152 The <tt>loc</tt> parameter is a pointer to source location information. 2153 2154 Internal implementation: The single thread will first copy its descriptor 2155 address (cpy_data) to a team-private location, then the other threads will each 2156 call the function pointed to by the parameter cpy_func, which carries out the 2157 copy by copying the data using the cpy_data buffer. 2158 2159 The cpy_func routine used for the copy and the contents of the data area defined 2160 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2161 to be done. For instance, the cpy_data buffer can hold the actual data to be 2162 copied or it may hold a list of pointers to the data. The cpy_func routine must 2163 interpret the cpy_data buffer appropriately. 2164 2165 The interface to cpy_func is as follows: 2166 @code 2167 void cpy_func( void *destination, void *source ) 2168 @endcode 2169 where void *destination is the cpy_data pointer for the thread being copied to 2170 and void *source is the cpy_data pointer for the thread being copied from. 
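As a purely illustrative example (not the scheme any particular compiler uses),
each thread could pass the address of its own variable as cpy_data and use a
cpy_func that copies the value directly:
@code
static void cpy_func(void *destination, void *source) {
  // destination/source are the cpy_data buffers of the receiving thread and
  // of the broadcasting (single) thread, respectively
  *(int *)destination = *(int *)source;
}
@endcode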
2171 */ 2172 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2173 void *cpy_data, void (*cpy_func)(void *, void *), 2174 kmp_int32 didit) { 2175 void **data_ptr; 2176 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2177 __kmp_assert_valid_gtid(gtid); 2178 2179 KMP_MB(); 2180 2181 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2182 2183 if (__kmp_env_consistency_check) { 2184 if (loc == 0) { 2185 KMP_WARNING(ConstructIdentInvalid); 2186 } 2187 } 2188 2189 // ToDo: Optimize the following two barriers into some kind of split barrier 2190 2191 if (didit) 2192 *data_ptr = cpy_data; 2193 2194 #if OMPT_SUPPORT 2195 ompt_frame_t *ompt_frame; 2196 if (ompt_enabled.enabled) { 2197 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2198 if (ompt_frame->enter_frame.ptr == NULL) 2199 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2200 } 2201 OMPT_STORE_RETURN_ADDRESS(gtid); 2202 #endif 2203 /* This barrier is not a barrier region boundary */ 2204 #if USE_ITT_NOTIFY 2205 __kmp_threads[gtid]->th.th_ident = loc; 2206 #endif 2207 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2208 2209 if (!didit) 2210 (*cpy_func)(cpy_data, *data_ptr); 2211 2212 // Consider next barrier a user-visible barrier for barrier region boundaries 2213 // Nesting checks are already handled by the single construct checks 2214 { 2215 #if OMPT_SUPPORT 2216 OMPT_STORE_RETURN_ADDRESS(gtid); 2217 #endif 2218 #if USE_ITT_NOTIFY 2219 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2220 // tasks can overwrite the location) 2221 #endif 2222 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2223 #if OMPT_SUPPORT && OMPT_OPTIONAL 2224 if (ompt_enabled.enabled) { 2225 ompt_frame->enter_frame = ompt_data_none; 2226 } 2227 #endif 2228 } 2229 } 2230 2231 /* -------------------------------------------------------------------------- */ 2232 2233 #define INIT_LOCK __kmp_init_user_lock_with_checks 2234 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2235 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2236 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2237 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2238 #define ACQUIRE_NESTED_LOCK_TIMED \ 2239 __kmp_acquire_nested_user_lock_with_checks_timed 2240 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2241 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2242 #define TEST_LOCK __kmp_test_user_lock_with_checks 2243 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2244 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2245 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2246 2247 // TODO: Make check abort messages use location info & pass it into 2248 // with_checks routines 2249 2250 #if KMP_USE_DYNAMIC_LOCK 2251 2252 // internal lock initializer 2253 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2254 kmp_dyna_lockseq_t seq) { 2255 if (KMP_IS_D_LOCK(seq)) { 2256 KMP_INIT_D_LOCK(lock, seq); 2257 #if USE_ITT_BUILD 2258 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2259 #endif 2260 } else { 2261 KMP_INIT_I_LOCK(lock, seq); 2262 #if USE_ITT_BUILD 2263 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2264 __kmp_itt_lock_creating(ilk->lock, loc); 2265 #endif 2266 } 2267 } 2268 2269 // internal nest lock initializer 2270 static __forceinline void 2271 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2272 kmp_dyna_lockseq_t seq) { 2273 #if KMP_USE_TSX 2274 // Don't have nested lock implementation for speculative locks 2275 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2276 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2277 seq = __kmp_user_lock_seq; 2278 #endif 2279 switch (seq) { 2280 case lockseq_tas: 2281 seq = lockseq_nested_tas; 2282 break; 2283 #if KMP_USE_FUTEX 2284 case lockseq_futex: 2285 seq = lockseq_nested_futex; 2286 break; 2287 #endif 2288 case lockseq_ticket: 2289 seq = lockseq_nested_ticket; 2290 break; 2291 case lockseq_queuing: 2292 seq = lockseq_nested_queuing; 2293 break; 2294 case lockseq_drdpa: 2295 seq = lockseq_nested_drdpa; 2296 break; 2297 default: 2298 seq = lockseq_nested_queuing; 2299 } 2300 KMP_INIT_I_LOCK(lock, seq); 2301 #if USE_ITT_BUILD 2302 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2303 __kmp_itt_lock_creating(ilk->lock, loc); 2304 #endif 2305 } 2306 2307 /* initialize the lock with a hint */ 2308 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2309 uintptr_t hint) { 2310 KMP_DEBUG_ASSERT(__kmp_init_serial); 2311 if (__kmp_env_consistency_check && user_lock == NULL) { 2312 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2313 } 2314 2315 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2316 2317 #if OMPT_SUPPORT && OMPT_OPTIONAL 2318 // This is the case, if called from omp_init_lock_with_hint: 2319 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2320 if (!codeptr) 2321 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2322 if (ompt_enabled.ompt_callback_lock_init) { 2323 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2324 ompt_mutex_lock, (omp_lock_hint_t)hint, 2325 __ompt_get_mutex_impl_type(user_lock), 2326 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2327 } 2328 #endif 2329 } 2330 2331 /* initialize the lock with a hint */ 2332 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2333 void **user_lock, uintptr_t hint) { 2334 KMP_DEBUG_ASSERT(__kmp_init_serial); 2335 if (__kmp_env_consistency_check && user_lock == NULL) { 2336 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2337 } 2338 2339 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2340 2341 #if OMPT_SUPPORT && OMPT_OPTIONAL 2342 // This is the case, if called from omp_init_lock_with_hint: 2343 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2344 if (!codeptr) 2345 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2346 if (ompt_enabled.ompt_callback_lock_init) { 2347 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2348 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2349 __ompt_get_mutex_impl_type(user_lock), 2350 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2351 } 2352 #endif 2353 } 2354 2355 #endif // KMP_USE_DYNAMIC_LOCK 2356 2357 /* initialize the lock */ 2358 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2359 #if KMP_USE_DYNAMIC_LOCK 2360 2361 KMP_DEBUG_ASSERT(__kmp_init_serial); 2362 if (__kmp_env_consistency_check && user_lock == NULL) { 2363 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2364 } 2365 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2366 2367 #if OMPT_SUPPORT && OMPT_OPTIONAL 2368 // This is the case, if called from omp_init_lock_with_hint: 2369 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2370 if (!codeptr) 2371 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2372 if (ompt_enabled.ompt_callback_lock_init) { 2373 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2374 
ompt_mutex_lock, omp_lock_hint_none, 2375 __ompt_get_mutex_impl_type(user_lock), 2376 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2377 } 2378 #endif 2379 2380 #else // KMP_USE_DYNAMIC_LOCK 2381 2382 static char const *const func = "omp_init_lock"; 2383 kmp_user_lock_p lck; 2384 KMP_DEBUG_ASSERT(__kmp_init_serial); 2385 2386 if (__kmp_env_consistency_check) { 2387 if (user_lock == NULL) { 2388 KMP_FATAL(LockIsUninitialized, func); 2389 } 2390 } 2391 2392 KMP_CHECK_USER_LOCK_INIT(); 2393 2394 if ((__kmp_user_lock_kind == lk_tas) && 2395 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2396 lck = (kmp_user_lock_p)user_lock; 2397 } 2398 #if KMP_USE_FUTEX 2399 else if ((__kmp_user_lock_kind == lk_futex) && 2400 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2401 lck = (kmp_user_lock_p)user_lock; 2402 } 2403 #endif 2404 else { 2405 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2406 } 2407 INIT_LOCK(lck); 2408 __kmp_set_user_lock_location(lck, loc); 2409 2410 #if OMPT_SUPPORT && OMPT_OPTIONAL 2411 // This is the case, if called from omp_init_lock_with_hint: 2412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2413 if (!codeptr) 2414 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2415 if (ompt_enabled.ompt_callback_lock_init) { 2416 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2417 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2418 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2419 } 2420 #endif 2421 2422 #if USE_ITT_BUILD 2423 __kmp_itt_lock_creating(lck); 2424 #endif /* USE_ITT_BUILD */ 2425 2426 #endif // KMP_USE_DYNAMIC_LOCK 2427 } // __kmpc_init_lock 2428 2429 /* initialize the lock */ 2430 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2431 #if KMP_USE_DYNAMIC_LOCK 2432 2433 KMP_DEBUG_ASSERT(__kmp_init_serial); 2434 if (__kmp_env_consistency_check && user_lock == NULL) { 2435 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2436 } 2437 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2438 2439 #if OMPT_SUPPORT && OMPT_OPTIONAL 2440 // This is the case, if called from omp_init_lock_with_hint: 2441 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2442 if (!codeptr) 2443 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2444 if (ompt_enabled.ompt_callback_lock_init) { 2445 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2446 ompt_mutex_nest_lock, omp_lock_hint_none, 2447 __ompt_get_mutex_impl_type(user_lock), 2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2449 } 2450 #endif 2451 2452 #else // KMP_USE_DYNAMIC_LOCK 2453 2454 static char const *const func = "omp_init_nest_lock"; 2455 kmp_user_lock_p lck; 2456 KMP_DEBUG_ASSERT(__kmp_init_serial); 2457 2458 if (__kmp_env_consistency_check) { 2459 if (user_lock == NULL) { 2460 KMP_FATAL(LockIsUninitialized, func); 2461 } 2462 } 2463 2464 KMP_CHECK_USER_LOCK_INIT(); 2465 2466 if ((__kmp_user_lock_kind == lk_tas) && 2467 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2468 OMP_NEST_LOCK_T_SIZE)) { 2469 lck = (kmp_user_lock_p)user_lock; 2470 } 2471 #if KMP_USE_FUTEX 2472 else if ((__kmp_user_lock_kind == lk_futex) && 2473 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2474 OMP_NEST_LOCK_T_SIZE)) { 2475 lck = (kmp_user_lock_p)user_lock; 2476 } 2477 #endif 2478 else { 2479 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2480 } 2481 2482 INIT_NESTED_LOCK(lck); 2483 __kmp_set_user_lock_location(lck, loc); 2484 2485 #if OMPT_SUPPORT && OMPT_OPTIONAL 2486 // This is the case, if called from omp_init_lock_with_hint: 2487 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2488 if (!codeptr) 2489 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2490 if (ompt_enabled.ompt_callback_lock_init) { 2491 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2492 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2493 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2494 } 2495 #endif 2496 2497 #if USE_ITT_BUILD 2498 __kmp_itt_lock_creating(lck); 2499 #endif /* USE_ITT_BUILD */ 2500 2501 #endif // KMP_USE_DYNAMIC_LOCK 2502 } // __kmpc_init_nest_lock 2503 2504 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2505 #if KMP_USE_DYNAMIC_LOCK 2506 2507 #if USE_ITT_BUILD 2508 kmp_user_lock_p lck; 2509 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2510 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2511 } else { 2512 lck = (kmp_user_lock_p)user_lock; 2513 } 2514 __kmp_itt_lock_destroyed(lck); 2515 #endif 2516 #if OMPT_SUPPORT && OMPT_OPTIONAL 2517 // This is the case, if called from omp_init_lock_with_hint: 2518 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2519 if (!codeptr) 2520 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2521 if (ompt_enabled.ompt_callback_lock_destroy) { 2522 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2523 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2524 } 2525 #endif 2526 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2527 #else 2528 kmp_user_lock_p lck; 2529 2530 if ((__kmp_user_lock_kind == lk_tas) && 2531 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2532 lck = (kmp_user_lock_p)user_lock; 2533 } 2534 #if KMP_USE_FUTEX 2535 else if ((__kmp_user_lock_kind == lk_futex) && 2536 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2537 lck = (kmp_user_lock_p)user_lock; 2538 } 2539 #endif 2540 else { 2541 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2542 } 2543 2544 #if OMPT_SUPPORT && OMPT_OPTIONAL 2545 // This is the case, if called from omp_init_lock_with_hint: 2546 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2547 if (!codeptr) 2548 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2549 if (ompt_enabled.ompt_callback_lock_destroy) { 2550 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2551 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2552 } 2553 #endif 2554 2555 #if USE_ITT_BUILD 2556 __kmp_itt_lock_destroyed(lck); 2557 #endif /* USE_ITT_BUILD */ 2558 DESTROY_LOCK(lck); 2559 2560 if ((__kmp_user_lock_kind == lk_tas) && 2561 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2562 ; 2563 } 2564 #if KMP_USE_FUTEX 2565 else if ((__kmp_user_lock_kind == lk_futex) && 2566 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2567 ; 2568 } 2569 #endif 2570 else { 2571 __kmp_user_lock_free(user_lock, gtid, lck); 2572 } 2573 #endif // KMP_USE_DYNAMIC_LOCK 2574 } // __kmpc_destroy_lock 2575 2576 /* destroy the lock */ 2577 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2578 #if KMP_USE_DYNAMIC_LOCK 2579 2580 #if USE_ITT_BUILD 2581 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2582 __kmp_itt_lock_destroyed(ilk->lock); 2583 #endif 2584 #if OMPT_SUPPORT && OMPT_OPTIONAL 2585 // This is the case, if called from omp_init_lock_with_hint: 2586 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2587 if (!codeptr) 2588 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2589 if (ompt_enabled.ompt_callback_lock_destroy) { 2590 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2591 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2592 } 2593 
#endif 2594 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2595 2596 #else // KMP_USE_DYNAMIC_LOCK 2597 2598 kmp_user_lock_p lck; 2599 2600 if ((__kmp_user_lock_kind == lk_tas) && 2601 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2602 OMP_NEST_LOCK_T_SIZE)) { 2603 lck = (kmp_user_lock_p)user_lock; 2604 } 2605 #if KMP_USE_FUTEX 2606 else if ((__kmp_user_lock_kind == lk_futex) && 2607 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2608 OMP_NEST_LOCK_T_SIZE)) { 2609 lck = (kmp_user_lock_p)user_lock; 2610 } 2611 #endif 2612 else { 2613 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2614 } 2615 2616 #if OMPT_SUPPORT && OMPT_OPTIONAL 2617 // This is the case, if called from omp_init_lock_with_hint: 2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2619 if (!codeptr) 2620 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2621 if (ompt_enabled.ompt_callback_lock_destroy) { 2622 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2623 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2624 } 2625 #endif 2626 2627 #if USE_ITT_BUILD 2628 __kmp_itt_lock_destroyed(lck); 2629 #endif /* USE_ITT_BUILD */ 2630 2631 DESTROY_NESTED_LOCK(lck); 2632 2633 if ((__kmp_user_lock_kind == lk_tas) && 2634 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2635 OMP_NEST_LOCK_T_SIZE)) { 2636 ; 2637 } 2638 #if KMP_USE_FUTEX 2639 else if ((__kmp_user_lock_kind == lk_futex) && 2640 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2641 OMP_NEST_LOCK_T_SIZE)) { 2642 ; 2643 } 2644 #endif 2645 else { 2646 __kmp_user_lock_free(user_lock, gtid, lck); 2647 } 2648 #endif // KMP_USE_DYNAMIC_LOCK 2649 } // __kmpc_destroy_nest_lock 2650 2651 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2652 KMP_COUNT_BLOCK(OMP_set_lock); 2653 #if KMP_USE_DYNAMIC_LOCK 2654 int tag = KMP_EXTRACT_D_TAG(user_lock); 2655 #if USE_ITT_BUILD 2656 __kmp_itt_lock_acquiring( 2657 (kmp_user_lock_p) 2658 user_lock); // itt function will get to the right lock object. 
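  // Note: the matching __kmp_itt_lock_acquired notification is issued further
  // below, once the lock has actually been obtained.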
2659 #endif 2660 #if OMPT_SUPPORT && OMPT_OPTIONAL 2661 // This is the case, if called from omp_init_lock_with_hint: 2662 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2663 if (!codeptr) 2664 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2665 if (ompt_enabled.ompt_callback_mutex_acquire) { 2666 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2667 ompt_mutex_lock, omp_lock_hint_none, 2668 __ompt_get_mutex_impl_type(user_lock), 2669 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2670 } 2671 #endif 2672 #if KMP_USE_INLINED_TAS 2673 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2674 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2675 } else 2676 #elif KMP_USE_INLINED_FUTEX 2677 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2678 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2679 } else 2680 #endif 2681 { 2682 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2683 } 2684 #if USE_ITT_BUILD 2685 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2686 #endif 2687 #if OMPT_SUPPORT && OMPT_OPTIONAL 2688 if (ompt_enabled.ompt_callback_mutex_acquired) { 2689 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2690 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2691 } 2692 #endif 2693 2694 #else // KMP_USE_DYNAMIC_LOCK 2695 2696 kmp_user_lock_p lck; 2697 2698 if ((__kmp_user_lock_kind == lk_tas) && 2699 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2700 lck = (kmp_user_lock_p)user_lock; 2701 } 2702 #if KMP_USE_FUTEX 2703 else if ((__kmp_user_lock_kind == lk_futex) && 2704 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2705 lck = (kmp_user_lock_p)user_lock; 2706 } 2707 #endif 2708 else { 2709 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2710 } 2711 2712 #if USE_ITT_BUILD 2713 __kmp_itt_lock_acquiring(lck); 2714 #endif /* USE_ITT_BUILD */ 2715 #if OMPT_SUPPORT && OMPT_OPTIONAL 2716 // This is the case, if called from omp_init_lock_with_hint: 2717 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2718 if (!codeptr) 2719 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2720 if (ompt_enabled.ompt_callback_mutex_acquire) { 2721 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2722 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2723 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2724 } 2725 #endif 2726 2727 ACQUIRE_LOCK(lck, gtid); 2728 2729 #if USE_ITT_BUILD 2730 __kmp_itt_lock_acquired(lck); 2731 #endif /* USE_ITT_BUILD */ 2732 2733 #if OMPT_SUPPORT && OMPT_OPTIONAL 2734 if (ompt_enabled.ompt_callback_mutex_acquired) { 2735 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2736 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2737 } 2738 #endif 2739 2740 #endif // KMP_USE_DYNAMIC_LOCK 2741 } 2742 2743 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2744 #if KMP_USE_DYNAMIC_LOCK 2745 2746 #if USE_ITT_BUILD 2747 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2748 #endif 2749 #if OMPT_SUPPORT && OMPT_OPTIONAL 2750 // This is the case, if called from omp_init_lock_with_hint: 2751 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2752 if (!codeptr) 2753 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2754 if (ompt_enabled.enabled) { 2755 if (ompt_enabled.ompt_callback_mutex_acquire) { 2756 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2757 ompt_mutex_nest_lock, omp_lock_hint_none, 2758 __ompt_get_mutex_impl_type(user_lock), 2759 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2760 } 2761 } 2762 #endif 2763 int acquire_status = 2764 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2765 (void)acquire_status; 2766 #if USE_ITT_BUILD 2767 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2768 #endif 2769 2770 #if OMPT_SUPPORT && OMPT_OPTIONAL 2771 if (ompt_enabled.enabled) { 2772 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2773 if (ompt_enabled.ompt_callback_mutex_acquired) { 2774 // lock_first 2775 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2776 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2777 codeptr); 2778 } 2779 } else { 2780 if (ompt_enabled.ompt_callback_nest_lock) { 2781 // lock_next 2782 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2783 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2784 } 2785 } 2786 } 2787 #endif 2788 2789 #else // KMP_USE_DYNAMIC_LOCK 2790 int acquire_status; 2791 kmp_user_lock_p lck; 2792 2793 if ((__kmp_user_lock_kind == lk_tas) && 2794 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2795 OMP_NEST_LOCK_T_SIZE)) { 2796 lck = (kmp_user_lock_p)user_lock; 2797 } 2798 #if KMP_USE_FUTEX 2799 else if ((__kmp_user_lock_kind == lk_futex) && 2800 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2801 OMP_NEST_LOCK_T_SIZE)) { 2802 lck = (kmp_user_lock_p)user_lock; 2803 } 2804 #endif 2805 else { 2806 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2807 } 2808 2809 #if USE_ITT_BUILD 2810 __kmp_itt_lock_acquiring(lck); 2811 #endif /* USE_ITT_BUILD */ 2812 #if OMPT_SUPPORT && OMPT_OPTIONAL 2813 // This is the case, if called from omp_init_lock_with_hint: 2814 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2815 if (!codeptr) 2816 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2817 if (ompt_enabled.enabled) { 2818 if (ompt_enabled.ompt_callback_mutex_acquire) { 2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2820 ompt_mutex_nest_lock, omp_lock_hint_none, 2821 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2822 codeptr); 2823 } 2824 } 2825 #endif 2826 2827 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2828 2829 #if USE_ITT_BUILD 2830 __kmp_itt_lock_acquired(lck); 2831 #endif /* USE_ITT_BUILD */ 2832 2833 #if OMPT_SUPPORT && OMPT_OPTIONAL 2834 if (ompt_enabled.enabled) { 2835 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2836 if (ompt_enabled.ompt_callback_mutex_acquired) { 2837 // lock_first 2838 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2839 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2840 } 2841 } else { 2842 if (ompt_enabled.ompt_callback_nest_lock) { 2843 // lock_next 2844 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2845 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2846 } 2847 } 2848 } 2849 #endif 2850 2851 #endif // KMP_USE_DYNAMIC_LOCK 2852 } 2853 2854 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2855 #if KMP_USE_DYNAMIC_LOCK 2856 2857 int tag = KMP_EXTRACT_D_TAG(user_lock); 2858 #if USE_ITT_BUILD 2859 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2860 #endif 2861 #if KMP_USE_INLINED_TAS 2862 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2863 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2864 } else 2865 #elif KMP_USE_INLINED_FUTEX 2866 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2867 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2868 } else 2869 #endif 2870 { 2871 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2872 } 2873 2874 #if OMPT_SUPPORT && OMPT_OPTIONAL 2875 // This is the case, if called from omp_init_lock_with_hint: 2876 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2877 if (!codeptr) 2878 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2879 if (ompt_enabled.ompt_callback_mutex_released) { 2880 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2881 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2882 } 2883 #endif 2884 2885 #else // KMP_USE_DYNAMIC_LOCK 2886 2887 kmp_user_lock_p lck; 2888 2889 /* Can't use serial interval since not block structured */ 2890 /* release the lock */ 2891 2892 if ((__kmp_user_lock_kind == lk_tas) && 2893 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2894 #if KMP_OS_LINUX && \ 2895 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2896 // "fast" path implemented to fix customer performance issue 2897 #if USE_ITT_BUILD 2898 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2899 #endif /* USE_ITT_BUILD */ 2900 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2901 KMP_MB(); 2902 2903 #if OMPT_SUPPORT && OMPT_OPTIONAL 2904 // This is the case, if called from omp_init_lock_with_hint: 2905 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2906 if (!codeptr) 2907 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2908 if (ompt_enabled.ompt_callback_mutex_released) { 2909 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2910 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2911 } 2912 #endif 2913 2914 return; 2915 #else 2916 lck = (kmp_user_lock_p)user_lock; 2917 #endif 2918 } 2919 #if KMP_USE_FUTEX 2920 else if ((__kmp_user_lock_kind == lk_futex) && 2921 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2922 lck = (kmp_user_lock_p)user_lock; 2923 } 2924 #endif 2925 else { 2926 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2927 } 2928 2929 #if USE_ITT_BUILD 2930 __kmp_itt_lock_releasing(lck); 2931 #endif /* USE_ITT_BUILD */ 2932 2933 RELEASE_LOCK(lck, gtid); 2934 2935 #if OMPT_SUPPORT && OMPT_OPTIONAL 2936 // This is the case, if called from omp_init_lock_with_hint: 2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2938 if (!codeptr) 2939 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2940 if (ompt_enabled.ompt_callback_mutex_released) { 2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2942 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2943 } 2944 #endif 2945 2946 #endif // KMP_USE_DYNAMIC_LOCK 2947 } 2948 2949 /* release the lock */ 2950 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2951 #if KMP_USE_DYNAMIC_LOCK 2952 2953 #if USE_ITT_BUILD 2954 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2955 #endif 2956 int release_status = 2957 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2958 (void)release_status; 2959 2960 #if OMPT_SUPPORT && OMPT_OPTIONAL 2961 // This is the case, if called from omp_init_lock_with_hint: 2962 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2963 if (!codeptr) 2964 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2965 if (ompt_enabled.enabled) { 2966 if (release_status == KMP_LOCK_RELEASED) { 2967 if (ompt_enabled.ompt_callback_mutex_released) { 2968 // release_lock_last 2969 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2970 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2971 codeptr); 2972 } 2973 } else if (ompt_enabled.ompt_callback_nest_lock) { 2974 // release_lock_prev 2975 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2976 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2977 } 2978 } 2979 #endif 2980 2981 #else // KMP_USE_DYNAMIC_LOCK 2982 2983 kmp_user_lock_p lck; 
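  // Select the lock representation: TAS (and, where available, futex) locks
  // that fit inside an omp_nest_lock_t are used in place; anything larger is
  // looked up in the lock table below.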
2984
2985   /* Can't use serial interval since not block structured */
2986
2987   if ((__kmp_user_lock_kind == lk_tas) &&
2988       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2989        OMP_NEST_LOCK_T_SIZE)) {
2990 #if KMP_OS_LINUX && \
2991     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2992     // "fast" path implemented to fix customer performance issue
2993     kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2994 #if USE_ITT_BUILD
2995     __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2996 #endif /* USE_ITT_BUILD */
2997
2998 #if OMPT_SUPPORT && OMPT_OPTIONAL
2999     int release_status = KMP_LOCK_STILL_HELD;
3000 #endif
3001
3002     if (--(tl->lk.depth_locked) == 0) {
3003       TCW_4(tl->lk.poll, 0);
3004 #if OMPT_SUPPORT && OMPT_OPTIONAL
3005       release_status = KMP_LOCK_RELEASED;
3006 #endif
3007     }
3008     KMP_MB();
3009
3010 #if OMPT_SUPPORT && OMPT_OPTIONAL
3011     // This is the case, if called from omp_init_lock_with_hint:
3012     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3013     if (!codeptr)
3014       codeptr = OMPT_GET_RETURN_ADDRESS(0);
3015     if (ompt_enabled.enabled) {
3016       if (release_status == KMP_LOCK_RELEASED) {
3017         if (ompt_enabled.ompt_callback_mutex_released) {
3018           // release_lock_last
3019           ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3020               ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3021         }
3022       } else if (ompt_enabled.ompt_callback_nest_lock) {
3023         // release_lock_previous
3024         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3025             ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3026       }
3027     }
3028 #endif
3029
3030     return;
3031 #else
3032     lck = (kmp_user_lock_p)user_lock;
3033 #endif
3034   }
3035 #if KMP_USE_FUTEX
3036   else if ((__kmp_user_lock_kind == lk_futex) &&
3037            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3038             OMP_NEST_LOCK_T_SIZE)) {
3039     lck = (kmp_user_lock_p)user_lock;
3040   }
3041 #endif
3042   else {
3043     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3044   }
3045
3046 #if USE_ITT_BUILD
3047   __kmp_itt_lock_releasing(lck);
3048 #endif /* USE_ITT_BUILD */
3049
3050   int release_status;
3051   release_status = RELEASE_NESTED_LOCK(lck, gtid);
3052 #if OMPT_SUPPORT && OMPT_OPTIONAL
3053   // This is the case, if called from omp_init_lock_with_hint:
3054   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3055   if (!codeptr)
3056     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3057   if (ompt_enabled.enabled) {
3058     if (release_status == KMP_LOCK_RELEASED) {
3059       if (ompt_enabled.ompt_callback_mutex_released) {
3060         // release_lock_last
3061         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3062             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3063       }
3064     } else if (ompt_enabled.ompt_callback_nest_lock) {
3065       // release_lock_previous
3066       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3067           ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3068     }
3069   }
3070 #endif
3071
3072 #endif // KMP_USE_DYNAMIC_LOCK
3073 }
3074
3075 /* try to acquire the lock */
3076 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3077   KMP_COUNT_BLOCK(OMP_test_lock);
3078
3079 #if KMP_USE_DYNAMIC_LOCK
3080   int rc;
3081   int tag = KMP_EXTRACT_D_TAG(user_lock);
3082 #if USE_ITT_BUILD
3083   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3084 #endif
3085 #if OMPT_SUPPORT && OMPT_OPTIONAL
3086   // This is the case, if called from omp_init_lock_with_hint:
3087   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3088   if (!codeptr)
3089     codeptr =
OMPT_GET_RETURN_ADDRESS(0); 3090 if (ompt_enabled.ompt_callback_mutex_acquire) { 3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3092 ompt_mutex_lock, omp_lock_hint_none, 3093 __ompt_get_mutex_impl_type(user_lock), 3094 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3095 } 3096 #endif 3097 #if KMP_USE_INLINED_TAS 3098 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3099 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3100 } else 3101 #elif KMP_USE_INLINED_FUTEX 3102 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3103 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3104 } else 3105 #endif 3106 { 3107 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3108 } 3109 if (rc) { 3110 #if USE_ITT_BUILD 3111 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3112 #endif 3113 #if OMPT_SUPPORT && OMPT_OPTIONAL 3114 if (ompt_enabled.ompt_callback_mutex_acquired) { 3115 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3116 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3117 } 3118 #endif 3119 return FTN_TRUE; 3120 } else { 3121 #if USE_ITT_BUILD 3122 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3123 #endif 3124 return FTN_FALSE; 3125 } 3126 3127 #else // KMP_USE_DYNAMIC_LOCK 3128 3129 kmp_user_lock_p lck; 3130 int rc; 3131 3132 if ((__kmp_user_lock_kind == lk_tas) && 3133 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3134 lck = (kmp_user_lock_p)user_lock; 3135 } 3136 #if KMP_USE_FUTEX 3137 else if ((__kmp_user_lock_kind == lk_futex) && 3138 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3139 lck = (kmp_user_lock_p)user_lock; 3140 } 3141 #endif 3142 else { 3143 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3144 } 3145 3146 #if USE_ITT_BUILD 3147 __kmp_itt_lock_acquiring(lck); 3148 #endif /* USE_ITT_BUILD */ 3149 #if OMPT_SUPPORT && OMPT_OPTIONAL 3150 // This is the case, if called from omp_init_lock_with_hint: 3151 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3152 if (!codeptr) 3153 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3154 if (ompt_enabled.ompt_callback_mutex_acquire) { 3155 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3156 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3157 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3158 } 3159 #endif 3160 3161 rc = TEST_LOCK(lck, gtid); 3162 #if USE_ITT_BUILD 3163 if (rc) { 3164 __kmp_itt_lock_acquired(lck); 3165 } else { 3166 __kmp_itt_lock_cancelled(lck); 3167 } 3168 #endif /* USE_ITT_BUILD */ 3169 #if OMPT_SUPPORT && OMPT_OPTIONAL 3170 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3171 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3172 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3173 } 3174 #endif 3175 3176 return (rc ? 
FTN_TRUE : FTN_FALSE);
3177
3178   /* Can't use serial interval since not block structured */
3179
3180 #endif // KMP_USE_DYNAMIC_LOCK
3181 }
3182
3183 /* try to acquire the lock */
3184 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3185 #if KMP_USE_DYNAMIC_LOCK
3186   int rc;
3187 #if USE_ITT_BUILD
3188   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3189 #endif
3190 #if OMPT_SUPPORT && OMPT_OPTIONAL
3191   // This is the case, if called from omp_init_lock_with_hint:
3192   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3193   if (!codeptr)
3194     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3195   if (ompt_enabled.ompt_callback_mutex_acquire) {
3196     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3197         ompt_mutex_nest_lock, omp_lock_hint_none,
3198         __ompt_get_mutex_impl_type(user_lock),
3199         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3200   }
3201 #endif
3202   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3203 #if USE_ITT_BUILD
3204   if (rc) {
3205     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3206   } else {
3207     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3208   }
3209 #endif
3210 #if OMPT_SUPPORT && OMPT_OPTIONAL
3211   if (ompt_enabled.enabled && rc) {
3212     if (rc == 1) {
3213       if (ompt_enabled.ompt_callback_mutex_acquired) {
3214         // lock_first
3215         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3216             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3217             codeptr);
3218       }
3219     } else {
3220       if (ompt_enabled.ompt_callback_nest_lock) {
3221         // lock_next
3222         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3223             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3224       }
3225     }
3226   }
3227 #endif
3228   return rc;
3229
3230 #else // KMP_USE_DYNAMIC_LOCK
3231
3232   kmp_user_lock_p lck;
3233   int rc;
3234
3235   if ((__kmp_user_lock_kind == lk_tas) &&
3236       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3237        OMP_NEST_LOCK_T_SIZE)) {
3238     lck = (kmp_user_lock_p)user_lock;
3239   }
3240 #if KMP_USE_FUTEX
3241   else if ((__kmp_user_lock_kind == lk_futex) &&
3242            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3243             OMP_NEST_LOCK_T_SIZE)) {
3244     lck = (kmp_user_lock_p)user_lock;
3245   }
3246 #endif
3247   else {
3248     lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3249   }
3250
3251 #if USE_ITT_BUILD
3252   __kmp_itt_lock_acquiring(lck);
3253 #endif /* USE_ITT_BUILD */
3254
3255 #if OMPT_SUPPORT && OMPT_OPTIONAL
3256   // This is the case, if called from omp_init_lock_with_hint:
3257   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3258   if (!codeptr)
3259     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3260   if (ompt_enabled.enabled &&
3261       ompt_enabled.ompt_callback_mutex_acquire) {
3262     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263         ompt_mutex_nest_lock, omp_lock_hint_none,
3264         __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3265         codeptr);
3266   }
3267 #endif
3268
3269   rc = TEST_NESTED_LOCK(lck, gtid);
3270 #if USE_ITT_BUILD
3271   if (rc) {
3272     __kmp_itt_lock_acquired(lck);
3273   } else {
3274     __kmp_itt_lock_cancelled(lck);
3275   }
3276 #endif /* USE_ITT_BUILD */
3277 #if OMPT_SUPPORT && OMPT_OPTIONAL
3278   if (ompt_enabled.enabled && rc) {
3279     if (rc == 1) {
3280       if (ompt_enabled.ompt_callback_mutex_acquired) {
3281         // lock_first
3282         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3283             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3284       }
3285     } else {
3286       if (ompt_enabled.ompt_callback_nest_lock) {
3287
        // lock_next
3288         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3290       }
3291     }
3292   }
3293 #endif
3294   return rc;
3295
3296   /* Can't use serial interval since not block structured */
3297
3298 #endif // KMP_USE_DYNAMIC_LOCK
3299 }
3300
3301 // Interface to fast scalable reduce methods routines
3302
3303 // keep the selected method in a thread local structure for cross-function
3304 // usage: will be used in __kmpc_end_reduce* functions;
3305 // another solution: to re-determine the method one more time in
3306 // __kmpc_end_reduce* functions (new prototype required then)
3307 // AT: which solution is better?
3308 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3309   ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3310
3311 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3312   (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3313
3314 // description of the packed_reduction_method variable: look at the macros in
3315 // kmp.h
3316
3317 // used in a critical section reduce block
3318 static __forceinline void
3319 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3320                                           kmp_critical_name *crit) {
3321
3322   // this lock was visible to a customer and to the threading profile tool as a
3323   // serial overhead span (although it's used for an internal purpose only)
3324   // why was it visible in previous implementation?
3325   // should we keep it visible in new reduce block?
3326   kmp_user_lock_p lck;
3327
3328 #if KMP_USE_DYNAMIC_LOCK
3329
3330   kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3331   // Check if it is initialized.
3332   if (*lk == 0) {
3333     if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3334       KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3335                                   KMP_GET_D_TAG(__kmp_user_lock_seq));
3336     } else {
3337       __kmp_init_indirect_csptr(crit, loc, global_tid,
3338                                 KMP_GET_I_TAG(__kmp_user_lock_seq));
3339     }
3340   }
3341   // Branch for accessing the actual lock object and set operation. This
3342   // branching is inevitable since this lock initialization does not follow the
3343   // normal dispatch path (lock table is not used).
3344   if (KMP_EXTRACT_D_TAG(lk) != 0) {
3345     lck = (kmp_user_lock_p)lk;
3346     KMP_DEBUG_ASSERT(lck != NULL);
3347     if (__kmp_env_consistency_check) {
3348       __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3349     }
3350     KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3351   } else {
3352     kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3353     lck = ilk->lock;
3354     KMP_DEBUG_ASSERT(lck != NULL);
3355     if (__kmp_env_consistency_check) {
3356       __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3357     }
3358     KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3359   }
3360
3361 #else // KMP_USE_DYNAMIC_LOCK
3362
3363   // We know that the fast reduction code is only emitted by Intel compilers
3364   // with 32 byte critical sections. If there isn't enough space, then we
3365   // have to use a pointer.
3366 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3367 lck = (kmp_user_lock_p)crit; 3368 } else { 3369 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3370 } 3371 KMP_DEBUG_ASSERT(lck != NULL); 3372 3373 if (__kmp_env_consistency_check) 3374 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3375 3376 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3377 3378 #endif // KMP_USE_DYNAMIC_LOCK 3379 } 3380 3381 // used in a critical section reduce block 3382 static __forceinline void 3383 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3384 kmp_critical_name *crit) { 3385 3386 kmp_user_lock_p lck; 3387 3388 #if KMP_USE_DYNAMIC_LOCK 3389 3390 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3391 lck = (kmp_user_lock_p)crit; 3392 if (__kmp_env_consistency_check) 3393 __kmp_pop_sync(global_tid, ct_critical, loc); 3394 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3395 } else { 3396 kmp_indirect_lock_t *ilk = 3397 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3398 if (__kmp_env_consistency_check) 3399 __kmp_pop_sync(global_tid, ct_critical, loc); 3400 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3401 } 3402 3403 #else // KMP_USE_DYNAMIC_LOCK 3404 3405 // We know that the fast reduction code is only emitted by Intel compilers 3406 // with 32 byte critical sections. If there isn't enough space, then we have 3407 // to use a pointer. 3408 if (__kmp_base_user_lock_size > 32) { 3409 lck = *((kmp_user_lock_p *)crit); 3410 KMP_ASSERT(lck != NULL); 3411 } else { 3412 lck = (kmp_user_lock_p)crit; 3413 } 3414 3415 if (__kmp_env_consistency_check) 3416 __kmp_pop_sync(global_tid, ct_critical, loc); 3417 3418 __kmp_release_user_lock_with_checks(lck, global_tid); 3419 3420 #endif // KMP_USE_DYNAMIC_LOCK 3421 } // __kmp_end_critical_section_reduce_block 3422 3423 static __forceinline int 3424 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3425 int *task_state) { 3426 kmp_team_t *team; 3427 3428 // Check if we are inside the teams construct? 3429 if (th->th.th_teams_microtask) { 3430 *team_p = team = th->th.th_team; 3431 if (team->t.t_level == th->th.th_teams_level) { 3432 // This is reduction at teams construct. 3433 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3434 // Let's swap teams temporarily for the reduction. 3435 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3436 th->th.th_team = team->t.t_parent; 3437 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3438 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3439 *task_state = th->th.th_task_state; 3440 th->th.th_task_state = 0; 3441 3442 return 1; 3443 } 3444 } 3445 return 0; 3446 } 3447 3448 static __forceinline void 3449 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3450 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3451 th->th.th_info.ds.ds_tid = 0; 3452 th->th.th_team = team; 3453 th->th.th_team_nproc = team->t.t_nproc; 3454 th->th.th_task_team = team->t.t_task_team[task_state]; 3455 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3456 } 3457 3458 /* 2.a.i. Reduce Block without a terminating barrier */ 3459 /*! 
3460 @ingroup SYNCHRONIZATION 3461 @param loc source location information 3462 @param global_tid global thread number 3463 @param num_vars number of items (variables) to be reduced 3464 @param reduce_size size of data in bytes to be reduced 3465 @param reduce_data pointer to data to be reduced 3466 @param reduce_func callback function providing reduction operation on two 3467 operands and returning result of reduction in lhs_data 3468 @param lck pointer to the unique lock data structure 3469 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3470 threads if atomic reduction needed 3471 3472 The nowait version is used for a reduce clause with the nowait argument. 3473 */ 3474 kmp_int32 3475 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3476 size_t reduce_size, void *reduce_data, 3477 void (*reduce_func)(void *lhs_data, void *rhs_data), 3478 kmp_critical_name *lck) { 3479 3480 KMP_COUNT_BLOCK(REDUCE_nowait); 3481 int retval = 0; 3482 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3483 kmp_info_t *th; 3484 kmp_team_t *team; 3485 int teams_swapped = 0, task_state; 3486 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3487 __kmp_assert_valid_gtid(global_tid); 3488 3489 // why do we need this initialization here at all? 3490 // Reduction clause can not be used as a stand-alone directive. 3491 3492 // do not call __kmp_serial_initialize(), it will be called by 3493 // __kmp_parallel_initialize() if needed 3494 // possible detection of false-positive race by the threadchecker ??? 3495 if (!TCR_4(__kmp_init_parallel)) 3496 __kmp_parallel_initialize(); 3497 3498 __kmp_resume_if_soft_paused(); 3499 3500 // check correctness of reduce block nesting 3501 #if KMP_USE_DYNAMIC_LOCK 3502 if (__kmp_env_consistency_check) 3503 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3504 #else 3505 if (__kmp_env_consistency_check) 3506 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3507 #endif 3508 3509 th = __kmp_thread_from_gtid(global_tid); 3510 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3511 3512 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3513 // the value should be kept in a variable 3514 // the variable should be either a construct-specific or thread-specific 3515 // property, not a team specific property 3516 // (a thread can reach the next reduce block on the next construct, reduce 3517 // method may differ on the next construct) 3518 // an ident_t "loc" parameter could be used as a construct-specific property 3519 // (what if loc == 0?) 
3520 // (if both construct-specific and team-specific variables were shared,
3521 // then unnecessary extra synchronization would be needed)
3522 // a thread-specific variable is better regarding two issues above (next
3523 // construct and extra syncs)
3524 // a thread-specific "th_local.reduction_method" variable is used currently
3525 // each thread executes 'determine' and 'set' lines (no need to execute by one
3526 // thread, to avoid unnecessary extra synchronization)
3527
3528 packed_reduction_method = __kmp_determine_reduction_method(
3529 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3530 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3531
3532 OMPT_REDUCTION_DECL(th, global_tid);
3533 if (packed_reduction_method == critical_reduce_block) {
3534
3535 OMPT_REDUCTION_BEGIN;
3536
3537 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3538 retval = 1;
3539
3540 } else if (packed_reduction_method == empty_reduce_block) {
3541
3542 OMPT_REDUCTION_BEGIN;
3543
3544 // usage: if team size == 1, no synchronization is required (Intel
3545 // platforms only)
3546 retval = 1;
3547
3548 } else if (packed_reduction_method == atomic_reduce_block) {
3549
3550 retval = 2;
3551
3552 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3553 // won't be called by the code gen)
3554 // (this is not ideal: the checking block has already been closed by
3555 // this 'pop',
3556 // but the atomic operation has not been executed yet; it will be executed
3557 // slightly later, literally on the next instruction)
3558 if (__kmp_env_consistency_check)
3559 __kmp_pop_sync(global_tid, ct_reduce, loc);
3560
3561 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3562 tree_reduce_block)) {
3563
3564 // AT: performance issue: a real barrier here
3565 // AT: (if primary thread is slow, other threads are blocked here waiting for
3566 // the primary thread to come and release them)
3567 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3568 // AT: (specifying NOWAIT won't result in improvement of performance; it'll
3569 // be confusing to a customer)
3570 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3571 // might go faster and be more in line with the sense of NOWAIT
3572 // AT: TODO: run the EPCC benchmark and compare times
3573
3574 // this barrier should be invisible to a customer and to the threading profile
3575 // tool (it's neither a terminating barrier nor customer's code, it's
3576 // used for an internal purpose)
3577 #if OMPT_SUPPORT
3578 // JP: can this barrier potentially lead to task scheduling?
3579 // JP: as long as there is a barrier in the implementation, OMPT should and
3580 // will provide the barrier events,
3581 // so we set up the necessary frame/return addresses.
3582 ompt_frame_t *ompt_frame;
3583 if (ompt_enabled.enabled) {
3584 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3585 if (ompt_frame->enter_frame.ptr == NULL)
3586 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3587 }
3588 OMPT_STORE_RETURN_ADDRESS(global_tid);
3589 #endif
3590 #if USE_ITT_NOTIFY
3591 __kmp_threads[global_tid]->th.th_ident = loc;
3592 #endif
3593 retval =
3594 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3595 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3596 retval = (retval != 0) ?
(0) : (1);
3597 #if OMPT_SUPPORT && OMPT_OPTIONAL
3598 if (ompt_enabled.enabled) {
3599 ompt_frame->enter_frame = ompt_data_none;
3600 }
3601 #endif
3602
3603 // all other workers except primary thread should do this pop here
3604 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3605 if (__kmp_env_consistency_check) {
3606 if (retval == 0) {
3607 __kmp_pop_sync(global_tid, ct_reduce, loc);
3608 }
3609 }
3610
3611 } else {
3612
3613 // should never reach this block
3614 KMP_ASSERT(0); // "unexpected method"
3615 }
3616 if (teams_swapped) {
3617 __kmp_restore_swapped_teams(th, team, task_state);
3618 }
3619 KA_TRACE(
3620 10,
3621 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3622 global_tid, packed_reduction_method, retval));
3623
3624 return retval;
3625 }
3626
3627 /*!
3628 @ingroup SYNCHRONIZATION
3629 @param loc source location information
3630 @param global_tid global thread id.
3631 @param lck pointer to the unique lock data structure
3632
3633 Finish the execution of a reduce nowait.
3634 */
3635 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3636 kmp_critical_name *lck) {
3637
3638 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3639
3640 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3641 __kmp_assert_valid_gtid(global_tid);
3642
3643 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3644
3645 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3646
3647 if (packed_reduction_method == critical_reduce_block) {
3648
3649 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3650 OMPT_REDUCTION_END;
3651
3652 } else if (packed_reduction_method == empty_reduce_block) {
3653
3654 // usage: if team size == 1, no synchronization is required (on Intel
3655 // platforms only)
3656
3657 OMPT_REDUCTION_END;
3658
3659 } else if (packed_reduction_method == atomic_reduce_block) {
3660
3661 // neither primary thread nor other workers should get here
3662 // (code gen does not generate this call in case 2: atomic reduce block)
3663 // actually it would be better to remove this else-if entirely;
3664 // after removal this value would be checked by the 'else' branch and would assert
3665
3666 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3667 tree_reduce_block)) {
3668
3669 // only primary thread gets here
3670 // OMPT: tree reduction is annotated in the barrier code
3671
3672 } else {
3673
3674 // should never reach this block
3675 KMP_ASSERT(0); // "unexpected method"
3676 }
3677
3678 if (__kmp_env_consistency_check)
3679 __kmp_pop_sync(global_tid, ct_reduce, loc);
3680
3681 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3682 global_tid, packed_reduction_method));
3683
3684 return;
3685 }
3686
3687 /* 2.a.ii. Reduce Block with a terminating barrier */
3688
3689 /*!
3690 @ingroup SYNCHRONIZATION
3691 @param loc source location information
3692 @param global_tid global thread number
3693 @param num_vars number of items (variables) to be reduced
3694 @param reduce_size size of data in bytes to be reduced
3695 @param reduce_data pointer to data to be reduced
3696 @param reduce_func callback function providing reduction operation on two
3697 operands and returning result of reduction in lhs_data
3698 @param lck pointer to the unique lock data structure
3699 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3700 threads if atomic reduction needed
3701
3702 A blocking reduce that includes an implicit barrier.
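
As a rough illustration only (a sketch of possible compiler-generated code, not a
verbatim lowering from any particular compiler), a reduction of a single int with
a hypothetical combiner <tt>my_add_func</tt> matching the <tt>reduce_func</tt>
signature might drive this entry point as follows. Here <tt>loc</tt>, <tt>gtid</tt>,
the per-thread partial result <tt>priv_sum</tt>, the shared result
<tt>shared_sum</tt> and the lock word <tt>crit</tt> are assumed to have been set up
by the compiler:

@code
int ret = __kmpc_reduce(&loc, gtid, 1, sizeof(int), &priv_sum, my_add_func, &crit);
if (ret == 1) {        // this thread combines its partial result itself
  shared_sum += priv_sum;
  __kmpc_end_reduce(&loc, gtid, &crit);
} else if (ret == 2) { // every thread combines atomically into the shared value
  __kmpc_atomic_fixed4_add(&loc, gtid, &shared_sum, priv_sum);
  __kmpc_end_reduce(&loc, gtid, &crit);
}                      // ret == 0: nothing more to do on this thread
@endcode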
3703 */ 3704 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3705 size_t reduce_size, void *reduce_data, 3706 void (*reduce_func)(void *lhs_data, void *rhs_data), 3707 kmp_critical_name *lck) { 3708 KMP_COUNT_BLOCK(REDUCE_wait); 3709 int retval = 0; 3710 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3711 kmp_info_t *th; 3712 kmp_team_t *team; 3713 int teams_swapped = 0, task_state; 3714 3715 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3716 __kmp_assert_valid_gtid(global_tid); 3717 3718 // why do we need this initialization here at all? 3719 // Reduction clause can not be a stand-alone directive. 3720 3721 // do not call __kmp_serial_initialize(), it will be called by 3722 // __kmp_parallel_initialize() if needed 3723 // possible detection of false-positive race by the threadchecker ??? 3724 if (!TCR_4(__kmp_init_parallel)) 3725 __kmp_parallel_initialize(); 3726 3727 __kmp_resume_if_soft_paused(); 3728 3729 // check correctness of reduce block nesting 3730 #if KMP_USE_DYNAMIC_LOCK 3731 if (__kmp_env_consistency_check) 3732 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3733 #else 3734 if (__kmp_env_consistency_check) 3735 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3736 #endif 3737 3738 th = __kmp_thread_from_gtid(global_tid); 3739 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3740 3741 packed_reduction_method = __kmp_determine_reduction_method( 3742 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3743 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3744 3745 OMPT_REDUCTION_DECL(th, global_tid); 3746 3747 if (packed_reduction_method == critical_reduce_block) { 3748 3749 OMPT_REDUCTION_BEGIN; 3750 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3751 retval = 1; 3752 3753 } else if (packed_reduction_method == empty_reduce_block) { 3754 3755 OMPT_REDUCTION_BEGIN; 3756 // usage: if team size == 1, no synchronization is required ( Intel 3757 // platforms only ) 3758 retval = 1; 3759 3760 } else if (packed_reduction_method == atomic_reduce_block) { 3761 3762 retval = 2; 3763 3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3765 tree_reduce_block)) { 3766 3767 // case tree_reduce_block: 3768 // this barrier should be visible to a customer and to the threading profile 3769 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3770 #if OMPT_SUPPORT 3771 ompt_frame_t *ompt_frame; 3772 if (ompt_enabled.enabled) { 3773 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3774 if (ompt_frame->enter_frame.ptr == NULL) 3775 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3776 } 3777 OMPT_STORE_RETURN_ADDRESS(global_tid); 3778 #endif 3779 #if USE_ITT_NOTIFY 3780 __kmp_threads[global_tid]->th.th_ident = 3781 loc; // needed for correct notification of frames 3782 #endif 3783 retval = 3784 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3785 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3786 retval = (retval != 0) ? 
(0) : (1); 3787 #if OMPT_SUPPORT && OMPT_OPTIONAL 3788 if (ompt_enabled.enabled) { 3789 ompt_frame->enter_frame = ompt_data_none; 3790 } 3791 #endif 3792 3793 // all other workers except primary thread should do this pop here 3794 // (none of other workers except primary will enter __kmpc_end_reduce()) 3795 if (__kmp_env_consistency_check) { 3796 if (retval == 0) { // 0: all other workers; 1: primary thread 3797 __kmp_pop_sync(global_tid, ct_reduce, loc); 3798 } 3799 } 3800 3801 } else { 3802 3803 // should never reach this block 3804 KMP_ASSERT(0); // "unexpected method" 3805 } 3806 if (teams_swapped) { 3807 __kmp_restore_swapped_teams(th, team, task_state); 3808 } 3809 3810 KA_TRACE(10, 3811 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3812 global_tid, packed_reduction_method, retval)); 3813 return retval; 3814 } 3815 3816 /*! 3817 @ingroup SYNCHRONIZATION 3818 @param loc source location information 3819 @param global_tid global thread id. 3820 @param lck pointer to the unique lock data structure 3821 3822 Finish the execution of a blocking reduce. 3823 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3824 start function. 3825 */ 3826 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3827 kmp_critical_name *lck) { 3828 3829 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3830 kmp_info_t *th; 3831 kmp_team_t *team; 3832 int teams_swapped = 0, task_state; 3833 3834 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3835 __kmp_assert_valid_gtid(global_tid); 3836 3837 th = __kmp_thread_from_gtid(global_tid); 3838 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3839 3840 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3841 3842 // this barrier should be visible to a customer and to the threading profile 3843 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3844 OMPT_REDUCTION_DECL(th, global_tid); 3845 3846 if (packed_reduction_method == critical_reduce_block) { 3847 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3848 3849 OMPT_REDUCTION_END; 3850 3851 // TODO: implicit barrier: should be exposed 3852 #if OMPT_SUPPORT 3853 ompt_frame_t *ompt_frame; 3854 if (ompt_enabled.enabled) { 3855 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3856 if (ompt_frame->enter_frame.ptr == NULL) 3857 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3858 } 3859 OMPT_STORE_RETURN_ADDRESS(global_tid); 3860 #endif 3861 #if USE_ITT_NOTIFY 3862 __kmp_threads[global_tid]->th.th_ident = loc; 3863 #endif 3864 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3865 #if OMPT_SUPPORT && OMPT_OPTIONAL 3866 if (ompt_enabled.enabled) { 3867 ompt_frame->enter_frame = ompt_data_none; 3868 } 3869 #endif 3870 3871 } else if (packed_reduction_method == empty_reduce_block) { 3872 3873 OMPT_REDUCTION_END; 3874 3875 // usage: if team size==1, no synchronization is required (Intel platforms only) 3876 3877 // TODO: implicit barrier: should be exposed 3878 #if OMPT_SUPPORT 3879 ompt_frame_t *ompt_frame; 3880 if (ompt_enabled.enabled) { 3881 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3882 if (ompt_frame->enter_frame.ptr == NULL) 3883 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3884 } 3885 OMPT_STORE_RETURN_ADDRESS(global_tid); 3886 #endif 3887 #if USE_ITT_NOTIFY 3888 __kmp_threads[global_tid]->th.th_ident = loc; 3889 #endif 3890 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3891 #if OMPT_SUPPORT && OMPT_OPTIONAL 3892 if (ompt_enabled.enabled) { 3893 ompt_frame->enter_frame = ompt_data_none; 3894 } 3895 #endif 3896 3897 } else if (packed_reduction_method == atomic_reduce_block) { 3898 3899 #if OMPT_SUPPORT 3900 ompt_frame_t *ompt_frame; 3901 if (ompt_enabled.enabled) { 3902 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3903 if (ompt_frame->enter_frame.ptr == NULL) 3904 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3905 } 3906 OMPT_STORE_RETURN_ADDRESS(global_tid); 3907 #endif 3908 // TODO: implicit barrier: should be exposed 3909 #if USE_ITT_NOTIFY 3910 __kmp_threads[global_tid]->th.th_ident = loc; 3911 #endif 3912 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3913 #if OMPT_SUPPORT && OMPT_OPTIONAL 3914 if (ompt_enabled.enabled) { 3915 ompt_frame->enter_frame = ompt_data_none; 3916 } 3917 #endif 3918 3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3920 tree_reduce_block)) { 3921 3922 // only primary thread executes here (primary releases all other workers) 3923 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3924 global_tid); 3925 3926 } else { 3927 3928 // should never reach this block 3929 KMP_ASSERT(0); // "unexpected method" 3930 } 3931 if (teams_swapped) { 3932 __kmp_restore_swapped_teams(th, team, task_state); 3933 } 3934 3935 if (__kmp_env_consistency_check) 3936 __kmp_pop_sync(global_tid, ct_reduce, loc); 3937 3938 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3939 global_tid, packed_reduction_method)); 3940 3941 return; 3942 } 3943 3944 #undef __KMP_GET_REDUCTION_METHOD 3945 #undef __KMP_SET_REDUCTION_METHOD 3946 3947 /* end of interface to fast scalable reduce routines */ 3948 3949 kmp_uint64 __kmpc_get_taskid() { 3950 3951 kmp_int32 gtid; 3952 kmp_info_t *thread; 3953 3954 gtid = __kmp_get_gtid(); 3955 if (gtid < 0) { 3956 return 0; 3957 } 3958 thread = __kmp_thread_from_gtid(gtid); 3959 return thread->th.th_current_task->td_task_id; 3960 3961 } // __kmpc_get_taskid 3962 3963 kmp_uint64 __kmpc_get_parent_taskid() { 3964 3965 kmp_int32 gtid; 3966 kmp_info_t *thread; 3967 kmp_taskdata_t *parent_task; 3968 3969 gtid = __kmp_get_gtid(); 3970 if (gtid < 0) { 3971 return 0; 3972 } 3973 thread = __kmp_thread_from_gtid(gtid); 3974 parent_task = thread->th.th_current_task->td_parent; 3975 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3976 3977 } // __kmpc_get_parent_taskid 3978 3979 /*! 3980 @ingroup WORK_SHARING 3981 @param loc source location information. 3982 @param gtid global thread number. 3983 @param num_dims number of associated doacross loops. 3984 @param dims info on loops bounds. 3985 3986 Initialize doacross loop information. 3987 Expect compiler send us inclusive bounds, 3988 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
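
As a sketch only (illustrative, not the exact code any particular compiler emits),
the loop above with a one-dimensional doacross dependence, i.e.
<tt>ordered(1)</tt> with <tt>depend(sink: i-2)</tt> and <tt>depend(source)</tt>,
could drive the doacross entry points as follows. Here <tt>loc</tt> and
<tt>gtid</tt> are assumed to be available, and <tt>lb</tt>/<tt>ub</tt> stand for the
bounds of the chunk of iterations assigned to this thread by the worksharing loop:

@code
struct kmp_dim dim;
dim.lo = 2; // inclusive lower bound
dim.up = 8; // inclusive upper bound
dim.st = 2; // loop increment
__kmpc_doacross_init(&loc, gtid, 1, &dim);
for (kmp_int64 i = lb; i <= ub; i += 2) { // this thread's chunk of iterations
  kmp_int64 sink = i - 2;                 // #pragma omp ordered depend(sink: i-2)
  __kmpc_doacross_wait(&loc, gtid, &sink);
  // ... loop body carrying the cross-iteration dependence ...
  kmp_int64 src = i;                      // #pragma omp ordered depend(source)
  __kmpc_doacross_post(&loc, gtid, &src);
}
__kmpc_doacross_fini(&loc, gtid);
@endcode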
3989 */ 3990 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3991 const struct kmp_dim *dims) { 3992 __kmp_assert_valid_gtid(gtid); 3993 int j, idx; 3994 kmp_int64 last, trace_count; 3995 kmp_info_t *th = __kmp_threads[gtid]; 3996 kmp_team_t *team = th->th.th_team; 3997 kmp_uint32 *flags; 3998 kmp_disp_t *pr_buf = th->th.th_dispatch; 3999 dispatch_shared_info_t *sh_buf; 4000 4001 KA_TRACE( 4002 20, 4003 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4004 gtid, num_dims, !team->t.t_serialized)); 4005 KMP_DEBUG_ASSERT(dims != NULL); 4006 KMP_DEBUG_ASSERT(num_dims > 0); 4007 4008 if (team->t.t_serialized) { 4009 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4010 return; // no dependencies if team is serialized 4011 } 4012 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4013 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4014 // the next loop 4015 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4016 4017 // Save bounds info into allocated private buffer 4018 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4019 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4020 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4021 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4022 pr_buf->th_doacross_info[0] = 4023 (kmp_int64)num_dims; // first element is number of dimensions 4024 // Save also address of num_done in order to access it later without knowing 4025 // the buffer index 4026 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4027 pr_buf->th_doacross_info[2] = dims[0].lo; 4028 pr_buf->th_doacross_info[3] = dims[0].up; 4029 pr_buf->th_doacross_info[4] = dims[0].st; 4030 last = 5; 4031 for (j = 1; j < num_dims; ++j) { 4032 kmp_int64 4033 range_length; // To keep ranges of all dimensions but the first dims[0] 4034 if (dims[j].st == 1) { // most common case 4035 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4036 range_length = dims[j].up - dims[j].lo + 1; 4037 } else { 4038 if (dims[j].st > 0) { 4039 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4040 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4041 } else { // negative increment 4042 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4043 range_length = 4044 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4045 } 4046 } 4047 pr_buf->th_doacross_info[last++] = range_length; 4048 pr_buf->th_doacross_info[last++] = dims[j].lo; 4049 pr_buf->th_doacross_info[last++] = dims[j].up; 4050 pr_buf->th_doacross_info[last++] = dims[j].st; 4051 } 4052 4053 // Compute total trip count. 4054 // Start with range of dims[0] which we don't need to keep in the buffer. 
4055 if (dims[0].st == 1) { // most common case 4056 trace_count = dims[0].up - dims[0].lo + 1; 4057 } else if (dims[0].st > 0) { 4058 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4059 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4060 } else { // negative increment 4061 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4062 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4063 } 4064 for (j = 1; j < num_dims; ++j) { 4065 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4066 } 4067 KMP_DEBUG_ASSERT(trace_count > 0); 4068 4069 // Check if shared buffer is not occupied by other loop (idx - 4070 // __kmp_dispatch_num_buffers) 4071 if (idx != sh_buf->doacross_buf_idx) { 4072 // Shared buffer is occupied, wait for it to be free 4073 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4074 __kmp_eq_4, NULL); 4075 } 4076 #if KMP_32_BIT_ARCH 4077 // Check if we are the first thread. After the CAS the first thread gets 0, 4078 // others get 1 if initialization is in progress, allocated pointer otherwise. 4079 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4080 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4081 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4082 #else 4083 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4084 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4085 #endif 4086 if (flags == NULL) { 4087 // we are the first thread, allocate the array of flags 4088 size_t size = 4089 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4090 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4091 KMP_MB(); 4092 sh_buf->doacross_flags = flags; 4093 } else if (flags == (kmp_uint32 *)1) { 4094 #if KMP_32_BIT_ARCH 4095 // initialization is still in progress, need to wait 4096 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4097 #else 4098 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4099 #endif 4100 KMP_YIELD(TRUE); 4101 KMP_MB(); 4102 } else { 4103 KMP_MB(); 4104 } 4105 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4106 pr_buf->th_doacross_flags = 4107 sh_buf->doacross_flags; // save private copy in order to not 4108 // touch shared buffer on each iteration 4109 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4110 } 4111 4112 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4113 __kmp_assert_valid_gtid(gtid); 4114 kmp_int64 shft; 4115 size_t num_dims, i; 4116 kmp_uint32 flag; 4117 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4118 kmp_info_t *th = __kmp_threads[gtid]; 4119 kmp_team_t *team = th->th.th_team; 4120 kmp_disp_t *pr_buf; 4121 kmp_int64 lo, up, st; 4122 4123 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4124 if (team->t.t_serialized) { 4125 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4126 return; // no dependencies if team is serialized 4127 } 4128 4129 // calculate sequential iteration number and check out-of-bounds condition 4130 pr_buf = th->th.th_dispatch; 4131 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4132 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4133 lo = pr_buf->th_doacross_info[2]; 4134 up = pr_buf->th_doacross_info[3]; 4135 st = pr_buf->th_doacross_info[4]; 4136 #if OMPT_SUPPORT && OMPT_OPTIONAL 4137 ompt_dependence_t deps[num_dims]; 4138 #endif 4139 if (st == 1) { // most common case 4140 if (vec[0] < lo || vec[0] > up) { 4141 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4142 "bounds [%lld,%lld]\n", 4143 gtid, vec[0], lo, up)); 4144 return; 4145 } 4146 iter_number = vec[0] - lo; 4147 } else if (st > 0) { 4148 if (vec[0] < lo || vec[0] > up) { 4149 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4150 "bounds [%lld,%lld]\n", 4151 gtid, vec[0], lo, up)); 4152 return; 4153 } 4154 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4155 } else { // negative increment 4156 if (vec[0] > lo || vec[0] < up) { 4157 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4158 "bounds [%lld,%lld]\n", 4159 gtid, vec[0], lo, up)); 4160 return; 4161 } 4162 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4163 } 4164 #if OMPT_SUPPORT && OMPT_OPTIONAL 4165 deps[0].variable.value = iter_number; 4166 deps[0].dependence_type = ompt_dependence_type_sink; 4167 #endif 4168 for (i = 1; i < num_dims; ++i) { 4169 kmp_int64 iter, ln; 4170 size_t j = i * 4; 4171 ln = pr_buf->th_doacross_info[j + 1]; 4172 lo = pr_buf->th_doacross_info[j + 2]; 4173 up = pr_buf->th_doacross_info[j + 3]; 4174 st = pr_buf->th_doacross_info[j + 4]; 4175 if (st == 1) { 4176 if (vec[i] < lo || vec[i] > up) { 4177 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4178 "bounds [%lld,%lld]\n", 4179 gtid, vec[i], lo, up)); 4180 return; 4181 } 4182 iter = vec[i] - lo; 4183 } else if (st > 0) { 4184 if (vec[i] < lo || vec[i] > up) { 4185 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4186 "bounds [%lld,%lld]\n", 4187 gtid, vec[i], lo, up)); 4188 return; 4189 } 4190 iter = (kmp_uint64)(vec[i] - lo) / st; 4191 } else { // st < 0 4192 if (vec[i] > lo || vec[i] < up) { 4193 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4194 "bounds [%lld,%lld]\n", 4195 gtid, vec[i], lo, up)); 4196 return; 4197 } 4198 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4199 } 4200 iter_number = iter + ln * iter_number; 4201 #if OMPT_SUPPORT && OMPT_OPTIONAL 4202 deps[i].variable.value = iter; 4203 deps[i].dependence_type = ompt_dependence_type_sink; 4204 #endif 4205 } 4206 shft = iter_number % 32; // use 32-bit granularity 4207 iter_number >>= 5; // divided by 32 4208 flag = 1 << shft; 4209 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4210 KMP_YIELD(TRUE); 4211 } 4212 KMP_MB(); 4213 #if OMPT_SUPPORT && OMPT_OPTIONAL 4214 if (ompt_enabled.ompt_callback_dependences) { 4215 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4216 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4217 } 4218 #endif 4219 KA_TRACE(20, 4220 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4221 gtid, (iter_number << 5) + shft)); 4222 } 4223 4224 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4225 __kmp_assert_valid_gtid(gtid); 4226 kmp_int64 shft; 4227 size_t num_dims, i; 4228 kmp_uint32 flag; 4229 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4230 kmp_info_t *th = __kmp_threads[gtid]; 4231 kmp_team_t *team = th->th.th_team; 4232 kmp_disp_t *pr_buf; 4233 kmp_int64 lo, st; 4234 4235 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4236 if (team->t.t_serialized) { 4237 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4238 return; // no dependencies if team is serialized 4239 } 4240 4241 // calculate sequential iteration number (same as in "wait" but no 4242 // out-of-bounds checks) 4243 pr_buf = th->th.th_dispatch; 4244 
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4245 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4246 lo = pr_buf->th_doacross_info[2]; 4247 st = pr_buf->th_doacross_info[4]; 4248 #if OMPT_SUPPORT && OMPT_OPTIONAL 4249 ompt_dependence_t deps[num_dims]; 4250 #endif 4251 if (st == 1) { // most common case 4252 iter_number = vec[0] - lo; 4253 } else if (st > 0) { 4254 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4255 } else { // negative increment 4256 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4257 } 4258 #if OMPT_SUPPORT && OMPT_OPTIONAL 4259 deps[0].variable.value = iter_number; 4260 deps[0].dependence_type = ompt_dependence_type_source; 4261 #endif 4262 for (i = 1; i < num_dims; ++i) { 4263 kmp_int64 iter, ln; 4264 size_t j = i * 4; 4265 ln = pr_buf->th_doacross_info[j + 1]; 4266 lo = pr_buf->th_doacross_info[j + 2]; 4267 st = pr_buf->th_doacross_info[j + 4]; 4268 if (st == 1) { 4269 iter = vec[i] - lo; 4270 } else if (st > 0) { 4271 iter = (kmp_uint64)(vec[i] - lo) / st; 4272 } else { // st < 0 4273 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4274 } 4275 iter_number = iter + ln * iter_number; 4276 #if OMPT_SUPPORT && OMPT_OPTIONAL 4277 deps[i].variable.value = iter; 4278 deps[i].dependence_type = ompt_dependence_type_source; 4279 #endif 4280 } 4281 #if OMPT_SUPPORT && OMPT_OPTIONAL 4282 if (ompt_enabled.ompt_callback_dependences) { 4283 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4284 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4285 } 4286 #endif 4287 shft = iter_number % 32; // use 32-bit granularity 4288 iter_number >>= 5; // divided by 32 4289 flag = 1 << shft; 4290 KMP_MB(); 4291 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4292 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4293 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4294 (iter_number << 5) + shft)); 4295 } 4296 4297 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4298 __kmp_assert_valid_gtid(gtid); 4299 kmp_int32 num_done; 4300 kmp_info_t *th = __kmp_threads[gtid]; 4301 kmp_team_t *team = th->th.th_team; 4302 kmp_disp_t *pr_buf = th->th.th_dispatch; 4303 4304 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4305 if (team->t.t_serialized) { 4306 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4307 return; // nothing to do 4308 } 4309 num_done = 4310 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4311 if (num_done == th->th.th_team_nproc) { 4312 // we are the last thread, need to free shared resources 4313 int idx = pr_buf->th_doacross_buf_idx - 1; 4314 dispatch_shared_info_t *sh_buf = 4315 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4316 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4317 (kmp_int64)&sh_buf->doacross_num_done); 4318 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4319 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4320 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4321 sh_buf->doacross_flags = NULL; 4322 sh_buf->doacross_num_done = 0; 4323 sh_buf->doacross_buf_idx += 4324 __kmp_dispatch_num_buffers; // free buffer for future re-use 4325 } 4326 // free private resources (need to keep buffer index forever) 4327 pr_buf->th_doacross_flags = NULL; 4328 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4329 pr_buf->th_doacross_info = NULL; 4330 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4331 } 4332 4333 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4334 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4335 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4336 } 4337 4338 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4339 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4340 } 4341 4342 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4343 omp_allocator_handle_t free_allocator) { 4344 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4345 free_allocator); 4346 } 4347 4348 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4349 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4350 } 4351 4352 int __kmpc_get_target_offload(void) { 4353 if (!__kmp_init_serial) { 4354 __kmp_serial_initialize(); 4355 } 4356 return __kmp_target_offload; 4357 } 4358 4359 int __kmpc_pause_resource(kmp_pause_status_t level) { 4360 if (!__kmp_init_serial) { 4361 return 1; // Can't pause if runtime is not initialized 4362 } 4363 return __kmp_pause_resource(level); 4364 } 4365 4366 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4367 if (!__kmp_init_serial) 4368 __kmp_serial_initialize(); 4369 4370 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4371 4372 #if OMPT_SUPPORT 4373 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4374 ompt_callbacks.ompt_callback(ompt_callback_error)( 4375 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4376 OMPT_GET_RETURN_ADDRESS(0)); 4377 } 4378 #endif // OMPT_SUPPORT 4379 4380 char *src_loc; 4381 if (loc && loc->psource) { 4382 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4383 src_loc = 4384 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4385 __kmp_str_loc_free(&str_loc); 4386 } else { 4387 src_loc = __kmp_str_format("unknown"); 4388 } 4389 4390 if (severity == severity_warning) 4391 KMP_WARNING(UserDirectedWarning, src_loc, message); 4392 else 4393 KMP_FATAL(UserDirectedError, src_loc, message); 4394 4395 __kmp_str_free(&src_loc); 4396 } 4397 4398 #ifdef KMP_USE_VERSION_SYMBOLS 4399 // For GOMP compatibility there are two versions of each omp_* API. 4400 // One is the plain C symbol and one is the Fortran symbol with an appended 4401 // underscore. When we implement a specific ompc_* version of an omp_* 4402 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4403 // instead of the Fortran versions in kmp_ftn_entry.h 4404 extern "C" { 4405 // Have to undef these from omp.h so they aren't translated into 4406 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4407 #ifdef omp_set_affinity_format 4408 #undef omp_set_affinity_format 4409 #endif 4410 #ifdef omp_get_affinity_format 4411 #undef omp_get_affinity_format 4412 #endif 4413 #ifdef omp_display_affinity 4414 #undef omp_display_affinity 4415 #endif 4416 #ifdef omp_capture_affinity 4417 #undef omp_capture_affinity 4418 #endif 4419 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4420 "OMP_5.0"); 4421 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4422 "OMP_5.0"); 4423 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4424 "OMP_5.0"); 4425 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4426 "OMP_5.0"); 4427 } // extern "C" 4428 #endif 4429