/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), so this call counts all such threads even
if they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset (clear) the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
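      // Restore the x87 FPU control word and MXCSR values that were stashed in
      // the serial team, so the thread leaves the serialized region with the
      // floating-point environment it is expected to propagate; this only
      // happens when fp-control inheritance is enabled (see the guard above).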
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
      __kmp_reset_root_init_mask(global_tid);
    }
#endif
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction also needs to be covered (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
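  // The code below queries CPUID if that has not been done yet, and then
  // issues a full memory fence only when the CPU reports SSE2 support; which
  // fence primitive is used depends on the compiler building the runtime
  // (_mm_mfence, MemoryBarrier, or __sync_synchronize).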
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.flags.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
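  // The lock is allocated and initialized first, then published into *crit
  // with a compare-and-store below; if another thread wins that race, the
  // freshly created lock is simply abandoned and reclaimed at program exit.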
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | \
                                             KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

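// The futex fast path stores (gtid + 1) << 1 as the owner tag in the poll
// word; bit 0 of that tag serves as a "there may be waiters" flag.
// KMP_ACQUIRE_FUTEX_LOCK above sets the flag before sleeping in FUTEX_WAIT,
// and KMP_RELEASE_FUTEX_LOCK below issues FUTEX_WAKE only when the flag was
// set, so an uncontended acquire/release never enters the kernel.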
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
    // __kmp_itt_critical_creating() should be called *before* the first usage
    // of the underlying lock. It is the only place where we can guarantee it.
    // There are chances the lock will be destroyed with no usage, but it is
    // not a problem, because this is not a real event seen by the user but
    // rather setting a name for the object (lock). See more details in
    // kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
      // Let ITT know the lock is destroyed and the same memory location may be
      // reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
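  // If the lock word is still zero it is initialized lazily from the hint:
  // "direct" lock kinds (e.g. tas, futex) store their tag inline in *crit,
  // while other kinds allocate an indirect lock object and publish a pointer
  // to it via __kmp_init_indirect_csptr().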
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
1713 */ 1714 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1715 int status; 1716 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1717 __kmp_assert_valid_gtid(global_tid); 1718 1719 if (!TCR_4(__kmp_init_parallel)) 1720 __kmp_parallel_initialize(); 1721 1722 __kmp_resume_if_soft_paused(); 1723 1724 if (__kmp_env_consistency_check) 1725 __kmp_check_barrier(global_tid, ct_barrier, loc); 1726 1727 #if OMPT_SUPPORT 1728 ompt_frame_t *ompt_frame; 1729 if (ompt_enabled.enabled) { 1730 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1731 if (ompt_frame->enter_frame.ptr == NULL) 1732 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1733 } 1734 OMPT_STORE_RETURN_ADDRESS(global_tid); 1735 #endif 1736 #if USE_ITT_NOTIFY 1737 __kmp_threads[global_tid]->th.th_ident = loc; 1738 #endif 1739 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1740 #if OMPT_SUPPORT && OMPT_OPTIONAL 1741 if (ompt_enabled.enabled) { 1742 ompt_frame->enter_frame = ompt_data_none; 1743 } 1744 #endif 1745 1746 return (status != 0) ? 0 : 1; 1747 } 1748 1749 /*! 1750 @ingroup SYNCHRONIZATION 1751 @param loc source location information 1752 @param global_tid thread id. 1753 1754 Complete the execution of a combined barrier and master. This function should 1755 only be called at the completion of the <tt>master</tt> code. Other threads will 1756 still be waiting at the barrier and this call releases them. 1757 */ 1758 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1759 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1760 __kmp_assert_valid_gtid(global_tid); 1761 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1762 } 1763 1764 /*! 1765 @ingroup SYNCHRONIZATION 1766 @param loc source location information 1767 @param global_tid thread id. 1768 @return one if the thread should execute the master block, zero otherwise 1769 1770 Start execution of a combined barrier and master(nowait) construct. 1771 The barrier is executed inside this function. 1772 There is no equivalent "end" function, since the end-of-master actions that __kmpc_end_master would otherwise perform are carried out inside this call. 1773 */ 1774 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1775 kmp_int32 ret; 1776 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1777 __kmp_assert_valid_gtid(global_tid); 1778 1779 if (!TCR_4(__kmp_init_parallel)) 1780 __kmp_parallel_initialize(); 1781 1782 __kmp_resume_if_soft_paused(); 1783 1784 if (__kmp_env_consistency_check) { 1785 if (loc == 0) { 1786 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1787 } 1788 __kmp_check_barrier(global_tid, ct_barrier, loc); 1789 } 1790 1791 #if OMPT_SUPPORT 1792 ompt_frame_t *ompt_frame; 1793 if (ompt_enabled.enabled) { 1794 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1795 if (ompt_frame->enter_frame.ptr == NULL) 1796 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1797 } 1798 OMPT_STORE_RETURN_ADDRESS(global_tid); 1799 #endif 1800 #if USE_ITT_NOTIFY 1801 __kmp_threads[global_tid]->th.th_ident = loc; 1802 #endif 1803 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1804 #if OMPT_SUPPORT && OMPT_OPTIONAL 1805 if (ompt_enabled.enabled) { 1806 ompt_frame->enter_frame = ompt_data_none; 1807 } 1808 #endif 1809 1810 ret = __kmpc_master(loc, global_tid); 1811 1812 if (__kmp_env_consistency_check) { 1813 /* there's no __kmpc_end_master called; so the (stats) */ 1814 /* actions of __kmpc_end_master are done here */ 1815 if (ret) { 1816 /* only one thread should do the pop since only */ 1817 /* one did the push (see __kmpc_master()) */ 1818 __kmp_pop_sync(global_tid, ct_master, loc); 1819 } 1820 } 1821 1822 return (ret); 1823 } 1824 1825 /* The BARRIER for a SINGLE process section is always explicit */ 1826 /*! 1827 @ingroup WORK_SHARING 1828 @param loc source location information 1829 @param global_tid global thread number 1830 @return One if this thread should execute the single construct, zero otherwise. 1831 1832 Test whether to execute a <tt>single</tt> construct. 1833 There are no implicit barriers in the two "single" calls, rather the compiler 1834 should introduce an explicit barrier if it is required. 1835 */ 1836 1837 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1838 __kmp_assert_valid_gtid(global_tid); 1839 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1840 1841 if (rc) { 1842 // We are going to execute the single statement, so we should count it. 1843 KMP_COUNT_BLOCK(OMP_SINGLE); 1844 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1845 } 1846 1847 #if OMPT_SUPPORT && OMPT_OPTIONAL 1848 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1849 kmp_team_t *team = this_thr->th.th_team; 1850 int tid = __kmp_tid_from_gtid(global_tid); 1851 1852 if (ompt_enabled.enabled) { 1853 if (rc) { 1854 if (ompt_enabled.ompt_callback_work) { 1855 ompt_callbacks.ompt_callback(ompt_callback_work)( 1856 ompt_work_single_executor, ompt_scope_begin, 1857 &(team->t.ompt_team_info.parallel_data), 1858 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1859 1, OMPT_GET_RETURN_ADDRESS(0)); 1860 } 1861 } else { 1862 if (ompt_enabled.ompt_callback_work) { 1863 ompt_callbacks.ompt_callback(ompt_callback_work)( 1864 ompt_work_single_other, ompt_scope_begin, 1865 &(team->t.ompt_team_info.parallel_data), 1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1867 1, OMPT_GET_RETURN_ADDRESS(0)); 1868 ompt_callbacks.ompt_callback(ompt_callback_work)( 1869 ompt_work_single_other, ompt_scope_end, 1870 &(team->t.ompt_team_info.parallel_data), 1871 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1872 1, OMPT_GET_RETURN_ADDRESS(0)); 1873 } 1874 } 1875 } 1876 #endif 1877 1878 return rc; 1879 } 1880 1881 /*! 1882 @ingroup WORK_SHARING 1883 @param loc source location information 1884 @param global_tid global thread number 1885 1886 Mark the end of a <tt>single</tt> construct. This function should 1887 only be called by the thread that executed the block of code protected 1888 by the `single` construct. 
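A sketch of one possible lowering of a <tt>single</tt> construct using these entry points (loc, gtid and the body are illustrative; the trailing barrier is generated only when the construct carries no nowait clause):
@code
if (__kmpc_single(&loc, gtid)) {
  // body of the single block, executed by exactly one thread
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier; omitted for single nowait
@endcode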
1889 */ 1890 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1891 __kmp_assert_valid_gtid(global_tid); 1892 __kmp_exit_single(global_tid); 1893 KMP_POP_PARTITIONED_TIMER(); 1894 1895 #if OMPT_SUPPORT && OMPT_OPTIONAL 1896 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1897 kmp_team_t *team = this_thr->th.th_team; 1898 int tid = __kmp_tid_from_gtid(global_tid); 1899 1900 if (ompt_enabled.ompt_callback_work) { 1901 ompt_callbacks.ompt_callback(ompt_callback_work)( 1902 ompt_work_single_executor, ompt_scope_end, 1903 &(team->t.ompt_team_info.parallel_data), 1904 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1905 OMPT_GET_RETURN_ADDRESS(0)); 1906 } 1907 #endif 1908 } 1909 1910 /*! 1911 @ingroup WORK_SHARING 1912 @param loc Source location 1913 @param global_tid Global thread id 1914 1915 Mark the end of a statically scheduled loop. 1916 */ 1917 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1918 KMP_POP_PARTITIONED_TIMER(); 1919 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1920 1921 #if OMPT_SUPPORT && OMPT_OPTIONAL 1922 if (ompt_enabled.ompt_callback_work) { 1923 ompt_work_t ompt_work_type = ompt_work_loop; 1924 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1925 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1926 // Determine workshare type 1927 if (loc != NULL) { 1928 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1929 ompt_work_type = ompt_work_loop; 1930 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1931 ompt_work_type = ompt_work_sections; 1932 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1933 ompt_work_type = ompt_work_distribute; 1934 } else { 1935 // use default set above. 1936 // a warning about this case is provided in __kmpc_for_static_init 1937 } 1938 KMP_DEBUG_ASSERT(ompt_work_type); 1939 } 1940 ompt_callbacks.ompt_callback(ompt_callback_work)( 1941 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1942 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1943 } 1944 #endif 1945 if (__kmp_env_consistency_check) 1946 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1947 } 1948 1949 // User routines which take C-style arguments (call by value) 1950 // different from the Fortran equivalent routines 1951 1952 void ompc_set_num_threads(int arg) { 1953 // !!!!! TODO: check the per-task binding 1954 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1955 } 1956 1957 void ompc_set_dynamic(int flag) { 1958 kmp_info_t *thread; 1959 1960 /* For the thread-private implementation of the internal controls */ 1961 thread = __kmp_entry_thread(); 1962 1963 __kmp_save_internal_controls(thread); 1964 1965 set__dynamic(thread, flag ? true : false); 1966 } 1967 1968 void ompc_set_nested(int flag) { 1969 kmp_info_t *thread; 1970 1971 /* For the thread-private internal controls implementation */ 1972 thread = __kmp_entry_thread(); 1973 1974 __kmp_save_internal_controls(thread); 1975 1976 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1977 } 1978 1979 void ompc_set_max_active_levels(int max_active_levels) { 1980 /* TO DO */ 1981 /* we want per-task implementation of this internal control */ 1982 1983 /* For the per-thread internal controls implementation */ 1984 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1985 } 1986 1987 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1988 // !!!!! 
TODO: check the per-task binding 1989 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1990 } 1991 1992 int ompc_get_ancestor_thread_num(int level) { 1993 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1994 } 1995 1996 int ompc_get_team_size(int level) { 1997 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1998 } 1999 2000 /* OpenMP 5.0 Affinity Format API */ 2001 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 2002 if (!__kmp_init_serial) { 2003 __kmp_serial_initialize(); 2004 } 2005 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2006 format, KMP_STRLEN(format) + 1); 2007 } 2008 2009 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2010 size_t format_size; 2011 if (!__kmp_init_serial) { 2012 __kmp_serial_initialize(); 2013 } 2014 format_size = KMP_STRLEN(__kmp_affinity_format); 2015 if (buffer && size) { 2016 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2017 format_size + 1); 2018 } 2019 return format_size; 2020 } 2021 2022 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2023 int gtid; 2024 if (!TCR_4(__kmp_init_middle)) { 2025 __kmp_middle_initialize(); 2026 } 2027 __kmp_assign_root_init_mask(); 2028 gtid = __kmp_get_gtid(); 2029 #if KMP_AFFINITY_SUPPORTED 2030 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { 2031 __kmp_reset_root_init_mask(gtid); 2032 } 2033 #endif 2034 __kmp_aux_display_affinity(gtid, format); 2035 } 2036 2037 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2038 char const *format) { 2039 int gtid; 2040 size_t num_required; 2041 kmp_str_buf_t capture_buf; 2042 if (!TCR_4(__kmp_init_middle)) { 2043 __kmp_middle_initialize(); 2044 } 2045 __kmp_assign_root_init_mask(); 2046 gtid = __kmp_get_gtid(); 2047 #if KMP_AFFINITY_SUPPORTED 2048 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { 2049 __kmp_reset_root_init_mask(gtid); 2050 } 2051 #endif 2052 __kmp_str_buf_init(&capture_buf); 2053 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2054 if (buffer && buf_size) { 2055 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2056 capture_buf.used + 1); 2057 } 2058 __kmp_str_buf_free(&capture_buf); 2059 return num_required; 2060 } 2061 2062 void kmpc_set_stacksize(int arg) { 2063 // __kmp_aux_set_stacksize initializes the library if needed 2064 __kmp_aux_set_stacksize(arg); 2065 } 2066 2067 void kmpc_set_stacksize_s(size_t arg) { 2068 // __kmp_aux_set_stacksize initializes the library if needed 2069 __kmp_aux_set_stacksize(arg); 2070 } 2071 2072 void kmpc_set_blocktime(int arg) { 2073 int gtid, tid; 2074 kmp_info_t *thread; 2075 2076 gtid = __kmp_entry_gtid(); 2077 tid = __kmp_tid_from_gtid(gtid); 2078 thread = __kmp_thread_from_gtid(gtid); 2079 2080 __kmp_aux_set_blocktime(arg, thread, tid); 2081 } 2082 2083 void kmpc_set_library(int arg) { 2084 // __kmp_user_set_library initializes the library if needed 2085 __kmp_user_set_library((enum library_type)arg); 2086 } 2087 2088 void kmpc_set_defaults(char const *str) { 2089 // __kmp_aux_set_defaults initializes the library if needed 2090 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2091 } 2092 2093 void kmpc_set_disp_num_buffers(int arg) { 2094 // ignore after initialization because some teams have already 2095 // allocated dispatch buffers 2096 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2097 arg <= KMP_MAX_DISP_NUM_BUFF) { 2098 
__kmp_dispatch_num_buffers = arg; 2099 } 2100 } 2101 2102 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2103 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2104 return -1; 2105 #else 2106 if (!TCR_4(__kmp_init_middle)) { 2107 __kmp_middle_initialize(); 2108 } 2109 __kmp_assign_root_init_mask(); 2110 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2111 #endif 2112 } 2113 2114 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2115 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2116 return -1; 2117 #else 2118 if (!TCR_4(__kmp_init_middle)) { 2119 __kmp_middle_initialize(); 2120 } 2121 __kmp_assign_root_init_mask(); 2122 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2123 #endif 2124 } 2125 2126 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2127 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2128 return -1; 2129 #else 2130 if (!TCR_4(__kmp_init_middle)) { 2131 __kmp_middle_initialize(); 2132 } 2133 __kmp_assign_root_init_mask(); 2134 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2135 #endif 2136 } 2137 2138 /* -------------------------------------------------------------------------- */ 2139 /*! 2140 @ingroup THREADPRIVATE 2141 @param loc source location information 2142 @param gtid global thread number 2143 @param cpy_size size of the cpy_data buffer 2144 @param cpy_data pointer to data to be copied 2145 @param cpy_func helper function to call for copying data 2146 @param didit flag variable: 1=single thread; 0=not single thread 2147 2148 __kmpc_copyprivate implements the interface for the private data broadcast 2149 needed for the copyprivate clause associated with a single region in an 2150 OpenMP<sup>*</sup> program (both C and Fortran). 2151 All threads participating in the parallel region call this routine. 2152 One of the threads (called the single thread) should have the <tt>didit</tt> 2153 variable set to 1 and all other threads should have that variable set to 0. 2154 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2155 2156 The OpenMP specification forbids the use of nowait on the single region when a 2157 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2158 barrier internally to avoid race conditions, so the code generation for the 2159 single region should avoid generating a barrier after the call to @ref 2160 __kmpc_copyprivate. 2161 2162 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2163 The <tt>loc</tt> parameter is a pointer to source location information. 2164 2165 Internal implementation: The single thread will first copy its descriptor 2166 address (cpy_data) to a team-private location, then the other threads will each 2167 call the function pointed to by the parameter cpy_func, which carries out the 2168 copy by copying the data using the cpy_data buffer. 2169 2170 The cpy_func routine used for the copy and the contents of the data area defined 2171 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2172 to be done. For instance, the cpy_data buffer can hold the actual data to be 2173 copied or it may hold a list of pointers to the data. The cpy_func routine must 2174 interpret the cpy_data buffer appropriately. 2175 2176 The interface to cpy_func is as follows: 2177 @code 2178 void cpy_func( void *destination, void *source ) 2179 @endcode 2180 where void *destination is the cpy_data pointer for the thread being copied to 2181 and void *source is the cpy_data pointer for the thread being copied from. 
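For example, if every thread's cpy_data pointer refers directly to an <tt>int</tt> that should receive the broadcast value, a matching helper (purely illustrative, not part of the runtime) could be:
@code
void cpy_int(void *destination, void *source) {
  // copy the single thread's value into the calling thread's variable
  *(int *)destination = *(int *)source;
}
@endcode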
2182 */ 2183 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2184 void *cpy_data, void (*cpy_func)(void *, void *), 2185 kmp_int32 didit) { 2186 void **data_ptr; 2187 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2188 __kmp_assert_valid_gtid(gtid); 2189 2190 KMP_MB(); 2191 2192 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2193 2194 if (__kmp_env_consistency_check) { 2195 if (loc == 0) { 2196 KMP_WARNING(ConstructIdentInvalid); 2197 } 2198 } 2199 2200 // ToDo: Optimize the following two barriers into some kind of split barrier 2201 2202 if (didit) 2203 *data_ptr = cpy_data; 2204 2205 #if OMPT_SUPPORT 2206 ompt_frame_t *ompt_frame; 2207 if (ompt_enabled.enabled) { 2208 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2209 if (ompt_frame->enter_frame.ptr == NULL) 2210 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2211 } 2212 OMPT_STORE_RETURN_ADDRESS(gtid); 2213 #endif 2214 /* This barrier is not a barrier region boundary */ 2215 #if USE_ITT_NOTIFY 2216 __kmp_threads[gtid]->th.th_ident = loc; 2217 #endif 2218 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2219 2220 if (!didit) 2221 (*cpy_func)(cpy_data, *data_ptr); 2222 2223 // Consider next barrier a user-visible barrier for barrier region boundaries 2224 // Nesting checks are already handled by the single construct checks 2225 { 2226 #if OMPT_SUPPORT 2227 OMPT_STORE_RETURN_ADDRESS(gtid); 2228 #endif 2229 #if USE_ITT_NOTIFY 2230 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2231 // tasks can overwrite the location) 2232 #endif 2233 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2234 #if OMPT_SUPPORT && OMPT_OPTIONAL 2235 if (ompt_enabled.enabled) { 2236 ompt_frame->enter_frame = ompt_data_none; 2237 } 2238 #endif 2239 } 2240 } 2241 2242 /* --------------------------------------------------------------------------*/ 2243 /*! 2244 @ingroup THREADPRIVATE 2245 @param loc source location information 2246 @param gtid global thread number 2247 @param cpy_data pointer to the data to be saved/copied or 0 2248 @return the saved pointer to the data 2249 2250 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: 2251 __kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so 2252 coming from single), and returns that pointer in all calls (for single thread 2253 it's not needed). This version doesn't do any actual data copying. Data copying 2254 has to be done somewhere else, e.g. inline in the generated code. Due to this, 2255 this function doesn't have any barrier at the end of the function, like 2256 __kmpc_copyprivate does, so generated code needs barrier after copying of all 2257 data was done. 
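A sketch of the code generation this implies (the names didit and my_cpy_data, and the exact barrier placement, are illustrative):
@code
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? my_cpy_data : NULL);
if (!didit) {
  // copy from src into this thread's copyprivate variables
}
__kmpc_barrier(&loc, gtid); // required: the copy above is not covered by a barrier inside the call
@endcode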
2258 */ 2259 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { 2260 void **data_ptr; 2261 2262 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid)); 2263 2264 KMP_MB(); 2265 2266 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2267 2268 if (__kmp_env_consistency_check) { 2269 if (loc == 0) { 2270 KMP_WARNING(ConstructIdentInvalid); 2271 } 2272 } 2273 2274 // ToDo: Optimize the following barrier 2275 2276 if (cpy_data) 2277 *data_ptr = cpy_data; 2278 2279 #if OMPT_SUPPORT 2280 ompt_frame_t *ompt_frame; 2281 if (ompt_enabled.enabled) { 2282 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2283 if (ompt_frame->enter_frame.ptr == NULL) 2284 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2285 OMPT_STORE_RETURN_ADDRESS(gtid); 2286 } 2287 #endif 2288 /* This barrier is not a barrier region boundary */ 2289 #if USE_ITT_NOTIFY 2290 __kmp_threads[gtid]->th.th_ident = loc; 2291 #endif 2292 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2293 2294 return *data_ptr; 2295 } 2296 2297 /* -------------------------------------------------------------------------- */ 2298 2299 #define INIT_LOCK __kmp_init_user_lock_with_checks 2300 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2301 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2302 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2303 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2304 #define ACQUIRE_NESTED_LOCK_TIMED \ 2305 __kmp_acquire_nested_user_lock_with_checks_timed 2306 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2307 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2308 #define TEST_LOCK __kmp_test_user_lock_with_checks 2309 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2310 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2311 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2312 2313 // TODO: Make check abort messages use location info & pass it into 2314 // with_checks routines 2315 2316 #if KMP_USE_DYNAMIC_LOCK 2317 2318 // internal lock initializer 2319 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2320 kmp_dyna_lockseq_t seq) { 2321 if (KMP_IS_D_LOCK(seq)) { 2322 KMP_INIT_D_LOCK(lock, seq); 2323 #if USE_ITT_BUILD 2324 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2325 #endif 2326 } else { 2327 KMP_INIT_I_LOCK(lock, seq); 2328 #if USE_ITT_BUILD 2329 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2330 __kmp_itt_lock_creating(ilk->lock, loc); 2331 #endif 2332 } 2333 } 2334 2335 // internal nest lock initializer 2336 static __forceinline void 2337 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2338 kmp_dyna_lockseq_t seq) { 2339 #if KMP_USE_TSX 2340 // Don't have nested lock implementation for speculative locks 2341 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2342 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2343 seq = __kmp_user_lock_seq; 2344 #endif 2345 switch (seq) { 2346 case lockseq_tas: 2347 seq = lockseq_nested_tas; 2348 break; 2349 #if KMP_USE_FUTEX 2350 case lockseq_futex: 2351 seq = lockseq_nested_futex; 2352 break; 2353 #endif 2354 case lockseq_ticket: 2355 seq = lockseq_nested_ticket; 2356 break; 2357 case lockseq_queuing: 2358 seq = lockseq_nested_queuing; 2359 break; 2360 case lockseq_drdpa: 2361 seq = lockseq_nested_drdpa; 2362 break; 2363 default: 2364 seq = lockseq_nested_queuing; 2365 } 2366 
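  // All nested lock sequences are indirect locks, so initialization always
  // goes through the indirect-lock path below.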
KMP_INIT_I_LOCK(lock, seq); 2367 #if USE_ITT_BUILD 2368 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2369 __kmp_itt_lock_creating(ilk->lock, loc); 2370 #endif 2371 } 2372 2373 /* initialize the lock with a hint */ 2374 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2375 uintptr_t hint) { 2376 KMP_DEBUG_ASSERT(__kmp_init_serial); 2377 if (__kmp_env_consistency_check && user_lock == NULL) { 2378 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2379 } 2380 2381 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2382 2383 #if OMPT_SUPPORT && OMPT_OPTIONAL 2384 // This is the case, if called from omp_init_lock_with_hint: 2385 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2386 if (!codeptr) 2387 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2388 if (ompt_enabled.ompt_callback_lock_init) { 2389 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2390 ompt_mutex_lock, (omp_lock_hint_t)hint, 2391 __ompt_get_mutex_impl_type(user_lock), 2392 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2393 } 2394 #endif 2395 } 2396 2397 /* initialize the lock with a hint */ 2398 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2399 void **user_lock, uintptr_t hint) { 2400 KMP_DEBUG_ASSERT(__kmp_init_serial); 2401 if (__kmp_env_consistency_check && user_lock == NULL) { 2402 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2403 } 2404 2405 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2406 2407 #if OMPT_SUPPORT && OMPT_OPTIONAL 2408 // This is the case, if called from omp_init_lock_with_hint: 2409 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2410 if (!codeptr) 2411 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2412 if (ompt_enabled.ompt_callback_lock_init) { 2413 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2414 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2415 __ompt_get_mutex_impl_type(user_lock), 2416 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2417 } 2418 #endif 2419 } 2420 2421 #endif // KMP_USE_DYNAMIC_LOCK 2422 2423 /* initialize the lock */ 2424 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2425 #if KMP_USE_DYNAMIC_LOCK 2426 2427 KMP_DEBUG_ASSERT(__kmp_init_serial); 2428 if (__kmp_env_consistency_check && user_lock == NULL) { 2429 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2430 } 2431 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2432 2433 #if OMPT_SUPPORT && OMPT_OPTIONAL 2434 // This is the case, if called from omp_init_lock_with_hint: 2435 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2436 if (!codeptr) 2437 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2438 if (ompt_enabled.ompt_callback_lock_init) { 2439 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2440 ompt_mutex_lock, omp_lock_hint_none, 2441 __ompt_get_mutex_impl_type(user_lock), 2442 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2443 } 2444 #endif 2445 2446 #else // KMP_USE_DYNAMIC_LOCK 2447 2448 static char const *const func = "omp_init_lock"; 2449 kmp_user_lock_p lck; 2450 KMP_DEBUG_ASSERT(__kmp_init_serial); 2451 2452 if (__kmp_env_consistency_check) { 2453 if (user_lock == NULL) { 2454 KMP_FATAL(LockIsUninitialized, func); 2455 } 2456 } 2457 2458 KMP_CHECK_USER_LOCK_INIT(); 2459 2460 if ((__kmp_user_lock_kind == lk_tas) && 2461 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2462 lck = (kmp_user_lock_p)user_lock; 2463 } 2464 #if KMP_USE_FUTEX 2465 else if ((__kmp_user_lock_kind == lk_futex) && 2466 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 
2467 lck = (kmp_user_lock_p)user_lock; 2468 } 2469 #endif 2470 else { 2471 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2472 } 2473 INIT_LOCK(lck); 2474 __kmp_set_user_lock_location(lck, loc); 2475 2476 #if OMPT_SUPPORT && OMPT_OPTIONAL 2477 // This is the case, if called from omp_init_lock_with_hint: 2478 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2479 if (!codeptr) 2480 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2481 if (ompt_enabled.ompt_callback_lock_init) { 2482 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2483 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2484 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2485 } 2486 #endif 2487 2488 #if USE_ITT_BUILD 2489 __kmp_itt_lock_creating(lck); 2490 #endif /* USE_ITT_BUILD */ 2491 2492 #endif // KMP_USE_DYNAMIC_LOCK 2493 } // __kmpc_init_lock 2494 2495 /* initialize the lock */ 2496 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2497 #if KMP_USE_DYNAMIC_LOCK 2498 2499 KMP_DEBUG_ASSERT(__kmp_init_serial); 2500 if (__kmp_env_consistency_check && user_lock == NULL) { 2501 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2502 } 2503 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2504 2505 #if OMPT_SUPPORT && OMPT_OPTIONAL 2506 // This is the case, if called from omp_init_lock_with_hint: 2507 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2508 if (!codeptr) 2509 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2510 if (ompt_enabled.ompt_callback_lock_init) { 2511 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2512 ompt_mutex_nest_lock, omp_lock_hint_none, 2513 __ompt_get_mutex_impl_type(user_lock), 2514 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2515 } 2516 #endif 2517 2518 #else // KMP_USE_DYNAMIC_LOCK 2519 2520 static char const *const func = "omp_init_nest_lock"; 2521 kmp_user_lock_p lck; 2522 KMP_DEBUG_ASSERT(__kmp_init_serial); 2523 2524 if (__kmp_env_consistency_check) { 2525 if (user_lock == NULL) { 2526 KMP_FATAL(LockIsUninitialized, func); 2527 } 2528 } 2529 2530 KMP_CHECK_USER_LOCK_INIT(); 2531 2532 if ((__kmp_user_lock_kind == lk_tas) && 2533 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2534 OMP_NEST_LOCK_T_SIZE)) { 2535 lck = (kmp_user_lock_p)user_lock; 2536 } 2537 #if KMP_USE_FUTEX 2538 else if ((__kmp_user_lock_kind == lk_futex) && 2539 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2540 OMP_NEST_LOCK_T_SIZE)) { 2541 lck = (kmp_user_lock_p)user_lock; 2542 } 2543 #endif 2544 else { 2545 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2546 } 2547 2548 INIT_NESTED_LOCK(lck); 2549 __kmp_set_user_lock_location(lck, loc); 2550 2551 #if OMPT_SUPPORT && OMPT_OPTIONAL 2552 // This is the case, if called from omp_init_lock_with_hint: 2553 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2554 if (!codeptr) 2555 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2556 if (ompt_enabled.ompt_callback_lock_init) { 2557 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2558 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2559 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2560 } 2561 #endif 2562 2563 #if USE_ITT_BUILD 2564 __kmp_itt_lock_creating(lck); 2565 #endif /* USE_ITT_BUILD */ 2566 2567 #endif // KMP_USE_DYNAMIC_LOCK 2568 } // __kmpc_init_nest_lock 2569 2570 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2571 #if KMP_USE_DYNAMIC_LOCK 2572 2573 #if USE_ITT_BUILD 2574 kmp_user_lock_p lck; 2575 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2576 lck = 
((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2577 } else { 2578 lck = (kmp_user_lock_p)user_lock; 2579 } 2580 __kmp_itt_lock_destroyed(lck); 2581 #endif 2582 #if OMPT_SUPPORT && OMPT_OPTIONAL 2583 // This is the case, if called from omp_init_lock_with_hint: 2584 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2585 if (!codeptr) 2586 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2587 if (ompt_enabled.ompt_callback_lock_destroy) { 2588 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2589 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2590 } 2591 #endif 2592 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2593 #else 2594 kmp_user_lock_p lck; 2595 2596 if ((__kmp_user_lock_kind == lk_tas) && 2597 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2598 lck = (kmp_user_lock_p)user_lock; 2599 } 2600 #if KMP_USE_FUTEX 2601 else if ((__kmp_user_lock_kind == lk_futex) && 2602 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2603 lck = (kmp_user_lock_p)user_lock; 2604 } 2605 #endif 2606 else { 2607 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2608 } 2609 2610 #if OMPT_SUPPORT && OMPT_OPTIONAL 2611 // This is the case, if called from omp_init_lock_with_hint: 2612 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2613 if (!codeptr) 2614 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2615 if (ompt_enabled.ompt_callback_lock_destroy) { 2616 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2617 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2618 } 2619 #endif 2620 2621 #if USE_ITT_BUILD 2622 __kmp_itt_lock_destroyed(lck); 2623 #endif /* USE_ITT_BUILD */ 2624 DESTROY_LOCK(lck); 2625 2626 if ((__kmp_user_lock_kind == lk_tas) && 2627 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2628 ; 2629 } 2630 #if KMP_USE_FUTEX 2631 else if ((__kmp_user_lock_kind == lk_futex) && 2632 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2633 ; 2634 } 2635 #endif 2636 else { 2637 __kmp_user_lock_free(user_lock, gtid, lck); 2638 } 2639 #endif // KMP_USE_DYNAMIC_LOCK 2640 } // __kmpc_destroy_lock 2641 2642 /* destroy the lock */ 2643 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2644 #if KMP_USE_DYNAMIC_LOCK 2645 2646 #if USE_ITT_BUILD 2647 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2648 __kmp_itt_lock_destroyed(ilk->lock); 2649 #endif 2650 #if OMPT_SUPPORT && OMPT_OPTIONAL 2651 // This is the case, if called from omp_init_lock_with_hint: 2652 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2653 if (!codeptr) 2654 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2655 if (ompt_enabled.ompt_callback_lock_destroy) { 2656 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2657 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2658 } 2659 #endif 2660 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2661 2662 #else // KMP_USE_DYNAMIC_LOCK 2663 2664 kmp_user_lock_p lck; 2665 2666 if ((__kmp_user_lock_kind == lk_tas) && 2667 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2668 OMP_NEST_LOCK_T_SIZE)) { 2669 lck = (kmp_user_lock_p)user_lock; 2670 } 2671 #if KMP_USE_FUTEX 2672 else if ((__kmp_user_lock_kind == lk_futex) && 2673 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2674 OMP_NEST_LOCK_T_SIZE)) { 2675 lck = (kmp_user_lock_p)user_lock; 2676 } 2677 #endif 2678 else { 2679 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2680 } 2681 2682 #if OMPT_SUPPORT && OMPT_OPTIONAL 2683 // This is the case, if called from 
omp_init_lock_with_hint: 2684 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2685 if (!codeptr) 2686 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2687 if (ompt_enabled.ompt_callback_lock_destroy) { 2688 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2689 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2690 } 2691 #endif 2692 2693 #if USE_ITT_BUILD 2694 __kmp_itt_lock_destroyed(lck); 2695 #endif /* USE_ITT_BUILD */ 2696 2697 DESTROY_NESTED_LOCK(lck); 2698 2699 if ((__kmp_user_lock_kind == lk_tas) && 2700 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2701 OMP_NEST_LOCK_T_SIZE)) { 2702 ; 2703 } 2704 #if KMP_USE_FUTEX 2705 else if ((__kmp_user_lock_kind == lk_futex) && 2706 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2707 OMP_NEST_LOCK_T_SIZE)) { 2708 ; 2709 } 2710 #endif 2711 else { 2712 __kmp_user_lock_free(user_lock, gtid, lck); 2713 } 2714 #endif // KMP_USE_DYNAMIC_LOCK 2715 } // __kmpc_destroy_nest_lock 2716 2717 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2718 KMP_COUNT_BLOCK(OMP_set_lock); 2719 #if KMP_USE_DYNAMIC_LOCK 2720 int tag = KMP_EXTRACT_D_TAG(user_lock); 2721 #if USE_ITT_BUILD 2722 __kmp_itt_lock_acquiring( 2723 (kmp_user_lock_p) 2724 user_lock); // itt function will get to the right lock object. 2725 #endif 2726 #if OMPT_SUPPORT && OMPT_OPTIONAL 2727 // This is the case, if called from omp_init_lock_with_hint: 2728 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2729 if (!codeptr) 2730 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2731 if (ompt_enabled.ompt_callback_mutex_acquire) { 2732 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2733 ompt_mutex_lock, omp_lock_hint_none, 2734 __ompt_get_mutex_impl_type(user_lock), 2735 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2736 } 2737 #endif 2738 #if KMP_USE_INLINED_TAS 2739 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2740 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2741 } else 2742 #elif KMP_USE_INLINED_FUTEX 2743 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2744 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2745 } else 2746 #endif 2747 { 2748 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2749 } 2750 #if USE_ITT_BUILD 2751 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2752 #endif 2753 #if OMPT_SUPPORT && OMPT_OPTIONAL 2754 if (ompt_enabled.ompt_callback_mutex_acquired) { 2755 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2756 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2757 } 2758 #endif 2759 2760 #else // KMP_USE_DYNAMIC_LOCK 2761 2762 kmp_user_lock_p lck; 2763 2764 if ((__kmp_user_lock_kind == lk_tas) && 2765 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2766 lck = (kmp_user_lock_p)user_lock; 2767 } 2768 #if KMP_USE_FUTEX 2769 else if ((__kmp_user_lock_kind == lk_futex) && 2770 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2771 lck = (kmp_user_lock_p)user_lock; 2772 } 2773 #endif 2774 else { 2775 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2776 } 2777 2778 #if USE_ITT_BUILD 2779 __kmp_itt_lock_acquiring(lck); 2780 #endif /* USE_ITT_BUILD */ 2781 #if OMPT_SUPPORT && OMPT_OPTIONAL 2782 // This is the case, if called from omp_init_lock_with_hint: 2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2784 if (!codeptr) 2785 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2786 if (ompt_enabled.ompt_callback_mutex_acquire) { 2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2788 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 2789 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2790 } 2791 #endif 2792 2793 ACQUIRE_LOCK(lck, gtid); 2794 2795 #if USE_ITT_BUILD 2796 __kmp_itt_lock_acquired(lck); 2797 #endif /* USE_ITT_BUILD */ 2798 2799 #if OMPT_SUPPORT && OMPT_OPTIONAL 2800 if (ompt_enabled.ompt_callback_mutex_acquired) { 2801 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2802 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2803 } 2804 #endif 2805 2806 #endif // KMP_USE_DYNAMIC_LOCK 2807 } 2808 2809 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2810 #if KMP_USE_DYNAMIC_LOCK 2811 2812 #if USE_ITT_BUILD 2813 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2814 #endif 2815 #if OMPT_SUPPORT && OMPT_OPTIONAL 2816 // This is the case, if called from omp_init_lock_with_hint: 2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2818 if (!codeptr) 2819 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2820 if (ompt_enabled.enabled) { 2821 if (ompt_enabled.ompt_callback_mutex_acquire) { 2822 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2823 ompt_mutex_nest_lock, omp_lock_hint_none, 2824 __ompt_get_mutex_impl_type(user_lock), 2825 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2826 } 2827 } 2828 #endif 2829 int acquire_status = 2830 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2831 (void)acquire_status; 2832 #if USE_ITT_BUILD 2833 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2834 #endif 2835 2836 #if OMPT_SUPPORT && OMPT_OPTIONAL 2837 if (ompt_enabled.enabled) { 2838 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2839 if (ompt_enabled.ompt_callback_mutex_acquired) { 2840 // lock_first 2841 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2842 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2843 codeptr); 2844 } 2845 } else { 2846 if (ompt_enabled.ompt_callback_nest_lock) { 2847 // lock_next 2848 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2849 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2850 } 2851 } 2852 } 2853 #endif 2854 2855 #else // KMP_USE_DYNAMIC_LOCK 2856 int acquire_status; 2857 kmp_user_lock_p lck; 2858 2859 if ((__kmp_user_lock_kind == lk_tas) && 2860 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2861 OMP_NEST_LOCK_T_SIZE)) { 2862 lck = (kmp_user_lock_p)user_lock; 2863 } 2864 #if KMP_USE_FUTEX 2865 else if ((__kmp_user_lock_kind == lk_futex) && 2866 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2867 OMP_NEST_LOCK_T_SIZE)) { 2868 lck = (kmp_user_lock_p)user_lock; 2869 } 2870 #endif 2871 else { 2872 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2873 } 2874 2875 #if USE_ITT_BUILD 2876 __kmp_itt_lock_acquiring(lck); 2877 #endif /* USE_ITT_BUILD */ 2878 #if OMPT_SUPPORT && OMPT_OPTIONAL 2879 // This is the case, if called from omp_init_lock_with_hint: 2880 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2881 if (!codeptr) 2882 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2883 if (ompt_enabled.enabled) { 2884 if (ompt_enabled.ompt_callback_mutex_acquire) { 2885 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2886 ompt_mutex_nest_lock, omp_lock_hint_none, 2887 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2888 codeptr); 2889 } 2890 } 2891 #endif 2892 2893 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2894 2895 #if USE_ITT_BUILD 2896 __kmp_itt_lock_acquired(lck); 2897 #endif /* USE_ITT_BUILD */ 2898 2899 #if OMPT_SUPPORT && OMPT_OPTIONAL 2900 if (ompt_enabled.enabled) 
{ 2901 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2902 if (ompt_enabled.ompt_callback_mutex_acquired) { 2903 // lock_first 2904 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2905 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2906 } 2907 } else { 2908 if (ompt_enabled.ompt_callback_nest_lock) { 2909 // lock_next 2910 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2911 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2912 } 2913 } 2914 } 2915 #endif 2916 2917 #endif // KMP_USE_DYNAMIC_LOCK 2918 } 2919 2920 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2921 #if KMP_USE_DYNAMIC_LOCK 2922 2923 int tag = KMP_EXTRACT_D_TAG(user_lock); 2924 #if USE_ITT_BUILD 2925 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2926 #endif 2927 #if KMP_USE_INLINED_TAS 2928 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2929 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2930 } else 2931 #elif KMP_USE_INLINED_FUTEX 2932 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2933 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2934 } else 2935 #endif 2936 { 2937 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2938 } 2939 2940 #if OMPT_SUPPORT && OMPT_OPTIONAL 2941 // This is the case, if called from omp_init_lock_with_hint: 2942 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2943 if (!codeptr) 2944 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2945 if (ompt_enabled.ompt_callback_mutex_released) { 2946 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2947 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2948 } 2949 #endif 2950 2951 #else // KMP_USE_DYNAMIC_LOCK 2952 2953 kmp_user_lock_p lck; 2954 2955 /* Can't use serial interval since not block structured */ 2956 /* release the lock */ 2957 2958 if ((__kmp_user_lock_kind == lk_tas) && 2959 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2960 #if KMP_OS_LINUX && \ 2961 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2962 // "fast" path implemented to fix customer performance issue 2963 #if USE_ITT_BUILD 2964 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2965 #endif /* USE_ITT_BUILD */ 2966 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2967 KMP_MB(); 2968 2969 #if OMPT_SUPPORT && OMPT_OPTIONAL 2970 // This is the case, if called from omp_init_lock_with_hint: 2971 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2972 if (!codeptr) 2973 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2974 if (ompt_enabled.ompt_callback_mutex_released) { 2975 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2976 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2977 } 2978 #endif 2979 2980 return; 2981 #else 2982 lck = (kmp_user_lock_p)user_lock; 2983 #endif 2984 } 2985 #if KMP_USE_FUTEX 2986 else if ((__kmp_user_lock_kind == lk_futex) && 2987 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2988 lck = (kmp_user_lock_p)user_lock; 2989 } 2990 #endif 2991 else { 2992 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2993 } 2994 2995 #if USE_ITT_BUILD 2996 __kmp_itt_lock_releasing(lck); 2997 #endif /* USE_ITT_BUILD */ 2998 2999 RELEASE_LOCK(lck, gtid); 3000 3001 #if OMPT_SUPPORT && OMPT_OPTIONAL 3002 // This is the case, if called from omp_init_lock_with_hint: 3003 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3004 if (!codeptr) 3005 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3006 if (ompt_enabled.ompt_callback_mutex_released) { 3007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3008 ompt_mutex_lock, 
(ompt_wait_id_t)(uintptr_t)lck, codeptr); 3009 } 3010 #endif 3011 3012 #endif // KMP_USE_DYNAMIC_LOCK 3013 } 3014 3015 /* release the lock */ 3016 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3017 #if KMP_USE_DYNAMIC_LOCK 3018 3019 #if USE_ITT_BUILD 3020 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3021 #endif 3022 int release_status = 3023 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 3024 (void)release_status; 3025 3026 #if OMPT_SUPPORT && OMPT_OPTIONAL 3027 // This is the case, if called from omp_init_lock_with_hint: 3028 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3029 if (!codeptr) 3030 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3031 if (ompt_enabled.enabled) { 3032 if (release_status == KMP_LOCK_RELEASED) { 3033 if (ompt_enabled.ompt_callback_mutex_released) { 3034 // release_lock_last 3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3036 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3037 codeptr); 3038 } 3039 } else if (ompt_enabled.ompt_callback_nest_lock) { 3040 // release_lock_prev 3041 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3042 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3043 } 3044 } 3045 #endif 3046 3047 #else // KMP_USE_DYNAMIC_LOCK 3048 3049 kmp_user_lock_p lck; 3050 3051 /* Can't use serial interval since not block structured */ 3052 3053 if ((__kmp_user_lock_kind == lk_tas) && 3054 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3055 OMP_NEST_LOCK_T_SIZE)) { 3056 #if KMP_OS_LINUX && \ 3057 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 3058 // "fast" path implemented to fix customer performance issue 3059 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 3060 #if USE_ITT_BUILD 3061 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3062 #endif /* USE_ITT_BUILD */ 3063 3064 #if OMPT_SUPPORT && OMPT_OPTIONAL 3065 int release_status = KMP_LOCK_STILL_HELD; 3066 #endif 3067 3068 if (--(tl->lk.depth_locked) == 0) { 3069 TCW_4(tl->lk.poll, 0); 3070 #if OMPT_SUPPORT && OMPT_OPTIONAL 3071 release_status = KMP_LOCK_RELEASED; 3072 #endif 3073 } 3074 KMP_MB(); 3075 3076 #if OMPT_SUPPORT && OMPT_OPTIONAL 3077 // This is the case, if called from omp_init_lock_with_hint: 3078 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3079 if (!codeptr) 3080 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3081 if (ompt_enabled.enabled) { 3082 if (release_status == KMP_LOCK_RELEASED) { 3083 if (ompt_enabled.ompt_callback_mutex_released) { 3084 // release_lock_last 3085 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3086 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3087 } 3088 } else if (ompt_enabled.ompt_callback_nest_lock) { 3089 // release_lock_previous 3090 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3091 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3092 } 3093 } 3094 #endif 3095 3096 return; 3097 #else 3098 lck = (kmp_user_lock_p)user_lock; 3099 #endif 3100 } 3101 #if KMP_USE_FUTEX 3102 else if ((__kmp_user_lock_kind == lk_futex) && 3103 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3104 OMP_NEST_LOCK_T_SIZE)) { 3105 lck = (kmp_user_lock_p)user_lock; 3106 } 3107 #endif 3108 else { 3109 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3110 } 3111 3112 #if USE_ITT_BUILD 3113 __kmp_itt_lock_releasing(lck); 3114 #endif /* USE_ITT_BUILD */ 3115 3116 int release_status; 3117 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3118 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3119 // This is the case, if called from omp_init_lock_with_hint: 3120 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3121 if (!codeptr) 3122 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3123 if (ompt_enabled.enabled) { 3124 if (release_status == KMP_LOCK_RELEASED) { 3125 if (ompt_enabled.ompt_callback_mutex_released) { 3126 // release_lock_last 3127 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3128 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3129 } 3130 } else if (ompt_enabled.ompt_callback_nest_lock) { 3131 // release_lock_previous 3132 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3133 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3134 } 3135 } 3136 #endif 3137 3138 #endif // KMP_USE_DYNAMIC_LOCK 3139 } 3140 3141 /* try to acquire the lock */ 3142 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3143 KMP_COUNT_BLOCK(OMP_test_lock); 3144 3145 #if KMP_USE_DYNAMIC_LOCK 3146 int rc; 3147 int tag = KMP_EXTRACT_D_TAG(user_lock); 3148 #if USE_ITT_BUILD 3149 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3150 #endif 3151 #if OMPT_SUPPORT && OMPT_OPTIONAL 3152 // This is the case, if called from omp_init_lock_with_hint: 3153 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3154 if (!codeptr) 3155 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3156 if (ompt_enabled.ompt_callback_mutex_acquire) { 3157 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3158 ompt_mutex_lock, omp_lock_hint_none, 3159 __ompt_get_mutex_impl_type(user_lock), 3160 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3161 } 3162 #endif 3163 #if KMP_USE_INLINED_TAS 3164 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3165 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3166 } else 3167 #elif KMP_USE_INLINED_FUTEX 3168 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3169 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3170 } else 3171 #endif 3172 { 3173 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3174 } 3175 if (rc) { 3176 #if USE_ITT_BUILD 3177 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3178 #endif 3179 #if OMPT_SUPPORT && OMPT_OPTIONAL 3180 if (ompt_enabled.ompt_callback_mutex_acquired) { 3181 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3182 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3183 } 3184 #endif 3185 return FTN_TRUE; 3186 } else { 3187 #if USE_ITT_BUILD 3188 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3189 #endif 3190 return FTN_FALSE; 3191 } 3192 3193 #else // KMP_USE_DYNAMIC_LOCK 3194 3195 kmp_user_lock_p lck; 3196 int rc; 3197 3198 if ((__kmp_user_lock_kind == lk_tas) && 3199 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3200 lck = (kmp_user_lock_p)user_lock; 3201 } 3202 #if KMP_USE_FUTEX 3203 else if ((__kmp_user_lock_kind == lk_futex) && 3204 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3205 lck = (kmp_user_lock_p)user_lock; 3206 } 3207 #endif 3208 else { 3209 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3210 } 3211 3212 #if USE_ITT_BUILD 3213 __kmp_itt_lock_acquiring(lck); 3214 #endif /* USE_ITT_BUILD */ 3215 #if OMPT_SUPPORT && OMPT_OPTIONAL 3216 // This is the case, if called from omp_init_lock_with_hint: 3217 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3218 if (!codeptr) 3219 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3220 if (ompt_enabled.ompt_callback_mutex_acquire) { 3221 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3222 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 3223 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3224 } 3225 #endif 3226 3227 rc = TEST_LOCK(lck, gtid); 3228 #if USE_ITT_BUILD 3229 if (rc) { 3230 __kmp_itt_lock_acquired(lck); 3231 } else { 3232 __kmp_itt_lock_cancelled(lck); 3233 } 3234 #endif /* USE_ITT_BUILD */ 3235 #if OMPT_SUPPORT && OMPT_OPTIONAL 3236 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3237 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3238 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3239 } 3240 #endif 3241 3242 return (rc ? FTN_TRUE : FTN_FALSE); 3243 3244 /* Can't use serial interval since not block structured */ 3245 3246 #endif // KMP_USE_DYNAMIC_LOCK 3247 } 3248 3249 /* try to acquire the lock */ 3250 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3251 #if KMP_USE_DYNAMIC_LOCK 3252 int rc; 3253 #if USE_ITT_BUILD 3254 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3255 #endif 3256 #if OMPT_SUPPORT && OMPT_OPTIONAL 3257 // This is the case, if called from omp_init_lock_with_hint: 3258 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3259 if (!codeptr) 3260 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3261 if (ompt_enabled.ompt_callback_mutex_acquire) { 3262 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3263 ompt_mutex_nest_lock, omp_lock_hint_none, 3264 __ompt_get_mutex_impl_type(user_lock), 3265 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3266 } 3267 #endif 3268 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3269 #if USE_ITT_BUILD 3270 if (rc) { 3271 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3272 } else { 3273 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3274 } 3275 #endif 3276 #if OMPT_SUPPORT && OMPT_OPTIONAL 3277 if (ompt_enabled.enabled && rc) { 3278 if (rc == 1) { 3279 if (ompt_enabled.ompt_callback_mutex_acquired) { 3280 // lock_first 3281 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3282 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3283 codeptr); 3284 } 3285 } else { 3286 if (ompt_enabled.ompt_callback_nest_lock) { 3287 // lock_next 3288 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3289 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3290 } 3291 } 3292 } 3293 #endif 3294 return rc; 3295 3296 #else // KMP_USE_DYNAMIC_LOCK 3297 3298 kmp_user_lock_p lck; 3299 int rc; 3300 3301 if ((__kmp_user_lock_kind == lk_tas) && 3302 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3303 OMP_NEST_LOCK_T_SIZE)) { 3304 lck = (kmp_user_lock_p)user_lock; 3305 } 3306 #if KMP_USE_FUTEX 3307 else if ((__kmp_user_lock_kind == lk_futex) && 3308 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3309 OMP_NEST_LOCK_T_SIZE)) { 3310 lck = (kmp_user_lock_p)user_lock; 3311 } 3312 #endif 3313 else { 3314 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3315 } 3316 3317 #if USE_ITT_BUILD 3318 __kmp_itt_lock_acquiring(lck); 3319 #endif /* USE_ITT_BUILD */ 3320 3321 #if OMPT_SUPPORT && OMPT_OPTIONAL 3322 // This is the case, if called from omp_init_lock_with_hint: 3323 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3324 if (!codeptr) 3325 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3326 if (ompt_enabled.enabled && 3327 ompt_enabled.ompt_callback_mutex_acquire) { 3328 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3329 ompt_mutex_nest_lock, omp_lock_hint_none, 3330 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3331 codeptr); 3332 } 3333 #endif 3334 3335 rc =
TEST_NESTED_LOCK(lck, gtid); 3336 #if USE_ITT_BUILD 3337 if (rc) { 3338 __kmp_itt_lock_acquired(lck); 3339 } else { 3340 __kmp_itt_lock_cancelled(lck); 3341 } 3342 #endif /* USE_ITT_BUILD */ 3343 #if OMPT_SUPPORT && OMPT_OPTIONAL 3344 if (ompt_enabled.enabled && rc) { 3345 if (rc == 1) { 3346 if (ompt_enabled.ompt_callback_mutex_acquired) { 3347 // lock_first 3348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3349 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3350 } 3351 } else { 3352 if (ompt_enabled.ompt_callback_nest_lock) { 3353 // lock_next 3354 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3355 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3356 } 3357 } 3358 } 3359 #endif 3360 return rc; 3361 3362 /* Can't use serial interval since not block structured */ 3363 3364 #endif // KMP_USE_DYNAMIC_LOCK 3365 } 3366 3367 // Interface to fast scalable reduce methods routines 3368 3369 // keep the selected method in a thread local structure for cross-function 3370 // usage: will be used in __kmpc_end_reduce* functions; 3371 // another solution: to re-determine the method one more time in 3372 // __kmpc_end_reduce* functions (new prototype required then) 3373 // AT: which solution is better? 3374 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3375 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3376 3377 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3378 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3379 3380 // description of the packed_reduction_method variable: look at the macros in 3381 // kmp.h 3382 3383 // used in a critical section reduce block 3384 static __forceinline void 3385 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3386 kmp_critical_name *crit) { 3387 3388 // this lock was visible to a customer and to the threading profile tool as a 3389 // serial overhead span (although it's used for an internal purpose only) 3390 // why was it visible in previous implementation? 3391 // should we keep it visible in new reduce block? 3392 kmp_user_lock_p lck; 3393 3394 #if KMP_USE_DYNAMIC_LOCK 3395 3396 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3397 // Check if it is initialized. 3398 if (*lk == 0) { 3399 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3400 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3401 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3402 } else { 3403 __kmp_init_indirect_csptr(crit, loc, global_tid, 3404 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3405 } 3406 } 3407 // Branch for accessing the actual lock object and set operation. This 3408 // branching is inevitable since this lock initialization does not follow the 3409 // normal dispatch path (lock table is not used). 3410 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3411 lck = (kmp_user_lock_p)lk; 3412 KMP_DEBUG_ASSERT(lck != NULL); 3413 if (__kmp_env_consistency_check) { 3414 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3415 } 3416 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3417 } else { 3418 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3419 lck = ilk->lock; 3420 KMP_DEBUG_ASSERT(lck != NULL); 3421 if (__kmp_env_consistency_check) { 3422 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3423 } 3424 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3425 } 3426 3427 #else // KMP_USE_DYNAMIC_LOCK 3428 3429 // We know that the fast reduction code is only emitted by Intel compilers 3430 // with 32 byte critical sections. 
If there isn't enough space, then we 3431 // have to use a pointer. 3432 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3433 lck = (kmp_user_lock_p)crit; 3434 } else { 3435 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3436 } 3437 KMP_DEBUG_ASSERT(lck != NULL); 3438 3439 if (__kmp_env_consistency_check) 3440 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3441 3442 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3443 3444 #endif // KMP_USE_DYNAMIC_LOCK 3445 } 3446 3447 // used in a critical section reduce block 3448 static __forceinline void 3449 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3450 kmp_critical_name *crit) { 3451 3452 kmp_user_lock_p lck; 3453 3454 #if KMP_USE_DYNAMIC_LOCK 3455 3456 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3457 lck = (kmp_user_lock_p)crit; 3458 if (__kmp_env_consistency_check) 3459 __kmp_pop_sync(global_tid, ct_critical, loc); 3460 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3461 } else { 3462 kmp_indirect_lock_t *ilk = 3463 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3464 if (__kmp_env_consistency_check) 3465 __kmp_pop_sync(global_tid, ct_critical, loc); 3466 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3467 } 3468 3469 #else // KMP_USE_DYNAMIC_LOCK 3470 3471 // We know that the fast reduction code is only emitted by Intel compilers 3472 // with 32 byte critical sections. If there isn't enough space, then we have 3473 // to use a pointer. 3474 if (__kmp_base_user_lock_size > 32) { 3475 lck = *((kmp_user_lock_p *)crit); 3476 KMP_ASSERT(lck != NULL); 3477 } else { 3478 lck = (kmp_user_lock_p)crit; 3479 } 3480 3481 if (__kmp_env_consistency_check) 3482 __kmp_pop_sync(global_tid, ct_critical, loc); 3483 3484 __kmp_release_user_lock_with_checks(lck, global_tid); 3485 3486 #endif // KMP_USE_DYNAMIC_LOCK 3487 } // __kmp_end_critical_section_reduce_block 3488 3489 static __forceinline int 3490 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3491 int *task_state) { 3492 kmp_team_t *team; 3493 3494 // Check if we are inside the teams construct? 3495 if (th->th.th_teams_microtask) { 3496 *team_p = team = th->th.th_team; 3497 if (team->t.t_level == th->th.th_teams_level) { 3498 // This is reduction at teams construct. 3499 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3500 // Let's swap teams temporarily for the reduction. 3501 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3502 th->th.th_team = team->t.t_parent; 3503 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3504 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3505 *task_state = th->th.th_task_state; 3506 th->th.th_task_state = 0; 3507 3508 return 1; 3509 } 3510 } 3511 return 0; 3512 } 3513 3514 static __forceinline void 3515 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3516 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3517 th->th.th_info.ds.ds_tid = 0; 3518 th->th.th_team = team; 3519 th->th.th_team_nproc = team->t.t_nproc; 3520 th->th.th_task_team = team->t.t_task_team[task_state]; 3521 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3522 } 3523 3524 /* 2.a.i. Reduce Block without a terminating barrier */ 3525 /*! 
3526 @ingroup SYNCHRONIZATION 3527 @param loc source location information 3528 @param global_tid global thread number 3529 @param num_vars number of items (variables) to be reduced 3530 @param reduce_size size of data in bytes to be reduced 3531 @param reduce_data pointer to data to be reduced 3532 @param reduce_func callback function providing reduction operation on two 3533 operands and returning result of reduction in lhs_data 3534 @param lck pointer to the unique lock data structure 3535 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3536 threads if atomic reduction needed 3537 3538 The nowait version is used for a reduce clause with the nowait argument. 3539 */ 3540 kmp_int32 3541 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3542 size_t reduce_size, void *reduce_data, 3543 void (*reduce_func)(void *lhs_data, void *rhs_data), 3544 kmp_critical_name *lck) { 3545 3546 KMP_COUNT_BLOCK(REDUCE_nowait); 3547 int retval = 0; 3548 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3549 kmp_info_t *th; 3550 kmp_team_t *team; 3551 int teams_swapped = 0, task_state; 3552 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3553 __kmp_assert_valid_gtid(global_tid); 3554 3555 // why do we need this initialization here at all? 3556 // Reduction clause can not be used as a stand-alone directive. 3557 3558 // do not call __kmp_serial_initialize(), it will be called by 3559 // __kmp_parallel_initialize() if needed 3560 // possible detection of false-positive race by the threadchecker ??? 3561 if (!TCR_4(__kmp_init_parallel)) 3562 __kmp_parallel_initialize(); 3563 3564 __kmp_resume_if_soft_paused(); 3565 3566 // check correctness of reduce block nesting 3567 #if KMP_USE_DYNAMIC_LOCK 3568 if (__kmp_env_consistency_check) 3569 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3570 #else 3571 if (__kmp_env_consistency_check) 3572 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3573 #endif 3574 3575 th = __kmp_thread_from_gtid(global_tid); 3576 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3577 3578 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3579 // the value should be kept in a variable 3580 // the variable should be either a construct-specific or thread-specific 3581 // property, not a team specific property 3582 // (a thread can reach the next reduce block on the next construct, reduce 3583 // method may differ on the next construct) 3584 // an ident_t "loc" parameter could be used as a construct-specific property 3585 // (what if loc == 0?) 
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop',
    // but atomic operation has not been executed yet, will be executed
    // slightly later, literally on next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if primary thread is slow, other threads are blocked here waiting for
    // the primary thread to come and release them)
    // AT: (it's not what a customer might expect specifying NOWAIT clause)
    // AT: (specifying NOWAIT won't result in improvement of performance, it'll
    // be confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other design)
    // might go faster and be more in line with sense of NOWAIT
    // AT: TO DO: do epcc test and compare times

    // this barrier should be invisible to a customer and to the threading profile
    // tool (it's neither a terminating barrier nor customer's code, it's
    // used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events
    // so we set-up the necessary frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except primary thread should do this pop here
    // ( none of other workers will get to __kmpc_end_reduce_nowait() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( on Intel
    // platforms only )

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither primary thread nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this elseif at all;
    // after removal this value will be checked by the 'else' and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only primary thread gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
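
A hedged sketch (not literal compiler output; the variable names and the "+"
combiner are illustrative) of how generated code typically drives this entry
point together with __kmpc_end_reduce(), based on the return values documented
above:

@code
  // each thread has already computed its partial result in "priv"
  kmp_int32 res = __kmpc_reduce(loc, gtid, 1, sizeof(priv), &priv,
                                reduce_func, &crit);
  if (res == 1) {
    // primary thread: combine into the shared variable under the runtime's
    // protection, then finish the construct
    shared = shared + priv;
    __kmpc_end_reduce(loc, gtid, &crit); // must pass the same lck
  } else if (res == 2) {
    // every thread: combine using atomic operations, then finish
    __kmpc_end_reduce(loc, gtid, &crit);
  }
  // res == 0: other workers have nothing further to do here
@endcode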
3769 */ 3770 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3771 size_t reduce_size, void *reduce_data, 3772 void (*reduce_func)(void *lhs_data, void *rhs_data), 3773 kmp_critical_name *lck) { 3774 KMP_COUNT_BLOCK(REDUCE_wait); 3775 int retval = 0; 3776 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3777 kmp_info_t *th; 3778 kmp_team_t *team; 3779 int teams_swapped = 0, task_state; 3780 3781 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3782 __kmp_assert_valid_gtid(global_tid); 3783 3784 // why do we need this initialization here at all? 3785 // Reduction clause can not be a stand-alone directive. 3786 3787 // do not call __kmp_serial_initialize(), it will be called by 3788 // __kmp_parallel_initialize() if needed 3789 // possible detection of false-positive race by the threadchecker ??? 3790 if (!TCR_4(__kmp_init_parallel)) 3791 __kmp_parallel_initialize(); 3792 3793 __kmp_resume_if_soft_paused(); 3794 3795 // check correctness of reduce block nesting 3796 #if KMP_USE_DYNAMIC_LOCK 3797 if (__kmp_env_consistency_check) 3798 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3799 #else 3800 if (__kmp_env_consistency_check) 3801 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3802 #endif 3803 3804 th = __kmp_thread_from_gtid(global_tid); 3805 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3806 3807 packed_reduction_method = __kmp_determine_reduction_method( 3808 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3809 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3810 3811 OMPT_REDUCTION_DECL(th, global_tid); 3812 3813 if (packed_reduction_method == critical_reduce_block) { 3814 3815 OMPT_REDUCTION_BEGIN; 3816 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3817 retval = 1; 3818 3819 } else if (packed_reduction_method == empty_reduce_block) { 3820 3821 OMPT_REDUCTION_BEGIN; 3822 // usage: if team size == 1, no synchronization is required ( Intel 3823 // platforms only ) 3824 retval = 1; 3825 3826 } else if (packed_reduction_method == atomic_reduce_block) { 3827 3828 retval = 2; 3829 3830 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3831 tree_reduce_block)) { 3832 3833 // case tree_reduce_block: 3834 // this barrier should be visible to a customer and to the threading profile 3835 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3836 #if OMPT_SUPPORT 3837 ompt_frame_t *ompt_frame; 3838 if (ompt_enabled.enabled) { 3839 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3840 if (ompt_frame->enter_frame.ptr == NULL) 3841 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3842 } 3843 OMPT_STORE_RETURN_ADDRESS(global_tid); 3844 #endif 3845 #if USE_ITT_NOTIFY 3846 __kmp_threads[global_tid]->th.th_ident = 3847 loc; // needed for correct notification of frames 3848 #endif 3849 retval = 3850 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3851 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3852 retval = (retval != 0) ? 
(0) : (1); 3853 #if OMPT_SUPPORT && OMPT_OPTIONAL 3854 if (ompt_enabled.enabled) { 3855 ompt_frame->enter_frame = ompt_data_none; 3856 } 3857 #endif 3858 3859 // all other workers except primary thread should do this pop here 3860 // (none of other workers except primary will enter __kmpc_end_reduce()) 3861 if (__kmp_env_consistency_check) { 3862 if (retval == 0) { // 0: all other workers; 1: primary thread 3863 __kmp_pop_sync(global_tid, ct_reduce, loc); 3864 } 3865 } 3866 3867 } else { 3868 3869 // should never reach this block 3870 KMP_ASSERT(0); // "unexpected method" 3871 } 3872 if (teams_swapped) { 3873 __kmp_restore_swapped_teams(th, team, task_state); 3874 } 3875 3876 KA_TRACE(10, 3877 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3878 global_tid, packed_reduction_method, retval)); 3879 return retval; 3880 } 3881 3882 /*! 3883 @ingroup SYNCHRONIZATION 3884 @param loc source location information 3885 @param global_tid global thread id. 3886 @param lck pointer to the unique lock data structure 3887 3888 Finish the execution of a blocking reduce. 3889 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3890 start function. 3891 */ 3892 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3893 kmp_critical_name *lck) { 3894 3895 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3896 kmp_info_t *th; 3897 kmp_team_t *team; 3898 int teams_swapped = 0, task_state; 3899 3900 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3901 __kmp_assert_valid_gtid(global_tid); 3902 3903 th = __kmp_thread_from_gtid(global_tid); 3904 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3905 3906 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3907 3908 // this barrier should be visible to a customer and to the threading profile 3909 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3910 OMPT_REDUCTION_DECL(th, global_tid); 3911 3912 if (packed_reduction_method == critical_reduce_block) { 3913 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3914 3915 OMPT_REDUCTION_END; 3916 3917 // TODO: implicit barrier: should be exposed 3918 #if OMPT_SUPPORT 3919 ompt_frame_t *ompt_frame; 3920 if (ompt_enabled.enabled) { 3921 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3922 if (ompt_frame->enter_frame.ptr == NULL) 3923 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3924 } 3925 OMPT_STORE_RETURN_ADDRESS(global_tid); 3926 #endif 3927 #if USE_ITT_NOTIFY 3928 __kmp_threads[global_tid]->th.th_ident = loc; 3929 #endif 3930 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3931 #if OMPT_SUPPORT && OMPT_OPTIONAL 3932 if (ompt_enabled.enabled) { 3933 ompt_frame->enter_frame = ompt_data_none; 3934 } 3935 #endif 3936 3937 } else if (packed_reduction_method == empty_reduce_block) { 3938 3939 OMPT_REDUCTION_END; 3940 3941 // usage: if team size==1, no synchronization is required (Intel platforms only) 3942 3943 // TODO: implicit barrier: should be exposed 3944 #if OMPT_SUPPORT 3945 ompt_frame_t *ompt_frame; 3946 if (ompt_enabled.enabled) { 3947 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3948 if (ompt_frame->enter_frame.ptr == NULL) 3949 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3950 } 3951 OMPT_STORE_RETURN_ADDRESS(global_tid); 3952 #endif 3953 #if USE_ITT_NOTIFY 3954 __kmp_threads[global_tid]->th.th_ident = loc; 3955 #endif 3956 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3957 #if OMPT_SUPPORT && OMPT_OPTIONAL 3958 if (ompt_enabled.enabled) { 3959 ompt_frame->enter_frame = ompt_data_none; 3960 } 3961 #endif 3962 3963 } else if (packed_reduction_method == atomic_reduce_block) { 3964 3965 #if OMPT_SUPPORT 3966 ompt_frame_t *ompt_frame; 3967 if (ompt_enabled.enabled) { 3968 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3969 if (ompt_frame->enter_frame.ptr == NULL) 3970 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3971 } 3972 OMPT_STORE_RETURN_ADDRESS(global_tid); 3973 #endif 3974 // TODO: implicit barrier: should be exposed 3975 #if USE_ITT_NOTIFY 3976 __kmp_threads[global_tid]->th.th_ident = loc; 3977 #endif 3978 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3979 #if OMPT_SUPPORT && OMPT_OPTIONAL 3980 if (ompt_enabled.enabled) { 3981 ompt_frame->enter_frame = ompt_data_none; 3982 } 3983 #endif 3984 3985 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3986 tree_reduce_block)) { 3987 3988 // only primary thread executes here (primary releases all other workers) 3989 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3990 global_tid); 3991 3992 } else { 3993 3994 // should never reach this block 3995 KMP_ASSERT(0); // "unexpected method" 3996 } 3997 if (teams_swapped) { 3998 __kmp_restore_swapped_teams(th, team, task_state); 3999 } 4000 4001 if (__kmp_env_consistency_check) 4002 __kmp_pop_sync(global_tid, ct_reduce, loc); 4003 4004 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 4005 global_tid, packed_reduction_method)); 4006 4007 return; 4008 } 4009 4010 #undef __KMP_GET_REDUCTION_METHOD 4011 #undef __KMP_SET_REDUCTION_METHOD 4012 4013 /* end of interface to fast scalable reduce routines */ 4014 4015 kmp_uint64 __kmpc_get_taskid() { 4016 4017 kmp_int32 gtid; 4018 kmp_info_t *thread; 4019 4020 gtid = __kmp_get_gtid(); 4021 if (gtid < 0) { 4022 return 0; 4023 } 4024 thread = __kmp_thread_from_gtid(gtid); 4025 return thread->th.th_current_task->td_task_id; 4026 4027 } // __kmpc_get_taskid 4028 4029 kmp_uint64 __kmpc_get_parent_taskid() { 4030 4031 kmp_int32 gtid; 4032 kmp_info_t *thread; 4033 kmp_taskdata_t *parent_task; 4034 4035 gtid = __kmp_get_gtid(); 4036 if (gtid < 0) { 4037 return 0; 4038 } 4039 thread = __kmp_thread_from_gtid(gtid); 4040 parent_task = thread->th.th_current_task->td_parent; 4041 return (parent_task == NULL ? 0 : parent_task->td_task_id); 4042 4043 } // __kmpc_get_parent_taskid 4044 4045 /*! 4046 @ingroup WORK_SHARING 4047 @param loc source location information. 4048 @param gtid global thread number. 4049 @param num_dims number of associated doacross loops. 4050 @param dims info on loops bounds. 4051 4052 Initialize doacross loop information. 4053 Expect compiler send us inclusive bounds, 4054 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
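
A hedged sketch (not literal compiler output; lb/ub and the sink distance are
illustrative) of the call sequence a compiler typically emits for a
one-dimensional doacross loop such as "#pragma omp for ordered(1)" with
"depend(sink: i - 2)" / "depend(source)":

@code
  struct kmp_dim dim;
  dim.lo = 2; // inclusive lower bound of the example loop above
  dim.up = 8; // inclusive upper bound
  dim.st = 2; // stride
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (kmp_int64 i = lb; i <= ub; i += 2) { // this thread's chunk
    kmp_int64 vec = i - 2;                  // depend(sink: i - 2); out-of-range
    __kmpc_doacross_wait(loc, gtid, &vec);  // sinks are ignored by the runtime
    // ... loop body ...
    vec = i;                                // depend(source)
    __kmpc_doacross_post(loc, gtid, &vec);
  }
  __kmpc_doacross_fini(loc, gtid);
@endcode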
4055 */ 4056 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 4057 const struct kmp_dim *dims) { 4058 __kmp_assert_valid_gtid(gtid); 4059 int j, idx; 4060 kmp_int64 last, trace_count; 4061 kmp_info_t *th = __kmp_threads[gtid]; 4062 kmp_team_t *team = th->th.th_team; 4063 kmp_uint32 *flags; 4064 kmp_disp_t *pr_buf = th->th.th_dispatch; 4065 dispatch_shared_info_t *sh_buf; 4066 4067 KA_TRACE( 4068 20, 4069 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4070 gtid, num_dims, !team->t.t_serialized)); 4071 KMP_DEBUG_ASSERT(dims != NULL); 4072 KMP_DEBUG_ASSERT(num_dims > 0); 4073 4074 if (team->t.t_serialized) { 4075 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4076 return; // no dependencies if team is serialized 4077 } 4078 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4079 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4080 // the next loop 4081 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4082 4083 // Save bounds info into allocated private buffer 4084 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4085 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4086 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4087 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4088 pr_buf->th_doacross_info[0] = 4089 (kmp_int64)num_dims; // first element is number of dimensions 4090 // Save also address of num_done in order to access it later without knowing 4091 // the buffer index 4092 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4093 pr_buf->th_doacross_info[2] = dims[0].lo; 4094 pr_buf->th_doacross_info[3] = dims[0].up; 4095 pr_buf->th_doacross_info[4] = dims[0].st; 4096 last = 5; 4097 for (j = 1; j < num_dims; ++j) { 4098 kmp_int64 4099 range_length; // To keep ranges of all dimensions but the first dims[0] 4100 if (dims[j].st == 1) { // most common case 4101 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4102 range_length = dims[j].up - dims[j].lo + 1; 4103 } else { 4104 if (dims[j].st > 0) { 4105 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4106 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4107 } else { // negative increment 4108 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4109 range_length = 4110 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4111 } 4112 } 4113 pr_buf->th_doacross_info[last++] = range_length; 4114 pr_buf->th_doacross_info[last++] = dims[j].lo; 4115 pr_buf->th_doacross_info[last++] = dims[j].up; 4116 pr_buf->th_doacross_info[last++] = dims[j].st; 4117 } 4118 4119 // Compute total trip count. 4120 // Start with range of dims[0] which we don't need to keep in the buffer. 
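  // Worked example (illustrative): for dims[0] = {lo=2, up=8, st=2} the branch
  // below yields trace_count = (8 - 2) / 2 + 1 = 4; the ranges of the remaining
  // dimensions were stored at th_doacross_info[4*j+1] above and are multiplied
  // in by the loop that follows.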
4121 if (dims[0].st == 1) { // most common case 4122 trace_count = dims[0].up - dims[0].lo + 1; 4123 } else if (dims[0].st > 0) { 4124 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4125 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4126 } else { // negative increment 4127 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4128 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4129 } 4130 for (j = 1; j < num_dims; ++j) { 4131 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4132 } 4133 KMP_DEBUG_ASSERT(trace_count > 0); 4134 4135 // Check if shared buffer is not occupied by other loop (idx - 4136 // __kmp_dispatch_num_buffers) 4137 if (idx != sh_buf->doacross_buf_idx) { 4138 // Shared buffer is occupied, wait for it to be free 4139 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4140 __kmp_eq_4, NULL); 4141 } 4142 #if KMP_32_BIT_ARCH 4143 // Check if we are the first thread. After the CAS the first thread gets 0, 4144 // others get 1 if initialization is in progress, allocated pointer otherwise. 4145 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4146 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4147 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4148 #else 4149 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4150 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4151 #endif 4152 if (flags == NULL) { 4153 // we are the first thread, allocate the array of flags 4154 size_t size = 4155 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4156 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4157 KMP_MB(); 4158 sh_buf->doacross_flags = flags; 4159 } else if (flags == (kmp_uint32 *)1) { 4160 #if KMP_32_BIT_ARCH 4161 // initialization is still in progress, need to wait 4162 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4163 #else 4164 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4165 #endif 4166 KMP_YIELD(TRUE); 4167 KMP_MB(); 4168 } else { 4169 KMP_MB(); 4170 } 4171 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4172 pr_buf->th_doacross_flags = 4173 sh_buf->doacross_flags; // save private copy in order to not 4174 // touch shared buffer on each iteration 4175 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4176 } 4177 4178 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4179 __kmp_assert_valid_gtid(gtid); 4180 kmp_int64 shft; 4181 size_t num_dims, i; 4182 kmp_uint32 flag; 4183 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4184 kmp_info_t *th = __kmp_threads[gtid]; 4185 kmp_team_t *team = th->th.th_team; 4186 kmp_disp_t *pr_buf; 4187 kmp_int64 lo, up, st; 4188 4189 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4190 if (team->t.t_serialized) { 4191 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4192 return; // no dependencies if team is serialized 4193 } 4194 4195 // calculate sequential iteration number and check out-of-bounds condition 4196 pr_buf = th->th.th_dispatch; 4197 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4198 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4199 lo = pr_buf->th_doacross_info[2]; 4200 up = pr_buf->th_doacross_info[3]; 4201 st = pr_buf->th_doacross_info[4]; 4202 #if OMPT_SUPPORT && OMPT_OPTIONAL 4203 ompt_dependence_t deps[num_dims]; 4204 #endif 4205 if (st == 1) { // most common case 4206 if (vec[0] < lo || vec[0] > up) { 4207 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4208 "bounds [%lld,%lld]\n", 4209 gtid, vec[0], lo, up)); 4210 return; 4211 } 4212 iter_number = vec[0] - lo; 4213 } else if (st > 0) { 4214 if (vec[0] < lo || vec[0] > up) { 4215 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4216 "bounds [%lld,%lld]\n", 4217 gtid, vec[0], lo, up)); 4218 return; 4219 } 4220 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4221 } else { // negative increment 4222 if (vec[0] > lo || vec[0] < up) { 4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4224 "bounds [%lld,%lld]\n", 4225 gtid, vec[0], lo, up)); 4226 return; 4227 } 4228 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4229 } 4230 #if OMPT_SUPPORT && OMPT_OPTIONAL 4231 deps[0].variable.value = iter_number; 4232 deps[0].dependence_type = ompt_dependence_type_sink; 4233 #endif 4234 for (i = 1; i < num_dims; ++i) { 4235 kmp_int64 iter, ln; 4236 size_t j = i * 4; 4237 ln = pr_buf->th_doacross_info[j + 1]; 4238 lo = pr_buf->th_doacross_info[j + 2]; 4239 up = pr_buf->th_doacross_info[j + 3]; 4240 st = pr_buf->th_doacross_info[j + 4]; 4241 if (st == 1) { 4242 if (vec[i] < lo || vec[i] > up) { 4243 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4244 "bounds [%lld,%lld]\n", 4245 gtid, vec[i], lo, up)); 4246 return; 4247 } 4248 iter = vec[i] - lo; 4249 } else if (st > 0) { 4250 if (vec[i] < lo || vec[i] > up) { 4251 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4252 "bounds [%lld,%lld]\n", 4253 gtid, vec[i], lo, up)); 4254 return; 4255 } 4256 iter = (kmp_uint64)(vec[i] - lo) / st; 4257 } else { // st < 0 4258 if (vec[i] > lo || vec[i] < up) { 4259 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4260 "bounds [%lld,%lld]\n", 4261 gtid, vec[i], lo, up)); 4262 return; 4263 } 4264 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4265 } 4266 iter_number = iter + ln * iter_number; 4267 #if OMPT_SUPPORT && OMPT_OPTIONAL 4268 deps[i].variable.value = iter; 4269 deps[i].dependence_type = ompt_dependence_type_sink; 4270 #endif 4271 } 4272 shft = iter_number % 32; // use 32-bit granularity 4273 iter_number >>= 5; // divided by 32 4274 flag = 1 << shft; 4275 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4276 KMP_YIELD(TRUE); 4277 } 4278 KMP_MB(); 4279 #if OMPT_SUPPORT && OMPT_OPTIONAL 4280 if (ompt_enabled.ompt_callback_dependences) { 4281 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4282 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4283 } 4284 #endif 4285 KA_TRACE(20, 4286 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4287 gtid, (iter_number << 5) + shft)); 4288 } 4289 4290 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4291 __kmp_assert_valid_gtid(gtid); 4292 kmp_int64 shft; 4293 size_t num_dims, i; 4294 kmp_uint32 flag; 4295 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4296 kmp_info_t *th = __kmp_threads[gtid]; 4297 kmp_team_t *team = th->th.th_team; 4298 kmp_disp_t *pr_buf; 4299 kmp_int64 lo, st; 4300 4301 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4302 if (team->t.t_serialized) { 4303 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4304 return; // no dependencies if team is serialized 4305 } 4306 4307 // calculate sequential iteration number (same as in "wait" but no 4308 // out-of-bounds checks) 4309 pr_buf = th->th.th_dispatch; 4310 
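  // Illustrative note: the code below flattens the iteration vector the same
  // way as in "wait": per dimension, iter_number = iter + ln * iter_number, so
  // a 2-D vector (a, b) with kept range ln for the second dimension maps to
  // a * ln + b; the flattened index is then split into a word (index >> 5) and
  // a bit (1 << (index % 32)) of th_doacross_flags.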
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4311 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4312 lo = pr_buf->th_doacross_info[2]; 4313 st = pr_buf->th_doacross_info[4]; 4314 #if OMPT_SUPPORT && OMPT_OPTIONAL 4315 ompt_dependence_t deps[num_dims]; 4316 #endif 4317 if (st == 1) { // most common case 4318 iter_number = vec[0] - lo; 4319 } else if (st > 0) { 4320 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4321 } else { // negative increment 4322 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4323 } 4324 #if OMPT_SUPPORT && OMPT_OPTIONAL 4325 deps[0].variable.value = iter_number; 4326 deps[0].dependence_type = ompt_dependence_type_source; 4327 #endif 4328 for (i = 1; i < num_dims; ++i) { 4329 kmp_int64 iter, ln; 4330 size_t j = i * 4; 4331 ln = pr_buf->th_doacross_info[j + 1]; 4332 lo = pr_buf->th_doacross_info[j + 2]; 4333 st = pr_buf->th_doacross_info[j + 4]; 4334 if (st == 1) { 4335 iter = vec[i] - lo; 4336 } else if (st > 0) { 4337 iter = (kmp_uint64)(vec[i] - lo) / st; 4338 } else { // st < 0 4339 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4340 } 4341 iter_number = iter + ln * iter_number; 4342 #if OMPT_SUPPORT && OMPT_OPTIONAL 4343 deps[i].variable.value = iter; 4344 deps[i].dependence_type = ompt_dependence_type_source; 4345 #endif 4346 } 4347 #if OMPT_SUPPORT && OMPT_OPTIONAL 4348 if (ompt_enabled.ompt_callback_dependences) { 4349 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4350 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4351 } 4352 #endif 4353 shft = iter_number % 32; // use 32-bit granularity 4354 iter_number >>= 5; // divided by 32 4355 flag = 1 << shft; 4356 KMP_MB(); 4357 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4358 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4359 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4360 (iter_number << 5) + shft)); 4361 } 4362 4363 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4364 __kmp_assert_valid_gtid(gtid); 4365 kmp_int32 num_done; 4366 kmp_info_t *th = __kmp_threads[gtid]; 4367 kmp_team_t *team = th->th.th_team; 4368 kmp_disp_t *pr_buf = th->th.th_dispatch; 4369 4370 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4371 if (team->t.t_serialized) { 4372 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4373 return; // nothing to do 4374 } 4375 num_done = 4376 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4377 if (num_done == th->th.th_team_nproc) { 4378 // we are the last thread, need to free shared resources 4379 int idx = pr_buf->th_doacross_buf_idx - 1; 4380 dispatch_shared_info_t *sh_buf = 4381 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4382 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4383 (kmp_int64)&sh_buf->doacross_num_done); 4384 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4385 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4386 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4387 sh_buf->doacross_flags = NULL; 4388 sh_buf->doacross_num_done = 0; 4389 sh_buf->doacross_buf_idx += 4390 __kmp_dispatch_num_buffers; // free buffer for future re-use 4391 } 4392 // free private resources (need to keep buffer index forever) 4393 pr_buf->th_doacross_flags = NULL; 4394 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4395 pr_buf->th_doacross_info = NULL; 4396 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4397 } 4398 4399 /* OpenMP 5.1 Memory Management routines */ 4400 void 
*omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4401 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4402 } 4403 4404 void *omp_aligned_alloc(size_t align, size_t size, 4405 omp_allocator_handle_t allocator) { 4406 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4407 } 4408 4409 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4410 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4411 } 4412 4413 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4414 omp_allocator_handle_t allocator) { 4415 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4416 } 4417 4418 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4419 omp_allocator_handle_t free_allocator) { 4420 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4421 free_allocator); 4422 } 4423 4424 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4425 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4426 } 4427 /* end of OpenMP 5.1 Memory Management routines */ 4428 4429 int __kmpc_get_target_offload(void) { 4430 if (!__kmp_init_serial) { 4431 __kmp_serial_initialize(); 4432 } 4433 return __kmp_target_offload; 4434 } 4435 4436 int __kmpc_pause_resource(kmp_pause_status_t level) { 4437 if (!__kmp_init_serial) { 4438 return 1; // Can't pause if runtime is not initialized 4439 } 4440 return __kmp_pause_resource(level); 4441 } 4442 4443 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4444 if (!__kmp_init_serial) 4445 __kmp_serial_initialize(); 4446 4447 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4448 4449 #if OMPT_SUPPORT 4450 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4451 ompt_callbacks.ompt_callback(ompt_callback_error)( 4452 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4453 OMPT_GET_RETURN_ADDRESS(0)); 4454 } 4455 #endif // OMPT_SUPPORT 4456 4457 char *src_loc; 4458 if (loc && loc->psource) { 4459 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4460 src_loc = 4461 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4462 __kmp_str_loc_free(&str_loc); 4463 } else { 4464 src_loc = __kmp_str_format("unknown"); 4465 } 4466 4467 if (severity == severity_warning) 4468 KMP_WARNING(UserDirectedWarning, src_loc, message); 4469 else 4470 KMP_FATAL(UserDirectedError, src_loc, message); 4471 4472 __kmp_str_free(&src_loc); 4473 } 4474 4475 // Mark begin of scope directive. 4476 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4477 // reserved is for extension of scope directive and not used. 4478 #if OMPT_SUPPORT && OMPT_OPTIONAL 4479 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4480 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4481 int tid = __kmp_tid_from_gtid(gtid); 4482 ompt_callbacks.ompt_callback(ompt_callback_work)( 4483 ompt_work_scope, ompt_scope_begin, 4484 &(team->t.ompt_team_info.parallel_data), 4485 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4486 OMPT_GET_RETURN_ADDRESS(0)); 4487 } 4488 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4489 } 4490 4491 // Mark end of scope directive 4492 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4493 // reserved is for extension of scope directive and not used. 
4494 #if OMPT_SUPPORT && OMPT_OPTIONAL 4495 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4496 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4497 int tid = __kmp_tid_from_gtid(gtid); 4498 ompt_callbacks.ompt_callback(ompt_callback_work)( 4499 ompt_work_scope, ompt_scope_end, 4500 &(team->t.ompt_team_info.parallel_data), 4501 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4502 OMPT_GET_RETURN_ADDRESS(0)); 4503 } 4504 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4505 } 4506 4507 #ifdef KMP_USE_VERSION_SYMBOLS 4508 // For GOMP compatibility there are two versions of each omp_* API. 4509 // One is the plain C symbol and one is the Fortran symbol with an appended 4510 // underscore. When we implement a specific ompc_* version of an omp_* 4511 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4512 // instead of the Fortran versions in kmp_ftn_entry.h 4513 extern "C" { 4514 // Have to undef these from omp.h so they aren't translated into 4515 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4516 #ifdef omp_set_affinity_format 4517 #undef omp_set_affinity_format 4518 #endif 4519 #ifdef omp_get_affinity_format 4520 #undef omp_get_affinity_format 4521 #endif 4522 #ifdef omp_display_affinity 4523 #undef omp_display_affinity 4524 #endif 4525 #ifdef omp_capture_affinity 4526 #undef omp_capture_affinity 4527 #endif 4528 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4529 "OMP_5.0"); 4530 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4531 "OMP_5.0"); 4532 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4533 "OMP_5.0"); 4534 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4535 "OMP_5.0"); 4536 } // extern "C" 4537 #endif 4538
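
/* Illustrative usage of the OpenMP 5.1 memory management entry points defined
   above (a user-level sketch, not part of the runtime; the sizes and the
   64-byte alignment are arbitrary):

     int *a = (int *)omp_alloc(1024 * sizeof(int), omp_default_mem_alloc);
     double *b = (double *)omp_aligned_alloc(64, 512 * sizeof(double),
                                             omp_default_mem_alloc);
     int *c = (int *)omp_calloc(256, sizeof(int), omp_default_mem_alloc);
     // ... use a, b, c ...
     omp_free(a, omp_default_mem_alloc);
     omp_free(b, omp_default_mem_alloc);
     omp_free(c, omp_default_mem_alloc);
*/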