/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
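As an illustration only (a hedged sketch; @c foreign_worker is a made-up name
and no particular compiler emits this), a thread created outside the OpenMP
runtime, for example with pthread_create(), could query its global thread
index directly. This routine does not dereference @p loc, so a null location
is shown:
@code
void *foreign_worker(void *arg) {
  kmp_int32 gtid = __kmpc_global_thread_num(NULL); // registers the thread if needed
  printf("registered with the OpenMP runtime as gtid %d\n", (int)gtid);
  return NULL;
}
@endcode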
92 93 If multiple non-OpenMP threads all enter an OpenMP construct then this 94 will be a unique thread identifier among all the threads created by 95 the OpenMP runtime (but the value cannot be defined in terms of 96 OpenMP thread ids returned by omp_get_thread_num()). 97 */ 98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 99 kmp_int32 gtid = __kmp_entry_gtid(); 100 101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 102 103 return gtid; 104 } 105 106 /*! 107 @ingroup THREAD_STATES 108 @param loc Source location information. 109 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 110 111 This function can be called in any context. 112 It returns the total number of threads under the control of the OpenMP runtime. 113 That is not a number that can be determined by any OpenMP standard calls, since 114 the library may be called from more than one non-OpenMP thread, and this 115 reflects the total over all such calls. Similarly the runtime maintains 116 underlying threads even when they are not active (since the cost of creating 117 and destroying OS threads is high), this call counts all such threads even if 118 they are not waiting for work. 119 */ 120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 121 KC_TRACE(10, 122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 123 124 return TCR_4(__kmp_all_nth); 125 } 126 127 /*! 128 @ingroup THREAD_STATES 129 @param loc Source location information. 130 @return The thread number of the calling thread in the innermost active parallel 131 construct. 132 */ 133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 135 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 136 } 137 138 /*! 139 @ingroup THREAD_STATES 140 @param loc Source location information. 141 @return The number of threads in the innermost active parallel construct. 142 */ 143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 145 146 return __kmp_entry_thread()->th.th_team->t.t_nproc; 147 } 148 149 /*! 150 * @ingroup DEPRECATED 151 * @param loc location description 152 * 153 * This function need not be called. It always returns TRUE. 
154 */ 155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 156 #ifndef KMP_DEBUG 157 158 return TRUE; 159 160 #else 161 162 const char *semi2; 163 const char *semi3; 164 int line_no; 165 166 if (__kmp_par_range == 0) { 167 return TRUE; 168 } 169 semi2 = loc->psource; 170 if (semi2 == NULL) { 171 return TRUE; 172 } 173 semi2 = strchr(semi2, ';'); 174 if (semi2 == NULL) { 175 return TRUE; 176 } 177 semi2 = strchr(semi2 + 1, ';'); 178 if (semi2 == NULL) { 179 return TRUE; 180 } 181 if (__kmp_par_range_filename[0]) { 182 const char *name = semi2 - 1; 183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 184 name--; 185 } 186 if ((*name == '/') || (*name == ';')) { 187 name++; 188 } 189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 190 return __kmp_par_range < 0; 191 } 192 } 193 semi3 = strchr(semi2 + 1, ';'); 194 if (__kmp_par_range_routine[0]) { 195 if ((semi3 != NULL) && (semi3 > semi2) && 196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 197 return __kmp_par_range < 0; 198 } 199 } 200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 202 return __kmp_par_range > 0; 203 } 204 return __kmp_par_range < 0; 205 } 206 return TRUE; 207 208 #endif /* KMP_DEBUG */ 209 } 210 211 /*! 212 @ingroup THREAD_STATES 213 @param loc Source location information. 214 @return 1 if this thread is executing inside an active parallel region, zero if 215 not. 216 */ 217 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 218 return __kmp_entry_thread()->th.th_root->r.r_active; 219 } 220 221 /*! 222 @ingroup PARALLEL 223 @param loc source location information 224 @param global_tid global thread number 225 @param num_threads number of threads requested for this parallel construct 226 227 Set the number of threads to be used by the next fork spawned by this thread. 228 This call is only required if the parallel construct has a `num_threads` clause. 229 */ 230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 231 kmp_int32 num_threads) { 232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 233 global_tid, num_threads)); 234 __kmp_assert_valid_gtid(global_tid); 235 __kmp_push_num_threads(loc, global_tid, num_threads); 236 } 237 238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 240 /* the num_threads are automatically popped */ 241 } 242 243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 244 kmp_int32 proc_bind) { 245 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 246 proc_bind)); 247 __kmp_assert_valid_gtid(global_tid); 248 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 249 } 250 251 /*! 252 @ingroup PARALLEL 253 @param loc source location information 254 @param argc total number of arguments in the ellipsis 255 @param microtask pointer to callback routine consisting of outlined parallel 256 construct 257 @param ... pointers to shared variables that aren't global 258 259 Do the actual fork and call the microtask in the relevant number of threads. 260 */ 261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 262 int gtid = __kmp_entry_gtid(); 263 264 #if (KMP_STATS_ENABLED) 265 // If we were in a serial region, then stop the serial timer, record 266 // the event, and start parallel region timer 267 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 268 if (previous_state == stats_state_e::SERIAL_REGION) { 269 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 270 } else { 271 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 272 } 273 int inParallel = __kmpc_in_parallel(loc); 274 if (inParallel) { 275 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 276 } else { 277 KMP_COUNT_BLOCK(OMP_PARALLEL); 278 } 279 #endif 280 281 // maybe to save thr_state is enough here 282 { 283 va_list ap; 284 va_start(ap, microtask); 285 286 #if OMPT_SUPPORT 287 ompt_frame_t *ompt_frame; 288 if (ompt_enabled.enabled) { 289 kmp_info_t *master_th = __kmp_threads[gtid]; 290 kmp_team_t *parent_team = master_th->th.th_team; 291 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 292 if (lwt) 293 ompt_frame = &(lwt->ompt_task_info.frame); 294 else { 295 int tid = __kmp_tid_from_gtid(gtid); 296 ompt_frame = &( 297 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 298 } 299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 300 } 301 OMPT_STORE_RETURN_ADDRESS(gtid); 302 #endif 303 304 #if INCLUDE_SSC_MARKS 305 SSC_MARK_FORKING(); 306 #endif 307 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 308 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 309 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 310 kmp_va_addr_of(ap)); 311 #if INCLUDE_SSC_MARKS 312 SSC_MARK_JOINING(); 313 #endif 314 __kmp_join_call(loc, gtid 315 #if OMPT_SUPPORT 316 , 317 fork_context_intel 318 #endif 319 ); 320 321 va_end(ap); 322 } 323 324 #if KMP_STATS_ENABLED 325 if (previous_state == stats_state_e::SERIAL_REGION) { 326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 327 KMP_SET_THREAD_STATE(previous_state); 328 } else { 329 KMP_POP_PARTITIONED_TIMER(); 330 } 331 #endif // KMP_STATS_ENABLED 332 } 333 334 /*! 335 @ingroup PARALLEL 336 @param loc source location information 337 @param global_tid global thread number 338 @param num_teams number of teams requested for the teams construct 339 @param num_threads number of threads per team requested for the teams construct 340 341 Set the number of teams to be used by the teams construct. 342 This call is only required if the teams construct has a `num_teams` clause 343 or a `thread_limit` clause (or both). 344 */ 345 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 346 kmp_int32 num_teams, kmp_int32 num_threads) { 347 KA_TRACE(20, 348 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 349 global_tid, num_teams, num_threads)); 350 __kmp_assert_valid_gtid(global_tid); 351 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 352 } 353 354 /*! 355 @ingroup PARALLEL 356 @param loc source location information 357 @param argc total number of arguments in the ellipsis 358 @param microtask pointer to callback routine consisting of outlined teams 359 construct 360 @param ... pointers to shared variables that aren't global 361 362 Do the actual fork and call the microtask in the relevant number of threads. 363 */ 364 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 365 ...) 
{
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
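As a hedged sketch of what a front end typically emits for the false branch
(@c outlined_fn, @c bound_tid and the shared-argument pointers are
placeholders, not names used by this library):
@code
__kmpc_serialized_parallel(&loc, gtid);
outlined_fn(&gtid, &bound_tid /*, pointers to shared variables */);
__kmpc_end_serialized_parallel(&loc, gtid);
@endcode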
464 */ 465 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 466 // The implementation is now in kmp_runtime.cpp so that it can share static 467 // functions with kmp_fork_call since the tasks to be done are similar in 468 // each case. 469 __kmp_assert_valid_gtid(global_tid); 470 #if OMPT_SUPPORT 471 OMPT_STORE_RETURN_ADDRESS(global_tid); 472 #endif 473 __kmp_serialized_parallel(loc, global_tid); 474 } 475 476 /*! 477 @ingroup PARALLEL 478 @param loc source location information 479 @param global_tid global thread number 480 481 Leave a serialized parallel construct. 482 */ 483 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 484 kmp_internal_control_t *top; 485 kmp_info_t *this_thr; 486 kmp_team_t *serial_team; 487 488 KC_TRACE(10, 489 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 490 491 /* skip all this code for autopar serialized loops since it results in 492 unacceptable overhead */ 493 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 494 return; 495 496 // Not autopar code 497 __kmp_assert_valid_gtid(global_tid); 498 if (!TCR_4(__kmp_init_parallel)) 499 __kmp_parallel_initialize(); 500 501 __kmp_resume_if_soft_paused(); 502 503 this_thr = __kmp_threads[global_tid]; 504 serial_team = this_thr->th.th_serial_team; 505 506 kmp_task_team_t *task_team = this_thr->th.th_task_team; 507 // we need to wait for the proxy tasks before finishing the thread 508 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 509 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 510 511 KMP_MB(); 512 KMP_DEBUG_ASSERT(serial_team); 513 KMP_ASSERT(serial_team->t.t_serialized); 514 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 515 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 516 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 517 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 518 519 #if OMPT_SUPPORT 520 if (ompt_enabled.enabled && 521 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 522 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 523 if (ompt_enabled.ompt_callback_implicit_task) { 524 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 525 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 526 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 527 } 528 529 // reset clear the task id only after unlinking the task 530 ompt_data_t *parent_task_data; 531 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 532 533 if (ompt_enabled.ompt_callback_parallel_end) { 534 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 535 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 536 ompt_parallel_invoker_program | ompt_parallel_team, 537 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 538 } 539 __ompt_lw_taskteam_unlink(this_thr); 540 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 541 } 542 #endif 543 544 /* If necessary, pop the internal control stack values and replace the team 545 * values */ 546 top = serial_team->t.t_control_stack_top; 547 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 548 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 549 serial_team->t.t_control_stack_top = top->next; 550 __kmp_free(top); 551 } 552 553 // if( serial_team -> t.t_serialized > 1 ) 554 serial_team->t.t_level--; 555 556 /* pop dispatch buffers stack */ 557 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 558 { 559 dispatch_private_info_t 
*disp_buffer = 560 serial_team->t.t_dispatch->th_disp_buffer; 561 serial_team->t.t_dispatch->th_disp_buffer = 562 serial_team->t.t_dispatch->th_disp_buffer->next; 563 __kmp_free(disp_buffer); 564 } 565 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 566 567 --serial_team->t.t_serialized; 568 if (serial_team->t.t_serialized == 0) { 569 570 /* return to the parallel section */ 571 572 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 573 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 574 __kmp_clear_x87_fpu_status_word(); 575 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 576 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 577 } 578 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 579 580 this_thr->th.th_team = serial_team->t.t_parent; 581 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 582 583 /* restore values cached in the thread */ 584 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 585 this_thr->th.th_team_master = 586 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 587 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 588 589 /* TODO the below shouldn't need to be adjusted for serialized teams */ 590 this_thr->th.th_dispatch = 591 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 592 593 __kmp_pop_current_task_from_thread(this_thr); 594 595 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 596 this_thr->th.th_current_task->td_flags.executing = 1; 597 598 if (__kmp_tasking_mode != tskm_immediate_exec) { 599 // Copy the task team from the new child / old parent team to the thread. 600 this_thr->th.th_task_team = 601 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 602 KA_TRACE(20, 603 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 604 "team %p\n", 605 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 606 } 607 } else { 608 if (__kmp_tasking_mode != tskm_immediate_exec) { 609 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 610 "depth of serial team %p to %d\n", 611 global_tid, serial_team, serial_team->t.t_serialized)); 612 } 613 } 614 615 if (__kmp_env_consistency_check) 616 __kmp_pop_parallel(global_tid, NULL); 617 #if OMPT_SUPPORT 618 if (ompt_enabled.enabled) 619 this_thr->th.ompt_thread_info.state = 620 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 621 : ompt_state_work_parallel); 622 #endif 623 } 624 625 /*! 626 @ingroup SYNCHRONIZATION 627 @param loc source location information. 628 629 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 630 depending on the memory ordering convention obeyed by the compiler 631 even that may not be necessary). 632 */ 633 void __kmpc_flush(ident_t *loc) { 634 KC_TRACE(10, ("__kmpc_flush: called\n")); 635 636 /* need explicit __mf() here since use volatile instead in library */ 637 KMP_MB(); /* Flush all pending memory write invalidates. */ 638 639 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 640 #if KMP_MIC 641 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 642 // We shouldn't need it, though, since the ABI rules require that 643 // * If the compiler generates NGO stores it also generates the fence 644 // * If users hand-code NGO stores they should insert the fence 645 // therefore no incomplete unordered stores should be visible. 646 #else 647 // C74404 648 // This is to address non-temporal store instructions (sfence needed). 
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
  // Nothing to see here move along
#elif KMP_ARCH_PPC64
  // Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
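A hedged sketch of the expected pairing (@c loc and @c gtid are placeholders):
@code
if (__kmpc_master(&loc, gtid)) {
  // master region body, executed only by the master thread
  __kmpc_end_master(&loc, gtid);
}
@endcode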
741 */ 742 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 743 int status = 0; 744 745 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 746 __kmp_assert_valid_gtid(global_tid); 747 748 if (!TCR_4(__kmp_init_parallel)) 749 __kmp_parallel_initialize(); 750 751 __kmp_resume_if_soft_paused(); 752 753 if (KMP_MASTER_GTID(global_tid)) { 754 KMP_COUNT_BLOCK(OMP_MASTER); 755 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 756 status = 1; 757 } 758 759 #if OMPT_SUPPORT && OMPT_OPTIONAL 760 if (status) { 761 if (ompt_enabled.ompt_callback_masked) { 762 kmp_info_t *this_thr = __kmp_threads[global_tid]; 763 kmp_team_t *team = this_thr->th.th_team; 764 765 int tid = __kmp_tid_from_gtid(global_tid); 766 ompt_callbacks.ompt_callback(ompt_callback_masked)( 767 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 768 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 769 OMPT_GET_RETURN_ADDRESS(0)); 770 } 771 } 772 #endif 773 774 if (__kmp_env_consistency_check) { 775 #if KMP_USE_DYNAMIC_LOCK 776 if (status) 777 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 778 else 779 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 780 #else 781 if (status) 782 __kmp_push_sync(global_tid, ct_master, loc, NULL); 783 else 784 __kmp_check_sync(global_tid, ct_master, loc, NULL); 785 #endif 786 } 787 788 return status; 789 } 790 791 /*! 792 @ingroup WORK_SHARING 793 @param loc source location information. 794 @param global_tid global thread number . 795 796 Mark the end of a <tt>master</tt> region. This should only be called by the 797 thread that executes the <tt>master</tt> region. 798 */ 799 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 800 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 801 __kmp_assert_valid_gtid(global_tid); 802 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 803 KMP_POP_PARTITIONED_TIMER(); 804 805 #if OMPT_SUPPORT && OMPT_OPTIONAL 806 kmp_info_t *this_thr = __kmp_threads[global_tid]; 807 kmp_team_t *team = this_thr->th.th_team; 808 if (ompt_enabled.ompt_callback_masked) { 809 int tid = __kmp_tid_from_gtid(global_tid); 810 ompt_callbacks.ompt_callback(ompt_callback_masked)( 811 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 812 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 813 OMPT_GET_RETURN_ADDRESS(0)); 814 } 815 #endif 816 817 if (__kmp_env_consistency_check) { 818 if (KMP_MASTER_GTID(global_tid)) 819 __kmp_pop_sync(global_tid, ct_master, loc); 820 } 821 } 822 823 /*! 824 @ingroup WORK_SHARING 825 @param loc source location information. 826 @param gtid global thread number. 827 828 Start execution of an <tt>ordered</tt> construct. 
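A hedged sketch of the expected pairing inside the body of a loop compiled
with an ordered clause (placeholder names):
@code
__kmpc_ordered(&loc, gtid);
// statements inside the ordered region
__kmpc_end_ordered(&loc, gtid);
@endcode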
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
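  // Added note: a fresh indirect lock is allocated and initialized
  // unconditionally, then published into *crit with a single
  // compare-and-store below. If another thread wins that race, the lock that
  // thread published is used and the local allocation is simply left
  // unclaimed; as noted in the status == 0 branch, it is cleaned up at
  // program exit rather than destroyed here.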
941 void *idx; 942 kmp_indirect_lock_t **lck; 943 lck = (kmp_indirect_lock_t **)crit; 944 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 945 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 946 KMP_SET_I_LOCK_LOCATION(ilk, loc); 947 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 948 KA_TRACE(20, 949 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 950 #if USE_ITT_BUILD 951 __kmp_itt_critical_creating(ilk->lock, loc); 952 #endif 953 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 954 if (status == 0) { 955 #if USE_ITT_BUILD 956 __kmp_itt_critical_destroyed(ilk->lock); 957 #endif 958 // We don't really need to destroy the unclaimed lock here since it will be 959 // cleaned up at program exit. 960 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 961 } 962 KMP_DEBUG_ASSERT(*lck != NULL); 963 } 964 965 // Fast-path acquire tas lock 966 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 967 { \ 968 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 969 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 970 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 971 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 972 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 973 kmp_uint32 spins; \ 974 KMP_FSYNC_PREPARE(l); \ 975 KMP_INIT_YIELD(spins); \ 976 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 977 do { \ 978 if (TCR_4(__kmp_nth) > \ 979 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 980 KMP_YIELD(TRUE); \ 981 } else { \ 982 KMP_YIELD_SPIN(spins); \ 983 } \ 984 __kmp_spin_backoff(&backoff); \ 985 } while ( \ 986 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 987 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 988 } \ 989 KMP_FSYNC_ACQUIRED(l); \ 990 } 991 992 // Fast-path test tas lock 993 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 994 { \ 995 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 996 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 997 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 998 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 999 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1000 } 1001 1002 // Fast-path release tas lock 1003 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1004 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1005 1006 #if KMP_USE_FUTEX 1007 1008 #include <sys/syscall.h> 1009 #include <unistd.h> 1010 #ifndef FUTEX_WAIT 1011 #define FUTEX_WAIT 0 1012 #endif 1013 #ifndef FUTEX_WAKE 1014 #define FUTEX_WAKE 1 1015 #endif 1016 1017 // Fast-path acquire futex lock 1018 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1019 { \ 1020 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1021 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1022 KMP_MB(); \ 1023 KMP_FSYNC_PREPARE(ftx); \ 1024 kmp_int32 poll_val; \ 1025 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1026 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1027 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1028 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1029 if (!cond) { \ 1030 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1031 poll_val | \ 1032 KMP_LOCK_BUSY(1, futex))) { \ 1033 continue; \ 1034 } \ 1035 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1036 } \ 1037 kmp_int32 rc; \ 1038 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1039 NULL, NULL, 0)) != 0) { \ 1040 continue; \ 1041 } \ 1042 gtid_code |= 1; \ 1043 } \ 1044 KMP_FSYNC_ACQUIRED(ftx); \ 1045 } 1046 1047 // Fast-path test futex lock 1048 #define 
KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1049 { \ 1050 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1051 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1052 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1053 KMP_FSYNC_ACQUIRED(ftx); \ 1054 rc = TRUE; \ 1055 } else { \ 1056 rc = FALSE; \ 1057 } \ 1058 } 1059 1060 // Fast-path release futex lock 1061 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1062 { \ 1063 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1064 KMP_MB(); \ 1065 KMP_FSYNC_RELEASING(ftx); \ 1066 kmp_int32 poll_val = \ 1067 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1068 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1069 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1070 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1071 } \ 1072 KMP_MB(); \ 1073 KMP_YIELD_OVERSUB(); \ 1074 } 1075 1076 #endif // KMP_USE_FUTEX 1077 1078 #else // KMP_USE_DYNAMIC_LOCK 1079 1080 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1081 ident_t const *loc, 1082 kmp_int32 gtid) { 1083 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1084 1085 // Because of the double-check, the following load doesn't need to be volatile 1086 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1087 1088 if (lck == NULL) { 1089 void *idx; 1090 1091 // Allocate & initialize the lock. 1092 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1093 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1094 __kmp_init_user_lock_with_checks(lck); 1095 __kmp_set_user_lock_location(lck, loc); 1096 #if USE_ITT_BUILD 1097 __kmp_itt_critical_creating(lck); 1098 // __kmp_itt_critical_creating() should be called *before* the first usage 1099 // of underlying lock. It is the only place where we can guarantee it. There 1100 // are chances the lock will destroyed with no usage, but it is not a 1101 // problem, because this is not real event seen by user but rather setting 1102 // name for object (lock). See more details in kmp_itt.h. 1103 #endif /* USE_ITT_BUILD */ 1104 1105 // Use a cmpxchg instruction to slam the start of the critical section with 1106 // the lock pointer. If another thread beat us to it, deallocate the lock, 1107 // and use the lock that the other thread allocated. 1108 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1109 1110 if (status == 0) { 1111 // Deallocate the lock and reload the value. 1112 #if USE_ITT_BUILD 1113 __kmp_itt_critical_destroyed(lck); 1114 // Let ITT know the lock is destroyed and the same memory location may be reused 1115 // for another purpose. 1116 #endif /* USE_ITT_BUILD */ 1117 __kmp_destroy_user_lock_with_checks(lck); 1118 __kmp_user_lock_free(&idx, gtid, lck); 1119 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1120 KMP_DEBUG_ASSERT(lck != NULL); 1121 } 1122 } 1123 return lck; 1124 } 1125 1126 #endif // KMP_USE_DYNAMIC_LOCK 1127 1128 /*! 1129 @ingroup WORK_SHARING 1130 @param loc source location information. 1131 @param global_tid global thread number. 1132 @param crit identity of the critical section. This could be a pointer to a lock 1133 associated with the critical section, or some other suitably unique value. 1134 1135 Enter code protected by a `critical` construct. 1136 This function blocks until the executing thread can enter the critical section. 
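A hedged sketch of how a named critical construct is expected to arrive here
(@c crit_name_lck, @c loc and @c gtid are placeholder names; the
@c kmp_critical_name object starts zero-filled because it has static storage
duration):
@code
static kmp_critical_name crit_name_lck;
__kmpc_critical(&loc, gtid, &crit_name_lck);
// code protected by the critical construct
__kmpc_end_critical(&loc, gtid, &crit_name_lck);
@endcode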
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
1207 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1208 1209 #if USE_ITT_BUILD 1210 __kmp_itt_critical_acquired(lck); 1211 #endif /* USE_ITT_BUILD */ 1212 #if OMPT_SUPPORT && OMPT_OPTIONAL 1213 if (ompt_enabled.enabled) { 1214 /* OMPT state update */ 1215 ti.state = prev_state; 1216 ti.wait_id = 0; 1217 1218 /* OMPT event callback */ 1219 if (ompt_enabled.ompt_callback_mutex_acquired) { 1220 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1221 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1222 } 1223 } 1224 #endif 1225 KMP_POP_PARTITIONED_TIMER(); 1226 1227 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1228 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1229 #endif // KMP_USE_DYNAMIC_LOCK 1230 } 1231 1232 #if KMP_USE_DYNAMIC_LOCK 1233 1234 // Converts the given hint to an internal lock implementation 1235 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1236 #if KMP_USE_TSX 1237 #define KMP_TSX_LOCK(seq) lockseq_##seq 1238 #else 1239 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1240 #endif 1241 1242 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1243 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1244 #else 1245 #define KMP_CPUINFO_RTM 0 1246 #endif 1247 1248 // Hints that do not require further logic 1249 if (hint & kmp_lock_hint_hle) 1250 return KMP_TSX_LOCK(hle); 1251 if (hint & kmp_lock_hint_rtm) 1252 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1253 if (hint & kmp_lock_hint_adaptive) 1254 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1255 1256 // Rule out conflicting hints first by returning the default lock 1257 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1258 return __kmp_user_lock_seq; 1259 if ((hint & omp_lock_hint_speculative) && 1260 (hint & omp_lock_hint_nonspeculative)) 1261 return __kmp_user_lock_seq; 1262 1263 // Do not even consider speculation when it appears to be contended 1264 if (hint & omp_lock_hint_contended) 1265 return lockseq_queuing; 1266 1267 // Uncontended lock without speculation 1268 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1269 return lockseq_tas; 1270 1271 // Use RTM lock for speculation 1272 if (hint & omp_lock_hint_speculative) 1273 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1274 1275 return __kmp_user_lock_seq; 1276 } 1277 1278 #if OMPT_SUPPORT && OMPT_OPTIONAL 1279 #if KMP_USE_DYNAMIC_LOCK 1280 static kmp_mutex_impl_t 1281 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1282 if (user_lock) { 1283 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1284 case 0: 1285 break; 1286 #if KMP_USE_FUTEX 1287 case locktag_futex: 1288 return kmp_mutex_impl_queuing; 1289 #endif 1290 case locktag_tas: 1291 return kmp_mutex_impl_spin; 1292 #if KMP_USE_TSX 1293 case locktag_hle: 1294 case locktag_rtm_spin: 1295 return kmp_mutex_impl_speculative; 1296 #endif 1297 default: 1298 return kmp_mutex_impl_none; 1299 } 1300 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1301 } 1302 KMP_ASSERT(ilock); 1303 switch (ilock->type) { 1304 #if KMP_USE_TSX 1305 case locktag_adaptive: 1306 case locktag_rtm_queuing: 1307 return kmp_mutex_impl_speculative; 1308 #endif 1309 case locktag_nested_tas: 1310 return kmp_mutex_impl_spin; 1311 #if KMP_USE_FUTEX 1312 case locktag_nested_futex: 1313 #endif 1314 case locktag_ticket: 1315 case locktag_queuing: 1316 case locktag_drdpa: 1317 case locktag_nested_ticket: 1318 case locktag_nested_queuing: 1319 case locktag_nested_drdpa: 1320 return kmp_mutex_impl_queuing; 1321 default: 1322 return kmp_mutex_impl_none; 1323 } 1324 } 1325 #else 1326 // For locks without dynamic binding 1327 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1328 switch (__kmp_user_lock_kind) { 1329 case lk_tas: 1330 return kmp_mutex_impl_spin; 1331 #if KMP_USE_FUTEX 1332 case lk_futex: 1333 #endif 1334 case lk_ticket: 1335 case lk_queuing: 1336 case lk_drdpa: 1337 return kmp_mutex_impl_queuing; 1338 #if KMP_USE_TSX 1339 case lk_hle: 1340 case lk_rtm_queuing: 1341 case lk_rtm_spin: 1342 case lk_adaptive: 1343 return kmp_mutex_impl_speculative; 1344 #endif 1345 default: 1346 return kmp_mutex_impl_none; 1347 } 1348 } 1349 #endif // KMP_USE_DYNAMIC_LOCK 1350 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1351 1352 /*! 1353 @ingroup WORK_SHARING 1354 @param loc source location information. 1355 @param global_tid global thread number. 1356 @param crit identity of the critical section. This could be a pointer to a lock 1357 associated with the critical section, or some other suitably unique value. 1358 @param hint the lock hint. 1359 1360 Enter code protected by a `critical` construct with a hint. The hint value is 1361 used to suggest a lock implementation. This function blocks until the executing 1362 thread can enter the critical section unless the hint suggests use of 1363 speculative execution and the hardware supports it. 1364 */ 1365 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1366 kmp_critical_name *crit, uint32_t hint) { 1367 KMP_COUNT_BLOCK(OMP_CRITICAL); 1368 kmp_user_lock_p lck; 1369 #if OMPT_SUPPORT && OMPT_OPTIONAL 1370 ompt_state_t prev_state = ompt_state_undefined; 1371 ompt_thread_info_t ti; 1372 // This is the case, if called from __kmpc_critical: 1373 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1374 if (!codeptr) 1375 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1376 #endif 1377 1378 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1379 __kmp_assert_valid_gtid(global_tid); 1380 1381 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1382 // Check if it is initialized. 
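  // Added note: for a dynamically bound lock the word addressed by 'crit' is
  // overloaded. If KMP_EXTRACT_D_TAG() finds a nonzero direct-lock tag in it,
  // the word itself is the lock (e.g. TAS or futex); otherwise it holds a
  // pointer to a separately allocated kmp_indirect_lock_t. The first thread
  // to arrive stores the tag or pointer chosen from the hint, which is what
  // the initialization below does.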
1383 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1384 if (*lk == 0) { 1385 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1386 if (KMP_IS_D_LOCK(lckseq)) { 1387 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1388 KMP_GET_D_TAG(lckseq)); 1389 } else { 1390 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1391 } 1392 } 1393 // Branch for accessing the actual lock object and set operation. This 1394 // branching is inevitable since this lock initialization does not follow the 1395 // normal dispatch path (lock table is not used). 1396 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1397 lck = (kmp_user_lock_p)lk; 1398 if (__kmp_env_consistency_check) { 1399 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1400 __kmp_map_hint_to_lock(hint)); 1401 } 1402 #if USE_ITT_BUILD 1403 __kmp_itt_critical_acquiring(lck); 1404 #endif 1405 #if OMPT_SUPPORT && OMPT_OPTIONAL 1406 if (ompt_enabled.enabled) { 1407 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1408 /* OMPT state update */ 1409 prev_state = ti.state; 1410 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1411 ti.state = ompt_state_wait_critical; 1412 1413 /* OMPT event callback */ 1414 if (ompt_enabled.ompt_callback_mutex_acquire) { 1415 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1416 ompt_mutex_critical, (unsigned int)hint, 1417 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1418 codeptr); 1419 } 1420 } 1421 #endif 1422 #if KMP_USE_INLINED_TAS 1423 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1424 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1425 } else 1426 #elif KMP_USE_INLINED_FUTEX 1427 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1428 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1429 } else 1430 #endif 1431 { 1432 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1433 } 1434 } else { 1435 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1436 lck = ilk->lock; 1437 if (__kmp_env_consistency_check) { 1438 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1439 __kmp_map_hint_to_lock(hint)); 1440 } 1441 #if USE_ITT_BUILD 1442 __kmp_itt_critical_acquiring(lck); 1443 #endif 1444 #if OMPT_SUPPORT && OMPT_OPTIONAL 1445 if (ompt_enabled.enabled) { 1446 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1447 /* OMPT state update */ 1448 prev_state = ti.state; 1449 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1450 ti.state = ompt_state_wait_critical; 1451 1452 /* OMPT event callback */ 1453 if (ompt_enabled.ompt_callback_mutex_acquire) { 1454 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1455 ompt_mutex_critical, (unsigned int)hint, 1456 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1457 codeptr); 1458 } 1459 } 1460 #endif 1461 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1462 } 1463 KMP_POP_PARTITIONED_TIMER(); 1464 1465 #if USE_ITT_BUILD 1466 __kmp_itt_critical_acquired(lck); 1467 #endif /* USE_ITT_BUILD */ 1468 #if OMPT_SUPPORT && OMPT_OPTIONAL 1469 if (ompt_enabled.enabled) { 1470 /* OMPT state update */ 1471 ti.state = prev_state; 1472 ti.wait_id = 0; 1473 1474 /* OMPT event callback */ 1475 if (ompt_enabled.ompt_callback_mutex_acquired) { 1476 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1477 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1478 } 1479 } 1480 #endif 1481 1482 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1483 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1484 } // __kmpc_critical_with_hint 1485 1486 #endif // KMP_USE_DYNAMIC_LOCK 
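/* Illustrative sketch only (assumed front-end output for a critical construct
   carrying a hint clause; the names below are placeholders and this library
   does not emit such code itself):

     static kmp_critical_name hinted_crit; // zero-filled lock word
     __kmpc_critical_with_hint(&loc, gtid, &hinted_crit,
                               omp_lock_hint_speculative);
     // code protected by the critical construct
     __kmpc_end_critical(&loc, gtid, &hinted_crit);

   The same kmp_critical_name object must be passed to __kmpc_end_critical so
   that the lock chosen from the hint on first entry is the one released. */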
1487 1488 /*! 1489 @ingroup WORK_SHARING 1490 @param loc source location information. 1491 @param global_tid global thread number . 1492 @param crit identity of the critical section. This could be a pointer to a lock 1493 associated with the critical section, or some other suitably unique value. 1494 1495 Leave a critical section, releasing any lock that was held during its execution. 1496 */ 1497 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1498 kmp_critical_name *crit) { 1499 kmp_user_lock_p lck; 1500 1501 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1502 1503 #if KMP_USE_DYNAMIC_LOCK 1504 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1505 lck = (kmp_user_lock_p)crit; 1506 KMP_ASSERT(lck != NULL); 1507 if (__kmp_env_consistency_check) { 1508 __kmp_pop_sync(global_tid, ct_critical, loc); 1509 } 1510 #if USE_ITT_BUILD 1511 __kmp_itt_critical_releasing(lck); 1512 #endif 1513 #if KMP_USE_INLINED_TAS 1514 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1515 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1516 } else 1517 #elif KMP_USE_INLINED_FUTEX 1518 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1519 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1520 } else 1521 #endif 1522 { 1523 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1524 } 1525 } else { 1526 kmp_indirect_lock_t *ilk = 1527 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1528 KMP_ASSERT(ilk != NULL); 1529 lck = ilk->lock; 1530 if (__kmp_env_consistency_check) { 1531 __kmp_pop_sync(global_tid, ct_critical, loc); 1532 } 1533 #if USE_ITT_BUILD 1534 __kmp_itt_critical_releasing(lck); 1535 #endif 1536 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1537 } 1538 1539 #else // KMP_USE_DYNAMIC_LOCK 1540 1541 if ((__kmp_user_lock_kind == lk_tas) && 1542 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1543 lck = (kmp_user_lock_p)crit; 1544 } 1545 #if KMP_USE_FUTEX 1546 else if ((__kmp_user_lock_kind == lk_futex) && 1547 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1548 lck = (kmp_user_lock_p)crit; 1549 } 1550 #endif 1551 else { // ticket, queuing or drdpa 1552 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1553 } 1554 1555 KMP_ASSERT(lck != NULL); 1556 1557 if (__kmp_env_consistency_check) 1558 __kmp_pop_sync(global_tid, ct_critical, loc); 1559 1560 #if USE_ITT_BUILD 1561 __kmp_itt_critical_releasing(lck); 1562 #endif /* USE_ITT_BUILD */ 1563 // Value of 'crit' should be good for using as a critical_id of the critical 1564 // section directive. 1565 __kmp_release_user_lock_with_checks(lck, global_tid); 1566 1567 #endif // KMP_USE_DYNAMIC_LOCK 1568 1569 #if OMPT_SUPPORT && OMPT_OPTIONAL 1570 /* OMPT release event triggers after lock is released; place here to trigger 1571 * for all #if branches */ 1572 OMPT_STORE_RETURN_ADDRESS(global_tid); 1573 if (ompt_enabled.ompt_callback_mutex_released) { 1574 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1575 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1576 OMPT_LOAD_RETURN_ADDRESS(0)); 1577 } 1578 #endif 1579 1580 KMP_POP_PARTITIONED_TIMER(); 1581 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1582 } 1583 1584 /*! 1585 @ingroup SYNCHRONIZATION 1586 @param loc source location information 1587 @param global_tid thread id. 1588 @return one if the thread should execute the master block, zero otherwise 1589 1590 Start execution of a combined barrier and master. The barrier is executed inside 1591 this function. 
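A hedged sketch of one plausible pairing (placeholder names; whether a given
front end actually emits this fused form is not specified here):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // code executed by the master thread only
  __kmpc_end_barrier_master(&loc, gtid); // releases the threads still waiting
}
@endcode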
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the construct has nowait
semantics: no thread waits at the end of the master block, so there is
nothing for such a call to do.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1666 } 1667 __kmp_check_barrier(global_tid, ct_barrier, loc); 1668 } 1669 1670 #if OMPT_SUPPORT 1671 ompt_frame_t *ompt_frame; 1672 if (ompt_enabled.enabled) { 1673 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1674 if (ompt_frame->enter_frame.ptr == NULL) 1675 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1676 } 1677 OMPT_STORE_RETURN_ADDRESS(global_tid); 1678 #endif 1679 #if USE_ITT_NOTIFY 1680 __kmp_threads[global_tid]->th.th_ident = loc; 1681 #endif 1682 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1683 #if OMPT_SUPPORT && OMPT_OPTIONAL 1684 if (ompt_enabled.enabled) { 1685 ompt_frame->enter_frame = ompt_data_none; 1686 } 1687 #endif 1688 1689 ret = __kmpc_master(loc, global_tid); 1690 1691 if (__kmp_env_consistency_check) { 1692 /* there's no __kmpc_end_master called; so the (stats) */ 1693 /* actions of __kmpc_end_master are done here */ 1694 if (ret) { 1695 /* only one thread should do the pop since only */ 1696 /* one did the push (see __kmpc_master()) */ 1697 __kmp_pop_sync(global_tid, ct_master, loc); 1698 } 1699 } 1700 1701 return (ret); 1702 } 1703 1704 /* The BARRIER for a SINGLE process section is always explicit */ 1705 /*! 1706 @ingroup WORK_SHARING 1707 @param loc source location information 1708 @param global_tid global thread number 1709 @return One if this thread should execute the single construct, zero otherwise. 1710 1711 Test whether to execute a <tt>single</tt> construct. 1712 There are no implicit barriers in the two "single" calls, rather the compiler 1713 should introduce an explicit barrier if it is required. 1714 */ 1715 1716 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1717 __kmp_assert_valid_gtid(global_tid); 1718 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1719 1720 if (rc) { 1721 // We are going to execute the single statement, so we should count it. 1722 KMP_COUNT_BLOCK(OMP_SINGLE); 1723 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1724 } 1725 1726 #if OMPT_SUPPORT && OMPT_OPTIONAL 1727 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1728 kmp_team_t *team = this_thr->th.th_team; 1729 int tid = __kmp_tid_from_gtid(global_tid); 1730 1731 if (ompt_enabled.enabled) { 1732 if (rc) { 1733 if (ompt_enabled.ompt_callback_work) { 1734 ompt_callbacks.ompt_callback(ompt_callback_work)( 1735 ompt_work_single_executor, ompt_scope_begin, 1736 &(team->t.ompt_team_info.parallel_data), 1737 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1738 1, OMPT_GET_RETURN_ADDRESS(0)); 1739 } 1740 } else { 1741 if (ompt_enabled.ompt_callback_work) { 1742 ompt_callbacks.ompt_callback(ompt_callback_work)( 1743 ompt_work_single_other, ompt_scope_begin, 1744 &(team->t.ompt_team_info.parallel_data), 1745 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1746 1, OMPT_GET_RETURN_ADDRESS(0)); 1747 ompt_callbacks.ompt_callback(ompt_callback_work)( 1748 ompt_work_single_other, ompt_scope_end, 1749 &(team->t.ompt_team_info.parallel_data), 1750 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1751 1, OMPT_GET_RETURN_ADDRESS(0)); 1752 } 1753 } 1754 } 1755 #endif 1756 1757 return rc; 1758 } 1759 1760 /*! 1761 @ingroup WORK_SHARING 1762 @param loc source location information 1763 @param global_tid global thread number 1764 1765 Mark the end of a <tt>single</tt> construct. This function should 1766 only be called by the thread that executed the block of code protected 1767 by the `single` construct. 
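A hedged sketch of the overall pairing (placeholder names; the trailing
barrier is the explicit one mentioned at __kmpc_single() and is omitted when
the construct has a nowait clause):
@code
if (__kmpc_single(&loc, gtid)) {
  // body of the single region
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid);
@endcode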
1768 */ 1769 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1770 __kmp_assert_valid_gtid(global_tid); 1771 __kmp_exit_single(global_tid); 1772 KMP_POP_PARTITIONED_TIMER(); 1773 1774 #if OMPT_SUPPORT && OMPT_OPTIONAL 1775 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1776 kmp_team_t *team = this_thr->th.th_team; 1777 int tid = __kmp_tid_from_gtid(global_tid); 1778 1779 if (ompt_enabled.ompt_callback_work) { 1780 ompt_callbacks.ompt_callback(ompt_callback_work)( 1781 ompt_work_single_executor, ompt_scope_end, 1782 &(team->t.ompt_team_info.parallel_data), 1783 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1784 OMPT_GET_RETURN_ADDRESS(0)); 1785 } 1786 #endif 1787 } 1788 1789 /*! 1790 @ingroup WORK_SHARING 1791 @param loc Source location 1792 @param global_tid Global thread id 1793 1794 Mark the end of a statically scheduled loop. 1795 */ 1796 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1797 KMP_POP_PARTITIONED_TIMER(); 1798 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1799 1800 #if OMPT_SUPPORT && OMPT_OPTIONAL 1801 if (ompt_enabled.ompt_callback_work) { 1802 ompt_work_t ompt_work_type = ompt_work_loop; 1803 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1804 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1805 // Determine workshare type 1806 if (loc != NULL) { 1807 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1808 ompt_work_type = ompt_work_loop; 1809 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1810 ompt_work_type = ompt_work_sections; 1811 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1812 ompt_work_type = ompt_work_distribute; 1813 } else { 1814 // use default set above. 1815 // a warning about this case is provided in __kmpc_for_static_init 1816 } 1817 KMP_DEBUG_ASSERT(ompt_work_type); 1818 } 1819 ompt_callbacks.ompt_callback(ompt_callback_work)( 1820 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1821 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1822 } 1823 #endif 1824 if (__kmp_env_consistency_check) 1825 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1826 } 1827 1828 // User routines which take C-style arguments (call by value) 1829 // different from the Fortran equivalent routines 1830 1831 void ompc_set_num_threads(int arg) { 1832 // !!!!! TODO: check the per-task binding 1833 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1834 } 1835 1836 void ompc_set_dynamic(int flag) { 1837 kmp_info_t *thread; 1838 1839 /* For the thread-private implementation of the internal controls */ 1840 thread = __kmp_entry_thread(); 1841 1842 __kmp_save_internal_controls(thread); 1843 1844 set__dynamic(thread, flag ? true : false); 1845 } 1846 1847 void ompc_set_nested(int flag) { 1848 kmp_info_t *thread; 1849 1850 /* For the thread-private internal controls implementation */ 1851 thread = __kmp_entry_thread(); 1852 1853 __kmp_save_internal_controls(thread); 1854 1855 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1856 } 1857 1858 void ompc_set_max_active_levels(int max_active_levels) { 1859 /* TO DO */ 1860 /* we want per-task implementation of this internal control */ 1861 1862 /* For the per-thread internal controls implementation */ 1863 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1864 } 1865 1866 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1867 // !!!!! 
TODO: check the per-task binding 1868 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1869 } 1870 1871 int ompc_get_ancestor_thread_num(int level) { 1872 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1873 } 1874 1875 int ompc_get_team_size(int level) { 1876 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1877 } 1878 1879 /* OpenMP 5.0 Affinity Format API */ 1880 1881 void ompc_set_affinity_format(char const *format) { 1882 if (!__kmp_init_serial) { 1883 __kmp_serial_initialize(); 1884 } 1885 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1886 format, KMP_STRLEN(format) + 1); 1887 } 1888 1889 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1890 size_t format_size; 1891 if (!__kmp_init_serial) { 1892 __kmp_serial_initialize(); 1893 } 1894 format_size = KMP_STRLEN(__kmp_affinity_format); 1895 if (buffer && size) { 1896 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1897 format_size + 1); 1898 } 1899 return format_size; 1900 } 1901 1902 void ompc_display_affinity(char const *format) { 1903 int gtid; 1904 if (!TCR_4(__kmp_init_middle)) { 1905 __kmp_middle_initialize(); 1906 } 1907 gtid = __kmp_get_gtid(); 1908 __kmp_aux_display_affinity(gtid, format); 1909 } 1910 1911 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1912 char const *format) { 1913 int gtid; 1914 size_t num_required; 1915 kmp_str_buf_t capture_buf; 1916 if (!TCR_4(__kmp_init_middle)) { 1917 __kmp_middle_initialize(); 1918 } 1919 gtid = __kmp_get_gtid(); 1920 __kmp_str_buf_init(&capture_buf); 1921 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1922 if (buffer && buf_size) { 1923 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1924 capture_buf.used + 1); 1925 } 1926 __kmp_str_buf_free(&capture_buf); 1927 return num_required; 1928 } 1929 1930 void kmpc_set_stacksize(int arg) { 1931 // __kmp_aux_set_stacksize initializes the library if needed 1932 __kmp_aux_set_stacksize(arg); 1933 } 1934 1935 void kmpc_set_stacksize_s(size_t arg) { 1936 // __kmp_aux_set_stacksize initializes the library if needed 1937 __kmp_aux_set_stacksize(arg); 1938 } 1939 1940 void kmpc_set_blocktime(int arg) { 1941 int gtid, tid; 1942 kmp_info_t *thread; 1943 1944 gtid = __kmp_entry_gtid(); 1945 tid = __kmp_tid_from_gtid(gtid); 1946 thread = __kmp_thread_from_gtid(gtid); 1947 1948 __kmp_aux_set_blocktime(arg, thread, tid); 1949 } 1950 1951 void kmpc_set_library(int arg) { 1952 // __kmp_user_set_library initializes the library if needed 1953 __kmp_user_set_library((enum library_type)arg); 1954 } 1955 1956 void kmpc_set_defaults(char const *str) { 1957 // __kmp_aux_set_defaults initializes the library if needed 1958 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1959 } 1960 1961 void kmpc_set_disp_num_buffers(int arg) { 1962 // ignore after initialization because some teams have already 1963 // allocated dispatch buffers 1964 if (__kmp_init_serial == 0 && arg > 0) 1965 __kmp_dispatch_num_buffers = arg; 1966 } 1967 1968 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1969 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1970 return -1; 1971 #else 1972 if (!TCR_4(__kmp_init_middle)) { 1973 __kmp_middle_initialize(); 1974 } 1975 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1976 #endif 1977 } 1978 1979 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1980 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1981 return -1; 1982 #else 1983 if (!TCR_4(__kmp_init_middle)) { 1984 __kmp_middle_initialize(); 
1985 } 1986 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 1987 #endif 1988 } 1989 1990 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 1991 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1992 return -1; 1993 #else 1994 if (!TCR_4(__kmp_init_middle)) { 1995 __kmp_middle_initialize(); 1996 } 1997 return __kmp_aux_get_affinity_mask_proc(proc, mask); 1998 #endif 1999 } 2000 2001 /* -------------------------------------------------------------------------- */ 2002 /*! 2003 @ingroup THREADPRIVATE 2004 @param loc source location information 2005 @param gtid global thread number 2006 @param cpy_size size of the cpy_data buffer 2007 @param cpy_data pointer to data to be copied 2008 @param cpy_func helper function to call for copying data 2009 @param didit flag variable: 1=single thread; 0=not single thread 2010 2011 __kmpc_copyprivate implements the interface for the private data broadcast 2012 needed for the copyprivate clause associated with a single region in an 2013 OpenMP<sup>*</sup> program (both C and Fortran). 2014 All threads participating in the parallel region call this routine. 2015 One of the threads (called the single thread) should have the <tt>didit</tt> 2016 variable set to 1 and all other threads should have that variable set to 0. 2017 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2018 2019 The OpenMP specification forbids the use of nowait on the single region when a 2020 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2021 barrier internally to avoid race conditions, so the code generation for the 2022 single region should avoid generating a barrier after the call to @ref 2023 __kmpc_copyprivate. 2024 2025 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2026 The <tt>loc</tt> parameter is a pointer to source location information. 2027 2028 Internal implementation: The single thread will first copy its descriptor 2029 address (cpy_data) to a team-private location, then the other threads will each 2030 call the function pointed to by the parameter cpy_func, which carries out the 2031 copy by copying the data using the cpy_data buffer. 2032 2033 The cpy_func routine used for the copy and the contents of the data area defined 2034 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2035 to be done. For instance, the cpy_data buffer can hold the actual data to be 2036 copied or it may hold a list of pointers to the data. The cpy_func routine must 2037 interpret the cpy_data buffer appropriately. 2038 2039 The interface to cpy_func is as follows: 2040 @code 2041 void cpy_func( void *destination, void *source ) 2042 @endcode 2043 where void *destination is the cpy_data pointer for the thread being copied to 2044 and void *source is the cpy_data pointer for the thread being copied from. 
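As a purely illustrative sketch (the helper and variable names below are hypothetical and not part of the runtime interface), broadcasting one thread's private <tt>x</tt> to the rest of the team could be generated roughly as follows:
@code
static void copy_int(void *dst, void *src) {
  *(int *)dst = *(int *)src; // each cpy_data is the address of that thread's x
}
// inside the outlined parallel region, x is private to each thread
int x = 0;
kmp_int32 didit = 0;
if (__kmpc_single(&loc, gtid)) {
  x = 42; // value produced by the single thread (placeholder)
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
// no explicit barrier here: __kmpc_copyprivate performs the synchronization
__kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_int, didit);
@endcode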
2045 */ 2046 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2047 void *cpy_data, void (*cpy_func)(void *, void *), 2048 kmp_int32 didit) { 2049 void **data_ptr; 2050 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2051 __kmp_assert_valid_gtid(gtid); 2052 2053 KMP_MB(); 2054 2055 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2056 2057 if (__kmp_env_consistency_check) { 2058 if (loc == 0) { 2059 KMP_WARNING(ConstructIdentInvalid); 2060 } 2061 } 2062 2063 // ToDo: Optimize the following two barriers into some kind of split barrier 2064 2065 if (didit) 2066 *data_ptr = cpy_data; 2067 2068 #if OMPT_SUPPORT 2069 ompt_frame_t *ompt_frame; 2070 if (ompt_enabled.enabled) { 2071 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2072 if (ompt_frame->enter_frame.ptr == NULL) 2073 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2074 } 2075 OMPT_STORE_RETURN_ADDRESS(gtid); 2076 #endif 2077 /* This barrier is not a barrier region boundary */ 2078 #if USE_ITT_NOTIFY 2079 __kmp_threads[gtid]->th.th_ident = loc; 2080 #endif 2081 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2082 2083 if (!didit) 2084 (*cpy_func)(cpy_data, *data_ptr); 2085 2086 // Consider next barrier a user-visible barrier for barrier region boundaries 2087 // Nesting checks are already handled by the single construct checks 2088 { 2089 #if OMPT_SUPPORT 2090 OMPT_STORE_RETURN_ADDRESS(gtid); 2091 #endif 2092 #if USE_ITT_NOTIFY 2093 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2094 // tasks can overwrite the location) 2095 #endif 2096 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2097 #if OMPT_SUPPORT && OMPT_OPTIONAL 2098 if (ompt_enabled.enabled) { 2099 ompt_frame->enter_frame = ompt_data_none; 2100 } 2101 #endif 2102 } 2103 } 2104 2105 /* -------------------------------------------------------------------------- */ 2106 2107 #define INIT_LOCK __kmp_init_user_lock_with_checks 2108 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2109 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2110 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2111 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2112 #define ACQUIRE_NESTED_LOCK_TIMED \ 2113 __kmp_acquire_nested_user_lock_with_checks_timed 2114 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2115 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2116 #define TEST_LOCK __kmp_test_user_lock_with_checks 2117 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2118 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2119 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2120 2121 // TODO: Make check abort messages use location info & pass it into 2122 // with_checks routines 2123 2124 #if KMP_USE_DYNAMIC_LOCK 2125 2126 // internal lock initializer 2127 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2128 kmp_dyna_lockseq_t seq) { 2129 if (KMP_IS_D_LOCK(seq)) { 2130 KMP_INIT_D_LOCK(lock, seq); 2131 #if USE_ITT_BUILD 2132 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2133 #endif 2134 } else { 2135 KMP_INIT_I_LOCK(lock, seq); 2136 #if USE_ITT_BUILD 2137 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2138 __kmp_itt_lock_creating(ilk->lock, loc); 2139 #endif 2140 } 2141 } 2142 2143 // internal nest lock initializer 2144 static __forceinline void 2145 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2146 kmp_dyna_lockseq_t seq) { 2147 #if KMP_USE_TSX 2148 // Don't have nested lock implementation for speculative locks 2149 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2150 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2151 seq = __kmp_user_lock_seq; 2152 #endif 2153 switch (seq) { 2154 case lockseq_tas: 2155 seq = lockseq_nested_tas; 2156 break; 2157 #if KMP_USE_FUTEX 2158 case lockseq_futex: 2159 seq = lockseq_nested_futex; 2160 break; 2161 #endif 2162 case lockseq_ticket: 2163 seq = lockseq_nested_ticket; 2164 break; 2165 case lockseq_queuing: 2166 seq = lockseq_nested_queuing; 2167 break; 2168 case lockseq_drdpa: 2169 seq = lockseq_nested_drdpa; 2170 break; 2171 default: 2172 seq = lockseq_nested_queuing; 2173 } 2174 KMP_INIT_I_LOCK(lock, seq); 2175 #if USE_ITT_BUILD 2176 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2177 __kmp_itt_lock_creating(ilk->lock, loc); 2178 #endif 2179 } 2180 2181 /* initialize the lock with a hint */ 2182 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2183 uintptr_t hint) { 2184 KMP_DEBUG_ASSERT(__kmp_init_serial); 2185 if (__kmp_env_consistency_check && user_lock == NULL) { 2186 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2187 } 2188 2189 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2190 2191 #if OMPT_SUPPORT && OMPT_OPTIONAL 2192 // This is the case, if called from omp_init_lock_with_hint: 2193 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2194 if (!codeptr) 2195 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2196 if (ompt_enabled.ompt_callback_lock_init) { 2197 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2198 ompt_mutex_lock, (omp_lock_hint_t)hint, 2199 __ompt_get_mutex_impl_type(user_lock), 2200 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2201 } 2202 #endif 2203 } 2204 2205 /* initialize the lock with a hint */ 2206 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2207 void **user_lock, uintptr_t hint) { 2208 KMP_DEBUG_ASSERT(__kmp_init_serial); 2209 if (__kmp_env_consistency_check && user_lock == NULL) { 2210 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2211 } 2212 2213 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2214 2215 #if OMPT_SUPPORT && OMPT_OPTIONAL 2216 // This is the case, if called from omp_init_lock_with_hint: 2217 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2218 if (!codeptr) 2219 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2220 if (ompt_enabled.ompt_callback_lock_init) { 2221 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2222 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2223 __ompt_get_mutex_impl_type(user_lock), 2224 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2225 } 2226 #endif 2227 } 2228 2229 #endif // KMP_USE_DYNAMIC_LOCK 2230 2231 /* initialize the lock */ 2232 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2233 #if KMP_USE_DYNAMIC_LOCK 2234 2235 KMP_DEBUG_ASSERT(__kmp_init_serial); 2236 if (__kmp_env_consistency_check && user_lock == NULL) { 2237 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2238 } 2239 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2240 2241 #if OMPT_SUPPORT && OMPT_OPTIONAL 2242 // This is the case, if called from omp_init_lock_with_hint: 2243 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2244 if (!codeptr) 2245 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2246 if (ompt_enabled.ompt_callback_lock_init) { 2247 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2248 
ompt_mutex_lock, omp_lock_hint_none, 2249 __ompt_get_mutex_impl_type(user_lock), 2250 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2251 } 2252 #endif 2253 2254 #else // KMP_USE_DYNAMIC_LOCK 2255 2256 static char const *const func = "omp_init_lock"; 2257 kmp_user_lock_p lck; 2258 KMP_DEBUG_ASSERT(__kmp_init_serial); 2259 2260 if (__kmp_env_consistency_check) { 2261 if (user_lock == NULL) { 2262 KMP_FATAL(LockIsUninitialized, func); 2263 } 2264 } 2265 2266 KMP_CHECK_USER_LOCK_INIT(); 2267 2268 if ((__kmp_user_lock_kind == lk_tas) && 2269 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2270 lck = (kmp_user_lock_p)user_lock; 2271 } 2272 #if KMP_USE_FUTEX 2273 else if ((__kmp_user_lock_kind == lk_futex) && 2274 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2275 lck = (kmp_user_lock_p)user_lock; 2276 } 2277 #endif 2278 else { 2279 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2280 } 2281 INIT_LOCK(lck); 2282 __kmp_set_user_lock_location(lck, loc); 2283 2284 #if OMPT_SUPPORT && OMPT_OPTIONAL 2285 // This is the case, if called from omp_init_lock_with_hint: 2286 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2287 if (!codeptr) 2288 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2289 if (ompt_enabled.ompt_callback_lock_init) { 2290 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2291 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2292 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2293 } 2294 #endif 2295 2296 #if USE_ITT_BUILD 2297 __kmp_itt_lock_creating(lck); 2298 #endif /* USE_ITT_BUILD */ 2299 2300 #endif // KMP_USE_DYNAMIC_LOCK 2301 } // __kmpc_init_lock 2302 2303 /* initialize the lock */ 2304 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2305 #if KMP_USE_DYNAMIC_LOCK 2306 2307 KMP_DEBUG_ASSERT(__kmp_init_serial); 2308 if (__kmp_env_consistency_check && user_lock == NULL) { 2309 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2310 } 2311 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2312 2313 #if OMPT_SUPPORT && OMPT_OPTIONAL 2314 // This is the case, if called from omp_init_lock_with_hint: 2315 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2316 if (!codeptr) 2317 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2318 if (ompt_enabled.ompt_callback_lock_init) { 2319 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2320 ompt_mutex_nest_lock, omp_lock_hint_none, 2321 __ompt_get_mutex_impl_type(user_lock), 2322 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2323 } 2324 #endif 2325 2326 #else // KMP_USE_DYNAMIC_LOCK 2327 2328 static char const *const func = "omp_init_nest_lock"; 2329 kmp_user_lock_p lck; 2330 KMP_DEBUG_ASSERT(__kmp_init_serial); 2331 2332 if (__kmp_env_consistency_check) { 2333 if (user_lock == NULL) { 2334 KMP_FATAL(LockIsUninitialized, func); 2335 } 2336 } 2337 2338 KMP_CHECK_USER_LOCK_INIT(); 2339 2340 if ((__kmp_user_lock_kind == lk_tas) && 2341 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2342 OMP_NEST_LOCK_T_SIZE)) { 2343 lck = (kmp_user_lock_p)user_lock; 2344 } 2345 #if KMP_USE_FUTEX 2346 else if ((__kmp_user_lock_kind == lk_futex) && 2347 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2348 OMP_NEST_LOCK_T_SIZE)) { 2349 lck = (kmp_user_lock_p)user_lock; 2350 } 2351 #endif 2352 else { 2353 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2354 } 2355 2356 INIT_NESTED_LOCK(lck); 2357 __kmp_set_user_lock_location(lck, loc); 2358 2359 #if OMPT_SUPPORT && OMPT_OPTIONAL 2360 // This is the case, if called from omp_init_lock_with_hint: 2361 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2362 if (!codeptr) 2363 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2364 if (ompt_enabled.ompt_callback_lock_init) { 2365 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2366 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2367 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2368 } 2369 #endif 2370 2371 #if USE_ITT_BUILD 2372 __kmp_itt_lock_creating(lck); 2373 #endif /* USE_ITT_BUILD */ 2374 2375 #endif // KMP_USE_DYNAMIC_LOCK 2376 } // __kmpc_init_nest_lock 2377 2378 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2379 #if KMP_USE_DYNAMIC_LOCK 2380 2381 #if USE_ITT_BUILD 2382 kmp_user_lock_p lck; 2383 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2384 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2385 } else { 2386 lck = (kmp_user_lock_p)user_lock; 2387 } 2388 __kmp_itt_lock_destroyed(lck); 2389 #endif 2390 #if OMPT_SUPPORT && OMPT_OPTIONAL 2391 // This is the case, if called from omp_init_lock_with_hint: 2392 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2393 if (!codeptr) 2394 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2395 if (ompt_enabled.ompt_callback_lock_destroy) { 2396 kmp_user_lock_p lck; 2397 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2398 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2399 } else { 2400 lck = (kmp_user_lock_p)user_lock; 2401 } 2402 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2403 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2404 } 2405 #endif 2406 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2407 #else 2408 kmp_user_lock_p lck; 2409 2410 if ((__kmp_user_lock_kind == lk_tas) && 2411 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2412 lck = (kmp_user_lock_p)user_lock; 2413 } 2414 #if KMP_USE_FUTEX 2415 else if ((__kmp_user_lock_kind == lk_futex) && 2416 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2417 lck = (kmp_user_lock_p)user_lock; 2418 } 2419 #endif 2420 else { 2421 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2422 } 2423 2424 #if OMPT_SUPPORT && OMPT_OPTIONAL 2425 // This is the case, if called from omp_init_lock_with_hint: 2426 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2427 if (!codeptr) 2428 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2429 if (ompt_enabled.ompt_callback_lock_destroy) { 2430 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2431 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2432 } 2433 #endif 2434 2435 #if USE_ITT_BUILD 2436 __kmp_itt_lock_destroyed(lck); 2437 #endif /* USE_ITT_BUILD */ 2438 DESTROY_LOCK(lck); 2439 2440 if ((__kmp_user_lock_kind == lk_tas) && 2441 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2442 ; 2443 } 2444 #if KMP_USE_FUTEX 2445 else if ((__kmp_user_lock_kind == lk_futex) && 2446 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2447 ; 2448 } 2449 #endif 2450 else { 2451 __kmp_user_lock_free(user_lock, gtid, lck); 2452 } 2453 #endif // KMP_USE_DYNAMIC_LOCK 2454 } // __kmpc_destroy_lock 2455 2456 /* destroy the lock */ 2457 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2458 #if KMP_USE_DYNAMIC_LOCK 2459 2460 #if USE_ITT_BUILD 2461 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2462 __kmp_itt_lock_destroyed(ilk->lock); 2463 #endif 2464 #if OMPT_SUPPORT && OMPT_OPTIONAL 2465 // This is the case, if called from omp_init_lock_with_hint: 2466 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2467 if (!codeptr) 2468 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2469 if (ompt_enabled.ompt_callback_lock_destroy) { 2470 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2471 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2472 } 2473 #endif 2474 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2475 2476 #else // KMP_USE_DYNAMIC_LOCK 2477 2478 kmp_user_lock_p lck; 2479 2480 if ((__kmp_user_lock_kind == lk_tas) && 2481 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2482 OMP_NEST_LOCK_T_SIZE)) { 2483 lck = (kmp_user_lock_p)user_lock; 2484 } 2485 #if KMP_USE_FUTEX 2486 else if ((__kmp_user_lock_kind == lk_futex) && 2487 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2488 OMP_NEST_LOCK_T_SIZE)) { 2489 lck = (kmp_user_lock_p)user_lock; 2490 } 2491 #endif 2492 else { 2493 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2494 } 2495 2496 #if OMPT_SUPPORT && OMPT_OPTIONAL 2497 // This is the case, if called from omp_init_lock_with_hint: 2498 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2499 if (!codeptr) 2500 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2501 if (ompt_enabled.ompt_callback_lock_destroy) { 2502 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2503 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2504 } 2505 #endif 2506 2507 #if USE_ITT_BUILD 2508 __kmp_itt_lock_destroyed(lck); 2509 #endif /* USE_ITT_BUILD */ 2510 2511 DESTROY_NESTED_LOCK(lck); 2512 2513 if ((__kmp_user_lock_kind == lk_tas) && 2514 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2515 OMP_NEST_LOCK_T_SIZE)) { 2516 ; 2517 } 2518 #if KMP_USE_FUTEX 2519 else if ((__kmp_user_lock_kind == lk_futex) && 2520 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2521 OMP_NEST_LOCK_T_SIZE)) { 2522 ; 2523 } 2524 #endif 2525 else { 2526 __kmp_user_lock_free(user_lock, gtid, lck); 2527 } 2528 #endif // KMP_USE_DYNAMIC_LOCK 2529 } // __kmpc_destroy_nest_lock 2530 2531 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2532 KMP_COUNT_BLOCK(OMP_set_lock); 2533 #if KMP_USE_DYNAMIC_LOCK 2534 int tag = KMP_EXTRACT_D_TAG(user_lock); 2535 #if USE_ITT_BUILD 2536 __kmp_itt_lock_acquiring( 2537 (kmp_user_lock_p) 2538 user_lock); // itt function will get to the right lock object. 
2539 #endif 2540 #if OMPT_SUPPORT && OMPT_OPTIONAL 2541 // This is the case, if called from omp_init_lock_with_hint: 2542 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2543 if (!codeptr) 2544 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2545 if (ompt_enabled.ompt_callback_mutex_acquire) { 2546 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2547 ompt_mutex_lock, omp_lock_hint_none, 2548 __ompt_get_mutex_impl_type(user_lock), 2549 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2550 } 2551 #endif 2552 #if KMP_USE_INLINED_TAS 2553 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2554 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2555 } else 2556 #elif KMP_USE_INLINED_FUTEX 2557 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2558 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2559 } else 2560 #endif 2561 { 2562 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2563 } 2564 #if USE_ITT_BUILD 2565 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2566 #endif 2567 #if OMPT_SUPPORT && OMPT_OPTIONAL 2568 if (ompt_enabled.ompt_callback_mutex_acquired) { 2569 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2570 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2571 } 2572 #endif 2573 2574 #else // KMP_USE_DYNAMIC_LOCK 2575 2576 kmp_user_lock_p lck; 2577 2578 if ((__kmp_user_lock_kind == lk_tas) && 2579 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2580 lck = (kmp_user_lock_p)user_lock; 2581 } 2582 #if KMP_USE_FUTEX 2583 else if ((__kmp_user_lock_kind == lk_futex) && 2584 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2585 lck = (kmp_user_lock_p)user_lock; 2586 } 2587 #endif 2588 else { 2589 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2590 } 2591 2592 #if USE_ITT_BUILD 2593 __kmp_itt_lock_acquiring(lck); 2594 #endif /* USE_ITT_BUILD */ 2595 #if OMPT_SUPPORT && OMPT_OPTIONAL 2596 // This is the case, if called from omp_init_lock_with_hint: 2597 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2598 if (!codeptr) 2599 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2600 if (ompt_enabled.ompt_callback_mutex_acquire) { 2601 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2602 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2603 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2604 } 2605 #endif 2606 2607 ACQUIRE_LOCK(lck, gtid); 2608 2609 #if USE_ITT_BUILD 2610 __kmp_itt_lock_acquired(lck); 2611 #endif /* USE_ITT_BUILD */ 2612 2613 #if OMPT_SUPPORT && OMPT_OPTIONAL 2614 if (ompt_enabled.ompt_callback_mutex_acquired) { 2615 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2616 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2617 } 2618 #endif 2619 2620 #endif // KMP_USE_DYNAMIC_LOCK 2621 } 2622 2623 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2624 #if KMP_USE_DYNAMIC_LOCK 2625 2626 #if USE_ITT_BUILD 2627 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2628 #endif 2629 #if OMPT_SUPPORT && OMPT_OPTIONAL 2630 // This is the case, if called from omp_init_lock_with_hint: 2631 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2632 if (!codeptr) 2633 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2634 if (ompt_enabled.enabled) { 2635 if (ompt_enabled.ompt_callback_mutex_acquire) { 2636 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2637 ompt_mutex_nest_lock, omp_lock_hint_none, 2638 __ompt_get_mutex_impl_type(user_lock), 2639 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2640 } 2641 } 2642 #endif 2643 int acquire_status = 2644 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2645 (void) acquire_status; 2646 #if USE_ITT_BUILD 2647 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2648 #endif 2649 2650 #if OMPT_SUPPORT && OMPT_OPTIONAL 2651 if (ompt_enabled.enabled) { 2652 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2653 if (ompt_enabled.ompt_callback_mutex_acquired) { 2654 // lock_first 2655 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2656 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2657 codeptr); 2658 } 2659 } else { 2660 if (ompt_enabled.ompt_callback_nest_lock) { 2661 // lock_next 2662 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2663 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2664 } 2665 } 2666 } 2667 #endif 2668 2669 #else // KMP_USE_DYNAMIC_LOCK 2670 int acquire_status; 2671 kmp_user_lock_p lck; 2672 2673 if ((__kmp_user_lock_kind == lk_tas) && 2674 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2675 OMP_NEST_LOCK_T_SIZE)) { 2676 lck = (kmp_user_lock_p)user_lock; 2677 } 2678 #if KMP_USE_FUTEX 2679 else if ((__kmp_user_lock_kind == lk_futex) && 2680 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2681 OMP_NEST_LOCK_T_SIZE)) { 2682 lck = (kmp_user_lock_p)user_lock; 2683 } 2684 #endif 2685 else { 2686 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2687 } 2688 2689 #if USE_ITT_BUILD 2690 __kmp_itt_lock_acquiring(lck); 2691 #endif /* USE_ITT_BUILD */ 2692 #if OMPT_SUPPORT && OMPT_OPTIONAL 2693 // This is the case, if called from omp_init_lock_with_hint: 2694 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2695 if (!codeptr) 2696 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2697 if (ompt_enabled.enabled) { 2698 if (ompt_enabled.ompt_callback_mutex_acquire) { 2699 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2700 ompt_mutex_nest_lock, omp_lock_hint_none, 2701 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2702 codeptr); 2703 } 2704 } 2705 #endif 2706 2707 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2708 2709 #if USE_ITT_BUILD 2710 __kmp_itt_lock_acquired(lck); 2711 #endif /* USE_ITT_BUILD */ 2712 2713 #if OMPT_SUPPORT && OMPT_OPTIONAL 2714 if (ompt_enabled.enabled) { 2715 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2716 if (ompt_enabled.ompt_callback_mutex_acquired) { 2717 // lock_first 2718 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2719 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2720 } 2721 } else { 2722 if (ompt_enabled.ompt_callback_nest_lock) { 2723 // lock_next 2724 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2725 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2726 } 2727 } 2728 } 2729 #endif 2730 2731 #endif // KMP_USE_DYNAMIC_LOCK 2732 } 2733 2734 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2735 #if KMP_USE_DYNAMIC_LOCK 2736 2737 int tag = KMP_EXTRACT_D_TAG(user_lock); 2738 #if USE_ITT_BUILD 2739 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2740 #endif 2741 #if KMP_USE_INLINED_TAS 2742 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2743 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2744 } else 2745 #elif KMP_USE_INLINED_FUTEX 2746 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2747 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2748 } else 2749 #endif 2750 { 2751 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2752 } 2753 2754 #if OMPT_SUPPORT && OMPT_OPTIONAL 2755 // This is the case, if called from omp_init_lock_with_hint: 2756 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2757 if (!codeptr) 2758 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2759 if (ompt_enabled.ompt_callback_mutex_released) { 2760 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2761 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2762 } 2763 #endif 2764 2765 #else // KMP_USE_DYNAMIC_LOCK 2766 2767 kmp_user_lock_p lck; 2768 2769 /* Can't use serial interval since not block structured */ 2770 /* release the lock */ 2771 2772 if ((__kmp_user_lock_kind == lk_tas) && 2773 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2774 #if KMP_OS_LINUX && \ 2775 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2776 // "fast" path implemented to fix customer performance issue 2777 #if USE_ITT_BUILD 2778 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2779 #endif /* USE_ITT_BUILD */ 2780 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2781 KMP_MB(); 2782 2783 #if OMPT_SUPPORT && OMPT_OPTIONAL 2784 // This is the case, if called from omp_init_lock_with_hint: 2785 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2786 if (!codeptr) 2787 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2788 if (ompt_enabled.ompt_callback_mutex_released) { 2789 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2790 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2791 } 2792 #endif 2793 2794 return; 2795 #else 2796 lck = (kmp_user_lock_p)user_lock; 2797 #endif 2798 } 2799 #if KMP_USE_FUTEX 2800 else if ((__kmp_user_lock_kind == lk_futex) && 2801 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2802 lck = (kmp_user_lock_p)user_lock; 2803 } 2804 #endif 2805 else { 2806 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2807 } 2808 2809 #if USE_ITT_BUILD 2810 __kmp_itt_lock_releasing(lck); 2811 #endif /* USE_ITT_BUILD */ 2812 2813 RELEASE_LOCK(lck, gtid); 2814 2815 #if OMPT_SUPPORT && OMPT_OPTIONAL 2816 // This is the case, if called from omp_init_lock_with_hint: 2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2818 if (!codeptr) 2819 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2820 if (ompt_enabled.ompt_callback_mutex_released) { 2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2822 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2823 } 2824 #endif 2825 2826 #endif // KMP_USE_DYNAMIC_LOCK 2827 } 2828 2829 /* release the lock */ 2830 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2831 #if KMP_USE_DYNAMIC_LOCK 2832 2833 #if USE_ITT_BUILD 2834 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2835 #endif 2836 int release_status = 2837 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2838 (void) release_status; 2839 2840 #if OMPT_SUPPORT && OMPT_OPTIONAL 2841 // This is the case, if called from omp_init_lock_with_hint: 2842 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2843 if (!codeptr) 2844 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2845 if (ompt_enabled.enabled) { 2846 if (release_status == KMP_LOCK_RELEASED) { 2847 if (ompt_enabled.ompt_callback_mutex_released) { 2848 // release_lock_last 2849 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2850 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2851 codeptr); 2852 } 2853 } else if (ompt_enabled.ompt_callback_nest_lock) { 2854 // release_lock_prev 2855 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2856 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2857 } 2858 } 2859 #endif 2860 2861 #else // KMP_USE_DYNAMIC_LOCK 2862 2863 kmp_user_lock_p lck; 
2864 2865 /* Can't use serial interval since not block structured */ 2866 2867 if ((__kmp_user_lock_kind == lk_tas) && 2868 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2869 OMP_NEST_LOCK_T_SIZE)) { 2870 #if KMP_OS_LINUX && \ 2871 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2872 // "fast" path implemented to fix customer performance issue 2873 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2874 #if USE_ITT_BUILD 2875 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2876 #endif /* USE_ITT_BUILD */ 2877 2878 #if OMPT_SUPPORT && OMPT_OPTIONAL 2879 int release_status = KMP_LOCK_STILL_HELD; 2880 #endif 2881 2882 if (--(tl->lk.depth_locked) == 0) { 2883 TCW_4(tl->lk.poll, 0); 2884 #if OMPT_SUPPORT && OMPT_OPTIONAL 2885 release_status = KMP_LOCK_RELEASED; 2886 #endif 2887 } 2888 KMP_MB(); 2889 2890 #if OMPT_SUPPORT && OMPT_OPTIONAL 2891 // This is the case, if called from omp_init_lock_with_hint: 2892 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2893 if (!codeptr) 2894 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2895 if (ompt_enabled.enabled) { 2896 if (release_status == KMP_LOCK_RELEASED) { 2897 if (ompt_enabled.ompt_callback_mutex_released) { 2898 // release_lock_last 2899 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2900 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2901 } 2902 } else if (ompt_enabled.ompt_callback_nest_lock) { 2903 // release_lock_previous 2904 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2905 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2906 } 2907 } 2908 #endif 2909 2910 return; 2911 #else 2912 lck = (kmp_user_lock_p)user_lock; 2913 #endif 2914 } 2915 #if KMP_USE_FUTEX 2916 else if ((__kmp_user_lock_kind == lk_futex) && 2917 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2918 OMP_NEST_LOCK_T_SIZE)) { 2919 lck = (kmp_user_lock_p)user_lock; 2920 } 2921 #endif 2922 else { 2923 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2924 } 2925 2926 #if USE_ITT_BUILD 2927 __kmp_itt_lock_releasing(lck); 2928 #endif /* USE_ITT_BUILD */ 2929 2930 int release_status; 2931 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2932 #if OMPT_SUPPORT && OMPT_OPTIONAL 2933 // This is the case, if called from omp_init_lock_with_hint: 2934 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2935 if (!codeptr) 2936 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2937 if (ompt_enabled.enabled) { 2938 if (release_status == KMP_LOCK_RELEASED) { 2939 if (ompt_enabled.ompt_callback_mutex_released) { 2940 // release_lock_last 2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2942 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2943 } 2944 } else if (ompt_enabled.ompt_callback_nest_lock) { 2945 // release_lock_previous 2946 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2947 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2948 } 2949 } 2950 #endif 2951 2952 #endif // KMP_USE_DYNAMIC_LOCK 2953 } 2954 2955 /* try to acquire the lock */ 2956 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2957 KMP_COUNT_BLOCK(OMP_test_lock); 2958 2959 #if KMP_USE_DYNAMIC_LOCK 2960 int rc; 2961 int tag = KMP_EXTRACT_D_TAG(user_lock); 2962 #if USE_ITT_BUILD 2963 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2964 #endif 2965 #if OMPT_SUPPORT && OMPT_OPTIONAL 2966 // This is the case, if called from omp_init_lock_with_hint: 2967 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2968 if (!codeptr) 2969 codeptr =
OMPT_GET_RETURN_ADDRESS(0); 2970 if (ompt_enabled.ompt_callback_mutex_acquire) { 2971 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2972 ompt_mutex_lock, omp_lock_hint_none, 2973 __ompt_get_mutex_impl_type(user_lock), 2974 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2975 } 2976 #endif 2977 #if KMP_USE_INLINED_TAS 2978 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2979 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2980 } else 2981 #elif KMP_USE_INLINED_FUTEX 2982 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2983 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 2984 } else 2985 #endif 2986 { 2987 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2988 } 2989 if (rc) { 2990 #if USE_ITT_BUILD 2991 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2992 #endif 2993 #if OMPT_SUPPORT && OMPT_OPTIONAL 2994 if (ompt_enabled.ompt_callback_mutex_acquired) { 2995 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2996 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2997 } 2998 #endif 2999 return FTN_TRUE; 3000 } else { 3001 #if USE_ITT_BUILD 3002 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3003 #endif 3004 return FTN_FALSE; 3005 } 3006 3007 #else // KMP_USE_DYNAMIC_LOCK 3008 3009 kmp_user_lock_p lck; 3010 int rc; 3011 3012 if ((__kmp_user_lock_kind == lk_tas) && 3013 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3014 lck = (kmp_user_lock_p)user_lock; 3015 } 3016 #if KMP_USE_FUTEX 3017 else if ((__kmp_user_lock_kind == lk_futex) && 3018 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3019 lck = (kmp_user_lock_p)user_lock; 3020 } 3021 #endif 3022 else { 3023 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3024 } 3025 3026 #if USE_ITT_BUILD 3027 __kmp_itt_lock_acquiring(lck); 3028 #endif /* USE_ITT_BUILD */ 3029 #if OMPT_SUPPORT && OMPT_OPTIONAL 3030 // This is the case, if called from omp_init_lock_with_hint: 3031 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3032 if (!codeptr) 3033 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3034 if (ompt_enabled.ompt_callback_mutex_acquire) { 3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3036 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3037 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3038 } 3039 #endif 3040 3041 rc = TEST_LOCK(lck, gtid); 3042 #if USE_ITT_BUILD 3043 if (rc) { 3044 __kmp_itt_lock_acquired(lck); 3045 } else { 3046 __kmp_itt_lock_cancelled(lck); 3047 } 3048 #endif /* USE_ITT_BUILD */ 3049 #if OMPT_SUPPORT && OMPT_OPTIONAL 3050 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3051 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3052 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3053 } 3054 #endif 3055 3056 return (rc ? 
FTN_TRUE : FTN_FALSE); 3057 3058 /* Can't use serial interval since not block structured */ 3059 3060 #endif // KMP_USE_DYNAMIC_LOCK 3061 } 3062 3063 /* try to acquire the lock */ 3064 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3065 #if KMP_USE_DYNAMIC_LOCK 3066 int rc; 3067 #if USE_ITT_BUILD 3068 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3069 #endif 3070 #if OMPT_SUPPORT && OMPT_OPTIONAL 3071 // This is the case, if called from omp_init_lock_with_hint: 3072 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3073 if (!codeptr) 3074 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3075 if (ompt_enabled.ompt_callback_mutex_acquire) { 3076 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3077 ompt_mutex_nest_lock, omp_lock_hint_none, 3078 __ompt_get_mutex_impl_type(user_lock), 3079 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3080 } 3081 #endif 3082 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3083 #if USE_ITT_BUILD 3084 if (rc) { 3085 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3086 } else { 3087 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3088 } 3089 #endif 3090 #if OMPT_SUPPORT && OMPT_OPTIONAL 3091 if (ompt_enabled.enabled && rc) { 3092 if (rc == 1) { 3093 if (ompt_enabled.ompt_callback_mutex_acquired) { 3094 // lock_first 3095 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3096 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3097 codeptr); 3098 } 3099 } else { 3100 if (ompt_enabled.ompt_callback_nest_lock) { 3101 // lock_next 3102 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3103 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3104 } 3105 } 3106 } 3107 #endif 3108 return rc; 3109 3110 #else // KMP_USE_DYNAMIC_LOCK 3111 3112 kmp_user_lock_p lck; 3113 int rc; 3114 3115 if ((__kmp_user_lock_kind == lk_tas) && 3116 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3117 OMP_NEST_LOCK_T_SIZE)) { 3118 lck = (kmp_user_lock_p)user_lock; 3119 } 3120 #if KMP_USE_FUTEX 3121 else if ((__kmp_user_lock_kind == lk_futex) && 3122 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3123 OMP_NEST_LOCK_T_SIZE)) { 3124 lck = (kmp_user_lock_p)user_lock; 3125 } 3126 #endif 3127 else { 3128 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3129 } 3130 3131 #if USE_ITT_BUILD 3132 __kmp_itt_lock_acquiring(lck); 3133 #endif /* USE_ITT_BUILD */ 3134 3135 #if OMPT_SUPPORT && OMPT_OPTIONAL 3136 // This is the case, if called from omp_init_lock_with_hint: 3137 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3138 if (!codeptr) 3139 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3140 if (ompt_enabled.enabled && 3141 ompt_enabled.ompt_callback_mutex_acquire) { 3142 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3143 ompt_mutex_nest_lock, omp_lock_hint_none, 3144 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3145 codeptr); 3146 } 3147 #endif 3148 3149 rc = TEST_NESTED_LOCK(lck, gtid); 3150 #if USE_ITT_BUILD 3151 if (rc) { 3152 __kmp_itt_lock_acquired(lck); 3153 } else { 3154 __kmp_itt_lock_cancelled(lck); 3155 } 3156 #endif /* USE_ITT_BUILD */ 3157 #if OMPT_SUPPORT && OMPT_OPTIONAL 3158 if (ompt_enabled.enabled && rc) { 3159 if (rc == 1) { 3160 if (ompt_enabled.ompt_callback_mutex_acquired) { 3161 // lock_first 3162 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3163 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3164 } 3165 } else { 3166 if (ompt_enabled.ompt_callback_nest_lock) { 3167
// lock_next 3168 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3169 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3170 } 3171 } 3172 } 3173 #endif 3174 return rc; 3175 3176 /* Can't use serial interval since not block structured */ 3177 3178 #endif // KMP_USE_DYNAMIC_LOCK 3179 } 3180 3181 // Interface to fast scalable reduce methods routines 3182 3183 // keep the selected method in a thread local structure for cross-function 3184 // usage: will be used in __kmpc_end_reduce* functions; 3185 // another solution: to re-determine the method one more time in 3186 // __kmpc_end_reduce* functions (new prototype required then) 3187 // AT: which solution is better? 3188 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3189 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3190 3191 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3192 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3193 3194 // description of the packed_reduction_method variable: look at the macros in 3195 // kmp.h 3196 3197 // used in a critical section reduce block 3198 static __forceinline void 3199 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3200 kmp_critical_name *crit) { 3201 3202 // this lock was visible to a customer and to the threading profile tool as a 3203 // serial overhead span (although it's used for an internal purpose only) 3204 // why was it visible in previous implementation? 3205 // should we keep it visible in new reduce block? 3206 kmp_user_lock_p lck; 3207 3208 #if KMP_USE_DYNAMIC_LOCK 3209 3210 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3211 // Check if it is initialized. 3212 if (*lk == 0) { 3213 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3214 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3215 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3216 } else { 3217 __kmp_init_indirect_csptr(crit, loc, global_tid, 3218 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3219 } 3220 } 3221 // Branch for accessing the actual lock object and set operation. This 3222 // branching is inevitable since this lock initialization does not follow the 3223 // normal dispatch path (lock table is not used). 3224 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3225 lck = (kmp_user_lock_p)lk; 3226 KMP_DEBUG_ASSERT(lck != NULL); 3227 if (__kmp_env_consistency_check) { 3228 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3229 } 3230 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3231 } else { 3232 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3233 lck = ilk->lock; 3234 KMP_DEBUG_ASSERT(lck != NULL); 3235 if (__kmp_env_consistency_check) { 3236 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3237 } 3238 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3239 } 3240 3241 #else // KMP_USE_DYNAMIC_LOCK 3242 3243 // We know that the fast reduction code is only emitted by Intel compilers 3244 // with 32 byte critical sections. If there isn't enough space, then we 3245 // have to use a pointer.
3246 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3247 lck = (kmp_user_lock_p)crit; 3248 } else { 3249 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3250 } 3251 KMP_DEBUG_ASSERT(lck != NULL); 3252 3253 if (__kmp_env_consistency_check) 3254 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3255 3256 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3257 3258 #endif // KMP_USE_DYNAMIC_LOCK 3259 } 3260 3261 // used in a critical section reduce block 3262 static __forceinline void 3263 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3264 kmp_critical_name *crit) { 3265 3266 kmp_user_lock_p lck; 3267 3268 #if KMP_USE_DYNAMIC_LOCK 3269 3270 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3271 lck = (kmp_user_lock_p)crit; 3272 if (__kmp_env_consistency_check) 3273 __kmp_pop_sync(global_tid, ct_critical, loc); 3274 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3275 } else { 3276 kmp_indirect_lock_t *ilk = 3277 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3278 if (__kmp_env_consistency_check) 3279 __kmp_pop_sync(global_tid, ct_critical, loc); 3280 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3281 } 3282 3283 #else // KMP_USE_DYNAMIC_LOCK 3284 3285 // We know that the fast reduction code is only emitted by Intel compilers 3286 // with 32 byte critical sections. If there isn't enough space, then we have 3287 // to use a pointer. 3288 if (__kmp_base_user_lock_size > 32) { 3289 lck = *((kmp_user_lock_p *)crit); 3290 KMP_ASSERT(lck != NULL); 3291 } else { 3292 lck = (kmp_user_lock_p)crit; 3293 } 3294 3295 if (__kmp_env_consistency_check) 3296 __kmp_pop_sync(global_tid, ct_critical, loc); 3297 3298 __kmp_release_user_lock_with_checks(lck, global_tid); 3299 3300 #endif // KMP_USE_DYNAMIC_LOCK 3301 } // __kmp_end_critical_section_reduce_block 3302 3303 static __forceinline int 3304 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3305 int *task_state) { 3306 kmp_team_t *team; 3307 3308 // Check if we are inside the teams construct? 3309 if (th->th.th_teams_microtask) { 3310 *team_p = team = th->th.th_team; 3311 if (team->t.t_level == th->th.th_teams_level) { 3312 // This is reduction at teams construct. 3313 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3314 // Let's swap teams temporarily for the reduction. 3315 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3316 th->th.th_team = team->t.t_parent; 3317 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3318 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3319 *task_state = th->th.th_task_state; 3320 th->th.th_task_state = 0; 3321 3322 return 1; 3323 } 3324 } 3325 return 0; 3326 } 3327 3328 static __forceinline void 3329 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3330 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3331 th->th.th_info.ds.ds_tid = 0; 3332 th->th.th_team = team; 3333 th->th.th_team_nproc = team->t.t_nproc; 3334 th->th.th_task_team = team->t.t_task_team[task_state]; 3335 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3336 } 3337 3338 /* 2.a.i. Reduce Block without a terminating barrier */ 3339 /*! 
3340 @ingroup SYNCHRONIZATION 3341 @param loc source location information 3342 @param global_tid global thread number 3343 @param num_vars number of items (variables) to be reduced 3344 @param reduce_size size of data in bytes to be reduced 3345 @param reduce_data pointer to data to be reduced 3346 @param reduce_func callback function providing reduction operation on two 3347 operands and returning result of reduction in lhs_data 3348 @param lck pointer to the unique lock data structure 3349 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3350 threads if atomic reduction needed 3351 3352 The nowait version is used for a reduce clause with the nowait argument. 3353 */ 3354 kmp_int32 3355 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3356 size_t reduce_size, void *reduce_data, 3357 void (*reduce_func)(void *lhs_data, void *rhs_data), 3358 kmp_critical_name *lck) { 3359 3360 KMP_COUNT_BLOCK(REDUCE_nowait); 3361 int retval = 0; 3362 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3363 kmp_info_t *th; 3364 kmp_team_t *team; 3365 int teams_swapped = 0, task_state; 3366 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3367 __kmp_assert_valid_gtid(global_tid); 3368 3369 // why do we need this initialization here at all? 3370 // Reduction clause can not be used as a stand-alone directive. 3371 3372 // do not call __kmp_serial_initialize(), it will be called by 3373 // __kmp_parallel_initialize() if needed 3374 // possible detection of false-positive race by the threadchecker ??? 3375 if (!TCR_4(__kmp_init_parallel)) 3376 __kmp_parallel_initialize(); 3377 3378 __kmp_resume_if_soft_paused(); 3379 3380 // check correctness of reduce block nesting 3381 #if KMP_USE_DYNAMIC_LOCK 3382 if (__kmp_env_consistency_check) 3383 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3384 #else 3385 if (__kmp_env_consistency_check) 3386 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3387 #endif 3388 3389 th = __kmp_thread_from_gtid(global_tid); 3390 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3391 3392 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3393 // the value should be kept in a variable 3394 // the variable should be either a construct-specific or thread-specific 3395 // property, not a team specific property 3396 // (a thread can reach the next reduce block on the next construct, reduce 3397 // method may differ on the next construct) 3398 // an ident_t "loc" parameter could be used as a construct-specific property 3399 // (what if loc == 0?) 
3400 // (if both construct-specific and team-specific variables were shared, 3401 // then unnecessary extra syncs would be needed) 3402 // a thread-specific variable is better regarding two issues above (next 3403 // construct and extra syncs) 3404 // a thread-specific "th_local.reduction_method" variable is used currently 3405 // each thread executes 'determine' and 'set' lines (no need to execute by one 3406 // thread, to avoid unnecessary extra syncs) 3407 3408 packed_reduction_method = __kmp_determine_reduction_method( 3409 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3410 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3411 3412 OMPT_REDUCTION_DECL(th, global_tid); 3413 if (packed_reduction_method == critical_reduce_block) { 3414 3415 OMPT_REDUCTION_BEGIN; 3416 3417 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3418 retval = 1; 3419 3420 } else if (packed_reduction_method == empty_reduce_block) { 3421 3422 OMPT_REDUCTION_BEGIN; 3423 3424 // usage: if team size == 1, no synchronization is required ( Intel 3425 // platforms only ) 3426 retval = 1; 3427 3428 } else if (packed_reduction_method == atomic_reduce_block) { 3429 3430 retval = 2; 3431 3432 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3433 // won't be called by the code gen) 3434 // (it's not quite good, because the checking block has been closed by 3435 // this 'pop', 3436 // but atomic operation has not been executed yet, will be executed 3437 // slightly later, literally on next instruction) 3438 if (__kmp_env_consistency_check) 3439 __kmp_pop_sync(global_tid, ct_reduce, loc); 3440 3441 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3442 tree_reduce_block)) { 3443 3444 // AT: performance issue: a real barrier here 3445 // AT: (if master goes slow, other threads are blocked here waiting for the 3446 // master to come and release them) 3447 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3448 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3449 // be confusing to a customer) 3450 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3451 // might go faster and be more in line with sense of NOWAIT 3452 // AT: TO DO: do epcc test and compare times 3453 3454 // this barrier should be invisible to a customer and to the threading profile 3455 // tool (it's neither a terminating barrier nor customer's code, it's 3456 // used for an internal purpose) 3457 #if OMPT_SUPPORT 3458 // JP: can this barrier potentially lead to task scheduling? 3459 // JP: as long as there is a barrier in the implementation, OMPT should and 3460 // will provide the barrier events 3461 // so we set up the necessary frame/return addresses. 3462 ompt_frame_t *ompt_frame; 3463 if (ompt_enabled.enabled) { 3464 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3465 if (ompt_frame->enter_frame.ptr == NULL) 3466 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3467 } 3468 OMPT_STORE_RETURN_ADDRESS(global_tid); 3469 #endif 3470 #if USE_ITT_NOTIFY 3471 __kmp_threads[global_tid]->th.th_ident = loc; 3472 #endif 3473 retval = 3474 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3475 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3476 retval = (retval != 0) ?
(0) : (1); 3477 #if OMPT_SUPPORT && OMPT_OPTIONAL 3478 if (ompt_enabled.enabled) { 3479 ompt_frame->enter_frame = ompt_data_none; 3480 } 3481 #endif 3482 3483 // all other workers except master should do this pop here 3484 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3485 if (__kmp_env_consistency_check) { 3486 if (retval == 0) { 3487 __kmp_pop_sync(global_tid, ct_reduce, loc); 3488 } 3489 } 3490 3491 } else { 3492 3493 // should never reach this block 3494 KMP_ASSERT(0); // "unexpected method" 3495 } 3496 if (teams_swapped) { 3497 __kmp_restore_swapped_teams(th, team, task_state); 3498 } 3499 KA_TRACE( 3500 10, 3501 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3502 global_tid, packed_reduction_method, retval)); 3503 3504 return retval; 3505 } 3506 3507 /*! 3508 @ingroup SYNCHRONIZATION 3509 @param loc source location information 3510 @param global_tid global thread id. 3511 @param lck pointer to the unique lock data structure 3512 3513 Finish the execution of a reduce nowait. 3514 */ 3515 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3516 kmp_critical_name *lck) { 3517 3518 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3519 3520 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3521 __kmp_assert_valid_gtid(global_tid); 3522 3523 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3524 3525 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3526 3527 if (packed_reduction_method == critical_reduce_block) { 3528 3529 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3530 OMPT_REDUCTION_END; 3531 3532 } else if (packed_reduction_method == empty_reduce_block) { 3533 3534 // usage: if team size == 1, no synchronization is required ( on Intel 3535 // platforms only ) 3536 3537 OMPT_REDUCTION_END; 3538 3539 } else if (packed_reduction_method == atomic_reduce_block) { 3540 3541 // neither master nor other workers should get here 3542 // (code gen does not generate this call in case 2: atomic reduce block) 3543 // actually it's better to remove this elseif at all; 3544 // after removal this value will checked by the 'else' and will assert 3545 3546 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3547 tree_reduce_block)) { 3548 3549 // only master gets here 3550 // OMPT: tree reduction is annotated in the barrier code 3551 3552 } else { 3553 3554 // should never reach this block 3555 KMP_ASSERT(0); // "unexpected method" 3556 } 3557 3558 if (__kmp_env_consistency_check) 3559 __kmp_pop_sync(global_tid, ct_reduce, loc); 3560 3561 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3562 global_tid, packed_reduction_method)); 3563 3564 return; 3565 } 3566 3567 /* 2.a.ii. Reduce Block with a terminating barrier */ 3568 3569 /*! 3570 @ingroup SYNCHRONIZATION 3571 @param loc source location information 3572 @param global_tid global thread number 3573 @param num_vars number of items (variables) to be reduced 3574 @param reduce_size size of data in bytes to be reduced 3575 @param reduce_data pointer to data to be reduced 3576 @param reduce_func callback function providing reduction operation on two 3577 operands and returning result of reduction in lhs_data 3578 @param lck pointer to the unique lock data structure 3579 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3580 threads if atomic reduction needed 3581 3582 A blocking reduce that includes an implicit barrier. 
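/* Illustrative sketch (not part of the runtime): roughly the calling sequence
   a compiler might emit for a nowait reduction such as

       #pragma omp for nowait reduction(+ : sum)

   Names such as reduce_fn, sum_priv and crit_name are hypothetical, and
   reduce_data is shown schematically (an actual compiler packs the private
   copies into a reduction list):

     static kmp_critical_name crit_name;
     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum), &sum_priv,
                                  reduce_fn, &crit_name)) {
     case 1: // this thread folds its partial result in directly
       sum += sum_priv;
       __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
       break;
     case 2: // combine with an atomic update of sum; per the comment above,
             // no __kmpc_end_reduce_nowait() call is generated in this case
       break;
     case 0: // this thread's contribution was already combined elsewhere
     default:
       break;
     }
*/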
/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause can not be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block:
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the master should do this pop here
    // (none of the workers except the master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master executes here (the master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
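/* Illustrative sketch (not part of the runtime): roughly the calling sequence
   a compiler might emit for a blocking reduction such as

       #pragma omp for reduction(+ : sum)

   (no nowait clause). Names are hypothetical and reduce_data is shown
   schematically, as in the nowait sketch above. Note that, unlike the nowait
   case, the end call is assumed here to be emitted for the atomic case as
   well, since the atomic branch of __kmpc_end_reduce() supplies the
   terminating barrier:

     static kmp_critical_name crit_name;
     switch (__kmpc_reduce(loc, gtid, 1, sizeof(sum), &sum_priv, reduce_fn,
                           &crit_name)) {
     case 1: // this thread folds its partial result in directly
       sum += sum_priv;
       __kmpc_end_reduce(loc, gtid, &crit_name);
       break;
     case 2: // partial result is combined with an atomic update of sum
       __kmpc_end_reduce(loc, gtid, &crit_name);
       break;
     case 0: // tree reduction: this worker's contribution was already combined
     default: // inside __kmpc_reduce(); no end call for these threads
       break;
     }
*/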
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loop bounds.

Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
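/* For reference, a sketch of the th_doacross_info layout built above (derived
   directly from the stores in __kmpc_doacross_init; indices are for num_dims
   dimensions):

     [0]                number of dimensions (num_dims)
     [1]                address of sh_buf->doacross_num_done
     [2] [3] [4]        lo, up, st of dimension 0
     [4*j+1 .. 4*j+4]   range_length, lo, up, st of dimension j (j >= 1)

   The range of dimension 0 is not stored; it is only needed for the total
   trip count computed above. __kmpc_doacross_wait() and __kmpc_doacross_post()
   below read the buffer using the same indexing. */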
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
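/* Illustrative sketch (not part of the runtime): roughly how a compiler maps
   a doacross loop onto the entry points above. Given OpenMP source such as

       #pragma omp for ordered(2)
       for (i = 2; i < 9; i += 2)
         for (j = jlo; j <= jup; ++j) {
           #pragma omp ordered depend(sink : i - 2, j)
           ... consume data produced by iteration (i-2, j) ...
           #pragma omp ordered depend(source)
         }

   each thread would, in outline, execute something like the following
   (kmp_dim initialization and the iteration vectors are shown schematically;
   all other names are hypothetical):

     struct kmp_dim dims[2] = {{2, 8, 2}, {jlo, jup, 1}}; // inclusive bounds
     __kmpc_doacross_init(loc, gtid, 2, dims);
     for each assigned iteration (i, j) {
       kmp_int64 sink_vec[2] = {i - 2, j};
       __kmpc_doacross_wait(loc, gtid, sink_vec); // depend(sink : i-2, j)
       ... loop body ...
       kmp_int64 src_vec[2] = {i, j};
       __kmpc_doacross_post(loc, gtid, src_vec);  // depend(source)
     }
     __kmpc_doacross_fini(loc, gtid);
*/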
/* omp_alloc/omp_calloc/omp_realloc/omp_free are only defined for C/C++, not
   for Fortran */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                        free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
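/* Illustrative sketch (not part of the runtime): typical user-level usage of
   the allocation entry points above, using the standard predefined allocator
   omp_default_mem_alloc.

     double *buf =
         (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
     if (buf != NULL) {
       ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/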