/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
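
As an illustration only (the local names here are assumptions, not part of
this interface), compiler-generated code typically captures the value once
per outlined routine:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
// outside any active parallel construct this is 0, the initial (master) thread
@endcode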

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
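 *
 * In debug builds (KMP_DEBUG) with a parallel range filter configured
 * (__kmp_par_range != 0), the implementation below parses loc->psource, a
 * semicolon-separated location string, and compares the file, routine and
 * line fields against the configured range. A sketch of the expected layout
 * (illustrative only; the field values are assumptions):
 * @code
 * ";path/to/file.c;routine_name;line_number;..."
 * @endcode
 * A FALSE return tells the generated code not to fork that construct.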
157 */ 158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 159 #ifndef KMP_DEBUG 160 161 return TRUE; 162 163 #else 164 165 const char *semi2; 166 const char *semi3; 167 int line_no; 168 169 if (__kmp_par_range == 0) { 170 return TRUE; 171 } 172 semi2 = loc->psource; 173 if (semi2 == NULL) { 174 return TRUE; 175 } 176 semi2 = strchr(semi2, ';'); 177 if (semi2 == NULL) { 178 return TRUE; 179 } 180 semi2 = strchr(semi2 + 1, ';'); 181 if (semi2 == NULL) { 182 return TRUE; 183 } 184 if (__kmp_par_range_filename[0]) { 185 const char *name = semi2 - 1; 186 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 187 name--; 188 } 189 if ((*name == '/') || (*name == ';')) { 190 name++; 191 } 192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 193 return __kmp_par_range < 0; 194 } 195 } 196 semi3 = strchr(semi2 + 1, ';'); 197 if (__kmp_par_range_routine[0]) { 198 if ((semi3 != NULL) && (semi3 > semi2) && 199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 200 return __kmp_par_range < 0; 201 } 202 } 203 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 205 return __kmp_par_range > 0; 206 } 207 return __kmp_par_range < 0; 208 } 209 return TRUE; 210 211 #endif /* KMP_DEBUG */ 212 } 213 214 /*! 215 @ingroup THREAD_STATES 216 @param loc Source location information. 217 @return 1 if this thread is executing inside an active parallel region, zero if 218 not. 219 */ 220 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 221 return __kmp_entry_thread()->th.th_root->r.r_active; 222 } 223 224 /*! 225 @ingroup PARALLEL 226 @param loc source location information 227 @param global_tid global thread number 228 @param num_threads number of threads requested for this parallel construct 229 230 Set the number of threads to be used by the next fork spawned by this thread. 231 This call is only required if the parallel construct has a `num_threads` clause. 232 */ 233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 234 kmp_int32 num_threads) { 235 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 236 global_tid, num_threads)); 237 238 __kmp_push_num_threads(loc, global_tid, num_threads); 239 } 240 241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 242 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 243 244 /* the num_threads are automatically popped */ 245 } 246 247 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 248 kmp_int32 proc_bind) { 249 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 250 proc_bind)); 251 252 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 253 } 254 255 /*! 256 @ingroup PARALLEL 257 @param loc source location information 258 @param argc total number of arguments in the ellipsis 259 @param microtask pointer to callback routine consisting of outlined parallel 260 construct 261 @param ... pointers to shared variables that aren't global 262 263 Do the actual fork and call the microtask in the relevant number of threads. 264 */ 265 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 266 int gtid = __kmp_entry_gtid(); 267 268 #if (KMP_STATS_ENABLED) 269 // If we were in a serial region, then stop the serial timer, record 270 // the event, and start parallel region timer 271 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 272 if (previous_state == stats_state_e::SERIAL_REGION) { 273 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 274 } else { 275 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 276 } 277 int inParallel = __kmpc_in_parallel(loc); 278 if (inParallel) { 279 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 280 } else { 281 KMP_COUNT_BLOCK(OMP_PARALLEL); 282 } 283 #endif 284 285 // maybe to save thr_state is enough here 286 { 287 va_list ap; 288 va_start(ap, microtask); 289 290 #if OMPT_SUPPORT 291 ompt_frame_t *ompt_frame; 292 if (ompt_enabled.enabled) { 293 kmp_info_t *master_th = __kmp_threads[gtid]; 294 kmp_team_t *parent_team = master_th->th.th_team; 295 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 296 if (lwt) 297 ompt_frame = &(lwt->ompt_task_info.frame); 298 else { 299 int tid = __kmp_tid_from_gtid(gtid); 300 ompt_frame = &( 301 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 302 } 303 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 304 OMPT_STORE_RETURN_ADDRESS(gtid); 305 } 306 #endif 307 308 #if INCLUDE_SSC_MARKS 309 SSC_MARK_FORKING(); 310 #endif 311 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 312 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 313 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 314 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 315 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 316 &ap 317 #else 318 ap 319 #endif 320 ); 321 #if INCLUDE_SSC_MARKS 322 SSC_MARK_JOINING(); 323 #endif 324 __kmp_join_call(loc, gtid 325 #if OMPT_SUPPORT 326 , 327 fork_context_intel 328 #endif 329 ); 330 331 va_end(ap); 332 } 333 334 #if KMP_STATS_ENABLED 335 if (previous_state == stats_state_e::SERIAL_REGION) { 336 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 337 } else { 338 KMP_POP_PARTITIONED_TIMER(); 339 } 340 #endif // KMP_STATS_ENABLED 341 } 342 343 /*! 344 @ingroup PARALLEL 345 @param loc source location information 346 @param global_tid global thread number 347 @param num_teams number of teams requested for the teams construct 348 @param num_threads number of threads per team requested for the teams construct 349 350 Set the number of teams to be used by the teams construct. 351 This call is only required if the teams construct has a `num_teams` clause 352 or a `thread_limit` clause (or both). 353 */ 354 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 355 kmp_int32 num_teams, kmp_int32 num_threads) { 356 KA_TRACE(20, 357 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 358 global_tid, num_teams, num_threads)); 359 360 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 361 } 362 363 /*! 364 @ingroup PARALLEL 365 @param loc source location information 366 @param argc total number of arguments in the ellipsis 367 @param microtask pointer to callback routine consisting of outlined teams 368 construct 369 @param ... pointers to shared variables that aren't global 370 371 Do the actual fork and call the microtask in the relevant number of threads. 372 */ 373 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 374 ...) 
{
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc  source location information
@param global_tid  global thread number

Enter a serialized parallel construct.
This interface is used to handle a 473 conditional parallel region, like this, 474 @code 475 #pragma omp parallel if (condition) 476 @endcode 477 when the condition is false. 478 */ 479 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 480 // The implementation is now in kmp_runtime.cpp so that it can share static 481 // functions with kmp_fork_call since the tasks to be done are similar in 482 // each case. 483 #if OMPT_SUPPORT 484 OMPT_STORE_RETURN_ADDRESS(global_tid); 485 #endif 486 __kmp_serialized_parallel(loc, global_tid); 487 } 488 489 /*! 490 @ingroup PARALLEL 491 @param loc source location information 492 @param global_tid global thread number 493 494 Leave a serialized parallel construct. 495 */ 496 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 497 kmp_internal_control_t *top; 498 kmp_info_t *this_thr; 499 kmp_team_t *serial_team; 500 501 KC_TRACE(10, 502 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 503 504 /* skip all this code for autopar serialized loops since it results in 505 unacceptable overhead */ 506 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 507 return; 508 509 // Not autopar code 510 if (!TCR_4(__kmp_init_parallel)) 511 __kmp_parallel_initialize(); 512 513 __kmp_resume_if_soft_paused(); 514 515 this_thr = __kmp_threads[global_tid]; 516 serial_team = this_thr->th.th_serial_team; 517 518 kmp_task_team_t *task_team = this_thr->th.th_task_team; 519 // we need to wait for the proxy tasks before finishing the thread 520 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 521 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 522 523 KMP_MB(); 524 KMP_DEBUG_ASSERT(serial_team); 525 KMP_ASSERT(serial_team->t.t_serialized); 526 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 527 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 528 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 529 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 530 531 #if OMPT_SUPPORT 532 if (ompt_enabled.enabled && 533 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 534 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 535 if (ompt_enabled.ompt_callback_implicit_task) { 536 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 537 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 538 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 539 } 540 541 // reset clear the task id only after unlinking the task 542 ompt_data_t *parent_task_data; 543 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 544 545 if (ompt_enabled.ompt_callback_parallel_end) { 546 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 547 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 548 ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); 549 } 550 __ompt_lw_taskteam_unlink(this_thr); 551 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 552 } 553 #endif 554 555 /* If necessary, pop the internal control stack values and replace the team 556 * values */ 557 top = serial_team->t.t_control_stack_top; 558 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 559 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 560 serial_team->t.t_control_stack_top = top->next; 561 __kmp_free(top); 562 } 563 564 // if( serial_team -> t.t_serialized > 1 ) 565 serial_team->t.t_level--; 566 567 /* pop dispatch buffers stack */ 568 
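  // The serialized region being left owns the top dispatch_private_info_t
  // buffer in the chain; the block below frees it and restores the previous
  // buffer (set up when the enclosing level was entered).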
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 569 { 570 dispatch_private_info_t *disp_buffer = 571 serial_team->t.t_dispatch->th_disp_buffer; 572 serial_team->t.t_dispatch->th_disp_buffer = 573 serial_team->t.t_dispatch->th_disp_buffer->next; 574 __kmp_free(disp_buffer); 575 } 576 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 577 578 --serial_team->t.t_serialized; 579 if (serial_team->t.t_serialized == 0) { 580 581 /* return to the parallel section */ 582 583 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 584 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 585 __kmp_clear_x87_fpu_status_word(); 586 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 587 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 588 } 589 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 590 591 this_thr->th.th_team = serial_team->t.t_parent; 592 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 593 594 /* restore values cached in the thread */ 595 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 596 this_thr->th.th_team_master = 597 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 598 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 599 600 /* TODO the below shouldn't need to be adjusted for serialized teams */ 601 this_thr->th.th_dispatch = 602 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 603 604 __kmp_pop_current_task_from_thread(this_thr); 605 606 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 607 this_thr->th.th_current_task->td_flags.executing = 1; 608 609 if (__kmp_tasking_mode != tskm_immediate_exec) { 610 // Copy the task team from the new child / old parent team to the thread. 611 this_thr->th.th_task_team = 612 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 613 KA_TRACE(20, 614 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 615 "team %p\n", 616 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 617 } 618 } else { 619 if (__kmp_tasking_mode != tskm_immediate_exec) { 620 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 621 "depth of serial team %p to %d\n", 622 global_tid, serial_team, serial_team->t.t_serialized)); 623 } 624 } 625 626 if (__kmp_env_consistency_check) 627 __kmp_pop_parallel(global_tid, NULL); 628 #if OMPT_SUPPORT 629 if (ompt_enabled.enabled) 630 this_thr->th.ompt_thread_info.state = 631 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 632 : ompt_state_work_parallel); 633 #endif 634 } 635 636 /*! 637 @ingroup SYNCHRONIZATION 638 @param loc source location information. 639 640 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 641 depending on the memory ordering convention obeyed by the compiler 642 even that may not be necessary). 643 */ 644 void __kmpc_flush(ident_t *loc) { 645 KC_TRACE(10, ("__kmpc_flush: called\n")); 646 647 /* need explicit __mf() here since use volatile instead in library */ 648 KMP_MB(); /* Flush all pending memory write invalidates. */ 649 650 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 651 #if KMP_MIC 652 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 653 // We shouldn't need it, though, since the ABI rules require that 654 // * If the compiler generates NGO stores it also generates the fence 655 // * If users hand-code NGO stores they should insert the fence 656 // therefore no incomplete unordered stores should be visible. 
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here, move along.
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
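
A sketch of the code a compiler typically emits for `#pragma omp master`
(illustrative only; `loc` and `gtid` are assumed to be available in the
generated routine):
@code
if (__kmpc_master(&loc, gtid)) {
  // body of the master region, executed only by the master thread
  __kmpc_end_master(&loc, gtid);
}
@endcode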
761 */ 762 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 763 int status = 0; 764 765 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 766 767 if (!TCR_4(__kmp_init_parallel)) 768 __kmp_parallel_initialize(); 769 770 __kmp_resume_if_soft_paused(); 771 772 if (KMP_MASTER_GTID(global_tid)) { 773 KMP_COUNT_BLOCK(OMP_MASTER); 774 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 775 status = 1; 776 } 777 778 #if OMPT_SUPPORT && OMPT_OPTIONAL 779 if (status) { 780 if (ompt_enabled.ompt_callback_master) { 781 kmp_info_t *this_thr = __kmp_threads[global_tid]; 782 kmp_team_t *team = this_thr->th.th_team; 783 784 int tid = __kmp_tid_from_gtid(global_tid); 785 ompt_callbacks.ompt_callback(ompt_callback_master)( 786 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 787 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 788 OMPT_GET_RETURN_ADDRESS(0)); 789 } 790 } 791 #endif 792 793 if (__kmp_env_consistency_check) { 794 #if KMP_USE_DYNAMIC_LOCK 795 if (status) 796 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 797 else 798 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 799 #else 800 if (status) 801 __kmp_push_sync(global_tid, ct_master, loc, NULL); 802 else 803 __kmp_check_sync(global_tid, ct_master, loc, NULL); 804 #endif 805 } 806 807 return status; 808 } 809 810 /*! 811 @ingroup WORK_SHARING 812 @param loc source location information. 813 @param global_tid global thread number . 814 815 Mark the end of a <tt>master</tt> region. This should only be called by the 816 thread that executes the <tt>master</tt> region. 817 */ 818 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 819 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 820 821 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 822 KMP_POP_PARTITIONED_TIMER(); 823 824 #if OMPT_SUPPORT && OMPT_OPTIONAL 825 kmp_info_t *this_thr = __kmp_threads[global_tid]; 826 kmp_team_t *team = this_thr->th.th_team; 827 if (ompt_enabled.ompt_callback_master) { 828 int tid = __kmp_tid_from_gtid(global_tid); 829 ompt_callbacks.ompt_callback(ompt_callback_master)( 830 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 831 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 832 OMPT_GET_RETURN_ADDRESS(0)); 833 } 834 #endif 835 836 if (__kmp_env_consistency_check) { 837 if (global_tid < 0) 838 KMP_WARNING(ThreadIdentInvalid); 839 840 if (KMP_MASTER_GTID(global_tid)) 841 __kmp_pop_sync(global_tid, ct_master, loc); 842 } 843 } 844 845 /*! 846 @ingroup WORK_SHARING 847 @param loc source location information. 848 @param gtid global thread number. 849 850 Start execution of an <tt>ordered</tt> construct. 
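
The typical lowering inside the body of an ordered loop is sketched here
(illustrative only):
@code
__kmpc_ordered(&loc, gtid);
// statements that must execute in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode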
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
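  // In effect the kmp_critical_name storage supplied by the compiler is reused
  // as a single pointer slot: the thread that wins the compare-and-store below
  // publishes its freshly allocated kmp_indirect_lock_t there, and a thread
  // that loses the race simply reuses the lock the winner installed.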
961 void *idx; 962 kmp_indirect_lock_t **lck; 963 lck = (kmp_indirect_lock_t **)crit; 964 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 965 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 966 KMP_SET_I_LOCK_LOCATION(ilk, loc); 967 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 968 KA_TRACE(20, 969 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 970 #if USE_ITT_BUILD 971 __kmp_itt_critical_creating(ilk->lock, loc); 972 #endif 973 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 974 if (status == 0) { 975 #if USE_ITT_BUILD 976 __kmp_itt_critical_destroyed(ilk->lock); 977 #endif 978 // We don't really need to destroy the unclaimed lock here since it will be 979 // cleaned up at program exit. 980 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 981 } 982 KMP_DEBUG_ASSERT(*lck != NULL); 983 } 984 985 // Fast-path acquire tas lock 986 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 987 { \ 988 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 989 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 990 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 991 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 992 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 993 kmp_uint32 spins; \ 994 KMP_FSYNC_PREPARE(l); \ 995 KMP_INIT_YIELD(spins); \ 996 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 997 do { \ 998 if (TCR_4(__kmp_nth) > \ 999 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1000 KMP_YIELD(TRUE); \ 1001 } else { \ 1002 KMP_YIELD_SPIN(spins); \ 1003 } \ 1004 __kmp_spin_backoff(&backoff); \ 1005 } while ( \ 1006 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1007 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1008 } \ 1009 KMP_FSYNC_ACQUIRED(l); \ 1010 } 1011 1012 // Fast-path test tas lock 1013 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1014 { \ 1015 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1016 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1017 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1018 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1019 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1020 } 1021 1022 // Fast-path release tas lock 1023 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1024 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1025 1026 #if KMP_USE_FUTEX 1027 1028 #include <sys/syscall.h> 1029 #include <unistd.h> 1030 #ifndef FUTEX_WAIT 1031 #define FUTEX_WAIT 0 1032 #endif 1033 #ifndef FUTEX_WAKE 1034 #define FUTEX_WAKE 1 1035 #endif 1036 1037 // Fast-path acquire futex lock 1038 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1039 { \ 1040 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1041 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1042 KMP_MB(); \ 1043 KMP_FSYNC_PREPARE(ftx); \ 1044 kmp_int32 poll_val; \ 1045 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1046 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1047 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1048 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1049 if (!cond) { \ 1050 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1051 poll_val | \ 1052 KMP_LOCK_BUSY(1, futex))) { \ 1053 continue; \ 1054 } \ 1055 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1056 } \ 1057 kmp_int32 rc; \ 1058 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1059 NULL, NULL, 0)) != 0) { \ 1060 continue; \ 1061 } \ 1062 gtid_code |= 1; \ 1063 } \ 1064 KMP_FSYNC_ACQUIRED(ftx); \ 1065 } 1066 1067 // Fast-path test futex lock 1068 
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
    // __kmp_itt_critical_creating() should be called *before* the first usage
    // of the underlying lock. It is the only place where we can guarantee it.
    // There are chances the lock will be destroyed with no usage, but it is
    // not a problem, because this is not a real event seen by the user but
    // rather setting a name for the object (lock). See more details in
    // kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
      // Let ITT know the lock is destroyed and the same memory location may be
      // reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
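
A typical compiler lowering of a named `critical` construct is sketched below
(illustrative only; `crit_name` stands for the zero-initialized
kmp_critical_name object the compiler is assumed to emit for the name):
@code
static kmp_critical_name crit_name;
__kmpc_critical(&loc, gtid, &crit_name);
// body of the critical section
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode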
1157 */ 1158 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1159 kmp_critical_name *crit) { 1160 #if KMP_USE_DYNAMIC_LOCK 1161 #if OMPT_SUPPORT && OMPT_OPTIONAL 1162 OMPT_STORE_RETURN_ADDRESS(global_tid); 1163 #endif // OMPT_SUPPORT 1164 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1165 #else 1166 KMP_COUNT_BLOCK(OMP_CRITICAL); 1167 #if OMPT_SUPPORT && OMPT_OPTIONAL 1168 ompt_state_t prev_state = ompt_state_undefined; 1169 ompt_thread_info_t ti; 1170 #endif 1171 kmp_user_lock_p lck; 1172 1173 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1174 1175 // TODO: add THR_OVHD_STATE 1176 1177 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1178 KMP_CHECK_USER_LOCK_INIT(); 1179 1180 if ((__kmp_user_lock_kind == lk_tas) && 1181 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1182 lck = (kmp_user_lock_p)crit; 1183 } 1184 #if KMP_USE_FUTEX 1185 else if ((__kmp_user_lock_kind == lk_futex) && 1186 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1187 lck = (kmp_user_lock_p)crit; 1188 } 1189 #endif 1190 else { // ticket, queuing or drdpa 1191 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1192 } 1193 1194 if (__kmp_env_consistency_check) 1195 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1196 1197 // since the critical directive binds to all threads, not just the current 1198 // team we have to check this even if we are in a serialized team. 1199 // also, even if we are the uber thread, we still have to conduct the lock, 1200 // as we have to contend with sibling threads. 1201 1202 #if USE_ITT_BUILD 1203 __kmp_itt_critical_acquiring(lck); 1204 #endif /* USE_ITT_BUILD */ 1205 #if OMPT_SUPPORT && OMPT_OPTIONAL 1206 OMPT_STORE_RETURN_ADDRESS(gtid); 1207 void *codeptr_ra = NULL; 1208 if (ompt_enabled.enabled) { 1209 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1210 /* OMPT state update */ 1211 prev_state = ti.state; 1212 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1213 ti.state = ompt_state_wait_critical; 1214 1215 /* OMPT event callback */ 1216 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1217 if (ompt_enabled.ompt_callback_mutex_acquire) { 1218 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1219 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1220 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1221 } 1222 } 1223 #endif 1224 // Value of 'crit' should be good for using as a critical_id of the critical 1225 // section directive. 
1226 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1227 1228 #if USE_ITT_BUILD 1229 __kmp_itt_critical_acquired(lck); 1230 #endif /* USE_ITT_BUILD */ 1231 #if OMPT_SUPPORT && OMPT_OPTIONAL 1232 if (ompt_enabled.enabled) { 1233 /* OMPT state update */ 1234 ti.state = prev_state; 1235 ti.wait_id = 0; 1236 1237 /* OMPT event callback */ 1238 if (ompt_enabled.ompt_callback_mutex_acquired) { 1239 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1240 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1241 } 1242 } 1243 #endif 1244 KMP_POP_PARTITIONED_TIMER(); 1245 1246 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1247 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1248 #endif // KMP_USE_DYNAMIC_LOCK 1249 } 1250 1251 #if KMP_USE_DYNAMIC_LOCK 1252 1253 // Converts the given hint to an internal lock implementation 1254 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1255 #if KMP_USE_TSX 1256 #define KMP_TSX_LOCK(seq) lockseq_##seq 1257 #else 1258 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1259 #endif 1260 1261 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1262 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1263 #else 1264 #define KMP_CPUINFO_RTM 0 1265 #endif 1266 1267 // Hints that do not require further logic 1268 if (hint & kmp_lock_hint_hle) 1269 return KMP_TSX_LOCK(hle); 1270 if (hint & kmp_lock_hint_rtm) 1271 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1272 if (hint & kmp_lock_hint_adaptive) 1273 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1274 1275 // Rule out conflicting hints first by returning the default lock 1276 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1277 return __kmp_user_lock_seq; 1278 if ((hint & omp_lock_hint_speculative) && 1279 (hint & omp_lock_hint_nonspeculative)) 1280 return __kmp_user_lock_seq; 1281 1282 // Do not even consider speculation when it appears to be contended 1283 if (hint & omp_lock_hint_contended) 1284 return lockseq_queuing; 1285 1286 // Uncontended lock without speculation 1287 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1288 return lockseq_tas; 1289 1290 // HLE lock for speculation 1291 if (hint & omp_lock_hint_speculative) 1292 return KMP_TSX_LOCK(hle); 1293 1294 return __kmp_user_lock_seq; 1295 } 1296 1297 #if OMPT_SUPPORT && OMPT_OPTIONAL 1298 #if KMP_USE_DYNAMIC_LOCK 1299 static kmp_mutex_impl_t 1300 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1301 if (user_lock) { 1302 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1303 case 0: 1304 break; 1305 #if KMP_USE_FUTEX 1306 case locktag_futex: 1307 return kmp_mutex_impl_queuing; 1308 #endif 1309 case locktag_tas: 1310 return kmp_mutex_impl_spin; 1311 #if KMP_USE_TSX 1312 case locktag_hle: 1313 return kmp_mutex_impl_speculative; 1314 #endif 1315 default: 1316 return kmp_mutex_impl_none; 1317 } 1318 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1319 } 1320 KMP_ASSERT(ilock); 1321 switch (ilock->type) { 1322 #if KMP_USE_TSX 1323 case locktag_adaptive: 1324 case locktag_rtm: 1325 return kmp_mutex_impl_speculative; 1326 #endif 1327 case locktag_nested_tas: 1328 return kmp_mutex_impl_spin; 1329 #if KMP_USE_FUTEX 1330 case locktag_nested_futex: 1331 #endif 1332 case locktag_ticket: 1333 case locktag_queuing: 1334 case locktag_drdpa: 1335 case locktag_nested_ticket: 1336 case locktag_nested_queuing: 1337 case locktag_nested_drdpa: 1338 return kmp_mutex_impl_queuing; 1339 default: 1340 return kmp_mutex_impl_none; 
1341 } 1342 } 1343 #else 1344 // For locks without dynamic binding 1345 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1346 switch (__kmp_user_lock_kind) { 1347 case lk_tas: 1348 return kmp_mutex_impl_spin; 1349 #if KMP_USE_FUTEX 1350 case lk_futex: 1351 #endif 1352 case lk_ticket: 1353 case lk_queuing: 1354 case lk_drdpa: 1355 return kmp_mutex_impl_queuing; 1356 #if KMP_USE_TSX 1357 case lk_hle: 1358 case lk_rtm: 1359 case lk_adaptive: 1360 return kmp_mutex_impl_speculative; 1361 #endif 1362 default: 1363 return kmp_mutex_impl_none; 1364 } 1365 } 1366 #endif // KMP_USE_DYNAMIC_LOCK 1367 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1368 1369 /*! 1370 @ingroup WORK_SHARING 1371 @param loc source location information. 1372 @param global_tid global thread number. 1373 @param crit identity of the critical section. This could be a pointer to a lock 1374 associated with the critical section, or some other suitably unique value. 1375 @param hint the lock hint. 1376 1377 Enter code protected by a `critical` construct with a hint. The hint value is 1378 used to suggest a lock implementation. This function blocks until the executing 1379 thread can enter the critical section unless the hint suggests use of 1380 speculative execution and the hardware supports it. 1381 */ 1382 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1383 kmp_critical_name *crit, uint32_t hint) { 1384 KMP_COUNT_BLOCK(OMP_CRITICAL); 1385 kmp_user_lock_p lck; 1386 #if OMPT_SUPPORT && OMPT_OPTIONAL 1387 ompt_state_t prev_state = ompt_state_undefined; 1388 ompt_thread_info_t ti; 1389 // This is the case, if called from __kmpc_critical: 1390 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1391 if (!codeptr) 1392 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1393 #endif 1394 1395 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1396 1397 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1398 // Check if it is initialized. 1399 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1400 if (*lk == 0) { 1401 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1402 if (KMP_IS_D_LOCK(lckseq)) { 1403 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1404 KMP_GET_D_TAG(lckseq)); 1405 } else { 1406 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1407 } 1408 } 1409 // Branch for accessing the actual lock object and set operation. This 1410 // branching is inevitable since this lock initialization does not follow the 1411 // normal dispatch path (lock table is not used). 
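  // A nonzero direct-lock tag means the lock object itself is stored in the
  // kmp_critical_name word (TAS/futex style); a zero tag means the word holds
  // a pointer to an allocated kmp_indirect_lock_t, handled in the else branch
  // below.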
1412 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1413 lck = (kmp_user_lock_p)lk; 1414 if (__kmp_env_consistency_check) { 1415 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1416 __kmp_map_hint_to_lock(hint)); 1417 } 1418 #if USE_ITT_BUILD 1419 __kmp_itt_critical_acquiring(lck); 1420 #endif 1421 #if OMPT_SUPPORT && OMPT_OPTIONAL 1422 if (ompt_enabled.enabled) { 1423 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1424 /* OMPT state update */ 1425 prev_state = ti.state; 1426 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1427 ti.state = ompt_state_wait_critical; 1428 1429 /* OMPT event callback */ 1430 if (ompt_enabled.ompt_callback_mutex_acquire) { 1431 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1432 ompt_mutex_critical, (unsigned int)hint, 1433 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1434 codeptr); 1435 } 1436 } 1437 #endif 1438 #if KMP_USE_INLINED_TAS 1439 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1440 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1441 } else 1442 #elif KMP_USE_INLINED_FUTEX 1443 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1444 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1445 } else 1446 #endif 1447 { 1448 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1449 } 1450 } else { 1451 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1452 lck = ilk->lock; 1453 if (__kmp_env_consistency_check) { 1454 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1455 __kmp_map_hint_to_lock(hint)); 1456 } 1457 #if USE_ITT_BUILD 1458 __kmp_itt_critical_acquiring(lck); 1459 #endif 1460 #if OMPT_SUPPORT && OMPT_OPTIONAL 1461 if (ompt_enabled.enabled) { 1462 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1463 /* OMPT state update */ 1464 prev_state = ti.state; 1465 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1466 ti.state = ompt_state_wait_critical; 1467 1468 /* OMPT event callback */ 1469 if (ompt_enabled.ompt_callback_mutex_acquire) { 1470 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1471 ompt_mutex_critical, (unsigned int)hint, 1472 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1473 codeptr); 1474 } 1475 } 1476 #endif 1477 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1478 } 1479 KMP_POP_PARTITIONED_TIMER(); 1480 1481 #if USE_ITT_BUILD 1482 __kmp_itt_critical_acquired(lck); 1483 #endif /* USE_ITT_BUILD */ 1484 #if OMPT_SUPPORT && OMPT_OPTIONAL 1485 if (ompt_enabled.enabled) { 1486 /* OMPT state update */ 1487 ti.state = prev_state; 1488 ti.wait_id = 0; 1489 1490 /* OMPT event callback */ 1491 if (ompt_enabled.ompt_callback_mutex_acquired) { 1492 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1493 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1494 } 1495 } 1496 #endif 1497 1498 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1499 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1500 } // __kmpc_critical_with_hint 1501 1502 #endif // KMP_USE_DYNAMIC_LOCK 1503 1504 /*! 1505 @ingroup WORK_SHARING 1506 @param loc source location information. 1507 @param global_tid global thread number . 1508 @param crit identity of the critical section. This could be a pointer to a lock 1509 associated with the critical section, or some other suitably unique value. 1510 1511 Leave a critical section, releasing any lock that was held during its execution. 
1512 */ 1513 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1514 kmp_critical_name *crit) { 1515 kmp_user_lock_p lck; 1516 1517 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1518 1519 #if KMP_USE_DYNAMIC_LOCK 1520 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1521 lck = (kmp_user_lock_p)crit; 1522 KMP_ASSERT(lck != NULL); 1523 if (__kmp_env_consistency_check) { 1524 __kmp_pop_sync(global_tid, ct_critical, loc); 1525 } 1526 #if USE_ITT_BUILD 1527 __kmp_itt_critical_releasing(lck); 1528 #endif 1529 #if KMP_USE_INLINED_TAS 1530 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1531 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1532 } else 1533 #elif KMP_USE_INLINED_FUTEX 1534 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1535 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1536 } else 1537 #endif 1538 { 1539 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1540 } 1541 } else { 1542 kmp_indirect_lock_t *ilk = 1543 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1544 KMP_ASSERT(ilk != NULL); 1545 lck = ilk->lock; 1546 if (__kmp_env_consistency_check) { 1547 __kmp_pop_sync(global_tid, ct_critical, loc); 1548 } 1549 #if USE_ITT_BUILD 1550 __kmp_itt_critical_releasing(lck); 1551 #endif 1552 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1553 } 1554 1555 #else // KMP_USE_DYNAMIC_LOCK 1556 1557 if ((__kmp_user_lock_kind == lk_tas) && 1558 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1559 lck = (kmp_user_lock_p)crit; 1560 } 1561 #if KMP_USE_FUTEX 1562 else if ((__kmp_user_lock_kind == lk_futex) && 1563 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1564 lck = (kmp_user_lock_p)crit; 1565 } 1566 #endif 1567 else { // ticket, queuing or drdpa 1568 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1569 } 1570 1571 KMP_ASSERT(lck != NULL); 1572 1573 if (__kmp_env_consistency_check) 1574 __kmp_pop_sync(global_tid, ct_critical, loc); 1575 1576 #if USE_ITT_BUILD 1577 __kmp_itt_critical_releasing(lck); 1578 #endif /* USE_ITT_BUILD */ 1579 // Value of 'crit' should be good for using as a critical_id of the critical 1580 // section directive. 1581 __kmp_release_user_lock_with_checks(lck, global_tid); 1582 1583 #endif // KMP_USE_DYNAMIC_LOCK 1584 1585 #if OMPT_SUPPORT && OMPT_OPTIONAL 1586 /* OMPT release event triggers after lock is released; place here to trigger 1587 * for all #if branches */ 1588 OMPT_STORE_RETURN_ADDRESS(global_tid); 1589 if (ompt_enabled.ompt_callback_mutex_released) { 1590 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1591 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1592 OMPT_LOAD_RETURN_ADDRESS(0)); 1593 } 1594 #endif 1595 1596 KMP_POP_PARTITIONED_TIMER(); 1597 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1598 } 1599 1600 /*! 1601 @ingroup SYNCHRONIZATION 1602 @param loc source location information 1603 @param global_tid thread id. 1604 @return one if the thread should execute the master block, zero otherwise 1605 1606 Start execution of a combined barrier and master. The barrier is executed inside 1607 this function. 
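
The intended usage pattern is sketched below (illustrative only):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // code executed by the master thread while the other threads wait
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode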
1608 */ 1609 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1610 int status; 1611 1612 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1613 1614 if (!TCR_4(__kmp_init_parallel)) 1615 __kmp_parallel_initialize(); 1616 1617 __kmp_resume_if_soft_paused(); 1618 1619 if (__kmp_env_consistency_check) 1620 __kmp_check_barrier(global_tid, ct_barrier, loc); 1621 1622 #if OMPT_SUPPORT 1623 ompt_frame_t *ompt_frame; 1624 if (ompt_enabled.enabled) { 1625 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1626 if (ompt_frame->enter_frame.ptr == NULL) 1627 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1628 OMPT_STORE_RETURN_ADDRESS(global_tid); 1629 } 1630 #endif 1631 #if USE_ITT_NOTIFY 1632 __kmp_threads[global_tid]->th.th_ident = loc; 1633 #endif 1634 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1635 #if OMPT_SUPPORT && OMPT_OPTIONAL 1636 if (ompt_enabled.enabled) { 1637 ompt_frame->enter_frame = ompt_data_none; 1638 } 1639 #endif 1640 1641 return (status != 0) ? 0 : 1; 1642 } 1643 1644 /*! 1645 @ingroup SYNCHRONIZATION 1646 @param loc source location information 1647 @param global_tid thread id. 1648 1649 Complete the execution of a combined barrier and master. This function should 1650 only be called at the completion of the <tt>master</tt> code. Other threads will 1651 still be waiting at the barrier and this call releases them. 1652 */ 1653 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1654 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1655 1656 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1657 } 1658 1659 /*! 1660 @ingroup SYNCHRONIZATION 1661 @param loc source location information 1662 @param global_tid thread id. 1663 @return one if the thread should execute the master block, zero otherwise 1664 1665 Start execution of a combined barrier and master(nowait) construct. 1666 The barrier is executed inside this function. 1667 There is no equivalent "end" function, since the 1668 */ 1669 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1670 kmp_int32 ret; 1671 1672 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1673 1674 if (!TCR_4(__kmp_init_parallel)) 1675 __kmp_parallel_initialize(); 1676 1677 __kmp_resume_if_soft_paused(); 1678 1679 if (__kmp_env_consistency_check) { 1680 if (loc == 0) { 1681 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 
1682 } 1683 __kmp_check_barrier(global_tid, ct_barrier, loc); 1684 } 1685 1686 #if OMPT_SUPPORT 1687 ompt_frame_t *ompt_frame; 1688 if (ompt_enabled.enabled) { 1689 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1690 if (ompt_frame->enter_frame.ptr == NULL) 1691 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1692 OMPT_STORE_RETURN_ADDRESS(global_tid); 1693 } 1694 #endif 1695 #if USE_ITT_NOTIFY 1696 __kmp_threads[global_tid]->th.th_ident = loc; 1697 #endif 1698 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1699 #if OMPT_SUPPORT && OMPT_OPTIONAL 1700 if (ompt_enabled.enabled) { 1701 ompt_frame->enter_frame = ompt_data_none; 1702 } 1703 #endif 1704 1705 ret = __kmpc_master(loc, global_tid); 1706 1707 if (__kmp_env_consistency_check) { 1708 /* there's no __kmpc_end_master called; so the (stats) */ 1709 /* actions of __kmpc_end_master are done here */ 1710 1711 if (global_tid < 0) { 1712 KMP_WARNING(ThreadIdentInvalid); 1713 } 1714 if (ret) { 1715 /* only one thread should do the pop since only */ 1716 /* one did the push (see __kmpc_master()) */ 1717 1718 __kmp_pop_sync(global_tid, ct_master, loc); 1719 } 1720 } 1721 1722 return (ret); 1723 } 1724 1725 /* The BARRIER for a SINGLE process section is always explicit */ 1726 /*! 1727 @ingroup WORK_SHARING 1728 @param loc source location information 1729 @param global_tid global thread number 1730 @return One if this thread should execute the single construct, zero otherwise. 1731 1732 Test whether to execute a <tt>single</tt> construct. 1733 There are no implicit barriers in the two "single" calls, rather the compiler 1734 should introduce an explicit barrier if it is required. 1735 */ 1736 1737 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1738 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1739 1740 if (rc) { 1741 // We are going to execute the single statement, so we should count it. 1742 KMP_COUNT_BLOCK(OMP_SINGLE); 1743 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1744 } 1745 1746 #if OMPT_SUPPORT && OMPT_OPTIONAL 1747 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1748 kmp_team_t *team = this_thr->th.th_team; 1749 int tid = __kmp_tid_from_gtid(global_tid); 1750 1751 if (ompt_enabled.enabled) { 1752 if (rc) { 1753 if (ompt_enabled.ompt_callback_work) { 1754 ompt_callbacks.ompt_callback(ompt_callback_work)( 1755 ompt_work_single_executor, ompt_scope_begin, 1756 &(team->t.ompt_team_info.parallel_data), 1757 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1758 1, OMPT_GET_RETURN_ADDRESS(0)); 1759 } 1760 } else { 1761 if (ompt_enabled.ompt_callback_work) { 1762 ompt_callbacks.ompt_callback(ompt_callback_work)( 1763 ompt_work_single_other, ompt_scope_begin, 1764 &(team->t.ompt_team_info.parallel_data), 1765 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1766 1, OMPT_GET_RETURN_ADDRESS(0)); 1767 ompt_callbacks.ompt_callback(ompt_callback_work)( 1768 ompt_work_single_other, ompt_scope_end, 1769 &(team->t.ompt_team_info.parallel_data), 1770 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1771 1, OMPT_GET_RETURN_ADDRESS(0)); 1772 } 1773 } 1774 } 1775 #endif 1776 1777 return rc; 1778 } 1779 1780 /*! 1781 @ingroup WORK_SHARING 1782 @param loc source location information 1783 @param global_tid global thread number 1784 1785 Mark the end of a <tt>single</tt> construct. This function should 1786 only be called by the thread that executed the block of code protected 1787 by the `single` construct. 
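
The overall pattern for a `single` construct is sketched here (illustrative
only; as noted above, any required barrier is emitted explicitly by the
compiler):
@code
if (__kmpc_single(&loc, gtid)) {
  // body of the single region
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted when 'nowait' is specified
@endcode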
1788 */ 1789 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1790 __kmp_exit_single(global_tid); 1791 KMP_POP_PARTITIONED_TIMER(); 1792 1793 #if OMPT_SUPPORT && OMPT_OPTIONAL 1794 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1795 kmp_team_t *team = this_thr->th.th_team; 1796 int tid = __kmp_tid_from_gtid(global_tid); 1797 1798 if (ompt_enabled.ompt_callback_work) { 1799 ompt_callbacks.ompt_callback(ompt_callback_work)( 1800 ompt_work_single_executor, ompt_scope_end, 1801 &(team->t.ompt_team_info.parallel_data), 1802 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1803 OMPT_GET_RETURN_ADDRESS(0)); 1804 } 1805 #endif 1806 } 1807 1808 /*! 1809 @ingroup WORK_SHARING 1810 @param loc Source location 1811 @param global_tid Global thread id 1812 1813 Mark the end of a statically scheduled loop. 1814 */ 1815 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1816 KMP_POP_PARTITIONED_TIMER(); 1817 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1818 1819 #if OMPT_SUPPORT && OMPT_OPTIONAL 1820 if (ompt_enabled.ompt_callback_work) { 1821 ompt_work_t ompt_work_type = ompt_work_loop; 1822 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1823 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1824 // Determine workshare type 1825 if (loc != NULL) { 1826 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1827 ompt_work_type = ompt_work_loop; 1828 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1829 ompt_work_type = ompt_work_sections; 1830 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1831 ompt_work_type = ompt_work_distribute; 1832 } else { 1833 // use default set above. 1834 // a warning about this case is provided in __kmpc_for_static_init 1835 } 1836 KMP_DEBUG_ASSERT(ompt_work_type); 1837 } 1838 ompt_callbacks.ompt_callback(ompt_callback_work)( 1839 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1840 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1841 } 1842 #endif 1843 if (__kmp_env_consistency_check) 1844 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1845 } 1846 1847 // User routines which take C-style arguments (call by value) 1848 // different from the Fortran equivalent routines 1849 1850 void ompc_set_num_threads(int arg) { 1851 // !!!!! TODO: check the per-task binding 1852 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1853 } 1854 1855 void ompc_set_dynamic(int flag) { 1856 kmp_info_t *thread; 1857 1858 /* For the thread-private implementation of the internal controls */ 1859 thread = __kmp_entry_thread(); 1860 1861 __kmp_save_internal_controls(thread); 1862 1863 set__dynamic(thread, flag ? TRUE : FALSE); 1864 } 1865 1866 void ompc_set_nested(int flag) { 1867 kmp_info_t *thread; 1868 1869 /* For the thread-private internal controls implementation */ 1870 thread = __kmp_entry_thread(); 1871 1872 __kmp_save_internal_controls(thread); 1873 1874 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1875 } 1876 1877 void ompc_set_max_active_levels(int max_active_levels) { 1878 /* TO DO */ 1879 /* we want per-task implementation of this internal control */ 1880 1881 /* For the per-thread internal controls implementation */ 1882 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1883 } 1884 1885 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1886 // !!!!! 
TODO: check the per-task binding 1887 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1888 } 1889 1890 int ompc_get_ancestor_thread_num(int level) { 1891 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1892 } 1893 1894 int ompc_get_team_size(int level) { 1895 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1896 } 1897 1898 /* OpenMP 5.0 Affinity Format API */ 1899 1900 void ompc_set_affinity_format(char const *format) { 1901 if (!__kmp_init_serial) { 1902 __kmp_serial_initialize(); 1903 } 1904 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1905 format, KMP_STRLEN(format) + 1); 1906 } 1907 1908 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1909 size_t format_size; 1910 if (!__kmp_init_serial) { 1911 __kmp_serial_initialize(); 1912 } 1913 format_size = KMP_STRLEN(__kmp_affinity_format); 1914 if (buffer && size) { 1915 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1916 format_size + 1); 1917 } 1918 return format_size; 1919 } 1920 1921 void ompc_display_affinity(char const *format) { 1922 int gtid; 1923 if (!TCR_4(__kmp_init_middle)) { 1924 __kmp_middle_initialize(); 1925 } 1926 gtid = __kmp_get_gtid(); 1927 __kmp_aux_display_affinity(gtid, format); 1928 } 1929 1930 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1931 char const *format) { 1932 int gtid; 1933 size_t num_required; 1934 kmp_str_buf_t capture_buf; 1935 if (!TCR_4(__kmp_init_middle)) { 1936 __kmp_middle_initialize(); 1937 } 1938 gtid = __kmp_get_gtid(); 1939 __kmp_str_buf_init(&capture_buf); 1940 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1941 if (buffer && buf_size) { 1942 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1943 capture_buf.used + 1); 1944 } 1945 __kmp_str_buf_free(&capture_buf); 1946 return num_required; 1947 } 1948 1949 void kmpc_set_stacksize(int arg) { 1950 // __kmp_aux_set_stacksize initializes the library if needed 1951 __kmp_aux_set_stacksize(arg); 1952 } 1953 1954 void kmpc_set_stacksize_s(size_t arg) { 1955 // __kmp_aux_set_stacksize initializes the library if needed 1956 __kmp_aux_set_stacksize(arg); 1957 } 1958 1959 void kmpc_set_blocktime(int arg) { 1960 int gtid, tid; 1961 kmp_info_t *thread; 1962 1963 gtid = __kmp_entry_gtid(); 1964 tid = __kmp_tid_from_gtid(gtid); 1965 thread = __kmp_thread_from_gtid(gtid); 1966 1967 __kmp_aux_set_blocktime(arg, thread, tid); 1968 } 1969 1970 void kmpc_set_library(int arg) { 1971 // __kmp_user_set_library initializes the library if needed 1972 __kmp_user_set_library((enum library_type)arg); 1973 } 1974 1975 void kmpc_set_defaults(char const *str) { 1976 // __kmp_aux_set_defaults initializes the library if needed 1977 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1978 } 1979 1980 void kmpc_set_disp_num_buffers(int arg) { 1981 // ignore after initialization because some teams have already 1982 // allocated dispatch buffers 1983 if (__kmp_init_serial == 0 && arg > 0) 1984 __kmp_dispatch_num_buffers = arg; 1985 } 1986 1987 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1988 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1989 return -1; 1990 #else 1991 if (!TCR_4(__kmp_init_middle)) { 1992 __kmp_middle_initialize(); 1993 } 1994 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1995 #endif 1996 } 1997 1998 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1999 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2000 return -1; 2001 #else 2002 if (!TCR_4(__kmp_init_middle)) { 2003 __kmp_middle_initialize(); 
2004 } 2005 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2006 #endif 2007 } 2008 2009 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2010 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2011 return -1; 2012 #else 2013 if (!TCR_4(__kmp_init_middle)) { 2014 __kmp_middle_initialize(); 2015 } 2016 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2017 #endif 2018 } 2019 2020 /* -------------------------------------------------------------------------- */ 2021 /*! 2022 @ingroup THREADPRIVATE 2023 @param loc source location information 2024 @param gtid global thread number 2025 @param cpy_size size of the cpy_data buffer 2026 @param cpy_data pointer to data to be copied 2027 @param cpy_func helper function to call for copying data 2028 @param didit flag variable: 1=single thread; 0=not single thread 2029 2030 __kmpc_copyprivate implements the interface for the private data broadcast 2031 needed for the copyprivate clause associated with a single region in an 2032 OpenMP<sup>*</sup> program (both C and Fortran). 2033 All threads participating in the parallel region call this routine. 2034 One of the threads (called the single thread) should have the <tt>didit</tt> 2035 variable set to 1 and all other threads should have that variable set to 0. 2036 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2037 2038 The OpenMP specification forbids the use of nowait on the single region when a 2039 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2040 barrier internally to avoid race conditions, so the code generation for the 2041 single region should avoid generating a barrier after the call to @ref 2042 __kmpc_copyprivate. 2043 2044 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2045 The <tt>loc</tt> parameter is a pointer to source location information. 2046 2047 Internal implementation: The single thread will first copy its descriptor 2048 address (cpy_data) to a team-private location, then the other threads will each 2049 call the function pointed to by the parameter cpy_func, which carries out the 2050 copy by copying the data using the cpy_data buffer. 2051 2052 The cpy_func routine used for the copy and the contents of the data area defined 2053 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2054 to be done. For instance, the cpy_data buffer can hold the actual data to be 2055 copied or it may hold a list of pointers to the data. The cpy_func routine must 2056 interpret the cpy_data buffer appropriately. 2057 2058 The interface to cpy_func is as follows: 2059 @code 2060 void cpy_func( void *destination, void *source ) 2061 @endcode 2062 where void *destination is the cpy_data pointer for the thread being copied to 2063 and void *source is the cpy_data pointer for the thread being copied from. 
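For illustration only (the names below are placeholders, not part of the runtime
interface), broadcasting a single <tt>double</tt> could use a helper that treats
cpy_data as the address of the variable itself:
@code
// Copies the value broadcast by the single thread into this thread's copy.
static void copy_one_double(void *destination, void *source) {
  *(double *)destination = *(double *)source;
}

// Each thread passes the address of its own copy of the variable;
// didit is 1 only on the thread that executed the single region.
double x;
__kmpc_copyprivate(loc, gtid, sizeof(double), &x, copy_one_double, didit);
@endcode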
2064 */ 2065 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2066 void *cpy_data, void (*cpy_func)(void *, void *), 2067 kmp_int32 didit) { 2068 void **data_ptr; 2069 2070 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2071 2072 KMP_MB(); 2073 2074 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2075 2076 if (__kmp_env_consistency_check) { 2077 if (loc == 0) { 2078 KMP_WARNING(ConstructIdentInvalid); 2079 } 2080 } 2081 2082 // ToDo: Optimize the following two barriers into some kind of split barrier 2083 2084 if (didit) 2085 *data_ptr = cpy_data; 2086 2087 #if OMPT_SUPPORT 2088 ompt_frame_t *ompt_frame; 2089 if (ompt_enabled.enabled) { 2090 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2091 if (ompt_frame->enter_frame.ptr == NULL) 2092 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2093 OMPT_STORE_RETURN_ADDRESS(gtid); 2094 } 2095 #endif 2096 /* This barrier is not a barrier region boundary */ 2097 #if USE_ITT_NOTIFY 2098 __kmp_threads[gtid]->th.th_ident = loc; 2099 #endif 2100 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2101 2102 if (!didit) 2103 (*cpy_func)(cpy_data, *data_ptr); 2104 2105 // Consider next barrier a user-visible barrier for barrier region boundaries 2106 // Nesting checks are already handled by the single construct checks 2107 2108 #if OMPT_SUPPORT 2109 if (ompt_enabled.enabled) { 2110 OMPT_STORE_RETURN_ADDRESS(gtid); 2111 } 2112 #endif 2113 #if USE_ITT_NOTIFY 2114 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2115 // tasks can overwrite the location) 2116 #endif 2117 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2118 #if OMPT_SUPPORT && OMPT_OPTIONAL 2119 if (ompt_enabled.enabled) { 2120 ompt_frame->enter_frame = ompt_data_none; 2121 } 2122 #endif 2123 } 2124 2125 /* -------------------------------------------------------------------------- */ 2126 2127 #define INIT_LOCK __kmp_init_user_lock_with_checks 2128 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2129 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2130 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2131 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2132 #define ACQUIRE_NESTED_LOCK_TIMED \ 2133 __kmp_acquire_nested_user_lock_with_checks_timed 2134 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2135 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2136 #define TEST_LOCK __kmp_test_user_lock_with_checks 2137 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2138 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2139 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2140 2141 // TODO: Make check abort messages use location info & pass it into 2142 // with_checks routines 2143 2144 #if KMP_USE_DYNAMIC_LOCK 2145 2146 // internal lock initializer 2147 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2148 kmp_dyna_lockseq_t seq) { 2149 if (KMP_IS_D_LOCK(seq)) { 2150 KMP_INIT_D_LOCK(lock, seq); 2151 #if USE_ITT_BUILD 2152 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2153 #endif 2154 } else { 2155 KMP_INIT_I_LOCK(lock, seq); 2156 #if USE_ITT_BUILD 2157 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2158 __kmp_itt_lock_creating(ilk->lock, loc); 2159 #endif 2160 } 2161 } 2162 2163 // internal nest lock initializer 2164 static __forceinline void 2165 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2166 kmp_dyna_lockseq_t seq) { 2167 #if KMP_USE_TSX 2168 // Don't have nested lock implementation for speculative locks 2169 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2170 seq = __kmp_user_lock_seq; 2171 #endif 2172 switch (seq) { 2173 case lockseq_tas: 2174 seq = lockseq_nested_tas; 2175 break; 2176 #if KMP_USE_FUTEX 2177 case lockseq_futex: 2178 seq = lockseq_nested_futex; 2179 break; 2180 #endif 2181 case lockseq_ticket: 2182 seq = lockseq_nested_ticket; 2183 break; 2184 case lockseq_queuing: 2185 seq = lockseq_nested_queuing; 2186 break; 2187 case lockseq_drdpa: 2188 seq = lockseq_nested_drdpa; 2189 break; 2190 default: 2191 seq = lockseq_nested_queuing; 2192 } 2193 KMP_INIT_I_LOCK(lock, seq); 2194 #if USE_ITT_BUILD 2195 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2196 __kmp_itt_lock_creating(ilk->lock, loc); 2197 #endif 2198 } 2199 2200 /* initialize the lock with a hint */ 2201 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2202 uintptr_t hint) { 2203 KMP_DEBUG_ASSERT(__kmp_init_serial); 2204 if (__kmp_env_consistency_check && user_lock == NULL) { 2205 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2206 } 2207 2208 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2209 2210 #if OMPT_SUPPORT && OMPT_OPTIONAL 2211 // This is the case, if called from omp_init_lock_with_hint: 2212 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2213 if (!codeptr) 2214 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2215 if (ompt_enabled.ompt_callback_lock_init) { 2216 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2217 ompt_mutex_lock, (omp_lock_hint_t)hint, 2218 __ompt_get_mutex_impl_type(user_lock), 2219 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2220 } 2221 #endif 2222 } 2223 2224 /* initialize the lock with a hint */ 2225 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2226 void **user_lock, uintptr_t hint) { 2227 KMP_DEBUG_ASSERT(__kmp_init_serial); 2228 if (__kmp_env_consistency_check && user_lock == NULL) { 2229 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2230 } 2231 2232 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2233 2234 #if OMPT_SUPPORT && OMPT_OPTIONAL 2235 // This is the case, if called from omp_init_lock_with_hint: 2236 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2237 if (!codeptr) 2238 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2239 if (ompt_enabled.ompt_callback_lock_init) { 2240 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2241 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2242 __ompt_get_mutex_impl_type(user_lock), 2243 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2244 } 2245 #endif 2246 } 2247 2248 #endif // KMP_USE_DYNAMIC_LOCK 2249 2250 /* initialize the lock */ 2251 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2252 #if KMP_USE_DYNAMIC_LOCK 2253 2254 KMP_DEBUG_ASSERT(__kmp_init_serial); 2255 if (__kmp_env_consistency_check && user_lock == NULL) { 2256 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2257 } 2258 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2259 2260 #if OMPT_SUPPORT && OMPT_OPTIONAL 2261 // This is the case, if called from omp_init_lock_with_hint: 2262 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2263 if (!codeptr) 2264 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2265 if (ompt_enabled.ompt_callback_lock_init) { 2266 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2267 ompt_mutex_lock, omp_lock_hint_none, 2268 
__ompt_get_mutex_impl_type(user_lock), 2269 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2270 } 2271 #endif 2272 2273 #else // KMP_USE_DYNAMIC_LOCK 2274 2275 static char const *const func = "omp_init_lock"; 2276 kmp_user_lock_p lck; 2277 KMP_DEBUG_ASSERT(__kmp_init_serial); 2278 2279 if (__kmp_env_consistency_check) { 2280 if (user_lock == NULL) { 2281 KMP_FATAL(LockIsUninitialized, func); 2282 } 2283 } 2284 2285 KMP_CHECK_USER_LOCK_INIT(); 2286 2287 if ((__kmp_user_lock_kind == lk_tas) && 2288 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2289 lck = (kmp_user_lock_p)user_lock; 2290 } 2291 #if KMP_USE_FUTEX 2292 else if ((__kmp_user_lock_kind == lk_futex) && 2293 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2294 lck = (kmp_user_lock_p)user_lock; 2295 } 2296 #endif 2297 else { 2298 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2299 } 2300 INIT_LOCK(lck); 2301 __kmp_set_user_lock_location(lck, loc); 2302 2303 #if OMPT_SUPPORT && OMPT_OPTIONAL 2304 // This is the case, if called from omp_init_lock_with_hint: 2305 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2306 if (!codeptr) 2307 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2308 if (ompt_enabled.ompt_callback_lock_init) { 2309 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2310 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2311 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2312 } 2313 #endif 2314 2315 #if USE_ITT_BUILD 2316 __kmp_itt_lock_creating(lck); 2317 #endif /* USE_ITT_BUILD */ 2318 2319 #endif // KMP_USE_DYNAMIC_LOCK 2320 } // __kmpc_init_lock 2321 2322 /* initialize the lock */ 2323 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2324 #if KMP_USE_DYNAMIC_LOCK 2325 2326 KMP_DEBUG_ASSERT(__kmp_init_serial); 2327 if (__kmp_env_consistency_check && user_lock == NULL) { 2328 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2329 } 2330 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2331 2332 #if OMPT_SUPPORT && OMPT_OPTIONAL 2333 // This is the case, if called from omp_init_lock_with_hint: 2334 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2335 if (!codeptr) 2336 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2337 if (ompt_enabled.ompt_callback_lock_init) { 2338 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2339 ompt_mutex_nest_lock, omp_lock_hint_none, 2340 __ompt_get_mutex_impl_type(user_lock), 2341 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2342 } 2343 #endif 2344 2345 #else // KMP_USE_DYNAMIC_LOCK 2346 2347 static char const *const func = "omp_init_nest_lock"; 2348 kmp_user_lock_p lck; 2349 KMP_DEBUG_ASSERT(__kmp_init_serial); 2350 2351 if (__kmp_env_consistency_check) { 2352 if (user_lock == NULL) { 2353 KMP_FATAL(LockIsUninitialized, func); 2354 } 2355 } 2356 2357 KMP_CHECK_USER_LOCK_INIT(); 2358 2359 if ((__kmp_user_lock_kind == lk_tas) && 2360 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2361 OMP_NEST_LOCK_T_SIZE)) { 2362 lck = (kmp_user_lock_p)user_lock; 2363 } 2364 #if KMP_USE_FUTEX 2365 else if ((__kmp_user_lock_kind == lk_futex) && 2366 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2367 OMP_NEST_LOCK_T_SIZE)) { 2368 lck = (kmp_user_lock_p)user_lock; 2369 } 2370 #endif 2371 else { 2372 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2373 } 2374 2375 INIT_NESTED_LOCK(lck); 2376 __kmp_set_user_lock_location(lck, loc); 2377 2378 #if OMPT_SUPPORT && OMPT_OPTIONAL 2379 // This is the case, if called from omp_init_lock_with_hint: 2380 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
2381 if (!codeptr) 2382 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2383 if (ompt_enabled.ompt_callback_lock_init) { 2384 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2385 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2386 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2387 } 2388 #endif 2389 2390 #if USE_ITT_BUILD 2391 __kmp_itt_lock_creating(lck); 2392 #endif /* USE_ITT_BUILD */ 2393 2394 #endif // KMP_USE_DYNAMIC_LOCK 2395 } // __kmpc_init_nest_lock 2396 2397 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2398 #if KMP_USE_DYNAMIC_LOCK 2399 2400 #if USE_ITT_BUILD 2401 kmp_user_lock_p lck; 2402 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2403 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2404 } else { 2405 lck = (kmp_user_lock_p)user_lock; 2406 } 2407 __kmp_itt_lock_destroyed(lck); 2408 #endif 2409 #if OMPT_SUPPORT && OMPT_OPTIONAL 2410 // This is the case, if called from omp_init_lock_with_hint: 2411 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2412 if (!codeptr) 2413 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2414 if (ompt_enabled.ompt_callback_lock_destroy) { 2415 kmp_user_lock_p lck; 2416 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2417 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2418 } else { 2419 lck = (kmp_user_lock_p)user_lock; 2420 } 2421 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2422 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2423 } 2424 #endif 2425 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2426 #else 2427 kmp_user_lock_p lck; 2428 2429 if ((__kmp_user_lock_kind == lk_tas) && 2430 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2431 lck = (kmp_user_lock_p)user_lock; 2432 } 2433 #if KMP_USE_FUTEX 2434 else if ((__kmp_user_lock_kind == lk_futex) && 2435 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2436 lck = (kmp_user_lock_p)user_lock; 2437 } 2438 #endif 2439 else { 2440 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2441 } 2442 2443 #if OMPT_SUPPORT && OMPT_OPTIONAL 2444 // This is the case, if called from omp_init_lock_with_hint: 2445 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2446 if (!codeptr) 2447 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2448 if (ompt_enabled.ompt_callback_lock_destroy) { 2449 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2450 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2451 } 2452 #endif 2453 2454 #if USE_ITT_BUILD 2455 __kmp_itt_lock_destroyed(lck); 2456 #endif /* USE_ITT_BUILD */ 2457 DESTROY_LOCK(lck); 2458 2459 if ((__kmp_user_lock_kind == lk_tas) && 2460 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2461 ; 2462 } 2463 #if KMP_USE_FUTEX 2464 else if ((__kmp_user_lock_kind == lk_futex) && 2465 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2466 ; 2467 } 2468 #endif 2469 else { 2470 __kmp_user_lock_free(user_lock, gtid, lck); 2471 } 2472 #endif // KMP_USE_DYNAMIC_LOCK 2473 } // __kmpc_destroy_lock 2474 2475 /* destroy the lock */ 2476 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2477 #if KMP_USE_DYNAMIC_LOCK 2478 2479 #if USE_ITT_BUILD 2480 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2481 __kmp_itt_lock_destroyed(ilk->lock); 2482 #endif 2483 #if OMPT_SUPPORT && OMPT_OPTIONAL 2484 // This is the case, if called from omp_init_lock_with_hint: 2485 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2486 if (!codeptr) 2487 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2488 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2489 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2490 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2491 } 2492 #endif 2493 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2494 2495 #else // KMP_USE_DYNAMIC_LOCK 2496 2497 kmp_user_lock_p lck; 2498 2499 if ((__kmp_user_lock_kind == lk_tas) && 2500 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2501 OMP_NEST_LOCK_T_SIZE)) { 2502 lck = (kmp_user_lock_p)user_lock; 2503 } 2504 #if KMP_USE_FUTEX 2505 else if ((__kmp_user_lock_kind == lk_futex) && 2506 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2507 OMP_NEST_LOCK_T_SIZE)) { 2508 lck = (kmp_user_lock_p)user_lock; 2509 } 2510 #endif 2511 else { 2512 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2513 } 2514 2515 #if OMPT_SUPPORT && OMPT_OPTIONAL 2516 // This is the case, if called from omp_init_lock_with_hint: 2517 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2518 if (!codeptr) 2519 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2520 if (ompt_enabled.ompt_callback_lock_destroy) { 2521 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2522 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2523 } 2524 #endif 2525 2526 #if USE_ITT_BUILD 2527 __kmp_itt_lock_destroyed(lck); 2528 #endif /* USE_ITT_BUILD */ 2529 2530 DESTROY_NESTED_LOCK(lck); 2531 2532 if ((__kmp_user_lock_kind == lk_tas) && 2533 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2534 OMP_NEST_LOCK_T_SIZE)) { 2535 ; 2536 } 2537 #if KMP_USE_FUTEX 2538 else if ((__kmp_user_lock_kind == lk_futex) && 2539 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2540 OMP_NEST_LOCK_T_SIZE)) { 2541 ; 2542 } 2543 #endif 2544 else { 2545 __kmp_user_lock_free(user_lock, gtid, lck); 2546 } 2547 #endif // KMP_USE_DYNAMIC_LOCK 2548 } // __kmpc_destroy_nest_lock 2549 2550 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2551 KMP_COUNT_BLOCK(OMP_set_lock); 2552 #if KMP_USE_DYNAMIC_LOCK 2553 int tag = KMP_EXTRACT_D_TAG(user_lock); 2554 #if USE_ITT_BUILD 2555 __kmp_itt_lock_acquiring( 2556 (kmp_user_lock_p) 2557 user_lock); // itt function will get to the right lock object. 
2558 #endif 2559 #if OMPT_SUPPORT && OMPT_OPTIONAL 2560 // This is the case, if called from omp_init_lock_with_hint: 2561 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2562 if (!codeptr) 2563 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2564 if (ompt_enabled.ompt_callback_mutex_acquire) { 2565 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2566 ompt_mutex_lock, omp_lock_hint_none, 2567 __ompt_get_mutex_impl_type(user_lock), 2568 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2569 } 2570 #endif 2571 #if KMP_USE_INLINED_TAS 2572 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2573 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2574 } else 2575 #elif KMP_USE_INLINED_FUTEX 2576 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2577 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2578 } else 2579 #endif 2580 { 2581 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2582 } 2583 #if USE_ITT_BUILD 2584 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2585 #endif 2586 #if OMPT_SUPPORT && OMPT_OPTIONAL 2587 if (ompt_enabled.ompt_callback_mutex_acquired) { 2588 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2589 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2590 } 2591 #endif 2592 2593 #else // KMP_USE_DYNAMIC_LOCK 2594 2595 kmp_user_lock_p lck; 2596 2597 if ((__kmp_user_lock_kind == lk_tas) && 2598 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2599 lck = (kmp_user_lock_p)user_lock; 2600 } 2601 #if KMP_USE_FUTEX 2602 else if ((__kmp_user_lock_kind == lk_futex) && 2603 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2604 lck = (kmp_user_lock_p)user_lock; 2605 } 2606 #endif 2607 else { 2608 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2609 } 2610 2611 #if USE_ITT_BUILD 2612 __kmp_itt_lock_acquiring(lck); 2613 #endif /* USE_ITT_BUILD */ 2614 #if OMPT_SUPPORT && OMPT_OPTIONAL 2615 // This is the case, if called from omp_init_lock_with_hint: 2616 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2617 if (!codeptr) 2618 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2619 if (ompt_enabled.ompt_callback_mutex_acquire) { 2620 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2621 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2622 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2623 } 2624 #endif 2625 2626 ACQUIRE_LOCK(lck, gtid); 2627 2628 #if USE_ITT_BUILD 2629 __kmp_itt_lock_acquired(lck); 2630 #endif /* USE_ITT_BUILD */ 2631 2632 #if OMPT_SUPPORT && OMPT_OPTIONAL 2633 if (ompt_enabled.ompt_callback_mutex_acquired) { 2634 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2635 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2636 } 2637 #endif 2638 2639 #endif // KMP_USE_DYNAMIC_LOCK 2640 } 2641 2642 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2643 #if KMP_USE_DYNAMIC_LOCK 2644 2645 #if USE_ITT_BUILD 2646 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2647 #endif 2648 #if OMPT_SUPPORT && OMPT_OPTIONAL 2649 // This is the case, if called from omp_init_lock_with_hint: 2650 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2651 if (!codeptr) 2652 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2653 if (ompt_enabled.enabled) { 2654 if (ompt_enabled.ompt_callback_mutex_acquire) { 2655 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2656 ompt_mutex_nest_lock, omp_lock_hint_none, 2657 __ompt_get_mutex_impl_type(user_lock), 2658 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2659 } 2660 } 2661 #endif 2662 int acquire_status = 2663 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2664 (void) acquire_status; 2665 #if USE_ITT_BUILD 2666 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2667 #endif 2668 2669 #if OMPT_SUPPORT && OMPT_OPTIONAL 2670 if (ompt_enabled.enabled) { 2671 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2672 if (ompt_enabled.ompt_callback_mutex_acquired) { 2673 // lock_first 2674 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2675 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2676 codeptr); 2677 } 2678 } else { 2679 if (ompt_enabled.ompt_callback_nest_lock) { 2680 // lock_next 2681 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2682 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2683 } 2684 } 2685 } 2686 #endif 2687 2688 #else // KMP_USE_DYNAMIC_LOCK 2689 int acquire_status; 2690 kmp_user_lock_p lck; 2691 2692 if ((__kmp_user_lock_kind == lk_tas) && 2693 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2694 OMP_NEST_LOCK_T_SIZE)) { 2695 lck = (kmp_user_lock_p)user_lock; 2696 } 2697 #if KMP_USE_FUTEX 2698 else if ((__kmp_user_lock_kind == lk_futex) && 2699 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2700 OMP_NEST_LOCK_T_SIZE)) { 2701 lck = (kmp_user_lock_p)user_lock; 2702 } 2703 #endif 2704 else { 2705 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2706 } 2707 2708 #if USE_ITT_BUILD 2709 __kmp_itt_lock_acquiring(lck); 2710 #endif /* USE_ITT_BUILD */ 2711 #if OMPT_SUPPORT && OMPT_OPTIONAL 2712 // This is the case, if called from omp_init_lock_with_hint: 2713 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2714 if (!codeptr) 2715 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2716 if (ompt_enabled.enabled) { 2717 if (ompt_enabled.ompt_callback_mutex_acquire) { 2718 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2719 ompt_mutex_nest_lock, omp_lock_hint_none, 2720 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2721 codeptr); 2722 } 2723 } 2724 #endif 2725 2726 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2727 2728 #if USE_ITT_BUILD 2729 __kmp_itt_lock_acquired(lck); 2730 #endif /* USE_ITT_BUILD */ 2731 2732 #if OMPT_SUPPORT && OMPT_OPTIONAL 2733 if (ompt_enabled.enabled) { 2734 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2735 if (ompt_enabled.ompt_callback_mutex_acquired) { 2736 // lock_first 2737 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2738 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2739 } 2740 } else { 2741 if (ompt_enabled.ompt_callback_nest_lock) { 2742 // lock_next 2743 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2744 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2745 } 2746 } 2747 } 2748 #endif 2749 2750 #endif // KMP_USE_DYNAMIC_LOCK 2751 } 2752 2753 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2754 #if KMP_USE_DYNAMIC_LOCK 2755 2756 int tag = KMP_EXTRACT_D_TAG(user_lock); 2757 #if USE_ITT_BUILD 2758 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2759 #endif 2760 #if KMP_USE_INLINED_TAS 2761 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2762 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2763 } else 2764 #elif KMP_USE_INLINED_FUTEX 2765 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2766 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2767 } else 2768 #endif 2769 { 2770 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2771 } 2772 2773 #if OMPT_SUPPORT && OMPT_OPTIONAL 2774 // This is the case, if called from omp_init_lock_with_hint: 2775 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2776 if (!codeptr) 2777 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2778 if (ompt_enabled.ompt_callback_mutex_released) { 2779 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2780 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2781 } 2782 #endif 2783 2784 #else // KMP_USE_DYNAMIC_LOCK 2785 2786 kmp_user_lock_p lck; 2787 2788 /* Can't use serial interval since not block structured */ 2789 /* release the lock */ 2790 2791 if ((__kmp_user_lock_kind == lk_tas) && 2792 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2793 #if KMP_OS_LINUX && \ 2794 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2795 // "fast" path implemented to fix customer performance issue 2796 #if USE_ITT_BUILD 2797 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2798 #endif /* USE_ITT_BUILD */ 2799 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2800 KMP_MB(); 2801 2802 #if OMPT_SUPPORT && OMPT_OPTIONAL 2803 // This is the case, if called from omp_init_lock_with_hint: 2804 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2805 if (!codeptr) 2806 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2807 if (ompt_enabled.ompt_callback_mutex_released) { 2808 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2809 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2810 } 2811 #endif 2812 2813 return; 2814 #else 2815 lck = (kmp_user_lock_p)user_lock; 2816 #endif 2817 } 2818 #if KMP_USE_FUTEX 2819 else if ((__kmp_user_lock_kind == lk_futex) && 2820 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2821 lck = (kmp_user_lock_p)user_lock; 2822 } 2823 #endif 2824 else { 2825 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2826 } 2827 2828 #if USE_ITT_BUILD 2829 __kmp_itt_lock_releasing(lck); 2830 #endif /* USE_ITT_BUILD */ 2831 2832 RELEASE_LOCK(lck, gtid); 2833 2834 #if OMPT_SUPPORT && OMPT_OPTIONAL 2835 // This is the case, if called from omp_init_lock_with_hint: 2836 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2837 if (!codeptr) 2838 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2839 if (ompt_enabled.ompt_callback_mutex_released) { 2840 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2841 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2842 } 2843 #endif 2844 2845 #endif // KMP_USE_DYNAMIC_LOCK 2846 } 2847 2848 /* release the lock */ 2849 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2850 #if KMP_USE_DYNAMIC_LOCK 2851 2852 #if USE_ITT_BUILD 2853 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2854 #endif 2855 int release_status = 2856 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2857 (void) release_status; 2858 2859 #if OMPT_SUPPORT && OMPT_OPTIONAL 2860 // This is the case, if called from omp_init_lock_with_hint: 2861 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2862 if (!codeptr) 2863 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2864 if (ompt_enabled.enabled) { 2865 if (release_status == KMP_LOCK_RELEASED) { 2866 if (ompt_enabled.ompt_callback_mutex_released) { 2867 // release_lock_last 2868 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2869 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2870 codeptr); 2871 } 2872 } else if (ompt_enabled.ompt_callback_nest_lock) { 2873 // release_lock_prev 2874 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2875 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2876 } 2877 } 2878 #endif 2879 2880 #else // KMP_USE_DYNAMIC_LOCK 2881 2882 kmp_user_lock_p lck; 
2883 2884 /* Can't use serial interval since not block structured */ 2885 2886 if ((__kmp_user_lock_kind == lk_tas) && 2887 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2888 OMP_NEST_LOCK_T_SIZE)) { 2889 #if KMP_OS_LINUX && \ 2890 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2891 // "fast" path implemented to fix customer performance issue 2892 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2893 #if USE_ITT_BUILD 2894 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2895 #endif /* USE_ITT_BUILD */ 2896 2897 #if OMPT_SUPPORT && OMPT_OPTIONAL 2898 int release_status = KMP_LOCK_STILL_HELD; 2899 #endif 2900 2901 if (--(tl->lk.depth_locked) == 0) { 2902 TCW_4(tl->lk.poll, 0); 2903 #if OMPT_SUPPORT && OMPT_OPTIONAL 2904 release_status = KMP_LOCK_RELEASED; 2905 #endif 2906 } 2907 KMP_MB(); 2908 2909 #if OMPT_SUPPORT && OMPT_OPTIONAL 2910 // This is the case, if called from omp_init_lock_with_hint: 2911 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2912 if (!codeptr) 2913 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2914 if (ompt_enabled.enabled) { 2915 if (release_status == KMP_LOCK_RELEASED) { 2916 if (ompt_enabled.ompt_callback_mutex_released) { 2917 // release_lock_last 2918 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2919 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2920 } 2921 } else if (ompt_enabled.ompt_callback_nest_lock) { 2922 // release_lock_previous 2923 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2924 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2925 } 2926 } 2927 #endif 2928 2929 return; 2930 #else 2931 lck = (kmp_user_lock_p)user_lock; 2932 #endif 2933 } 2934 #if KMP_USE_FUTEX 2935 else if ((__kmp_user_lock_kind == lk_futex) && 2936 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2937 OMP_NEST_LOCK_T_SIZE)) { 2938 lck = (kmp_user_lock_p)user_lock; 2939 } 2940 #endif 2941 else { 2942 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2943 } 2944 2945 #if USE_ITT_BUILD 2946 __kmp_itt_lock_releasing(lck); 2947 #endif /* USE_ITT_BUILD */ 2948 2949 int release_status; 2950 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2951 #if OMPT_SUPPORT && OMPT_OPTIONAL 2952 // This is the case, if called from omp_init_lock_with_hint: 2953 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2954 if (!codeptr) 2955 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2956 if (ompt_enabled.enabled) { 2957 if (release_status == KMP_LOCK_RELEASED) { 2958 if (ompt_enabled.ompt_callback_mutex_released) { 2959 // release_lock_last 2960 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2961 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2962 } 2963 } else if (ompt_enabled.ompt_callback_nest_lock) { 2964 // release_lock_previous 2965 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2966 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2967 } 2968 } 2969 #endif 2970 2971 #endif // KMP_USE_DYNAMIC_LOCK 2972 } 2973 2974 /* try to acquire the lock */ 2975 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2976 KMP_COUNT_BLOCK(OMP_test_lock); 2977 2978 #if KMP_USE_DYNAMIC_LOCK 2979 int rc; 2980 int tag = KMP_EXTRACT_D_TAG(user_lock); 2981 #if USE_ITT_BUILD 2982 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2983 #endif 2984 #if OMPT_SUPPORT && OMPT_OPTIONAL 2985 // This is the case, if called from omp_init_lock_with_hint: 2986 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2987 if (!codeptr) 2988 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2989 if (ompt_enabled.ompt_callback_mutex_acquire) { 2990 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2991 ompt_mutex_lock, omp_lock_hint_none, 2992 __ompt_get_mutex_impl_type(user_lock), 2993 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2994 } 2995 #endif 2996 #if KMP_USE_INLINED_TAS 2997 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2998 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2999 } else 3000 #elif KMP_USE_INLINED_FUTEX 3001 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3002 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3003 } else 3004 #endif 3005 { 3006 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3007 } 3008 if (rc) { 3009 #if USE_ITT_BUILD 3010 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3011 #endif 3012 #if OMPT_SUPPORT && OMPT_OPTIONAL 3013 if (ompt_enabled.ompt_callback_mutex_acquired) { 3014 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3015 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3016 } 3017 #endif 3018 return FTN_TRUE; 3019 } else { 3020 #if USE_ITT_BUILD 3021 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3022 #endif 3023 return FTN_FALSE; 3024 } 3025 3026 #else // KMP_USE_DYNAMIC_LOCK 3027 3028 kmp_user_lock_p lck; 3029 int rc; 3030 3031 if ((__kmp_user_lock_kind == lk_tas) && 3032 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3033 lck = (kmp_user_lock_p)user_lock; 3034 } 3035 #if KMP_USE_FUTEX 3036 else if ((__kmp_user_lock_kind == lk_futex) && 3037 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3038 lck = (kmp_user_lock_p)user_lock; 3039 } 3040 #endif 3041 else { 3042 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3043 } 3044 3045 #if USE_ITT_BUILD 3046 __kmp_itt_lock_acquiring(lck); 3047 #endif /* USE_ITT_BUILD */ 3048 #if OMPT_SUPPORT && OMPT_OPTIONAL 3049 // This is the case, if called from omp_init_lock_with_hint: 3050 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3051 if (!codeptr) 3052 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3053 if (ompt_enabled.ompt_callback_mutex_acquire) { 3054 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3055 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3056 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3057 } 3058 #endif 3059 3060 rc = TEST_LOCK(lck, gtid); 3061 #if USE_ITT_BUILD 3062 if (rc) { 3063 __kmp_itt_lock_acquired(lck); 3064 } else { 3065 __kmp_itt_lock_cancelled(lck); 3066 } 3067 #endif /* USE_ITT_BUILD */ 3068 #if OMPT_SUPPORT && OMPT_OPTIONAL 3069 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3070 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3071 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3072 } 3073 #endif 3074 3075 return (rc ? 
FTN_TRUE : FTN_FALSE); 3076 3077 /* Can't use serial interval since not block structured */ 3078 3079 #endif // KMP_USE_DYNAMIC_LOCK 3080 } 3081 3082 /* try to acquire the lock */ 3083 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3084 #if KMP_USE_DYNAMIC_LOCK 3085 int rc; 3086 #if USE_ITT_BUILD 3087 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3088 #endif 3089 #if OMPT_SUPPORT && OMPT_OPTIONAL 3090 // This is the case, if called from omp_init_lock_with_hint: 3091 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3092 if (!codeptr) 3093 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3094 if (ompt_enabled.ompt_callback_mutex_acquire) { 3095 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3096 ompt_mutex_nest_lock, omp_lock_hint_none, 3097 __ompt_get_mutex_impl_type(user_lock), 3098 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3099 } 3100 #endif 3101 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3102 #if USE_ITT_BUILD 3103 if (rc) { 3104 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3105 } else { 3106 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3107 } 3108 #endif 3109 #if OMPT_SUPPORT && OMPT_OPTIONAL 3110 if (ompt_enabled.enabled && rc) { 3111 if (rc == 1) { 3112 if (ompt_enabled.ompt_callback_mutex_acquired) { 3113 // lock_first 3114 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3115 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3116 codeptr); 3117 } 3118 } else { 3119 if (ompt_enabled.ompt_callback_nest_lock) { 3120 // lock_next 3121 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3122 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3123 } 3124 } 3125 } 3126 #endif 3127 return rc; 3128 3129 #else // KMP_USE_DYNAMIC_LOCK 3130 3131 kmp_user_lock_p lck; 3132 int rc; 3133 3134 if ((__kmp_user_lock_kind == lk_tas) && 3135 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3136 OMP_NEST_LOCK_T_SIZE)) { 3137 lck = (kmp_user_lock_p)user_lock; 3138 } 3139 #if KMP_USE_FUTEX 3140 else if ((__kmp_user_lock_kind == lk_futex) && 3141 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3142 OMP_NEST_LOCK_T_SIZE)) { 3143 lck = (kmp_user_lock_p)user_lock; 3144 } 3145 #endif 3146 else { 3147 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3148 } 3149 3150 #if USE_ITT_BUILD 3151 __kmp_itt_lock_acquiring(lck); 3152 #endif /* USE_ITT_BUILD */ 3153 3154 #if OMPT_SUPPORT && OMPT_OPTIONAL 3155 // This is the case, if called from omp_init_lock_with_hint: 3156 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3157 if (!codeptr) 3158 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3159 if (ompt_enabled.enabled && 3160 ompt_enabled.ompt_callback_mutex_acquire) { 3161 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3162 ompt_mutex_nest_lock, omp_lock_hint_none, 3163 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3164 codeptr); 3165 } 3166 #endif 3167 3168 rc = TEST_NESTED_LOCK(lck, gtid); 3169 #if USE_ITT_BUILD 3170 if (rc) { 3171 __kmp_itt_lock_acquired(lck); 3172 } else { 3173 __kmp_itt_lock_cancelled(lck); 3174 } 3175 #endif /* USE_ITT_BUILD */ 3176 #if OMPT_SUPPORT && OMPT_OPTIONAL 3177 if (ompt_enabled.enabled && rc) { 3178 if (rc == 1) { 3179 if (ompt_enabled.ompt_callback_mutex_acquired) { 3180 // lock_first 3181 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3182 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3183 } 3184 } else { 3185 if (ompt_enabled.ompt_callback_nest_lock) { 3186
// lock_next 3187 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3188 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3189 } 3190 } 3191 } 3192 #endif 3193 return rc; 3194 3195 /* Can't use serial interval since not block structured */ 3196 3197 #endif // KMP_USE_DYNAMIC_LOCK 3198 } 3199 3200 // Interface to fast scalable reduce methods routines 3201 3202 // keep the selected method in a thread local structure for cross-function 3203 // usage: will be used in __kmpc_end_reduce* functions; 3204 // another solution: to re-determine the method one more time in 3205 // __kmpc_end_reduce* functions (new prototype required then) 3206 // AT: which solution is better? 3207 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3208 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3209 3210 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3211 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3212 3213 // description of the packed_reduction_method variable: look at the macros in 3214 // kmp.h 3215 3216 // used in a critical section reduce block 3217 static __forceinline void 3218 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3219 kmp_critical_name *crit) { 3220 3221 // this lock was visible to a customer and to the threading profile tool as a 3222 // serial overhead span (although it's used for an internal purpose only) 3223 // why was it visible in previous implementation? 3224 // should we keep it visible in new reduce block? 3225 kmp_user_lock_p lck; 3226 3227 #if KMP_USE_DYNAMIC_LOCK 3228 3229 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3230 // Check if it is initialized. 3231 if (*lk == 0) { 3232 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3233 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3234 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3235 } else { 3236 __kmp_init_indirect_csptr(crit, loc, global_tid, 3237 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3238 } 3239 } 3240 // Branch for accessing the actual lock object and set operation. This 3241 // branching is inevitable since this lock initialization does not follow the 3242 // normal dispatch path (lock table is not used). 3243 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3244 lck = (kmp_user_lock_p)lk; 3245 KMP_DEBUG_ASSERT(lck != NULL); 3246 if (__kmp_env_consistency_check) { 3247 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3248 } 3249 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3250 } else { 3251 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3252 lck = ilk->lock; 3253 KMP_DEBUG_ASSERT(lck != NULL); 3254 if (__kmp_env_consistency_check) { 3255 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3256 } 3257 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3258 } 3259 3260 #else // KMP_USE_DYNAMIC_LOCK 3261 3262 // We know that the fast reduction code is only emitted by Intel compilers 3263 // with 32 byte critical sections. If there isn't enough space, then we 3264 // have to use a pointer. 
3265 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3266 lck = (kmp_user_lock_p)crit; 3267 } else { 3268 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3269 } 3270 KMP_DEBUG_ASSERT(lck != NULL); 3271 3272 if (__kmp_env_consistency_check) 3273 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3274 3275 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3276 3277 #endif // KMP_USE_DYNAMIC_LOCK 3278 } 3279 3280 // used in a critical section reduce block 3281 static __forceinline void 3282 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3283 kmp_critical_name *crit) { 3284 3285 kmp_user_lock_p lck; 3286 3287 #if KMP_USE_DYNAMIC_LOCK 3288 3289 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3290 lck = (kmp_user_lock_p)crit; 3291 if (__kmp_env_consistency_check) 3292 __kmp_pop_sync(global_tid, ct_critical, loc); 3293 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3294 } else { 3295 kmp_indirect_lock_t *ilk = 3296 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3297 if (__kmp_env_consistency_check) 3298 __kmp_pop_sync(global_tid, ct_critical, loc); 3299 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3300 } 3301 3302 #else // KMP_USE_DYNAMIC_LOCK 3303 3304 // We know that the fast reduction code is only emitted by Intel compilers 3305 // with 32 byte critical sections. If there isn't enough space, then we have 3306 // to use a pointer. 3307 if (__kmp_base_user_lock_size > 32) { 3308 lck = *((kmp_user_lock_p *)crit); 3309 KMP_ASSERT(lck != NULL); 3310 } else { 3311 lck = (kmp_user_lock_p)crit; 3312 } 3313 3314 if (__kmp_env_consistency_check) 3315 __kmp_pop_sync(global_tid, ct_critical, loc); 3316 3317 __kmp_release_user_lock_with_checks(lck, global_tid); 3318 3319 #endif // KMP_USE_DYNAMIC_LOCK 3320 } // __kmp_end_critical_section_reduce_block 3321 3322 static __forceinline int 3323 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3324 int *task_state) { 3325 kmp_team_t *team; 3326 3327 // Check if we are inside the teams construct? 3328 if (th->th.th_teams_microtask) { 3329 *team_p = team = th->th.th_team; 3330 if (team->t.t_level == th->th.th_teams_level) { 3331 // This is reduction at teams construct. 3332 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3333 // Let's swap teams temporarily for the reduction. 3334 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3335 th->th.th_team = team->t.t_parent; 3336 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3337 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3338 *task_state = th->th.th_task_state; 3339 th->th.th_task_state = 0; 3340 3341 return 1; 3342 } 3343 } 3344 return 0; 3345 } 3346 3347 static __forceinline void 3348 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3349 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3350 th->th.th_info.ds.ds_tid = 0; 3351 th->th.th_team = team; 3352 th->th.th_team_nproc = team->t.t_nproc; 3353 th->th.th_task_team = team->t.t_task_team[task_state]; 3354 th->th.th_task_state = task_state; 3355 } 3356 3357 /* 2.a.i. Reduce Block without a terminating barrier */ 3358 /*! 
3359 @ingroup SYNCHRONIZATION 3360 @param loc source location information 3361 @param global_tid global thread number 3362 @param num_vars number of items (variables) to be reduced 3363 @param reduce_size size of data in bytes to be reduced 3364 @param reduce_data pointer to data to be reduced 3365 @param reduce_func callback function providing reduction operation on two 3366 operands and returning result of reduction in lhs_data 3367 @param lck pointer to the unique lock data structure 3368 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3369 threads if atomic reduction needed 3370 3371 The nowait version is used for a reduce clause with the nowait argument. 3372 */ 3373 kmp_int32 3374 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3375 size_t reduce_size, void *reduce_data, 3376 void (*reduce_func)(void *lhs_data, void *rhs_data), 3377 kmp_critical_name *lck) { 3378 3379 KMP_COUNT_BLOCK(REDUCE_nowait); 3380 int retval = 0; 3381 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3382 kmp_info_t *th; 3383 kmp_team_t *team; 3384 int teams_swapped = 0, task_state; 3385 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3386 3387 // why do we need this initialization here at all? 3388 // Reduction clause can not be used as a stand-alone directive. 3389 3390 // do not call __kmp_serial_initialize(), it will be called by 3391 // __kmp_parallel_initialize() if needed 3392 // possible detection of false-positive race by the threadchecker ??? 3393 if (!TCR_4(__kmp_init_parallel)) 3394 __kmp_parallel_initialize(); 3395 3396 __kmp_resume_if_soft_paused(); 3397 3398 // check correctness of reduce block nesting 3399 #if KMP_USE_DYNAMIC_LOCK 3400 if (__kmp_env_consistency_check) 3401 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3402 #else 3403 if (__kmp_env_consistency_check) 3404 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3405 #endif 3406 3407 th = __kmp_thread_from_gtid(global_tid); 3408 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3409 3410 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3411 // the value should be kept in a variable 3412 // the variable should be either a construct-specific or thread-specific 3413 // property, not a team specific property 3414 // (a thread can reach the next reduce block on the next construct, reduce 3415 // method may differ on the next construct) 3416 // an ident_t "loc" parameter could be used as a construct-specific property 3417 // (what if loc == 0?) 
3418 // (if both construct-specific and team-specific variables were shared, 3419 // then unness extra syncs should be needed) 3420 // a thread-specific variable is better regarding two issues above (next 3421 // construct and extra syncs) 3422 // a thread-specific "th_local.reduction_method" variable is used currently 3423 // each thread executes 'determine' and 'set' lines (no need to execute by one 3424 // thread, to avoid unness extra syncs) 3425 3426 packed_reduction_method = __kmp_determine_reduction_method( 3427 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3428 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3429 3430 if (packed_reduction_method == critical_reduce_block) { 3431 3432 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3433 retval = 1; 3434 3435 } else if (packed_reduction_method == empty_reduce_block) { 3436 3437 // usage: if team size == 1, no synchronization is required ( Intel 3438 // platforms only ) 3439 retval = 1; 3440 3441 } else if (packed_reduction_method == atomic_reduce_block) { 3442 3443 retval = 2; 3444 3445 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3446 // won't be called by the code gen) 3447 // (it's not quite good, because the checking block has been closed by 3448 // this 'pop', 3449 // but atomic operation has not been executed yet, will be executed 3450 // slightly later, literally on next instruction) 3451 if (__kmp_env_consistency_check) 3452 __kmp_pop_sync(global_tid, ct_reduce, loc); 3453 3454 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3455 tree_reduce_block)) { 3456 3457 // AT: performance issue: a real barrier here 3458 // AT: (if master goes slow, other threads are blocked here waiting for the 3459 // master to come and release them) 3460 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3461 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3462 // be confusing to a customer) 3463 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3464 // might go faster and be more in line with sense of NOWAIT 3465 // AT: TO DO: do epcc test and compare times 3466 3467 // this barrier should be invisible to a customer and to the threading profile 3468 // tool (it's neither a terminating barrier nor customer's code, it's 3469 // used for an internal purpose) 3470 #if OMPT_SUPPORT 3471 // JP: can this barrier potentially leed to task scheduling? 3472 // JP: as long as there is a barrier in the implementation, OMPT should and 3473 // will provide the barrier events 3474 // so we set-up the necessary frame/return addresses. 3475 ompt_frame_t *ompt_frame; 3476 if (ompt_enabled.enabled) { 3477 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3478 if (ompt_frame->enter_frame.ptr == NULL) 3479 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3480 OMPT_STORE_RETURN_ADDRESS(global_tid); 3481 } 3482 #endif 3483 #if USE_ITT_NOTIFY 3484 __kmp_threads[global_tid]->th.th_ident = loc; 3485 #endif 3486 retval = 3487 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3488 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3489 retval = (retval != 0) ? 
3490 #if OMPT_SUPPORT && OMPT_OPTIONAL
3491 if (ompt_enabled.enabled) {
3492 ompt_frame->enter_frame = ompt_data_none;
3493 }
3494 #endif
3495
3496 // all workers except the master should do this pop here
3497 // (none of the other workers will reach __kmpc_end_reduce_nowait())
3498 if (__kmp_env_consistency_check) {
3499 if (retval == 0) {
3500 __kmp_pop_sync(global_tid, ct_reduce, loc);
3501 }
3502 }
3503
3504 } else {
3505
3506 // should never reach this block
3507 KMP_ASSERT(0); // "unexpected method"
3508 }
3509 if (teams_swapped) {
3510 __kmp_restore_swapped_teams(th, team, task_state);
3511 }
3512 KA_TRACE(
3513 10,
3514 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3515 global_tid, packed_reduction_method, retval));
3516
3517 return retval;
3518 }
3519
3520 /*!
3521 @ingroup SYNCHRONIZATION
3522 @param loc source location information
3523 @param global_tid global thread id.
3524 @param lck pointer to the unique lock data structure
3525
3526 Finish the execution of a reduce nowait.
3527 */
3528 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3529 kmp_critical_name *lck) {
3530
3531 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3532
3533 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3534
3535 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3536
3537 if (packed_reduction_method == critical_reduce_block) {
3538
3539 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3540
3541 } else if (packed_reduction_method == empty_reduce_block) {
3542
3543 // usage: if team size == 1, no synchronization is required ( on Intel
3544 // platforms only )
3545
3546 } else if (packed_reduction_method == atomic_reduce_block) {
3547
3548 // neither master nor other workers should get here
3549 // (code gen does not generate this call in case 2: atomic reduce block)
3550 // actually it would be better to remove this else-if entirely;
3551 // after removal this value would be caught by the 'else' branch and would assert
3552
3553 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3554 tree_reduce_block)) {
3555
3556 // only master gets here
3557
3558 } else {
3559
3560 // should never reach this block
3561 KMP_ASSERT(0); // "unexpected method"
3562 }
3563
3564 if (__kmp_env_consistency_check)
3565 __kmp_pop_sync(global_tid, ct_reduce, loc);
3566
3567 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3568 global_tid, packed_reduction_method));
3569
3570 return;
3571 }
3572
3573 /* 2.a.ii. Reduce Block with a terminating barrier */
3574
3575 /*!
3576 @ingroup SYNCHRONIZATION
3577 @param loc source location information
3578 @param global_tid global thread number
3579 @param num_vars number of items (variables) to be reduced
3580 @param reduce_size size of data in bytes to be reduced
3581 @param reduce_data pointer to data to be reduced
3582 @param reduce_func callback function providing reduction operation on two
3583 operands and returning result of reduction in lhs_data
3584 @param lck pointer to the unique lock data structure
3585 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3586 threads if atomic reduction needed
3587
3588 A blocking reduce that includes an implicit barrier.
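
The generated code is expected to dispatch on the return value. A minimal sketch
of a typical lowering (illustrative only; the exact code generation is compiler
dependent, and the combine/atomic bodies below are placeholders):

    switch (__kmpc_reduce(loc, gtid, num_vars, reduce_size, reduce_data,
                          reduce_func, &lck)) {
    case 1:
      // this thread combines the private copies into the shared variables,
      // e.g. sum += sum_priv; then closes the region:
      __kmpc_end_reduce(loc, gtid, &lck);
      break;
    case 2:
      // all threads update the shared variables with atomic operations
      break;
    default:
      // remaining workers have nothing left to do here
      break;
    }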
3589 */ 3590 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3591 size_t reduce_size, void *reduce_data, 3592 void (*reduce_func)(void *lhs_data, void *rhs_data), 3593 kmp_critical_name *lck) { 3594 KMP_COUNT_BLOCK(REDUCE_wait); 3595 int retval = 0; 3596 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3597 kmp_info_t *th; 3598 kmp_team_t *team; 3599 int teams_swapped = 0, task_state; 3600 3601 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3602 3603 // why do we need this initialization here at all? 3604 // Reduction clause can not be a stand-alone directive. 3605 3606 // do not call __kmp_serial_initialize(), it will be called by 3607 // __kmp_parallel_initialize() if needed 3608 // possible detection of false-positive race by the threadchecker ??? 3609 if (!TCR_4(__kmp_init_parallel)) 3610 __kmp_parallel_initialize(); 3611 3612 __kmp_resume_if_soft_paused(); 3613 3614 // check correctness of reduce block nesting 3615 #if KMP_USE_DYNAMIC_LOCK 3616 if (__kmp_env_consistency_check) 3617 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3618 #else 3619 if (__kmp_env_consistency_check) 3620 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3621 #endif 3622 3623 th = __kmp_thread_from_gtid(global_tid); 3624 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3625 3626 packed_reduction_method = __kmp_determine_reduction_method( 3627 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3628 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3629 3630 if (packed_reduction_method == critical_reduce_block) { 3631 3632 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3633 retval = 1; 3634 3635 } else if (packed_reduction_method == empty_reduce_block) { 3636 3637 // usage: if team size == 1, no synchronization is required ( Intel 3638 // platforms only ) 3639 retval = 1; 3640 3641 } else if (packed_reduction_method == atomic_reduce_block) { 3642 3643 retval = 2; 3644 3645 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3646 tree_reduce_block)) { 3647 3648 // case tree_reduce_block: 3649 // this barrier should be visible to a customer and to the threading profile 3650 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3651 #if OMPT_SUPPORT 3652 ompt_frame_t *ompt_frame; 3653 if (ompt_enabled.enabled) { 3654 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3655 if (ompt_frame->enter_frame.ptr == NULL) 3656 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3657 OMPT_STORE_RETURN_ADDRESS(global_tid); 3658 } 3659 #endif 3660 #if USE_ITT_NOTIFY 3661 __kmp_threads[global_tid]->th.th_ident = 3662 loc; // needed for correct notification of frames 3663 #endif 3664 retval = 3665 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3666 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3667 retval = (retval != 0) ? 
(0) : (1); 3668 #if OMPT_SUPPORT && OMPT_OPTIONAL 3669 if (ompt_enabled.enabled) { 3670 ompt_frame->enter_frame = ompt_data_none; 3671 } 3672 #endif 3673 3674 // all other workers except master should do this pop here 3675 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3676 if (__kmp_env_consistency_check) { 3677 if (retval == 0) { // 0: all other workers; 1: master 3678 __kmp_pop_sync(global_tid, ct_reduce, loc); 3679 } 3680 } 3681 3682 } else { 3683 3684 // should never reach this block 3685 KMP_ASSERT(0); // "unexpected method" 3686 } 3687 if (teams_swapped) { 3688 __kmp_restore_swapped_teams(th, team, task_state); 3689 } 3690 3691 KA_TRACE(10, 3692 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3693 global_tid, packed_reduction_method, retval)); 3694 return retval; 3695 } 3696 3697 /*! 3698 @ingroup SYNCHRONIZATION 3699 @param loc source location information 3700 @param global_tid global thread id. 3701 @param lck pointer to the unique lock data structure 3702 3703 Finish the execution of a blocking reduce. 3704 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3705 start function. 3706 */ 3707 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3708 kmp_critical_name *lck) { 3709 3710 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3711 kmp_info_t *th; 3712 kmp_team_t *team; 3713 int teams_swapped = 0, task_state; 3714 3715 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3716 3717 th = __kmp_thread_from_gtid(global_tid); 3718 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3719 3720 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3721 3722 // this barrier should be visible to a customer and to the threading profile 3723 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3724 3725 if (packed_reduction_method == critical_reduce_block) { 3726 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3727 3728 // TODO: implicit barrier: should be exposed 3729 #if OMPT_SUPPORT 3730 ompt_frame_t *ompt_frame; 3731 if (ompt_enabled.enabled) { 3732 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3733 if (ompt_frame->enter_frame.ptr == NULL) 3734 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3735 OMPT_STORE_RETURN_ADDRESS(global_tid); 3736 } 3737 #endif 3738 #if USE_ITT_NOTIFY 3739 __kmp_threads[global_tid]->th.th_ident = loc; 3740 #endif 3741 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3742 #if OMPT_SUPPORT && OMPT_OPTIONAL 3743 if (ompt_enabled.enabled) { 3744 ompt_frame->enter_frame = ompt_data_none; 3745 } 3746 #endif 3747 3748 } else if (packed_reduction_method == empty_reduce_block) { 3749 3750 // usage: if team size==1, no synchronization is required (Intel platforms only) 3751 3752 // TODO: implicit barrier: should be exposed 3753 #if OMPT_SUPPORT 3754 ompt_frame_t *ompt_frame; 3755 if (ompt_enabled.enabled) { 3756 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3757 if (ompt_frame->enter_frame.ptr == NULL) 3758 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3759 OMPT_STORE_RETURN_ADDRESS(global_tid); 3760 } 3761 #endif 3762 #if USE_ITT_NOTIFY 3763 __kmp_threads[global_tid]->th.th_ident = loc; 3764 #endif 3765 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3766 #if OMPT_SUPPORT && OMPT_OPTIONAL 3767 if (ompt_enabled.enabled) { 3768 ompt_frame->enter_frame = ompt_data_none; 3769 } 3770 #endif 3771 
3772 } else if (packed_reduction_method == atomic_reduce_block) {
3773
3774 #if OMPT_SUPPORT
3775 ompt_frame_t *ompt_frame;
3776 if (ompt_enabled.enabled) {
3777 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3778 if (ompt_frame->enter_frame.ptr == NULL)
3779 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3780 OMPT_STORE_RETURN_ADDRESS(global_tid);
3781 }
3782 #endif
3783 // TODO: implicit barrier: should be exposed
3784 #if USE_ITT_NOTIFY
3785 __kmp_threads[global_tid]->th.th_ident = loc;
3786 #endif
3787 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3788 #if OMPT_SUPPORT && OMPT_OPTIONAL
3789 if (ompt_enabled.enabled) {
3790 ompt_frame->enter_frame = ompt_data_none;
3791 }
3792 #endif
3793
3794 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3795 tree_reduce_block)) {
3796
3797 // only master executes here (master releases all other workers)
3798 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3799 global_tid);
3800
3801 } else {
3802
3803 // should never reach this block
3804 KMP_ASSERT(0); // "unexpected method"
3805 }
3806 if (teams_swapped) {
3807 __kmp_restore_swapped_teams(th, team, task_state);
3808 }
3809
3810 if (__kmp_env_consistency_check)
3811 __kmp_pop_sync(global_tid, ct_reduce, loc);
3812
3813 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3814 global_tid, packed_reduction_method));
3815
3816 return;
3817 }
3818
3819 #undef __KMP_GET_REDUCTION_METHOD
3820 #undef __KMP_SET_REDUCTION_METHOD
3821
3822 /* end of interface to fast scalable reduce routines */
3823
3824 kmp_uint64 __kmpc_get_taskid() {
3825
3826 kmp_int32 gtid;
3827 kmp_info_t *thread;
3828
3829 gtid = __kmp_get_gtid();
3830 if (gtid < 0) {
3831 return 0;
3832 }
3833 thread = __kmp_thread_from_gtid(gtid);
3834 return thread->th.th_current_task->td_task_id;
3835
3836 } // __kmpc_get_taskid
3837
3838 kmp_uint64 __kmpc_get_parent_taskid() {
3839
3840 kmp_int32 gtid;
3841 kmp_info_t *thread;
3842 kmp_taskdata_t *parent_task;
3843
3844 gtid = __kmp_get_gtid();
3845 if (gtid < 0) {
3846 return 0;
3847 }
3848 thread = __kmp_thread_from_gtid(gtid);
3849 parent_task = thread->th.th_current_task->td_parent;
3850 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3851
3852 } // __kmpc_get_parent_taskid
3853
3854 /*!
3855 @ingroup WORK_SHARING
3856 @param loc source location information.
3857 @param gtid global thread number.
3858 @param num_dims number of associated doacross loops.
3859 @param dims info on the loop bounds.
3860
3861 Initialize doacross loop information.
3862 The compiler is expected to send us inclusive bounds,
3863 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
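
For reference, these entry points back the OpenMP ordered(n) doacross support.
A sketch of the expected lowering (exact code generation is compiler dependent)
for a loop with a cross-iteration dependence such as

    #pragma omp for ordered(1)
    for (i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink : i - 2)
      // consume the value produced by iteration i-2
      #pragma omp ordered depend(source)
    }

is: each thread calls __kmpc_doacross_init() before the loop, translates each
depend(sink)/depend(source) point into __kmpc_doacross_wait()/__kmpc_doacross_post()
with the iteration vector, and calls __kmpc_doacross_fini() after the loop.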
3864 */ 3865 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3866 const struct kmp_dim *dims) { 3867 int j, idx; 3868 kmp_int64 last, trace_count; 3869 kmp_info_t *th = __kmp_threads[gtid]; 3870 kmp_team_t *team = th->th.th_team; 3871 kmp_uint32 *flags; 3872 kmp_disp_t *pr_buf = th->th.th_dispatch; 3873 dispatch_shared_info_t *sh_buf; 3874 3875 KA_TRACE( 3876 20, 3877 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3878 gtid, num_dims, !team->t.t_serialized)); 3879 KMP_DEBUG_ASSERT(dims != NULL); 3880 KMP_DEBUG_ASSERT(num_dims > 0); 3881 3882 if (team->t.t_serialized) { 3883 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3884 return; // no dependencies if team is serialized 3885 } 3886 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3887 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3888 // the next loop 3889 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3890 3891 // Save bounds info into allocated private buffer 3892 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3893 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3894 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3895 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3896 pr_buf->th_doacross_info[0] = 3897 (kmp_int64)num_dims; // first element is number of dimensions 3898 // Save also address of num_done in order to access it later without knowing 3899 // the buffer index 3900 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3901 pr_buf->th_doacross_info[2] = dims[0].lo; 3902 pr_buf->th_doacross_info[3] = dims[0].up; 3903 pr_buf->th_doacross_info[4] = dims[0].st; 3904 last = 5; 3905 for (j = 1; j < num_dims; ++j) { 3906 kmp_int64 3907 range_length; // To keep ranges of all dimensions but the first dims[0] 3908 if (dims[j].st == 1) { // most common case 3909 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3910 range_length = dims[j].up - dims[j].lo + 1; 3911 } else { 3912 if (dims[j].st > 0) { 3913 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3914 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3915 } else { // negative increment 3916 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3917 range_length = 3918 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3919 } 3920 } 3921 pr_buf->th_doacross_info[last++] = range_length; 3922 pr_buf->th_doacross_info[last++] = dims[j].lo; 3923 pr_buf->th_doacross_info[last++] = dims[j].up; 3924 pr_buf->th_doacross_info[last++] = dims[j].st; 3925 } 3926 3927 // Compute total trip count. 3928 // Start with range of dims[0] which we don't need to keep in the buffer. 
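// (e.g. with the doc-comment example lo=2, up=8, st=2 this gives (8-2)/2+1 = 4 iterations)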
3929 if (dims[0].st == 1) { // most common case 3930 trace_count = dims[0].up - dims[0].lo + 1; 3931 } else if (dims[0].st > 0) { 3932 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3933 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3934 } else { // negative increment 3935 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3936 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3937 } 3938 for (j = 1; j < num_dims; ++j) { 3939 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3940 } 3941 KMP_DEBUG_ASSERT(trace_count > 0); 3942 3943 // Check if shared buffer is not occupied by other loop (idx - 3944 // __kmp_dispatch_num_buffers) 3945 if (idx != sh_buf->doacross_buf_idx) { 3946 // Shared buffer is occupied, wait for it to be free 3947 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3948 __kmp_eq_4, NULL); 3949 } 3950 #if KMP_32_BIT_ARCH 3951 // Check if we are the first thread. After the CAS the first thread gets 0, 3952 // others get 1 if initialization is in progress, allocated pointer otherwise. 3953 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3954 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3955 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3956 #else 3957 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3958 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3959 #endif 3960 if (flags == NULL) { 3961 // we are the first thread, allocate the array of flags 3962 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3963 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3964 KMP_MB(); 3965 sh_buf->doacross_flags = flags; 3966 } else if (flags == (kmp_uint32 *)1) { 3967 #if KMP_32_BIT_ARCH 3968 // initialization is still in progress, need to wait 3969 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3970 #else 3971 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3972 #endif 3973 KMP_YIELD(TRUE); 3974 KMP_MB(); 3975 } else { 3976 KMP_MB(); 3977 } 3978 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3979 pr_buf->th_doacross_flags = 3980 sh_buf->doacross_flags; // save private copy in order to not 3981 // touch shared buffer on each iteration 3982 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3983 } 3984 3985 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3986 kmp_int32 shft, num_dims, i; 3987 kmp_uint32 flag; 3988 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3989 kmp_info_t *th = __kmp_threads[gtid]; 3990 kmp_team_t *team = th->th.th_team; 3991 kmp_disp_t *pr_buf; 3992 kmp_int64 lo, up, st; 3993 3994 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 3995 if (team->t.t_serialized) { 3996 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 3997 return; // no dependencies if team is serialized 3998 } 3999 4000 // calculate sequential iteration number and check out-of-bounds condition 4001 pr_buf = th->th.th_dispatch; 4002 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4003 num_dims = pr_buf->th_doacross_info[0]; 4004 lo = pr_buf->th_doacross_info[2]; 4005 up = pr_buf->th_doacross_info[3]; 4006 st = pr_buf->th_doacross_info[4]; 4007 if (st == 1) { // most common case 4008 if (vec[0] < lo || vec[0] > up) { 4009 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4010 "bounds [%lld,%lld]\n", 4011 gtid, vec[0], lo, up)); 4012 return; 4013 } 4014 iter_number = 
vec[0] - lo; 4015 } else if (st > 0) { 4016 if (vec[0] < lo || vec[0] > up) { 4017 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4018 "bounds [%lld,%lld]\n", 4019 gtid, vec[0], lo, up)); 4020 return; 4021 } 4022 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4023 } else { // negative increment 4024 if (vec[0] > lo || vec[0] < up) { 4025 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4026 "bounds [%lld,%lld]\n", 4027 gtid, vec[0], lo, up)); 4028 return; 4029 } 4030 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4031 } 4032 for (i = 1; i < num_dims; ++i) { 4033 kmp_int64 iter, ln; 4034 kmp_int32 j = i * 4; 4035 ln = pr_buf->th_doacross_info[j + 1]; 4036 lo = pr_buf->th_doacross_info[j + 2]; 4037 up = pr_buf->th_doacross_info[j + 3]; 4038 st = pr_buf->th_doacross_info[j + 4]; 4039 if (st == 1) { 4040 if (vec[i] < lo || vec[i] > up) { 4041 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4042 "bounds [%lld,%lld]\n", 4043 gtid, vec[i], lo, up)); 4044 return; 4045 } 4046 iter = vec[i] - lo; 4047 } else if (st > 0) { 4048 if (vec[i] < lo || vec[i] > up) { 4049 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4050 "bounds [%lld,%lld]\n", 4051 gtid, vec[i], lo, up)); 4052 return; 4053 } 4054 iter = (kmp_uint64)(vec[i] - lo) / st; 4055 } else { // st < 0 4056 if (vec[i] > lo || vec[i] < up) { 4057 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4058 "bounds [%lld,%lld]\n", 4059 gtid, vec[i], lo, up)); 4060 return; 4061 } 4062 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4063 } 4064 iter_number = iter + ln * iter_number; 4065 } 4066 shft = iter_number % 32; // use 32-bit granularity 4067 iter_number >>= 5; // divided by 32 4068 flag = 1 << shft; 4069 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4070 KMP_YIELD(TRUE); 4071 } 4072 KMP_MB(); 4073 KA_TRACE(20, 4074 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4075 gtid, (iter_number << 5) + shft)); 4076 } 4077 4078 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4079 kmp_int32 shft, num_dims, i; 4080 kmp_uint32 flag; 4081 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4082 kmp_info_t *th = __kmp_threads[gtid]; 4083 kmp_team_t *team = th->th.th_team; 4084 kmp_disp_t *pr_buf; 4085 kmp_int64 lo, st; 4086 4087 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4088 if (team->t.t_serialized) { 4089 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4090 return; // no dependencies if team is serialized 4091 } 4092 4093 // calculate sequential iteration number (same as in "wait" but no 4094 // out-of-bounds checks) 4095 pr_buf = th->th.th_dispatch; 4096 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4097 num_dims = pr_buf->th_doacross_info[0]; 4098 lo = pr_buf->th_doacross_info[2]; 4099 st = pr_buf->th_doacross_info[4]; 4100 if (st == 1) { // most common case 4101 iter_number = vec[0] - lo; 4102 } else if (st > 0) { 4103 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4104 } else { // negative increment 4105 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4106 } 4107 for (i = 1; i < num_dims; ++i) { 4108 kmp_int64 iter, ln; 4109 kmp_int32 j = i * 4; 4110 ln = pr_buf->th_doacross_info[j + 1]; 4111 lo = pr_buf->th_doacross_info[j + 2]; 4112 st = pr_buf->th_doacross_info[j + 4]; 4113 if (st == 1) { 4114 iter = vec[i] - lo; 4115 } else if (st > 0) { 4116 iter = (kmp_uint64)(vec[i] - lo) / st; 4117 } else { // st < 0 4118 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4119 } 4120 iter_number = iter + ln * iter_number; 4121 } 4122 shft = iter_number % 32; // use 32-bit granularity 4123 iter_number >>= 5; // divided by 32 4124 flag = 1 << shft; 4125 KMP_MB(); 4126 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4127 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4128 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4129 (iter_number << 5) + shft)); 4130 } 4131 4132 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4133 kmp_int32 num_done; 4134 kmp_info_t *th = __kmp_threads[gtid]; 4135 kmp_team_t *team = th->th.th_team; 4136 kmp_disp_t *pr_buf = th->th.th_dispatch; 4137 4138 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4139 if (team->t.t_serialized) { 4140 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4141 return; // nothing to do 4142 } 4143 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4144 if (num_done == th->th.th_team_nproc) { 4145 // we are the last thread, need to free shared resources 4146 int idx = pr_buf->th_doacross_buf_idx - 1; 4147 dispatch_shared_info_t *sh_buf = 4148 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4149 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4150 (kmp_int64)&sh_buf->doacross_num_done); 4151 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4152 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4153 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4154 sh_buf->doacross_flags = NULL; 4155 sh_buf->doacross_num_done = 0; 4156 sh_buf->doacross_buf_idx += 4157 __kmp_dispatch_num_buffers; // free buffer for future re-use 4158 } 4159 // free private resources (need to keep buffer index forever) 4160 pr_buf->th_doacross_flags = NULL; 4161 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4162 pr_buf->th_doacross_info = NULL; 4163 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4164 } 4165 4166 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4167 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4168 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4169 } 4170 4171 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4172 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4173 } 4174 4175 int __kmpc_get_target_offload(void) { 4176 if (!__kmp_init_serial) { 4177 __kmp_serial_initialize(); 4178 } 4179 return __kmp_target_offload; 4180 } 4181 4182 int __kmpc_pause_resource(kmp_pause_status_t level) { 4183 if (!__kmp_init_serial) { 4184 return 1; // Can't pause if runtime is not initialized 4185 } 4186 return __kmp_pause_resource(level); 4187 } 4188
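
/* Illustrative use of the allocator entry points above from user code (not part
   of the runtime itself); a minimal sketch using the predefined default allocator:

     double *buf = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/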