/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called it
 * will not do anything unless the `KMP_IGNORE_MPPEND` environment variable is
 * set to zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (which can cause library shutdown).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // The normal exit process on Windows does not allow worker threads of the
  // final parallel region to finish reporting their events, so shutting down
  // the library here fixes the issue at least for the cases where __kmpc_end()
  // is placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
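
As an illustration only (a sketch of typical compiler-generated code, not a
normative contract), a compiler will usually obtain the global thread number
once on entry to a routine containing OpenMP constructs and pass it to later
__kmpc_* calls; 'loc' here stands for a compiler-emitted ident_t record:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
// ... later in the same routine, e.g. an explicit barrier:
__kmpc_barrier(&loc, gtid);
@endcode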
92 93 If multiple non-OpenMP threads all enter an OpenMP construct then this 94 will be a unique thread identifier among all the threads created by 95 the OpenMP runtime (but the value cannot be defined in terms of 96 OpenMP thread ids returned by omp_get_thread_num()). 97 */ 98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 99 kmp_int32 gtid = __kmp_entry_gtid(); 100 101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 102 103 return gtid; 104 } 105 106 /*! 107 @ingroup THREAD_STATES 108 @param loc Source location information. 109 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 110 111 This function can be called in any context. 112 It returns the total number of threads under the control of the OpenMP runtime. 113 That is not a number that can be determined by any OpenMP standard calls, since 114 the library may be called from more than one non-OpenMP thread, and this 115 reflects the total over all such calls. Similarly the runtime maintains 116 underlying threads even when they are not active (since the cost of creating 117 and destroying OS threads is high), this call counts all such threads even if 118 they are not waiting for work. 119 */ 120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 121 KC_TRACE(10, 122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 123 124 return TCR_4(__kmp_all_nth); 125 } 126 127 /*! 128 @ingroup THREAD_STATES 129 @param loc Source location information. 130 @return The thread number of the calling thread in the innermost active parallel 131 construct. 132 */ 133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 135 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 136 } 137 138 /*! 139 @ingroup THREAD_STATES 140 @param loc Source location information. 141 @return The number of threads in the innermost active parallel construct. 142 */ 143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 145 146 return __kmp_entry_thread()->th.th_team->t.t_nproc; 147 } 148 149 /*! 150 * @ingroup DEPRECATED 151 * @param loc location description 152 * 153 * This function need not be called. It always returns TRUE. 
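
In debug builds, as the code below shows, the result can additionally be
filtered through the __kmp_par_range* settings, which are matched against the
semicolon-separated fields of loc->psource; judging from that parsing, the
string has roughly the layout shown in this purely illustrative example:
@code
";file-name;routine-name;line-number;..."
@endcode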
154 */ 155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 156 #ifndef KMP_DEBUG 157 158 return TRUE; 159 160 #else 161 162 const char *semi2; 163 const char *semi3; 164 int line_no; 165 166 if (__kmp_par_range == 0) { 167 return TRUE; 168 } 169 semi2 = loc->psource; 170 if (semi2 == NULL) { 171 return TRUE; 172 } 173 semi2 = strchr(semi2, ';'); 174 if (semi2 == NULL) { 175 return TRUE; 176 } 177 semi2 = strchr(semi2 + 1, ';'); 178 if (semi2 == NULL) { 179 return TRUE; 180 } 181 if (__kmp_par_range_filename[0]) { 182 const char *name = semi2 - 1; 183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 184 name--; 185 } 186 if ((*name == '/') || (*name == ';')) { 187 name++; 188 } 189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 190 return __kmp_par_range < 0; 191 } 192 } 193 semi3 = strchr(semi2 + 1, ';'); 194 if (__kmp_par_range_routine[0]) { 195 if ((semi3 != NULL) && (semi3 > semi2) && 196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 197 return __kmp_par_range < 0; 198 } 199 } 200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 202 return __kmp_par_range > 0; 203 } 204 return __kmp_par_range < 0; 205 } 206 return TRUE; 207 208 #endif /* KMP_DEBUG */ 209 } 210 211 /*! 212 @ingroup THREAD_STATES 213 @param loc Source location information. 214 @return 1 if this thread is executing inside an active parallel region, zero if 215 not. 216 */ 217 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 218 return __kmp_entry_thread()->th.th_root->r.r_active; 219 } 220 221 /*! 222 @ingroup PARALLEL 223 @param loc source location information 224 @param global_tid global thread number 225 @param num_threads number of threads requested for this parallel construct 226 227 Set the number of threads to be used by the next fork spawned by this thread. 228 This call is only required if the parallel construct has a `num_threads` clause. 229 */ 230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 231 kmp_int32 num_threads) { 232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 233 global_tid, num_threads)); 234 235 __kmp_push_num_threads(loc, global_tid, num_threads); 236 } 237 238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 240 241 /* the num_threads are automatically popped */ 242 } 243 244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 245 kmp_int32 proc_bind) { 246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 247 proc_bind)); 248 249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 250 } 251 252 /*! 253 @ingroup PARALLEL 254 @param loc source location information 255 @param argc total number of arguments in the ellipsis 256 @param microtask pointer to callback routine consisting of outlined parallel 257 construct 258 @param ... pointers to shared variables that aren't global 259 260 Do the actual fork and call the microtask in the relevant number of threads. 261 */ 262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 263 int gtid = __kmp_entry_gtid(); 264 265 #if (KMP_STATS_ENABLED) 266 // If we were in a serial region, then stop the serial timer, record 267 // the event, and start parallel region timer 268 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 269 if (previous_state == stats_state_e::SERIAL_REGION) { 270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 271 } else { 272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 273 } 274 int inParallel = __kmpc_in_parallel(loc); 275 if (inParallel) { 276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 277 } else { 278 KMP_COUNT_BLOCK(OMP_PARALLEL); 279 } 280 #endif 281 282 // maybe to save thr_state is enough here 283 { 284 va_list ap; 285 va_start(ap, microtask); 286 287 #if OMPT_SUPPORT 288 ompt_frame_t *ompt_frame; 289 if (ompt_enabled.enabled) { 290 kmp_info_t *master_th = __kmp_threads[gtid]; 291 kmp_team_t *parent_team = master_th->th.th_team; 292 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 293 if (lwt) 294 ompt_frame = &(lwt->ompt_task_info.frame); 295 else { 296 int tid = __kmp_tid_from_gtid(gtid); 297 ompt_frame = &( 298 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 299 } 300 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 301 OMPT_STORE_RETURN_ADDRESS(gtid); 302 } 303 #endif 304 305 #if INCLUDE_SSC_MARKS 306 SSC_MARK_FORKING(); 307 #endif 308 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 309 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 310 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 311 kmp_va_addr_of(ap)); 312 #if INCLUDE_SSC_MARKS 313 SSC_MARK_JOINING(); 314 #endif 315 __kmp_join_call(loc, gtid 316 #if OMPT_SUPPORT 317 , 318 fork_context_intel 319 #endif 320 ); 321 322 va_end(ap); 323 } 324 325 #if KMP_STATS_ENABLED 326 if (previous_state == stats_state_e::SERIAL_REGION) { 327 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 328 } else { 329 KMP_POP_PARTITIONED_TIMER(); 330 } 331 #endif // KMP_STATS_ENABLED 332 } 333 334 /*! 335 @ingroup PARALLEL 336 @param loc source location information 337 @param global_tid global thread number 338 @param num_teams number of teams requested for the teams construct 339 @param num_threads number of threads per team requested for the teams construct 340 341 Set the number of teams to be used by the teams construct. 342 This call is only required if the teams construct has a `num_teams` clause 343 or a `thread_limit` clause (or both). 344 */ 345 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 346 kmp_int32 num_teams, kmp_int32 num_threads) { 347 KA_TRACE(20, 348 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 349 global_tid, num_teams, num_threads)); 350 351 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 352 } 353 354 /*! 355 @ingroup PARALLEL 356 @param loc source location information 357 @param argc total number of arguments in the ellipsis 358 @param microtask pointer to callback routine consisting of outlined teams 359 construct 360 @param ... pointers to shared variables that aren't global 361 362 Do the actual fork and call the microtask in the relevant number of threads. 363 */ 364 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 365 ...) 
{ 366 int gtid = __kmp_entry_gtid(); 367 kmp_info_t *this_thr = __kmp_threads[gtid]; 368 va_list ap; 369 va_start(ap, microtask); 370 371 #if KMP_STATS_ENABLED 372 KMP_COUNT_BLOCK(OMP_TEAMS); 373 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 374 if (previous_state == stats_state_e::SERIAL_REGION) { 375 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 376 } else { 377 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 378 } 379 #endif 380 381 // remember teams entry point and nesting level 382 this_thr->th.th_teams_microtask = microtask; 383 this_thr->th.th_teams_level = 384 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 385 386 #if OMPT_SUPPORT 387 kmp_team_t *parent_team = this_thr->th.th_team; 388 int tid = __kmp_tid_from_gtid(gtid); 389 if (ompt_enabled.enabled) { 390 parent_team->t.t_implicit_task_taskdata[tid] 391 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 392 } 393 OMPT_STORE_RETURN_ADDRESS(gtid); 394 #endif 395 396 // check if __kmpc_push_num_teams called, set default number of teams 397 // otherwise 398 if (this_thr->th.th_teams_size.nteams == 0) { 399 __kmp_push_num_teams(loc, gtid, 0, 0); 400 } 401 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 402 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 403 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 404 405 __kmp_fork_call( 406 loc, gtid, fork_context_intel, argc, 407 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 408 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 409 __kmp_join_call(loc, gtid 410 #if OMPT_SUPPORT 411 , 412 fork_context_intel 413 #endif 414 ); 415 416 // Pop current CG root off list 417 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 418 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 419 this_thr->th.th_cg_roots = tmp->up; 420 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 421 " to node %p. cg_nthreads was %d\n", 422 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 423 KMP_DEBUG_ASSERT(tmp->cg_nthreads); 424 int i = tmp->cg_nthreads--; 425 if (i == 1) { // check is we are the last thread in CG (not always the case) 426 __kmp_free(tmp); 427 } 428 // Restore current task's thread_limit from CG root 429 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 430 this_thr->th.th_current_task->td_icvs.thread_limit = 431 this_thr->th.th_cg_roots->cg_thread_limit; 432 433 this_thr->th.th_teams_microtask = NULL; 434 this_thr->th.th_teams_level = 0; 435 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 436 va_end(ap); 437 #if KMP_STATS_ENABLED 438 if (previous_state == stats_state_e::SERIAL_REGION) { 439 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 440 } else { 441 KMP_POP_PARTITIONED_TIMER(); 442 } 443 #endif // KMP_STATS_ENABLED 444 } 445 446 // I don't think this function should ever have been exported. 447 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 448 // openmp code ever called it, but it's been exported from the RTL for so 449 // long that I'm afraid to remove the definition. 450 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 451 452 /*! 453 @ingroup PARALLEL 454 @param loc source location information 455 @param global_tid global thread number 456 457 Enter a serialized parallel construct. This interface is used to handle a 458 conditional parallel region, like this, 459 @code 460 #pragma omp parallel if (condition) 461 @endcode 462 when the condition is false. 
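
For orientation only, the following sketch shows roughly how a compiler might
lower such a construct; 'outlined' is a hypothetical compiler-generated routine
with the kmpc_micro signature that contains the body of the region, and the
real code generation is of course compiler specific:
@code
if (condition) {
  __kmpc_fork_call(&loc, 0, outlined);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_serialized_parallel(&loc, gtid);
  // ... run the region body directly on the encountering thread ...
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode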
463 */ 464 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 465 // The implementation is now in kmp_runtime.cpp so that it can share static 466 // functions with kmp_fork_call since the tasks to be done are similar in 467 // each case. 468 #if OMPT_SUPPORT 469 OMPT_STORE_RETURN_ADDRESS(global_tid); 470 #endif 471 __kmp_serialized_parallel(loc, global_tid); 472 } 473 474 /*! 475 @ingroup PARALLEL 476 @param loc source location information 477 @param global_tid global thread number 478 479 Leave a serialized parallel construct. 480 */ 481 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 482 kmp_internal_control_t *top; 483 kmp_info_t *this_thr; 484 kmp_team_t *serial_team; 485 486 KC_TRACE(10, 487 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 488 489 /* skip all this code for autopar serialized loops since it results in 490 unacceptable overhead */ 491 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 492 return; 493 494 // Not autopar code 495 if (!TCR_4(__kmp_init_parallel)) 496 __kmp_parallel_initialize(); 497 498 __kmp_resume_if_soft_paused(); 499 500 this_thr = __kmp_threads[global_tid]; 501 serial_team = this_thr->th.th_serial_team; 502 503 kmp_task_team_t *task_team = this_thr->th.th_task_team; 504 // we need to wait for the proxy tasks before finishing the thread 505 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 506 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 507 508 KMP_MB(); 509 KMP_DEBUG_ASSERT(serial_team); 510 KMP_ASSERT(serial_team->t.t_serialized); 511 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 512 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 513 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 514 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 515 516 #if OMPT_SUPPORT 517 if (ompt_enabled.enabled && 518 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 519 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 520 if (ompt_enabled.ompt_callback_implicit_task) { 521 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 522 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 523 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 524 } 525 526 // reset clear the task id only after unlinking the task 527 ompt_data_t *parent_task_data; 528 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 529 530 if (ompt_enabled.ompt_callback_parallel_end) { 531 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 532 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 533 ompt_parallel_invoker_program | ompt_parallel_team, 534 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 535 } 536 __ompt_lw_taskteam_unlink(this_thr); 537 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 538 } 539 #endif 540 541 /* If necessary, pop the internal control stack values and replace the team 542 * values */ 543 top = serial_team->t.t_control_stack_top; 544 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 545 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 546 serial_team->t.t_control_stack_top = top->next; 547 __kmp_free(top); 548 } 549 550 // if( serial_team -> t.t_serialized > 1 ) 551 serial_team->t.t_level--; 552 553 /* pop dispatch buffers stack */ 554 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 555 { 556 dispatch_private_info_t *disp_buffer = 557 serial_team->t.t_dispatch->th_disp_buffer; 558 
serial_team->t.t_dispatch->th_disp_buffer = 559 serial_team->t.t_dispatch->th_disp_buffer->next; 560 __kmp_free(disp_buffer); 561 } 562 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 563 564 --serial_team->t.t_serialized; 565 if (serial_team->t.t_serialized == 0) { 566 567 /* return to the parallel section */ 568 569 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 570 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 571 __kmp_clear_x87_fpu_status_word(); 572 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 573 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 574 } 575 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 576 577 this_thr->th.th_team = serial_team->t.t_parent; 578 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 579 580 /* restore values cached in the thread */ 581 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 582 this_thr->th.th_team_master = 583 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 584 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 585 586 /* TODO the below shouldn't need to be adjusted for serialized teams */ 587 this_thr->th.th_dispatch = 588 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 589 590 __kmp_pop_current_task_from_thread(this_thr); 591 592 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 593 this_thr->th.th_current_task->td_flags.executing = 1; 594 595 if (__kmp_tasking_mode != tskm_immediate_exec) { 596 // Copy the task team from the new child / old parent team to the thread. 597 this_thr->th.th_task_team = 598 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 599 KA_TRACE(20, 600 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 601 "team %p\n", 602 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 603 } 604 } else { 605 if (__kmp_tasking_mode != tskm_immediate_exec) { 606 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 607 "depth of serial team %p to %d\n", 608 global_tid, serial_team, serial_team->t.t_serialized)); 609 } 610 } 611 612 if (__kmp_env_consistency_check) 613 __kmp_pop_parallel(global_tid, NULL); 614 #if OMPT_SUPPORT 615 if (ompt_enabled.enabled) 616 this_thr->th.ompt_thread_info.state = 617 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 618 : ompt_state_work_parallel); 619 #endif 620 } 621 622 /*! 623 @ingroup SYNCHRONIZATION 624 @param loc source location information. 625 626 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 627 depending on the memory ordering convention obeyed by the compiler 628 even that may not be necessary). 629 */ 630 void __kmpc_flush(ident_t *loc) { 631 KC_TRACE(10, ("__kmpc_flush: called\n")); 632 633 /* need explicit __mf() here since use volatile instead in library */ 634 KMP_MB(); /* Flush all pending memory write invalidates. */ 635 636 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 637 #if KMP_MIC 638 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 639 // We shouldn't need it, though, since the ABI rules require that 640 // * If the compiler generates NGO stores it also generates the fence 641 // * If users hand-code NGO stores they should insert the fence 642 // therefore no incomplete unordered stores should be visible. 643 #else 644 // C74404 645 // This is to address non-temporal store instructions (sfence needed). 646 // The clflush instruction is addressed either (mfence needed). 
647 // Probably the non-temporal load monvtdqa instruction should also be 648 // addressed. 649 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. 650 if (!__kmp_cpuinfo.initialized) { 651 __kmp_query_cpuid(&__kmp_cpuinfo); 652 } 653 if (!__kmp_cpuinfo.sse2) { 654 // CPU cannot execute SSE2 instructions. 655 } else { 656 #if KMP_COMPILER_ICC 657 _mm_mfence(); 658 #elif KMP_COMPILER_MSVC 659 MemoryBarrier(); 660 #else 661 __sync_synchronize(); 662 #endif // KMP_COMPILER_ICC 663 } 664 #endif // KMP_MIC 665 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 666 KMP_ARCH_RISCV64) 667 // Nothing to see here move along 668 #elif KMP_ARCH_PPC64 669 // Nothing needed here (we have a real MB above). 670 #if KMP_OS_CNK 671 // The flushing thread needs to yield here; this prevents a 672 // busy-waiting thread from saturating the pipeline. flush is 673 // often used in loops like this: 674 // while (!flag) { 675 // #pragma omp flush(flag) 676 // } 677 // and adding the yield here is good for at least a 10x speedup 678 // when running >2 threads per core (on the NAS LU benchmark). 679 __kmp_yield(); 680 #endif 681 #else 682 #error Unknown or unsupported architecture 683 #endif 684 685 #if OMPT_SUPPORT && OMPT_OPTIONAL 686 if (ompt_enabled.ompt_callback_flush) { 687 ompt_callbacks.ompt_callback(ompt_callback_flush)( 688 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 689 } 690 #endif 691 } 692 693 /* -------------------------------------------------------------------------- */ 694 /*! 695 @ingroup SYNCHRONIZATION 696 @param loc source location information 697 @param global_tid thread id. 698 699 Execute a barrier. 700 */ 701 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 702 KMP_COUNT_BLOCK(OMP_BARRIER); 703 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 704 705 if (!TCR_4(__kmp_init_parallel)) 706 __kmp_parallel_initialize(); 707 708 __kmp_resume_if_soft_paused(); 709 710 if (__kmp_env_consistency_check) { 711 if (loc == 0) { 712 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 713 } 714 __kmp_check_barrier(global_tid, ct_barrier, loc); 715 } 716 717 #if OMPT_SUPPORT 718 ompt_frame_t *ompt_frame; 719 if (ompt_enabled.enabled) { 720 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 721 if (ompt_frame->enter_frame.ptr == NULL) 722 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 723 OMPT_STORE_RETURN_ADDRESS(global_tid); 724 } 725 #endif 726 __kmp_threads[global_tid]->th.th_ident = loc; 727 // TODO: explicit barrier_wait_id: 728 // this function is called when 'barrier' directive is present or 729 // implicit barrier at the end of a worksharing construct. 730 // 1) better to add a per-thread barrier counter to a thread data structure 731 // 2) set to 0 when a new team is created 732 // 4) no sync is required 733 734 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 735 #if OMPT_SUPPORT && OMPT_OPTIONAL 736 if (ompt_enabled.enabled) { 737 ompt_frame->enter_frame = ompt_data_none; 738 } 739 #endif 740 } 741 742 /* The BARRIER for a MASTER section is always explicit */ 743 /*! 744 @ingroup WORK_SHARING 745 @param loc source location information. 746 @param global_tid global thread number . 747 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
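
For orientation, a sketch of the usual lowering of a <tt>master</tt> construct
(illustrative only; 'loc' and 'gtid' are assumed to be available in the
generated code):
@code
if (__kmpc_master(&loc, gtid)) {
  // ... master block ...
  __kmpc_end_master(&loc, gtid);
}
@endcode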
748 */ 749 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 750 int status = 0; 751 752 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 753 754 if (!TCR_4(__kmp_init_parallel)) 755 __kmp_parallel_initialize(); 756 757 __kmp_resume_if_soft_paused(); 758 759 if (KMP_MASTER_GTID(global_tid)) { 760 KMP_COUNT_BLOCK(OMP_MASTER); 761 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 762 status = 1; 763 } 764 765 #if OMPT_SUPPORT && OMPT_OPTIONAL 766 if (status) { 767 if (ompt_enabled.ompt_callback_master) { 768 kmp_info_t *this_thr = __kmp_threads[global_tid]; 769 kmp_team_t *team = this_thr->th.th_team; 770 771 int tid = __kmp_tid_from_gtid(global_tid); 772 ompt_callbacks.ompt_callback(ompt_callback_master)( 773 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 774 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 775 OMPT_GET_RETURN_ADDRESS(0)); 776 } 777 } 778 #endif 779 780 if (__kmp_env_consistency_check) { 781 #if KMP_USE_DYNAMIC_LOCK 782 if (status) 783 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 784 else 785 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 786 #else 787 if (status) 788 __kmp_push_sync(global_tid, ct_master, loc, NULL); 789 else 790 __kmp_check_sync(global_tid, ct_master, loc, NULL); 791 #endif 792 } 793 794 return status; 795 } 796 797 /*! 798 @ingroup WORK_SHARING 799 @param loc source location information. 800 @param global_tid global thread number . 801 802 Mark the end of a <tt>master</tt> region. This should only be called by the 803 thread that executes the <tt>master</tt> region. 804 */ 805 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 806 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 807 808 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 809 KMP_POP_PARTITIONED_TIMER(); 810 811 #if OMPT_SUPPORT && OMPT_OPTIONAL 812 kmp_info_t *this_thr = __kmp_threads[global_tid]; 813 kmp_team_t *team = this_thr->th.th_team; 814 if (ompt_enabled.ompt_callback_master) { 815 int tid = __kmp_tid_from_gtid(global_tid); 816 ompt_callbacks.ompt_callback(ompt_callback_master)( 817 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 818 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 819 OMPT_GET_RETURN_ADDRESS(0)); 820 } 821 #endif 822 823 if (__kmp_env_consistency_check) { 824 if (global_tid < 0) 825 KMP_WARNING(ThreadIdentInvalid); 826 827 if (KMP_MASTER_GTID(global_tid)) 828 __kmp_pop_sync(global_tid, ct_master, loc); 829 } 830 } 831 832 /*! 833 @ingroup WORK_SHARING 834 @param loc source location information. 835 @param gtid global thread number. 836 837 Start execution of an <tt>ordered</tt> construct. 
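
For orientation, an <tt>ordered</tt> region inside a worksharing loop is
typically bracketed as sketched below (illustrative only; the enclosing chunked
loop driven by the dispatch machinery is not shown):
@code
__kmpc_ordered(&loc, gtid);
// ... body of '#pragma omp ordered' ...
__kmpc_end_ordered(&loc, gtid);
@endcode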
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
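  // Publication protocol (as implemented below): each contender allocates and
  // initializes its own indirect lock and then tries to install it with a
  // compare-and-store; a loser simply uses the winner's lock, and its own
  // unclaimed lock is left to be reclaimed at program exit.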
948 void *idx; 949 kmp_indirect_lock_t **lck; 950 lck = (kmp_indirect_lock_t **)crit; 951 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 952 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 953 KMP_SET_I_LOCK_LOCATION(ilk, loc); 954 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 955 KA_TRACE(20, 956 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 957 #if USE_ITT_BUILD 958 __kmp_itt_critical_creating(ilk->lock, loc); 959 #endif 960 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 961 if (status == 0) { 962 #if USE_ITT_BUILD 963 __kmp_itt_critical_destroyed(ilk->lock); 964 #endif 965 // We don't really need to destroy the unclaimed lock here since it will be 966 // cleaned up at program exit. 967 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 968 } 969 KMP_DEBUG_ASSERT(*lck != NULL); 970 } 971 972 // Fast-path acquire tas lock 973 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 974 { \ 975 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 976 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 977 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 978 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 979 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 980 kmp_uint32 spins; \ 981 KMP_FSYNC_PREPARE(l); \ 982 KMP_INIT_YIELD(spins); \ 983 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 984 do { \ 985 if (TCR_4(__kmp_nth) > \ 986 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 987 KMP_YIELD(TRUE); \ 988 } else { \ 989 KMP_YIELD_SPIN(spins); \ 990 } \ 991 __kmp_spin_backoff(&backoff); \ 992 } while ( \ 993 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 994 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 995 } \ 996 KMP_FSYNC_ACQUIRED(l); \ 997 } 998 999 // Fast-path test tas lock 1000 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1001 { \ 1002 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1003 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1004 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1005 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1006 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1007 } 1008 1009 // Fast-path release tas lock 1010 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1011 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1012 1013 #if KMP_USE_FUTEX 1014 1015 #include <sys/syscall.h> 1016 #include <unistd.h> 1017 #ifndef FUTEX_WAIT 1018 #define FUTEX_WAIT 0 1019 #endif 1020 #ifndef FUTEX_WAKE 1021 #define FUTEX_WAKE 1 1022 #endif 1023 1024 // Fast-path acquire futex lock 1025 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1026 { \ 1027 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1028 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1029 KMP_MB(); \ 1030 KMP_FSYNC_PREPARE(ftx); \ 1031 kmp_int32 poll_val; \ 1032 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1033 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1034 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1035 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1036 if (!cond) { \ 1037 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1038 poll_val | \ 1039 KMP_LOCK_BUSY(1, futex))) { \ 1040 continue; \ 1041 } \ 1042 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1043 } \ 1044 kmp_int32 rc; \ 1045 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1046 NULL, NULL, 0)) != 0) { \ 1047 continue; \ 1048 } \ 1049 gtid_code |= 1; \ 1050 } \ 1051 KMP_FSYNC_ACQUIRED(ftx); \ 1052 } 1053 1054 // Fast-path test futex lock 1055 #define 
KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1056 { \ 1057 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1058 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1059 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1060 KMP_FSYNC_ACQUIRED(ftx); \ 1061 rc = TRUE; \ 1062 } else { \ 1063 rc = FALSE; \ 1064 } \ 1065 } 1066 1067 // Fast-path release futex lock 1068 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1069 { \ 1070 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1071 KMP_MB(); \ 1072 KMP_FSYNC_RELEASING(ftx); \ 1073 kmp_int32 poll_val = \ 1074 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1075 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1076 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1077 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1078 } \ 1079 KMP_MB(); \ 1080 KMP_YIELD_OVERSUB(); \ 1081 } 1082 1083 #endif // KMP_USE_FUTEX 1084 1085 #else // KMP_USE_DYNAMIC_LOCK 1086 1087 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1088 ident_t const *loc, 1089 kmp_int32 gtid) { 1090 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1091 1092 // Because of the double-check, the following load doesn't need to be volatile 1093 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1094 1095 if (lck == NULL) { 1096 void *idx; 1097 1098 // Allocate & initialize the lock. 1099 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1100 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1101 __kmp_init_user_lock_with_checks(lck); 1102 __kmp_set_user_lock_location(lck, loc); 1103 #if USE_ITT_BUILD 1104 __kmp_itt_critical_creating(lck); 1105 // __kmp_itt_critical_creating() should be called *before* the first usage 1106 // of underlying lock. It is the only place where we can guarantee it. There 1107 // are chances the lock will destroyed with no usage, but it is not a 1108 // problem, because this is not real event seen by user but rather setting 1109 // name for object (lock). See more details in kmp_itt.h. 1110 #endif /* USE_ITT_BUILD */ 1111 1112 // Use a cmpxchg instruction to slam the start of the critical section with 1113 // the lock pointer. If another thread beat us to it, deallocate the lock, 1114 // and use the lock that the other thread allocated. 1115 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1116 1117 if (status == 0) { 1118 // Deallocate the lock and reload the value. 1119 #if USE_ITT_BUILD 1120 __kmp_itt_critical_destroyed(lck); 1121 // Let ITT know the lock is destroyed and the same memory location may be reused 1122 // for another purpose. 1123 #endif /* USE_ITT_BUILD */ 1124 __kmp_destroy_user_lock_with_checks(lck); 1125 __kmp_user_lock_free(&idx, gtid, lck); 1126 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1127 KMP_DEBUG_ASSERT(lck != NULL); 1128 } 1129 } 1130 return lck; 1131 } 1132 1133 #endif // KMP_USE_DYNAMIC_LOCK 1134 1135 /*! 1136 @ingroup WORK_SHARING 1137 @param loc source location information. 1138 @param global_tid global thread number . 1139 @param crit identity of the critical section. This could be a pointer to a lock 1140 associated with the critical section, or some other suitably unique value. 1141 1142 Enter code protected by a `critical` construct. 1143 This function blocks until the executing thread can enter the critical section. 
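
For orientation, a sketch of the usual lowering of a named <tt>critical</tt>
construct (illustrative only; the kmp_critical_name object is assumed to be a
zero-initialized, compiler-emitted static shared by all uses of that name):
@code
static kmp_critical_name crit_foo; // hypothetical; one object per name
__kmpc_critical(&loc, gtid, &crit_foo);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &crit_foo);
@endcode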
1144 */ 1145 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1146 kmp_critical_name *crit) { 1147 #if KMP_USE_DYNAMIC_LOCK 1148 #if OMPT_SUPPORT && OMPT_OPTIONAL 1149 OMPT_STORE_RETURN_ADDRESS(global_tid); 1150 #endif // OMPT_SUPPORT 1151 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1152 #else 1153 KMP_COUNT_BLOCK(OMP_CRITICAL); 1154 #if OMPT_SUPPORT && OMPT_OPTIONAL 1155 ompt_state_t prev_state = ompt_state_undefined; 1156 ompt_thread_info_t ti; 1157 #endif 1158 kmp_user_lock_p lck; 1159 1160 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1161 1162 // TODO: add THR_OVHD_STATE 1163 1164 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1165 KMP_CHECK_USER_LOCK_INIT(); 1166 1167 if ((__kmp_user_lock_kind == lk_tas) && 1168 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1169 lck = (kmp_user_lock_p)crit; 1170 } 1171 #if KMP_USE_FUTEX 1172 else if ((__kmp_user_lock_kind == lk_futex) && 1173 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1174 lck = (kmp_user_lock_p)crit; 1175 } 1176 #endif 1177 else { // ticket, queuing or drdpa 1178 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1179 } 1180 1181 if (__kmp_env_consistency_check) 1182 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1183 1184 // since the critical directive binds to all threads, not just the current 1185 // team we have to check this even if we are in a serialized team. 1186 // also, even if we are the uber thread, we still have to conduct the lock, 1187 // as we have to contend with sibling threads. 1188 1189 #if USE_ITT_BUILD 1190 __kmp_itt_critical_acquiring(lck); 1191 #endif /* USE_ITT_BUILD */ 1192 #if OMPT_SUPPORT && OMPT_OPTIONAL 1193 OMPT_STORE_RETURN_ADDRESS(gtid); 1194 void *codeptr_ra = NULL; 1195 if (ompt_enabled.enabled) { 1196 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1197 /* OMPT state update */ 1198 prev_state = ti.state; 1199 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1200 ti.state = ompt_state_wait_critical; 1201 1202 /* OMPT event callback */ 1203 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1204 if (ompt_enabled.ompt_callback_mutex_acquire) { 1205 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1206 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1207 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1208 } 1209 } 1210 #endif 1211 // Value of 'crit' should be good for using as a critical_id of the critical 1212 // section directive. 
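  // The acquire below blocks until the lock is obtained; the corresponding
  // "acquired" ITT/OMPT notifications are only issued after it returns.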
1213 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1214 1215 #if USE_ITT_BUILD 1216 __kmp_itt_critical_acquired(lck); 1217 #endif /* USE_ITT_BUILD */ 1218 #if OMPT_SUPPORT && OMPT_OPTIONAL 1219 if (ompt_enabled.enabled) { 1220 /* OMPT state update */ 1221 ti.state = prev_state; 1222 ti.wait_id = 0; 1223 1224 /* OMPT event callback */ 1225 if (ompt_enabled.ompt_callback_mutex_acquired) { 1226 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1227 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1228 } 1229 } 1230 #endif 1231 KMP_POP_PARTITIONED_TIMER(); 1232 1233 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1234 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1235 #endif // KMP_USE_DYNAMIC_LOCK 1236 } 1237 1238 #if KMP_USE_DYNAMIC_LOCK 1239 1240 // Converts the given hint to an internal lock implementation 1241 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1242 #if KMP_USE_TSX 1243 #define KMP_TSX_LOCK(seq) lockseq_##seq 1244 #else 1245 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1246 #endif 1247 1248 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1249 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1250 #else 1251 #define KMP_CPUINFO_RTM 0 1252 #endif 1253 1254 // Hints that do not require further logic 1255 if (hint & kmp_lock_hint_hle) 1256 return KMP_TSX_LOCK(hle); 1257 if (hint & kmp_lock_hint_rtm) 1258 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1259 if (hint & kmp_lock_hint_adaptive) 1260 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1261 1262 // Rule out conflicting hints first by returning the default lock 1263 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1264 return __kmp_user_lock_seq; 1265 if ((hint & omp_lock_hint_speculative) && 1266 (hint & omp_lock_hint_nonspeculative)) 1267 return __kmp_user_lock_seq; 1268 1269 // Do not even consider speculation when it appears to be contended 1270 if (hint & omp_lock_hint_contended) 1271 return lockseq_queuing; 1272 1273 // Uncontended lock without speculation 1274 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1275 return lockseq_tas; 1276 1277 // HLE lock for speculation 1278 if (hint & omp_lock_hint_speculative) 1279 return KMP_TSX_LOCK(hle); 1280 1281 return __kmp_user_lock_seq; 1282 } 1283 1284 #if OMPT_SUPPORT && OMPT_OPTIONAL 1285 #if KMP_USE_DYNAMIC_LOCK 1286 static kmp_mutex_impl_t 1287 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1288 if (user_lock) { 1289 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1290 case 0: 1291 break; 1292 #if KMP_USE_FUTEX 1293 case locktag_futex: 1294 return kmp_mutex_impl_queuing; 1295 #endif 1296 case locktag_tas: 1297 return kmp_mutex_impl_spin; 1298 #if KMP_USE_TSX 1299 case locktag_hle: 1300 return kmp_mutex_impl_speculative; 1301 #endif 1302 default: 1303 return kmp_mutex_impl_none; 1304 } 1305 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1306 } 1307 KMP_ASSERT(ilock); 1308 switch (ilock->type) { 1309 #if KMP_USE_TSX 1310 case locktag_adaptive: 1311 case locktag_rtm: 1312 return kmp_mutex_impl_speculative; 1313 #endif 1314 case locktag_nested_tas: 1315 return kmp_mutex_impl_spin; 1316 #if KMP_USE_FUTEX 1317 case locktag_nested_futex: 1318 #endif 1319 case locktag_ticket: 1320 case locktag_queuing: 1321 case locktag_drdpa: 1322 case locktag_nested_ticket: 1323 case locktag_nested_queuing: 1324 case locktag_nested_drdpa: 1325 return kmp_mutex_impl_queuing; 1326 default: 1327 return kmp_mutex_impl_none; 
1328 } 1329 } 1330 #else 1331 // For locks without dynamic binding 1332 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1333 switch (__kmp_user_lock_kind) { 1334 case lk_tas: 1335 return kmp_mutex_impl_spin; 1336 #if KMP_USE_FUTEX 1337 case lk_futex: 1338 #endif 1339 case lk_ticket: 1340 case lk_queuing: 1341 case lk_drdpa: 1342 return kmp_mutex_impl_queuing; 1343 #if KMP_USE_TSX 1344 case lk_hle: 1345 case lk_rtm: 1346 case lk_adaptive: 1347 return kmp_mutex_impl_speculative; 1348 #endif 1349 default: 1350 return kmp_mutex_impl_none; 1351 } 1352 } 1353 #endif // KMP_USE_DYNAMIC_LOCK 1354 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1355 1356 /*! 1357 @ingroup WORK_SHARING 1358 @param loc source location information. 1359 @param global_tid global thread number. 1360 @param crit identity of the critical section. This could be a pointer to a lock 1361 associated with the critical section, or some other suitably unique value. 1362 @param hint the lock hint. 1363 1364 Enter code protected by a `critical` construct with a hint. The hint value is 1365 used to suggest a lock implementation. This function blocks until the executing 1366 thread can enter the critical section unless the hint suggests use of 1367 speculative execution and the hardware supports it. 1368 */ 1369 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1370 kmp_critical_name *crit, uint32_t hint) { 1371 KMP_COUNT_BLOCK(OMP_CRITICAL); 1372 kmp_user_lock_p lck; 1373 #if OMPT_SUPPORT && OMPT_OPTIONAL 1374 ompt_state_t prev_state = ompt_state_undefined; 1375 ompt_thread_info_t ti; 1376 // This is the case, if called from __kmpc_critical: 1377 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1378 if (!codeptr) 1379 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1380 #endif 1381 1382 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1383 1384 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1385 // Check if it is initialized. 1386 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1387 if (*lk == 0) { 1388 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1389 if (KMP_IS_D_LOCK(lckseq)) { 1390 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1391 KMP_GET_D_TAG(lckseq)); 1392 } else { 1393 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1394 } 1395 } 1396 // Branch for accessing the actual lock object and set operation. This 1397 // branching is inevitable since this lock initialization does not follow the 1398 // normal dispatch path (lock table is not used). 
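  // A nonzero direct-lock tag means the lock lives in the kmp_critical_name
  // storage itself; a zero tag means that storage instead holds a pointer to a
  // separately allocated kmp_indirect_lock_t, as set up by
  // __kmp_init_indirect_csptr() above.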
1399 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1400 lck = (kmp_user_lock_p)lk; 1401 if (__kmp_env_consistency_check) { 1402 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1403 __kmp_map_hint_to_lock(hint)); 1404 } 1405 #if USE_ITT_BUILD 1406 __kmp_itt_critical_acquiring(lck); 1407 #endif 1408 #if OMPT_SUPPORT && OMPT_OPTIONAL 1409 if (ompt_enabled.enabled) { 1410 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1411 /* OMPT state update */ 1412 prev_state = ti.state; 1413 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1414 ti.state = ompt_state_wait_critical; 1415 1416 /* OMPT event callback */ 1417 if (ompt_enabled.ompt_callback_mutex_acquire) { 1418 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1419 ompt_mutex_critical, (unsigned int)hint, 1420 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1421 codeptr); 1422 } 1423 } 1424 #endif 1425 #if KMP_USE_INLINED_TAS 1426 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1427 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1428 } else 1429 #elif KMP_USE_INLINED_FUTEX 1430 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1431 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1432 } else 1433 #endif 1434 { 1435 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1436 } 1437 } else { 1438 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1439 lck = ilk->lock; 1440 if (__kmp_env_consistency_check) { 1441 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1442 __kmp_map_hint_to_lock(hint)); 1443 } 1444 #if USE_ITT_BUILD 1445 __kmp_itt_critical_acquiring(lck); 1446 #endif 1447 #if OMPT_SUPPORT && OMPT_OPTIONAL 1448 if (ompt_enabled.enabled) { 1449 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1450 /* OMPT state update */ 1451 prev_state = ti.state; 1452 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1453 ti.state = ompt_state_wait_critical; 1454 1455 /* OMPT event callback */ 1456 if (ompt_enabled.ompt_callback_mutex_acquire) { 1457 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1458 ompt_mutex_critical, (unsigned int)hint, 1459 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1460 codeptr); 1461 } 1462 } 1463 #endif 1464 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1465 } 1466 KMP_POP_PARTITIONED_TIMER(); 1467 1468 #if USE_ITT_BUILD 1469 __kmp_itt_critical_acquired(lck); 1470 #endif /* USE_ITT_BUILD */ 1471 #if OMPT_SUPPORT && OMPT_OPTIONAL 1472 if (ompt_enabled.enabled) { 1473 /* OMPT state update */ 1474 ti.state = prev_state; 1475 ti.wait_id = 0; 1476 1477 /* OMPT event callback */ 1478 if (ompt_enabled.ompt_callback_mutex_acquired) { 1479 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1480 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1481 } 1482 } 1483 #endif 1484 1485 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1486 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1487 } // __kmpc_critical_with_hint 1488 1489 #endif // KMP_USE_DYNAMIC_LOCK 1490 1491 /*! 1492 @ingroup WORK_SHARING 1493 @param loc source location information. 1494 @param global_tid global thread number . 1495 @param crit identity of the critical section. This could be a pointer to a lock 1496 associated with the critical section, or some other suitably unique value. 1497 1498 Leave a critical section, releasing any lock that was held during its execution. 
1499 */ 1500 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1501 kmp_critical_name *crit) { 1502 kmp_user_lock_p lck; 1503 1504 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1505 1506 #if KMP_USE_DYNAMIC_LOCK 1507 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1508 lck = (kmp_user_lock_p)crit; 1509 KMP_ASSERT(lck != NULL); 1510 if (__kmp_env_consistency_check) { 1511 __kmp_pop_sync(global_tid, ct_critical, loc); 1512 } 1513 #if USE_ITT_BUILD 1514 __kmp_itt_critical_releasing(lck); 1515 #endif 1516 #if KMP_USE_INLINED_TAS 1517 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1518 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1519 } else 1520 #elif KMP_USE_INLINED_FUTEX 1521 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1522 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1523 } else 1524 #endif 1525 { 1526 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1527 } 1528 } else { 1529 kmp_indirect_lock_t *ilk = 1530 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1531 KMP_ASSERT(ilk != NULL); 1532 lck = ilk->lock; 1533 if (__kmp_env_consistency_check) { 1534 __kmp_pop_sync(global_tid, ct_critical, loc); 1535 } 1536 #if USE_ITT_BUILD 1537 __kmp_itt_critical_releasing(lck); 1538 #endif 1539 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1540 } 1541 1542 #else // KMP_USE_DYNAMIC_LOCK 1543 1544 if ((__kmp_user_lock_kind == lk_tas) && 1545 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1546 lck = (kmp_user_lock_p)crit; 1547 } 1548 #if KMP_USE_FUTEX 1549 else if ((__kmp_user_lock_kind == lk_futex) && 1550 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1551 lck = (kmp_user_lock_p)crit; 1552 } 1553 #endif 1554 else { // ticket, queuing or drdpa 1555 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1556 } 1557 1558 KMP_ASSERT(lck != NULL); 1559 1560 if (__kmp_env_consistency_check) 1561 __kmp_pop_sync(global_tid, ct_critical, loc); 1562 1563 #if USE_ITT_BUILD 1564 __kmp_itt_critical_releasing(lck); 1565 #endif /* USE_ITT_BUILD */ 1566 // Value of 'crit' should be good for using as a critical_id of the critical 1567 // section directive. 1568 __kmp_release_user_lock_with_checks(lck, global_tid); 1569 1570 #endif // KMP_USE_DYNAMIC_LOCK 1571 1572 #if OMPT_SUPPORT && OMPT_OPTIONAL 1573 /* OMPT release event triggers after lock is released; place here to trigger 1574 * for all #if branches */ 1575 OMPT_STORE_RETURN_ADDRESS(global_tid); 1576 if (ompt_enabled.ompt_callback_mutex_released) { 1577 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1578 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1579 OMPT_LOAD_RETURN_ADDRESS(0)); 1580 } 1581 #endif 1582 1583 KMP_POP_PARTITIONED_TIMER(); 1584 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1585 } 1586 1587 /*! 1588 @ingroup SYNCHRONIZATION 1589 @param loc source location information 1590 @param global_tid thread id. 1591 @return one if the thread should execute the master block, zero otherwise 1592 1593 Start execution of a combined barrier and master. The barrier is executed inside 1594 this function. 
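
For orientation, a sketch of how this pair of entry points is typically used
(illustrative only; 'loc' and 'gtid' are assumed to be available):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... code executed by the master thread only ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode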
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the master block carries nowait
semantics: the other threads have already been released by the barrier and need
no further synchronization.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1669 } 1670 __kmp_check_barrier(global_tid, ct_barrier, loc); 1671 } 1672 1673 #if OMPT_SUPPORT 1674 ompt_frame_t *ompt_frame; 1675 if (ompt_enabled.enabled) { 1676 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1677 if (ompt_frame->enter_frame.ptr == NULL) 1678 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1679 OMPT_STORE_RETURN_ADDRESS(global_tid); 1680 } 1681 #endif 1682 #if USE_ITT_NOTIFY 1683 __kmp_threads[global_tid]->th.th_ident = loc; 1684 #endif 1685 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1686 #if OMPT_SUPPORT && OMPT_OPTIONAL 1687 if (ompt_enabled.enabled) { 1688 ompt_frame->enter_frame = ompt_data_none; 1689 } 1690 #endif 1691 1692 ret = __kmpc_master(loc, global_tid); 1693 1694 if (__kmp_env_consistency_check) { 1695 /* there's no __kmpc_end_master called; so the (stats) */ 1696 /* actions of __kmpc_end_master are done here */ 1697 1698 if (global_tid < 0) { 1699 KMP_WARNING(ThreadIdentInvalid); 1700 } 1701 if (ret) { 1702 /* only one thread should do the pop since only */ 1703 /* one did the push (see __kmpc_master()) */ 1704 1705 __kmp_pop_sync(global_tid, ct_master, loc); 1706 } 1707 } 1708 1709 return (ret); 1710 } 1711 1712 /* The BARRIER for a SINGLE process section is always explicit */ 1713 /*! 1714 @ingroup WORK_SHARING 1715 @param loc source location information 1716 @param global_tid global thread number 1717 @return One if this thread should execute the single construct, zero otherwise. 1718 1719 Test whether to execute a <tt>single</tt> construct. 1720 There are no implicit barriers in the two "single" calls, rather the compiler 1721 should introduce an explicit barrier if it is required. 1722 */ 1723 1724 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1725 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1726 1727 if (rc) { 1728 // We are going to execute the single statement, so we should count it. 1729 KMP_COUNT_BLOCK(OMP_SINGLE); 1730 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1731 } 1732 1733 #if OMPT_SUPPORT && OMPT_OPTIONAL 1734 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1735 kmp_team_t *team = this_thr->th.th_team; 1736 int tid = __kmp_tid_from_gtid(global_tid); 1737 1738 if (ompt_enabled.enabled) { 1739 if (rc) { 1740 if (ompt_enabled.ompt_callback_work) { 1741 ompt_callbacks.ompt_callback(ompt_callback_work)( 1742 ompt_work_single_executor, ompt_scope_begin, 1743 &(team->t.ompt_team_info.parallel_data), 1744 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1745 1, OMPT_GET_RETURN_ADDRESS(0)); 1746 } 1747 } else { 1748 if (ompt_enabled.ompt_callback_work) { 1749 ompt_callbacks.ompt_callback(ompt_callback_work)( 1750 ompt_work_single_other, ompt_scope_begin, 1751 &(team->t.ompt_team_info.parallel_data), 1752 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1753 1, OMPT_GET_RETURN_ADDRESS(0)); 1754 ompt_callbacks.ompt_callback(ompt_callback_work)( 1755 ompt_work_single_other, ompt_scope_end, 1756 &(team->t.ompt_team_info.parallel_data), 1757 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1758 1, OMPT_GET_RETURN_ADDRESS(0)); 1759 } 1760 } 1761 } 1762 #endif 1763 1764 return rc; 1765 } 1766 1767 /*! 1768 @ingroup WORK_SHARING 1769 @param loc source location information 1770 @param global_tid global thread number 1771 1772 Mark the end of a <tt>single</tt> construct. This function should 1773 only be called by the thread that executed the block of code protected 1774 by the `single` construct. 
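
For orientation, a sketch of the usual lowering of a <tt>single</tt> construct
(illustrative only): since neither call contains an implicit barrier, the
compiler appends an explicit one unless <tt>nowait</tt> was specified.
@code
if (__kmpc_single(&loc, gtid)) {
  // ... single block ...
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted when 'nowait' is present
@endcode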
1775 */ 1776 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1777 __kmp_exit_single(global_tid); 1778 KMP_POP_PARTITIONED_TIMER(); 1779 1780 #if OMPT_SUPPORT && OMPT_OPTIONAL 1781 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1782 kmp_team_t *team = this_thr->th.th_team; 1783 int tid = __kmp_tid_from_gtid(global_tid); 1784 1785 if (ompt_enabled.ompt_callback_work) { 1786 ompt_callbacks.ompt_callback(ompt_callback_work)( 1787 ompt_work_single_executor, ompt_scope_end, 1788 &(team->t.ompt_team_info.parallel_data), 1789 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1790 OMPT_GET_RETURN_ADDRESS(0)); 1791 } 1792 #endif 1793 } 1794 1795 /*! 1796 @ingroup WORK_SHARING 1797 @param loc Source location 1798 @param global_tid Global thread id 1799 1800 Mark the end of a statically scheduled loop. 1801 */ 1802 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1803 KMP_POP_PARTITIONED_TIMER(); 1804 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1805 1806 #if OMPT_SUPPORT && OMPT_OPTIONAL 1807 if (ompt_enabled.ompt_callback_work) { 1808 ompt_work_t ompt_work_type = ompt_work_loop; 1809 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1810 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1811 // Determine workshare type 1812 if (loc != NULL) { 1813 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1814 ompt_work_type = ompt_work_loop; 1815 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1816 ompt_work_type = ompt_work_sections; 1817 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1818 ompt_work_type = ompt_work_distribute; 1819 } else { 1820 // use default set above. 1821 // a warning about this case is provided in __kmpc_for_static_init 1822 } 1823 KMP_DEBUG_ASSERT(ompt_work_type); 1824 } 1825 ompt_callbacks.ompt_callback(ompt_callback_work)( 1826 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1827 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1828 } 1829 #endif 1830 if (__kmp_env_consistency_check) 1831 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1832 } 1833 1834 // User routines which take C-style arguments (call by value) 1835 // different from the Fortran equivalent routines 1836 1837 void ompc_set_num_threads(int arg) { 1838 // !!!!! TODO: check the per-task binding 1839 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1840 } 1841 1842 void ompc_set_dynamic(int flag) { 1843 kmp_info_t *thread; 1844 1845 /* For the thread-private implementation of the internal controls */ 1846 thread = __kmp_entry_thread(); 1847 1848 __kmp_save_internal_controls(thread); 1849 1850 set__dynamic(thread, flag ? TRUE : FALSE); 1851 } 1852 1853 void ompc_set_nested(int flag) { 1854 kmp_info_t *thread; 1855 1856 /* For the thread-private internal controls implementation */ 1857 thread = __kmp_entry_thread(); 1858 1859 __kmp_save_internal_controls(thread); 1860 1861 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1862 } 1863 1864 void ompc_set_max_active_levels(int max_active_levels) { 1865 /* TO DO */ 1866 /* we want per-task implementation of this internal control */ 1867 1868 /* For the per-thread internal controls implementation */ 1869 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1870 } 1871 1872 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1873 // !!!!! 
TODO: check the per-task binding 1874 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1875 } 1876 1877 int ompc_get_ancestor_thread_num(int level) { 1878 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1879 } 1880 1881 int ompc_get_team_size(int level) { 1882 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1883 } 1884 1885 /* OpenMP 5.0 Affinity Format API */ 1886 1887 void ompc_set_affinity_format(char const *format) { 1888 if (!__kmp_init_serial) { 1889 __kmp_serial_initialize(); 1890 } 1891 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1892 format, KMP_STRLEN(format) + 1); 1893 } 1894 1895 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1896 size_t format_size; 1897 if (!__kmp_init_serial) { 1898 __kmp_serial_initialize(); 1899 } 1900 format_size = KMP_STRLEN(__kmp_affinity_format); 1901 if (buffer && size) { 1902 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1903 format_size + 1); 1904 } 1905 return format_size; 1906 } 1907 1908 void ompc_display_affinity(char const *format) { 1909 int gtid; 1910 if (!TCR_4(__kmp_init_middle)) { 1911 __kmp_middle_initialize(); 1912 } 1913 gtid = __kmp_get_gtid(); 1914 __kmp_aux_display_affinity(gtid, format); 1915 } 1916 1917 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1918 char const *format) { 1919 int gtid; 1920 size_t num_required; 1921 kmp_str_buf_t capture_buf; 1922 if (!TCR_4(__kmp_init_middle)) { 1923 __kmp_middle_initialize(); 1924 } 1925 gtid = __kmp_get_gtid(); 1926 __kmp_str_buf_init(&capture_buf); 1927 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1928 if (buffer && buf_size) { 1929 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1930 capture_buf.used + 1); 1931 } 1932 __kmp_str_buf_free(&capture_buf); 1933 return num_required; 1934 } 1935 1936 void kmpc_set_stacksize(int arg) { 1937 // __kmp_aux_set_stacksize initializes the library if needed 1938 __kmp_aux_set_stacksize(arg); 1939 } 1940 1941 void kmpc_set_stacksize_s(size_t arg) { 1942 // __kmp_aux_set_stacksize initializes the library if needed 1943 __kmp_aux_set_stacksize(arg); 1944 } 1945 1946 void kmpc_set_blocktime(int arg) { 1947 int gtid, tid; 1948 kmp_info_t *thread; 1949 1950 gtid = __kmp_entry_gtid(); 1951 tid = __kmp_tid_from_gtid(gtid); 1952 thread = __kmp_thread_from_gtid(gtid); 1953 1954 __kmp_aux_set_blocktime(arg, thread, tid); 1955 } 1956 1957 void kmpc_set_library(int arg) { 1958 // __kmp_user_set_library initializes the library if needed 1959 __kmp_user_set_library((enum library_type)arg); 1960 } 1961 1962 void kmpc_set_defaults(char const *str) { 1963 // __kmp_aux_set_defaults initializes the library if needed 1964 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1965 } 1966 1967 void kmpc_set_disp_num_buffers(int arg) { 1968 // ignore after initialization because some teams have already 1969 // allocated dispatch buffers 1970 if (__kmp_init_serial == 0 && arg > 0) 1971 __kmp_dispatch_num_buffers = arg; 1972 } 1973 1974 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1975 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1976 return -1; 1977 #else 1978 if (!TCR_4(__kmp_init_middle)) { 1979 __kmp_middle_initialize(); 1980 } 1981 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1982 #endif 1983 } 1984 1985 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1986 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1987 return -1; 1988 #else 1989 if (!TCR_4(__kmp_init_middle)) { 1990 __kmp_middle_initialize(); 
1991 } 1992 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 1993 #endif 1994 } 1995 1996 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 1997 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1998 return -1; 1999 #else 2000 if (!TCR_4(__kmp_init_middle)) { 2001 __kmp_middle_initialize(); 2002 } 2003 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2004 #endif 2005 } 2006 2007 /* -------------------------------------------------------------------------- */ 2008 /*! 2009 @ingroup THREADPRIVATE 2010 @param loc source location information 2011 @param gtid global thread number 2012 @param cpy_size size of the cpy_data buffer 2013 @param cpy_data pointer to data to be copied 2014 @param cpy_func helper function to call for copying data 2015 @param didit flag variable: 1=single thread; 0=not single thread 2016 2017 __kmpc_copyprivate implements the interface for the private data broadcast 2018 needed for the copyprivate clause associated with a single region in an 2019 OpenMP<sup>*</sup> program (both C and Fortran). 2020 All threads participating in the parallel region call this routine. 2021 One of the threads (called the single thread) should have the <tt>didit</tt> 2022 variable set to 1 and all other threads should have that variable set to 0. 2023 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2024 2025 The OpenMP specification forbids the use of nowait on the single region when a 2026 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2027 barrier internally to avoid race conditions, so the code generation for the 2028 single region should avoid generating a barrier after the call to @ref 2029 __kmpc_copyprivate. 2030 2031 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2032 The <tt>loc</tt> parameter is a pointer to source location information. 2033 2034 Internal implementation: The single thread will first copy its descriptor 2035 address (cpy_data) to a team-private location, then the other threads will each 2036 call the function pointed to by the parameter cpy_func, which carries out the 2037 copy by copying the data using the cpy_data buffer. 2038 2039 The cpy_func routine used for the copy and the contents of the data area defined 2040 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2041 to be done. For instance, the cpy_data buffer can hold the actual data to be 2042 copied or it may hold a list of pointers to the data. The cpy_func routine must 2043 interpret the cpy_data buffer appropriately. 2044 2045 The interface to cpy_func is as follows: 2046 @code 2047 void cpy_func( void *destination, void *source ) 2048 @endcode 2049 where void *destination is the cpy_data pointer for the thread being copied to 2050 and void *source is the cpy_data pointer for the thread being copied from. 
2051 */ 2052 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2053 void *cpy_data, void (*cpy_func)(void *, void *), 2054 kmp_int32 didit) { 2055 void **data_ptr; 2056 2057 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2058 2059 KMP_MB(); 2060 2061 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2062 2063 if (__kmp_env_consistency_check) { 2064 if (loc == 0) { 2065 KMP_WARNING(ConstructIdentInvalid); 2066 } 2067 } 2068 2069 // ToDo: Optimize the following two barriers into some kind of split barrier 2070 2071 if (didit) 2072 *data_ptr = cpy_data; 2073 2074 #if OMPT_SUPPORT 2075 ompt_frame_t *ompt_frame; 2076 if (ompt_enabled.enabled) { 2077 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2078 if (ompt_frame->enter_frame.ptr == NULL) 2079 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2080 OMPT_STORE_RETURN_ADDRESS(gtid); 2081 } 2082 #endif 2083 /* This barrier is not a barrier region boundary */ 2084 #if USE_ITT_NOTIFY 2085 __kmp_threads[gtid]->th.th_ident = loc; 2086 #endif 2087 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2088 2089 if (!didit) 2090 (*cpy_func)(cpy_data, *data_ptr); 2091 2092 // Consider next barrier a user-visible barrier for barrier region boundaries 2093 // Nesting checks are already handled by the single construct checks 2094 2095 #if OMPT_SUPPORT 2096 if (ompt_enabled.enabled) { 2097 OMPT_STORE_RETURN_ADDRESS(gtid); 2098 } 2099 #endif 2100 #if USE_ITT_NOTIFY 2101 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2102 // tasks can overwrite the location) 2103 #endif 2104 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2105 #if OMPT_SUPPORT && OMPT_OPTIONAL 2106 if (ompt_enabled.enabled) { 2107 ompt_frame->enter_frame = ompt_data_none; 2108 } 2109 #endif 2110 } 2111 2112 /* -------------------------------------------------------------------------- */ 2113 2114 #define INIT_LOCK __kmp_init_user_lock_with_checks 2115 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2116 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2117 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2118 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2119 #define ACQUIRE_NESTED_LOCK_TIMED \ 2120 __kmp_acquire_nested_user_lock_with_checks_timed 2121 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2122 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2123 #define TEST_LOCK __kmp_test_user_lock_with_checks 2124 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2125 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2126 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2127 2128 // TODO: Make check abort messages use location info & pass it into 2129 // with_checks routines 2130 2131 #if KMP_USE_DYNAMIC_LOCK 2132 2133 // internal lock initializer 2134 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2135 kmp_dyna_lockseq_t seq) { 2136 if (KMP_IS_D_LOCK(seq)) { 2137 KMP_INIT_D_LOCK(lock, seq); 2138 #if USE_ITT_BUILD 2139 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2140 #endif 2141 } else { 2142 KMP_INIT_I_LOCK(lock, seq); 2143 #if USE_ITT_BUILD 2144 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2145 __kmp_itt_lock_creating(ilk->lock, loc); 2146 #endif 2147 } 2148 } 2149 2150 // internal nest lock initializer 2151 static __forceinline void 2152 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2153 kmp_dyna_lockseq_t seq) { 2154 #if KMP_USE_TSX 2155 // Don't have nested lock implementation for speculative locks 2156 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2157 seq = __kmp_user_lock_seq; 2158 #endif 2159 switch (seq) { 2160 case lockseq_tas: 2161 seq = lockseq_nested_tas; 2162 break; 2163 #if KMP_USE_FUTEX 2164 case lockseq_futex: 2165 seq = lockseq_nested_futex; 2166 break; 2167 #endif 2168 case lockseq_ticket: 2169 seq = lockseq_nested_ticket; 2170 break; 2171 case lockseq_queuing: 2172 seq = lockseq_nested_queuing; 2173 break; 2174 case lockseq_drdpa: 2175 seq = lockseq_nested_drdpa; 2176 break; 2177 default: 2178 seq = lockseq_nested_queuing; 2179 } 2180 KMP_INIT_I_LOCK(lock, seq); 2181 #if USE_ITT_BUILD 2182 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2183 __kmp_itt_lock_creating(ilk->lock, loc); 2184 #endif 2185 } 2186 2187 /* initialize the lock with a hint */ 2188 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2189 uintptr_t hint) { 2190 KMP_DEBUG_ASSERT(__kmp_init_serial); 2191 if (__kmp_env_consistency_check && user_lock == NULL) { 2192 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2193 } 2194 2195 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2196 2197 #if OMPT_SUPPORT && OMPT_OPTIONAL 2198 // This is the case, if called from omp_init_lock_with_hint: 2199 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2200 if (!codeptr) 2201 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2202 if (ompt_enabled.ompt_callback_lock_init) { 2203 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2204 ompt_mutex_lock, (omp_lock_hint_t)hint, 2205 __ompt_get_mutex_impl_type(user_lock), 2206 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2207 } 2208 #endif 2209 } 2210 2211 /* initialize the lock with a hint */ 2212 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2213 void **user_lock, uintptr_t hint) { 2214 KMP_DEBUG_ASSERT(__kmp_init_serial); 2215 if (__kmp_env_consistency_check && user_lock == NULL) { 2216 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2217 } 2218 2219 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2220 2221 #if OMPT_SUPPORT && OMPT_OPTIONAL 2222 // This is the case, if called from omp_init_lock_with_hint: 2223 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2224 if (!codeptr) 2225 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2226 if (ompt_enabled.ompt_callback_lock_init) { 2227 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2228 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2229 __ompt_get_mutex_impl_type(user_lock), 2230 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2231 } 2232 #endif 2233 } 2234 2235 #endif // KMP_USE_DYNAMIC_LOCK 2236 2237 /* initialize the lock */ 2238 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2239 #if KMP_USE_DYNAMIC_LOCK 2240 2241 KMP_DEBUG_ASSERT(__kmp_init_serial); 2242 if (__kmp_env_consistency_check && user_lock == NULL) { 2243 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2244 } 2245 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2246 2247 #if OMPT_SUPPORT && OMPT_OPTIONAL 2248 // This is the case, if called from omp_init_lock_with_hint: 2249 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2250 if (!codeptr) 2251 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2252 if (ompt_enabled.ompt_callback_lock_init) { 2253 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2254 ompt_mutex_lock, omp_lock_hint_none, 2255 
__ompt_get_mutex_impl_type(user_lock), 2256 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2257 } 2258 #endif 2259 2260 #else // KMP_USE_DYNAMIC_LOCK 2261 2262 static char const *const func = "omp_init_lock"; 2263 kmp_user_lock_p lck; 2264 KMP_DEBUG_ASSERT(__kmp_init_serial); 2265 2266 if (__kmp_env_consistency_check) { 2267 if (user_lock == NULL) { 2268 KMP_FATAL(LockIsUninitialized, func); 2269 } 2270 } 2271 2272 KMP_CHECK_USER_LOCK_INIT(); 2273 2274 if ((__kmp_user_lock_kind == lk_tas) && 2275 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2276 lck = (kmp_user_lock_p)user_lock; 2277 } 2278 #if KMP_USE_FUTEX 2279 else if ((__kmp_user_lock_kind == lk_futex) && 2280 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2281 lck = (kmp_user_lock_p)user_lock; 2282 } 2283 #endif 2284 else { 2285 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2286 } 2287 INIT_LOCK(lck); 2288 __kmp_set_user_lock_location(lck, loc); 2289 2290 #if OMPT_SUPPORT && OMPT_OPTIONAL 2291 // This is the case, if called from omp_init_lock_with_hint: 2292 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2293 if (!codeptr) 2294 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2295 if (ompt_enabled.ompt_callback_lock_init) { 2296 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2297 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2298 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2299 } 2300 #endif 2301 2302 #if USE_ITT_BUILD 2303 __kmp_itt_lock_creating(lck); 2304 #endif /* USE_ITT_BUILD */ 2305 2306 #endif // KMP_USE_DYNAMIC_LOCK 2307 } // __kmpc_init_lock 2308 2309 /* initialize the lock */ 2310 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2311 #if KMP_USE_DYNAMIC_LOCK 2312 2313 KMP_DEBUG_ASSERT(__kmp_init_serial); 2314 if (__kmp_env_consistency_check && user_lock == NULL) { 2315 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2316 } 2317 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2318 2319 #if OMPT_SUPPORT && OMPT_OPTIONAL 2320 // This is the case, if called from omp_init_lock_with_hint: 2321 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2322 if (!codeptr) 2323 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2324 if (ompt_enabled.ompt_callback_lock_init) { 2325 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2326 ompt_mutex_nest_lock, omp_lock_hint_none, 2327 __ompt_get_mutex_impl_type(user_lock), 2328 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2329 } 2330 #endif 2331 2332 #else // KMP_USE_DYNAMIC_LOCK 2333 2334 static char const *const func = "omp_init_nest_lock"; 2335 kmp_user_lock_p lck; 2336 KMP_DEBUG_ASSERT(__kmp_init_serial); 2337 2338 if (__kmp_env_consistency_check) { 2339 if (user_lock == NULL) { 2340 KMP_FATAL(LockIsUninitialized, func); 2341 } 2342 } 2343 2344 KMP_CHECK_USER_LOCK_INIT(); 2345 2346 if ((__kmp_user_lock_kind == lk_tas) && 2347 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2348 OMP_NEST_LOCK_T_SIZE)) { 2349 lck = (kmp_user_lock_p)user_lock; 2350 } 2351 #if KMP_USE_FUTEX 2352 else if ((__kmp_user_lock_kind == lk_futex) && 2353 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2354 OMP_NEST_LOCK_T_SIZE)) { 2355 lck = (kmp_user_lock_p)user_lock; 2356 } 2357 #endif 2358 else { 2359 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2360 } 2361 2362 INIT_NESTED_LOCK(lck); 2363 __kmp_set_user_lock_location(lck, loc); 2364 2365 #if OMPT_SUPPORT && OMPT_OPTIONAL 2366 // This is the case, if called from omp_init_lock_with_hint: 2367 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
2368 if (!codeptr) 2369 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2370 if (ompt_enabled.ompt_callback_lock_init) { 2371 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2372 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2373 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2374 } 2375 #endif 2376 2377 #if USE_ITT_BUILD 2378 __kmp_itt_lock_creating(lck); 2379 #endif /* USE_ITT_BUILD */ 2380 2381 #endif // KMP_USE_DYNAMIC_LOCK 2382 } // __kmpc_init_nest_lock 2383 2384 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2385 #if KMP_USE_DYNAMIC_LOCK 2386 2387 #if USE_ITT_BUILD 2388 kmp_user_lock_p lck; 2389 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2390 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2391 } else { 2392 lck = (kmp_user_lock_p)user_lock; 2393 } 2394 __kmp_itt_lock_destroyed(lck); 2395 #endif 2396 #if OMPT_SUPPORT && OMPT_OPTIONAL 2397 // This is the case, if called from omp_init_lock_with_hint: 2398 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2399 if (!codeptr) 2400 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2401 if (ompt_enabled.ompt_callback_lock_destroy) { 2402 kmp_user_lock_p lck; 2403 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2404 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2405 } else { 2406 lck = (kmp_user_lock_p)user_lock; 2407 } 2408 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2409 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2410 } 2411 #endif 2412 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2413 #else 2414 kmp_user_lock_p lck; 2415 2416 if ((__kmp_user_lock_kind == lk_tas) && 2417 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2418 lck = (kmp_user_lock_p)user_lock; 2419 } 2420 #if KMP_USE_FUTEX 2421 else if ((__kmp_user_lock_kind == lk_futex) && 2422 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2423 lck = (kmp_user_lock_p)user_lock; 2424 } 2425 #endif 2426 else { 2427 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2428 } 2429 2430 #if OMPT_SUPPORT && OMPT_OPTIONAL 2431 // This is the case, if called from omp_init_lock_with_hint: 2432 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2433 if (!codeptr) 2434 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2435 if (ompt_enabled.ompt_callback_lock_destroy) { 2436 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2437 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2438 } 2439 #endif 2440 2441 #if USE_ITT_BUILD 2442 __kmp_itt_lock_destroyed(lck); 2443 #endif /* USE_ITT_BUILD */ 2444 DESTROY_LOCK(lck); 2445 2446 if ((__kmp_user_lock_kind == lk_tas) && 2447 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2448 ; 2449 } 2450 #if KMP_USE_FUTEX 2451 else if ((__kmp_user_lock_kind == lk_futex) && 2452 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2453 ; 2454 } 2455 #endif 2456 else { 2457 __kmp_user_lock_free(user_lock, gtid, lck); 2458 } 2459 #endif // KMP_USE_DYNAMIC_LOCK 2460 } // __kmpc_destroy_lock 2461 2462 /* destroy the lock */ 2463 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2464 #if KMP_USE_DYNAMIC_LOCK 2465 2466 #if USE_ITT_BUILD 2467 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2468 __kmp_itt_lock_destroyed(ilk->lock); 2469 #endif 2470 #if OMPT_SUPPORT && OMPT_OPTIONAL 2471 // This is the case, if called from omp_init_lock_with_hint: 2472 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2473 if (!codeptr) 2474 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2475 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2476 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2477 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2478 } 2479 #endif 2480 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2481 2482 #else // KMP_USE_DYNAMIC_LOCK 2483 2484 kmp_user_lock_p lck; 2485 2486 if ((__kmp_user_lock_kind == lk_tas) && 2487 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2488 OMP_NEST_LOCK_T_SIZE)) { 2489 lck = (kmp_user_lock_p)user_lock; 2490 } 2491 #if KMP_USE_FUTEX 2492 else if ((__kmp_user_lock_kind == lk_futex) && 2493 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2494 OMP_NEST_LOCK_T_SIZE)) { 2495 lck = (kmp_user_lock_p)user_lock; 2496 } 2497 #endif 2498 else { 2499 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2500 } 2501 2502 #if OMPT_SUPPORT && OMPT_OPTIONAL 2503 // This is the case, if called from omp_init_lock_with_hint: 2504 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2505 if (!codeptr) 2506 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2507 if (ompt_enabled.ompt_callback_lock_destroy) { 2508 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2509 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2510 } 2511 #endif 2512 2513 #if USE_ITT_BUILD 2514 __kmp_itt_lock_destroyed(lck); 2515 #endif /* USE_ITT_BUILD */ 2516 2517 DESTROY_NESTED_LOCK(lck); 2518 2519 if ((__kmp_user_lock_kind == lk_tas) && 2520 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2521 OMP_NEST_LOCK_T_SIZE)) { 2522 ; 2523 } 2524 #if KMP_USE_FUTEX 2525 else if ((__kmp_user_lock_kind == lk_futex) && 2526 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2527 OMP_NEST_LOCK_T_SIZE)) { 2528 ; 2529 } 2530 #endif 2531 else { 2532 __kmp_user_lock_free(user_lock, gtid, lck); 2533 } 2534 #endif // KMP_USE_DYNAMIC_LOCK 2535 } // __kmpc_destroy_nest_lock 2536 2537 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2538 KMP_COUNT_BLOCK(OMP_set_lock); 2539 #if KMP_USE_DYNAMIC_LOCK 2540 int tag = KMP_EXTRACT_D_TAG(user_lock); 2541 #if USE_ITT_BUILD 2542 __kmp_itt_lock_acquiring( 2543 (kmp_user_lock_p) 2544 user_lock); // itt function will get to the right lock object. 
2545 #endif 2546 #if OMPT_SUPPORT && OMPT_OPTIONAL 2547 // This is the case, if called from omp_init_lock_with_hint: 2548 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2549 if (!codeptr) 2550 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2551 if (ompt_enabled.ompt_callback_mutex_acquire) { 2552 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2553 ompt_mutex_lock, omp_lock_hint_none, 2554 __ompt_get_mutex_impl_type(user_lock), 2555 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2556 } 2557 #endif 2558 #if KMP_USE_INLINED_TAS 2559 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2560 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2561 } else 2562 #elif KMP_USE_INLINED_FUTEX 2563 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2564 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2565 } else 2566 #endif 2567 { 2568 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2569 } 2570 #if USE_ITT_BUILD 2571 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2572 #endif 2573 #if OMPT_SUPPORT && OMPT_OPTIONAL 2574 if (ompt_enabled.ompt_callback_mutex_acquired) { 2575 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2576 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2577 } 2578 #endif 2579 2580 #else // KMP_USE_DYNAMIC_LOCK 2581 2582 kmp_user_lock_p lck; 2583 2584 if ((__kmp_user_lock_kind == lk_tas) && 2585 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2586 lck = (kmp_user_lock_p)user_lock; 2587 } 2588 #if KMP_USE_FUTEX 2589 else if ((__kmp_user_lock_kind == lk_futex) && 2590 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2591 lck = (kmp_user_lock_p)user_lock; 2592 } 2593 #endif 2594 else { 2595 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2596 } 2597 2598 #if USE_ITT_BUILD 2599 __kmp_itt_lock_acquiring(lck); 2600 #endif /* USE_ITT_BUILD */ 2601 #if OMPT_SUPPORT && OMPT_OPTIONAL 2602 // This is the case, if called from omp_init_lock_with_hint: 2603 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2604 if (!codeptr) 2605 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2606 if (ompt_enabled.ompt_callback_mutex_acquire) { 2607 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2608 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2609 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2610 } 2611 #endif 2612 2613 ACQUIRE_LOCK(lck, gtid); 2614 2615 #if USE_ITT_BUILD 2616 __kmp_itt_lock_acquired(lck); 2617 #endif /* USE_ITT_BUILD */ 2618 2619 #if OMPT_SUPPORT && OMPT_OPTIONAL 2620 if (ompt_enabled.ompt_callback_mutex_acquired) { 2621 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2622 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2623 } 2624 #endif 2625 2626 #endif // KMP_USE_DYNAMIC_LOCK 2627 } 2628 2629 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2630 #if KMP_USE_DYNAMIC_LOCK 2631 2632 #if USE_ITT_BUILD 2633 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2634 #endif 2635 #if OMPT_SUPPORT && OMPT_OPTIONAL 2636 // This is the case, if called from omp_init_lock_with_hint: 2637 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2638 if (!codeptr) 2639 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2640 if (ompt_enabled.enabled) { 2641 if (ompt_enabled.ompt_callback_mutex_acquire) { 2642 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2643 ompt_mutex_nest_lock, omp_lock_hint_none, 2644 __ompt_get_mutex_impl_type(user_lock), 2645 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2646 } 2647 } 2648 #endif 2649 int acquire_status = 2650 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2651 (void) acquire_status; 2652 #if USE_ITT_BUILD 2653 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2654 #endif 2655 2656 #if OMPT_SUPPORT && OMPT_OPTIONAL 2657 if (ompt_enabled.enabled) { 2658 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2659 if (ompt_enabled.ompt_callback_mutex_acquired) { 2660 // lock_first 2661 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2662 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2663 codeptr); 2664 } 2665 } else { 2666 if (ompt_enabled.ompt_callback_nest_lock) { 2667 // lock_next 2668 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2669 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2670 } 2671 } 2672 } 2673 #endif 2674 2675 #else // KMP_USE_DYNAMIC_LOCK 2676 int acquire_status; 2677 kmp_user_lock_p lck; 2678 2679 if ((__kmp_user_lock_kind == lk_tas) && 2680 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2681 OMP_NEST_LOCK_T_SIZE)) { 2682 lck = (kmp_user_lock_p)user_lock; 2683 } 2684 #if KMP_USE_FUTEX 2685 else if ((__kmp_user_lock_kind == lk_futex) && 2686 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2687 OMP_NEST_LOCK_T_SIZE)) { 2688 lck = (kmp_user_lock_p)user_lock; 2689 } 2690 #endif 2691 else { 2692 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2693 } 2694 2695 #if USE_ITT_BUILD 2696 __kmp_itt_lock_acquiring(lck); 2697 #endif /* USE_ITT_BUILD */ 2698 #if OMPT_SUPPORT && OMPT_OPTIONAL 2699 // This is the case, if called from omp_init_lock_with_hint: 2700 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2701 if (!codeptr) 2702 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2703 if (ompt_enabled.enabled) { 2704 if (ompt_enabled.ompt_callback_mutex_acquire) { 2705 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2706 ompt_mutex_nest_lock, omp_lock_hint_none, 2707 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2708 codeptr); 2709 } 2710 } 2711 #endif 2712 2713 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2714 2715 #if USE_ITT_BUILD 2716 __kmp_itt_lock_acquired(lck); 2717 #endif /* USE_ITT_BUILD */ 2718 2719 #if OMPT_SUPPORT && OMPT_OPTIONAL 2720 if (ompt_enabled.enabled) { 2721 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2722 if (ompt_enabled.ompt_callback_mutex_acquired) { 2723 // lock_first 2724 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2725 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2726 } 2727 } else { 2728 if (ompt_enabled.ompt_callback_nest_lock) { 2729 // lock_next 2730 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2731 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2732 } 2733 } 2734 } 2735 #endif 2736 2737 #endif // KMP_USE_DYNAMIC_LOCK 2738 } 2739 2740 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2741 #if KMP_USE_DYNAMIC_LOCK 2742 2743 int tag = KMP_EXTRACT_D_TAG(user_lock); 2744 #if USE_ITT_BUILD 2745 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2746 #endif 2747 #if KMP_USE_INLINED_TAS 2748 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2749 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2750 } else 2751 #elif KMP_USE_INLINED_FUTEX 2752 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2753 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2754 } else 2755 #endif 2756 { 2757 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2758 } 2759 2760 #if OMPT_SUPPORT && OMPT_OPTIONAL 2761 // This is the case, if called from omp_init_lock_with_hint: 2762 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2763 if (!codeptr) 2764 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2765 if (ompt_enabled.ompt_callback_mutex_released) { 2766 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2767 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2768 } 2769 #endif 2770 2771 #else // KMP_USE_DYNAMIC_LOCK 2772 2773 kmp_user_lock_p lck; 2774 2775 /* Can't use serial interval since not block structured */ 2776 /* release the lock */ 2777 2778 if ((__kmp_user_lock_kind == lk_tas) && 2779 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2780 #if KMP_OS_LINUX && \ 2781 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2782 // "fast" path implemented to fix customer performance issue 2783 #if USE_ITT_BUILD 2784 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2785 #endif /* USE_ITT_BUILD */ 2786 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2787 KMP_MB(); 2788 2789 #if OMPT_SUPPORT && OMPT_OPTIONAL 2790 // This is the case, if called from omp_init_lock_with_hint: 2791 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2792 if (!codeptr) 2793 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2794 if (ompt_enabled.ompt_callback_mutex_released) { 2795 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2796 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2797 } 2798 #endif 2799 2800 return; 2801 #else 2802 lck = (kmp_user_lock_p)user_lock; 2803 #endif 2804 } 2805 #if KMP_USE_FUTEX 2806 else if ((__kmp_user_lock_kind == lk_futex) && 2807 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2808 lck = (kmp_user_lock_p)user_lock; 2809 } 2810 #endif 2811 else { 2812 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2813 } 2814 2815 #if USE_ITT_BUILD 2816 __kmp_itt_lock_releasing(lck); 2817 #endif /* USE_ITT_BUILD */ 2818 2819 RELEASE_LOCK(lck, gtid); 2820 2821 #if OMPT_SUPPORT && OMPT_OPTIONAL 2822 // This is the case, if called from omp_init_lock_with_hint: 2823 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2824 if (!codeptr) 2825 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2826 if (ompt_enabled.ompt_callback_mutex_released) { 2827 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2828 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2829 } 2830 #endif 2831 2832 #endif // KMP_USE_DYNAMIC_LOCK 2833 } 2834 2835 /* release the lock */ 2836 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2837 #if KMP_USE_DYNAMIC_LOCK 2838 2839 #if USE_ITT_BUILD 2840 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2841 #endif 2842 int release_status = 2843 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2844 (void) release_status; 2845 2846 #if OMPT_SUPPORT && OMPT_OPTIONAL 2847 // This is the case, if called from omp_init_lock_with_hint: 2848 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2849 if (!codeptr) 2850 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2851 if (ompt_enabled.enabled) { 2852 if (release_status == KMP_LOCK_RELEASED) { 2853 if (ompt_enabled.ompt_callback_mutex_released) { 2854 // release_lock_last 2855 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2856 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2857 codeptr); 2858 } 2859 } else if (ompt_enabled.ompt_callback_nest_lock) { 2860 // release_lock_prev 2861 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2862 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2863 } 2864 } 2865 #endif 2866 2867 #else // KMP_USE_DYNAMIC_LOCK 2868 2869 kmp_user_lock_p lck; 
2870 2871 /* Can't use serial interval since not block structured */ 2872 2873 if ((__kmp_user_lock_kind == lk_tas) && 2874 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2875 OMP_NEST_LOCK_T_SIZE)) { 2876 #if KMP_OS_LINUX && \ 2877 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2878 // "fast" path implemented to fix customer performance issue 2879 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2880 #if USE_ITT_BUILD 2881 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2882 #endif /* USE_ITT_BUILD */ 2883 2884 #if OMPT_SUPPORT && OMPT_OPTIONAL 2885 int release_status = KMP_LOCK_STILL_HELD; 2886 #endif 2887 2888 if (--(tl->lk.depth_locked) == 0) { 2889 TCW_4(tl->lk.poll, 0); 2890 #if OMPT_SUPPORT && OMPT_OPTIONAL 2891 release_status = KMP_LOCK_RELEASED; 2892 #endif 2893 } 2894 KMP_MB(); 2895 2896 #if OMPT_SUPPORT && OMPT_OPTIONAL 2897 // This is the case, if called from omp_init_lock_with_hint: 2898 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2899 if (!codeptr) 2900 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2901 if (ompt_enabled.enabled) { 2902 if (release_status == KMP_LOCK_RELEASED) { 2903 if (ompt_enabled.ompt_callback_mutex_released) { 2904 // release_lock_last 2905 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2906 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2907 } 2908 } else if (ompt_enabled.ompt_callback_nest_lock) { 2909 // release_lock_previous 2910 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2911 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2912 } 2913 } 2914 #endif 2915 2916 return; 2917 #else 2918 lck = (kmp_user_lock_p)user_lock; 2919 #endif 2920 } 2921 #if KMP_USE_FUTEX 2922 else if ((__kmp_user_lock_kind == lk_futex) && 2923 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2924 OMP_NEST_LOCK_T_SIZE)) { 2925 lck = (kmp_user_lock_p)user_lock; 2926 } 2927 #endif 2928 else { 2929 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2930 } 2931 2932 #if USE_ITT_BUILD 2933 __kmp_itt_lock_releasing(lck); 2934 #endif /* USE_ITT_BUILD */ 2935 2936 int release_status; 2937 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2938 #if OMPT_SUPPORT && OMPT_OPTIONAL 2939 // This is the case, if called from omp_init_lock_with_hint: 2940 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2941 if (!codeptr) 2942 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2943 if (ompt_enabled.enabled) { 2944 if (release_status == KMP_LOCK_RELEASED) { 2945 if (ompt_enabled.ompt_callback_mutex_released) { 2946 // release_lock_last 2947 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2948 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2949 } 2950 } else if (ompt_enabled.ompt_callback_nest_lock) { 2951 // release_lock_previous 2952 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2953 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2954 } 2955 } 2956 #endif 2957 2958 #endif // KMP_USE_DYNAMIC_LOCK 2959 } 2960 2961 /* try to acquire the lock */ 2962 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2963 KMP_COUNT_BLOCK(OMP_test_lock); 2964 2965 #if KMP_USE_DYNAMIC_LOCK 2966 int rc; 2967 int tag = KMP_EXTRACT_D_TAG(user_lock); 2968 #if USE_ITT_BUILD 2969 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2970 #endif 2971 #if OMPT_SUPPORT && OMPT_OPTIONAL 2972 // This is the case, if called from omp_init_lock_with_hint: 2973 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2974 if (!codeptr) 2975 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2976 if (ompt_enabled.ompt_callback_mutex_acquire) { 2977 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2978 ompt_mutex_lock, omp_lock_hint_none, 2979 __ompt_get_mutex_impl_type(user_lock), 2980 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2981 } 2982 #endif 2983 #if KMP_USE_INLINED_TAS 2984 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2985 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2986 } else 2987 #elif KMP_USE_INLINED_FUTEX 2988 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2989 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 2990 } else 2991 #endif 2992 { 2993 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2994 } 2995 if (rc) { 2996 #if USE_ITT_BUILD 2997 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2998 #endif 2999 #if OMPT_SUPPORT && OMPT_OPTIONAL 3000 if (ompt_enabled.ompt_callback_mutex_acquired) { 3001 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3002 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3003 } 3004 #endif 3005 return FTN_TRUE; 3006 } else { 3007 #if USE_ITT_BUILD 3008 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3009 #endif 3010 return FTN_FALSE; 3011 } 3012 3013 #else // KMP_USE_DYNAMIC_LOCK 3014 3015 kmp_user_lock_p lck; 3016 int rc; 3017 3018 if ((__kmp_user_lock_kind == lk_tas) && 3019 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3020 lck = (kmp_user_lock_p)user_lock; 3021 } 3022 #if KMP_USE_FUTEX 3023 else if ((__kmp_user_lock_kind == lk_futex) && 3024 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3025 lck = (kmp_user_lock_p)user_lock; 3026 } 3027 #endif 3028 else { 3029 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3030 } 3031 3032 #if USE_ITT_BUILD 3033 __kmp_itt_lock_acquiring(lck); 3034 #endif /* USE_ITT_BUILD */ 3035 #if OMPT_SUPPORT && OMPT_OPTIONAL 3036 // This is the case, if called from omp_init_lock_with_hint: 3037 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3038 if (!codeptr) 3039 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3040 if (ompt_enabled.ompt_callback_mutex_acquire) { 3041 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3042 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3043 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3044 } 3045 #endif 3046 3047 rc = TEST_LOCK(lck, gtid); 3048 #if USE_ITT_BUILD 3049 if (rc) { 3050 __kmp_itt_lock_acquired(lck); 3051 } else { 3052 __kmp_itt_lock_cancelled(lck); 3053 } 3054 #endif /* USE_ITT_BUILD */ 3055 #if OMPT_SUPPORT && OMPT_OPTIONAL 3056 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3057 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3058 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3059 } 3060 #endif 3061 3062 return (rc ? 
FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next 3174 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3175 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3176 } 3177 } 3178 } 3179 #endif 3180 return rc; 3181 3182 /* Can't use serial interval since not block structured */ 3183 3184 #endif // KMP_USE_DYNAMIC_LOCK 3185 } 3186 3187 // Interface to fast scalable reduce methods routines 3188 3189 // keep the selected method in a thread local structure for cross-function 3190 // usage: will be used in __kmpc_end_reduce* functions; 3191 // another solution: to re-determine the method one more time in 3192 // __kmpc_end_reduce* functions (new prototype required then) 3193 // AT: which solution is better? 3194 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3195 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3196 3197 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3198 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3199 3200 // description of the packed_reduction_method variable: look at the macros in 3201 // kmp.h 3202 3203 // used in a critical section reduce block 3204 static __forceinline void 3205 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3206 kmp_critical_name *crit) { 3207 3208 // this lock was visible to a customer and to the threading profile tool as a 3209 // serial overhead span (although it's used for an internal purpose only) 3210 // why was it visible in previous implementation? 3211 // should we keep it visible in new reduce block? 3212 kmp_user_lock_p lck; 3213 3214 #if KMP_USE_DYNAMIC_LOCK 3215 3216 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3217 // Check if it is initialized. 3218 if (*lk == 0) { 3219 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3220 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3221 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3222 } else { 3223 __kmp_init_indirect_csptr(crit, loc, global_tid, 3224 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3225 } 3226 } 3227 // Branch for accessing the actual lock object and set operation. This 3228 // branching is inevitable since this lock initialization does not follow the 3229 // normal dispatch path (lock table is not used). 3230 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3231 lck = (kmp_user_lock_p)lk; 3232 KMP_DEBUG_ASSERT(lck != NULL); 3233 if (__kmp_env_consistency_check) { 3234 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3235 } 3236 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3237 } else { 3238 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3239 lck = ilk->lock; 3240 KMP_DEBUG_ASSERT(lck != NULL); 3241 if (__kmp_env_consistency_check) { 3242 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3243 } 3244 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3245 } 3246 3247 #else // KMP_USE_DYNAMIC_LOCK 3248 3249 // We know that the fast reduction code is only emitted by Intel compilers 3250 // with 32 byte critical sections. If there isn't enough space, then we 3251 // have to use a pointer. 
3252 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3253 lck = (kmp_user_lock_p)crit; 3254 } else { 3255 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3256 } 3257 KMP_DEBUG_ASSERT(lck != NULL); 3258 3259 if (__kmp_env_consistency_check) 3260 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3261 3262 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3263 3264 #endif // KMP_USE_DYNAMIC_LOCK 3265 } 3266 3267 // used in a critical section reduce block 3268 static __forceinline void 3269 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3270 kmp_critical_name *crit) { 3271 3272 kmp_user_lock_p lck; 3273 3274 #if KMP_USE_DYNAMIC_LOCK 3275 3276 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3277 lck = (kmp_user_lock_p)crit; 3278 if (__kmp_env_consistency_check) 3279 __kmp_pop_sync(global_tid, ct_critical, loc); 3280 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3281 } else { 3282 kmp_indirect_lock_t *ilk = 3283 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3284 if (__kmp_env_consistency_check) 3285 __kmp_pop_sync(global_tid, ct_critical, loc); 3286 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3287 } 3288 3289 #else // KMP_USE_DYNAMIC_LOCK 3290 3291 // We know that the fast reduction code is only emitted by Intel compilers 3292 // with 32 byte critical sections. If there isn't enough space, then we have 3293 // to use a pointer. 3294 if (__kmp_base_user_lock_size > 32) { 3295 lck = *((kmp_user_lock_p *)crit); 3296 KMP_ASSERT(lck != NULL); 3297 } else { 3298 lck = (kmp_user_lock_p)crit; 3299 } 3300 3301 if (__kmp_env_consistency_check) 3302 __kmp_pop_sync(global_tid, ct_critical, loc); 3303 3304 __kmp_release_user_lock_with_checks(lck, global_tid); 3305 3306 #endif // KMP_USE_DYNAMIC_LOCK 3307 } // __kmp_end_critical_section_reduce_block 3308 3309 static __forceinline int 3310 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3311 int *task_state) { 3312 kmp_team_t *team; 3313 3314 // Check if we are inside the teams construct? 3315 if (th->th.th_teams_microtask) { 3316 *team_p = team = th->th.th_team; 3317 if (team->t.t_level == th->th.th_teams_level) { 3318 // This is reduction at teams construct. 3319 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3320 // Let's swap teams temporarily for the reduction. 3321 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3322 th->th.th_team = team->t.t_parent; 3323 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3324 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3325 *task_state = th->th.th_task_state; 3326 th->th.th_task_state = 0; 3327 3328 return 1; 3329 } 3330 } 3331 return 0; 3332 } 3333 3334 static __forceinline void 3335 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3336 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3337 th->th.th_info.ds.ds_tid = 0; 3338 th->th.th_team = team; 3339 th->th.th_team_nproc = team->t.t_nproc; 3340 th->th.th_task_team = team->t.t_task_team[task_state]; 3341 th->th.th_task_state = task_state; 3342 } 3343 3344 /* 2.a.i. Reduce Block without a terminating barrier */ 3345 /*! 
3346 @ingroup SYNCHRONIZATION 3347 @param loc source location information 3348 @param global_tid global thread number 3349 @param num_vars number of items (variables) to be reduced 3350 @param reduce_size size of data in bytes to be reduced 3351 @param reduce_data pointer to data to be reduced 3352 @param reduce_func callback function providing reduction operation on two 3353 operands and returning result of reduction in lhs_data 3354 @param lck pointer to the unique lock data structure 3355 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3356 threads if atomic reduction needed 3357 3358 The nowait version is used for a reduce clause with the nowait argument. 3359 */ 3360 kmp_int32 3361 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3362 size_t reduce_size, void *reduce_data, 3363 void (*reduce_func)(void *lhs_data, void *rhs_data), 3364 kmp_critical_name *lck) { 3365 3366 KMP_COUNT_BLOCK(REDUCE_nowait); 3367 int retval = 0; 3368 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3369 kmp_info_t *th; 3370 kmp_team_t *team; 3371 int teams_swapped = 0, task_state; 3372 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3373 3374 // why do we need this initialization here at all? 3375 // Reduction clause can not be used as a stand-alone directive. 3376 3377 // do not call __kmp_serial_initialize(), it will be called by 3378 // __kmp_parallel_initialize() if needed 3379 // possible detection of false-positive race by the threadchecker ??? 3380 if (!TCR_4(__kmp_init_parallel)) 3381 __kmp_parallel_initialize(); 3382 3383 __kmp_resume_if_soft_paused(); 3384 3385 // check correctness of reduce block nesting 3386 #if KMP_USE_DYNAMIC_LOCK 3387 if (__kmp_env_consistency_check) 3388 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3389 #else 3390 if (__kmp_env_consistency_check) 3391 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3392 #endif 3393 3394 th = __kmp_thread_from_gtid(global_tid); 3395 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3396 3397 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3398 // the value should be kept in a variable 3399 // the variable should be either a construct-specific or thread-specific 3400 // property, not a team specific property 3401 // (a thread can reach the next reduce block on the next construct, reduce 3402 // method may differ on the next construct) 3403 // an ident_t "loc" parameter could be used as a construct-specific property 3404 // (what if loc == 0?) 
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop',
    // but the atomic operation has not been executed yet, it will be executed
    // slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if the master goes slow, other threads are blocked here waiting for
    // the master to come and release them)
    // AT: (it's not what a customer might expect when specifying the NOWAIT
    // clause)
    // AT: (specifying NOWAIT won't result in improvement of performance, it'll
    // be confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other
    // design) might go faster and be more in line with the sense of NOWAIT
    // AT: TO DO: do an EPCC test and compare times

    // this barrier should be invisible to a customer and to the threading
    // profile tool (it's neither a terminating barrier nor customer's code,
    // it's used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events,
    // so we set up the necessary frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ?
(0) : (1); 3482 #if OMPT_SUPPORT && OMPT_OPTIONAL 3483 if (ompt_enabled.enabled) { 3484 ompt_frame->enter_frame = ompt_data_none; 3485 } 3486 #endif 3487 3488 // all workers except the master should do this pop here 3489 // (no worker other than the master will get to __kmpc_end_reduce_nowait()) 3490 if (__kmp_env_consistency_check) { 3491 if (retval == 0) { 3492 __kmp_pop_sync(global_tid, ct_reduce, loc); 3493 } 3494 } 3495 3496 } else { 3497 3498 // should never reach this block 3499 KMP_ASSERT(0); // "unexpected method" 3500 } 3501 if (teams_swapped) { 3502 __kmp_restore_swapped_teams(th, team, task_state); 3503 } 3504 KA_TRACE( 3505 10, 3506 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3507 global_tid, packed_reduction_method, retval)); 3508 3509 return retval; 3510 } 3511 3512 /*! 3513 @ingroup SYNCHRONIZATION 3514 @param loc source location information 3515 @param global_tid global thread id. 3516 @param lck pointer to the unique lock data structure 3517 3518 Finish the execution of a reduce nowait. 3519 */ 3520 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3521 kmp_critical_name *lck) { 3522 3523 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3524 3525 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3526 3527 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3528 3529 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3530 3531 if (packed_reduction_method == critical_reduce_block) { 3532 3533 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3534 OMPT_REDUCTION_END; 3535 3536 } else if (packed_reduction_method == empty_reduce_block) { 3537 3538 // usage: if team size == 1, no synchronization is required (on Intel 3539 // platforms only) 3540 3541 OMPT_REDUCTION_END; 3542 3543 } else if (packed_reduction_method == atomic_reduce_block) { 3544 3545 // neither the master nor the other workers should get here 3546 // (code gen does not generate this call in case 2: atomic reduce block) 3547 // actually it would be better to remove this else-if entirely; 3548 // after removal this value would be caught by the 'else' branch and assert 3549 3550 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3551 tree_reduce_block)) { 3552 3553 // only the master gets here 3554 // OMPT: tree reduction is annotated in the barrier code 3555 3556 } else { 3557 3558 // should never reach this block 3559 KMP_ASSERT(0); // "unexpected method" 3560 } 3561 3562 if (__kmp_env_consistency_check) 3563 __kmp_pop_sync(global_tid, ct_reduce, loc); 3564 3565 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3566 global_tid, packed_reduction_method)); 3567 3568 return; 3569 } 3570 3571 /* 2.a.ii. Reduce Block with a terminating barrier */ 3572 3573 /*! 3574 @ingroup SYNCHRONIZATION 3575 @param loc source location information 3576 @param global_tid global thread number 3577 @param num_vars number of items (variables) to be reduced 3578 @param reduce_size size of data in bytes to be reduced 3579 @param reduce_data pointer to data to be reduced 3580 @param reduce_func callback function providing reduction operation on two 3581 operands and returning result of reduction in lhs_data 3582 @param lck pointer to the unique lock data structure 3583 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3584 threads if atomic reduction needed 3585 3586 A blocking reduce that includes an implicit barrier.
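A rough, illustrative sketch only (not literal compiler output; <tt>sum</tt>, <tt>sum_priv</tt>, <tt>reduce_fn</tt> and <tt>lck</tt> are invented names): code generation for a blocking reduction is expected to drive this entry point roughly as follows, combining each thread's private copy according to the method the runtime selects:

    ret = __kmpc_reduce(loc, gtid, 1, sizeof(sum), &sum_priv, reduce_fn, &lck);
    if (ret == 1) {           // combine the private copy, then close the region
      sum += sum_priv;
      __kmpc_end_reduce(loc, gtid, &lck);
    } else if (ret == 2) {    // combine the private copy with an atomic update
      // atomic add of sum_priv into sum
      __kmpc_end_reduce(loc, gtid, &lck);
    }                         // ret == 0: contribution already combined by the runtime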
3587 */ 3588 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3589 size_t reduce_size, void *reduce_data, 3590 void (*reduce_func)(void *lhs_data, void *rhs_data), 3591 kmp_critical_name *lck) { 3592 KMP_COUNT_BLOCK(REDUCE_wait); 3593 int retval = 0; 3594 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3595 kmp_info_t *th; 3596 kmp_team_t *team; 3597 int teams_swapped = 0, task_state; 3598 3599 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3600 3601 // why do we need this initialization here at all? 3602 // Reduction clause can not be a stand-alone directive. 3603 3604 // do not call __kmp_serial_initialize(), it will be called by 3605 // __kmp_parallel_initialize() if needed 3606 // possible detection of false-positive race by the threadchecker ??? 3607 if (!TCR_4(__kmp_init_parallel)) 3608 __kmp_parallel_initialize(); 3609 3610 __kmp_resume_if_soft_paused(); 3611 3612 // check correctness of reduce block nesting 3613 #if KMP_USE_DYNAMIC_LOCK 3614 if (__kmp_env_consistency_check) 3615 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3616 #else 3617 if (__kmp_env_consistency_check) 3618 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3619 #endif 3620 3621 th = __kmp_thread_from_gtid(global_tid); 3622 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3623 3624 packed_reduction_method = __kmp_determine_reduction_method( 3625 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3626 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3627 3628 OMPT_REDUCTION_DECL(th, global_tid); 3629 3630 if (packed_reduction_method == critical_reduce_block) { 3631 3632 OMPT_REDUCTION_BEGIN; 3633 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3634 retval = 1; 3635 3636 } else if (packed_reduction_method == empty_reduce_block) { 3637 3638 OMPT_REDUCTION_BEGIN; 3639 // usage: if team size == 1, no synchronization is required ( Intel 3640 // platforms only ) 3641 retval = 1; 3642 3643 } else if (packed_reduction_method == atomic_reduce_block) { 3644 3645 retval = 2; 3646 3647 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3648 tree_reduce_block)) { 3649 3650 // case tree_reduce_block: 3651 // this barrier should be visible to a customer and to the threading profile 3652 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3653 #if OMPT_SUPPORT 3654 ompt_frame_t *ompt_frame; 3655 if (ompt_enabled.enabled) { 3656 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3657 if (ompt_frame->enter_frame.ptr == NULL) 3658 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3659 OMPT_STORE_RETURN_ADDRESS(global_tid); 3660 } 3661 #endif 3662 #if USE_ITT_NOTIFY 3663 __kmp_threads[global_tid]->th.th_ident = 3664 loc; // needed for correct notification of frames 3665 #endif 3666 retval = 3667 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3668 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3669 retval = (retval != 0) ? 
(0) : (1); 3670 #if OMPT_SUPPORT && OMPT_OPTIONAL 3671 if (ompt_enabled.enabled) { 3672 ompt_frame->enter_frame = ompt_data_none; 3673 } 3674 #endif 3675 3676 // all workers except the master should do this pop here 3677 // (no worker other than the master will enter __kmpc_end_reduce()) 3678 if (__kmp_env_consistency_check) { 3679 if (retval == 0) { // 0: all other workers; 1: master 3680 __kmp_pop_sync(global_tid, ct_reduce, loc); 3681 } 3682 } 3683 3684 } else { 3685 3686 // should never reach this block 3687 KMP_ASSERT(0); // "unexpected method" 3688 } 3689 if (teams_swapped) { 3690 __kmp_restore_swapped_teams(th, team, task_state); 3691 } 3692 3693 KA_TRACE(10, 3694 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3695 global_tid, packed_reduction_method, retval)); 3696 return retval; 3697 } 3698 3699 /*! 3700 @ingroup SYNCHRONIZATION 3701 @param loc source location information 3702 @param global_tid global thread id. 3703 @param lck pointer to the unique lock data structure 3704 3705 Finish the execution of a blocking reduce. 3706 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3707 start function. 3708 */ 3709 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3710 kmp_critical_name *lck) { 3711 3712 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3713 kmp_info_t *th; 3714 kmp_team_t *team; 3715 int teams_swapped = 0, task_state; 3716 3717 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3718 3719 th = __kmp_thread_from_gtid(global_tid); 3720 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3721 3722 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3723 3724 // this barrier should be visible to a customer and to the threading profile 3725 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3726 OMPT_REDUCTION_DECL(th, global_tid); 3727 3728 if (packed_reduction_method == critical_reduce_block) { 3729 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3730 3731 OMPT_REDUCTION_END; 3732 3733 // TODO: implicit barrier: should be exposed 3734 #if OMPT_SUPPORT 3735 ompt_frame_t *ompt_frame; 3736 if (ompt_enabled.enabled) { 3737 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3738 if (ompt_frame->enter_frame.ptr == NULL) 3739 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3740 OMPT_STORE_RETURN_ADDRESS(global_tid); 3741 } 3742 #endif 3743 #if USE_ITT_NOTIFY 3744 __kmp_threads[global_tid]->th.th_ident = loc; 3745 #endif 3746 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3747 #if OMPT_SUPPORT && OMPT_OPTIONAL 3748 if (ompt_enabled.enabled) { 3749 ompt_frame->enter_frame = ompt_data_none; 3750 } 3751 #endif 3752 3753 } else if (packed_reduction_method == empty_reduce_block) { 3754 3755 OMPT_REDUCTION_END; 3756 3757 // usage: if team size==1, no synchronization is required (Intel platforms only) 3758 3759 // TODO: implicit barrier: should be exposed 3760 #if OMPT_SUPPORT 3761 ompt_frame_t *ompt_frame; 3762 if (ompt_enabled.enabled) { 3763 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3764 if (ompt_frame->enter_frame.ptr == NULL) 3765 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3766 OMPT_STORE_RETURN_ADDRESS(global_tid); 3767 } 3768 #endif 3769 #if USE_ITT_NOTIFY 3770 __kmp_threads[global_tid]->th.th_ident = loc; 3771 #endif 3772 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3773 #if OMPT_SUPPORT && OMPT_OPTIONAL
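// clear the enter_frame that was published before the barrier above so an
// OMPT tool does not observe a stale frame pointer once the region is left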
3774 if (ompt_enabled.enabled) { 3775 ompt_frame->enter_frame = ompt_data_none; 3776 } 3777 #endif 3778 3779 } else if (packed_reduction_method == atomic_reduce_block) { 3780 3781 #if OMPT_SUPPORT 3782 ompt_frame_t *ompt_frame; 3783 if (ompt_enabled.enabled) { 3784 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3785 if (ompt_frame->enter_frame.ptr == NULL) 3786 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3787 OMPT_STORE_RETURN_ADDRESS(global_tid); 3788 } 3789 #endif 3790 // TODO: implicit barrier: should be exposed 3791 #if USE_ITT_NOTIFY 3792 __kmp_threads[global_tid]->th.th_ident = loc; 3793 #endif 3794 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3795 #if OMPT_SUPPORT && OMPT_OPTIONAL 3796 if (ompt_enabled.enabled) { 3797 ompt_frame->enter_frame = ompt_data_none; 3798 } 3799 #endif 3800 3801 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3802 tree_reduce_block)) { 3803 3804 // only the master executes here (the master releases all other workers) 3805 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3806 global_tid); 3807 3808 } else { 3809 3810 // should never reach this block 3811 KMP_ASSERT(0); // "unexpected method" 3812 } 3813 if (teams_swapped) { 3814 __kmp_restore_swapped_teams(th, team, task_state); 3815 } 3816 3817 if (__kmp_env_consistency_check) 3818 __kmp_pop_sync(global_tid, ct_reduce, loc); 3819 3820 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3821 global_tid, packed_reduction_method)); 3822 3823 return; 3824 } 3825 3826 #undef __KMP_GET_REDUCTION_METHOD 3827 #undef __KMP_SET_REDUCTION_METHOD 3828 3829 /* end of interface to fast scalable reduce routines */ 3830 3831 kmp_uint64 __kmpc_get_taskid() { 3832 3833 kmp_int32 gtid; 3834 kmp_info_t *thread; 3835 3836 gtid = __kmp_get_gtid(); 3837 if (gtid < 0) { 3838 return 0; 3839 } 3840 thread = __kmp_thread_from_gtid(gtid); 3841 return thread->th.th_current_task->td_task_id; 3842 3843 } // __kmpc_get_taskid 3844 3845 kmp_uint64 __kmpc_get_parent_taskid() { 3846 3847 kmp_int32 gtid; 3848 kmp_info_t *thread; 3849 kmp_taskdata_t *parent_task; 3850 3851 gtid = __kmp_get_gtid(); 3852 if (gtid < 0) { 3853 return 0; 3854 } 3855 thread = __kmp_thread_from_gtid(gtid); 3856 parent_task = thread->th.th_current_task->td_parent; 3857 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3858 3859 } // __kmpc_get_parent_taskid 3860 3861 /*! 3862 @ingroup WORK_SHARING 3863 @param loc source location information. 3864 @param gtid global thread number. 3865 @param num_dims number of associated doacross loops. 3866 @param dims info on loop bounds. 3867 3868 Initialize doacross loop information. 3869 The compiler is expected to send us inclusive bounds, 3870 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
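For illustration only (this mapping is a sketch, not a claim about any particular compiler), a doacross loop nest such as

    #pragma omp for ordered(1)
    for (i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink: i - 2)
      // ... consume data produced by iteration i - 2 ...
      #pragma omp ordered depend(source)
    }

is expected to be lowered so that each thread calls __kmpc_doacross_init once before the loop, __kmpc_doacross_wait at each depend(sink) point, __kmpc_doacross_post at each depend(source) point, and __kmpc_doacross_fini after the loop.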
3871 */ 3872 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3873 const struct kmp_dim *dims) { 3874 int j, idx; 3875 kmp_int64 last, trace_count; 3876 kmp_info_t *th = __kmp_threads[gtid]; 3877 kmp_team_t *team = th->th.th_team; 3878 kmp_uint32 *flags; 3879 kmp_disp_t *pr_buf = th->th.th_dispatch; 3880 dispatch_shared_info_t *sh_buf; 3881 3882 KA_TRACE( 3883 20, 3884 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3885 gtid, num_dims, !team->t.t_serialized)); 3886 KMP_DEBUG_ASSERT(dims != NULL); 3887 KMP_DEBUG_ASSERT(num_dims > 0); 3888 3889 if (team->t.t_serialized) { 3890 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3891 return; // no dependencies if team is serialized 3892 } 3893 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3894 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3895 // the next loop 3896 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3897 3898 // Save bounds info into allocated private buffer 3899 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3900 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3901 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3902 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3903 pr_buf->th_doacross_info[0] = 3904 (kmp_int64)num_dims; // first element is number of dimensions 3905 // Also save the address of num_done so it can be accessed later without 3906 // knowing the buffer index 3907 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3908 pr_buf->th_doacross_info[2] = dims[0].lo; 3909 pr_buf->th_doacross_info[3] = dims[0].up; 3910 pr_buf->th_doacross_info[4] = dims[0].st; 3911 last = 5; 3912 for (j = 1; j < num_dims; ++j) { 3913 kmp_int64 3914 range_length; // Keeps the range of each dimension except the first, dims[0] 3915 if (dims[j].st == 1) { // most common case 3916 // AC: should we care about ranges bigger than LLONG_MAX? (not for now) 3917 range_length = dims[j].up - dims[j].lo + 1; 3918 } else { 3919 if (dims[j].st > 0) { 3920 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3921 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3922 } else { // negative increment 3923 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3924 range_length = 3925 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3926 } 3927 } 3928 pr_buf->th_doacross_info[last++] = range_length; 3929 pr_buf->th_doacross_info[last++] = dims[j].lo; 3930 pr_buf->th_doacross_info[last++] = dims[j].up; 3931 pr_buf->th_doacross_info[last++] = dims[j].st; 3932 } 3933 3934 // Compute total trip count. 3935 // Start with the range of dims[0], which we don't need to keep in the buffer.
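// (e.g. for the doxygen example above, lo=2, up=8, st=2 gives (8 - 2) / 2 + 1
// = 4 iterations; the mirrored formula (lo - up) / (-st) + 1 is used below for
// a negative stride)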
3936 if (dims[0].st == 1) { // most common case 3937 trace_count = dims[0].up - dims[0].lo + 1; 3938 } else if (dims[0].st > 0) { 3939 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3940 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3941 } else { // negative increment 3942 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3943 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3944 } 3945 for (j = 1; j < num_dims; ++j) { 3946 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3947 } 3948 KMP_DEBUG_ASSERT(trace_count > 0); 3949 3950 // Check if shared buffer is not occupied by other loop (idx - 3951 // __kmp_dispatch_num_buffers) 3952 if (idx != sh_buf->doacross_buf_idx) { 3953 // Shared buffer is occupied, wait for it to be free 3954 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3955 __kmp_eq_4, NULL); 3956 } 3957 #if KMP_32_BIT_ARCH 3958 // Check if we are the first thread. After the CAS the first thread gets 0, 3959 // others get 1 if initialization is in progress, allocated pointer otherwise. 3960 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3961 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3962 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3963 #else 3964 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3965 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3966 #endif 3967 if (flags == NULL) { 3968 // we are the first thread, allocate the array of flags 3969 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3970 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3971 KMP_MB(); 3972 sh_buf->doacross_flags = flags; 3973 } else if (flags == (kmp_uint32 *)1) { 3974 #if KMP_32_BIT_ARCH 3975 // initialization is still in progress, need to wait 3976 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3977 #else 3978 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3979 #endif 3980 KMP_YIELD(TRUE); 3981 KMP_MB(); 3982 } else { 3983 KMP_MB(); 3984 } 3985 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3986 pr_buf->th_doacross_flags = 3987 sh_buf->doacross_flags; // save private copy in order to not 3988 // touch shared buffer on each iteration 3989 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3990 } 3991 3992 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3993 kmp_int32 shft, num_dims, i; 3994 kmp_uint32 flag; 3995 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3996 kmp_info_t *th = __kmp_threads[gtid]; 3997 kmp_team_t *team = th->th.th_team; 3998 kmp_disp_t *pr_buf; 3999 kmp_int64 lo, up, st; 4000 4001 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4002 if (team->t.t_serialized) { 4003 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4004 return; // no dependencies if team is serialized 4005 } 4006 4007 // calculate sequential iteration number and check out-of-bounds condition 4008 pr_buf = th->th.th_dispatch; 4009 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4010 num_dims = pr_buf->th_doacross_info[0]; 4011 lo = pr_buf->th_doacross_info[2]; 4012 up = pr_buf->th_doacross_info[3]; 4013 st = pr_buf->th_doacross_info[4]; 4014 #if OMPT_SUPPORT && OMPT_OPTIONAL 4015 ompt_dependence_t deps[num_dims]; 4016 #endif 4017 if (st == 1) { // most common case 4018 if (vec[0] < lo || vec[0] > up) { 4019 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4020 
"bounds [%lld,%lld]\n", 4021 gtid, vec[0], lo, up)); 4022 return; 4023 } 4024 iter_number = vec[0] - lo; 4025 } else if (st > 0) { 4026 if (vec[0] < lo || vec[0] > up) { 4027 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4028 "bounds [%lld,%lld]\n", 4029 gtid, vec[0], lo, up)); 4030 return; 4031 } 4032 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4033 } else { // negative increment 4034 if (vec[0] > lo || vec[0] < up) { 4035 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4036 "bounds [%lld,%lld]\n", 4037 gtid, vec[0], lo, up)); 4038 return; 4039 } 4040 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4041 } 4042 #if OMPT_SUPPORT && OMPT_OPTIONAL 4043 deps[0].variable.value = iter_number; 4044 deps[0].dependence_type = ompt_dependence_type_sink; 4045 #endif 4046 for (i = 1; i < num_dims; ++i) { 4047 kmp_int64 iter, ln; 4048 kmp_int32 j = i * 4; 4049 ln = pr_buf->th_doacross_info[j + 1]; 4050 lo = pr_buf->th_doacross_info[j + 2]; 4051 up = pr_buf->th_doacross_info[j + 3]; 4052 st = pr_buf->th_doacross_info[j + 4]; 4053 if (st == 1) { 4054 if (vec[i] < lo || vec[i] > up) { 4055 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4056 "bounds [%lld,%lld]\n", 4057 gtid, vec[i], lo, up)); 4058 return; 4059 } 4060 iter = vec[i] - lo; 4061 } else if (st > 0) { 4062 if (vec[i] < lo || vec[i] > up) { 4063 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4064 "bounds [%lld,%lld]\n", 4065 gtid, vec[i], lo, up)); 4066 return; 4067 } 4068 iter = (kmp_uint64)(vec[i] - lo) / st; 4069 } else { // st < 0 4070 if (vec[i] > lo || vec[i] < up) { 4071 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4072 "bounds [%lld,%lld]\n", 4073 gtid, vec[i], lo, up)); 4074 return; 4075 } 4076 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4077 } 4078 iter_number = iter + ln * iter_number; 4079 #if OMPT_SUPPORT && OMPT_OPTIONAL 4080 deps[i].variable.value = iter; 4081 deps[i].dependence_type = ompt_dependence_type_sink; 4082 #endif 4083 } 4084 shft = iter_number % 32; // use 32-bit granularity 4085 iter_number >>= 5; // divided by 32 4086 flag = 1 << shft; 4087 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4088 KMP_YIELD(TRUE); 4089 } 4090 KMP_MB(); 4091 #if OMPT_SUPPORT && OMPT_OPTIONAL 4092 if (ompt_enabled.ompt_callback_dependences) { 4093 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4094 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4095 } 4096 #endif 4097 KA_TRACE(20, 4098 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4099 gtid, (iter_number << 5) + shft)); 4100 } 4101 4102 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4103 kmp_int32 shft, num_dims, i; 4104 kmp_uint32 flag; 4105 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4106 kmp_info_t *th = __kmp_threads[gtid]; 4107 kmp_team_t *team = th->th.th_team; 4108 kmp_disp_t *pr_buf; 4109 kmp_int64 lo, st; 4110 4111 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4112 if (team->t.t_serialized) { 4113 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4114 return; // no dependencies if team is serialized 4115 } 4116 4117 // calculate sequential iteration number (same as in "wait" but no 4118 // out-of-bounds checks) 4119 pr_buf = th->th.th_dispatch; 4120 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4121 num_dims = pr_buf->th_doacross_info[0]; 4122 lo = pr_buf->th_doacross_info[2]; 4123 st = 
pr_buf->th_doacross_info[4]; 4124 #if OMPT_SUPPORT && OMPT_OPTIONAL 4125 ompt_dependence_t deps[num_dims]; 4126 #endif 4127 if (st == 1) { // most common case 4128 iter_number = vec[0] - lo; 4129 } else if (st > 0) { 4130 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4131 } else { // negative increment 4132 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4133 } 4134 #if OMPT_SUPPORT && OMPT_OPTIONAL 4135 deps[0].variable.value = iter_number; 4136 deps[0].dependence_type = ompt_dependence_type_source; 4137 #endif 4138 for (i = 1; i < num_dims; ++i) { 4139 kmp_int64 iter, ln; 4140 kmp_int32 j = i * 4; 4141 ln = pr_buf->th_doacross_info[j + 1]; 4142 lo = pr_buf->th_doacross_info[j + 2]; 4143 st = pr_buf->th_doacross_info[j + 4]; 4144 if (st == 1) { 4145 iter = vec[i] - lo; 4146 } else if (st > 0) { 4147 iter = (kmp_uint64)(vec[i] - lo) / st; 4148 } else { // st < 0 4149 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4150 } 4151 iter_number = iter + ln * iter_number; 4152 #if OMPT_SUPPORT && OMPT_OPTIONAL 4153 deps[i].variable.value = iter; 4154 deps[i].dependence_type = ompt_dependence_type_source; 4155 #endif 4156 } 4157 #if OMPT_SUPPORT && OMPT_OPTIONAL 4158 if (ompt_enabled.ompt_callback_dependences) { 4159 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4160 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4161 } 4162 #endif 4163 shft = iter_number % 32; // use 32-bit granularity 4164 iter_number >>= 5; // divided by 32 4165 flag = 1 << shft; 4166 KMP_MB(); 4167 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4168 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4169 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4170 (iter_number << 5) + shft)); 4171 } 4172 4173 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4174 kmp_int32 num_done; 4175 kmp_info_t *th = __kmp_threads[gtid]; 4176 kmp_team_t *team = th->th.th_team; 4177 kmp_disp_t *pr_buf = th->th.th_dispatch; 4178 4179 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4180 if (team->t.t_serialized) { 4181 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4182 return; // nothing to do 4183 } 4184 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4185 if (num_done == th->th.th_team_nproc) { 4186 // we are the last thread, need to free shared resources 4187 int idx = pr_buf->th_doacross_buf_idx - 1; 4188 dispatch_shared_info_t *sh_buf = 4189 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4190 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4191 (kmp_int64)&sh_buf->doacross_num_done); 4192 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4193 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4194 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4195 sh_buf->doacross_flags = NULL; 4196 sh_buf->doacross_num_done = 0; 4197 sh_buf->doacross_buf_idx += 4198 __kmp_dispatch_num_buffers; // free buffer for future re-use 4199 } 4200 // free private resources (need to keep buffer index forever) 4201 pr_buf->th_doacross_flags = NULL; 4202 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4203 pr_buf->th_doacross_info = NULL; 4204 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4205 } 4206 4207 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4208 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4209 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4210 } 4211 4212 void omp_free(void *ptr, omp_allocator_handle_t 
allocator) { 4213 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4214 } 4215 4216 int __kmpc_get_target_offload(void) { 4217 if (!__kmp_init_serial) { 4218 __kmp_serial_initialize(); 4219 } 4220 return __kmp_target_offload; 4221 } 4222 4223 int __kmpc_pause_resource(kmp_pause_status_t level) { 4224 if (!__kmp_init_serial) { 4225 return 1; // Can't pause if runtime is not initialized 4226 } 4227 return __kmp_pause_resource(level); 4228 } 4229
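/* Illustrative sketch only, not part of the runtime: the omp_alloc/omp_free
   entry points above back the OpenMP 5.0 memory-management API that user code
   reaches through <omp.h>. Assuming some arbitrary element count n, a typical
   use looks like:

     double *buf = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }

   omp_default_mem_alloc is one of the predefined allocator handles; omp_free
   is passed the allocator the memory was obtained from. */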