/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
93 94 If multiple non-OpenMP threads all enter an OpenMP construct then this 95 will be a unique thread identifier among all the threads created by 96 the OpenMP runtime (but the value cannot be defined in terms of 97 OpenMP thread ids returned by omp_get_thread_num()). 98 */ 99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 100 kmp_int32 gtid = __kmp_entry_gtid(); 101 102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 103 104 return gtid; 105 } 106 107 /*! 108 @ingroup THREAD_STATES 109 @param loc Source location information. 110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 111 112 This function can be called in any context. 113 It returns the total number of threads under the control of the OpenMP runtime. 114 That is not a number that can be determined by any OpenMP standard calls, since 115 the library may be called from more than one non-OpenMP thread, and this 116 reflects the total over all such calls. Similarly the runtime maintains 117 underlying threads even when they are not active (since the cost of creating 118 and destroying OS threads is high), this call counts all such threads even if 119 they are not waiting for work. 120 */ 121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 122 KC_TRACE(10, 123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 124 125 return TCR_4(__kmp_all_nth); 126 } 127 128 /*! 129 @ingroup THREAD_STATES 130 @param loc Source location information. 131 @return The thread number of the calling thread in the innermost active parallel 132 construct. 133 */ 134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 136 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 137 } 138 139 /*! 140 @ingroup THREAD_STATES 141 @param loc Source location information. 142 @return The number of threads in the innermost active parallel construct. 143 */ 144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 146 147 return __kmp_entry_thread()->th.th_team->t.t_nproc; 148 } 149 150 /*! 151 * @ingroup DEPRECATED 152 * @param loc location description 153 * 154 * This function need not be called. It always returns TRUE. 
155 */ 156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 157 #ifndef KMP_DEBUG 158 159 return TRUE; 160 161 #else 162 163 const char *semi2; 164 const char *semi3; 165 int line_no; 166 167 if (__kmp_par_range == 0) { 168 return TRUE; 169 } 170 semi2 = loc->psource; 171 if (semi2 == NULL) { 172 return TRUE; 173 } 174 semi2 = strchr(semi2, ';'); 175 if (semi2 == NULL) { 176 return TRUE; 177 } 178 semi2 = strchr(semi2 + 1, ';'); 179 if (semi2 == NULL) { 180 return TRUE; 181 } 182 if (__kmp_par_range_filename[0]) { 183 const char *name = semi2 - 1; 184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 185 name--; 186 } 187 if ((*name == '/') || (*name == ';')) { 188 name++; 189 } 190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 191 return __kmp_par_range < 0; 192 } 193 } 194 semi3 = strchr(semi2 + 1, ';'); 195 if (__kmp_par_range_routine[0]) { 196 if ((semi3 != NULL) && (semi3 > semi2) && 197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 198 return __kmp_par_range < 0; 199 } 200 } 201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 203 return __kmp_par_range > 0; 204 } 205 return __kmp_par_range < 0; 206 } 207 return TRUE; 208 209 #endif /* KMP_DEBUG */ 210 } 211 212 /*! 213 @ingroup THREAD_STATES 214 @param loc Source location information. 215 @return 1 if this thread is executing inside an active parallel region, zero if 216 not. 217 */ 218 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 219 return __kmp_entry_thread()->th.th_root->r.r_active; 220 } 221 222 /*! 223 @ingroup PARALLEL 224 @param loc source location information 225 @param global_tid global thread number 226 @param num_threads number of threads requested for this parallel construct 227 228 Set the number of threads to be used by the next fork spawned by this thread. 229 This call is only required if the parallel construct has a `num_threads` clause. 230 */ 231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 232 kmp_int32 num_threads) { 233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 234 global_tid, num_threads)); 235 __kmp_assert_valid_gtid(global_tid); 236 __kmp_push_num_threads(loc, global_tid, num_threads); 237 } 238 239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 241 /* the num_threads are automatically popped */ 242 } 243 244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 245 kmp_int32 proc_bind) { 246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 247 proc_bind)); 248 __kmp_assert_valid_gtid(global_tid); 249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 250 } 251 252 /*! 253 @ingroup PARALLEL 254 @param loc source location information 255 @param argc total number of arguments in the ellipsis 256 @param microtask pointer to callback routine consisting of outlined parallel 257 construct 258 @param ... pointers to shared variables that aren't global 259 260 Do the actual fork and call the microtask in the relevant number of threads. 261 */ 262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param cond condition for running in parallel
@param args struct of pointers to shared variables that aren't global

Perform a fork only if the condition is true.
*/
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                         kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();
  int zero = 0;
  if (cond) {
    if (args)
      __kmpc_fork_call(loc, argc, microtask, args);
    else
      __kmpc_fork_call(loc, argc, microtask);
  } else {
    __kmpc_serialized_parallel(loc, gtid);

    if (args)
      microtask(&gtid, &zero, args);
    else
      microtask(&gtid, &zero);

    __kmpc_end_serialized_parallel(loc, gtid);
  }
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
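
As an illustrative sketch only (not authoritative compiler output), a construct
such as
@code
#pragma omp teams num_teams(4) thread_limit(8)
{ teams_body(); }
@endcode
might be lowered along the lines of
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_teams(&loc, gtid, 4, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
@endcode
where `loc` is the compiler-emitted ident_t for the construct and
`outlined_teams_body` is a placeholder name for an outlined routine with the
usual `(kmp_int32 *gtid, kmp_int32 *bound_tid, ...)` microtask signature.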
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...)
{
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
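
A minimal sketch of that lowering, mirroring __kmpc_fork_call_if() above
(`outlined` is a placeholder for the outlined parallel body):
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
} else {
  kmp_int32 zero = 0;
  __kmpc_serialized_parallel(&loc, gtid);
  outlined(&gtid, &zero);
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode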
521 */ 522 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 523 // The implementation is now in kmp_runtime.cpp so that it can share static 524 // functions with kmp_fork_call since the tasks to be done are similar in 525 // each case. 526 __kmp_assert_valid_gtid(global_tid); 527 #if OMPT_SUPPORT 528 OMPT_STORE_RETURN_ADDRESS(global_tid); 529 #endif 530 __kmp_serialized_parallel(loc, global_tid); 531 } 532 533 /*! 534 @ingroup PARALLEL 535 @param loc source location information 536 @param global_tid global thread number 537 538 Leave a serialized parallel construct. 539 */ 540 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 541 kmp_internal_control_t *top; 542 kmp_info_t *this_thr; 543 kmp_team_t *serial_team; 544 545 KC_TRACE(10, 546 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 547 548 /* skip all this code for autopar serialized loops since it results in 549 unacceptable overhead */ 550 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 551 return; 552 553 // Not autopar code 554 __kmp_assert_valid_gtid(global_tid); 555 if (!TCR_4(__kmp_init_parallel)) 556 __kmp_parallel_initialize(); 557 558 __kmp_resume_if_soft_paused(); 559 560 this_thr = __kmp_threads[global_tid]; 561 serial_team = this_thr->th.th_serial_team; 562 563 kmp_task_team_t *task_team = this_thr->th.th_task_team; 564 // we need to wait for the proxy tasks before finishing the thread 565 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || 566 task_team->tt.tt_hidden_helper_task_encountered)) 567 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 568 569 KMP_MB(); 570 KMP_DEBUG_ASSERT(serial_team); 571 KMP_ASSERT(serial_team->t.t_serialized); 572 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 573 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 574 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 575 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 576 577 #if OMPT_SUPPORT 578 if (ompt_enabled.enabled && 579 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 580 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 581 if (ompt_enabled.ompt_callback_implicit_task) { 582 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 583 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 584 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 585 } 586 587 // reset clear the task id only after unlinking the task 588 ompt_data_t *parent_task_data; 589 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 590 591 if (ompt_enabled.ompt_callback_parallel_end) { 592 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 593 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 594 ompt_parallel_invoker_program | ompt_parallel_team, 595 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 596 } 597 __ompt_lw_taskteam_unlink(this_thr); 598 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 599 } 600 #endif 601 602 /* If necessary, pop the internal control stack values and replace the team 603 * values */ 604 top = serial_team->t.t_control_stack_top; 605 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 606 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 607 serial_team->t.t_control_stack_top = top->next; 608 __kmp_free(top); 609 } 610 611 /* pop dispatch buffers stack */ 612 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 613 { 614 dispatch_private_info_t *disp_buffer = 615 
serial_team->t.t_dispatch->th_disp_buffer; 616 serial_team->t.t_dispatch->th_disp_buffer = 617 serial_team->t.t_dispatch->th_disp_buffer->next; 618 __kmp_free(disp_buffer); 619 } 620 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 621 622 --serial_team->t.t_serialized; 623 if (serial_team->t.t_serialized == 0) { 624 625 /* return to the parallel section */ 626 627 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 628 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 629 __kmp_clear_x87_fpu_status_word(); 630 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 631 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 632 } 633 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 634 635 __kmp_pop_current_task_from_thread(this_thr); 636 #if OMPD_SUPPORT 637 if (ompd_state & OMPD_ENABLE_BP) 638 ompd_bp_parallel_end(); 639 #endif 640 641 this_thr->th.th_team = serial_team->t.t_parent; 642 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 643 644 /* restore values cached in the thread */ 645 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 646 this_thr->th.th_team_master = 647 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 648 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 649 650 /* TODO the below shouldn't need to be adjusted for serialized teams */ 651 this_thr->th.th_dispatch = 652 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 653 654 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 655 this_thr->th.th_current_task->td_flags.executing = 1; 656 657 if (__kmp_tasking_mode != tskm_immediate_exec) { 658 // Copy the task team from the new child / old parent team to the thread. 659 this_thr->th.th_task_team = 660 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 661 KA_TRACE(20, 662 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 663 "team %p\n", 664 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 665 } 666 #if KMP_AFFINITY_SUPPORTED 667 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { 668 __kmp_reset_root_init_mask(global_tid); 669 } 670 #endif 671 } else { 672 if (__kmp_tasking_mode != tskm_immediate_exec) { 673 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 674 "depth of serial team %p to %d\n", 675 global_tid, serial_team, serial_team->t.t_serialized)); 676 } 677 } 678 679 serial_team->t.t_level--; 680 if (__kmp_env_consistency_check) 681 __kmp_pop_parallel(global_tid, NULL); 682 #if OMPT_SUPPORT 683 if (ompt_enabled.enabled) 684 this_thr->th.ompt_thread_info.state = 685 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 686 : ompt_state_work_parallel); 687 #endif 688 } 689 690 /*! 691 @ingroup SYNCHRONIZATION 692 @param loc source location information. 693 694 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 695 depending on the memory ordering convention obeyed by the compiler 696 even that may not be necessary). 697 */ 698 void __kmpc_flush(ident_t *loc) { 699 KC_TRACE(10, ("__kmpc_flush: called\n")); 700 701 /* need explicit __mf() here since use volatile instead in library */ 702 KMP_MFENCE(); /* Flush all pending memory write invalidates. 
*/ 703 704 #if OMPT_SUPPORT && OMPT_OPTIONAL 705 if (ompt_enabled.ompt_callback_flush) { 706 ompt_callbacks.ompt_callback(ompt_callback_flush)( 707 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 708 } 709 #endif 710 } 711 712 /* -------------------------------------------------------------------------- */ 713 /*! 714 @ingroup SYNCHRONIZATION 715 @param loc source location information 716 @param global_tid thread id. 717 718 Execute a barrier. 719 */ 720 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 721 KMP_COUNT_BLOCK(OMP_BARRIER); 722 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 723 __kmp_assert_valid_gtid(global_tid); 724 725 if (!TCR_4(__kmp_init_parallel)) 726 __kmp_parallel_initialize(); 727 728 __kmp_resume_if_soft_paused(); 729 730 if (__kmp_env_consistency_check) { 731 if (loc == 0) { 732 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 733 } 734 __kmp_check_barrier(global_tid, ct_barrier, loc); 735 } 736 737 #if OMPT_SUPPORT 738 ompt_frame_t *ompt_frame; 739 if (ompt_enabled.enabled) { 740 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 741 if (ompt_frame->enter_frame.ptr == NULL) 742 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 743 } 744 OMPT_STORE_RETURN_ADDRESS(global_tid); 745 #endif 746 __kmp_threads[global_tid]->th.th_ident = loc; 747 // TODO: explicit barrier_wait_id: 748 // this function is called when 'barrier' directive is present or 749 // implicit barrier at the end of a worksharing construct. 750 // 1) better to add a per-thread barrier counter to a thread data structure 751 // 2) set to 0 when a new team is created 752 // 4) no sync is required 753 754 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 755 #if OMPT_SUPPORT && OMPT_OPTIONAL 756 if (ompt_enabled.enabled) { 757 ompt_frame->enter_frame = ompt_data_none; 758 } 759 #endif 760 } 761 762 /* The BARRIER for a MASTER section is always explicit */ 763 /*! 764 @ingroup WORK_SHARING 765 @param loc source location information. 766 @param global_tid global thread number . 767 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
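
Typical usage brackets the region with __kmpc_end_master(), guarded by the
return value (illustrative sketch):
@code
if (__kmpc_master(&loc, gtid)) {
  // statements executed by the primary thread only
  __kmpc_end_master(&loc, gtid);
}
@endcode
Only the thread for which this call returned 1 may call __kmpc_end_master().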
768 */ 769 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 770 int status = 0; 771 772 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 773 __kmp_assert_valid_gtid(global_tid); 774 775 if (!TCR_4(__kmp_init_parallel)) 776 __kmp_parallel_initialize(); 777 778 __kmp_resume_if_soft_paused(); 779 780 if (KMP_MASTER_GTID(global_tid)) { 781 KMP_COUNT_BLOCK(OMP_MASTER); 782 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 783 status = 1; 784 } 785 786 #if OMPT_SUPPORT && OMPT_OPTIONAL 787 if (status) { 788 if (ompt_enabled.ompt_callback_masked) { 789 kmp_info_t *this_thr = __kmp_threads[global_tid]; 790 kmp_team_t *team = this_thr->th.th_team; 791 792 int tid = __kmp_tid_from_gtid(global_tid); 793 ompt_callbacks.ompt_callback(ompt_callback_masked)( 794 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 795 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 796 OMPT_GET_RETURN_ADDRESS(0)); 797 } 798 } 799 #endif 800 801 if (__kmp_env_consistency_check) { 802 #if KMP_USE_DYNAMIC_LOCK 803 if (status) 804 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 805 else 806 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 807 #else 808 if (status) 809 __kmp_push_sync(global_tid, ct_master, loc, NULL); 810 else 811 __kmp_check_sync(global_tid, ct_master, loc, NULL); 812 #endif 813 } 814 815 return status; 816 } 817 818 /*! 819 @ingroup WORK_SHARING 820 @param loc source location information. 821 @param global_tid global thread number . 822 823 Mark the end of a <tt>master</tt> region. This should only be called by the 824 thread that executes the <tt>master</tt> region. 825 */ 826 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 827 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 828 __kmp_assert_valid_gtid(global_tid); 829 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 830 KMP_POP_PARTITIONED_TIMER(); 831 832 #if OMPT_SUPPORT && OMPT_OPTIONAL 833 kmp_info_t *this_thr = __kmp_threads[global_tid]; 834 kmp_team_t *team = this_thr->th.th_team; 835 if (ompt_enabled.ompt_callback_masked) { 836 int tid = __kmp_tid_from_gtid(global_tid); 837 ompt_callbacks.ompt_callback(ompt_callback_masked)( 838 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 839 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 840 OMPT_GET_RETURN_ADDRESS(0)); 841 } 842 #endif 843 844 if (__kmp_env_consistency_check) { 845 if (KMP_MASTER_GTID(global_tid)) 846 __kmp_pop_sync(global_tid, ct_master, loc); 847 } 848 } 849 850 /*! 851 @ingroup WORK_SHARING 852 @param loc source location information. 853 @param global_tid global thread number. 854 @param filter result of evaluating filter clause on thread global_tid, or zero 855 if no filter clause present 856 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 
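
Usage mirrors __kmpc_master(), with the evaluated <tt>filter</tt> expression
passed in (illustrative sketch):
@code
if (__kmpc_masked(&loc, gtid, filter_value)) {
  // statements executed by the selected thread only
  __kmpc_end_masked(&loc, gtid);
}
@endcode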
857 */ 858 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 859 int status = 0; 860 int tid; 861 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 862 __kmp_assert_valid_gtid(global_tid); 863 864 if (!TCR_4(__kmp_init_parallel)) 865 __kmp_parallel_initialize(); 866 867 __kmp_resume_if_soft_paused(); 868 869 tid = __kmp_tid_from_gtid(global_tid); 870 if (tid == filter) { 871 KMP_COUNT_BLOCK(OMP_MASKED); 872 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 873 status = 1; 874 } 875 876 #if OMPT_SUPPORT && OMPT_OPTIONAL 877 if (status) { 878 if (ompt_enabled.ompt_callback_masked) { 879 kmp_info_t *this_thr = __kmp_threads[global_tid]; 880 kmp_team_t *team = this_thr->th.th_team; 881 ompt_callbacks.ompt_callback(ompt_callback_masked)( 882 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 883 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 884 OMPT_GET_RETURN_ADDRESS(0)); 885 } 886 } 887 #endif 888 889 if (__kmp_env_consistency_check) { 890 #if KMP_USE_DYNAMIC_LOCK 891 if (status) 892 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 893 else 894 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 895 #else 896 if (status) 897 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 898 else 899 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 900 #endif 901 } 902 903 return status; 904 } 905 906 /*! 907 @ingroup WORK_SHARING 908 @param loc source location information. 909 @param global_tid global thread number . 910 911 Mark the end of a <tt>masked</tt> region. This should only be called by the 912 thread that executes the <tt>masked</tt> region. 913 */ 914 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 915 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 916 __kmp_assert_valid_gtid(global_tid); 917 KMP_POP_PARTITIONED_TIMER(); 918 919 #if OMPT_SUPPORT && OMPT_OPTIONAL 920 kmp_info_t *this_thr = __kmp_threads[global_tid]; 921 kmp_team_t *team = this_thr->th.th_team; 922 if (ompt_enabled.ompt_callback_masked) { 923 int tid = __kmp_tid_from_gtid(global_tid); 924 ompt_callbacks.ompt_callback(ompt_callback_masked)( 925 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 926 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 927 OMPT_GET_RETURN_ADDRESS(0)); 928 } 929 #endif 930 931 if (__kmp_env_consistency_check) { 932 __kmp_pop_sync(global_tid, ct_masked, loc); 933 } 934 } 935 936 /*! 937 @ingroup WORK_SHARING 938 @param loc source location information. 939 @param gtid global thread number. 940 941 Start execution of an <tt>ordered</tt> construct. 
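
Inside each iteration of an ordered-annotated worksharing loop, the ordered
block is bracketed by this call and __kmpc_end_ordered() (illustrative sketch):
@code
// body of an iteration of a loop with the ordered clause
__kmpc_ordered(&loc, gtid);
// statements that must execute in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode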
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
1054 void *idx; 1055 kmp_indirect_lock_t **lck; 1056 lck = (kmp_indirect_lock_t **)crit; 1057 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1058 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1059 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1060 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1061 KA_TRACE(20, 1062 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1063 #if USE_ITT_BUILD 1064 __kmp_itt_critical_creating(ilk->lock, loc); 1065 #endif 1066 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1067 if (status == 0) { 1068 #if USE_ITT_BUILD 1069 __kmp_itt_critical_destroyed(ilk->lock); 1070 #endif 1071 // We don't really need to destroy the unclaimed lock here since it will be 1072 // cleaned up at program exit. 1073 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1074 } 1075 KMP_DEBUG_ASSERT(*lck != NULL); 1076 } 1077 1078 // Fast-path acquire tas lock 1079 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1080 { \ 1081 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1082 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1083 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1084 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1085 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1086 kmp_uint32 spins; \ 1087 KMP_FSYNC_PREPARE(l); \ 1088 KMP_INIT_YIELD(spins); \ 1089 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1090 do { \ 1091 if (TCR_4(__kmp_nth) > \ 1092 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1093 KMP_YIELD(TRUE); \ 1094 } else { \ 1095 KMP_YIELD_SPIN(spins); \ 1096 } \ 1097 __kmp_spin_backoff(&backoff); \ 1098 } while ( \ 1099 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1100 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1101 } \ 1102 KMP_FSYNC_ACQUIRED(l); \ 1103 } 1104 1105 // Fast-path test tas lock 1106 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1107 { \ 1108 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1109 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1110 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1111 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1112 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1113 } 1114 1115 // Fast-path release tas lock 1116 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1117 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1118 1119 #if KMP_USE_FUTEX 1120 1121 #include <sys/syscall.h> 1122 #include <unistd.h> 1123 #ifndef FUTEX_WAIT 1124 #define FUTEX_WAIT 0 1125 #endif 1126 #ifndef FUTEX_WAKE 1127 #define FUTEX_WAKE 1 1128 #endif 1129 1130 // Fast-path acquire futex lock 1131 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1132 { \ 1133 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1134 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1135 KMP_MB(); \ 1136 KMP_FSYNC_PREPARE(ftx); \ 1137 kmp_int32 poll_val; \ 1138 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1139 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1140 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1141 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1142 if (!cond) { \ 1143 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1144 poll_val | \ 1145 KMP_LOCK_BUSY(1, futex))) { \ 1146 continue; \ 1147 } \ 1148 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1149 } \ 1150 kmp_int32 rc; \ 1151 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1152 NULL, NULL, 0)) != 0) { \ 1153 continue; \ 1154 } \ 1155 gtid_code |= 1; \ 1156 } \ 1157 KMP_FSYNC_ACQUIRED(ftx); \ 1158 } 1159 
1160 // Fast-path test futex lock 1161 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1162 { \ 1163 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1164 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1165 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1166 KMP_FSYNC_ACQUIRED(ftx); \ 1167 rc = TRUE; \ 1168 } else { \ 1169 rc = FALSE; \ 1170 } \ 1171 } 1172 1173 // Fast-path release futex lock 1174 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1175 { \ 1176 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1177 KMP_MB(); \ 1178 KMP_FSYNC_RELEASING(ftx); \ 1179 kmp_int32 poll_val = \ 1180 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1181 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1182 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1183 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1184 } \ 1185 KMP_MB(); \ 1186 KMP_YIELD_OVERSUB(); \ 1187 } 1188 1189 #endif // KMP_USE_FUTEX 1190 1191 #else // KMP_USE_DYNAMIC_LOCK 1192 1193 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1194 ident_t const *loc, 1195 kmp_int32 gtid) { 1196 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1197 1198 // Because of the double-check, the following load doesn't need to be volatile 1199 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1200 1201 if (lck == NULL) { 1202 void *idx; 1203 1204 // Allocate & initialize the lock. 1205 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1206 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1207 __kmp_init_user_lock_with_checks(lck); 1208 __kmp_set_user_lock_location(lck, loc); 1209 #if USE_ITT_BUILD 1210 __kmp_itt_critical_creating(lck); 1211 // __kmp_itt_critical_creating() should be called *before* the first usage 1212 // of underlying lock. It is the only place where we can guarantee it. There 1213 // are chances the lock will destroyed with no usage, but it is not a 1214 // problem, because this is not real event seen by user but rather setting 1215 // name for object (lock). See more details in kmp_itt.h. 1216 #endif /* USE_ITT_BUILD */ 1217 1218 // Use a cmpxchg instruction to slam the start of the critical section with 1219 // the lock pointer. If another thread beat us to it, deallocate the lock, 1220 // and use the lock that the other thread allocated. 1221 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1222 1223 if (status == 0) { 1224 // Deallocate the lock and reload the value. 1225 #if USE_ITT_BUILD 1226 __kmp_itt_critical_destroyed(lck); 1227 // Let ITT know the lock is destroyed and the same memory location may be reused 1228 // for another purpose. 1229 #endif /* USE_ITT_BUILD */ 1230 __kmp_destroy_user_lock_with_checks(lck); 1231 __kmp_user_lock_free(&idx, gtid, lck); 1232 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1233 KMP_DEBUG_ASSERT(lck != NULL); 1234 } 1235 } 1236 return lck; 1237 } 1238 1239 #endif // KMP_USE_DYNAMIC_LOCK 1240 1241 /*! 1242 @ingroup WORK_SHARING 1243 @param loc source location information. 1244 @param global_tid global thread number. 1245 @param crit identity of the critical section. This could be a pointer to a lock 1246 associated with the critical section, or some other suitably unique value. 1247 1248 Enter code protected by a `critical` construct. 1249 This function blocks until the executing thread can enter the critical section. 
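
An illustrative sketch of how a named critical section might be lowered
(`crit_lock` stands in for the compiler-emitted kmp_critical_name object that
identifies this critical name throughout the program):
@code
static kmp_critical_name crit_lock; // zero-initialized, one per critical name

__kmpc_critical(&loc, gtid, &crit_lock);
// protected statements
__kmpc_end_critical(&loc, gtid, &crit_lock);
@endcode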
1250 */ 1251 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1252 kmp_critical_name *crit) { 1253 #if KMP_USE_DYNAMIC_LOCK 1254 #if OMPT_SUPPORT && OMPT_OPTIONAL 1255 OMPT_STORE_RETURN_ADDRESS(global_tid); 1256 #endif // OMPT_SUPPORT 1257 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1258 #else 1259 KMP_COUNT_BLOCK(OMP_CRITICAL); 1260 #if OMPT_SUPPORT && OMPT_OPTIONAL 1261 ompt_state_t prev_state = ompt_state_undefined; 1262 ompt_thread_info_t ti; 1263 #endif 1264 kmp_user_lock_p lck; 1265 1266 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1267 __kmp_assert_valid_gtid(global_tid); 1268 1269 // TODO: add THR_OVHD_STATE 1270 1271 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1272 KMP_CHECK_USER_LOCK_INIT(); 1273 1274 if ((__kmp_user_lock_kind == lk_tas) && 1275 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1276 lck = (kmp_user_lock_p)crit; 1277 } 1278 #if KMP_USE_FUTEX 1279 else if ((__kmp_user_lock_kind == lk_futex) && 1280 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1281 lck = (kmp_user_lock_p)crit; 1282 } 1283 #endif 1284 else { // ticket, queuing or drdpa 1285 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1286 } 1287 1288 if (__kmp_env_consistency_check) 1289 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1290 1291 // since the critical directive binds to all threads, not just the current 1292 // team we have to check this even if we are in a serialized team. 1293 // also, even if we are the uber thread, we still have to conduct the lock, 1294 // as we have to contend with sibling threads. 1295 1296 #if USE_ITT_BUILD 1297 __kmp_itt_critical_acquiring(lck); 1298 #endif /* USE_ITT_BUILD */ 1299 #if OMPT_SUPPORT && OMPT_OPTIONAL 1300 OMPT_STORE_RETURN_ADDRESS(gtid); 1301 void *codeptr_ra = NULL; 1302 if (ompt_enabled.enabled) { 1303 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1304 /* OMPT state update */ 1305 prev_state = ti.state; 1306 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1307 ti.state = ompt_state_wait_critical; 1308 1309 /* OMPT event callback */ 1310 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1311 if (ompt_enabled.ompt_callback_mutex_acquire) { 1312 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1313 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1314 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1315 } 1316 } 1317 #endif 1318 // Value of 'crit' should be good for using as a critical_id of the critical 1319 // section directive. 
1320 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1321 1322 #if USE_ITT_BUILD 1323 __kmp_itt_critical_acquired(lck); 1324 #endif /* USE_ITT_BUILD */ 1325 #if OMPT_SUPPORT && OMPT_OPTIONAL 1326 if (ompt_enabled.enabled) { 1327 /* OMPT state update */ 1328 ti.state = prev_state; 1329 ti.wait_id = 0; 1330 1331 /* OMPT event callback */ 1332 if (ompt_enabled.ompt_callback_mutex_acquired) { 1333 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1334 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1335 } 1336 } 1337 #endif 1338 KMP_POP_PARTITIONED_TIMER(); 1339 1340 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1341 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1342 #endif // KMP_USE_DYNAMIC_LOCK 1343 } 1344 1345 #if KMP_USE_DYNAMIC_LOCK 1346 1347 // Converts the given hint to an internal lock implementation 1348 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1349 #if KMP_USE_TSX 1350 #define KMP_TSX_LOCK(seq) lockseq_##seq 1351 #else 1352 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1353 #endif 1354 1355 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1356 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) 1357 #else 1358 #define KMP_CPUINFO_RTM 0 1359 #endif 1360 1361 // Hints that do not require further logic 1362 if (hint & kmp_lock_hint_hle) 1363 return KMP_TSX_LOCK(hle); 1364 if (hint & kmp_lock_hint_rtm) 1365 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1366 if (hint & kmp_lock_hint_adaptive) 1367 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1368 1369 // Rule out conflicting hints first by returning the default lock 1370 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1371 return __kmp_user_lock_seq; 1372 if ((hint & omp_lock_hint_speculative) && 1373 (hint & omp_lock_hint_nonspeculative)) 1374 return __kmp_user_lock_seq; 1375 1376 // Do not even consider speculation when it appears to be contended 1377 if (hint & omp_lock_hint_contended) 1378 return lockseq_queuing; 1379 1380 // Uncontended lock without speculation 1381 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1382 return lockseq_tas; 1383 1384 // Use RTM lock for speculation 1385 if (hint & omp_lock_hint_speculative) 1386 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1387 1388 return __kmp_user_lock_seq; 1389 } 1390 1391 #if OMPT_SUPPORT && OMPT_OPTIONAL 1392 #if KMP_USE_DYNAMIC_LOCK 1393 static kmp_mutex_impl_t 1394 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1395 if (user_lock) { 1396 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1397 case 0: 1398 break; 1399 #if KMP_USE_FUTEX 1400 case locktag_futex: 1401 return kmp_mutex_impl_queuing; 1402 #endif 1403 case locktag_tas: 1404 return kmp_mutex_impl_spin; 1405 #if KMP_USE_TSX 1406 case locktag_hle: 1407 case locktag_rtm_spin: 1408 return kmp_mutex_impl_speculative; 1409 #endif 1410 default: 1411 return kmp_mutex_impl_none; 1412 } 1413 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1414 } 1415 KMP_ASSERT(ilock); 1416 switch (ilock->type) { 1417 #if KMP_USE_TSX 1418 case locktag_adaptive: 1419 case locktag_rtm_queuing: 1420 return kmp_mutex_impl_speculative; 1421 #endif 1422 case locktag_nested_tas: 1423 return kmp_mutex_impl_spin; 1424 #if KMP_USE_FUTEX 1425 case locktag_nested_futex: 1426 #endif 1427 case locktag_ticket: 1428 case locktag_queuing: 1429 case locktag_drdpa: 1430 case locktag_nested_ticket: 1431 case locktag_nested_queuing: 1432 case locktag_nested_drdpa: 1433 return kmp_mutex_impl_queuing; 1434 default: 1435 return kmp_mutex_impl_none; 1436 } 1437 } 1438 #else 1439 // For locks without dynamic binding 1440 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1441 switch (__kmp_user_lock_kind) { 1442 case lk_tas: 1443 return kmp_mutex_impl_spin; 1444 #if KMP_USE_FUTEX 1445 case lk_futex: 1446 #endif 1447 case lk_ticket: 1448 case lk_queuing: 1449 case lk_drdpa: 1450 return kmp_mutex_impl_queuing; 1451 #if KMP_USE_TSX 1452 case lk_hle: 1453 case lk_rtm_queuing: 1454 case lk_rtm_spin: 1455 case lk_adaptive: 1456 return kmp_mutex_impl_speculative; 1457 #endif 1458 default: 1459 return kmp_mutex_impl_none; 1460 } 1461 } 1462 #endif // KMP_USE_DYNAMIC_LOCK 1463 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1464 1465 /*! 1466 @ingroup WORK_SHARING 1467 @param loc source location information. 1468 @param global_tid global thread number. 1469 @param crit identity of the critical section. This could be a pointer to a lock 1470 associated with the critical section, or some other suitably unique value. 1471 @param hint the lock hint. 1472 1473 Enter code protected by a `critical` construct with a hint. The hint value is 1474 used to suggest a lock implementation. This function blocks until the executing 1475 thread can enter the critical section unless the hint suggests use of 1476 speculative execution and the hardware supports it. 1477 */ 1478 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1479 kmp_critical_name *crit, uint32_t hint) { 1480 KMP_COUNT_BLOCK(OMP_CRITICAL); 1481 kmp_user_lock_p lck; 1482 #if OMPT_SUPPORT && OMPT_OPTIONAL 1483 ompt_state_t prev_state = ompt_state_undefined; 1484 ompt_thread_info_t ti; 1485 // This is the case, if called from __kmpc_critical: 1486 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1487 if (!codeptr) 1488 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1489 #endif 1490 1491 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1492 __kmp_assert_valid_gtid(global_tid); 1493 1494 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1495 // Check if it is initialized. 
1496 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1497 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); 1498 if (*lk == 0) { 1499 if (KMP_IS_D_LOCK(lockseq)) { 1500 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1501 KMP_GET_D_TAG(lockseq)); 1502 } else { 1503 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); 1504 } 1505 } 1506 // Branch for accessing the actual lock object and set operation. This 1507 // branching is inevitable since this lock initialization does not follow the 1508 // normal dispatch path (lock table is not used). 1509 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1510 lck = (kmp_user_lock_p)lk; 1511 if (__kmp_env_consistency_check) { 1512 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1513 __kmp_map_hint_to_lock(hint)); 1514 } 1515 #if USE_ITT_BUILD 1516 __kmp_itt_critical_acquiring(lck); 1517 #endif 1518 #if OMPT_SUPPORT && OMPT_OPTIONAL 1519 if (ompt_enabled.enabled) { 1520 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1521 /* OMPT state update */ 1522 prev_state = ti.state; 1523 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1524 ti.state = ompt_state_wait_critical; 1525 1526 /* OMPT event callback */ 1527 if (ompt_enabled.ompt_callback_mutex_acquire) { 1528 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1529 ompt_mutex_critical, (unsigned int)hint, 1530 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1531 codeptr); 1532 } 1533 } 1534 #endif 1535 #if KMP_USE_INLINED_TAS 1536 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { 1537 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1538 } else 1539 #elif KMP_USE_INLINED_FUTEX 1540 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { 1541 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1542 } else 1543 #endif 1544 { 1545 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1546 } 1547 } else { 1548 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1549 lck = ilk->lock; 1550 if (__kmp_env_consistency_check) { 1551 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1552 __kmp_map_hint_to_lock(hint)); 1553 } 1554 #if USE_ITT_BUILD 1555 __kmp_itt_critical_acquiring(lck); 1556 #endif 1557 #if OMPT_SUPPORT && OMPT_OPTIONAL 1558 if (ompt_enabled.enabled) { 1559 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1560 /* OMPT state update */ 1561 prev_state = ti.state; 1562 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1563 ti.state = ompt_state_wait_critical; 1564 1565 /* OMPT event callback */ 1566 if (ompt_enabled.ompt_callback_mutex_acquire) { 1567 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1568 ompt_mutex_critical, (unsigned int)hint, 1569 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1570 codeptr); 1571 } 1572 } 1573 #endif 1574 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1575 } 1576 KMP_POP_PARTITIONED_TIMER(); 1577 1578 #if USE_ITT_BUILD 1579 __kmp_itt_critical_acquired(lck); 1580 #endif /* USE_ITT_BUILD */ 1581 #if OMPT_SUPPORT && OMPT_OPTIONAL 1582 if (ompt_enabled.enabled) { 1583 /* OMPT state update */ 1584 ti.state = prev_state; 1585 ti.wait_id = 0; 1586 1587 /* OMPT event callback */ 1588 if (ompt_enabled.ompt_callback_mutex_acquired) { 1589 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1590 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1591 } 1592 } 1593 #endif 1594 1595 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1596 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1597 } // __kmpc_critical_with_hint 1598 1599 #endif // KMP_USE_DYNAMIC_LOCK 1600 1601 /*! 
1602 @ingroup WORK_SHARING 1603 @param loc source location information. 1604 @param global_tid global thread number . 1605 @param crit identity of the critical section. This could be a pointer to a lock 1606 associated with the critical section, or some other suitably unique value. 1607 1608 Leave a critical section, releasing any lock that was held during its execution. 1609 */ 1610 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1611 kmp_critical_name *crit) { 1612 kmp_user_lock_p lck; 1613 1614 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1615 1616 #if KMP_USE_DYNAMIC_LOCK 1617 int locktag = KMP_EXTRACT_D_TAG(crit); 1618 if (locktag) { 1619 lck = (kmp_user_lock_p)crit; 1620 KMP_ASSERT(lck != NULL); 1621 if (__kmp_env_consistency_check) { 1622 __kmp_pop_sync(global_tid, ct_critical, loc); 1623 } 1624 #if USE_ITT_BUILD 1625 __kmp_itt_critical_releasing(lck); 1626 #endif 1627 #if KMP_USE_INLINED_TAS 1628 if (locktag == locktag_tas && !__kmp_env_consistency_check) { 1629 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1630 } else 1631 #elif KMP_USE_INLINED_FUTEX 1632 if (locktag == locktag_futex && !__kmp_env_consistency_check) { 1633 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1634 } else 1635 #endif 1636 { 1637 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1638 } 1639 } else { 1640 kmp_indirect_lock_t *ilk = 1641 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1642 KMP_ASSERT(ilk != NULL); 1643 lck = ilk->lock; 1644 if (__kmp_env_consistency_check) { 1645 __kmp_pop_sync(global_tid, ct_critical, loc); 1646 } 1647 #if USE_ITT_BUILD 1648 __kmp_itt_critical_releasing(lck); 1649 #endif 1650 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1651 } 1652 1653 #else // KMP_USE_DYNAMIC_LOCK 1654 1655 if ((__kmp_user_lock_kind == lk_tas) && 1656 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1657 lck = (kmp_user_lock_p)crit; 1658 } 1659 #if KMP_USE_FUTEX 1660 else if ((__kmp_user_lock_kind == lk_futex) && 1661 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1662 lck = (kmp_user_lock_p)crit; 1663 } 1664 #endif 1665 else { // ticket, queuing or drdpa 1666 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1667 } 1668 1669 KMP_ASSERT(lck != NULL); 1670 1671 if (__kmp_env_consistency_check) 1672 __kmp_pop_sync(global_tid, ct_critical, loc); 1673 1674 #if USE_ITT_BUILD 1675 __kmp_itt_critical_releasing(lck); 1676 #endif /* USE_ITT_BUILD */ 1677 // Value of 'crit' should be good for using as a critical_id of the critical 1678 // section directive. 1679 __kmp_release_user_lock_with_checks(lck, global_tid); 1680 1681 #endif // KMP_USE_DYNAMIC_LOCK 1682 1683 #if OMPT_SUPPORT && OMPT_OPTIONAL 1684 /* OMPT release event triggers after lock is released; place here to trigger 1685 * for all #if branches */ 1686 OMPT_STORE_RETURN_ADDRESS(global_tid); 1687 if (ompt_enabled.ompt_callback_mutex_released) { 1688 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1689 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1690 OMPT_LOAD_RETURN_ADDRESS(0)); 1691 } 1692 #endif 1693 1694 KMP_POP_PARTITIONED_TIMER(); 1695 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1696 } 1697 1698 /*! 1699 @ingroup SYNCHRONIZATION 1700 @param loc source location information 1701 @param global_tid thread id. 1702 @return one if the thread should execute the master block, zero otherwise 1703 1704 Start execution of a combined barrier and master. The barrier is executed inside 1705 this function. 
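
Illustrative sketch of the expected usage: every thread of the team makes this
call, and only the thread that receives 1 executes the block and then releases
the others:
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // statements executed by one thread while the others wait
  __kmpc_end_barrier_master(&loc, gtid);
}
@endcode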
1706 */ 1707 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1708 int status; 1709 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1710 __kmp_assert_valid_gtid(global_tid); 1711 1712 if (!TCR_4(__kmp_init_parallel)) 1713 __kmp_parallel_initialize(); 1714 1715 __kmp_resume_if_soft_paused(); 1716 1717 if (__kmp_env_consistency_check) 1718 __kmp_check_barrier(global_tid, ct_barrier, loc); 1719 1720 #if OMPT_SUPPORT 1721 ompt_frame_t *ompt_frame; 1722 if (ompt_enabled.enabled) { 1723 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1724 if (ompt_frame->enter_frame.ptr == NULL) 1725 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1726 } 1727 OMPT_STORE_RETURN_ADDRESS(global_tid); 1728 #endif 1729 #if USE_ITT_NOTIFY 1730 __kmp_threads[global_tid]->th.th_ident = loc; 1731 #endif 1732 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1733 #if OMPT_SUPPORT && OMPT_OPTIONAL 1734 if (ompt_enabled.enabled) { 1735 ompt_frame->enter_frame = ompt_data_none; 1736 } 1737 #endif 1738 1739 return (status != 0) ? 0 : 1; 1740 } 1741 1742 /*! 1743 @ingroup SYNCHRONIZATION 1744 @param loc source location information 1745 @param global_tid thread id. 1746 1747 Complete the execution of a combined barrier and master. This function should 1748 only be called at the completion of the <tt>master</tt> code. Other threads will 1749 still be waiting at the barrier and this call releases them. 1750 */ 1751 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1752 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1753 __kmp_assert_valid_gtid(global_tid); 1754 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1755 } 1756 1757 /*! 1758 @ingroup SYNCHRONIZATION 1759 @param loc source location information 1760 @param global_tid thread id. 1761 @return one if the thread should execute the master block, zero otherwise 1762 1763 Start execution of a combined barrier and master(nowait) construct. 1764 The barrier is executed inside this function. 1765 There is no equivalent "end" function, since the barrier here is not split: no other thread waits for the master block to finish, so there is nothing for an "end" call to release. 1766 */ 1767 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1768 kmp_int32 ret; 1769 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1770 __kmp_assert_valid_gtid(global_tid); 1771 1772 if (!TCR_4(__kmp_init_parallel)) 1773 __kmp_parallel_initialize(); 1774 1775 __kmp_resume_if_soft_paused(); 1776 1777 if (__kmp_env_consistency_check) { 1778 if (loc == 0) { 1779 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
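      // A null 'loc' means the compiler supplied no source location for this
      // construct, so the warning (and later consistency diagnostics) cannot
      // point at the user's code.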
1780 } 1781 __kmp_check_barrier(global_tid, ct_barrier, loc); 1782 } 1783 1784 #if OMPT_SUPPORT 1785 ompt_frame_t *ompt_frame; 1786 if (ompt_enabled.enabled) { 1787 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1788 if (ompt_frame->enter_frame.ptr == NULL) 1789 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1790 } 1791 OMPT_STORE_RETURN_ADDRESS(global_tid); 1792 #endif 1793 #if USE_ITT_NOTIFY 1794 __kmp_threads[global_tid]->th.th_ident = loc; 1795 #endif 1796 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1797 #if OMPT_SUPPORT && OMPT_OPTIONAL 1798 if (ompt_enabled.enabled) { 1799 ompt_frame->enter_frame = ompt_data_none; 1800 } 1801 #endif 1802 1803 ret = __kmpc_master(loc, global_tid); 1804 1805 if (__kmp_env_consistency_check) { 1806 /* there's no __kmpc_end_master called; so the (stats) */ 1807 /* actions of __kmpc_end_master are done here */ 1808 if (ret) { 1809 /* only one thread should do the pop since only */ 1810 /* one did the push (see __kmpc_master()) */ 1811 __kmp_pop_sync(global_tid, ct_master, loc); 1812 } 1813 } 1814 1815 return (ret); 1816 } 1817 1818 /* The BARRIER for a SINGLE process section is always explicit */ 1819 /*! 1820 @ingroup WORK_SHARING 1821 @param loc source location information 1822 @param global_tid global thread number 1823 @return One if this thread should execute the single construct, zero otherwise. 1824 1825 Test whether to execute a <tt>single</tt> construct. 1826 There are no implicit barriers in the two "single" calls, rather the compiler 1827 should introduce an explicit barrier if it is required. 1828 */ 1829 1830 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1831 __kmp_assert_valid_gtid(global_tid); 1832 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1833 1834 if (rc) { 1835 // We are going to execute the single statement, so we should count it. 1836 KMP_COUNT_BLOCK(OMP_SINGLE); 1837 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1838 } 1839 1840 #if OMPT_SUPPORT && OMPT_OPTIONAL 1841 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1842 kmp_team_t *team = this_thr->th.th_team; 1843 int tid = __kmp_tid_from_gtid(global_tid); 1844 1845 if (ompt_enabled.enabled) { 1846 if (rc) { 1847 if (ompt_enabled.ompt_callback_work) { 1848 ompt_callbacks.ompt_callback(ompt_callback_work)( 1849 ompt_work_single_executor, ompt_scope_begin, 1850 &(team->t.ompt_team_info.parallel_data), 1851 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1852 1, OMPT_GET_RETURN_ADDRESS(0)); 1853 } 1854 } else { 1855 if (ompt_enabled.ompt_callback_work) { 1856 ompt_callbacks.ompt_callback(ompt_callback_work)( 1857 ompt_work_single_other, ompt_scope_begin, 1858 &(team->t.ompt_team_info.parallel_data), 1859 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1860 1, OMPT_GET_RETURN_ADDRESS(0)); 1861 ompt_callbacks.ompt_callback(ompt_callback_work)( 1862 ompt_work_single_other, ompt_scope_end, 1863 &(team->t.ompt_team_info.parallel_data), 1864 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1865 1, OMPT_GET_RETURN_ADDRESS(0)); 1866 } 1867 } 1868 } 1869 #endif 1870 1871 return rc; 1872 } 1873 1874 /*! 1875 @ingroup WORK_SHARING 1876 @param loc source location information 1877 @param global_tid global thread number 1878 1879 Mark the end of a <tt>single</tt> construct. This function should 1880 only be called by the thread that executed the block of code protected 1881 by the `single` construct. 
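As an illustrative sketch (not literal compiler output; <tt>loc</tt> and
<tt>gtid</tt> stand for the usual arguments), the expected pairing for a
<tt>single</tt> construct without a <tt>nowait</tt> clause is:

@code
if (__kmpc_single(&loc, gtid)) {
  // body executed by exactly one thread of the team
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier; the single calls add none
@endcode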
1882 */ 1883 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1884 __kmp_assert_valid_gtid(global_tid); 1885 __kmp_exit_single(global_tid); 1886 KMP_POP_PARTITIONED_TIMER(); 1887 1888 #if OMPT_SUPPORT && OMPT_OPTIONAL 1889 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1890 kmp_team_t *team = this_thr->th.th_team; 1891 int tid = __kmp_tid_from_gtid(global_tid); 1892 1893 if (ompt_enabled.ompt_callback_work) { 1894 ompt_callbacks.ompt_callback(ompt_callback_work)( 1895 ompt_work_single_executor, ompt_scope_end, 1896 &(team->t.ompt_team_info.parallel_data), 1897 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1898 OMPT_GET_RETURN_ADDRESS(0)); 1899 } 1900 #endif 1901 } 1902 1903 /*! 1904 @ingroup WORK_SHARING 1905 @param loc Source location 1906 @param global_tid Global thread id 1907 1908 Mark the end of a statically scheduled loop. 1909 */ 1910 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1911 KMP_POP_PARTITIONED_TIMER(); 1912 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1913 1914 #if OMPT_SUPPORT && OMPT_OPTIONAL 1915 if (ompt_enabled.ompt_callback_work) { 1916 ompt_work_t ompt_work_type = ompt_work_loop; 1917 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1918 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1919 // Determine workshare type 1920 if (loc != NULL) { 1921 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1922 ompt_work_type = ompt_work_loop; 1923 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1924 ompt_work_type = ompt_work_sections; 1925 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1926 ompt_work_type = ompt_work_distribute; 1927 } else { 1928 // use default set above. 1929 // a warning about this case is provided in __kmpc_for_static_init 1930 } 1931 KMP_DEBUG_ASSERT(ompt_work_type); 1932 } 1933 ompt_callbacks.ompt_callback(ompt_callback_work)( 1934 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1935 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1936 } 1937 #endif 1938 if (__kmp_env_consistency_check) 1939 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1940 } 1941 1942 // User routines which take C-style arguments (call by value) 1943 // different from the Fortran equivalent routines 1944 1945 void ompc_set_num_threads(int arg) { 1946 // !!!!! TODO: check the per-task binding 1947 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1948 } 1949 1950 void ompc_set_dynamic(int flag) { 1951 kmp_info_t *thread; 1952 1953 /* For the thread-private implementation of the internal controls */ 1954 thread = __kmp_entry_thread(); 1955 1956 __kmp_save_internal_controls(thread); 1957 1958 set__dynamic(thread, flag ? true : false); 1959 } 1960 1961 void ompc_set_nested(int flag) { 1962 kmp_info_t *thread; 1963 1964 /* For the thread-private internal controls implementation */ 1965 thread = __kmp_entry_thread(); 1966 1967 __kmp_save_internal_controls(thread); 1968 1969 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1970 } 1971 1972 void ompc_set_max_active_levels(int max_active_levels) { 1973 /* TO DO */ 1974 /* we want per-task implementation of this internal control */ 1975 1976 /* For the per-thread internal controls implementation */ 1977 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1978 } 1979 1980 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1981 // !!!!! 
TODO: check the per-task binding 1982 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1983 } 1984 1985 int ompc_get_ancestor_thread_num(int level) { 1986 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1987 } 1988 1989 int ompc_get_team_size(int level) { 1990 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1991 } 1992 1993 /* OpenMP 5.0 Affinity Format API */ 1994 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 1995 if (!__kmp_init_serial) { 1996 __kmp_serial_initialize(); 1997 } 1998 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1999 format, KMP_STRLEN(format) + 1); 2000 } 2001 2002 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2003 size_t format_size; 2004 if (!__kmp_init_serial) { 2005 __kmp_serial_initialize(); 2006 } 2007 format_size = KMP_STRLEN(__kmp_affinity_format); 2008 if (buffer && size) { 2009 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2010 format_size + 1); 2011 } 2012 return format_size; 2013 } 2014 2015 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2016 int gtid; 2017 if (!TCR_4(__kmp_init_middle)) { 2018 __kmp_middle_initialize(); 2019 } 2020 __kmp_assign_root_init_mask(); 2021 gtid = __kmp_get_gtid(); 2022 #if KMP_AFFINITY_SUPPORTED 2023 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2024 __kmp_affinity.flags.reset) { 2025 __kmp_reset_root_init_mask(gtid); 2026 } 2027 #endif 2028 __kmp_aux_display_affinity(gtid, format); 2029 } 2030 2031 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2032 char const *format) { 2033 int gtid; 2034 size_t num_required; 2035 kmp_str_buf_t capture_buf; 2036 if (!TCR_4(__kmp_init_middle)) { 2037 __kmp_middle_initialize(); 2038 } 2039 __kmp_assign_root_init_mask(); 2040 gtid = __kmp_get_gtid(); 2041 #if KMP_AFFINITY_SUPPORTED 2042 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2043 __kmp_affinity.flags.reset) { 2044 __kmp_reset_root_init_mask(gtid); 2045 } 2046 #endif 2047 __kmp_str_buf_init(&capture_buf); 2048 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2049 if (buffer && buf_size) { 2050 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2051 capture_buf.used + 1); 2052 } 2053 __kmp_str_buf_free(&capture_buf); 2054 return num_required; 2055 } 2056 2057 void kmpc_set_stacksize(int arg) { 2058 // __kmp_aux_set_stacksize initializes the library if needed 2059 __kmp_aux_set_stacksize(arg); 2060 } 2061 2062 void kmpc_set_stacksize_s(size_t arg) { 2063 // __kmp_aux_set_stacksize initializes the library if needed 2064 __kmp_aux_set_stacksize(arg); 2065 } 2066 2067 void kmpc_set_blocktime(int arg) { 2068 int gtid, tid; 2069 kmp_info_t *thread; 2070 2071 gtid = __kmp_entry_gtid(); 2072 tid = __kmp_tid_from_gtid(gtid); 2073 thread = __kmp_thread_from_gtid(gtid); 2074 2075 __kmp_aux_set_blocktime(arg, thread, tid); 2076 } 2077 2078 void kmpc_set_library(int arg) { 2079 // __kmp_user_set_library initializes the library if needed 2080 __kmp_user_set_library((enum library_type)arg); 2081 } 2082 2083 void kmpc_set_defaults(char const *str) { 2084 // __kmp_aux_set_defaults initializes the library if needed 2085 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2086 } 2087 2088 void kmpc_set_disp_num_buffers(int arg) { 2089 // ignore after initialization because some teams have already 2090 // allocated dispatch buffers 2091 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2092 arg <= KMP_MAX_DISP_NUM_BUFF) { 
2093 __kmp_dispatch_num_buffers = arg; 2094 } 2095 } 2096 2097 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2098 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2099 return -1; 2100 #else 2101 if (!TCR_4(__kmp_init_middle)) { 2102 __kmp_middle_initialize(); 2103 } 2104 __kmp_assign_root_init_mask(); 2105 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2106 #endif 2107 } 2108 2109 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2110 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2111 return -1; 2112 #else 2113 if (!TCR_4(__kmp_init_middle)) { 2114 __kmp_middle_initialize(); 2115 } 2116 __kmp_assign_root_init_mask(); 2117 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2118 #endif 2119 } 2120 2121 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2122 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2123 return -1; 2124 #else 2125 if (!TCR_4(__kmp_init_middle)) { 2126 __kmp_middle_initialize(); 2127 } 2128 __kmp_assign_root_init_mask(); 2129 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2130 #endif 2131 } 2132 2133 /* -------------------------------------------------------------------------- */ 2134 /*! 2135 @ingroup THREADPRIVATE 2136 @param loc source location information 2137 @param gtid global thread number 2138 @param cpy_size size of the cpy_data buffer 2139 @param cpy_data pointer to data to be copied 2140 @param cpy_func helper function to call for copying data 2141 @param didit flag variable: 1=single thread; 0=not single thread 2142 2143 __kmpc_copyprivate implements the interface for the private data broadcast 2144 needed for the copyprivate clause associated with a single region in an 2145 OpenMP<sup>*</sup> program (both C and Fortran). 2146 All threads participating in the parallel region call this routine. 2147 One of the threads (called the single thread) should have the <tt>didit</tt> 2148 variable set to 1 and all other threads should have that variable set to 0. 2149 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2150 2151 The OpenMP specification forbids the use of nowait on the single region when a 2152 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2153 barrier internally to avoid race conditions, so the code generation for the 2154 single region should avoid generating a barrier after the call to @ref 2155 __kmpc_copyprivate. 2156 2157 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2158 The <tt>loc</tt> parameter is a pointer to source location information. 2159 2160 Internal implementation: The single thread will first copy its descriptor 2161 address (cpy_data) to a team-private location, then the other threads will each 2162 call the function pointed to by the parameter cpy_func, which carries out the 2163 copy by copying the data using the cpy_data buffer. 2164 2165 The cpy_func routine used for the copy and the contents of the data area defined 2166 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2167 to be done. For instance, the cpy_data buffer can hold the actual data to be 2168 copied or it may hold a list of pointers to the data. The cpy_func routine must 2169 interpret the cpy_data buffer appropriately. 
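In the simplest case, where cpy_data points directly at the value to broadcast,
the helper reduces to a plain memory copy. An illustrative sketch only (the
payload type is assumed and not part of the interface):

@code
static void example_cpy_func(void *destination, void *source) {
  // copy the broadcast value from the single thread's buffer into this
  // thread's buffer
  memcpy(destination, source, sizeof(example_payload_t));
}
@endcode
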
2170 2171 The interface to cpy_func is as follows: 2172 @code 2173 void cpy_func( void *destination, void *source ) 2174 @endcode 2175 where void *destination is the cpy_data pointer for the thread being copied to 2176 and void *source is the cpy_data pointer for the thread being copied from. 2177 */ 2178 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2179 void *cpy_data, void (*cpy_func)(void *, void *), 2180 kmp_int32 didit) { 2181 void **data_ptr; 2182 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2183 __kmp_assert_valid_gtid(gtid); 2184 2185 KMP_MB(); 2186 2187 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2188 2189 if (__kmp_env_consistency_check) { 2190 if (loc == 0) { 2191 KMP_WARNING(ConstructIdentInvalid); 2192 } 2193 } 2194 2195 // ToDo: Optimize the following two barriers into some kind of split barrier 2196 2197 if (didit) 2198 *data_ptr = cpy_data; 2199 2200 #if OMPT_SUPPORT 2201 ompt_frame_t *ompt_frame; 2202 if (ompt_enabled.enabled) { 2203 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2204 if (ompt_frame->enter_frame.ptr == NULL) 2205 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2206 } 2207 OMPT_STORE_RETURN_ADDRESS(gtid); 2208 #endif 2209 /* This barrier is not a barrier region boundary */ 2210 #if USE_ITT_NOTIFY 2211 __kmp_threads[gtid]->th.th_ident = loc; 2212 #endif 2213 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2214 2215 if (!didit) 2216 (*cpy_func)(cpy_data, *data_ptr); 2217 2218 // Consider next barrier a user-visible barrier for barrier region boundaries 2219 // Nesting checks are already handled by the single construct checks 2220 { 2221 #if OMPT_SUPPORT 2222 OMPT_STORE_RETURN_ADDRESS(gtid); 2223 #endif 2224 #if USE_ITT_NOTIFY 2225 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2226 // tasks can overwrite the location) 2227 #endif 2228 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2229 #if OMPT_SUPPORT && OMPT_OPTIONAL 2230 if (ompt_enabled.enabled) { 2231 ompt_frame->enter_frame = ompt_data_none; 2232 } 2233 #endif 2234 } 2235 } 2236 2237 /* --------------------------------------------------------------------------*/ 2238 /*! 2239 @ingroup THREADPRIVATE 2240 @param loc source location information 2241 @param gtid global thread number 2242 @param cpy_data pointer to the data to be saved/copied or 0 2243 @return the saved pointer to the data 2244 2245 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: 2246 __kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so 2247 coming from single), and returns that pointer in all calls (for single thread 2248 it's not needed). This version doesn't do any actual data copying. Data copying 2249 has to be done somewhere else, e.g. inline in the generated code. Due to this, 2250 this function doesn't have any barrier at the end of the function, like 2251 __kmpc_copyprivate does, so generated code needs barrier after copying of all 2252 data was done. 
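As an illustrative sketch (not literal compiler output; <tt>didit</tt>,
<tt>my_data</tt> and <tt>copy_from()</tt> are placeholders for
compiler-generated code), the expected pattern is:

@code
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? my_data : NULL);
if (!didit)
  copy_from(my_data, src); // inline copy emitted by the compiler
__kmpc_barrier(&loc, gtid); // required: the light version ends without a barrier
@endcode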
2253 */ 2254 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { 2255 void **data_ptr; 2256 2257 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid)); 2258 2259 KMP_MB(); 2260 2261 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2262 2263 if (__kmp_env_consistency_check) { 2264 if (loc == 0) { 2265 KMP_WARNING(ConstructIdentInvalid); 2266 } 2267 } 2268 2269 // ToDo: Optimize the following barrier 2270 2271 if (cpy_data) 2272 *data_ptr = cpy_data; 2273 2274 #if OMPT_SUPPORT 2275 ompt_frame_t *ompt_frame; 2276 if (ompt_enabled.enabled) { 2277 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2278 if (ompt_frame->enter_frame.ptr == NULL) 2279 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2280 OMPT_STORE_RETURN_ADDRESS(gtid); 2281 } 2282 #endif 2283 /* This barrier is not a barrier region boundary */ 2284 #if USE_ITT_NOTIFY 2285 __kmp_threads[gtid]->th.th_ident = loc; 2286 #endif 2287 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2288 2289 return *data_ptr; 2290 } 2291 2292 /* -------------------------------------------------------------------------- */ 2293 2294 #define INIT_LOCK __kmp_init_user_lock_with_checks 2295 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2296 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2297 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2298 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2299 #define ACQUIRE_NESTED_LOCK_TIMED \ 2300 __kmp_acquire_nested_user_lock_with_checks_timed 2301 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2302 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2303 #define TEST_LOCK __kmp_test_user_lock_with_checks 2304 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2305 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2306 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2307 2308 // TODO: Make check abort messages use location info & pass it into 2309 // with_checks routines 2310 2311 #if KMP_USE_DYNAMIC_LOCK 2312 2313 // internal lock initializer 2314 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2315 kmp_dyna_lockseq_t seq) { 2316 if (KMP_IS_D_LOCK(seq)) { 2317 KMP_INIT_D_LOCK(lock, seq); 2318 #if USE_ITT_BUILD 2319 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2320 #endif 2321 } else { 2322 KMP_INIT_I_LOCK(lock, seq); 2323 #if USE_ITT_BUILD 2324 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2325 __kmp_itt_lock_creating(ilk->lock, loc); 2326 #endif 2327 } 2328 } 2329 2330 // internal nest lock initializer 2331 static __forceinline void 2332 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2333 kmp_dyna_lockseq_t seq) { 2334 #if KMP_USE_TSX 2335 // Don't have nested lock implementation for speculative locks 2336 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2337 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2338 seq = __kmp_user_lock_seq; 2339 #endif 2340 switch (seq) { 2341 case lockseq_tas: 2342 seq = lockseq_nested_tas; 2343 break; 2344 #if KMP_USE_FUTEX 2345 case lockseq_futex: 2346 seq = lockseq_nested_futex; 2347 break; 2348 #endif 2349 case lockseq_ticket: 2350 seq = lockseq_nested_ticket; 2351 break; 2352 case lockseq_queuing: 2353 seq = lockseq_nested_queuing; 2354 break; 2355 case lockseq_drdpa: 2356 seq = lockseq_nested_drdpa; 2357 break; 2358 default: 2359 seq = lockseq_nested_queuing; 2360 } 2361 
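  // All nested lock kinds are implemented as indirect locks, so whichever
  // sequence was selected above is initialized through the indirect path.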
KMP_INIT_I_LOCK(lock, seq); 2362 #if USE_ITT_BUILD 2363 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2364 __kmp_itt_lock_creating(ilk->lock, loc); 2365 #endif 2366 } 2367 2368 /* initialize the lock with a hint */ 2369 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2370 uintptr_t hint) { 2371 KMP_DEBUG_ASSERT(__kmp_init_serial); 2372 if (__kmp_env_consistency_check && user_lock == NULL) { 2373 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2374 } 2375 2376 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2377 2378 #if OMPT_SUPPORT && OMPT_OPTIONAL 2379 // This is the case, if called from omp_init_lock_with_hint: 2380 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2381 if (!codeptr) 2382 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2383 if (ompt_enabled.ompt_callback_lock_init) { 2384 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2385 ompt_mutex_lock, (omp_lock_hint_t)hint, 2386 __ompt_get_mutex_impl_type(user_lock), 2387 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2388 } 2389 #endif 2390 } 2391 2392 /* initialize the lock with a hint */ 2393 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2394 void **user_lock, uintptr_t hint) { 2395 KMP_DEBUG_ASSERT(__kmp_init_serial); 2396 if (__kmp_env_consistency_check && user_lock == NULL) { 2397 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2398 } 2399 2400 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2401 2402 #if OMPT_SUPPORT && OMPT_OPTIONAL 2403 // This is the case, if called from omp_init_lock_with_hint: 2404 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2405 if (!codeptr) 2406 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2407 if (ompt_enabled.ompt_callback_lock_init) { 2408 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2409 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2410 __ompt_get_mutex_impl_type(user_lock), 2411 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2412 } 2413 #endif 2414 } 2415 2416 #endif // KMP_USE_DYNAMIC_LOCK 2417 2418 /* initialize the lock */ 2419 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2420 #if KMP_USE_DYNAMIC_LOCK 2421 2422 KMP_DEBUG_ASSERT(__kmp_init_serial); 2423 if (__kmp_env_consistency_check && user_lock == NULL) { 2424 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2425 } 2426 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2427 2428 #if OMPT_SUPPORT && OMPT_OPTIONAL 2429 // This is the case, if called from omp_init_lock_with_hint: 2430 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2431 if (!codeptr) 2432 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2433 if (ompt_enabled.ompt_callback_lock_init) { 2434 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2435 ompt_mutex_lock, omp_lock_hint_none, 2436 __ompt_get_mutex_impl_type(user_lock), 2437 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2438 } 2439 #endif 2440 2441 #else // KMP_USE_DYNAMIC_LOCK 2442 2443 static char const *const func = "omp_init_lock"; 2444 kmp_user_lock_p lck; 2445 KMP_DEBUG_ASSERT(__kmp_init_serial); 2446 2447 if (__kmp_env_consistency_check) { 2448 if (user_lock == NULL) { 2449 KMP_FATAL(LockIsUninitialized, func); 2450 } 2451 } 2452 2453 KMP_CHECK_USER_LOCK_INIT(); 2454 2455 if ((__kmp_user_lock_kind == lk_tas) && 2456 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2457 lck = (kmp_user_lock_p)user_lock; 2458 } 2459 #if KMP_USE_FUTEX 2460 else if ((__kmp_user_lock_kind == lk_futex) && 2461 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 
2462 lck = (kmp_user_lock_p)user_lock; 2463 } 2464 #endif 2465 else { 2466 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2467 } 2468 INIT_LOCK(lck); 2469 __kmp_set_user_lock_location(lck, loc); 2470 2471 #if OMPT_SUPPORT && OMPT_OPTIONAL 2472 // This is the case, if called from omp_init_lock_with_hint: 2473 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2474 if (!codeptr) 2475 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2476 if (ompt_enabled.ompt_callback_lock_init) { 2477 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2478 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2479 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2480 } 2481 #endif 2482 2483 #if USE_ITT_BUILD 2484 __kmp_itt_lock_creating(lck); 2485 #endif /* USE_ITT_BUILD */ 2486 2487 #endif // KMP_USE_DYNAMIC_LOCK 2488 } // __kmpc_init_lock 2489 2490 /* initialize the lock */ 2491 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2492 #if KMP_USE_DYNAMIC_LOCK 2493 2494 KMP_DEBUG_ASSERT(__kmp_init_serial); 2495 if (__kmp_env_consistency_check && user_lock == NULL) { 2496 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2497 } 2498 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2499 2500 #if OMPT_SUPPORT && OMPT_OPTIONAL 2501 // This is the case, if called from omp_init_lock_with_hint: 2502 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2503 if (!codeptr) 2504 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2505 if (ompt_enabled.ompt_callback_lock_init) { 2506 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2507 ompt_mutex_nest_lock, omp_lock_hint_none, 2508 __ompt_get_mutex_impl_type(user_lock), 2509 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2510 } 2511 #endif 2512 2513 #else // KMP_USE_DYNAMIC_LOCK 2514 2515 static char const *const func = "omp_init_nest_lock"; 2516 kmp_user_lock_p lck; 2517 KMP_DEBUG_ASSERT(__kmp_init_serial); 2518 2519 if (__kmp_env_consistency_check) { 2520 if (user_lock == NULL) { 2521 KMP_FATAL(LockIsUninitialized, func); 2522 } 2523 } 2524 2525 KMP_CHECK_USER_LOCK_INIT(); 2526 2527 if ((__kmp_user_lock_kind == lk_tas) && 2528 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2529 OMP_NEST_LOCK_T_SIZE)) { 2530 lck = (kmp_user_lock_p)user_lock; 2531 } 2532 #if KMP_USE_FUTEX 2533 else if ((__kmp_user_lock_kind == lk_futex) && 2534 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2535 OMP_NEST_LOCK_T_SIZE)) { 2536 lck = (kmp_user_lock_p)user_lock; 2537 } 2538 #endif 2539 else { 2540 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2541 } 2542 2543 INIT_NESTED_LOCK(lck); 2544 __kmp_set_user_lock_location(lck, loc); 2545 2546 #if OMPT_SUPPORT && OMPT_OPTIONAL 2547 // This is the case, if called from omp_init_lock_with_hint: 2548 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2549 if (!codeptr) 2550 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2551 if (ompt_enabled.ompt_callback_lock_init) { 2552 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2553 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2554 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2555 } 2556 #endif 2557 2558 #if USE_ITT_BUILD 2559 __kmp_itt_lock_creating(lck); 2560 #endif /* USE_ITT_BUILD */ 2561 2562 #endif // KMP_USE_DYNAMIC_LOCK 2563 } // __kmpc_init_nest_lock 2564 2565 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2566 #if KMP_USE_DYNAMIC_LOCK 2567 2568 #if USE_ITT_BUILD 2569 kmp_user_lock_p lck; 2570 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2571 lck = 
((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2572 } else { 2573 lck = (kmp_user_lock_p)user_lock; 2574 } 2575 __kmp_itt_lock_destroyed(lck); 2576 #endif 2577 #if OMPT_SUPPORT && OMPT_OPTIONAL 2578 // This is the case, if called from omp_init_lock_with_hint: 2579 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2580 if (!codeptr) 2581 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2582 if (ompt_enabled.ompt_callback_lock_destroy) { 2583 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2584 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2585 } 2586 #endif 2587 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2588 #else 2589 kmp_user_lock_p lck; 2590 2591 if ((__kmp_user_lock_kind == lk_tas) && 2592 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2593 lck = (kmp_user_lock_p)user_lock; 2594 } 2595 #if KMP_USE_FUTEX 2596 else if ((__kmp_user_lock_kind == lk_futex) && 2597 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2598 lck = (kmp_user_lock_p)user_lock; 2599 } 2600 #endif 2601 else { 2602 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2603 } 2604 2605 #if OMPT_SUPPORT && OMPT_OPTIONAL 2606 // This is the case, if called from omp_init_lock_with_hint: 2607 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2608 if (!codeptr) 2609 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2610 if (ompt_enabled.ompt_callback_lock_destroy) { 2611 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2612 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2613 } 2614 #endif 2615 2616 #if USE_ITT_BUILD 2617 __kmp_itt_lock_destroyed(lck); 2618 #endif /* USE_ITT_BUILD */ 2619 DESTROY_LOCK(lck); 2620 2621 if ((__kmp_user_lock_kind == lk_tas) && 2622 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2623 ; 2624 } 2625 #if KMP_USE_FUTEX 2626 else if ((__kmp_user_lock_kind == lk_futex) && 2627 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2628 ; 2629 } 2630 #endif 2631 else { 2632 __kmp_user_lock_free(user_lock, gtid, lck); 2633 } 2634 #endif // KMP_USE_DYNAMIC_LOCK 2635 } // __kmpc_destroy_lock 2636 2637 /* destroy the lock */ 2638 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2639 #if KMP_USE_DYNAMIC_LOCK 2640 2641 #if USE_ITT_BUILD 2642 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2643 __kmp_itt_lock_destroyed(ilk->lock); 2644 #endif 2645 #if OMPT_SUPPORT && OMPT_OPTIONAL 2646 // This is the case, if called from omp_init_lock_with_hint: 2647 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2648 if (!codeptr) 2649 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2650 if (ompt_enabled.ompt_callback_lock_destroy) { 2651 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2652 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2653 } 2654 #endif 2655 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2656 2657 #else // KMP_USE_DYNAMIC_LOCK 2658 2659 kmp_user_lock_p lck; 2660 2661 if ((__kmp_user_lock_kind == lk_tas) && 2662 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2663 OMP_NEST_LOCK_T_SIZE)) { 2664 lck = (kmp_user_lock_p)user_lock; 2665 } 2666 #if KMP_USE_FUTEX 2667 else if ((__kmp_user_lock_kind == lk_futex) && 2668 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2669 OMP_NEST_LOCK_T_SIZE)) { 2670 lck = (kmp_user_lock_p)user_lock; 2671 } 2672 #endif 2673 else { 2674 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2675 } 2676 2677 #if OMPT_SUPPORT && OMPT_OPTIONAL 2678 // This is the case, if called from 
omp_init_lock_with_hint: 2679 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2680 if (!codeptr) 2681 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2682 if (ompt_enabled.ompt_callback_lock_destroy) { 2683 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2684 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2685 } 2686 #endif 2687 2688 #if USE_ITT_BUILD 2689 __kmp_itt_lock_destroyed(lck); 2690 #endif /* USE_ITT_BUILD */ 2691 2692 DESTROY_NESTED_LOCK(lck); 2693 2694 if ((__kmp_user_lock_kind == lk_tas) && 2695 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2696 OMP_NEST_LOCK_T_SIZE)) { 2697 ; 2698 } 2699 #if KMP_USE_FUTEX 2700 else if ((__kmp_user_lock_kind == lk_futex) && 2701 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2702 OMP_NEST_LOCK_T_SIZE)) { 2703 ; 2704 } 2705 #endif 2706 else { 2707 __kmp_user_lock_free(user_lock, gtid, lck); 2708 } 2709 #endif // KMP_USE_DYNAMIC_LOCK 2710 } // __kmpc_destroy_nest_lock 2711 2712 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2713 KMP_COUNT_BLOCK(OMP_set_lock); 2714 #if KMP_USE_DYNAMIC_LOCK 2715 int tag = KMP_EXTRACT_D_TAG(user_lock); 2716 #if USE_ITT_BUILD 2717 __kmp_itt_lock_acquiring( 2718 (kmp_user_lock_p) 2719 user_lock); // itt function will get to the right lock object. 2720 #endif 2721 #if OMPT_SUPPORT && OMPT_OPTIONAL 2722 // This is the case, if called from omp_init_lock_with_hint: 2723 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2724 if (!codeptr) 2725 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2726 if (ompt_enabled.ompt_callback_mutex_acquire) { 2727 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2728 ompt_mutex_lock, omp_lock_hint_none, 2729 __ompt_get_mutex_impl_type(user_lock), 2730 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2731 } 2732 #endif 2733 #if KMP_USE_INLINED_TAS 2734 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2735 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2736 } else 2737 #elif KMP_USE_INLINED_FUTEX 2738 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2739 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2740 } else 2741 #endif 2742 { 2743 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2744 } 2745 #if USE_ITT_BUILD 2746 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2747 #endif 2748 #if OMPT_SUPPORT && OMPT_OPTIONAL 2749 if (ompt_enabled.ompt_callback_mutex_acquired) { 2750 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2751 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2752 } 2753 #endif 2754 2755 #else // KMP_USE_DYNAMIC_LOCK 2756 2757 kmp_user_lock_p lck; 2758 2759 if ((__kmp_user_lock_kind == lk_tas) && 2760 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2761 lck = (kmp_user_lock_p)user_lock; 2762 } 2763 #if KMP_USE_FUTEX 2764 else if ((__kmp_user_lock_kind == lk_futex) && 2765 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2766 lck = (kmp_user_lock_p)user_lock; 2767 } 2768 #endif 2769 else { 2770 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2771 } 2772 2773 #if USE_ITT_BUILD 2774 __kmp_itt_lock_acquiring(lck); 2775 #endif /* USE_ITT_BUILD */ 2776 #if OMPT_SUPPORT && OMPT_OPTIONAL 2777 // This is the case, if called from omp_init_lock_with_hint: 2778 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2779 if (!codeptr) 2780 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2781 if (ompt_enabled.ompt_callback_mutex_acquire) { 2782 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2783 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 2784 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2785 } 2786 #endif 2787 2788 ACQUIRE_LOCK(lck, gtid); 2789 2790 #if USE_ITT_BUILD 2791 __kmp_itt_lock_acquired(lck); 2792 #endif /* USE_ITT_BUILD */ 2793 2794 #if OMPT_SUPPORT && OMPT_OPTIONAL 2795 if (ompt_enabled.ompt_callback_mutex_acquired) { 2796 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2797 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2798 } 2799 #endif 2800 2801 #endif // KMP_USE_DYNAMIC_LOCK 2802 } 2803 2804 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2805 #if KMP_USE_DYNAMIC_LOCK 2806 2807 #if USE_ITT_BUILD 2808 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2809 #endif 2810 #if OMPT_SUPPORT && OMPT_OPTIONAL 2811 // This is the case, if called from omp_init_lock_with_hint: 2812 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2813 if (!codeptr) 2814 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2815 if (ompt_enabled.enabled) { 2816 if (ompt_enabled.ompt_callback_mutex_acquire) { 2817 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2818 ompt_mutex_nest_lock, omp_lock_hint_none, 2819 __ompt_get_mutex_impl_type(user_lock), 2820 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2821 } 2822 } 2823 #endif 2824 int acquire_status = 2825 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2826 (void)acquire_status; 2827 #if USE_ITT_BUILD 2828 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2829 #endif 2830 2831 #if OMPT_SUPPORT && OMPT_OPTIONAL 2832 if (ompt_enabled.enabled) { 2833 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2834 if (ompt_enabled.ompt_callback_mutex_acquired) { 2835 // lock_first 2836 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2837 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2838 codeptr); 2839 } 2840 } else { 2841 if (ompt_enabled.ompt_callback_nest_lock) { 2842 // lock_next 2843 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2844 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2845 } 2846 } 2847 } 2848 #endif 2849 2850 #else // KMP_USE_DYNAMIC_LOCK 2851 int acquire_status; 2852 kmp_user_lock_p lck; 2853 2854 if ((__kmp_user_lock_kind == lk_tas) && 2855 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2856 OMP_NEST_LOCK_T_SIZE)) { 2857 lck = (kmp_user_lock_p)user_lock; 2858 } 2859 #if KMP_USE_FUTEX 2860 else if ((__kmp_user_lock_kind == lk_futex) && 2861 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2862 OMP_NEST_LOCK_T_SIZE)) { 2863 lck = (kmp_user_lock_p)user_lock; 2864 } 2865 #endif 2866 else { 2867 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2868 } 2869 2870 #if USE_ITT_BUILD 2871 __kmp_itt_lock_acquiring(lck); 2872 #endif /* USE_ITT_BUILD */ 2873 #if OMPT_SUPPORT && OMPT_OPTIONAL 2874 // This is the case, if called from omp_init_lock_with_hint: 2875 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2876 if (!codeptr) 2877 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2878 if (ompt_enabled.enabled) { 2879 if (ompt_enabled.ompt_callback_mutex_acquire) { 2880 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2881 ompt_mutex_nest_lock, omp_lock_hint_none, 2882 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2883 codeptr); 2884 } 2885 } 2886 #endif 2887 2888 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2889 2890 #if USE_ITT_BUILD 2891 __kmp_itt_lock_acquired(lck); 2892 #endif /* USE_ITT_BUILD */ 2893 2894 #if OMPT_SUPPORT && OMPT_OPTIONAL 2895 if (ompt_enabled.enabled) 
{ 2896 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2897 if (ompt_enabled.ompt_callback_mutex_acquired) { 2898 // lock_first 2899 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2900 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2901 } 2902 } else { 2903 if (ompt_enabled.ompt_callback_nest_lock) { 2904 // lock_next 2905 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2906 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2907 } 2908 } 2909 } 2910 #endif 2911 2912 #endif // KMP_USE_DYNAMIC_LOCK 2913 } 2914 2915 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2916 #if KMP_USE_DYNAMIC_LOCK 2917 2918 int tag = KMP_EXTRACT_D_TAG(user_lock); 2919 #if USE_ITT_BUILD 2920 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2921 #endif 2922 #if KMP_USE_INLINED_TAS 2923 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2924 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2925 } else 2926 #elif KMP_USE_INLINED_FUTEX 2927 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2928 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2929 } else 2930 #endif 2931 { 2932 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2933 } 2934 2935 #if OMPT_SUPPORT && OMPT_OPTIONAL 2936 // This is the case, if called from omp_init_lock_with_hint: 2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2938 if (!codeptr) 2939 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2940 if (ompt_enabled.ompt_callback_mutex_released) { 2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2942 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2943 } 2944 #endif 2945 2946 #else // KMP_USE_DYNAMIC_LOCK 2947 2948 kmp_user_lock_p lck; 2949 2950 /* Can't use serial interval since not block structured */ 2951 /* release the lock */ 2952 2953 if ((__kmp_user_lock_kind == lk_tas) && 2954 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2955 #if KMP_OS_LINUX && \ 2956 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2957 // "fast" path implemented to fix customer performance issue 2958 #if USE_ITT_BUILD 2959 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2960 #endif /* USE_ITT_BUILD */ 2961 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2962 KMP_MB(); 2963 2964 #if OMPT_SUPPORT && OMPT_OPTIONAL 2965 // This is the case, if called from omp_init_lock_with_hint: 2966 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2967 if (!codeptr) 2968 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2969 if (ompt_enabled.ompt_callback_mutex_released) { 2970 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2971 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2972 } 2973 #endif 2974 2975 return; 2976 #else 2977 lck = (kmp_user_lock_p)user_lock; 2978 #endif 2979 } 2980 #if KMP_USE_FUTEX 2981 else if ((__kmp_user_lock_kind == lk_futex) && 2982 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2983 lck = (kmp_user_lock_p)user_lock; 2984 } 2985 #endif 2986 else { 2987 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2988 } 2989 2990 #if USE_ITT_BUILD 2991 __kmp_itt_lock_releasing(lck); 2992 #endif /* USE_ITT_BUILD */ 2993 2994 RELEASE_LOCK(lck, gtid); 2995 2996 #if OMPT_SUPPORT && OMPT_OPTIONAL 2997 // This is the case, if called from omp_init_lock_with_hint: 2998 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2999 if (!codeptr) 3000 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3001 if (ompt_enabled.ompt_callback_mutex_released) { 3002 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3003 ompt_mutex_lock, 
(ompt_wait_id_t)(uintptr_t)lck, codeptr); 3004 } 3005 #endif 3006 3007 #endif // KMP_USE_DYNAMIC_LOCK 3008 } 3009 3010 /* release the lock */ 3011 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3012 #if KMP_USE_DYNAMIC_LOCK 3013 3014 #if USE_ITT_BUILD 3015 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3016 #endif 3017 int release_status = 3018 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 3019 (void)release_status; 3020 3021 #if OMPT_SUPPORT && OMPT_OPTIONAL 3022 // This is the case, if called from omp_init_lock_with_hint: 3023 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3024 if (!codeptr) 3025 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3026 if (ompt_enabled.enabled) { 3027 if (release_status == KMP_LOCK_RELEASED) { 3028 if (ompt_enabled.ompt_callback_mutex_released) { 3029 // release_lock_last 3030 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3031 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3032 codeptr); 3033 } 3034 } else if (ompt_enabled.ompt_callback_nest_lock) { 3035 // release_lock_prev 3036 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3037 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3038 } 3039 } 3040 #endif 3041 3042 #else // KMP_USE_DYNAMIC_LOCK 3043 3044 kmp_user_lock_p lck; 3045 3046 /* Can't use serial interval since not block structured */ 3047 3048 if ((__kmp_user_lock_kind == lk_tas) && 3049 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3050 OMP_NEST_LOCK_T_SIZE)) { 3051 #if KMP_OS_LINUX && \ 3052 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 3053 // "fast" path implemented to fix customer performance issue 3054 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 3055 #if USE_ITT_BUILD 3056 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3057 #endif /* USE_ITT_BUILD */ 3058 3059 #if OMPT_SUPPORT && OMPT_OPTIONAL 3060 int release_status = KMP_LOCK_STILL_HELD; 3061 #endif 3062 3063 if (--(tl->lk.depth_locked) == 0) { 3064 TCW_4(tl->lk.poll, 0); 3065 #if OMPT_SUPPORT && OMPT_OPTIONAL 3066 release_status = KMP_LOCK_RELEASED; 3067 #endif 3068 } 3069 KMP_MB(); 3070 3071 #if OMPT_SUPPORT && OMPT_OPTIONAL 3072 // This is the case, if called from omp_init_lock_with_hint: 3073 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3074 if (!codeptr) 3075 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3076 if (ompt_enabled.enabled) { 3077 if (release_status == KMP_LOCK_RELEASED) { 3078 if (ompt_enabled.ompt_callback_mutex_released) { 3079 // release_lock_last 3080 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3081 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3082 } 3083 } else if (ompt_enabled.ompt_callback_nest_lock) { 3084 // release_lock_previous 3085 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3086 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3087 } 3088 } 3089 #endif 3090 3091 return; 3092 #else 3093 lck = (kmp_user_lock_p)user_lock; 3094 #endif 3095 } 3096 #if KMP_USE_FUTEX 3097 else if ((__kmp_user_lock_kind == lk_futex) && 3098 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3099 OMP_NEST_LOCK_T_SIZE)) { 3100 lck = (kmp_user_lock_p)user_lock; 3101 } 3102 #endif 3103 else { 3104 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3105 } 3106 3107 #if USE_ITT_BUILD 3108 __kmp_itt_lock_releasing(lck); 3109 #endif /* USE_ITT_BUILD */ 3110 3111 int release_status; 3112 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3113 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3114 // This is the case, if called from omp_init_lock_with_hint: 3115 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3116 if (!codeptr) 3117 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3118 if (ompt_enabled.enabled) { 3119 if (release_status == KMP_LOCK_RELEASED) { 3120 if (ompt_enabled.ompt_callback_mutex_released) { 3121 // release_lock_last 3122 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3123 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3124 } 3125 } else if (ompt_enabled.ompt_callback_nest_lock) { 3126 // release_lock_previous 3127 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3128 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3129 } 3130 } 3131 #endif 3132 3133 #endif // KMP_USE_DYNAMIC_LOCK 3134 } 3135 3136 /* try to acquire the lock */ 3137 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3138 KMP_COUNT_BLOCK(OMP_test_lock); 3139 3140 #if KMP_USE_DYNAMIC_LOCK 3141 int rc; 3142 int tag = KMP_EXTRACT_D_TAG(user_lock); 3143 #if USE_ITT_BUILD 3144 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3145 #endif 3146 #if OMPT_SUPPORT && OMPT_OPTIONAL 3147 // This is the case, if called from omp_init_lock_with_hint: 3148 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3149 if (!codeptr) 3150 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3151 if (ompt_enabled.ompt_callback_mutex_acquire) { 3152 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3153 ompt_mutex_lock, omp_lock_hint_none, 3154 __ompt_get_mutex_impl_type(user_lock), 3155 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3156 } 3157 #endif 3158 #if KMP_USE_INLINED_TAS 3159 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3160 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3161 } else 3162 #elif KMP_USE_INLINED_FUTEX 3163 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3164 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3165 } else 3166 #endif 3167 { 3168 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3169 } 3170 if (rc) { 3171 #if USE_ITT_BUILD 3172 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3173 #endif 3174 #if OMPT_SUPPORT && OMPT_OPTIONAL 3175 if (ompt_enabled.ompt_callback_mutex_acquired) { 3176 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3177 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3178 } 3179 #endif 3180 return FTN_TRUE; 3181 } else { 3182 #if USE_ITT_BUILD 3183 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3184 #endif 3185 return FTN_FALSE; 3186 } 3187 3188 #else // KMP_USE_DYNAMIC_LOCK 3189 3190 kmp_user_lock_p lck; 3191 int rc; 3192 3193 if ((__kmp_user_lock_kind == lk_tas) && 3194 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3195 lck = (kmp_user_lock_p)user_lock; 3196 } 3197 #if KMP_USE_FUTEX 3198 else if ((__kmp_user_lock_kind == lk_futex) && 3199 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3200 lck = (kmp_user_lock_p)user_lock; 3201 } 3202 #endif 3203 else { 3204 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3205 } 3206 3207 #if USE_ITT_BUILD 3208 __kmp_itt_lock_acquiring(lck); 3209 #endif /* USE_ITT_BUILD */ 3210 #if OMPT_SUPPORT && OMPT_OPTIONAL 3211 // This is the case, if called from omp_init_lock_with_hint: 3212 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3213 if (!codeptr) 3214 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3215 if (ompt_enabled.ompt_callback_mutex_acquire) { 3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3217 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 3218 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3219 } 3220 #endif 3221 3222 rc = TEST_LOCK(lck, gtid); 3223 #if USE_ITT_BUILD 3224 if (rc) { 3225 __kmp_itt_lock_acquired(lck); 3226 } else { 3227 __kmp_itt_lock_cancelled(lck); 3228 } 3229 #endif /* USE_ITT_BUILD */ 3230 #if OMPT_SUPPORT && OMPT_OPTIONAL 3231 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3232 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3233 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3234 } 3235 #endif 3236 3237 return (rc ? FTN_TRUE : FTN_FALSE); 3238 3239 /* Can't use serial interval since not block structured */ 3240 3241 #endif // KMP_USE_DYNAMIC_LOCK 3242 } 3243 3244 /* try to acquire the lock */ 3245 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3246 #if KMP_USE_DYNAMIC_LOCK 3247 int rc; 3248 #if USE_ITT_BUILD 3249 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3250 #endif 3251 #if OMPT_SUPPORT && OMPT_OPTIONAL 3252 // This is the case, if called from omp_init_lock_with_hint: 3253 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3254 if (!codeptr) 3255 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3256 if (ompt_enabled.ompt_callback_mutex_acquire) { 3257 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3258 ompt_mutex_nest_lock, omp_lock_hint_none, 3259 __ompt_get_mutex_impl_type(user_lock), 3260 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3261 } 3262 #endif 3263 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3264 #if USE_ITT_BUILD 3265 if (rc) { 3266 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3267 } else { 3268 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3269 } 3270 #endif 3271 #if OMPT_SUPPORT && OMPT_OPTIONAL 3272 if (ompt_enabled.enabled && rc) { 3273 if (rc == 1) { 3274 if (ompt_enabled.ompt_callback_mutex_acquired) { 3275 // lock_first 3276 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3277 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3278 codeptr); 3279 } 3280 } else { 3281 if (ompt_enabled.ompt_callback_nest_lock) { 3282 // lock_next 3283 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3284 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3285 } 3286 } 3287 } 3288 #endif 3289 return rc; 3290 3291 #else // KMP_USE_DYNAMIC_LOCK 3292 3293 kmp_user_lock_p lck; 3294 int rc; 3295 3296 if ((__kmp_user_lock_kind == lk_tas) && 3297 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3298 OMP_NEST_LOCK_T_SIZE)) { 3299 lck = (kmp_user_lock_p)user_lock; 3300 } 3301 #if KMP_USE_FUTEX 3302 else if ((__kmp_user_lock_kind == lk_futex) && 3303 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3304 OMP_NEST_LOCK_T_SIZE)) { 3305 lck = (kmp_user_lock_p)user_lock; 3306 } 3307 #endif 3308 else { 3309 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3310 } 3311 3312 #if USE_ITT_BUILD 3313 __kmp_itt_lock_acquiring(lck); 3314 #endif /* USE_ITT_BUILD */ 3315 3316 #if OMPT_SUPPORT && OMPT_OPTIONAL 3317 // This is the case, if called from omp_init_lock_with_hint: 3318 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3319 if (!codeptr) 3320 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3321 if (ompt_enabled.enabled && 3322 ompt_enabled.ompt_callback_mutex_acquire) { 3323 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3324 ompt_mutex_nest_lock, omp_lock_hint_none, 3325 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3326 codeptr); 3327 } 3328 #endif 3329 3330 rc =
TEST_NESTED_LOCK(lck, gtid); 3331 #if USE_ITT_BUILD 3332 if (rc) { 3333 __kmp_itt_lock_acquired(lck); 3334 } else { 3335 __kmp_itt_lock_cancelled(lck); 3336 } 3337 #endif /* USE_ITT_BUILD */ 3338 #if OMPT_SUPPORT && OMPT_OPTIONAL 3339 if (ompt_enabled.enabled && rc) { 3340 if (rc == 1) { 3341 if (ompt_enabled.ompt_callback_mutex_acquired) { 3342 // lock_first 3343 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3344 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3345 } 3346 } else { 3347 if (ompt_enabled.ompt_callback_nest_lock) { 3348 // lock_next 3349 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3350 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3351 } 3352 } 3353 } 3354 #endif 3355 return rc; 3356 3357 /* Can't use serial interval since not block structured */ 3358 3359 #endif // KMP_USE_DYNAMIC_LOCK 3360 } 3361 3362 // Interface to fast scalable reduce methods routines 3363 3364 // keep the selected method in a thread local structure for cross-function 3365 // usage: will be used in __kmpc_end_reduce* functions; 3366 // another solution: to re-determine the method one more time in 3367 // __kmpc_end_reduce* functions (new prototype required then) 3368 // AT: which solution is better? 3369 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3370 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3371 3372 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3373 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3374 3375 // description of the packed_reduction_method variable: look at the macros in 3376 // kmp.h 3377 3378 // used in a critical section reduce block 3379 static __forceinline void 3380 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3381 kmp_critical_name *crit) { 3382 3383 // this lock was visible to a customer and to the threading profile tool as a 3384 // serial overhead span (although it's used for an internal purpose only) 3385 // why was it visible in previous implementation? 3386 // should we keep it visible in new reduce block? 3387 kmp_user_lock_p lck; 3388 3389 #if KMP_USE_DYNAMIC_LOCK 3390 3391 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3392 // Check if it is initialized. 3393 if (*lk == 0) { 3394 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3395 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3396 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3397 } else { 3398 __kmp_init_indirect_csptr(crit, loc, global_tid, 3399 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3400 } 3401 } 3402 // Branch for accessing the actual lock object and set operation. This 3403 // branching is inevitable since this lock initialization does not follow the 3404 // normal dispatch path (lock table is not used). 3405 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3406 lck = (kmp_user_lock_p)lk; 3407 KMP_DEBUG_ASSERT(lck != NULL); 3408 if (__kmp_env_consistency_check) { 3409 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3410 } 3411 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3412 } else { 3413 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3414 lck = ilk->lock; 3415 KMP_DEBUG_ASSERT(lck != NULL); 3416 if (__kmp_env_consistency_check) { 3417 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3418 } 3419 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3420 } 3421 3422 #else // KMP_USE_DYNAMIC_LOCK 3423 3424 // We know that the fast reduction code is only emitted by Intel compilers 3425 // with 32 byte critical sections. 
If there isn't enough space, then we 3426 // have to use a pointer. 3427 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3428 lck = (kmp_user_lock_p)crit; 3429 } else { 3430 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3431 } 3432 KMP_DEBUG_ASSERT(lck != NULL); 3433 3434 if (__kmp_env_consistency_check) 3435 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3436 3437 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3438 3439 #endif // KMP_USE_DYNAMIC_LOCK 3440 } 3441 3442 // used in a critical section reduce block 3443 static __forceinline void 3444 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3445 kmp_critical_name *crit) { 3446 3447 kmp_user_lock_p lck; 3448 3449 #if KMP_USE_DYNAMIC_LOCK 3450 3451 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3452 lck = (kmp_user_lock_p)crit; 3453 if (__kmp_env_consistency_check) 3454 __kmp_pop_sync(global_tid, ct_critical, loc); 3455 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3456 } else { 3457 kmp_indirect_lock_t *ilk = 3458 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3459 if (__kmp_env_consistency_check) 3460 __kmp_pop_sync(global_tid, ct_critical, loc); 3461 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3462 } 3463 3464 #else // KMP_USE_DYNAMIC_LOCK 3465 3466 // We know that the fast reduction code is only emitted by Intel compilers 3467 // with 32 byte critical sections. If there isn't enough space, then we have 3468 // to use a pointer. 3469 if (__kmp_base_user_lock_size > 32) { 3470 lck = *((kmp_user_lock_p *)crit); 3471 KMP_ASSERT(lck != NULL); 3472 } else { 3473 lck = (kmp_user_lock_p)crit; 3474 } 3475 3476 if (__kmp_env_consistency_check) 3477 __kmp_pop_sync(global_tid, ct_critical, loc); 3478 3479 __kmp_release_user_lock_with_checks(lck, global_tid); 3480 3481 #endif // KMP_USE_DYNAMIC_LOCK 3482 } // __kmp_end_critical_section_reduce_block 3483 3484 static __forceinline int 3485 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3486 int *task_state) { 3487 kmp_team_t *team; 3488 3489 // Check if we are inside the teams construct? 3490 if (th->th.th_teams_microtask) { 3491 *team_p = team = th->th.th_team; 3492 if (team->t.t_level == th->th.th_teams_level) { 3493 // This is reduction at teams construct. 3494 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3495 // Let's swap teams temporarily for the reduction. 3496 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3497 th->th.th_team = team->t.t_parent; 3498 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3499 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3500 *task_state = th->th.th_task_state; 3501 th->th.th_task_state = 0; 3502 3503 return 1; 3504 } 3505 } 3506 return 0; 3507 } 3508 3509 static __forceinline void 3510 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3511 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3512 th->th.th_info.ds.ds_tid = 0; 3513 th->th.th_team = team; 3514 th->th.th_team_nproc = team->t.t_nproc; 3515 th->th.th_task_team = team->t.t_task_team[task_state]; 3516 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3517 } 3518 3519 /* 2.a.i. Reduce Block without a terminating barrier */ 3520 /*! 
3521 @ingroup SYNCHRONIZATION 3522 @param loc source location information 3523 @param global_tid global thread number 3524 @param num_vars number of items (variables) to be reduced 3525 @param reduce_size size of data in bytes to be reduced 3526 @param reduce_data pointer to data to be reduced 3527 @param reduce_func callback function providing reduction operation on two 3528 operands and returning result of reduction in lhs_data 3529 @param lck pointer to the unique lock data structure 3530 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3531 threads if atomic reduction needed 3532 3533 The nowait version is used for a reduce clause with the nowait argument. 3534 */ 3535 kmp_int32 3536 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3537 size_t reduce_size, void *reduce_data, 3538 void (*reduce_func)(void *lhs_data, void *rhs_data), 3539 kmp_critical_name *lck) { 3540 3541 KMP_COUNT_BLOCK(REDUCE_nowait); 3542 int retval = 0; 3543 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3544 kmp_info_t *th; 3545 kmp_team_t *team; 3546 int teams_swapped = 0, task_state; 3547 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3548 __kmp_assert_valid_gtid(global_tid); 3549 3550 // why do we need this initialization here at all? 3551 // Reduction clause can not be used as a stand-alone directive. 3552 3553 // do not call __kmp_serial_initialize(), it will be called by 3554 // __kmp_parallel_initialize() if needed 3555 // possible detection of false-positive race by the threadchecker ??? 3556 if (!TCR_4(__kmp_init_parallel)) 3557 __kmp_parallel_initialize(); 3558 3559 __kmp_resume_if_soft_paused(); 3560 3561 // check correctness of reduce block nesting 3562 #if KMP_USE_DYNAMIC_LOCK 3563 if (__kmp_env_consistency_check) 3564 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3565 #else 3566 if (__kmp_env_consistency_check) 3567 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3568 #endif 3569 3570 th = __kmp_thread_from_gtid(global_tid); 3571 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3572 3573 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3574 // the value should be kept in a variable 3575 // the variable should be either a construct-specific or thread-specific 3576 // property, not a team specific property 3577 // (a thread can reach the next reduce block on the next construct, reduce 3578 // method may differ on the next construct) 3579 // an ident_t "loc" parameter could be used as a construct-specific property 3580 // (what if loc == 0?) 
3581 // (if both construct-specific and team-specific variables were shared, 3582 // then unnecessary extra syncs would be needed) 3583 // a thread-specific variable is better regarding two issues above (next 3584 // construct and extra syncs) 3585 // a thread-specific "th_local.reduction_method" variable is used currently 3586 // each thread executes 'determine' and 'set' lines (no need to execute by one 3587 // thread, to avoid unnecessary extra syncs) 3588 3589 packed_reduction_method = __kmp_determine_reduction_method( 3590 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3591 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3592 3593 OMPT_REDUCTION_DECL(th, global_tid); 3594 if (packed_reduction_method == critical_reduce_block) { 3595 3596 OMPT_REDUCTION_BEGIN; 3597 3598 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3599 retval = 1; 3600 3601 } else if (packed_reduction_method == empty_reduce_block) { 3602 3603 OMPT_REDUCTION_BEGIN; 3604 3605 // usage: if team size == 1, no synchronization is required (Intel 3606 // platforms only) 3607 retval = 1; 3608 3609 } else if (packed_reduction_method == atomic_reduce_block) { 3610 3611 retval = 2; 3612 3613 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3614 // won't be called by the code gen) 3615 // (this is not ideal, because the checking block has been closed by 3616 // this 'pop', 3617 // but the atomic operation has not been executed yet; it will be executed 3618 // slightly later, literally on the next instruction) 3619 if (__kmp_env_consistency_check) 3620 __kmp_pop_sync(global_tid, ct_reduce, loc); 3621 3622 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3623 tree_reduce_block)) { 3624 3625 // AT: performance issue: a real barrier here 3626 // AT: (if primary thread is slow, other threads are blocked here waiting for 3627 // the primary thread to come and release them) 3628 // AT: (it's not what a customer might expect when specifying the NOWAIT clause) 3629 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3630 // be confusing to a customer) 3631 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3632 // might go faster and be more in line with sense of NOWAIT 3633 // AT: TO DO: do epcc test and compare times 3634 3635 // this barrier should be invisible to a customer and to the threading profile 3636 // tool (it's neither a terminating barrier nor customer's code, it's 3637 // used for an internal purpose) 3638 #if OMPT_SUPPORT 3639 // JP: can this barrier potentially lead to task scheduling? 3640 // JP: as long as there is a barrier in the implementation, OMPT should and 3641 // will provide the barrier events 3642 // so we set up the necessary frame/return addresses. 3643 ompt_frame_t *ompt_frame; 3644 if (ompt_enabled.enabled) { 3645 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3646 if (ompt_frame->enter_frame.ptr == NULL) 3647 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3648 } 3649 OMPT_STORE_RETURN_ADDRESS(global_tid); 3650 #endif 3651 #if USE_ITT_NOTIFY 3652 __kmp_threads[global_tid]->th.th_ident = loc; 3653 #endif 3654 retval = 3655 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3656 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3657 retval = (retval != 0) ? 
(0) : (1); 3658 #if OMPT_SUPPORT && OMPT_OPTIONAL 3659 if (ompt_enabled.enabled) { 3660 ompt_frame->enter_frame = ompt_data_none; 3661 } 3662 #endif 3663 3664 // all other workers except primary thread should do this pop here 3665 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3666 if (__kmp_env_consistency_check) { 3667 if (retval == 0) { 3668 __kmp_pop_sync(global_tid, ct_reduce, loc); 3669 } 3670 } 3671 3672 } else { 3673 3674 // should never reach this block 3675 KMP_ASSERT(0); // "unexpected method" 3676 } 3677 if (teams_swapped) { 3678 __kmp_restore_swapped_teams(th, team, task_state); 3679 } 3680 KA_TRACE( 3681 10, 3682 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3683 global_tid, packed_reduction_method, retval)); 3684 3685 return retval; 3686 } 3687 3688 /*! 3689 @ingroup SYNCHRONIZATION 3690 @param loc source location information 3691 @param global_tid global thread id. 3692 @param lck pointer to the unique lock data structure 3693 3694 Finish the execution of a reduce nowait. 3695 */ 3696 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3697 kmp_critical_name *lck) { 3698 3699 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3700 3701 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3702 __kmp_assert_valid_gtid(global_tid); 3703 3704 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3705 3706 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3707 3708 if (packed_reduction_method == critical_reduce_block) { 3709 3710 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3711 OMPT_REDUCTION_END; 3712 3713 } else if (packed_reduction_method == empty_reduce_block) { 3714 3715 // usage: if team size == 1, no synchronization is required (on Intel 3716 // platforms only) 3717 3718 OMPT_REDUCTION_END; 3719 3720 } else if (packed_reduction_method == atomic_reduce_block) { 3721 3722 // neither primary thread nor other workers should get here 3723 // (code gen does not generate this call in case 2: atomic reduce block) 3724 // actually it would be better to remove this else-if entirely; 3725 // after removal this value will be checked by the 'else' and will assert 3726 3727 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3728 tree_reduce_block)) { 3729 3730 // only primary thread gets here 3731 // OMPT: tree reduction is annotated in the barrier code 3732 3733 } else { 3734 3735 // should never reach this block 3736 KMP_ASSERT(0); // "unexpected method" 3737 } 3738 3739 if (__kmp_env_consistency_check) 3740 __kmp_pop_sync(global_tid, ct_reduce, loc); 3741 3742 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3743 global_tid, packed_reduction_method)); 3744 3745 return; 3746 } 3747 3748 /* 2.a.ii. Reduce Block with a terminating barrier */ 3749 3750 /*! 3751 @ingroup SYNCHRONIZATION 3752 @param loc source location information 3753 @param global_tid global thread number 3754 @param num_vars number of items (variables) to be reduced 3755 @param reduce_size size of data in bytes to be reduced 3756 @param reduce_data pointer to data to be reduced 3757 @param reduce_func callback function providing reduction operation on two 3758 operands and returning result of reduction in lhs_data 3759 @param lck pointer to the unique lock data structure 3760 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3761 threads if atomic reduction needed 3762 3763 A blocking reduce that includes an implicit barrier.
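For illustration only (the exact sequence depends on the compiler and on the
reduction method the runtime selects), generated code is expected to use the
return value roughly as follows; n, size, data, reduce_func and crit stand for
the arguments described above:

  ret = __kmpc_reduce(loc, gtid, n, size, data, reduce_func, &crit);
  if (ret == 1) {        // primary thread: combine partial results serially
    ... combine ...
    __kmpc_end_reduce(loc, gtid, &crit);
  } else if (ret == 2) { // every thread: combine partial results atomically
    ... atomic combine ...
  }                      // ret == 0: nothing to do, the runtime synchronizes

Depending on the selected method, the compiler may also call
__kmpc_end_reduce() on the atomic path.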
3764 */ 3765 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3766 size_t reduce_size, void *reduce_data, 3767 void (*reduce_func)(void *lhs_data, void *rhs_data), 3768 kmp_critical_name *lck) { 3769 KMP_COUNT_BLOCK(REDUCE_wait); 3770 int retval = 0; 3771 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3772 kmp_info_t *th; 3773 kmp_team_t *team; 3774 int teams_swapped = 0, task_state; 3775 3776 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3777 __kmp_assert_valid_gtid(global_tid); 3778 3779 // why do we need this initialization here at all? 3780 // Reduction clause can not be a stand-alone directive. 3781 3782 // do not call __kmp_serial_initialize(), it will be called by 3783 // __kmp_parallel_initialize() if needed 3784 // possible detection of false-positive race by the threadchecker ??? 3785 if (!TCR_4(__kmp_init_parallel)) 3786 __kmp_parallel_initialize(); 3787 3788 __kmp_resume_if_soft_paused(); 3789 3790 // check correctness of reduce block nesting 3791 #if KMP_USE_DYNAMIC_LOCK 3792 if (__kmp_env_consistency_check) 3793 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3794 #else 3795 if (__kmp_env_consistency_check) 3796 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3797 #endif 3798 3799 th = __kmp_thread_from_gtid(global_tid); 3800 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3801 3802 packed_reduction_method = __kmp_determine_reduction_method( 3803 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3804 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3805 3806 OMPT_REDUCTION_DECL(th, global_tid); 3807 3808 if (packed_reduction_method == critical_reduce_block) { 3809 3810 OMPT_REDUCTION_BEGIN; 3811 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3812 retval = 1; 3813 3814 } else if (packed_reduction_method == empty_reduce_block) { 3815 3816 OMPT_REDUCTION_BEGIN; 3817 // usage: if team size == 1, no synchronization is required ( Intel 3818 // platforms only ) 3819 retval = 1; 3820 3821 } else if (packed_reduction_method == atomic_reduce_block) { 3822 3823 retval = 2; 3824 3825 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3826 tree_reduce_block)) { 3827 3828 // case tree_reduce_block: 3829 // this barrier should be visible to a customer and to the threading profile 3830 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3831 #if OMPT_SUPPORT 3832 ompt_frame_t *ompt_frame; 3833 if (ompt_enabled.enabled) { 3834 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3835 if (ompt_frame->enter_frame.ptr == NULL) 3836 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3837 } 3838 OMPT_STORE_RETURN_ADDRESS(global_tid); 3839 #endif 3840 #if USE_ITT_NOTIFY 3841 __kmp_threads[global_tid]->th.th_ident = 3842 loc; // needed for correct notification of frames 3843 #endif 3844 retval = 3845 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3846 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3847 retval = (retval != 0) ? 
(0) : (1); 3848 #if OMPT_SUPPORT && OMPT_OPTIONAL 3849 if (ompt_enabled.enabled) { 3850 ompt_frame->enter_frame = ompt_data_none; 3851 } 3852 #endif 3853 3854 // all other workers except primary thread should do this pop here 3855 // (none of other workers except primary will enter __kmpc_end_reduce()) 3856 if (__kmp_env_consistency_check) { 3857 if (retval == 0) { // 0: all other workers; 1: primary thread 3858 __kmp_pop_sync(global_tid, ct_reduce, loc); 3859 } 3860 } 3861 3862 } else { 3863 3864 // should never reach this block 3865 KMP_ASSERT(0); // "unexpected method" 3866 } 3867 if (teams_swapped) { 3868 __kmp_restore_swapped_teams(th, team, task_state); 3869 } 3870 3871 KA_TRACE(10, 3872 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3873 global_tid, packed_reduction_method, retval)); 3874 return retval; 3875 } 3876 3877 /*! 3878 @ingroup SYNCHRONIZATION 3879 @param loc source location information 3880 @param global_tid global thread id. 3881 @param lck pointer to the unique lock data structure 3882 3883 Finish the execution of a blocking reduce. 3884 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3885 start function. 3886 */ 3887 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3888 kmp_critical_name *lck) { 3889 3890 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3891 kmp_info_t *th; 3892 kmp_team_t *team; 3893 int teams_swapped = 0, task_state; 3894 3895 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3896 __kmp_assert_valid_gtid(global_tid); 3897 3898 th = __kmp_thread_from_gtid(global_tid); 3899 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3900 3901 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3902 3903 // this barrier should be visible to a customer and to the threading profile 3904 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3905 OMPT_REDUCTION_DECL(th, global_tid); 3906 3907 if (packed_reduction_method == critical_reduce_block) { 3908 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3909 3910 OMPT_REDUCTION_END; 3911 3912 // TODO: implicit barrier: should be exposed 3913 #if OMPT_SUPPORT 3914 ompt_frame_t *ompt_frame; 3915 if (ompt_enabled.enabled) { 3916 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3917 if (ompt_frame->enter_frame.ptr == NULL) 3918 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3919 } 3920 OMPT_STORE_RETURN_ADDRESS(global_tid); 3921 #endif 3922 #if USE_ITT_NOTIFY 3923 __kmp_threads[global_tid]->th.th_ident = loc; 3924 #endif 3925 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3926 #if OMPT_SUPPORT && OMPT_OPTIONAL 3927 if (ompt_enabled.enabled) { 3928 ompt_frame->enter_frame = ompt_data_none; 3929 } 3930 #endif 3931 3932 } else if (packed_reduction_method == empty_reduce_block) { 3933 3934 OMPT_REDUCTION_END; 3935 3936 // usage: if team size==1, no synchronization is required (Intel platforms only) 3937 3938 // TODO: implicit barrier: should be exposed 3939 #if OMPT_SUPPORT 3940 ompt_frame_t *ompt_frame; 3941 if (ompt_enabled.enabled) { 3942 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3943 if (ompt_frame->enter_frame.ptr == NULL) 3944 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3945 } 3946 OMPT_STORE_RETURN_ADDRESS(global_tid); 3947 #endif 3948 #if USE_ITT_NOTIFY 3949 __kmp_threads[global_tid]->th.th_ident = loc; 3950 #endif 3951 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3952 #if OMPT_SUPPORT && OMPT_OPTIONAL 3953 if (ompt_enabled.enabled) { 3954 ompt_frame->enter_frame = ompt_data_none; 3955 } 3956 #endif 3957 3958 } else if (packed_reduction_method == atomic_reduce_block) { 3959 3960 #if OMPT_SUPPORT 3961 ompt_frame_t *ompt_frame; 3962 if (ompt_enabled.enabled) { 3963 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3964 if (ompt_frame->enter_frame.ptr == NULL) 3965 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3966 } 3967 OMPT_STORE_RETURN_ADDRESS(global_tid); 3968 #endif 3969 // TODO: implicit barrier: should be exposed 3970 #if USE_ITT_NOTIFY 3971 __kmp_threads[global_tid]->th.th_ident = loc; 3972 #endif 3973 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3974 #if OMPT_SUPPORT && OMPT_OPTIONAL 3975 if (ompt_enabled.enabled) { 3976 ompt_frame->enter_frame = ompt_data_none; 3977 } 3978 #endif 3979 3980 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3981 tree_reduce_block)) { 3982 3983 // only primary thread executes here (primary releases all other workers) 3984 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3985 global_tid); 3986 3987 } else { 3988 3989 // should never reach this block 3990 KMP_ASSERT(0); // "unexpected method" 3991 } 3992 if (teams_swapped) { 3993 __kmp_restore_swapped_teams(th, team, task_state); 3994 } 3995 3996 if (__kmp_env_consistency_check) 3997 __kmp_pop_sync(global_tid, ct_reduce, loc); 3998 3999 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 4000 global_tid, packed_reduction_method)); 4001 4002 return; 4003 } 4004 4005 #undef __KMP_GET_REDUCTION_METHOD 4006 #undef __KMP_SET_REDUCTION_METHOD 4007 4008 /* end of interface to fast scalable reduce routines */ 4009 4010 kmp_uint64 __kmpc_get_taskid() { 4011 4012 kmp_int32 gtid; 4013 kmp_info_t *thread; 4014 4015 gtid = __kmp_get_gtid(); 4016 if (gtid < 0) { 4017 return 0; 4018 } 4019 thread = __kmp_thread_from_gtid(gtid); 4020 return thread->th.th_current_task->td_task_id; 4021 4022 } // __kmpc_get_taskid 4023 4024 kmp_uint64 __kmpc_get_parent_taskid() { 4025 4026 kmp_int32 gtid; 4027 kmp_info_t *thread; 4028 kmp_taskdata_t *parent_task; 4029 4030 gtid = __kmp_get_gtid(); 4031 if (gtid < 0) { 4032 return 0; 4033 } 4034 thread = __kmp_thread_from_gtid(gtid); 4035 parent_task = thread->th.th_current_task->td_parent; 4036 return (parent_task == NULL ? 0 : parent_task->td_task_id); 4037 4038 } // __kmpc_get_parent_taskid 4039 4040 /*! 4041 @ingroup WORK_SHARING 4042 @param loc source location information. 4043 @param gtid global thread number. 4044 @param num_dims number of associated doacross loops. 4045 @param dims info on loop bounds. 4046 4047 Initialize doacross loop information. 4048 The compiler is expected to send us inclusive bounds, 4049 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
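For illustration only, a sketch of the call sequence generated code might
follow for a one-dimensional doacross loop such as the example above (the
exact code a given compiler emits may differ); lb and ub stand for the bounds
of the chunk assigned to the calling thread:

  struct kmp_dim dim = {2, 8, 2};            // lo, up, st as in the example
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (i = lb; i <= ub; i += 2) {            // this thread's iterations
    kmp_int64 sink = i - 2;
    __kmpc_doacross_wait(loc, gtid, &sink);  // ordered depend(sink: i-2)
    ... loop body ...
    kmp_int64 src = i;
    __kmpc_doacross_post(loc, gtid, &src);   // ordered depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);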
4050 */ 4051 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 4052 const struct kmp_dim *dims) { 4053 __kmp_assert_valid_gtid(gtid); 4054 int j, idx; 4055 kmp_int64 last, trace_count; 4056 kmp_info_t *th = __kmp_threads[gtid]; 4057 kmp_team_t *team = th->th.th_team; 4058 kmp_uint32 *flags; 4059 kmp_disp_t *pr_buf = th->th.th_dispatch; 4060 dispatch_shared_info_t *sh_buf; 4061 4062 KA_TRACE( 4063 20, 4064 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4065 gtid, num_dims, !team->t.t_serialized)); 4066 KMP_DEBUG_ASSERT(dims != NULL); 4067 KMP_DEBUG_ASSERT(num_dims > 0); 4068 4069 if (team->t.t_serialized) { 4070 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4071 return; // no dependencies if team is serialized 4072 } 4073 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4074 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4075 // the next loop 4076 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4077 4078 // Save bounds info into allocated private buffer 4079 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4080 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4081 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4082 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4083 pr_buf->th_doacross_info[0] = 4084 (kmp_int64)num_dims; // first element is number of dimensions 4085 // Save also address of num_done in order to access it later without knowing 4086 // the buffer index 4087 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4088 pr_buf->th_doacross_info[2] = dims[0].lo; 4089 pr_buf->th_doacross_info[3] = dims[0].up; 4090 pr_buf->th_doacross_info[4] = dims[0].st; 4091 last = 5; 4092 for (j = 1; j < num_dims; ++j) { 4093 kmp_int64 4094 range_length; // To keep ranges of all dimensions but the first dims[0] 4095 if (dims[j].st == 1) { // most common case 4096 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4097 range_length = dims[j].up - dims[j].lo + 1; 4098 } else { 4099 if (dims[j].st > 0) { 4100 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4101 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4102 } else { // negative increment 4103 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4104 range_length = 4105 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4106 } 4107 } 4108 pr_buf->th_doacross_info[last++] = range_length; 4109 pr_buf->th_doacross_info[last++] = dims[j].lo; 4110 pr_buf->th_doacross_info[last++] = dims[j].up; 4111 pr_buf->th_doacross_info[last++] = dims[j].st; 4112 } 4113 4114 // Compute total trip count. 4115 // Start with range of dims[0] which we don't need to keep in the buffer. 
4116 if (dims[0].st == 1) { // most common case 4117 trace_count = dims[0].up - dims[0].lo + 1; 4118 } else if (dims[0].st > 0) { 4119 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4120 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4121 } else { // negative increment 4122 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4123 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4124 } 4125 for (j = 1; j < num_dims; ++j) { 4126 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4127 } 4128 KMP_DEBUG_ASSERT(trace_count > 0); 4129 4130 // Check if shared buffer is not occupied by other loop (idx - 4131 // __kmp_dispatch_num_buffers) 4132 if (idx != sh_buf->doacross_buf_idx) { 4133 // Shared buffer is occupied, wait for it to be free 4134 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4135 __kmp_eq_4, NULL); 4136 } 4137 #if KMP_32_BIT_ARCH 4138 // Check if we are the first thread. After the CAS the first thread gets 0, 4139 // others get 1 if initialization is in progress, allocated pointer otherwise. 4140 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4141 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4142 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4143 #else 4144 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4145 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4146 #endif 4147 if (flags == NULL) { 4148 // we are the first thread, allocate the array of flags 4149 size_t size = 4150 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4151 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4152 KMP_MB(); 4153 sh_buf->doacross_flags = flags; 4154 } else if (flags == (kmp_uint32 *)1) { 4155 #if KMP_32_BIT_ARCH 4156 // initialization is still in progress, need to wait 4157 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4158 #else 4159 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4160 #endif 4161 KMP_YIELD(TRUE); 4162 KMP_MB(); 4163 } else { 4164 KMP_MB(); 4165 } 4166 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4167 pr_buf->th_doacross_flags = 4168 sh_buf->doacross_flags; // save private copy in order to not 4169 // touch shared buffer on each iteration 4170 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4171 } 4172 4173 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4174 __kmp_assert_valid_gtid(gtid); 4175 kmp_int64 shft; 4176 size_t num_dims, i; 4177 kmp_uint32 flag; 4178 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4179 kmp_info_t *th = __kmp_threads[gtid]; 4180 kmp_team_t *team = th->th.th_team; 4181 kmp_disp_t *pr_buf; 4182 kmp_int64 lo, up, st; 4183 4184 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4185 if (team->t.t_serialized) { 4186 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4187 return; // no dependencies if team is serialized 4188 } 4189 4190 // calculate sequential iteration number and check out-of-bounds condition 4191 pr_buf = th->th.th_dispatch; 4192 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4193 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4194 lo = pr_buf->th_doacross_info[2]; 4195 up = pr_buf->th_doacross_info[3]; 4196 st = pr_buf->th_doacross_info[4]; 4197 #if OMPT_SUPPORT && OMPT_OPTIONAL 4198 ompt_dependence_t deps[num_dims]; 4199 #endif 4200 if (st == 1) { // most common case 4201 if (vec[0] < lo || vec[0] > up) { 4202 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4203 "bounds [%lld,%lld]\n", 4204 gtid, vec[0], lo, up)); 4205 return; 4206 } 4207 iter_number = vec[0] - lo; 4208 } else if (st > 0) { 4209 if (vec[0] < lo || vec[0] > up) { 4210 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4211 "bounds [%lld,%lld]\n", 4212 gtid, vec[0], lo, up)); 4213 return; 4214 } 4215 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4216 } else { // negative increment 4217 if (vec[0] > lo || vec[0] < up) { 4218 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4219 "bounds [%lld,%lld]\n", 4220 gtid, vec[0], lo, up)); 4221 return; 4222 } 4223 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4224 } 4225 #if OMPT_SUPPORT && OMPT_OPTIONAL 4226 deps[0].variable.value = iter_number; 4227 deps[0].dependence_type = ompt_dependence_type_sink; 4228 #endif 4229 for (i = 1; i < num_dims; ++i) { 4230 kmp_int64 iter, ln; 4231 size_t j = i * 4; 4232 ln = pr_buf->th_doacross_info[j + 1]; 4233 lo = pr_buf->th_doacross_info[j + 2]; 4234 up = pr_buf->th_doacross_info[j + 3]; 4235 st = pr_buf->th_doacross_info[j + 4]; 4236 if (st == 1) { 4237 if (vec[i] < lo || vec[i] > up) { 4238 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4239 "bounds [%lld,%lld]\n", 4240 gtid, vec[i], lo, up)); 4241 return; 4242 } 4243 iter = vec[i] - lo; 4244 } else if (st > 0) { 4245 if (vec[i] < lo || vec[i] > up) { 4246 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4247 "bounds [%lld,%lld]\n", 4248 gtid, vec[i], lo, up)); 4249 return; 4250 } 4251 iter = (kmp_uint64)(vec[i] - lo) / st; 4252 } else { // st < 0 4253 if (vec[i] > lo || vec[i] < up) { 4254 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4255 "bounds [%lld,%lld]\n", 4256 gtid, vec[i], lo, up)); 4257 return; 4258 } 4259 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4260 } 4261 iter_number = iter + ln * iter_number; 4262 #if OMPT_SUPPORT && OMPT_OPTIONAL 4263 deps[i].variable.value = iter; 4264 deps[i].dependence_type = ompt_dependence_type_sink; 4265 #endif 4266 } 4267 shft = iter_number % 32; // use 32-bit granularity 4268 iter_number >>= 5; // divided by 32 4269 flag = 1 << shft; 4270 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4271 KMP_YIELD(TRUE); 4272 } 4273 KMP_MB(); 4274 #if OMPT_SUPPORT && OMPT_OPTIONAL 4275 if (ompt_enabled.ompt_callback_dependences) { 4276 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4277 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4278 } 4279 #endif 4280 KA_TRACE(20, 4281 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4282 gtid, (iter_number << 5) + shft)); 4283 } 4284 4285 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4286 __kmp_assert_valid_gtid(gtid); 4287 kmp_int64 shft; 4288 size_t num_dims, i; 4289 kmp_uint32 flag; 4290 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4291 kmp_info_t *th = __kmp_threads[gtid]; 4292 kmp_team_t *team = th->th.th_team; 4293 kmp_disp_t *pr_buf; 4294 kmp_int64 lo, st; 4295 4296 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4297 if (team->t.t_serialized) { 4298 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4299 return; // no dependencies if team is serialized 4300 } 4301 4302 // calculate sequential iteration number (same as in "wait" but no 4303 // out-of-bounds checks) 4304 pr_buf = th->th.th_dispatch; 4305 
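// The code below folds the multi-dimensional iteration vector into a single
// linear index with dimension 0 most significant: for range lengths (L0, L1,
// L2) and normalized indices (i0, i1, i2) the result is
// (i0 * L1 + i1) * L2 + i2, the same encoding as used by
// __kmpc_doacross_wait() above.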
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4306 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4307 lo = pr_buf->th_doacross_info[2]; 4308 st = pr_buf->th_doacross_info[4]; 4309 #if OMPT_SUPPORT && OMPT_OPTIONAL 4310 ompt_dependence_t deps[num_dims]; 4311 #endif 4312 if (st == 1) { // most common case 4313 iter_number = vec[0] - lo; 4314 } else if (st > 0) { 4315 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4316 } else { // negative increment 4317 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4318 } 4319 #if OMPT_SUPPORT && OMPT_OPTIONAL 4320 deps[0].variable.value = iter_number; 4321 deps[0].dependence_type = ompt_dependence_type_source; 4322 #endif 4323 for (i = 1; i < num_dims; ++i) { 4324 kmp_int64 iter, ln; 4325 size_t j = i * 4; 4326 ln = pr_buf->th_doacross_info[j + 1]; 4327 lo = pr_buf->th_doacross_info[j + 2]; 4328 st = pr_buf->th_doacross_info[j + 4]; 4329 if (st == 1) { 4330 iter = vec[i] - lo; 4331 } else if (st > 0) { 4332 iter = (kmp_uint64)(vec[i] - lo) / st; 4333 } else { // st < 0 4334 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4335 } 4336 iter_number = iter + ln * iter_number; 4337 #if OMPT_SUPPORT && OMPT_OPTIONAL 4338 deps[i].variable.value = iter; 4339 deps[i].dependence_type = ompt_dependence_type_source; 4340 #endif 4341 } 4342 #if OMPT_SUPPORT && OMPT_OPTIONAL 4343 if (ompt_enabled.ompt_callback_dependences) { 4344 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4345 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4346 } 4347 #endif 4348 shft = iter_number % 32; // use 32-bit granularity 4349 iter_number >>= 5; // divided by 32 4350 flag = 1 << shft; 4351 KMP_MB(); 4352 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4353 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4354 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4355 (iter_number << 5) + shft)); 4356 } 4357 4358 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4359 __kmp_assert_valid_gtid(gtid); 4360 kmp_int32 num_done; 4361 kmp_info_t *th = __kmp_threads[gtid]; 4362 kmp_team_t *team = th->th.th_team; 4363 kmp_disp_t *pr_buf = th->th.th_dispatch; 4364 4365 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4366 if (team->t.t_serialized) { 4367 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4368 return; // nothing to do 4369 } 4370 num_done = 4371 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4372 if (num_done == th->th.th_team_nproc) { 4373 // we are the last thread, need to free shared resources 4374 int idx = pr_buf->th_doacross_buf_idx - 1; 4375 dispatch_shared_info_t *sh_buf = 4376 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4377 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4378 (kmp_int64)&sh_buf->doacross_num_done); 4379 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4380 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4381 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4382 sh_buf->doacross_flags = NULL; 4383 sh_buf->doacross_num_done = 0; 4384 sh_buf->doacross_buf_idx += 4385 __kmp_dispatch_num_buffers; // free buffer for future re-use 4386 } 4387 // free private resources (need to keep buffer index forever) 4388 pr_buf->th_doacross_flags = NULL; 4389 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4390 pr_buf->th_doacross_info = NULL; 4391 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4392 } 4393 4394 /* OpenMP 5.1 Memory Management routines */ 4395 void 
*omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4396 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4397 } 4398 4399 void *omp_aligned_alloc(size_t align, size_t size, 4400 omp_allocator_handle_t allocator) { 4401 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4402 } 4403 4404 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4405 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4406 } 4407 4408 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4409 omp_allocator_handle_t allocator) { 4410 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4411 } 4412 4413 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4414 omp_allocator_handle_t free_allocator) { 4415 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4416 free_allocator); 4417 } 4418 4419 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4420 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4421 } 4422 /* end of OpenMP 5.1 Memory Management routines */ 4423 4424 int __kmpc_get_target_offload(void) { 4425 if (!__kmp_init_serial) { 4426 __kmp_serial_initialize(); 4427 } 4428 return __kmp_target_offload; 4429 } 4430 4431 int __kmpc_pause_resource(kmp_pause_status_t level) { 4432 if (!__kmp_init_serial) { 4433 return 1; // Can't pause if runtime is not initialized 4434 } 4435 return __kmp_pause_resource(level); 4436 } 4437 4438 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4439 if (!__kmp_init_serial) 4440 __kmp_serial_initialize(); 4441 4442 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4443 4444 #if OMPT_SUPPORT 4445 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4446 ompt_callbacks.ompt_callback(ompt_callback_error)( 4447 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4448 OMPT_GET_RETURN_ADDRESS(0)); 4449 } 4450 #endif // OMPT_SUPPORT 4451 4452 char *src_loc; 4453 if (loc && loc->psource) { 4454 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4455 src_loc = 4456 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col); 4457 __kmp_str_loc_free(&str_loc); 4458 } else { 4459 src_loc = __kmp_str_format("unknown"); 4460 } 4461 4462 if (severity == severity_warning) 4463 KMP_WARNING(UserDirectedWarning, src_loc, message); 4464 else 4465 KMP_FATAL(UserDirectedError, src_loc, message); 4466 4467 __kmp_str_free(&src_loc); 4468 } 4469 4470 // Mark begin of scope directive. 4471 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4472 // reserved is for extension of scope directive and not used. 4473 #if OMPT_SUPPORT && OMPT_OPTIONAL 4474 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4475 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4476 int tid = __kmp_tid_from_gtid(gtid); 4477 ompt_callbacks.ompt_callback(ompt_callback_work)( 4478 ompt_work_scope, ompt_scope_begin, 4479 &(team->t.ompt_team_info.parallel_data), 4480 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4481 OMPT_GET_RETURN_ADDRESS(0)); 4482 } 4483 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4484 } 4485 4486 // Mark end of scope directive 4487 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4488 // reserved is for extension of scope directive and not used. 
4489 #if OMPT_SUPPORT && OMPT_OPTIONAL 4490 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4491 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4492 int tid = __kmp_tid_from_gtid(gtid); 4493 ompt_callbacks.ompt_callback(ompt_callback_work)( 4494 ompt_work_scope, ompt_scope_end, 4495 &(team->t.ompt_team_info.parallel_data), 4496 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4497 OMPT_GET_RETURN_ADDRESS(0)); 4498 } 4499 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4500 } 4501 4502 #ifdef KMP_USE_VERSION_SYMBOLS 4503 // For GOMP compatibility there are two versions of each omp_* API. 4504 // One is the plain C symbol and one is the Fortran symbol with an appended 4505 // underscore. When we implement a specific ompc_* version of an omp_* 4506 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4507 // instead of the Fortran versions in kmp_ftn_entry.h 4508 extern "C" { 4509 // Have to undef these from omp.h so they aren't translated into 4510 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4511 #ifdef omp_set_affinity_format 4512 #undef omp_set_affinity_format 4513 #endif 4514 #ifdef omp_get_affinity_format 4515 #undef omp_get_affinity_format 4516 #endif 4517 #ifdef omp_display_affinity 4518 #undef omp_display_affinity 4519 #endif 4520 #ifdef omp_capture_affinity 4521 #undef omp_capture_affinity 4522 #endif 4523 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4524 "OMP_5.0"); 4525 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4526 "OMP_5.0"); 4527 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4528 "OMP_5.0"); 4529 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4530 "OMP_5.0"); 4531 } // extern "C" 4532 #endif 4533
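// Illustration only, not part of the library: user code reaches the OpenMP 5.1
// memory management entry points defined above through the standard API, e.g.
//
//   double *p = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
//   if (p) {
//     /* ... use p ... */
//     omp_free(p, omp_default_mem_alloc);
//   }
//
// omp_default_mem_alloc is one of the predefined allocators declared in omp.h.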