/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "kmp_utils.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
94 95 If multiple non-OpenMP threads all enter an OpenMP construct then this 96 will be a unique thread identifier among all the threads created by 97 the OpenMP runtime (but the value cannot be defined in terms of 98 OpenMP thread ids returned by omp_get_thread_num()). 99 */ 100 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 101 kmp_int32 gtid = __kmp_entry_gtid(); 102 103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 104 105 return gtid; 106 } 107 108 /*! 109 @ingroup THREAD_STATES 110 @param loc Source location information. 111 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 112 113 This function can be called in any context. 114 It returns the total number of threads under the control of the OpenMP runtime. 115 That is not a number that can be determined by any OpenMP standard calls, since 116 the library may be called from more than one non-OpenMP thread, and this 117 reflects the total over all such calls. Similarly the runtime maintains 118 underlying threads even when they are not active (since the cost of creating 119 and destroying OS threads is high), this call counts all such threads even if 120 they are not waiting for work. 121 */ 122 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 123 KC_TRACE(10, 124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 125 126 return TCR_4(__kmp_all_nth); 127 } 128 129 /*! 130 @ingroup THREAD_STATES 131 @param loc Source location information. 132 @return The thread number of the calling thread in the innermost active parallel 133 construct. 134 */ 135 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 137 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 138 } 139 140 /*! 141 @ingroup THREAD_STATES 142 @param loc Source location information. 143 @return The number of threads in the innermost active parallel construct. 144 */ 145 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 147 148 return __kmp_entry_thread()->th.th_team->t.t_nproc; 149 } 150 151 /*! 152 * @ingroup DEPRECATED 153 * @param loc location description 154 * 155 * This function need not be called. It always returns TRUE. 
156 */ 157 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 158 #ifndef KMP_DEBUG 159 160 return TRUE; 161 162 #else 163 164 const char *semi2; 165 const char *semi3; 166 int line_no; 167 168 if (__kmp_par_range == 0) { 169 return TRUE; 170 } 171 semi2 = loc->psource; 172 if (semi2 == NULL) { 173 return TRUE; 174 } 175 semi2 = strchr(semi2, ';'); 176 if (semi2 == NULL) { 177 return TRUE; 178 } 179 semi2 = strchr(semi2 + 1, ';'); 180 if (semi2 == NULL) { 181 return TRUE; 182 } 183 if (__kmp_par_range_filename[0]) { 184 const char *name = semi2 - 1; 185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 186 name--; 187 } 188 if ((*name == '/') || (*name == ';')) { 189 name++; 190 } 191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 192 return __kmp_par_range < 0; 193 } 194 } 195 semi3 = strchr(semi2 + 1, ';'); 196 if (__kmp_par_range_routine[0]) { 197 if ((semi3 != NULL) && (semi3 > semi2) && 198 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 199 return __kmp_par_range < 0; 200 } 201 } 202 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 204 return __kmp_par_range > 0; 205 } 206 return __kmp_par_range < 0; 207 } 208 return TRUE; 209 210 #endif /* KMP_DEBUG */ 211 } 212 213 /*! 214 @ingroup THREAD_STATES 215 @param loc Source location information. 216 @return 1 if this thread is executing inside an active parallel region, zero if 217 not. 218 */ 219 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 220 return __kmp_entry_thread()->th.th_root->r.r_active; 221 } 222 223 /*! 224 @ingroup PARALLEL 225 @param loc source location information 226 @param global_tid global thread number 227 @param num_threads number of threads requested for this parallel construct 228 229 Set the number of threads to be used by the next fork spawned by this thread. 230 This call is only required if the parallel construct has a `num_threads` clause. 231 */ 232 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 233 kmp_int32 num_threads) { 234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 235 global_tid, num_threads)); 236 __kmp_assert_valid_gtid(global_tid); 237 __kmp_push_num_threads(loc, global_tid, num_threads); 238 } 239 240 void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid, 241 kmp_int32 num_threads, int severity, 242 const char *message) { 243 __kmp_push_num_threads(loc, global_tid, num_threads); 244 __kmp_set_strict_num_threads(loc, global_tid, severity, message); 245 } 246 247 /*! 248 @ingroup PARALLEL 249 @param loc source location information 250 @param global_tid global thread number 251 @param list_length number of entries in the num_threads_list array 252 @param num_threads_list array of numbers of threads requested for this parallel 253 construct and subsequent nested parallel constructs 254 255 Set the number of threads to be used by the next fork spawned by this thread, 256 and some nested forks as well. 257 This call is only required if the parallel construct has a `num_threads` clause 258 that has a list of integers as the argument. 
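
As an illustration only (the exact lowering is compiler-dependent and the
values here are made up), a compiler could request 4 threads for the next
fork and 3 for a nested fork with
@code
kmp_int32 nths[2] = {4, 3};
__kmpc_push_num_threads_list(loc, global_tid, 2, nths);
@endcode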
*/
void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
                                  kmp_uint32 list_length,
                                  kmp_int32 *num_threads_list) {
  KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
                global_tid));
  KA_TRACE(20, ("%d", num_threads_list[0]));
#ifdef KMP_DEBUG
  for (kmp_uint32 i = 1; i < list_length; ++i)
    KA_TRACE(20, (", %d", num_threads_list[i]));
#endif
  KA_TRACE(20, ("\n"));

  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
}

void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
                                         kmp_uint32 list_length,
                                         kmp_int32 *num_threads_list,
                                         int severity, const char *message) {
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{ 308 int gtid = __kmp_entry_gtid(); 309 310 #if (KMP_STATS_ENABLED) 311 // If we were in a serial region, then stop the serial timer, record 312 // the event, and start parallel region timer 313 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 314 if (previous_state == stats_state_e::SERIAL_REGION) { 315 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 316 } else { 317 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 318 } 319 int inParallel = __kmpc_in_parallel(loc); 320 if (inParallel) { 321 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 322 } else { 323 KMP_COUNT_BLOCK(OMP_PARALLEL); 324 } 325 #endif 326 327 // maybe to save thr_state is enough here 328 { 329 va_list ap; 330 va_start(ap, microtask); 331 332 #if OMPT_SUPPORT 333 ompt_frame_t *ompt_frame; 334 if (ompt_enabled.enabled) { 335 kmp_info_t *master_th = __kmp_threads[gtid]; 336 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame; 337 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 338 } 339 OMPT_STORE_RETURN_ADDRESS(gtid); 340 #endif 341 342 #if INCLUDE_SSC_MARKS 343 SSC_MARK_FORKING(); 344 #endif 345 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 346 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 347 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 348 kmp_va_addr_of(ap)); 349 #if INCLUDE_SSC_MARKS 350 SSC_MARK_JOINING(); 351 #endif 352 __kmp_join_call(loc, gtid 353 #if OMPT_SUPPORT 354 , 355 fork_context_intel 356 #endif 357 ); 358 359 va_end(ap); 360 361 #if OMPT_SUPPORT 362 if (ompt_enabled.enabled) { 363 ompt_frame->enter_frame = ompt_data_none; 364 } 365 #endif 366 } 367 368 #if KMP_STATS_ENABLED 369 if (previous_state == stats_state_e::SERIAL_REGION) { 370 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 371 KMP_SET_THREAD_STATE(previous_state); 372 } else { 373 KMP_POP_PARTITIONED_TIMER(); 374 } 375 #endif // KMP_STATS_ENABLED 376 } 377 378 /*! 379 @ingroup PARALLEL 380 @param loc source location information 381 @param microtask pointer to callback routine consisting of outlined parallel 382 construct 383 @param cond condition for running in parallel 384 @param args struct of pointers to shared variables that aren't global 385 386 Perform a fork only if the condition is true. 387 */ 388 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 389 kmp_int32 cond, void *args) { 390 int gtid = __kmp_entry_gtid(); 391 if (cond) { 392 if (args) 393 __kmpc_fork_call(loc, argc, microtask, args); 394 else 395 __kmpc_fork_call(loc, argc, microtask); 396 } else { 397 __kmpc_serialized_parallel(loc, gtid); 398 399 #if OMPT_SUPPORT 400 void *exit_frame_ptr; 401 #endif 402 403 if (args) 404 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, 405 /*npr=*/0, 406 /*argc=*/1, &args 407 #if OMPT_SUPPORT 408 , 409 &exit_frame_ptr 410 #endif 411 ); 412 else 413 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, 414 /*npr=*/0, 415 /*argc=*/0, 416 /*args=*/nullptr 417 #if OMPT_SUPPORT 418 , 419 &exit_frame_ptr 420 #endif 421 ); 422 423 __kmpc_end_serialized_parallel(loc, gtid); 424 } 425 } 426 427 /*! 428 @ingroup PARALLEL 429 @param loc source location information 430 @param global_tid global thread number 431 @param num_teams number of teams requested for the teams construct 432 @param num_threads number of threads per team requested for the teams construct 433 434 Set the number of teams to be used by the teams construct. 435 This call is only required if the teams construct has a `num_teams` clause 436 or a `thread_limit` clause (or both). 
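
For illustration only (not a normative lowering), a directive such as
@code
#pragma omp teams num_teams(8) thread_limit(16)
@endcode
would typically be preceded by a call equivalent to
__kmpc_push_num_teams(loc, global_tid, 8, 16).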
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param thread_limit limit on number of threads which can be created within the
current task

Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target`
construct.
*/
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...)
{ 504 int gtid = __kmp_entry_gtid(); 505 kmp_info_t *this_thr = __kmp_threads[gtid]; 506 va_list ap; 507 va_start(ap, microtask); 508 509 #if KMP_STATS_ENABLED 510 KMP_COUNT_BLOCK(OMP_TEAMS); 511 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 512 if (previous_state == stats_state_e::SERIAL_REGION) { 513 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 514 } else { 515 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 516 } 517 #endif 518 519 // remember teams entry point and nesting level 520 this_thr->th.th_teams_microtask = microtask; 521 this_thr->th.th_teams_level = 522 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 523 524 #if OMPT_SUPPORT 525 kmp_team_t *parent_team = this_thr->th.th_team; 526 int tid = __kmp_tid_from_gtid(gtid); 527 if (ompt_enabled.enabled) { 528 parent_team->t.t_implicit_task_taskdata[tid] 529 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 530 } 531 OMPT_STORE_RETURN_ADDRESS(gtid); 532 #endif 533 534 // check if __kmpc_push_num_teams called, set default number of teams 535 // otherwise 536 if (this_thr->th.th_teams_size.nteams == 0) { 537 __kmp_push_num_teams(loc, gtid, 0, 0); 538 } 539 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 540 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 541 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 542 543 __kmp_fork_call( 544 loc, gtid, fork_context_intel, argc, 545 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 546 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 547 __kmp_join_call(loc, gtid 548 #if OMPT_SUPPORT 549 , 550 fork_context_intel 551 #endif 552 ); 553 554 // Pop current CG root off list 555 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 556 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 557 this_thr->th.th_cg_roots = tmp->up; 558 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 559 " to node %p. cg_nthreads was %d\n", 560 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 561 KMP_DEBUG_ASSERT(tmp->cg_nthreads); 562 int i = tmp->cg_nthreads--; 563 if (i == 1) { // check is we are the last thread in CG (not always the case) 564 __kmp_free(tmp); 565 } 566 // Restore current task's thread_limit from CG root 567 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 568 this_thr->th.th_current_task->td_icvs.thread_limit = 569 this_thr->th.th_cg_roots->cg_thread_limit; 570 571 this_thr->th.th_teams_microtask = NULL; 572 this_thr->th.th_teams_level = 0; 573 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 574 va_end(ap); 575 #if KMP_STATS_ENABLED 576 if (previous_state == stats_state_e::SERIAL_REGION) { 577 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 578 KMP_SET_THREAD_STATE(previous_state); 579 } else { 580 KMP_POP_PARTITIONED_TIMER(); 581 } 582 #endif // KMP_STATS_ENABLED 583 } 584 585 // I don't think this function should ever have been exported. 586 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 587 // openmp code ever called it, but it's been exported from the RTL for so 588 // long that I'm afraid to remove the definition. 589 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 590 591 /*! 592 @ingroup PARALLEL 593 @param loc source location information 594 @param global_tid global thread number 595 596 Enter a serialized parallel construct. This interface is used to handle a 597 conditional parallel region, like this, 598 @code 599 #pragma omp parallel if (condition) 600 @endcode 601 when the condition is false. 
602 */ 603 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 604 // The implementation is now in kmp_runtime.cpp so that it can share static 605 // functions with kmp_fork_call since the tasks to be done are similar in 606 // each case. 607 __kmp_assert_valid_gtid(global_tid); 608 #if OMPT_SUPPORT 609 OMPT_STORE_RETURN_ADDRESS(global_tid); 610 #endif 611 __kmp_serialized_parallel(loc, global_tid); 612 } 613 614 /*! 615 @ingroup PARALLEL 616 @param loc source location information 617 @param global_tid global thread number 618 619 Leave a serialized parallel construct. 620 */ 621 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 622 kmp_internal_control_t *top; 623 kmp_info_t *this_thr; 624 kmp_team_t *serial_team; 625 626 KC_TRACE(10, 627 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 628 629 /* skip all this code for autopar serialized loops since it results in 630 unacceptable overhead */ 631 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 632 return; 633 634 // Not autopar code 635 __kmp_assert_valid_gtid(global_tid); 636 if (!TCR_4(__kmp_init_parallel)) 637 __kmp_parallel_initialize(); 638 639 __kmp_resume_if_soft_paused(); 640 641 this_thr = __kmp_threads[global_tid]; 642 serial_team = this_thr->th.th_serial_team; 643 644 kmp_task_team_t *task_team = this_thr->th.th_task_team; 645 // we need to wait for the proxy tasks before finishing the thread 646 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || 647 task_team->tt.tt_hidden_helper_task_encountered)) 648 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 649 650 KMP_MB(); 651 KMP_DEBUG_ASSERT(serial_team); 652 KMP_ASSERT(serial_team->t.t_serialized); 653 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 654 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 655 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 656 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 657 658 #if OMPT_SUPPORT 659 if (ompt_enabled.enabled && 660 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 661 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 662 if (ompt_enabled.ompt_callback_implicit_task) { 663 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 664 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 665 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 666 } 667 668 // reset clear the task id only after unlinking the task 669 ompt_data_t *parent_task_data; 670 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 671 672 if (ompt_enabled.ompt_callback_parallel_end) { 673 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 674 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 675 ompt_parallel_invoker_program | ompt_parallel_team, 676 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 677 } 678 __ompt_lw_taskteam_unlink(this_thr); 679 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 680 } 681 #endif 682 683 /* If necessary, pop the internal control stack values and replace the team 684 * values */ 685 top = serial_team->t.t_control_stack_top; 686 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 687 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 688 serial_team->t.t_control_stack_top = top->next; 689 __kmp_free(top); 690 } 691 692 /* pop dispatch buffers stack */ 693 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 694 { 695 dispatch_private_info_t *disp_buffer = 696 
serial_team->t.t_dispatch->th_disp_buffer; 697 serial_team->t.t_dispatch->th_disp_buffer = 698 serial_team->t.t_dispatch->th_disp_buffer->next; 699 __kmp_free(disp_buffer); 700 } 701 702 /* pop the task team stack */ 703 if (serial_team->t.t_serialized > 1) { 704 __kmp_pop_task_team_node(this_thr, serial_team); 705 } 706 707 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 708 709 --serial_team->t.t_serialized; 710 if (serial_team->t.t_serialized == 0) { 711 712 /* return to the parallel section */ 713 714 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 715 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 716 __kmp_clear_x87_fpu_status_word(); 717 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 718 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 719 } 720 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 721 722 __kmp_pop_current_task_from_thread(this_thr); 723 #if OMPD_SUPPORT 724 if (ompd_state & OMPD_ENABLE_BP) 725 ompd_bp_parallel_end(); 726 #endif 727 728 this_thr->th.th_team = serial_team->t.t_parent; 729 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 730 731 /* restore values cached in the thread */ 732 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 733 this_thr->th.th_team_master = 734 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 735 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 736 737 /* TODO the below shouldn't need to be adjusted for serialized teams */ 738 this_thr->th.th_dispatch = 739 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 740 741 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 742 this_thr->th.th_current_task->td_flags.executing = 1; 743 744 if (__kmp_tasking_mode != tskm_immediate_exec) { 745 // Restore task state from serial team structure 746 KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 || 747 serial_team->t.t_primary_task_state == 1); 748 this_thr->th.th_task_state = 749 (kmp_uint8)serial_team->t.t_primary_task_state; 750 // Copy the task team from the new child / old parent team to the thread. 751 this_thr->th.th_task_team = 752 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 753 KA_TRACE(20, 754 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 755 "team %p\n", 756 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 757 } 758 #if KMP_AFFINITY_SUPPORTED 759 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { 760 __kmp_reset_root_init_mask(global_tid); 761 } 762 #endif 763 } else { 764 if (__kmp_tasking_mode != tskm_immediate_exec) { 765 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 766 "depth of serial team %p to %d\n", 767 global_tid, serial_team, serial_team->t.t_serialized)); 768 } 769 } 770 771 serial_team->t.t_level--; 772 if (__kmp_env_consistency_check) 773 __kmp_pop_parallel(global_tid, NULL); 774 #if OMPT_SUPPORT 775 if (ompt_enabled.enabled) 776 this_thr->th.ompt_thread_info.state = 777 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 778 : ompt_state_work_parallel); 779 #endif 780 } 781 782 /*! 783 @ingroup SYNCHRONIZATION 784 @param loc source location information. 785 786 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 787 depending on the memory ordering convention obeyed by the compiler 788 even that may not be necessary). 
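
For reference, a directive such as
@code
#pragma omp flush
@endcode
is normally lowered to a single call of this function; this is illustrative
only, and the precise fence emitted is compiler- and target-dependent.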
789 */ 790 void __kmpc_flush(ident_t *loc) { 791 KC_TRACE(10, ("__kmpc_flush: called\n")); 792 793 /* need explicit __mf() here since use volatile instead in library */ 794 KMP_MFENCE(); /* Flush all pending memory write invalidates. */ 795 796 #if OMPT_SUPPORT && OMPT_OPTIONAL 797 if (ompt_enabled.ompt_callback_flush) { 798 ompt_callbacks.ompt_callback(ompt_callback_flush)( 799 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 800 } 801 #endif 802 } 803 804 /* -------------------------------------------------------------------------- */ 805 /*! 806 @ingroup SYNCHRONIZATION 807 @param loc source location information 808 @param global_tid thread id. 809 810 Execute a barrier. 811 */ 812 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 813 KMP_COUNT_BLOCK(OMP_BARRIER); 814 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 815 __kmp_assert_valid_gtid(global_tid); 816 817 if (!TCR_4(__kmp_init_parallel)) 818 __kmp_parallel_initialize(); 819 820 __kmp_resume_if_soft_paused(); 821 822 if (__kmp_env_consistency_check) { 823 if (loc == 0) { 824 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 825 } 826 __kmp_check_barrier(global_tid, ct_barrier, loc); 827 } 828 829 #if OMPT_SUPPORT 830 ompt_frame_t *ompt_frame; 831 if (ompt_enabled.enabled) { 832 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 833 if (ompt_frame->enter_frame.ptr == NULL) 834 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 835 } 836 OMPT_STORE_RETURN_ADDRESS(global_tid); 837 #endif 838 __kmp_threads[global_tid]->th.th_ident = loc; 839 // TODO: explicit barrier_wait_id: 840 // this function is called when 'barrier' directive is present or 841 // implicit barrier at the end of a worksharing construct. 842 // 1) better to add a per-thread barrier counter to a thread data structure 843 // 2) set to 0 when a new team is created 844 // 4) no sync is required 845 846 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 847 #if OMPT_SUPPORT && OMPT_OPTIONAL 848 if (ompt_enabled.enabled) { 849 ompt_frame->enter_frame = ompt_data_none; 850 } 851 #endif 852 } 853 854 /* The BARRIER for a MASTER section is always explicit */ 855 /*! 856 @ingroup WORK_SHARING 857 @param loc source location information. 858 @param global_tid global thread number . 859 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
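
A sketch of the usual lowering of a <tt>master</tt> construct (illustrative
only):
@code
if (__kmpc_master(loc, global_tid)) {
  // ... body of the master region ...
  __kmpc_end_master(loc, global_tid);
}
@endcode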
860 */ 861 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 862 int status = 0; 863 864 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 865 __kmp_assert_valid_gtid(global_tid); 866 867 if (!TCR_4(__kmp_init_parallel)) 868 __kmp_parallel_initialize(); 869 870 __kmp_resume_if_soft_paused(); 871 872 if (KMP_MASTER_GTID(global_tid)) { 873 KMP_COUNT_BLOCK(OMP_MASTER); 874 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 875 status = 1; 876 } 877 878 #if OMPT_SUPPORT && OMPT_OPTIONAL 879 if (status) { 880 if (ompt_enabled.ompt_callback_masked) { 881 kmp_info_t *this_thr = __kmp_threads[global_tid]; 882 kmp_team_t *team = this_thr->th.th_team; 883 884 int tid = __kmp_tid_from_gtid(global_tid); 885 ompt_callbacks.ompt_callback(ompt_callback_masked)( 886 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 888 OMPT_GET_RETURN_ADDRESS(0)); 889 } 890 } 891 #endif 892 893 if (__kmp_env_consistency_check) { 894 #if KMP_USE_DYNAMIC_LOCK 895 if (status) 896 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 897 else 898 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 899 #else 900 if (status) 901 __kmp_push_sync(global_tid, ct_master, loc, NULL); 902 else 903 __kmp_check_sync(global_tid, ct_master, loc, NULL); 904 #endif 905 } 906 907 return status; 908 } 909 910 /*! 911 @ingroup WORK_SHARING 912 @param loc source location information. 913 @param global_tid global thread number . 914 915 Mark the end of a <tt>master</tt> region. This should only be called by the 916 thread that executes the <tt>master</tt> region. 917 */ 918 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 919 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 920 __kmp_assert_valid_gtid(global_tid); 921 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 922 KMP_POP_PARTITIONED_TIMER(); 923 924 #if OMPT_SUPPORT && OMPT_OPTIONAL 925 kmp_info_t *this_thr = __kmp_threads[global_tid]; 926 kmp_team_t *team = this_thr->th.th_team; 927 if (ompt_enabled.ompt_callback_masked) { 928 int tid = __kmp_tid_from_gtid(global_tid); 929 ompt_callbacks.ompt_callback(ompt_callback_masked)( 930 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 931 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 932 OMPT_GET_RETURN_ADDRESS(0)); 933 } 934 #endif 935 936 if (__kmp_env_consistency_check) { 937 if (KMP_MASTER_GTID(global_tid)) 938 __kmp_pop_sync(global_tid, ct_master, loc); 939 } 940 } 941 942 /*! 943 @ingroup WORK_SHARING 944 @param loc source location information. 945 @param global_tid global thread number. 946 @param filter result of evaluating filter clause on thread global_tid, or zero 947 if no filter clause present 948 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 
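
A sketch of the usual lowering of a <tt>masked</tt> construct (illustrative
only; the filter value comes from the filter clause, or zero if it is absent):
@code
if (__kmpc_masked(loc, global_tid, filter)) {
  // ... body of the masked region ...
  __kmpc_end_masked(loc, global_tid);
}
@endcode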
949 */ 950 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 951 int status = 0; 952 int tid; 953 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 954 __kmp_assert_valid_gtid(global_tid); 955 956 if (!TCR_4(__kmp_init_parallel)) 957 __kmp_parallel_initialize(); 958 959 __kmp_resume_if_soft_paused(); 960 961 tid = __kmp_tid_from_gtid(global_tid); 962 if (tid == filter) { 963 KMP_COUNT_BLOCK(OMP_MASKED); 964 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 965 status = 1; 966 } 967 968 #if OMPT_SUPPORT && OMPT_OPTIONAL 969 if (status) { 970 if (ompt_enabled.ompt_callback_masked) { 971 kmp_info_t *this_thr = __kmp_threads[global_tid]; 972 kmp_team_t *team = this_thr->th.th_team; 973 ompt_callbacks.ompt_callback(ompt_callback_masked)( 974 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 975 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 976 OMPT_GET_RETURN_ADDRESS(0)); 977 } 978 } 979 #endif 980 981 if (__kmp_env_consistency_check) { 982 #if KMP_USE_DYNAMIC_LOCK 983 if (status) 984 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 985 else 986 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 987 #else 988 if (status) 989 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 990 else 991 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 992 #endif 993 } 994 995 return status; 996 } 997 998 /*! 999 @ingroup WORK_SHARING 1000 @param loc source location information. 1001 @param global_tid global thread number . 1002 1003 Mark the end of a <tt>masked</tt> region. This should only be called by the 1004 thread that executes the <tt>masked</tt> region. 1005 */ 1006 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 1007 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 1008 __kmp_assert_valid_gtid(global_tid); 1009 KMP_POP_PARTITIONED_TIMER(); 1010 1011 #if OMPT_SUPPORT && OMPT_OPTIONAL 1012 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1013 kmp_team_t *team = this_thr->th.th_team; 1014 if (ompt_enabled.ompt_callback_masked) { 1015 int tid = __kmp_tid_from_gtid(global_tid); 1016 ompt_callbacks.ompt_callback(ompt_callback_masked)( 1017 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 1018 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1019 OMPT_GET_RETURN_ADDRESS(0)); 1020 } 1021 #endif 1022 1023 if (__kmp_env_consistency_check) { 1024 __kmp_pop_sync(global_tid, ct_masked, loc); 1025 } 1026 } 1027 1028 /*! 1029 @ingroup WORK_SHARING 1030 @param loc source location information. 1031 @param gtid global thread number. 1032 1033 Start execution of an <tt>ordered</tt> construct. 
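
A sketch of the usual lowering of an <tt>ordered</tt> region inside an
ordered loop (illustrative only):
@code
__kmpc_ordered(loc, gtid);
// ... body of the ordered region ...
__kmpc_end_ordered(loc, gtid);
@endcode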
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
1146 void *idx; 1147 kmp_indirect_lock_t **lck; 1148 lck = (kmp_indirect_lock_t **)crit; 1149 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1150 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1151 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1152 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1153 KA_TRACE(20, 1154 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1155 #if USE_ITT_BUILD 1156 __kmp_itt_critical_creating(ilk->lock, loc); 1157 #endif 1158 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1159 if (status == 0) { 1160 #if USE_ITT_BUILD 1161 __kmp_itt_critical_destroyed(ilk->lock); 1162 #endif 1163 // We don't really need to destroy the unclaimed lock here since it will be 1164 // cleaned up at program exit. 1165 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1166 } 1167 KMP_DEBUG_ASSERT(*lck != NULL); 1168 } 1169 1170 // Fast-path acquire tas lock 1171 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1172 { \ 1173 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1174 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1175 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1176 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1177 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1178 kmp_uint32 spins; \ 1179 KMP_FSYNC_PREPARE(l); \ 1180 KMP_INIT_YIELD(spins); \ 1181 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1182 do { \ 1183 if (TCR_4(__kmp_nth) > \ 1184 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1185 KMP_YIELD(TRUE); \ 1186 } else { \ 1187 KMP_YIELD_SPIN(spins); \ 1188 } \ 1189 __kmp_spin_backoff(&backoff); \ 1190 } while ( \ 1191 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1192 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1193 } \ 1194 KMP_FSYNC_ACQUIRED(l); \ 1195 } 1196 1197 // Fast-path test tas lock 1198 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1199 { \ 1200 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1201 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1202 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1203 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1204 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1205 } 1206 1207 // Fast-path release tas lock 1208 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1209 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1210 1211 #if KMP_USE_FUTEX 1212 1213 #include <sys/syscall.h> 1214 #include <unistd.h> 1215 #ifndef FUTEX_WAIT 1216 #define FUTEX_WAIT 0 1217 #endif 1218 #ifndef FUTEX_WAKE 1219 #define FUTEX_WAKE 1 1220 #endif 1221 1222 // Fast-path acquire futex lock 1223 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1224 { \ 1225 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1226 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1227 KMP_MB(); \ 1228 KMP_FSYNC_PREPARE(ftx); \ 1229 kmp_int32 poll_val; \ 1230 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1231 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1232 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1233 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1234 if (!cond) { \ 1235 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1236 poll_val | \ 1237 KMP_LOCK_BUSY(1, futex))) { \ 1238 continue; \ 1239 } \ 1240 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1241 } \ 1242 kmp_int32 rc; \ 1243 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1244 NULL, NULL, 0)) != 0) { \ 1245 continue; \ 1246 } \ 1247 gtid_code |= 1; \ 1248 } \ 1249 KMP_FSYNC_ACQUIRED(ftx); \ 1250 } 1251 
1252 // Fast-path test futex lock 1253 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1254 { \ 1255 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1256 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1257 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1258 KMP_FSYNC_ACQUIRED(ftx); \ 1259 rc = TRUE; \ 1260 } else { \ 1261 rc = FALSE; \ 1262 } \ 1263 } 1264 1265 // Fast-path release futex lock 1266 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1267 { \ 1268 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1269 KMP_MB(); \ 1270 KMP_FSYNC_RELEASING(ftx); \ 1271 kmp_int32 poll_val = \ 1272 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1273 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1274 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1275 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1276 } \ 1277 KMP_MB(); \ 1278 KMP_YIELD_OVERSUB(); \ 1279 } 1280 1281 #endif // KMP_USE_FUTEX 1282 1283 #else // KMP_USE_DYNAMIC_LOCK 1284 1285 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1286 ident_t const *loc, 1287 kmp_int32 gtid) { 1288 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1289 1290 // Because of the double-check, the following load doesn't need to be volatile 1291 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1292 1293 if (lck == NULL) { 1294 void *idx; 1295 1296 // Allocate & initialize the lock. 1297 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1298 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1299 __kmp_init_user_lock_with_checks(lck); 1300 __kmp_set_user_lock_location(lck, loc); 1301 #if USE_ITT_BUILD 1302 __kmp_itt_critical_creating(lck); 1303 // __kmp_itt_critical_creating() should be called *before* the first usage 1304 // of underlying lock. It is the only place where we can guarantee it. There 1305 // are chances the lock will destroyed with no usage, but it is not a 1306 // problem, because this is not real event seen by user but rather setting 1307 // name for object (lock). See more details in kmp_itt.h. 1308 #endif /* USE_ITT_BUILD */ 1309 1310 // Use a cmpxchg instruction to slam the start of the critical section with 1311 // the lock pointer. If another thread beat us to it, deallocate the lock, 1312 // and use the lock that the other thread allocated. 1313 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1314 1315 if (status == 0) { 1316 // Deallocate the lock and reload the value. 1317 #if USE_ITT_BUILD 1318 __kmp_itt_critical_destroyed(lck); 1319 // Let ITT know the lock is destroyed and the same memory location may be reused 1320 // for another purpose. 1321 #endif /* USE_ITT_BUILD */ 1322 __kmp_destroy_user_lock_with_checks(lck); 1323 __kmp_user_lock_free(&idx, gtid, lck); 1324 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1325 KMP_DEBUG_ASSERT(lck != NULL); 1326 } 1327 } 1328 return lck; 1329 } 1330 1331 #endif // KMP_USE_DYNAMIC_LOCK 1332 1333 /*! 1334 @ingroup WORK_SHARING 1335 @param loc source location information. 1336 @param global_tid global thread number. 1337 @param crit identity of the critical section. This could be a pointer to a lock 1338 associated with the critical section, or some other suitably unique value. 1339 1340 Enter code protected by a `critical` construct. 1341 This function blocks until the executing thread can enter the critical section. 
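
A sketch of the usual lowering of a <tt>critical</tt> construct (illustrative
only; the compiler typically emits one zero-initialized kmp_critical_name
object per critical section name):
@code
static kmp_critical_name lock_name; // hypothetical compiler-generated object
__kmpc_critical(loc, global_tid, &lock_name);
// ... body of the critical section ...
__kmpc_end_critical(loc, global_tid, &lock_name);
@endcode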
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
1412 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1413 1414 #if USE_ITT_BUILD 1415 __kmp_itt_critical_acquired(lck); 1416 #endif /* USE_ITT_BUILD */ 1417 #if OMPT_SUPPORT && OMPT_OPTIONAL 1418 if (ompt_enabled.enabled) { 1419 /* OMPT state update */ 1420 ti.state = prev_state; 1421 ti.wait_id = 0; 1422 1423 /* OMPT event callback */ 1424 if (ompt_enabled.ompt_callback_mutex_acquired) { 1425 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1426 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1427 } 1428 } 1429 #endif 1430 KMP_POP_PARTITIONED_TIMER(); 1431 1432 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1433 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1434 #endif // KMP_USE_DYNAMIC_LOCK 1435 } 1436 1437 #if KMP_USE_DYNAMIC_LOCK 1438 1439 // Converts the given hint to an internal lock implementation 1440 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1441 #if KMP_USE_TSX 1442 #define KMP_TSX_LOCK(seq) lockseq_##seq 1443 #else 1444 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1445 #endif 1446 1447 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1448 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) 1449 #else 1450 #define KMP_CPUINFO_RTM 0 1451 #endif 1452 1453 // Hints that do not require further logic 1454 if (hint & kmp_lock_hint_hle) 1455 return KMP_TSX_LOCK(hle); 1456 if (hint & kmp_lock_hint_rtm) 1457 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1458 if (hint & kmp_lock_hint_adaptive) 1459 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1460 1461 // Rule out conflicting hints first by returning the default lock 1462 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1463 return __kmp_user_lock_seq; 1464 if ((hint & omp_lock_hint_speculative) && 1465 (hint & omp_lock_hint_nonspeculative)) 1466 return __kmp_user_lock_seq; 1467 1468 // Do not even consider speculation when it appears to be contended 1469 if (hint & omp_lock_hint_contended) 1470 return lockseq_queuing; 1471 1472 // Uncontended lock without speculation 1473 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1474 return lockseq_tas; 1475 1476 // Use RTM lock for speculation 1477 if (hint & omp_lock_hint_speculative) 1478 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1479 1480 return __kmp_user_lock_seq; 1481 } 1482 1483 #if OMPT_SUPPORT && OMPT_OPTIONAL 1484 #if KMP_USE_DYNAMIC_LOCK 1485 static kmp_mutex_impl_t 1486 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1487 if (user_lock) { 1488 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1489 case 0: 1490 break; 1491 #if KMP_USE_FUTEX 1492 case locktag_futex: 1493 return kmp_mutex_impl_queuing; 1494 #endif 1495 case locktag_tas: 1496 return kmp_mutex_impl_spin; 1497 #if KMP_USE_TSX 1498 case locktag_hle: 1499 case locktag_rtm_spin: 1500 return kmp_mutex_impl_speculative; 1501 #endif 1502 default: 1503 return kmp_mutex_impl_none; 1504 } 1505 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1506 } 1507 KMP_ASSERT(ilock); 1508 switch (ilock->type) { 1509 #if KMP_USE_TSX 1510 case locktag_adaptive: 1511 case locktag_rtm_queuing: 1512 return kmp_mutex_impl_speculative; 1513 #endif 1514 case locktag_nested_tas: 1515 return kmp_mutex_impl_spin; 1516 #if KMP_USE_FUTEX 1517 case locktag_nested_futex: 1518 #endif 1519 case locktag_ticket: 1520 case locktag_queuing: 1521 case locktag_drdpa: 1522 case locktag_nested_ticket: 1523 case locktag_nested_queuing: 1524 case locktag_nested_drdpa: 1525 return kmp_mutex_impl_queuing; 1526 default: 1527 return kmp_mutex_impl_none; 1528 } 1529 } 1530 #else 1531 // For locks without dynamic binding 1532 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1533 switch (__kmp_user_lock_kind) { 1534 case lk_tas: 1535 return kmp_mutex_impl_spin; 1536 #if KMP_USE_FUTEX 1537 case lk_futex: 1538 #endif 1539 case lk_ticket: 1540 case lk_queuing: 1541 case lk_drdpa: 1542 return kmp_mutex_impl_queuing; 1543 #if KMP_USE_TSX 1544 case lk_hle: 1545 case lk_rtm_queuing: 1546 case lk_rtm_spin: 1547 case lk_adaptive: 1548 return kmp_mutex_impl_speculative; 1549 #endif 1550 default: 1551 return kmp_mutex_impl_none; 1552 } 1553 } 1554 #endif // KMP_USE_DYNAMIC_LOCK 1555 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1556 1557 /*! 1558 @ingroup WORK_SHARING 1559 @param loc source location information. 1560 @param global_tid global thread number. 1561 @param crit identity of the critical section. This could be a pointer to a lock 1562 associated with the critical section, or some other suitably unique value. 1563 @param hint the lock hint. 1564 1565 Enter code protected by a `critical` construct with a hint. The hint value is 1566 used to suggest a lock implementation. This function blocks until the executing 1567 thread can enter the critical section unless the hint suggests use of 1568 speculative execution and the hardware supports it. 1569 */ 1570 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1571 kmp_critical_name *crit, uint32_t hint) { 1572 KMP_COUNT_BLOCK(OMP_CRITICAL); 1573 kmp_user_lock_p lck; 1574 #if OMPT_SUPPORT && OMPT_OPTIONAL 1575 ompt_state_t prev_state = ompt_state_undefined; 1576 ompt_thread_info_t ti; 1577 // This is the case, if called from __kmpc_critical: 1578 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1579 if (!codeptr) 1580 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1581 #endif 1582 1583 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1584 __kmp_assert_valid_gtid(global_tid); 1585 1586 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1587 // Check if it is initialized. 
1588 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1589 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); 1590 if (*lk == 0) { 1591 if (KMP_IS_D_LOCK(lockseq)) { 1592 KMP_COMPARE_AND_STORE_ACQ32( 1593 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, 1594 KMP_GET_D_TAG(lockseq)); 1595 } else { 1596 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); 1597 } 1598 } 1599 // Branch for accessing the actual lock object and set operation. This 1600 // branching is inevitable since this lock initialization does not follow the 1601 // normal dispatch path (lock table is not used). 1602 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1603 lck = (kmp_user_lock_p)lk; 1604 if (__kmp_env_consistency_check) { 1605 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1606 __kmp_map_hint_to_lock(hint)); 1607 } 1608 #if USE_ITT_BUILD 1609 __kmp_itt_critical_acquiring(lck); 1610 #endif 1611 #if OMPT_SUPPORT && OMPT_OPTIONAL 1612 if (ompt_enabled.enabled) { 1613 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1614 /* OMPT state update */ 1615 prev_state = ti.state; 1616 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1617 ti.state = ompt_state_wait_critical; 1618 1619 /* OMPT event callback */ 1620 if (ompt_enabled.ompt_callback_mutex_acquire) { 1621 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1622 ompt_mutex_critical, (unsigned int)hint, 1623 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1624 codeptr); 1625 } 1626 } 1627 #endif 1628 #if KMP_USE_INLINED_TAS 1629 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { 1630 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1631 } else 1632 #elif KMP_USE_INLINED_FUTEX 1633 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { 1634 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1635 } else 1636 #endif 1637 { 1638 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1639 } 1640 } else { 1641 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1642 lck = ilk->lock; 1643 if (__kmp_env_consistency_check) { 1644 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1645 __kmp_map_hint_to_lock(hint)); 1646 } 1647 #if USE_ITT_BUILD 1648 __kmp_itt_critical_acquiring(lck); 1649 #endif 1650 #if OMPT_SUPPORT && OMPT_OPTIONAL 1651 if (ompt_enabled.enabled) { 1652 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1653 /* OMPT state update */ 1654 prev_state = ti.state; 1655 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1656 ti.state = ompt_state_wait_critical; 1657 1658 /* OMPT event callback */ 1659 if (ompt_enabled.ompt_callback_mutex_acquire) { 1660 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1661 ompt_mutex_critical, (unsigned int)hint, 1662 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1663 codeptr); 1664 } 1665 } 1666 #endif 1667 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1668 } 1669 KMP_POP_PARTITIONED_TIMER(); 1670 1671 #if USE_ITT_BUILD 1672 __kmp_itt_critical_acquired(lck); 1673 #endif /* USE_ITT_BUILD */ 1674 #if OMPT_SUPPORT && OMPT_OPTIONAL 1675 if (ompt_enabled.enabled) { 1676 /* OMPT state update */ 1677 ti.state = prev_state; 1678 ti.wait_id = 0; 1679 1680 /* OMPT event callback */ 1681 if (ompt_enabled.ompt_callback_mutex_acquired) { 1682 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1683 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1684 } 1685 } 1686 #endif 1687 1688 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1689 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1690 } // __kmpc_critical_with_hint 1691 1692 #endif // 
KMP_USE_DYNAMIC_LOCK 1693 1694 /*! 1695 @ingroup WORK_SHARING 1696 @param loc source location information. 1697 @param global_tid global thread number . 1698 @param crit identity of the critical section. This could be a pointer to a lock 1699 associated with the critical section, or some other suitably unique value. 1700 1701 Leave a critical section, releasing any lock that was held during its execution. 1702 */ 1703 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1704 kmp_critical_name *crit) { 1705 kmp_user_lock_p lck; 1706 1707 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1708 1709 #if KMP_USE_DYNAMIC_LOCK 1710 int locktag = KMP_EXTRACT_D_TAG(crit); 1711 if (locktag) { 1712 lck = (kmp_user_lock_p)crit; 1713 KMP_ASSERT(lck != NULL); 1714 if (__kmp_env_consistency_check) { 1715 __kmp_pop_sync(global_tid, ct_critical, loc); 1716 } 1717 #if USE_ITT_BUILD 1718 __kmp_itt_critical_releasing(lck); 1719 #endif 1720 #if KMP_USE_INLINED_TAS 1721 if (locktag == locktag_tas && !__kmp_env_consistency_check) { 1722 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1723 } else 1724 #elif KMP_USE_INLINED_FUTEX 1725 if (locktag == locktag_futex && !__kmp_env_consistency_check) { 1726 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1727 } else 1728 #endif 1729 { 1730 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1731 } 1732 } else { 1733 kmp_indirect_lock_t *ilk = 1734 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1735 KMP_ASSERT(ilk != NULL); 1736 lck = ilk->lock; 1737 if (__kmp_env_consistency_check) { 1738 __kmp_pop_sync(global_tid, ct_critical, loc); 1739 } 1740 #if USE_ITT_BUILD 1741 __kmp_itt_critical_releasing(lck); 1742 #endif 1743 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1744 } 1745 1746 #else // KMP_USE_DYNAMIC_LOCK 1747 1748 if ((__kmp_user_lock_kind == lk_tas) && 1749 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1750 lck = (kmp_user_lock_p)crit; 1751 } 1752 #if KMP_USE_FUTEX 1753 else if ((__kmp_user_lock_kind == lk_futex) && 1754 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1755 lck = (kmp_user_lock_p)crit; 1756 } 1757 #endif 1758 else { // ticket, queuing or drdpa 1759 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1760 } 1761 1762 KMP_ASSERT(lck != NULL); 1763 1764 if (__kmp_env_consistency_check) 1765 __kmp_pop_sync(global_tid, ct_critical, loc); 1766 1767 #if USE_ITT_BUILD 1768 __kmp_itt_critical_releasing(lck); 1769 #endif /* USE_ITT_BUILD */ 1770 // Value of 'crit' should be good for using as a critical_id of the critical 1771 // section directive. 1772 __kmp_release_user_lock_with_checks(lck, global_tid); 1773 1774 #endif // KMP_USE_DYNAMIC_LOCK 1775 1776 #if OMPT_SUPPORT && OMPT_OPTIONAL 1777 /* OMPT release event triggers after lock is released; place here to trigger 1778 * for all #if branches */ 1779 OMPT_STORE_RETURN_ADDRESS(global_tid); 1780 if (ompt_enabled.ompt_callback_mutex_released) { 1781 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1782 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1783 OMPT_LOAD_RETURN_ADDRESS(0)); 1784 } 1785 #endif 1786 1787 KMP_POP_PARTITIONED_TIMER(); 1788 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1789 } 1790 1791 /*! 1792 @ingroup SYNCHRONIZATION 1793 @param loc source location information 1794 @param global_tid thread id. 1795 @return one if the thread should execute the master block, zero otherwise 1796 1797 Start execution of a combined barrier and master. The barrier is executed inside 1798 this function. 
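For illustration only, a sketch of the calling sequence a compiler might
generate for a master region entered through a barrier (this is not the exact
code produced by any particular compiler; <tt>loc</tt> and <tt>gtid</tt> stand
for the usual source location descriptor and global thread id):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... master block, executed by exactly one thread ...
  __kmpc_end_barrier_master(&loc, gtid); // release the threads still waiting
}
@endcode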
1799 */ 1800 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1801 int status; 1802 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1803 __kmp_assert_valid_gtid(global_tid); 1804 1805 if (!TCR_4(__kmp_init_parallel)) 1806 __kmp_parallel_initialize(); 1807 1808 __kmp_resume_if_soft_paused(); 1809 1810 if (__kmp_env_consistency_check) 1811 __kmp_check_barrier(global_tid, ct_barrier, loc); 1812 1813 #if OMPT_SUPPORT 1814 ompt_frame_t *ompt_frame; 1815 if (ompt_enabled.enabled) { 1816 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1817 if (ompt_frame->enter_frame.ptr == NULL) 1818 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1819 } 1820 OMPT_STORE_RETURN_ADDRESS(global_tid); 1821 #endif 1822 #if USE_ITT_NOTIFY 1823 __kmp_threads[global_tid]->th.th_ident = loc; 1824 #endif 1825 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1826 #if OMPT_SUPPORT && OMPT_OPTIONAL 1827 if (ompt_enabled.enabled) { 1828 ompt_frame->enter_frame = ompt_data_none; 1829 } 1830 #endif 1831 1832 return (status != 0) ? 0 : 1; 1833 } 1834 1835 /*! 1836 @ingroup SYNCHRONIZATION 1837 @param loc source location information 1838 @param global_tid thread id. 1839 1840 Complete the execution of a combined barrier and master. This function should 1841 only be called at the completion of the <tt>master</tt> code. Other threads will 1842 still be waiting at the barrier and this call releases them. 1843 */ 1844 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1845 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1846 __kmp_assert_valid_gtid(global_tid); 1847 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1848 } 1849 1850 /*! 1851 @ingroup SYNCHRONIZATION 1852 @param loc source location information 1853 @param global_tid thread id. 1854 @return one if the thread should execute the master block, zero otherwise 1855 1856 Start execution of a combined barrier and master(nowait) construct. 1857 The barrier is executed inside this function. 1858 There is no equivalent "end" function, since the other threads are not left 1859 waiting for the master code to complete, so there is nothing to release. 1860 */ 1861 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1862 kmp_int32 ret; 1863 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1864 __kmp_assert_valid_gtid(global_tid); 1865 1866 if (!TCR_4(__kmp_init_parallel)) 1867 __kmp_parallel_initialize(); 1868 1869 __kmp_resume_if_soft_paused(); 1870 1871 if (__kmp_env_consistency_check) { 1872 if (loc == 0) { 1873 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1873 } 1874 __kmp_check_barrier(global_tid, ct_barrier, loc); 1875 } 1876 1877 #if OMPT_SUPPORT 1878 ompt_frame_t *ompt_frame; 1879 if (ompt_enabled.enabled) { 1880 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1881 if (ompt_frame->enter_frame.ptr == NULL) 1882 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1883 } 1884 OMPT_STORE_RETURN_ADDRESS(global_tid); 1885 #endif 1886 #if USE_ITT_NOTIFY 1887 __kmp_threads[global_tid]->th.th_ident = loc; 1888 #endif 1889 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1890 #if OMPT_SUPPORT && OMPT_OPTIONAL 1891 if (ompt_enabled.enabled) { 1892 ompt_frame->enter_frame = ompt_data_none; 1893 } 1894 #endif 1895 1896 ret = __kmpc_master(loc, global_tid); 1897 1898 if (__kmp_env_consistency_check) { 1899 /* there's no __kmpc_end_master called; so the (stats) */ 1900 /* actions of __kmpc_end_master are done here */ 1901 if (ret) { 1902 /* only one thread should do the pop since only */ 1903 /* one did the push (see __kmpc_master()) */ 1904 __kmp_pop_sync(global_tid, ct_master, loc); 1905 } 1906 } 1907 1908 return (ret); 1909 } 1910 1911 /* The BARRIER for a SINGLE process section is always explicit */ 1912 /*! 1913 @ingroup WORK_SHARING 1914 @param loc source location information 1915 @param global_tid global thread number 1916 @return One if this thread should execute the single construct, zero otherwise. 1917 1918 Test whether to execute a <tt>single</tt> construct. 1919 There are no implicit barriers in the two "single" calls, rather the compiler 1920 should introduce an explicit barrier if it is required. 1921 */ 1922 1923 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1924 __kmp_assert_valid_gtid(global_tid); 1925 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1926 1927 if (rc) { 1928 // We are going to execute the single statement, so we should count it. 1929 KMP_COUNT_BLOCK(OMP_SINGLE); 1930 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1931 } 1932 1933 #if OMPT_SUPPORT && OMPT_OPTIONAL 1934 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1935 kmp_team_t *team = this_thr->th.th_team; 1936 int tid = __kmp_tid_from_gtid(global_tid); 1937 1938 if (ompt_enabled.enabled) { 1939 if (rc) { 1940 if (ompt_enabled.ompt_callback_work) { 1941 ompt_callbacks.ompt_callback(ompt_callback_work)( 1942 ompt_work_single_executor, ompt_scope_begin, 1943 &(team->t.ompt_team_info.parallel_data), 1944 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1945 1, OMPT_GET_RETURN_ADDRESS(0)); 1946 } 1947 } else { 1948 if (ompt_enabled.ompt_callback_work) { 1949 ompt_callbacks.ompt_callback(ompt_callback_work)( 1950 ompt_work_single_other, ompt_scope_begin, 1951 &(team->t.ompt_team_info.parallel_data), 1952 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1953 1, OMPT_GET_RETURN_ADDRESS(0)); 1954 ompt_callbacks.ompt_callback(ompt_callback_work)( 1955 ompt_work_single_other, ompt_scope_end, 1956 &(team->t.ompt_team_info.parallel_data), 1957 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1958 1, OMPT_GET_RETURN_ADDRESS(0)); 1959 } 1960 } 1961 } 1962 #endif 1963 1964 return rc; 1965 } 1966 1967 /*! 1968 @ingroup WORK_SHARING 1969 @param loc source location information 1970 @param global_tid global thread number 1971 1972 Mark the end of a <tt>single</tt> construct. This function should 1973 only be called by the thread that executed the block of code protected 1974 by the `single` construct. 
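For illustration only, a sketch of a plausible lowering of a <tt>single</tt>
construct without a nowait clause (not the exact code generated by any
particular compiler; <tt>loc</tt> and <tt>gtid</tt> are the usual source
location descriptor and global thread id):
@code
if (__kmpc_single(&loc, gtid)) {
  // ... body of the single construct, executed by one thread only ...
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier; omitted for single nowait
@endcode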
1975 */ 1976 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1977 __kmp_assert_valid_gtid(global_tid); 1978 __kmp_exit_single(global_tid); 1979 KMP_POP_PARTITIONED_TIMER(); 1980 1981 #if OMPT_SUPPORT && OMPT_OPTIONAL 1982 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1983 kmp_team_t *team = this_thr->th.th_team; 1984 int tid = __kmp_tid_from_gtid(global_tid); 1985 1986 if (ompt_enabled.ompt_callback_work) { 1987 ompt_callbacks.ompt_callback(ompt_callback_work)( 1988 ompt_work_single_executor, ompt_scope_end, 1989 &(team->t.ompt_team_info.parallel_data), 1990 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1991 OMPT_GET_RETURN_ADDRESS(0)); 1992 } 1993 #endif 1994 } 1995 1996 /*! 1997 @ingroup WORK_SHARING 1998 @param loc Source location 1999 @param global_tid Global thread id 2000 2001 Mark the end of a statically scheduled loop. 2002 */ 2003 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 2004 KMP_POP_PARTITIONED_TIMER(); 2005 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 2006 2007 #if OMPT_SUPPORT && OMPT_OPTIONAL 2008 if (ompt_enabled.ompt_callback_work) { 2009 ompt_work_t ompt_work_type = ompt_work_loop_static; 2010 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 2011 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 2012 // Determine workshare type 2013 if (loc != NULL) { 2014 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 2015 ompt_work_type = ompt_work_loop_static; 2016 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 2017 ompt_work_type = ompt_work_sections; 2018 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 2019 ompt_work_type = ompt_work_distribute; 2020 } else { 2021 // use default set above. 2022 // a warning about this case is provided in __kmpc_for_static_init 2023 } 2024 KMP_DEBUG_ASSERT(ompt_work_type); 2025 } 2026 ompt_callbacks.ompt_callback(ompt_callback_work)( 2027 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 2028 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 2029 } 2030 #endif 2031 if (__kmp_env_consistency_check) 2032 __kmp_pop_workshare(global_tid, ct_pdo, loc); 2033 } 2034 2035 // User routines which take C-style arguments (call by value) 2036 // different from the Fortran equivalent routines 2037 2038 void ompc_set_num_threads(int arg) { 2039 // !!!!! TODO: check the per-task binding 2040 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 2041 } 2042 2043 void ompc_set_dynamic(int flag) { 2044 kmp_info_t *thread; 2045 2046 /* For the thread-private implementation of the internal controls */ 2047 thread = __kmp_entry_thread(); 2048 2049 __kmp_save_internal_controls(thread); 2050 2051 set__dynamic(thread, flag ? true : false); 2052 } 2053 2054 void ompc_set_nested(int flag) { 2055 kmp_info_t *thread; 2056 2057 /* For the thread-private internal controls implementation */ 2058 thread = __kmp_entry_thread(); 2059 2060 __kmp_save_internal_controls(thread); 2061 2062 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 2063 } 2064 2065 void ompc_set_max_active_levels(int max_active_levels) { 2066 /* TO DO */ 2067 /* we want per-task implementation of this internal control */ 2068 2069 /* For the per-thread internal controls implementation */ 2070 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 2071 } 2072 2073 void ompc_set_schedule(omp_sched_t kind, int modifier) { 2074 // !!!!! 
TODO: check the per-task binding 2075 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 2076 } 2077 2078 int ompc_get_ancestor_thread_num(int level) { 2079 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 2080 } 2081 2082 int ompc_get_team_size(int level) { 2083 return __kmp_get_team_size(__kmp_entry_gtid(), level); 2084 } 2085 2086 /* OpenMP 5.0 Affinity Format API */ 2087 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 2088 if (!__kmp_init_serial) { 2089 __kmp_serial_initialize(); 2090 } 2091 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2092 format, KMP_STRLEN(format) + 1); 2093 } 2094 2095 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2096 size_t format_size; 2097 if (!__kmp_init_serial) { 2098 __kmp_serial_initialize(); 2099 } 2100 format_size = KMP_STRLEN(__kmp_affinity_format); 2101 if (buffer && size) { 2102 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2103 format_size + 1); 2104 } 2105 return format_size; 2106 } 2107 2108 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2109 int gtid; 2110 if (!TCR_4(__kmp_init_middle)) { 2111 __kmp_middle_initialize(); 2112 } 2113 __kmp_assign_root_init_mask(); 2114 gtid = __kmp_get_gtid(); 2115 #if KMP_AFFINITY_SUPPORTED 2116 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2117 __kmp_affinity.flags.reset) { 2118 __kmp_reset_root_init_mask(gtid); 2119 } 2120 #endif 2121 __kmp_aux_display_affinity(gtid, format); 2122 } 2123 2124 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2125 char const *format) { 2126 int gtid; 2127 size_t num_required; 2128 kmp_str_buf_t capture_buf; 2129 if (!TCR_4(__kmp_init_middle)) { 2130 __kmp_middle_initialize(); 2131 } 2132 __kmp_assign_root_init_mask(); 2133 gtid = __kmp_get_gtid(); 2134 #if KMP_AFFINITY_SUPPORTED 2135 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2136 __kmp_affinity.flags.reset) { 2137 __kmp_reset_root_init_mask(gtid); 2138 } 2139 #endif 2140 __kmp_str_buf_init(&capture_buf); 2141 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2142 if (buffer && buf_size) { 2143 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2144 capture_buf.used + 1); 2145 } 2146 __kmp_str_buf_free(&capture_buf); 2147 return num_required; 2148 } 2149 2150 void kmpc_set_stacksize(int arg) { 2151 // __kmp_aux_set_stacksize initializes the library if needed 2152 __kmp_aux_set_stacksize(arg); 2153 } 2154 2155 void kmpc_set_stacksize_s(size_t arg) { 2156 // __kmp_aux_set_stacksize initializes the library if needed 2157 __kmp_aux_set_stacksize(arg); 2158 } 2159 2160 void kmpc_set_blocktime(int arg) { 2161 int gtid, tid, bt = arg; 2162 kmp_info_t *thread; 2163 2164 gtid = __kmp_entry_gtid(); 2165 tid = __kmp_tid_from_gtid(gtid); 2166 thread = __kmp_thread_from_gtid(gtid); 2167 2168 __kmp_aux_convert_blocktime(&bt); 2169 __kmp_aux_set_blocktime(bt, thread, tid); 2170 } 2171 2172 void kmpc_set_library(int arg) { 2173 // __kmp_user_set_library initializes the library if needed 2174 __kmp_user_set_library((enum library_type)arg); 2175 } 2176 2177 void kmpc_set_defaults(char const *str) { 2178 // __kmp_aux_set_defaults initializes the library if needed 2179 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2180 } 2181 2182 void kmpc_set_disp_num_buffers(int arg) { 2183 // ignore after initialization because some teams have already 2184 // allocated dispatch buffers 2185 if (__kmp_init_serial == FALSE && arg >= 
KMP_MIN_DISP_NUM_BUFF && 2186 arg <= KMP_MAX_DISP_NUM_BUFF) { 2187 __kmp_dispatch_num_buffers = arg; 2188 } 2189 } 2190 2191 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2192 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2193 return -1; 2194 #else 2195 if (!TCR_4(__kmp_init_middle)) { 2196 __kmp_middle_initialize(); 2197 } 2198 __kmp_assign_root_init_mask(); 2199 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2200 #endif 2201 } 2202 2203 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2204 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2205 return -1; 2206 #else 2207 if (!TCR_4(__kmp_init_middle)) { 2208 __kmp_middle_initialize(); 2209 } 2210 __kmp_assign_root_init_mask(); 2211 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2212 #endif 2213 } 2214 2215 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2216 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2217 return -1; 2218 #else 2219 if (!TCR_4(__kmp_init_middle)) { 2220 __kmp_middle_initialize(); 2221 } 2222 __kmp_assign_root_init_mask(); 2223 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2224 #endif 2225 } 2226 2227 /* -------------------------------------------------------------------------- */ 2228 /*! 2229 @ingroup THREADPRIVATE 2230 @param loc source location information 2231 @param gtid global thread number 2232 @param cpy_size size of the cpy_data buffer 2233 @param cpy_data pointer to data to be copied 2234 @param cpy_func helper function to call for copying data 2235 @param didit flag variable: 1=single thread; 0=not single thread 2236 2237 __kmpc_copyprivate implements the interface for the private data broadcast 2238 needed for the copyprivate clause associated with a single region in an 2239 OpenMP<sup>*</sup> program (both C and Fortran). 2240 All threads participating in the parallel region call this routine. 2241 One of the threads (called the single thread) should have the <tt>didit</tt> 2242 variable set to 1 and all other threads should have that variable set to 0. 2243 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2244 2245 The OpenMP specification forbids the use of nowait on the single region when a 2246 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2247 barrier internally to avoid race conditions, so the code generation for the 2248 single region should avoid generating a barrier after the call to @ref 2249 __kmpc_copyprivate. 2250 2251 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2252 The <tt>loc</tt> parameter is a pointer to source location information. 2253 2254 Internal implementation: The single thread will first copy its descriptor 2255 address (cpy_data) to a team-private location, then the other threads will each 2256 call the function pointed to by the parameter cpy_func, which carries out the 2257 copy by copying the data using the cpy_data buffer. 2258 2259 The cpy_func routine used for the copy and the contents of the data area defined 2260 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2261 to be done. For instance, the cpy_data buffer can hold the actual data to be 2262 copied or it may hold a list of pointers to the data. The cpy_func routine must 2263 interpret the cpy_data buffer appropriately. 
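For instance (an illustrative sketch only; the names below are invented and
the real generated code differs between compilers), for a single region with
<tt>copyprivate(a,b)</tt> each thread might pass a small descriptor of
pointers together with a helper that dereferences them:
@code
struct cpy_descr { int *a; double *b; };  // one pointer per copyprivate variable
static void cpy_helper(void *dst, void *src) {
  struct cpy_descr *d = (struct cpy_descr *)dst;
  struct cpy_descr *s = (struct cpy_descr *)src;
  *d->a = *s->a; // copy 'a' from the single thread
  *d->b = *s->b; // copy 'b' from the single thread
}
// executed by every thread of the team:
struct cpy_descr descr = {&a, &b};
__kmpc_copyprivate(&loc, gtid, sizeof(descr), &descr, cpy_helper, didit);
@endcode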
2264 2265 The interface to cpy_func is as follows: 2266 @code 2267 void cpy_func( void *destination, void *source ) 2268 @endcode 2269 where void *destination is the cpy_data pointer for the thread being copied to 2270 and void *source is the cpy_data pointer for the thread being copied from. 2271 */ 2272 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2273 void *cpy_data, void (*cpy_func)(void *, void *), 2274 kmp_int32 didit) { 2275 void **data_ptr; 2276 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2277 __kmp_assert_valid_gtid(gtid); 2278 2279 KMP_MB(); 2280 2281 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2282 2283 if (__kmp_env_consistency_check) { 2284 if (loc == 0) { 2285 KMP_WARNING(ConstructIdentInvalid); 2286 } 2287 } 2288 2289 // ToDo: Optimize the following two barriers into some kind of split barrier 2290 2291 if (didit) 2292 *data_ptr = cpy_data; 2293 2294 #if OMPT_SUPPORT 2295 ompt_frame_t *ompt_frame; 2296 if (ompt_enabled.enabled) { 2297 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2298 if (ompt_frame->enter_frame.ptr == NULL) 2299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2300 } 2301 OMPT_STORE_RETURN_ADDRESS(gtid); 2302 #endif 2303 /* This barrier is not a barrier region boundary */ 2304 #if USE_ITT_NOTIFY 2305 __kmp_threads[gtid]->th.th_ident = loc; 2306 #endif 2307 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2308 2309 if (!didit) 2310 (*cpy_func)(cpy_data, *data_ptr); 2311 2312 // Consider next barrier a user-visible barrier for barrier region boundaries 2313 // Nesting checks are already handled by the single construct checks 2314 { 2315 #if OMPT_SUPPORT 2316 OMPT_STORE_RETURN_ADDRESS(gtid); 2317 #endif 2318 #if USE_ITT_NOTIFY 2319 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2320 // tasks can overwrite the location) 2321 #endif 2322 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2323 #if OMPT_SUPPORT && OMPT_OPTIONAL 2324 if (ompt_enabled.enabled) { 2325 ompt_frame->enter_frame = ompt_data_none; 2326 } 2327 #endif 2328 } 2329 } 2330 2331 /* --------------------------------------------------------------------------*/ 2332 /*! 2333 @ingroup THREADPRIVATE 2334 @param loc source location information 2335 @param gtid global thread number 2336 @param cpy_data pointer to the data to be saved/copied or 0 2337 @return the saved pointer to the data 2338 2339 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: 2340 __kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so 2341 coming from single), and returns that pointer in all calls (for single thread 2342 it's not needed). This version doesn't do any actual data copying. Data copying 2343 has to be done somewhere else, e.g. inline in the generated code. Due to this, 2344 this function doesn't have any barrier at the end of the function, like 2345 __kmpc_copyprivate does, so generated code needs barrier after copying of all 2346 data was done. 
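A sketch of the expected usage (illustrative only; the inline copy and the
trailing barrier shown here are the caller's responsibility):
@code
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? &x : NULL);
if (!didit)
  x = *(int *)src;          // data copy emitted inline by the compiler
__kmpc_barrier(&loc, gtid); // barrier after all copies have completed
@endcode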
2347 */ 2348 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { 2349 void **data_ptr; 2350 2351 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid)); 2352 2353 KMP_MB(); 2354 2355 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2356 2357 if (__kmp_env_consistency_check) { 2358 if (loc == 0) { 2359 KMP_WARNING(ConstructIdentInvalid); 2360 } 2361 } 2362 2363 // ToDo: Optimize the following barrier 2364 2365 if (cpy_data) 2366 *data_ptr = cpy_data; 2367 2368 #if OMPT_SUPPORT 2369 ompt_frame_t *ompt_frame; 2370 if (ompt_enabled.enabled) { 2371 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2372 if (ompt_frame->enter_frame.ptr == NULL) 2373 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2374 OMPT_STORE_RETURN_ADDRESS(gtid); 2375 } 2376 #endif 2377 /* This barrier is not a barrier region boundary */ 2378 #if USE_ITT_NOTIFY 2379 __kmp_threads[gtid]->th.th_ident = loc; 2380 #endif 2381 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2382 2383 return *data_ptr; 2384 } 2385 2386 /* -------------------------------------------------------------------------- */ 2387 2388 #define INIT_LOCK __kmp_init_user_lock_with_checks 2389 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2390 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2391 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2392 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2393 #define ACQUIRE_NESTED_LOCK_TIMED \ 2394 __kmp_acquire_nested_user_lock_with_checks_timed 2395 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2396 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2397 #define TEST_LOCK __kmp_test_user_lock_with_checks 2398 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2399 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2400 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2401 2402 // TODO: Make check abort messages use location info & pass it into 2403 // with_checks routines 2404 2405 #if KMP_USE_DYNAMIC_LOCK 2406 2407 // internal lock initializer 2408 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2409 kmp_dyna_lockseq_t seq) { 2410 if (KMP_IS_D_LOCK(seq)) { 2411 KMP_INIT_D_LOCK(lock, seq); 2412 #if USE_ITT_BUILD 2413 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2414 #endif 2415 } else { 2416 KMP_INIT_I_LOCK(lock, seq); 2417 #if USE_ITT_BUILD 2418 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2419 __kmp_itt_lock_creating(ilk->lock, loc); 2420 #endif 2421 } 2422 } 2423 2424 // internal nest lock initializer 2425 static __forceinline void 2426 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2427 kmp_dyna_lockseq_t seq) { 2428 #if KMP_USE_TSX 2429 // Don't have nested lock implementation for speculative locks 2430 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2431 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2432 seq = __kmp_user_lock_seq; 2433 #endif 2434 switch (seq) { 2435 case lockseq_tas: 2436 seq = lockseq_nested_tas; 2437 break; 2438 #if KMP_USE_FUTEX 2439 case lockseq_futex: 2440 seq = lockseq_nested_futex; 2441 break; 2442 #endif 2443 case lockseq_ticket: 2444 seq = lockseq_nested_ticket; 2445 break; 2446 case lockseq_queuing: 2447 seq = lockseq_nested_queuing; 2448 break; 2449 case lockseq_drdpa: 2450 seq = lockseq_nested_drdpa; 2451 break; 2452 default: 2453 seq = lockseq_nested_queuing; 2454 } 2455 
KMP_INIT_I_LOCK(lock, seq); 2456 #if USE_ITT_BUILD 2457 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2458 __kmp_itt_lock_creating(ilk->lock, loc); 2459 #endif 2460 } 2461 2462 /* initialize the lock with a hint */ 2463 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2464 uintptr_t hint) { 2465 KMP_DEBUG_ASSERT(__kmp_init_serial); 2466 if (__kmp_env_consistency_check && user_lock == NULL) { 2467 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2468 } 2469 2470 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2471 2472 #if OMPT_SUPPORT && OMPT_OPTIONAL 2473 // This is the case, if called from omp_init_lock_with_hint: 2474 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2475 if (!codeptr) 2476 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2477 if (ompt_enabled.ompt_callback_lock_init) { 2478 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2479 ompt_mutex_lock, (omp_lock_hint_t)hint, 2480 __ompt_get_mutex_impl_type(user_lock), 2481 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2482 } 2483 #endif 2484 } 2485 2486 /* initialize the lock with a hint */ 2487 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2488 void **user_lock, uintptr_t hint) { 2489 KMP_DEBUG_ASSERT(__kmp_init_serial); 2490 if (__kmp_env_consistency_check && user_lock == NULL) { 2491 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2492 } 2493 2494 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2495 2496 #if OMPT_SUPPORT && OMPT_OPTIONAL 2497 // This is the case, if called from omp_init_lock_with_hint: 2498 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2499 if (!codeptr) 2500 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2501 if (ompt_enabled.ompt_callback_lock_init) { 2502 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2503 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2504 __ompt_get_mutex_impl_type(user_lock), 2505 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2506 } 2507 #endif 2508 } 2509 2510 #endif // KMP_USE_DYNAMIC_LOCK 2511 2512 /* initialize the lock */ 2513 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2514 #if KMP_USE_DYNAMIC_LOCK 2515 2516 KMP_DEBUG_ASSERT(__kmp_init_serial); 2517 if (__kmp_env_consistency_check && user_lock == NULL) { 2518 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2519 } 2520 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2521 2522 #if OMPT_SUPPORT && OMPT_OPTIONAL 2523 // This is the case, if called from omp_init_lock_with_hint: 2524 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2525 if (!codeptr) 2526 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2527 if (ompt_enabled.ompt_callback_lock_init) { 2528 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2529 ompt_mutex_lock, omp_lock_hint_none, 2530 __ompt_get_mutex_impl_type(user_lock), 2531 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2532 } 2533 #endif 2534 2535 #else // KMP_USE_DYNAMIC_LOCK 2536 2537 static char const *const func = "omp_init_lock"; 2538 kmp_user_lock_p lck; 2539 KMP_DEBUG_ASSERT(__kmp_init_serial); 2540 2541 if (__kmp_env_consistency_check) { 2542 if (user_lock == NULL) { 2543 KMP_FATAL(LockIsUninitialized, func); 2544 } 2545 } 2546 2547 KMP_CHECK_USER_LOCK_INIT(); 2548 2549 if ((__kmp_user_lock_kind == lk_tas) && 2550 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2551 lck = (kmp_user_lock_p)user_lock; 2552 } 2553 #if KMP_USE_FUTEX 2554 else if ((__kmp_user_lock_kind == lk_futex) && 2555 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 
2556 lck = (kmp_user_lock_p)user_lock; 2557 } 2558 #endif 2559 else { 2560 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2561 } 2562 INIT_LOCK(lck); 2563 __kmp_set_user_lock_location(lck, loc); 2564 2565 #if OMPT_SUPPORT && OMPT_OPTIONAL 2566 // This is the case, if called from omp_init_lock_with_hint: 2567 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2568 if (!codeptr) 2569 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2570 if (ompt_enabled.ompt_callback_lock_init) { 2571 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2572 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2573 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2574 } 2575 #endif 2576 2577 #if USE_ITT_BUILD 2578 __kmp_itt_lock_creating(lck); 2579 #endif /* USE_ITT_BUILD */ 2580 2581 #endif // KMP_USE_DYNAMIC_LOCK 2582 } // __kmpc_init_lock 2583 2584 /* initialize the lock */ 2585 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2586 #if KMP_USE_DYNAMIC_LOCK 2587 2588 KMP_DEBUG_ASSERT(__kmp_init_serial); 2589 if (__kmp_env_consistency_check && user_lock == NULL) { 2590 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2591 } 2592 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2593 2594 #if OMPT_SUPPORT && OMPT_OPTIONAL 2595 // This is the case, if called from omp_init_lock_with_hint: 2596 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2597 if (!codeptr) 2598 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2599 if (ompt_enabled.ompt_callback_lock_init) { 2600 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2601 ompt_mutex_nest_lock, omp_lock_hint_none, 2602 __ompt_get_mutex_impl_type(user_lock), 2603 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2604 } 2605 #endif 2606 2607 #else // KMP_USE_DYNAMIC_LOCK 2608 2609 static char const *const func = "omp_init_nest_lock"; 2610 kmp_user_lock_p lck; 2611 KMP_DEBUG_ASSERT(__kmp_init_serial); 2612 2613 if (__kmp_env_consistency_check) { 2614 if (user_lock == NULL) { 2615 KMP_FATAL(LockIsUninitialized, func); 2616 } 2617 } 2618 2619 KMP_CHECK_USER_LOCK_INIT(); 2620 2621 if ((__kmp_user_lock_kind == lk_tas) && 2622 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2623 OMP_NEST_LOCK_T_SIZE)) { 2624 lck = (kmp_user_lock_p)user_lock; 2625 } 2626 #if KMP_USE_FUTEX 2627 else if ((__kmp_user_lock_kind == lk_futex) && 2628 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2629 OMP_NEST_LOCK_T_SIZE)) { 2630 lck = (kmp_user_lock_p)user_lock; 2631 } 2632 #endif 2633 else { 2634 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2635 } 2636 2637 INIT_NESTED_LOCK(lck); 2638 __kmp_set_user_lock_location(lck, loc); 2639 2640 #if OMPT_SUPPORT && OMPT_OPTIONAL 2641 // This is the case, if called from omp_init_lock_with_hint: 2642 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2643 if (!codeptr) 2644 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2645 if (ompt_enabled.ompt_callback_lock_init) { 2646 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2647 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2648 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2649 } 2650 #endif 2651 2652 #if USE_ITT_BUILD 2653 __kmp_itt_lock_creating(lck); 2654 #endif /* USE_ITT_BUILD */ 2655 2656 #endif // KMP_USE_DYNAMIC_LOCK 2657 } // __kmpc_init_nest_lock 2658 2659 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2660 #if KMP_USE_DYNAMIC_LOCK 2661 2662 #if USE_ITT_BUILD 2663 kmp_user_lock_p lck; 2664 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2665 lck = 
((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2666 } else { 2667 lck = (kmp_user_lock_p)user_lock; 2668 } 2669 __kmp_itt_lock_destroyed(lck); 2670 #endif 2671 #if OMPT_SUPPORT && OMPT_OPTIONAL 2672 // This is the case, if called from omp_init_lock_with_hint: 2673 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2674 if (!codeptr) 2675 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2676 if (ompt_enabled.ompt_callback_lock_destroy) { 2677 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2678 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2679 } 2680 #endif 2681 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2682 #else 2683 kmp_user_lock_p lck; 2684 2685 if ((__kmp_user_lock_kind == lk_tas) && 2686 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2687 lck = (kmp_user_lock_p)user_lock; 2688 } 2689 #if KMP_USE_FUTEX 2690 else if ((__kmp_user_lock_kind == lk_futex) && 2691 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2692 lck = (kmp_user_lock_p)user_lock; 2693 } 2694 #endif 2695 else { 2696 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2697 } 2698 2699 #if OMPT_SUPPORT && OMPT_OPTIONAL 2700 // This is the case, if called from omp_init_lock_with_hint: 2701 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2702 if (!codeptr) 2703 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2704 if (ompt_enabled.ompt_callback_lock_destroy) { 2705 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2706 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2707 } 2708 #endif 2709 2710 #if USE_ITT_BUILD 2711 __kmp_itt_lock_destroyed(lck); 2712 #endif /* USE_ITT_BUILD */ 2713 DESTROY_LOCK(lck); 2714 2715 if ((__kmp_user_lock_kind == lk_tas) && 2716 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2717 ; 2718 } 2719 #if KMP_USE_FUTEX 2720 else if ((__kmp_user_lock_kind == lk_futex) && 2721 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2722 ; 2723 } 2724 #endif 2725 else { 2726 __kmp_user_lock_free(user_lock, gtid, lck); 2727 } 2728 #endif // KMP_USE_DYNAMIC_LOCK 2729 } // __kmpc_destroy_lock 2730 2731 /* destroy the lock */ 2732 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2733 #if KMP_USE_DYNAMIC_LOCK 2734 2735 #if USE_ITT_BUILD 2736 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2737 __kmp_itt_lock_destroyed(ilk->lock); 2738 #endif 2739 #if OMPT_SUPPORT && OMPT_OPTIONAL 2740 // This is the case, if called from omp_init_lock_with_hint: 2741 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2742 if (!codeptr) 2743 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2744 if (ompt_enabled.ompt_callback_lock_destroy) { 2745 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2746 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2747 } 2748 #endif 2749 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2750 2751 #else // KMP_USE_DYNAMIC_LOCK 2752 2753 kmp_user_lock_p lck; 2754 2755 if ((__kmp_user_lock_kind == lk_tas) && 2756 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2757 OMP_NEST_LOCK_T_SIZE)) { 2758 lck = (kmp_user_lock_p)user_lock; 2759 } 2760 #if KMP_USE_FUTEX 2761 else if ((__kmp_user_lock_kind == lk_futex) && 2762 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2763 OMP_NEST_LOCK_T_SIZE)) { 2764 lck = (kmp_user_lock_p)user_lock; 2765 } 2766 #endif 2767 else { 2768 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2769 } 2770 2771 #if OMPT_SUPPORT && OMPT_OPTIONAL 2772 // This is the case, if called from 
omp_init_lock_with_hint: 2773 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2774 if (!codeptr) 2775 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2776 if (ompt_enabled.ompt_callback_lock_destroy) { 2777 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2778 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2779 } 2780 #endif 2781 2782 #if USE_ITT_BUILD 2783 __kmp_itt_lock_destroyed(lck); 2784 #endif /* USE_ITT_BUILD */ 2785 2786 DESTROY_NESTED_LOCK(lck); 2787 2788 if ((__kmp_user_lock_kind == lk_tas) && 2789 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2790 OMP_NEST_LOCK_T_SIZE)) { 2791 ; 2792 } 2793 #if KMP_USE_FUTEX 2794 else if ((__kmp_user_lock_kind == lk_futex) && 2795 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2796 OMP_NEST_LOCK_T_SIZE)) { 2797 ; 2798 } 2799 #endif 2800 else { 2801 __kmp_user_lock_free(user_lock, gtid, lck); 2802 } 2803 #endif // KMP_USE_DYNAMIC_LOCK 2804 } // __kmpc_destroy_nest_lock 2805 2806 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2807 KMP_COUNT_BLOCK(OMP_set_lock); 2808 #if KMP_USE_DYNAMIC_LOCK 2809 int tag = KMP_EXTRACT_D_TAG(user_lock); 2810 #if USE_ITT_BUILD 2811 __kmp_itt_lock_acquiring( 2812 (kmp_user_lock_p) 2813 user_lock); // itt function will get to the right lock object. 2814 #endif 2815 #if OMPT_SUPPORT && OMPT_OPTIONAL 2816 // This is the case, if called from omp_init_lock_with_hint: 2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2818 if (!codeptr) 2819 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2820 if (ompt_enabled.ompt_callback_mutex_acquire) { 2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2822 ompt_mutex_lock, omp_lock_hint_none, 2823 __ompt_get_mutex_impl_type(user_lock), 2824 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2825 } 2826 #endif 2827 #if KMP_USE_INLINED_TAS 2828 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2829 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2830 } else 2831 #elif KMP_USE_INLINED_FUTEX 2832 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2833 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2834 } else 2835 #endif 2836 { 2837 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2838 } 2839 #if USE_ITT_BUILD 2840 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2841 #endif 2842 #if OMPT_SUPPORT && OMPT_OPTIONAL 2843 if (ompt_enabled.ompt_callback_mutex_acquired) { 2844 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2845 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2846 } 2847 #endif 2848 2849 #else // KMP_USE_DYNAMIC_LOCK 2850 2851 kmp_user_lock_p lck; 2852 2853 if ((__kmp_user_lock_kind == lk_tas) && 2854 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2855 lck = (kmp_user_lock_p)user_lock; 2856 } 2857 #if KMP_USE_FUTEX 2858 else if ((__kmp_user_lock_kind == lk_futex) && 2859 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2860 lck = (kmp_user_lock_p)user_lock; 2861 } 2862 #endif 2863 else { 2864 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2865 } 2866 2867 #if USE_ITT_BUILD 2868 __kmp_itt_lock_acquiring(lck); 2869 #endif /* USE_ITT_BUILD */ 2870 #if OMPT_SUPPORT && OMPT_OPTIONAL 2871 // This is the case, if called from omp_init_lock_with_hint: 2872 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2873 if (!codeptr) 2874 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2875 if (ompt_enabled.ompt_callback_mutex_acquire) { 2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2877 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 2878 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2879 } 2880 #endif 2881 2882 ACQUIRE_LOCK(lck, gtid); 2883 2884 #if USE_ITT_BUILD 2885 __kmp_itt_lock_acquired(lck); 2886 #endif /* USE_ITT_BUILD */ 2887 2888 #if OMPT_SUPPORT && OMPT_OPTIONAL 2889 if (ompt_enabled.ompt_callback_mutex_acquired) { 2890 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2891 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2892 } 2893 #endif 2894 2895 #endif // KMP_USE_DYNAMIC_LOCK 2896 } 2897 2898 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2899 #if KMP_USE_DYNAMIC_LOCK 2900 2901 #if USE_ITT_BUILD 2902 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2903 #endif 2904 #if OMPT_SUPPORT && OMPT_OPTIONAL 2905 // This is the case, if called from omp_init_lock_with_hint: 2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2907 if (!codeptr) 2908 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2909 if (ompt_enabled.enabled) { 2910 if (ompt_enabled.ompt_callback_mutex_acquire) { 2911 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2912 ompt_mutex_nest_lock, omp_lock_hint_none, 2913 __ompt_get_mutex_impl_type(user_lock), 2914 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2915 } 2916 } 2917 #endif 2918 int acquire_status = 2919 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2920 (void)acquire_status; 2921 #if USE_ITT_BUILD 2922 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2923 #endif 2924 2925 #if OMPT_SUPPORT && OMPT_OPTIONAL 2926 if (ompt_enabled.enabled) { 2927 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2928 if (ompt_enabled.ompt_callback_mutex_acquired) { 2929 // lock_first 2930 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2931 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2932 codeptr); 2933 } 2934 } else { 2935 if (ompt_enabled.ompt_callback_nest_lock) { 2936 // lock_next 2937 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2938 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2939 } 2940 } 2941 } 2942 #endif 2943 2944 #else // KMP_USE_DYNAMIC_LOCK 2945 int acquire_status; 2946 kmp_user_lock_p lck; 2947 2948 if ((__kmp_user_lock_kind == lk_tas) && 2949 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2950 OMP_NEST_LOCK_T_SIZE)) { 2951 lck = (kmp_user_lock_p)user_lock; 2952 } 2953 #if KMP_USE_FUTEX 2954 else if ((__kmp_user_lock_kind == lk_futex) && 2955 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2956 OMP_NEST_LOCK_T_SIZE)) { 2957 lck = (kmp_user_lock_p)user_lock; 2958 } 2959 #endif 2960 else { 2961 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2962 } 2963 2964 #if USE_ITT_BUILD 2965 __kmp_itt_lock_acquiring(lck); 2966 #endif /* USE_ITT_BUILD */ 2967 #if OMPT_SUPPORT && OMPT_OPTIONAL 2968 // This is the case, if called from omp_init_lock_with_hint: 2969 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2970 if (!codeptr) 2971 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2972 if (ompt_enabled.enabled) { 2973 if (ompt_enabled.ompt_callback_mutex_acquire) { 2974 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2975 ompt_mutex_nest_lock, omp_lock_hint_none, 2976 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2977 codeptr); 2978 } 2979 } 2980 #endif 2981 2982 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2983 2984 #if USE_ITT_BUILD 2985 __kmp_itt_lock_acquired(lck); 2986 #endif /* USE_ITT_BUILD */ 2987 2988 #if OMPT_SUPPORT && OMPT_OPTIONAL 2989 if (ompt_enabled.enabled) 
{ 2990 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2991 if (ompt_enabled.ompt_callback_mutex_acquired) { 2992 // lock_first 2993 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2994 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2995 } 2996 } else { 2997 if (ompt_enabled.ompt_callback_nest_lock) { 2998 // lock_next 2999 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3000 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3001 } 3002 } 3003 } 3004 #endif 3005 3006 #endif // KMP_USE_DYNAMIC_LOCK 3007 } 3008 3009 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3010 #if KMP_USE_DYNAMIC_LOCK 3011 3012 int tag = KMP_EXTRACT_D_TAG(user_lock); 3013 #if USE_ITT_BUILD 3014 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3015 #endif 3016 #if KMP_USE_INLINED_TAS 3017 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3018 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 3019 } else 3020 #elif KMP_USE_INLINED_FUTEX 3021 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3022 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 3023 } else 3024 #endif 3025 { 3026 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3027 } 3028 3029 #if OMPT_SUPPORT && OMPT_OPTIONAL 3030 // This is the case, if called from omp_init_lock_with_hint: 3031 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3032 if (!codeptr) 3033 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3034 if (ompt_enabled.ompt_callback_mutex_released) { 3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3036 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3037 } 3038 #endif 3039 3040 #else // KMP_USE_DYNAMIC_LOCK 3041 3042 kmp_user_lock_p lck; 3043 3044 /* Can't use serial interval since not block structured */ 3045 /* release the lock */ 3046 3047 if ((__kmp_user_lock_kind == lk_tas) && 3048 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3049 #if KMP_OS_LINUX && \ 3050 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 3051 // "fast" path implemented to fix customer performance issue 3052 #if USE_ITT_BUILD 3053 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3054 #endif /* USE_ITT_BUILD */ 3055 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 3056 KMP_MB(); 3057 3058 #if OMPT_SUPPORT && OMPT_OPTIONAL 3059 // This is the case, if called from omp_init_lock_with_hint: 3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3061 if (!codeptr) 3062 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3063 if (ompt_enabled.ompt_callback_mutex_released) { 3064 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3065 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3066 } 3067 #endif 3068 3069 return; 3070 #else 3071 lck = (kmp_user_lock_p)user_lock; 3072 #endif 3073 } 3074 #if KMP_USE_FUTEX 3075 else if ((__kmp_user_lock_kind == lk_futex) && 3076 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3077 lck = (kmp_user_lock_p)user_lock; 3078 } 3079 #endif 3080 else { 3081 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 3082 } 3083 3084 #if USE_ITT_BUILD 3085 __kmp_itt_lock_releasing(lck); 3086 #endif /* USE_ITT_BUILD */ 3087 3088 RELEASE_LOCK(lck, gtid); 3089 3090 #if OMPT_SUPPORT && OMPT_OPTIONAL 3091 // This is the case, if called from omp_init_lock_with_hint: 3092 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3093 if (!codeptr) 3094 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3095 if (ompt_enabled.ompt_callback_mutex_released) { 3096 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3097 ompt_mutex_lock, 
(ompt_wait_id_t)(uintptr_t)lck, codeptr); 3098 } 3099 #endif 3100 3101 #endif // KMP_USE_DYNAMIC_LOCK 3102 } 3103 3104 /* release the lock */ 3105 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3106 #if KMP_USE_DYNAMIC_LOCK 3107 3108 #if USE_ITT_BUILD 3109 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3110 #endif 3111 int release_status = 3112 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 3113 (void)release_status; 3114 3115 #if OMPT_SUPPORT && OMPT_OPTIONAL 3116 // This is the case, if called from omp_init_lock_with_hint: 3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3118 if (!codeptr) 3119 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3120 if (ompt_enabled.enabled) { 3121 if (release_status == KMP_LOCK_RELEASED) { 3122 if (ompt_enabled.ompt_callback_mutex_released) { 3123 // release_lock_last 3124 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3125 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3126 codeptr); 3127 } 3128 } else if (ompt_enabled.ompt_callback_nest_lock) { 3129 // release_lock_prev 3130 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3131 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3132 } 3133 } 3134 #endif 3135 3136 #else // KMP_USE_DYNAMIC_LOCK 3137 3138 kmp_user_lock_p lck; 3139 3140 /* Can't use serial interval since not block structured */ 3141 3142 if ((__kmp_user_lock_kind == lk_tas) && 3143 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3144 OMP_NEST_LOCK_T_SIZE)) { 3145 #if KMP_OS_LINUX && \ 3146 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 3147 // "fast" path implemented to fix customer performance issue 3148 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 3149 #if USE_ITT_BUILD 3150 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3151 #endif /* USE_ITT_BUILD */ 3152 3153 #if OMPT_SUPPORT && OMPT_OPTIONAL 3154 int release_status = KMP_LOCK_STILL_HELD; 3155 #endif 3156 3157 if (--(tl->lk.depth_locked) == 0) { 3158 TCW_4(tl->lk.poll, 0); 3159 #if OMPT_SUPPORT && OMPT_OPTIONAL 3160 release_status = KMP_LOCK_RELEASED; 3161 #endif 3162 } 3163 KMP_MB(); 3164 3165 #if OMPT_SUPPORT && OMPT_OPTIONAL 3166 // This is the case, if called from omp_init_lock_with_hint: 3167 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3168 if (!codeptr) 3169 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3170 if (ompt_enabled.enabled) { 3171 if (release_status == KMP_LOCK_RELEASED) { 3172 if (ompt_enabled.ompt_callback_mutex_released) { 3173 // release_lock_last 3174 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3175 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3176 } 3177 } else if (ompt_enabled.ompt_callback_nest_lock) { 3178 // release_lock_previous 3179 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3180 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3181 } 3182 } 3183 #endif 3184 3185 return; 3186 #else 3187 lck = (kmp_user_lock_p)user_lock; 3188 #endif 3189 } 3190 #if KMP_USE_FUTEX 3191 else if ((__kmp_user_lock_kind == lk_futex) && 3192 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3193 OMP_NEST_LOCK_T_SIZE)) { 3194 lck = (kmp_user_lock_p)user_lock; 3195 } 3196 #endif 3197 else { 3198 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3199 } 3200 3201 #if USE_ITT_BUILD 3202 __kmp_itt_lock_releasing(lck); 3203 #endif /* USE_ITT_BUILD */ 3204 3205 int release_status; 3206 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3207 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3208 // This is the case, if called from omp_init_lock_with_hint: 3209 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3210 if (!codeptr) 3211 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3212 if (ompt_enabled.enabled) { 3213 if (release_status == KMP_LOCK_RELEASED) { 3214 if (ompt_enabled.ompt_callback_mutex_released) { 3215 // release_lock_last 3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3217 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3218 } 3219 } else if (ompt_enabled.ompt_callback_nest_lock) { 3220 // release_lock_previous 3221 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3222 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3223 } 3224 } 3225 #endif 3226 3227 #endif // KMP_USE_DYNAMIC_LOCK 3228 } 3229 3230 /* try to acquire the lock */ 3231 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3232 KMP_COUNT_BLOCK(OMP_test_lock); 3233 3234 #if KMP_USE_DYNAMIC_LOCK 3235 int rc; 3236 int tag = KMP_EXTRACT_D_TAG(user_lock); 3237 #if USE_ITT_BUILD 3238 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3239 #endif 3240 #if OMPT_SUPPORT && OMPT_OPTIONAL 3241 // This is the case, if called from omp_init_lock_with_hint: 3242 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3243 if (!codeptr) 3244 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3245 if (ompt_enabled.ompt_callback_mutex_acquire) { 3246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3247 ompt_mutex_test_lock, omp_lock_hint_none, 3248 __ompt_get_mutex_impl_type(user_lock), 3249 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3250 } 3251 #endif 3252 #if KMP_USE_INLINED_TAS 3253 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3254 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3255 } else 3256 #elif KMP_USE_INLINED_FUTEX 3257 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3258 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3259 } else 3260 #endif 3261 { 3262 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3263 } 3264 if (rc) { 3265 #if USE_ITT_BUILD 3266 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3267 #endif 3268 #if OMPT_SUPPORT && OMPT_OPTIONAL 3269 if (ompt_enabled.ompt_callback_mutex_acquired) { 3270 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3271 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3272 } 3273 #endif 3274 return FTN_TRUE; 3275 } else { 3276 #if USE_ITT_BUILD 3277 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3278 #endif 3279 return FTN_FALSE; 3280 } 3281 3282 #else // KMP_USE_DYNAMIC_LOCK 3283 3284 kmp_user_lock_p lck; 3285 int rc; 3286 3287 if ((__kmp_user_lock_kind == lk_tas) && 3288 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3289 lck = (kmp_user_lock_p)user_lock; 3290 } 3291 #if KMP_USE_FUTEX 3292 else if ((__kmp_user_lock_kind == lk_futex) && 3293 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3294 lck = (kmp_user_lock_p)user_lock; 3295 } 3296 #endif 3297 else { 3298 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3299 } 3300 3301 #if USE_ITT_BUILD 3302 __kmp_itt_lock_acquiring(lck); 3303 #endif /* USE_ITT_BUILD */ 3304 #if OMPT_SUPPORT && OMPT_OPTIONAL 3305 // This is the case, if called from omp_init_lock_with_hint: 3306 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3307 if (!codeptr) 3308 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3309 if (ompt_enabled.ompt_callback_mutex_acquire) { 3310 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3311 ompt_mutex_test_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 3312 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3313 } 3314 #endif 3315 3316 rc = TEST_LOCK(lck, gtid); 3317 #if USE_ITT_BUILD 3318 if (rc) { 3319 __kmp_itt_lock_acquired(lck); 3320 } else { 3321 __kmp_itt_lock_cancelled(lck); 3322 } 3323 #endif /* USE_ITT_BUILD */ 3324 #if OMPT_SUPPORT && OMPT_OPTIONAL 3325 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3326 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3327 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3328 } 3329 #endif 3330 3331 return (rc ? FTN_TRUE : FTN_FALSE); 3332 3333 /* Can't use serial interval since not block structured */ 3334 3335 #endif // KMP_USE_DYNAMIC_LOCK 3336 } 3337 3338 /* try to acquire the lock */ 3339 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3340 #if KMP_USE_DYNAMIC_LOCK 3341 int rc; 3342 #if USE_ITT_BUILD 3343 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3344 #endif 3345 #if OMPT_SUPPORT && OMPT_OPTIONAL 3346 // This is the case, if called from omp_init_lock_with_hint: 3347 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3348 if (!codeptr) 3349 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3350 if (ompt_enabled.ompt_callback_mutex_acquire) { 3351 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3352 ompt_mutex_test_nest_lock, omp_lock_hint_none, 3353 __ompt_get_mutex_impl_type(user_lock), 3354 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3355 } 3356 #endif 3357 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3358 #if USE_ITT_BUILD 3359 if (rc) { 3360 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3361 } else { 3362 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3363 } 3364 #endif 3365 #if OMPT_SUPPORT && OMPT_OPTIONAL 3366 if (ompt_enabled.enabled && rc) { 3367 if (rc == 1) { 3368 if (ompt_enabled.ompt_callback_mutex_acquired) { 3369 // lock_first 3370 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3371 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3372 codeptr); 3373 } 3374 } else { 3375 if (ompt_enabled.ompt_callback_nest_lock) { 3376 // lock_next 3377 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3378 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3379 } 3380 } 3381 } 3382 #endif 3383 return rc; 3384 3385 #else // KMP_USE_DYNAMIC_LOCK 3386 3387 kmp_user_lock_p lck; 3388 int rc; 3389 3390 if ((__kmp_user_lock_kind == lk_tas) && 3391 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3392 OMP_NEST_LOCK_T_SIZE)) { 3393 lck = (kmp_user_lock_p)user_lock; 3394 } 3395 #if KMP_USE_FUTEX 3396 else if ((__kmp_user_lock_kind == lk_futex) && 3397 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3398 OMP_NEST_LOCK_T_SIZE)) { 3399 lck = (kmp_user_lock_p)user_lock; 3400 } 3401 #endif 3402 else { 3403 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3404 } 3405 3406 #if USE_ITT_BUILD 3407 __kmp_itt_lock_acquiring(lck); 3408 #endif /* USE_ITT_BUILD */ 3409 3410 #if OMPT_SUPPORT && OMPT_OPTIONAL 3411 // This is the case, if called from omp_init_lock_with_hint: 3412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3413 if (!codeptr) 3414 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3415 if (ompt_enabled.enabled && 3416 ompt_enabled.ompt_callback_mutex_acquire) { 3417 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3418 ompt_mutex_test_nest_lock, omp_lock_hint_none, 3419 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3420 codeptr); 3421 } 3422 #endif
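// TEST_NESTED_LOCK returns the resulting nesting count: 0 if the lock is held
// by another thread, 1 on the first acquisition by this thread, and a larger
// value when the owning thread re-acquires it.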
3423 3424 rc = TEST_NESTED_LOCK(lck, gtid); 3425 #if USE_ITT_BUILD 3426 if (rc) { 3427 __kmp_itt_lock_acquired(lck); 3428 } else { 3429 __kmp_itt_lock_cancelled(lck); 3430 } 3431 #endif /* USE_ITT_BUILD */ 3432 #if OMPT_SUPPORT && OMPT_OPTIONAL 3433 if (ompt_enabled.enabled && rc) { 3434 if (rc == 1) { 3435 if (ompt_enabled.ompt_callback_mutex_acquired) { 3436 // lock_first 3437 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3438 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3439 } 3440 } else { 3441 if (ompt_enabled.ompt_callback_nest_lock) { 3442 // lock_next 3443 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3444 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3445 } 3446 } 3447 } 3448 #endif 3449 return rc; 3450 3451 /* Can't use serial interval since not block structured */ 3452 3453 #endif // KMP_USE_DYNAMIC_LOCK 3454 } 3455 3456 // Interface to fast scalable reduce methods routines 3457 3458 // keep the selected method in a thread local structure for cross-function 3459 // usage: will be used in __kmpc_end_reduce* functions; 3460 // another solution: to re-determine the method one more time in 3461 // __kmpc_end_reduce* functions (new prototype required then) 3462 // AT: which solution is better? 3463 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3464 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3465 3466 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3467 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3468 3469 // description of the packed_reduction_method variable: look at the macros in 3470 // kmp.h 3471 3472 // used in a critical section reduce block 3473 static __forceinline void 3474 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3475 kmp_critical_name *crit) { 3476 3477 // this lock was visible to a customer and to the threading profile tool as a 3478 // serial overhead span (although it's used for an internal purpose only) 3479 // why was it visible in previous implementation? 3480 // should we keep it visible in new reduce block? 3481 kmp_user_lock_p lck; 3482 3483 #if KMP_USE_DYNAMIC_LOCK 3484 3485 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3486 // Check if it is initialized. 3487 if (*lk == 0) { 3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3489 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3490 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3491 } else { 3492 __kmp_init_indirect_csptr(crit, loc, global_tid, 3493 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3494 } 3495 } 3496 // Branch for accessing the actual lock object and set operation. This 3497 // branching is inevitable since this lock initialization does not follow the 3498 // normal dispatch path (lock table is not used). 3499 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3500 lck = (kmp_user_lock_p)lk; 3501 KMP_DEBUG_ASSERT(lck != NULL); 3502 if (__kmp_env_consistency_check) { 3503 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3504 } 3505 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3506 } else { 3507 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3508 lck = ilk->lock; 3509 KMP_DEBUG_ASSERT(lck != NULL); 3510 if (__kmp_env_consistency_check) { 3511 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3512 } 3513 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3514 } 3515 3516 #else // KMP_USE_DYNAMIC_LOCK 3517 3518 // We know that the fast reduction code is only emitted by Intel compilers 3519 // with 32 byte critical sections.
If there isn't enough space, then we 3520 // have to use a pointer. 3521 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3522 lck = (kmp_user_lock_p)crit; 3523 } else { 3524 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3525 } 3526 KMP_DEBUG_ASSERT(lck != NULL); 3527 3528 if (__kmp_env_consistency_check) 3529 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3530 3531 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3532 3533 #endif // KMP_USE_DYNAMIC_LOCK 3534 } 3535 3536 // used in a critical section reduce block 3537 static __forceinline void 3538 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3539 kmp_critical_name *crit) { 3540 3541 kmp_user_lock_p lck; 3542 3543 #if KMP_USE_DYNAMIC_LOCK 3544 3545 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3546 lck = (kmp_user_lock_p)crit; 3547 if (__kmp_env_consistency_check) 3548 __kmp_pop_sync(global_tid, ct_critical, loc); 3549 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3550 } else { 3551 kmp_indirect_lock_t *ilk = 3552 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3553 if (__kmp_env_consistency_check) 3554 __kmp_pop_sync(global_tid, ct_critical, loc); 3555 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3556 } 3557 3558 #else // KMP_USE_DYNAMIC_LOCK 3559 3560 // We know that the fast reduction code is only emitted by Intel compilers 3561 // with 32 byte critical sections. If there isn't enough space, then we have 3562 // to use a pointer. 3563 if (__kmp_base_user_lock_size > 32) { 3564 lck = *((kmp_user_lock_p *)crit); 3565 KMP_ASSERT(lck != NULL); 3566 } else { 3567 lck = (kmp_user_lock_p)crit; 3568 } 3569 3570 if (__kmp_env_consistency_check) 3571 __kmp_pop_sync(global_tid, ct_critical, loc); 3572 3573 __kmp_release_user_lock_with_checks(lck, global_tid); 3574 3575 #endif // KMP_USE_DYNAMIC_LOCK 3576 } // __kmp_end_critical_section_reduce_block 3577 3578 static __forceinline int 3579 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3580 int *task_state) { 3581 kmp_team_t *team; 3582 3583 // Check if we are inside the teams construct? 3584 if (th->th.th_teams_microtask) { 3585 *team_p = team = th->th.th_team; 3586 if (team->t.t_level == th->th.th_teams_level) { 3587 // This is reduction at teams construct. 3588 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3589 // Let's swap teams temporarily for the reduction. 3590 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3591 th->th.th_team = team->t.t_parent; 3592 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3593 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3594 *task_state = th->th.th_task_state; 3595 th->th.th_task_state = 0; 3596 3597 return 1; 3598 } 3599 } 3600 return 0; 3601 } 3602 3603 static __forceinline void 3604 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3605 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3606 th->th.th_info.ds.ds_tid = 0; 3607 th->th.th_team = team; 3608 th->th.th_team_nproc = team->t.t_nproc; 3609 th->th.th_task_team = team->t.t_task_team[task_state]; 3610 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3611 } 3612 3613 /* 2.a.i. Reduce Block without a terminating barrier */ 3614 /*! 
3615 @ingroup SYNCHRONIZATION 3616 @param loc source location information 3617 @param global_tid global thread number 3618 @param num_vars number of items (variables) to be reduced 3619 @param reduce_size size of data in bytes to be reduced 3620 @param reduce_data pointer to data to be reduced 3621 @param reduce_func callback function providing reduction operation on two 3622 operands and returning result of reduction in lhs_data 3623 @param lck pointer to the unique lock data structure 3624 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3625 threads if atomic reduction needed 3626 3627 The nowait version is used for a reduce clause with the nowait argument. 3628 */ 3629 kmp_int32 3630 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3631 size_t reduce_size, void *reduce_data, 3632 void (*reduce_func)(void *lhs_data, void *rhs_data), 3633 kmp_critical_name *lck) { 3634 3635 KMP_COUNT_BLOCK(REDUCE_nowait); 3636 int retval = 0; 3637 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3638 kmp_info_t *th; 3639 kmp_team_t *team; 3640 int teams_swapped = 0, task_state; 3641 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3642 __kmp_assert_valid_gtid(global_tid); 3643 3644 // why do we need this initialization here at all? 3645 // Reduction clause can not be used as a stand-alone directive. 3646 3647 // do not call __kmp_serial_initialize(), it will be called by 3648 // __kmp_parallel_initialize() if needed 3649 // possible detection of false-positive race by the threadchecker ??? 3650 if (!TCR_4(__kmp_init_parallel)) 3651 __kmp_parallel_initialize(); 3652 3653 __kmp_resume_if_soft_paused(); 3654 3655 // check correctness of reduce block nesting 3656 #if KMP_USE_DYNAMIC_LOCK 3657 if (__kmp_env_consistency_check) 3658 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3659 #else 3660 if (__kmp_env_consistency_check) 3661 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3662 #endif 3663 3664 th = __kmp_thread_from_gtid(global_tid); 3665 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3666 3667 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3668 // the value should be kept in a variable 3669 // the variable should be either a construct-specific or thread-specific 3670 // property, not a team specific property 3671 // (a thread can reach the next reduce block on the next construct, reduce 3672 // method may differ on the next construct) 3673 // an ident_t "loc" parameter could be used as a construct-specific property 3674 // (what if loc == 0?) 
3675 // (if both construct-specific and team-specific variables were shared, 3676 // then unness extra syncs should be needed) 3677 // a thread-specific variable is better regarding two issues above (next 3678 // construct and extra syncs) 3679 // a thread-specific "th_local.reduction_method" variable is used currently 3680 // each thread executes 'determine' and 'set' lines (no need to execute by one 3681 // thread, to avoid unness extra syncs) 3682 3683 packed_reduction_method = __kmp_determine_reduction_method( 3684 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3685 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3686 3687 OMPT_REDUCTION_DECL(th, global_tid); 3688 if (packed_reduction_method == critical_reduce_block) { 3689 3690 OMPT_REDUCTION_BEGIN; 3691 3692 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3693 retval = 1; 3694 3695 } else if (packed_reduction_method == empty_reduce_block) { 3696 3697 OMPT_REDUCTION_BEGIN; 3698 3699 // usage: if team size == 1, no synchronization is required ( Intel 3700 // platforms only ) 3701 retval = 1; 3702 3703 } else if (packed_reduction_method == atomic_reduce_block) { 3704 3705 retval = 2; 3706 3707 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3708 // won't be called by the code gen) 3709 // (it's not quite good, because the checking block has been closed by 3710 // this 'pop', 3711 // but atomic operation has not been executed yet, will be executed 3712 // slightly later, literally on next instruction) 3713 if (__kmp_env_consistency_check) 3714 __kmp_pop_sync(global_tid, ct_reduce, loc); 3715 3716 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3717 tree_reduce_block)) { 3718 3719 // AT: performance issue: a real barrier here 3720 // AT: (if primary thread is slow, other threads are blocked here waiting for 3721 // the primary thread to come and release them) 3722 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3723 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3724 // be confusing to a customer) 3725 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3726 // might go faster and be more in line with sense of NOWAIT 3727 // AT: TO DO: do epcc test and compare times 3728 3729 // this barrier should be invisible to a customer and to the threading profile 3730 // tool (it's neither a terminating barrier nor customer's code, it's 3731 // used for an internal purpose) 3732 #if OMPT_SUPPORT 3733 // JP: can this barrier potentially leed to task scheduling? 3734 // JP: as long as there is a barrier in the implementation, OMPT should and 3735 // will provide the barrier events 3736 // so we set-up the necessary frame/return addresses. 3737 ompt_frame_t *ompt_frame; 3738 if (ompt_enabled.enabled) { 3739 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3740 if (ompt_frame->enter_frame.ptr == NULL) 3741 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3742 } 3743 OMPT_STORE_RETURN_ADDRESS(global_tid); 3744 #endif 3745 #if USE_ITT_NOTIFY 3746 __kmp_threads[global_tid]->th.th_ident = loc; 3747 #endif 3748 retval = 3749 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3750 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3751 retval = (retval != 0) ? 
(0) : (1); 3752 #if OMPT_SUPPORT && OMPT_OPTIONAL 3753 if (ompt_enabled.enabled) { 3754 ompt_frame->enter_frame = ompt_data_none; 3755 } 3756 #endif 3757 3758 // all other workers except primary thread should do this pop here 3759 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3760 if (__kmp_env_consistency_check) { 3761 if (retval == 0) { 3762 __kmp_pop_sync(global_tid, ct_reduce, loc); 3763 } 3764 } 3765 3766 } else { 3767 3768 // should never reach this block 3769 KMP_ASSERT(0); // "unexpected method" 3770 } 3771 if (teams_swapped) { 3772 __kmp_restore_swapped_teams(th, team, task_state); 3773 } 3774 KA_TRACE( 3775 10, 3776 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3777 global_tid, packed_reduction_method, retval)); 3778 3779 return retval; 3780 } 3781 3782 /*! 3783 @ingroup SYNCHRONIZATION 3784 @param loc source location information 3785 @param global_tid global thread id. 3786 @param lck pointer to the unique lock data structure 3787 3788 Finish the execution of a reduce nowait. 3789 */ 3790 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3791 kmp_critical_name *lck) { 3792 3793 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3794 3795 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3796 __kmp_assert_valid_gtid(global_tid); 3797 3798 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3799 3800 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3801 3802 if (packed_reduction_method == critical_reduce_block) { 3803 3804 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3805 OMPT_REDUCTION_END; 3806 3807 } else if (packed_reduction_method == empty_reduce_block) { 3808 3809 // usage: if team size == 1, no synchronization is required ( on Intel 3810 // platforms only ) 3811 3812 OMPT_REDUCTION_END; 3813 3814 } else if (packed_reduction_method == atomic_reduce_block) { 3815 3816 // neither primary thread nor other workers should get here 3817 // (code gen does not generate this call in case 2: atomic reduce block) 3818 // actually it's better to remove this elseif at all; 3819 // after removal this value will checked by the 'else' and will assert 3820 3821 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3822 tree_reduce_block)) { 3823 3824 // only primary thread gets here 3825 // OMPT: tree reduction is annotated in the barrier code 3826 3827 } else { 3828 3829 // should never reach this block 3830 KMP_ASSERT(0); // "unexpected method" 3831 } 3832 3833 if (__kmp_env_consistency_check) 3834 __kmp_pop_sync(global_tid, ct_reduce, loc); 3835 3836 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3837 global_tid, packed_reduction_method)); 3838 3839 return; 3840 } 3841 3842 /* 2.a.ii. Reduce Block with a terminating barrier */ 3843 3844 /*! 3845 @ingroup SYNCHRONIZATION 3846 @param loc source location information 3847 @param global_tid global thread number 3848 @param num_vars number of items (variables) to be reduced 3849 @param reduce_size size of data in bytes to be reduced 3850 @param reduce_data pointer to data to be reduced 3851 @param reduce_func callback function providing reduction operation on two 3852 operands and returning result of reduction in lhs_data 3853 @param lck pointer to the unique lock data structure 3854 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3855 threads if atomic reduction needed 3856 3857 A blocking reduce that includes an implicit barrier. 
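A hedged sketch of the pattern compiler-generated code typically follows when
consuming the return value (illustrative only: sum is the original variable,
my_sum the thread's private copy, and crit/reduce_func the lock and combiner
the compiler materializes; real codegen passes reduce_data in its own layout).
The nowait variant above is used the same way but pairs with
__kmpc_end_reduce_nowait():

  switch (__kmpc_reduce(loc, gtid, 1, sizeof(sum), &my_sum, reduce_func, &crit)) {
  case 1: // combine non-atomically; the runtime serializes (critical method) or
          // has already merged worker partials into my_sum (tree barrier)
    sum += my_sum;
    __kmpc_end_reduce(loc, gtid, &crit);
    break;
  case 2: // every thread combines its partial value atomically
    // ... atomic update of sum with my_sum ...
    break;
  default: // 0: nothing left to do for this thread
    break;
  }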
3858 */ 3859 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3860 size_t reduce_size, void *reduce_data, 3861 void (*reduce_func)(void *lhs_data, void *rhs_data), 3862 kmp_critical_name *lck) { 3863 KMP_COUNT_BLOCK(REDUCE_wait); 3864 int retval = 0; 3865 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3866 kmp_info_t *th; 3867 kmp_team_t *team; 3868 int teams_swapped = 0, task_state; 3869 3870 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3871 __kmp_assert_valid_gtid(global_tid); 3872 3873 // why do we need this initialization here at all? 3874 // Reduction clause can not be a stand-alone directive. 3875 3876 // do not call __kmp_serial_initialize(), it will be called by 3877 // __kmp_parallel_initialize() if needed 3878 // possible detection of false-positive race by the threadchecker ??? 3879 if (!TCR_4(__kmp_init_parallel)) 3880 __kmp_parallel_initialize(); 3881 3882 __kmp_resume_if_soft_paused(); 3883 3884 // check correctness of reduce block nesting 3885 #if KMP_USE_DYNAMIC_LOCK 3886 if (__kmp_env_consistency_check) 3887 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3888 #else 3889 if (__kmp_env_consistency_check) 3890 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3891 #endif 3892 3893 th = __kmp_thread_from_gtid(global_tid); 3894 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3895 3896 packed_reduction_method = __kmp_determine_reduction_method( 3897 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3898 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3899 3900 OMPT_REDUCTION_DECL(th, global_tid); 3901 3902 if (packed_reduction_method == critical_reduce_block) { 3903 3904 OMPT_REDUCTION_BEGIN; 3905 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3906 retval = 1; 3907 3908 } else if (packed_reduction_method == empty_reduce_block) { 3909 3910 OMPT_REDUCTION_BEGIN; 3911 // usage: if team size == 1, no synchronization is required ( Intel 3912 // platforms only ) 3913 retval = 1; 3914 3915 } else if (packed_reduction_method == atomic_reduce_block) { 3916 3917 retval = 2; 3918 3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3920 tree_reduce_block)) { 3921 3922 // case tree_reduce_block: 3923 // this barrier should be visible to a customer and to the threading profile 3924 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3925 #if OMPT_SUPPORT 3926 ompt_frame_t *ompt_frame; 3927 if (ompt_enabled.enabled) { 3928 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3929 if (ompt_frame->enter_frame.ptr == NULL) 3930 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3931 } 3932 OMPT_STORE_RETURN_ADDRESS(global_tid); 3933 #endif 3934 #if USE_ITT_NOTIFY 3935 __kmp_threads[global_tid]->th.th_ident = 3936 loc; // needed for correct notification of frames 3937 #endif 3938 retval = 3939 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3940 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3941 retval = (retval != 0) ? 
(0) : (1); 3942 #if OMPT_SUPPORT && OMPT_OPTIONAL 3943 if (ompt_enabled.enabled) { 3944 ompt_frame->enter_frame = ompt_data_none; 3945 } 3946 #endif 3947 3948 // all other workers except primary thread should do this pop here 3949 // (none of other workers except primary will enter __kmpc_end_reduce()) 3950 if (__kmp_env_consistency_check) { 3951 if (retval == 0) { // 0: all other workers; 1: primary thread 3952 __kmp_pop_sync(global_tid, ct_reduce, loc); 3953 } 3954 } 3955 3956 } else { 3957 3958 // should never reach this block 3959 KMP_ASSERT(0); // "unexpected method" 3960 } 3961 if (teams_swapped) { 3962 __kmp_restore_swapped_teams(th, team, task_state); 3963 } 3964 3965 KA_TRACE(10, 3966 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3967 global_tid, packed_reduction_method, retval)); 3968 return retval; 3969 } 3970 3971 /*! 3972 @ingroup SYNCHRONIZATION 3973 @param loc source location information 3974 @param global_tid global thread id. 3975 @param lck pointer to the unique lock data structure 3976 3977 Finish the execution of a blocking reduce. 3978 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3979 start function. 3980 */ 3981 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3982 kmp_critical_name *lck) { 3983 3984 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3985 kmp_info_t *th; 3986 kmp_team_t *team; 3987 int teams_swapped = 0, task_state; 3988 3989 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3990 __kmp_assert_valid_gtid(global_tid); 3991 3992 th = __kmp_thread_from_gtid(global_tid); 3993 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3994 3995 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3996 3997 // this barrier should be visible to a customer and to the threading profile 3998 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3999 OMPT_REDUCTION_DECL(th, global_tid); 4000 4001 if (packed_reduction_method == critical_reduce_block) { 4002 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 4003 4004 OMPT_REDUCTION_END; 4005 4006 // TODO: implicit barrier: should be exposed 4007 #if OMPT_SUPPORT 4008 ompt_frame_t *ompt_frame; 4009 if (ompt_enabled.enabled) { 4010 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 4011 if (ompt_frame->enter_frame.ptr == NULL) 4012 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 4013 } 4014 OMPT_STORE_RETURN_ADDRESS(global_tid); 4015 #endif 4016 #if USE_ITT_NOTIFY 4017 __kmp_threads[global_tid]->th.th_ident = loc; 4018 #endif 4019 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 4020 #if OMPT_SUPPORT && OMPT_OPTIONAL 4021 if (ompt_enabled.enabled) { 4022 ompt_frame->enter_frame = ompt_data_none; 4023 } 4024 #endif 4025 4026 } else if (packed_reduction_method == empty_reduce_block) { 4027 4028 OMPT_REDUCTION_END; 4029 4030 // usage: if team size==1, no synchronization is required (Intel platforms only) 4031 4032 // TODO: implicit barrier: should be exposed 4033 #if OMPT_SUPPORT 4034 ompt_frame_t *ompt_frame; 4035 if (ompt_enabled.enabled) { 4036 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 4037 if (ompt_frame->enter_frame.ptr == NULL) 4038 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 4039 } 4040 OMPT_STORE_RETURN_ADDRESS(global_tid); 4041 #endif 4042 #if USE_ITT_NOTIFY 4043 __kmp_threads[global_tid]->th.th_ident = loc; 4044 #endif 4045 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 4046 #if OMPT_SUPPORT && OMPT_OPTIONAL 4047 if (ompt_enabled.enabled) { 4048 ompt_frame->enter_frame = ompt_data_none; 4049 } 4050 #endif 4051 4052 } else if (packed_reduction_method == atomic_reduce_block) { 4053 4054 #if OMPT_SUPPORT 4055 ompt_frame_t *ompt_frame; 4056 if (ompt_enabled.enabled) { 4057 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 4058 if (ompt_frame->enter_frame.ptr == NULL) 4059 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 4060 } 4061 OMPT_STORE_RETURN_ADDRESS(global_tid); 4062 #endif 4063 // TODO: implicit barrier: should be exposed 4064 #if USE_ITT_NOTIFY 4065 __kmp_threads[global_tid]->th.th_ident = loc; 4066 #endif 4067 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 4068 #if OMPT_SUPPORT && OMPT_OPTIONAL 4069 if (ompt_enabled.enabled) { 4070 ompt_frame->enter_frame = ompt_data_none; 4071 } 4072 #endif 4073 4074 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 4075 tree_reduce_block)) { 4076 4077 // only primary thread executes here (primary releases all other workers) 4078 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 4079 global_tid); 4080 4081 } else { 4082 4083 // should never reach this block 4084 KMP_ASSERT(0); // "unexpected method" 4085 } 4086 if (teams_swapped) { 4087 __kmp_restore_swapped_teams(th, team, task_state); 4088 } 4089 4090 if (__kmp_env_consistency_check) 4091 __kmp_pop_sync(global_tid, ct_reduce, loc); 4092 4093 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 4094 global_tid, packed_reduction_method)); 4095 4096 return; 4097 } 4098 4099 #undef __KMP_GET_REDUCTION_METHOD 4100 #undef __KMP_SET_REDUCTION_METHOD 4101 4102 /* end of interface to fast scalable reduce routines */ 4103 4104 kmp_uint64 __kmpc_get_taskid() { 4105 4106 kmp_int32 gtid; 4107 kmp_info_t *thread; 4108 4109 gtid = __kmp_get_gtid(); 4110 if (gtid < 0) { 4111 return 0; 4112 } 4113 thread = __kmp_thread_from_gtid(gtid); 4114 return thread->th.th_current_task->td_task_id; 4115 4116 } // __kmpc_get_taskid 4117 4118 kmp_uint64 __kmpc_get_parent_taskid() { 4119 4120 kmp_int32 gtid; 4121 kmp_info_t *thread; 4122 kmp_taskdata_t *parent_task; 4123 4124 gtid = __kmp_get_gtid(); 4125 if (gtid < 0) { 4126 return 0; 4127 } 4128 thread = __kmp_thread_from_gtid(gtid); 4129 parent_task = thread->th.th_current_task->td_parent; 4130 return (parent_task == NULL ? 0 : parent_task->td_task_id); 4131 4132 } // __kmpc_get_parent_taskid 4133 4134 /*! 4135 @ingroup WORK_SHARING 4136 @param loc source location information. 4137 @param gtid global thread number. 4138 @param num_dims number of associated doacross loops. 4139 @param dims info on loops bounds. 4140 4141 Initialize doacross loop information. 4142 Expect compiler send us inclusive bounds, 4143 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
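A hedged sketch of the call sequence a compiler might emit for a
one-dimensional doacross loop (illustrative only; lb and ub stand for the
bounds of the chunk this thread received from the worksharing scheduler):

  // #pragma omp for ordered(1)
  // for (i = 2; i < 9; i += 2) with ordered depend(sink: i-2) / depend(source)
  struct kmp_dim dim;
  dim.lo = 2; dim.up = 8; dim.st = 2; // inclusive bounds, as noted above
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (kmp_int64 i = lb; i <= ub; i += 2) {
    kmp_int64 vec = i - 2;
    __kmpc_doacross_wait(loc, gtid, &vec); // block until iteration i-2 posted
    // ... loop body ...
    vec = i;
    __kmpc_doacross_post(loc, gtid, &vec); // publish completion of iteration i
  }
  __kmpc_doacross_fini(loc, gtid);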
4144 */ 4145 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 4146 const struct kmp_dim *dims) { 4147 __kmp_assert_valid_gtid(gtid); 4148 int j, idx; 4149 kmp_int64 last, trace_count; 4150 kmp_info_t *th = __kmp_threads[gtid]; 4151 kmp_team_t *team = th->th.th_team; 4152 kmp_uint32 *flags; 4153 kmp_disp_t *pr_buf = th->th.th_dispatch; 4154 dispatch_shared_info_t *sh_buf; 4155 4156 KA_TRACE( 4157 20, 4158 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4159 gtid, num_dims, !team->t.t_serialized)); 4160 KMP_DEBUG_ASSERT(dims != NULL); 4161 KMP_DEBUG_ASSERT(num_dims > 0); 4162 4163 if (team->t.t_serialized) { 4164 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4165 return; // no dependencies if team is serialized 4166 } 4167 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4168 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4169 // the next loop 4170 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4171 4172 // Save bounds info into allocated private buffer 4173 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4174 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4175 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4176 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4177 pr_buf->th_doacross_info[0] = 4178 (kmp_int64)num_dims; // first element is number of dimensions 4179 // Save also address of num_done in order to access it later without knowing 4180 // the buffer index 4181 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4182 pr_buf->th_doacross_info[2] = dims[0].lo; 4183 pr_buf->th_doacross_info[3] = dims[0].up; 4184 pr_buf->th_doacross_info[4] = dims[0].st; 4185 last = 5; 4186 for (j = 1; j < num_dims; ++j) { 4187 kmp_int64 4188 range_length; // To keep ranges of all dimensions but the first dims[0] 4189 if (dims[j].st == 1) { // most common case 4190 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4191 range_length = dims[j].up - dims[j].lo + 1; 4192 } else { 4193 if (dims[j].st > 0) { 4194 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4195 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4196 } else { // negative increment 4197 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4198 range_length = 4199 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4200 } 4201 } 4202 pr_buf->th_doacross_info[last++] = range_length; 4203 pr_buf->th_doacross_info[last++] = dims[j].lo; 4204 pr_buf->th_doacross_info[last++] = dims[j].up; 4205 pr_buf->th_doacross_info[last++] = dims[j].st; 4206 } 4207 4208 // Compute total trip count. 4209 // Start with range of dims[0] which we don't need to keep in the buffer. 
4210 if (dims[0].st == 1) { // most common case 4211 trace_count = dims[0].up - dims[0].lo + 1; 4212 } else if (dims[0].st > 0) { 4213 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4214 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4215 } else { // negative increment 4216 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4217 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4218 } 4219 for (j = 1; j < num_dims; ++j) { 4220 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4221 } 4222 KMP_DEBUG_ASSERT(trace_count > 0); 4223 4224 // Check if shared buffer is not occupied by other loop (idx - 4225 // __kmp_dispatch_num_buffers) 4226 if (idx != sh_buf->doacross_buf_idx) { 4227 // Shared buffer is occupied, wait for it to be free 4228 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4229 __kmp_eq_4, NULL); 4230 } 4231 #if KMP_32_BIT_ARCH 4232 // Check if we are the first thread. After the CAS the first thread gets 0, 4233 // others get 1 if initialization is in progress, allocated pointer otherwise. 4234 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4235 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4236 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4237 #else 4238 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4239 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4240 #endif 4241 if (flags == NULL) { 4242 // we are the first thread, allocate the array of flags 4243 size_t size = 4244 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4245 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4246 KMP_MB(); 4247 sh_buf->doacross_flags = flags; 4248 } else if (flags == (kmp_uint32 *)1) { 4249 #if KMP_32_BIT_ARCH 4250 // initialization is still in progress, need to wait 4251 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4252 #else 4253 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4254 #endif 4255 KMP_YIELD(TRUE); 4256 KMP_MB(); 4257 } else { 4258 KMP_MB(); 4259 } 4260 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4261 pr_buf->th_doacross_flags = 4262 sh_buf->doacross_flags; // save private copy in order to not 4263 // touch shared buffer on each iteration 4264 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4265 } 4266 4267 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4268 __kmp_assert_valid_gtid(gtid); 4269 kmp_int64 shft; 4270 size_t num_dims, i; 4271 kmp_uint32 flag; 4272 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4273 kmp_info_t *th = __kmp_threads[gtid]; 4274 kmp_team_t *team = th->th.th_team; 4275 kmp_disp_t *pr_buf; 4276 kmp_int64 lo, up, st; 4277 4278 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4279 if (team->t.t_serialized) { 4280 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4281 return; // no dependencies if team is serialized 4282 } 4283 4284 // calculate sequential iteration number and check out-of-bounds condition 4285 pr_buf = th->th.th_dispatch; 4286 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4287 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4288 lo = pr_buf->th_doacross_info[2]; 4289 up = pr_buf->th_doacross_info[3]; 4290 st = pr_buf->th_doacross_info[4]; 4291 #if OMPT_SUPPORT && OMPT_OPTIONAL 4292 SimpleVLA<ompt_dependence_t> deps(num_dims); 4293 #endif 4294 if (st == 1) { // most common case 4295 if (vec[0] < lo || vec[0] > up) { 
4296 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4297 "bounds [%lld,%lld]\n", 4298 gtid, vec[0], lo, up)); 4299 return; 4300 } 4301 iter_number = vec[0] - lo; 4302 } else if (st > 0) { 4303 if (vec[0] < lo || vec[0] > up) { 4304 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4305 "bounds [%lld,%lld]\n", 4306 gtid, vec[0], lo, up)); 4307 return; 4308 } 4309 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4310 } else { // negative increment 4311 if (vec[0] > lo || vec[0] < up) { 4312 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4313 "bounds [%lld,%lld]\n", 4314 gtid, vec[0], lo, up)); 4315 return; 4316 } 4317 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4318 } 4319 #if OMPT_SUPPORT && OMPT_OPTIONAL 4320 deps[0].variable.value = iter_number; 4321 deps[0].dependence_type = ompt_dependence_type_sink; 4322 #endif 4323 for (i = 1; i < num_dims; ++i) { 4324 kmp_int64 iter, ln; 4325 size_t j = i * 4; 4326 ln = pr_buf->th_doacross_info[j + 1]; 4327 lo = pr_buf->th_doacross_info[j + 2]; 4328 up = pr_buf->th_doacross_info[j + 3]; 4329 st = pr_buf->th_doacross_info[j + 4]; 4330 if (st == 1) { 4331 if (vec[i] < lo || vec[i] > up) { 4332 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4333 "bounds [%lld,%lld]\n", 4334 gtid, vec[i], lo, up)); 4335 return; 4336 } 4337 iter = vec[i] - lo; 4338 } else if (st > 0) { 4339 if (vec[i] < lo || vec[i] > up) { 4340 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4341 "bounds [%lld,%lld]\n", 4342 gtid, vec[i], lo, up)); 4343 return; 4344 } 4345 iter = (kmp_uint64)(vec[i] - lo) / st; 4346 } else { // st < 0 4347 if (vec[i] > lo || vec[i] < up) { 4348 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4349 "bounds [%lld,%lld]\n", 4350 gtid, vec[i], lo, up)); 4351 return; 4352 } 4353 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4354 } 4355 iter_number = iter + ln * iter_number; 4356 #if OMPT_SUPPORT && OMPT_OPTIONAL 4357 deps[i].variable.value = iter; 4358 deps[i].dependence_type = ompt_dependence_type_sink; 4359 #endif 4360 } 4361 shft = iter_number % 32; // use 32-bit granularity 4362 iter_number >>= 5; // divided by 32 4363 flag = 1 << shft; 4364 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4365 KMP_YIELD(TRUE); 4366 } 4367 KMP_MB(); 4368 #if OMPT_SUPPORT && OMPT_OPTIONAL 4369 if (ompt_enabled.ompt_callback_dependences) { 4370 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4371 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4372 } 4373 #endif 4374 KA_TRACE(20, 4375 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4376 gtid, (iter_number << 5) + shft)); 4377 } 4378 4379 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4380 __kmp_assert_valid_gtid(gtid); 4381 kmp_int64 shft; 4382 size_t num_dims, i; 4383 kmp_uint32 flag; 4384 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4385 kmp_info_t *th = __kmp_threads[gtid]; 4386 kmp_team_t *team = th->th.th_team; 4387 kmp_disp_t *pr_buf; 4388 kmp_int64 lo, st; 4389 4390 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4391 if (team->t.t_serialized) { 4392 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4393 return; // no dependencies if team is serialized 4394 } 4395 4396 // calculate sequential iteration number (same as in "wait" but no 4397 // out-of-bounds checks) 4398 pr_buf = th->th.th_dispatch; 4399 
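  // Layout of th_doacross_info, filled in by __kmpc_doacross_init(): [0] holds
  // the number of dimensions, [1] the address of the shared doacross_num_done
  // counter, [2..4] the lo/up/st of dims[0], and for every further dimension j
  // the quadruple [4*j+1 .. 4*j+4] = range_length/lo/up/st; the indexing below
  // relies on this layout.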
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4400 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4401 lo = pr_buf->th_doacross_info[2]; 4402 st = pr_buf->th_doacross_info[4]; 4403 #if OMPT_SUPPORT && OMPT_OPTIONAL 4404 SimpleVLA<ompt_dependence_t> deps(num_dims); 4405 #endif 4406 if (st == 1) { // most common case 4407 iter_number = vec[0] - lo; 4408 } else if (st > 0) { 4409 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4410 } else { // negative increment 4411 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4412 } 4413 #if OMPT_SUPPORT && OMPT_OPTIONAL 4414 deps[0].variable.value = iter_number; 4415 deps[0].dependence_type = ompt_dependence_type_source; 4416 #endif 4417 for (i = 1; i < num_dims; ++i) { 4418 kmp_int64 iter, ln; 4419 size_t j = i * 4; 4420 ln = pr_buf->th_doacross_info[j + 1]; 4421 lo = pr_buf->th_doacross_info[j + 2]; 4422 st = pr_buf->th_doacross_info[j + 4]; 4423 if (st == 1) { 4424 iter = vec[i] - lo; 4425 } else if (st > 0) { 4426 iter = (kmp_uint64)(vec[i] - lo) / st; 4427 } else { // st < 0 4428 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4429 } 4430 iter_number = iter + ln * iter_number; 4431 #if OMPT_SUPPORT && OMPT_OPTIONAL 4432 deps[i].variable.value = iter; 4433 deps[i].dependence_type = ompt_dependence_type_source; 4434 #endif 4435 } 4436 #if OMPT_SUPPORT && OMPT_OPTIONAL 4437 if (ompt_enabled.ompt_callback_dependences) { 4438 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4439 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4440 } 4441 #endif 4442 shft = iter_number % 32; // use 32-bit granularity 4443 iter_number >>= 5; // divided by 32 4444 flag = 1 << shft; 4445 KMP_MB(); 4446 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4447 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4448 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4449 (iter_number << 5) + shft)); 4450 } 4451 4452 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4453 __kmp_assert_valid_gtid(gtid); 4454 kmp_int32 num_done; 4455 kmp_info_t *th = __kmp_threads[gtid]; 4456 kmp_team_t *team = th->th.th_team; 4457 kmp_disp_t *pr_buf = th->th.th_dispatch; 4458 4459 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4460 if (team->t.t_serialized) { 4461 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4462 return; // nothing to do 4463 } 4464 num_done = 4465 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4466 if (num_done == th->th.th_team_nproc) { 4467 // we are the last thread, need to free shared resources 4468 int idx = pr_buf->th_doacross_buf_idx - 1; 4469 dispatch_shared_info_t *sh_buf = 4470 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4471 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4472 (kmp_int64)&sh_buf->doacross_num_done); 4473 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4474 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4475 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4476 sh_buf->doacross_flags = NULL; 4477 sh_buf->doacross_num_done = 0; 4478 sh_buf->doacross_buf_idx += 4479 __kmp_dispatch_num_buffers; // free buffer for future re-use 4480 } 4481 // free private resources (need to keep buffer index forever) 4482 pr_buf->th_doacross_flags = NULL; 4483 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4484 pr_buf->th_doacross_info = NULL; 4485 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4486 } 4487 4488 /* OpenMP 5.1 Memory Management routines */ 
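// A hedged usage sketch of the entry points below as seen from user code
// (illustrative only; omp_high_bw_mem_alloc and omp_null_allocator are
// predefined allocator handles from omp.h, and a request may return NULL
// depending on the allocator's fallback traits):
//
//   double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
//                                             omp_high_bw_mem_alloc);
//   if (buf) {
//     // ... use buf ...
//     omp_free(buf, omp_high_bw_mem_alloc);
//   }
//
// Passing omp_null_allocator selects the calling thread's default allocator
// (the def-allocator-var ICV).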
4489 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4490 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4491 } 4492 4493 void *omp_aligned_alloc(size_t align, size_t size, 4494 omp_allocator_handle_t allocator) { 4495 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4496 } 4497 4498 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4499 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4500 } 4501 4502 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4503 omp_allocator_handle_t allocator) { 4504 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4505 } 4506 4507 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4508 omp_allocator_handle_t free_allocator) { 4509 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4510 free_allocator); 4511 } 4512 4513 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4514 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4515 } 4516 /* end of OpenMP 5.1 Memory Management routines */ 4517 4518 int __kmpc_get_target_offload(void) { 4519 if (!__kmp_init_serial) { 4520 __kmp_serial_initialize(); 4521 } 4522 return __kmp_target_offload; 4523 } 4524 4525 int __kmpc_pause_resource(kmp_pause_status_t level) { 4526 if (!__kmp_init_serial) { 4527 return 1; // Can't pause if runtime is not initialized 4528 } 4529 return __kmp_pause_resource(level); 4530 } 4531 4532 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4533 if (!__kmp_init_serial) 4534 __kmp_serial_initialize(); 4535 4536 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4537 4538 #if OMPT_SUPPORT 4539 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4540 ompt_callbacks.ompt_callback(ompt_callback_error)( 4541 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4542 OMPT_GET_RETURN_ADDRESS(0)); 4543 } 4544 #endif // OMPT_SUPPORT 4545 4546 char *src_loc; 4547 if (loc && loc->psource) { 4548 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4549 src_loc = 4550 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col); 4551 __kmp_str_loc_free(&str_loc); 4552 } else { 4553 src_loc = __kmp_str_format("unknown"); 4554 } 4555 4556 if (severity == severity_warning) 4557 KMP_WARNING(UserDirectedWarning, src_loc, message); 4558 else 4559 KMP_FATAL(UserDirectedError, src_loc, message); 4560 4561 __kmp_str_free(&src_loc); 4562 } 4563 4564 // Mark begin of scope directive. 4565 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4566 // reserved is for extension of scope directive and not used. 4567 #if OMPT_SUPPORT && OMPT_OPTIONAL 4568 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4569 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4570 int tid = __kmp_tid_from_gtid(gtid); 4571 ompt_callbacks.ompt_callback(ompt_callback_work)( 4572 ompt_work_scope, ompt_scope_begin, 4573 &(team->t.ompt_team_info.parallel_data), 4574 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4575 OMPT_GET_RETURN_ADDRESS(0)); 4576 } 4577 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4578 } 4579 4580 // Mark end of scope directive 4581 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4582 // reserved is for extension of scope directive and not used. 
4583 #if OMPT_SUPPORT && OMPT_OPTIONAL 4584 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4585 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4586 int tid = __kmp_tid_from_gtid(gtid); 4587 ompt_callbacks.ompt_callback(ompt_callback_work)( 4588 ompt_work_scope, ompt_scope_end, 4589 &(team->t.ompt_team_info.parallel_data), 4590 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4591 OMPT_GET_RETURN_ADDRESS(0)); 4592 } 4593 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4594 } 4595 4596 #ifdef KMP_USE_VERSION_SYMBOLS 4597 // For GOMP compatibility there are two versions of each omp_* API. 4598 // One is the plain C symbol and one is the Fortran symbol with an appended 4599 // underscore. When we implement a specific ompc_* version of an omp_* 4600 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4601 // instead of the Fortran versions in kmp_ftn_entry.h 4602 extern "C" { 4603 // Have to undef these from omp.h so they aren't translated into 4604 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4605 #ifdef omp_set_affinity_format 4606 #undef omp_set_affinity_format 4607 #endif 4608 #ifdef omp_get_affinity_format 4609 #undef omp_get_affinity_format 4610 #endif 4611 #ifdef omp_display_affinity 4612 #undef omp_display_affinity 4613 #endif 4614 #ifdef omp_capture_affinity 4615 #undef omp_capture_affinity 4616 #endif 4617 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4618 "OMP_5.0"); 4619 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4620 "OMP_5.0"); 4621 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4622 "OMP_5.0"); 4623 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4624 "OMP_5.0"); 4625 } // extern "C" 4626 #endif 4627
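// A hedged sketch of the ELF symbol-versioning mechanism the
// KMP_VERSION_OMPC_SYMBOL macros above rely on (illustrative only, not the
// actual macro expansion from kmp.h; the alias name is made up): an alias of
// the ompc_* implementation is exported under a version node via a .symver
// directive, e.g.
//
//   __typeof__(ompc_set_affinity_format) __alias_omp_set_affinity_format
//       __attribute__((alias("ompc_set_affinity_format")));
//   __asm__(".symver __alias_omp_set_affinity_format, "
//           "omp_set_affinity_format@@OMP_5.0");
//
// so the plain omp_* symbol resolves to the ompc_* entry point rather than to
// the Fortran wrapper from kmp_ftn_entry.h.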