/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes __kmpc_end() a
  // no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // The normal exit process on Windows does not allow worker threads of the
  // final parallel region to finish reporting their events, so shutting down
  // the library here fixes the issue at least for the cases where __kmpc_end()
  // is placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
93 94 If multiple non-OpenMP threads all enter an OpenMP construct then this 95 will be a unique thread identifier among all the threads created by 96 the OpenMP runtime (but the value cannot be defined in terms of 97 OpenMP thread ids returned by omp_get_thread_num()). 98 */ 99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 100 kmp_int32 gtid = __kmp_entry_gtid(); 101 102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 103 104 return gtid; 105 } 106 107 /*! 108 @ingroup THREAD_STATES 109 @param loc Source location information. 110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 111 112 This function can be called in any context. 113 It returns the total number of threads under the control of the OpenMP runtime. 114 That is not a number that can be determined by any OpenMP standard calls, since 115 the library may be called from more than one non-OpenMP thread, and this 116 reflects the total over all such calls. Similarly the runtime maintains 117 underlying threads even when they are not active (since the cost of creating 118 and destroying OS threads is high), this call counts all such threads even if 119 they are not waiting for work. 120 */ 121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 122 KC_TRACE(10, 123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 124 125 return TCR_4(__kmp_all_nth); 126 } 127 128 /*! 129 @ingroup THREAD_STATES 130 @param loc Source location information. 131 @return The thread number of the calling thread in the innermost active parallel 132 construct. 133 */ 134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 136 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 137 } 138 139 /*! 140 @ingroup THREAD_STATES 141 @param loc Source location information. 142 @return The number of threads in the innermost active parallel construct. 143 */ 144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 146 147 return __kmp_entry_thread()->th.th_team->t.t_nproc; 148 } 149 150 /*! 151 * @ingroup DEPRECATED 152 * @param loc location description 153 * 154 * This function need not be called. It always returns TRUE. 
155 */ 156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 157 #ifndef KMP_DEBUG 158 159 return TRUE; 160 161 #else 162 163 const char *semi2; 164 const char *semi3; 165 int line_no; 166 167 if (__kmp_par_range == 0) { 168 return TRUE; 169 } 170 semi2 = loc->psource; 171 if (semi2 == NULL) { 172 return TRUE; 173 } 174 semi2 = strchr(semi2, ';'); 175 if (semi2 == NULL) { 176 return TRUE; 177 } 178 semi2 = strchr(semi2 + 1, ';'); 179 if (semi2 == NULL) { 180 return TRUE; 181 } 182 if (__kmp_par_range_filename[0]) { 183 const char *name = semi2 - 1; 184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 185 name--; 186 } 187 if ((*name == '/') || (*name == ';')) { 188 name++; 189 } 190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 191 return __kmp_par_range < 0; 192 } 193 } 194 semi3 = strchr(semi2 + 1, ';'); 195 if (__kmp_par_range_routine[0]) { 196 if ((semi3 != NULL) && (semi3 > semi2) && 197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 198 return __kmp_par_range < 0; 199 } 200 } 201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 203 return __kmp_par_range > 0; 204 } 205 return __kmp_par_range < 0; 206 } 207 return TRUE; 208 209 #endif /* KMP_DEBUG */ 210 } 211 212 /*! 213 @ingroup THREAD_STATES 214 @param loc Source location information. 215 @return 1 if this thread is executing inside an active parallel region, zero if 216 not. 217 */ 218 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 219 return __kmp_entry_thread()->th.th_root->r.r_active; 220 } 221 222 /*! 223 @ingroup PARALLEL 224 @param loc source location information 225 @param global_tid global thread number 226 @param num_threads number of threads requested for this parallel construct 227 228 Set the number of threads to be used by the next fork spawned by this thread. 229 This call is only required if the parallel construct has a `num_threads` clause. 230 */ 231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 232 kmp_int32 num_threads) { 233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 234 global_tid, num_threads)); 235 __kmp_assert_valid_gtid(global_tid); 236 __kmp_push_num_threads(loc, global_tid, num_threads); 237 } 238 239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 241 /* the num_threads are automatically popped */ 242 } 243 244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 245 kmp_int32 proc_bind) { 246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 247 proc_bind)); 248 __kmp_assert_valid_gtid(global_tid); 249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 250 } 251 252 /*! 253 @ingroup PARALLEL 254 @param loc source location information 255 @param argc total number of arguments in the ellipsis 256 @param microtask pointer to callback routine consisting of outlined parallel 257 construct 258 @param ... pointers to shared variables that aren't global 259 260 Do the actual fork and call the microtask in the relevant number of threads. 261 */ 262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 263 int gtid = __kmp_entry_gtid(); 264 265 #if (KMP_STATS_ENABLED) 266 // If we were in a serial region, then stop the serial timer, record 267 // the event, and start parallel region timer 268 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 269 if (previous_state == stats_state_e::SERIAL_REGION) { 270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 271 } else { 272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 273 } 274 int inParallel = __kmpc_in_parallel(loc); 275 if (inParallel) { 276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 277 } else { 278 KMP_COUNT_BLOCK(OMP_PARALLEL); 279 } 280 #endif 281 282 // maybe to save thr_state is enough here 283 { 284 va_list ap; 285 va_start(ap, microtask); 286 287 #if OMPT_SUPPORT 288 ompt_frame_t *ompt_frame; 289 if (ompt_enabled.enabled) { 290 kmp_info_t *master_th = __kmp_threads[gtid]; 291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame; 292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 293 } 294 OMPT_STORE_RETURN_ADDRESS(gtid); 295 #endif 296 297 #if INCLUDE_SSC_MARKS 298 SSC_MARK_FORKING(); 299 #endif 300 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 303 kmp_va_addr_of(ap)); 304 #if INCLUDE_SSC_MARKS 305 SSC_MARK_JOINING(); 306 #endif 307 __kmp_join_call(loc, gtid 308 #if OMPT_SUPPORT 309 , 310 fork_context_intel 311 #endif 312 ); 313 314 va_end(ap); 315 316 #if OMPT_SUPPORT 317 if (ompt_enabled.enabled) { 318 ompt_frame->enter_frame = ompt_data_none; 319 } 320 #endif 321 } 322 323 #if KMP_STATS_ENABLED 324 if (previous_state == stats_state_e::SERIAL_REGION) { 325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 326 KMP_SET_THREAD_STATE(previous_state); 327 } else { 328 KMP_POP_PARTITIONED_TIMER(); 329 } 330 #endif // KMP_STATS_ENABLED 331 } 332 333 /*! 334 @ingroup PARALLEL 335 @param loc source location information 336 @param microtask pointer to callback routine consisting of outlined parallel 337 construct 338 @param cond condition for running in parallel 339 @param args struct of pointers to shared variables that aren't global 340 341 Perform a fork only if the condition is true. 342 */ 343 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 344 kmp_int32 cond, void *args) { 345 int gtid = __kmp_entry_gtid(); 346 if (cond) { 347 if (args) 348 __kmpc_fork_call(loc, argc, microtask, args); 349 else 350 __kmpc_fork_call(loc, argc, microtask); 351 } else { 352 __kmpc_serialized_parallel(loc, gtid); 353 354 #if OMPT_SUPPORT 355 void *exit_frame_ptr; 356 #endif 357 358 if (args) 359 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, 360 /*npr=*/0, 361 /*argc=*/1, &args 362 #if OMPT_SUPPORT 363 , 364 &exit_frame_ptr 365 #endif 366 ); 367 else 368 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, 369 /*npr=*/0, 370 /*argc=*/0, 371 /*args=*/nullptr 372 #if OMPT_SUPPORT 373 , 374 &exit_frame_ptr 375 #endif 376 ); 377 378 __kmpc_end_serialized_parallel(loc, gtid); 379 } 380 } 381 382 /*! 383 @ingroup PARALLEL 384 @param loc source location information 385 @param global_tid global thread number 386 @param num_teams number of teams requested for the teams construct 387 @param num_threads number of threads per team requested for the teams construct 388 389 Set the number of teams to be used by the teams construct. 390 This call is only required if the teams construct has a `num_teams` clause 391 or a `thread_limit` clause (or both). 
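
As an illustrative sketch only (the argument values and the lowering shown here
are assumptions for documentation purposes, not the exact code any particular
compiler emits), a construct such as
@code
#pragma omp teams num_teams(4) thread_limit(16)
@endcode
might be lowered into a call of __kmpc_push_num_teams(&loc, gtid, 4, 16)
immediately followed by the __kmpc_fork_teams() call for the outlined teams
region.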
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param thread_limit limit on number of threads which can be created within the
current task

Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target`
construct.
*/
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams was called; set the default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
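
As a rough, hedged sketch of one possible lowering (names such as outlined_fn,
shared1/shared2 and the exact argument passing are placeholders, not what any
particular compiler generates):
@code
if (condition) {
  __kmpc_fork_call(&loc, /*argc=*/2, outlined_fn, shared1, shared2);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_serialized_parallel(&loc, gtid);
  kmp_int32 zero_bound_tid = 0;
  outlined_fn(&gtid, &zero_bound_tid, shared1, shared2); // run on this thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode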
557 */ 558 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 559 // The implementation is now in kmp_runtime.cpp so that it can share static 560 // functions with kmp_fork_call since the tasks to be done are similar in 561 // each case. 562 __kmp_assert_valid_gtid(global_tid); 563 #if OMPT_SUPPORT 564 OMPT_STORE_RETURN_ADDRESS(global_tid); 565 #endif 566 __kmp_serialized_parallel(loc, global_tid); 567 } 568 569 /*! 570 @ingroup PARALLEL 571 @param loc source location information 572 @param global_tid global thread number 573 574 Leave a serialized parallel construct. 575 */ 576 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 577 kmp_internal_control_t *top; 578 kmp_info_t *this_thr; 579 kmp_team_t *serial_team; 580 581 KC_TRACE(10, 582 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 583 584 /* skip all this code for autopar serialized loops since it results in 585 unacceptable overhead */ 586 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 587 return; 588 589 // Not autopar code 590 __kmp_assert_valid_gtid(global_tid); 591 if (!TCR_4(__kmp_init_parallel)) 592 __kmp_parallel_initialize(); 593 594 __kmp_resume_if_soft_paused(); 595 596 this_thr = __kmp_threads[global_tid]; 597 serial_team = this_thr->th.th_serial_team; 598 599 kmp_task_team_t *task_team = this_thr->th.th_task_team; 600 // we need to wait for the proxy tasks before finishing the thread 601 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || 602 task_team->tt.tt_hidden_helper_task_encountered)) 603 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 604 605 KMP_MB(); 606 KMP_DEBUG_ASSERT(serial_team); 607 KMP_ASSERT(serial_team->t.t_serialized); 608 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 609 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 610 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 611 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 612 613 #if OMPT_SUPPORT 614 if (ompt_enabled.enabled && 615 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 616 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 617 if (ompt_enabled.ompt_callback_implicit_task) { 618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 619 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 620 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 621 } 622 623 // reset clear the task id only after unlinking the task 624 ompt_data_t *parent_task_data; 625 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 626 627 if (ompt_enabled.ompt_callback_parallel_end) { 628 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 629 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 630 ompt_parallel_invoker_program | ompt_parallel_team, 631 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 632 } 633 __ompt_lw_taskteam_unlink(this_thr); 634 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 635 } 636 #endif 637 638 /* If necessary, pop the internal control stack values and replace the team 639 * values */ 640 top = serial_team->t.t_control_stack_top; 641 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 642 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 643 serial_team->t.t_control_stack_top = top->next; 644 __kmp_free(top); 645 } 646 647 /* pop dispatch buffers stack */ 648 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 649 { 650 dispatch_private_info_t *disp_buffer = 651 
serial_team->t.t_dispatch->th_disp_buffer; 652 serial_team->t.t_dispatch->th_disp_buffer = 653 serial_team->t.t_dispatch->th_disp_buffer->next; 654 __kmp_free(disp_buffer); 655 } 656 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 657 658 --serial_team->t.t_serialized; 659 if (serial_team->t.t_serialized == 0) { 660 661 /* return to the parallel section */ 662 663 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 664 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 665 __kmp_clear_x87_fpu_status_word(); 666 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 667 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 668 } 669 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 670 671 __kmp_pop_current_task_from_thread(this_thr); 672 #if OMPD_SUPPORT 673 if (ompd_state & OMPD_ENABLE_BP) 674 ompd_bp_parallel_end(); 675 #endif 676 677 this_thr->th.th_team = serial_team->t.t_parent; 678 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 679 680 /* restore values cached in the thread */ 681 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 682 this_thr->th.th_team_master = 683 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 684 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 685 686 /* TODO the below shouldn't need to be adjusted for serialized teams */ 687 this_thr->th.th_dispatch = 688 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 689 690 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 691 this_thr->th.th_current_task->td_flags.executing = 1; 692 693 if (__kmp_tasking_mode != tskm_immediate_exec) { 694 // Copy the task team from the new child / old parent team to the thread. 695 this_thr->th.th_task_team = 696 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 697 KA_TRACE(20, 698 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 699 "team %p\n", 700 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 701 } 702 #if KMP_AFFINITY_SUPPORTED 703 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { 704 __kmp_reset_root_init_mask(global_tid); 705 } 706 #endif 707 } else { 708 if (__kmp_tasking_mode != tskm_immediate_exec) { 709 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 710 "depth of serial team %p to %d\n", 711 global_tid, serial_team, serial_team->t.t_serialized)); 712 } 713 } 714 715 serial_team->t.t_level--; 716 if (__kmp_env_consistency_check) 717 __kmp_pop_parallel(global_tid, NULL); 718 #if OMPT_SUPPORT 719 if (ompt_enabled.enabled) 720 this_thr->th.ompt_thread_info.state = 721 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 722 : ompt_state_work_parallel); 723 #endif 724 } 725 726 /*! 727 @ingroup SYNCHRONIZATION 728 @param loc source location information. 729 730 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 731 depending on the memory ordering convention obeyed by the compiler 732 even that may not be necessary). 733 */ 734 void __kmpc_flush(ident_t *loc) { 735 KC_TRACE(10, ("__kmpc_flush: called\n")); 736 737 /* need explicit __mf() here since use volatile instead in library */ 738 KMP_MFENCE(); /* Flush all pending memory write invalidates. 
*/ 739 740 #if OMPT_SUPPORT && OMPT_OPTIONAL 741 if (ompt_enabled.ompt_callback_flush) { 742 ompt_callbacks.ompt_callback(ompt_callback_flush)( 743 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 744 } 745 #endif 746 } 747 748 /* -------------------------------------------------------------------------- */ 749 /*! 750 @ingroup SYNCHRONIZATION 751 @param loc source location information 752 @param global_tid thread id. 753 754 Execute a barrier. 755 */ 756 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 757 KMP_COUNT_BLOCK(OMP_BARRIER); 758 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 759 __kmp_assert_valid_gtid(global_tid); 760 761 if (!TCR_4(__kmp_init_parallel)) 762 __kmp_parallel_initialize(); 763 764 __kmp_resume_if_soft_paused(); 765 766 if (__kmp_env_consistency_check) { 767 if (loc == 0) { 768 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 769 } 770 __kmp_check_barrier(global_tid, ct_barrier, loc); 771 } 772 773 #if OMPT_SUPPORT 774 ompt_frame_t *ompt_frame; 775 if (ompt_enabled.enabled) { 776 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 777 if (ompt_frame->enter_frame.ptr == NULL) 778 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 779 } 780 OMPT_STORE_RETURN_ADDRESS(global_tid); 781 #endif 782 __kmp_threads[global_tid]->th.th_ident = loc; 783 // TODO: explicit barrier_wait_id: 784 // this function is called when 'barrier' directive is present or 785 // implicit barrier at the end of a worksharing construct. 786 // 1) better to add a per-thread barrier counter to a thread data structure 787 // 2) set to 0 when a new team is created 788 // 4) no sync is required 789 790 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 791 #if OMPT_SUPPORT && OMPT_OPTIONAL 792 if (ompt_enabled.enabled) { 793 ompt_frame->enter_frame = ompt_data_none; 794 } 795 #endif 796 } 797 798 /* The BARRIER for a MASTER section is always explicit */ 799 /*! 800 @ingroup WORK_SHARING 801 @param loc source location information. 802 @param global_tid global thread number . 803 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
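
A typical usage pattern (shown only as an illustrative sketch with placeholder
names) guards the master block and its explicit end call on the return value:
@code
if (__kmpc_master(&loc, gtid)) {
  // ... body of the master region ...
  __kmpc_end_master(&loc, gtid);
}
@endcode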
804 */ 805 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 806 int status = 0; 807 808 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 809 __kmp_assert_valid_gtid(global_tid); 810 811 if (!TCR_4(__kmp_init_parallel)) 812 __kmp_parallel_initialize(); 813 814 __kmp_resume_if_soft_paused(); 815 816 if (KMP_MASTER_GTID(global_tid)) { 817 KMP_COUNT_BLOCK(OMP_MASTER); 818 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 819 status = 1; 820 } 821 822 #if OMPT_SUPPORT && OMPT_OPTIONAL 823 if (status) { 824 if (ompt_enabled.ompt_callback_masked) { 825 kmp_info_t *this_thr = __kmp_threads[global_tid]; 826 kmp_team_t *team = this_thr->th.th_team; 827 828 int tid = __kmp_tid_from_gtid(global_tid); 829 ompt_callbacks.ompt_callback(ompt_callback_masked)( 830 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 831 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 832 OMPT_GET_RETURN_ADDRESS(0)); 833 } 834 } 835 #endif 836 837 if (__kmp_env_consistency_check) { 838 #if KMP_USE_DYNAMIC_LOCK 839 if (status) 840 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 841 else 842 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 843 #else 844 if (status) 845 __kmp_push_sync(global_tid, ct_master, loc, NULL); 846 else 847 __kmp_check_sync(global_tid, ct_master, loc, NULL); 848 #endif 849 } 850 851 return status; 852 } 853 854 /*! 855 @ingroup WORK_SHARING 856 @param loc source location information. 857 @param global_tid global thread number . 858 859 Mark the end of a <tt>master</tt> region. This should only be called by the 860 thread that executes the <tt>master</tt> region. 861 */ 862 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 863 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 864 __kmp_assert_valid_gtid(global_tid); 865 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 866 KMP_POP_PARTITIONED_TIMER(); 867 868 #if OMPT_SUPPORT && OMPT_OPTIONAL 869 kmp_info_t *this_thr = __kmp_threads[global_tid]; 870 kmp_team_t *team = this_thr->th.th_team; 871 if (ompt_enabled.ompt_callback_masked) { 872 int tid = __kmp_tid_from_gtid(global_tid); 873 ompt_callbacks.ompt_callback(ompt_callback_masked)( 874 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 875 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 876 OMPT_GET_RETURN_ADDRESS(0)); 877 } 878 #endif 879 880 if (__kmp_env_consistency_check) { 881 if (KMP_MASTER_GTID(global_tid)) 882 __kmp_pop_sync(global_tid, ct_master, loc); 883 } 884 } 885 886 /*! 887 @ingroup WORK_SHARING 888 @param loc source location information. 889 @param global_tid global thread number. 890 @param filter result of evaluating filter clause on thread global_tid, or zero 891 if no filter clause present 892 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 
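
As a sketch only (filter_expr and the other names are placeholders), a masked
construct with a filter clause may be lowered as:
@code
kmp_int32 filter = filter_expr; // evaluated by every thread in the team
if (__kmpc_masked(&loc, gtid, filter)) {
  // ... body of the masked region ...
  __kmpc_end_masked(&loc, gtid);
}
@endcode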
893 */ 894 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 895 int status = 0; 896 int tid; 897 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 898 __kmp_assert_valid_gtid(global_tid); 899 900 if (!TCR_4(__kmp_init_parallel)) 901 __kmp_parallel_initialize(); 902 903 __kmp_resume_if_soft_paused(); 904 905 tid = __kmp_tid_from_gtid(global_tid); 906 if (tid == filter) { 907 KMP_COUNT_BLOCK(OMP_MASKED); 908 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 909 status = 1; 910 } 911 912 #if OMPT_SUPPORT && OMPT_OPTIONAL 913 if (status) { 914 if (ompt_enabled.ompt_callback_masked) { 915 kmp_info_t *this_thr = __kmp_threads[global_tid]; 916 kmp_team_t *team = this_thr->th.th_team; 917 ompt_callbacks.ompt_callback(ompt_callback_masked)( 918 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 919 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 920 OMPT_GET_RETURN_ADDRESS(0)); 921 } 922 } 923 #endif 924 925 if (__kmp_env_consistency_check) { 926 #if KMP_USE_DYNAMIC_LOCK 927 if (status) 928 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 929 else 930 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 931 #else 932 if (status) 933 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 934 else 935 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 936 #endif 937 } 938 939 return status; 940 } 941 942 /*! 943 @ingroup WORK_SHARING 944 @param loc source location information. 945 @param global_tid global thread number . 946 947 Mark the end of a <tt>masked</tt> region. This should only be called by the 948 thread that executes the <tt>masked</tt> region. 949 */ 950 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 951 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 952 __kmp_assert_valid_gtid(global_tid); 953 KMP_POP_PARTITIONED_TIMER(); 954 955 #if OMPT_SUPPORT && OMPT_OPTIONAL 956 kmp_info_t *this_thr = __kmp_threads[global_tid]; 957 kmp_team_t *team = this_thr->th.th_team; 958 if (ompt_enabled.ompt_callback_masked) { 959 int tid = __kmp_tid_from_gtid(global_tid); 960 ompt_callbacks.ompt_callback(ompt_callback_masked)( 961 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 962 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 963 OMPT_GET_RETURN_ADDRESS(0)); 964 } 965 #endif 966 967 if (__kmp_env_consistency_check) { 968 __kmp_pop_sync(global_tid, ct_masked, loc); 969 } 970 } 971 972 /*! 973 @ingroup WORK_SHARING 974 @param loc source location information. 975 @param gtid global thread number. 976 977 Start execution of an <tt>ordered</tt> construct. 
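
Within the outlined body of an ordered loop the construct is typically
bracketed by the begin/end entry points, e.g. (illustrative sketch only):
@code
// inside the chunk dispatched for the current iteration
__kmpc_ordered(&loc, gtid);
// ... body of #pragma omp ordered ...
__kmpc_end_ordered(&loc, gtid);
@endcode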
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
1090 void *idx; 1091 kmp_indirect_lock_t **lck; 1092 lck = (kmp_indirect_lock_t **)crit; 1093 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1094 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1095 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1096 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1097 KA_TRACE(20, 1098 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1099 #if USE_ITT_BUILD 1100 __kmp_itt_critical_creating(ilk->lock, loc); 1101 #endif 1102 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1103 if (status == 0) { 1104 #if USE_ITT_BUILD 1105 __kmp_itt_critical_destroyed(ilk->lock); 1106 #endif 1107 // We don't really need to destroy the unclaimed lock here since it will be 1108 // cleaned up at program exit. 1109 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1110 } 1111 KMP_DEBUG_ASSERT(*lck != NULL); 1112 } 1113 1114 // Fast-path acquire tas lock 1115 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1116 { \ 1117 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1118 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1119 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1120 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1121 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1122 kmp_uint32 spins; \ 1123 KMP_FSYNC_PREPARE(l); \ 1124 KMP_INIT_YIELD(spins); \ 1125 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1126 do { \ 1127 if (TCR_4(__kmp_nth) > \ 1128 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1129 KMP_YIELD(TRUE); \ 1130 } else { \ 1131 KMP_YIELD_SPIN(spins); \ 1132 } \ 1133 __kmp_spin_backoff(&backoff); \ 1134 } while ( \ 1135 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1136 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1137 } \ 1138 KMP_FSYNC_ACQUIRED(l); \ 1139 } 1140 1141 // Fast-path test tas lock 1142 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1143 { \ 1144 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1145 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1146 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1147 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1148 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1149 } 1150 1151 // Fast-path release tas lock 1152 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1153 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1154 1155 #if KMP_USE_FUTEX 1156 1157 #include <sys/syscall.h> 1158 #include <unistd.h> 1159 #ifndef FUTEX_WAIT 1160 #define FUTEX_WAIT 0 1161 #endif 1162 #ifndef FUTEX_WAKE 1163 #define FUTEX_WAKE 1 1164 #endif 1165 1166 // Fast-path acquire futex lock 1167 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1168 { \ 1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1170 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1171 KMP_MB(); \ 1172 KMP_FSYNC_PREPARE(ftx); \ 1173 kmp_int32 poll_val; \ 1174 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1175 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1176 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1177 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1178 if (!cond) { \ 1179 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1180 poll_val | \ 1181 KMP_LOCK_BUSY(1, futex))) { \ 1182 continue; \ 1183 } \ 1184 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1185 } \ 1186 kmp_int32 rc; \ 1187 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1188 NULL, NULL, 0)) != 0) { \ 1189 continue; \ 1190 } \ 1191 gtid_code |= 1; \ 1192 } \ 1193 KMP_FSYNC_ACQUIRED(ftx); \ 1194 } 1195 
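
// Descriptive note (added commentary, not from the original sources): as the
// futex fast paths above and below read, the poll word holds
// KMP_LOCK_FREE(futex) when the lock is free and KMP_LOCK_BUSY((gtid + 1) << 1,
// futex) when it is held; the low bit of the poll word is set once some waiter
// has gone to sleep in FUTEX_WAIT, which is why the release path issues
// FUTEX_WAKE only when KMP_LOCK_STRIP(poll_val) has that bit set.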
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
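
For example (a sketch only; crit_var is a placeholder for the
compiler-generated kmp_critical_name object, which must be zero-initialized
and shared by every thread using the same critical name):
@code
static kmp_critical_name crit_var; // one per named critical section
__kmpc_critical(&loc, gtid, &crit_var);
// ... body of #pragma omp critical ...
__kmpc_end_critical(&loc, gtid, &crit_var);
@endcode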
1286 */ 1287 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1288 kmp_critical_name *crit) { 1289 #if KMP_USE_DYNAMIC_LOCK 1290 #if OMPT_SUPPORT && OMPT_OPTIONAL 1291 OMPT_STORE_RETURN_ADDRESS(global_tid); 1292 #endif // OMPT_SUPPORT 1293 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1294 #else 1295 KMP_COUNT_BLOCK(OMP_CRITICAL); 1296 #if OMPT_SUPPORT && OMPT_OPTIONAL 1297 ompt_state_t prev_state = ompt_state_undefined; 1298 ompt_thread_info_t ti; 1299 #endif 1300 kmp_user_lock_p lck; 1301 1302 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1303 __kmp_assert_valid_gtid(global_tid); 1304 1305 // TODO: add THR_OVHD_STATE 1306 1307 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1308 KMP_CHECK_USER_LOCK_INIT(); 1309 1310 if ((__kmp_user_lock_kind == lk_tas) && 1311 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1312 lck = (kmp_user_lock_p)crit; 1313 } 1314 #if KMP_USE_FUTEX 1315 else if ((__kmp_user_lock_kind == lk_futex) && 1316 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1317 lck = (kmp_user_lock_p)crit; 1318 } 1319 #endif 1320 else { // ticket, queuing or drdpa 1321 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1322 } 1323 1324 if (__kmp_env_consistency_check) 1325 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1326 1327 // since the critical directive binds to all threads, not just the current 1328 // team we have to check this even if we are in a serialized team. 1329 // also, even if we are the uber thread, we still have to conduct the lock, 1330 // as we have to contend with sibling threads. 1331 1332 #if USE_ITT_BUILD 1333 __kmp_itt_critical_acquiring(lck); 1334 #endif /* USE_ITT_BUILD */ 1335 #if OMPT_SUPPORT && OMPT_OPTIONAL 1336 OMPT_STORE_RETURN_ADDRESS(gtid); 1337 void *codeptr_ra = NULL; 1338 if (ompt_enabled.enabled) { 1339 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1340 /* OMPT state update */ 1341 prev_state = ti.state; 1342 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1343 ti.state = ompt_state_wait_critical; 1344 1345 /* OMPT event callback */ 1346 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1347 if (ompt_enabled.ompt_callback_mutex_acquire) { 1348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1349 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1350 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1351 } 1352 } 1353 #endif 1354 // Value of 'crit' should be good for using as a critical_id of the critical 1355 // section directive. 
1356 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1357 1358 #if USE_ITT_BUILD 1359 __kmp_itt_critical_acquired(lck); 1360 #endif /* USE_ITT_BUILD */ 1361 #if OMPT_SUPPORT && OMPT_OPTIONAL 1362 if (ompt_enabled.enabled) { 1363 /* OMPT state update */ 1364 ti.state = prev_state; 1365 ti.wait_id = 0; 1366 1367 /* OMPT event callback */ 1368 if (ompt_enabled.ompt_callback_mutex_acquired) { 1369 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1370 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1371 } 1372 } 1373 #endif 1374 KMP_POP_PARTITIONED_TIMER(); 1375 1376 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1377 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1378 #endif // KMP_USE_DYNAMIC_LOCK 1379 } 1380 1381 #if KMP_USE_DYNAMIC_LOCK 1382 1383 // Converts the given hint to an internal lock implementation 1384 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1385 #if KMP_USE_TSX 1386 #define KMP_TSX_LOCK(seq) lockseq_##seq 1387 #else 1388 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1389 #endif 1390 1391 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1392 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) 1393 #else 1394 #define KMP_CPUINFO_RTM 0 1395 #endif 1396 1397 // Hints that do not require further logic 1398 if (hint & kmp_lock_hint_hle) 1399 return KMP_TSX_LOCK(hle); 1400 if (hint & kmp_lock_hint_rtm) 1401 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1402 if (hint & kmp_lock_hint_adaptive) 1403 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1404 1405 // Rule out conflicting hints first by returning the default lock 1406 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1407 return __kmp_user_lock_seq; 1408 if ((hint & omp_lock_hint_speculative) && 1409 (hint & omp_lock_hint_nonspeculative)) 1410 return __kmp_user_lock_seq; 1411 1412 // Do not even consider speculation when it appears to be contended 1413 if (hint & omp_lock_hint_contended) 1414 return lockseq_queuing; 1415 1416 // Uncontended lock without speculation 1417 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1418 return lockseq_tas; 1419 1420 // Use RTM lock for speculation 1421 if (hint & omp_lock_hint_speculative) 1422 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1423 1424 return __kmp_user_lock_seq; 1425 } 1426 1427 #if OMPT_SUPPORT && OMPT_OPTIONAL 1428 #if KMP_USE_DYNAMIC_LOCK 1429 static kmp_mutex_impl_t 1430 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1431 if (user_lock) { 1432 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1433 case 0: 1434 break; 1435 #if KMP_USE_FUTEX 1436 case locktag_futex: 1437 return kmp_mutex_impl_queuing; 1438 #endif 1439 case locktag_tas: 1440 return kmp_mutex_impl_spin; 1441 #if KMP_USE_TSX 1442 case locktag_hle: 1443 case locktag_rtm_spin: 1444 return kmp_mutex_impl_speculative; 1445 #endif 1446 default: 1447 return kmp_mutex_impl_none; 1448 } 1449 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1450 } 1451 KMP_ASSERT(ilock); 1452 switch (ilock->type) { 1453 #if KMP_USE_TSX 1454 case locktag_adaptive: 1455 case locktag_rtm_queuing: 1456 return kmp_mutex_impl_speculative; 1457 #endif 1458 case locktag_nested_tas: 1459 return kmp_mutex_impl_spin; 1460 #if KMP_USE_FUTEX 1461 case locktag_nested_futex: 1462 #endif 1463 case locktag_ticket: 1464 case locktag_queuing: 1465 case locktag_drdpa: 1466 case locktag_nested_ticket: 1467 case locktag_nested_queuing: 1468 case locktag_nested_drdpa: 1469 return kmp_mutex_impl_queuing; 1470 default: 1471 return kmp_mutex_impl_none; 1472 } 1473 } 1474 #else 1475 // For locks without dynamic binding 1476 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1477 switch (__kmp_user_lock_kind) { 1478 case lk_tas: 1479 return kmp_mutex_impl_spin; 1480 #if KMP_USE_FUTEX 1481 case lk_futex: 1482 #endif 1483 case lk_ticket: 1484 case lk_queuing: 1485 case lk_drdpa: 1486 return kmp_mutex_impl_queuing; 1487 #if KMP_USE_TSX 1488 case lk_hle: 1489 case lk_rtm_queuing: 1490 case lk_rtm_spin: 1491 case lk_adaptive: 1492 return kmp_mutex_impl_speculative; 1493 #endif 1494 default: 1495 return kmp_mutex_impl_none; 1496 } 1497 } 1498 #endif // KMP_USE_DYNAMIC_LOCK 1499 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1500 1501 /*! 1502 @ingroup WORK_SHARING 1503 @param loc source location information. 1504 @param global_tid global thread number. 1505 @param crit identity of the critical section. This could be a pointer to a lock 1506 associated with the critical section, or some other suitably unique value. 1507 @param hint the lock hint. 1508 1509 Enter code protected by a `critical` construct with a hint. The hint value is 1510 used to suggest a lock implementation. This function blocks until the executing 1511 thread can enter the critical section unless the hint suggests use of 1512 speculative execution and the hardware supports it. 1513 */ 1514 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1515 kmp_critical_name *crit, uint32_t hint) { 1516 KMP_COUNT_BLOCK(OMP_CRITICAL); 1517 kmp_user_lock_p lck; 1518 #if OMPT_SUPPORT && OMPT_OPTIONAL 1519 ompt_state_t prev_state = ompt_state_undefined; 1520 ompt_thread_info_t ti; 1521 // This is the case, if called from __kmpc_critical: 1522 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1523 if (!codeptr) 1524 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1525 #endif 1526 1527 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1528 __kmp_assert_valid_gtid(global_tid); 1529 1530 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1531 // Check if it is initialized. 
1532 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1533 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); 1534 if (*lk == 0) { 1535 if (KMP_IS_D_LOCK(lockseq)) { 1536 KMP_COMPARE_AND_STORE_ACQ32( 1537 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, 1538 KMP_GET_D_TAG(lockseq)); 1539 } else { 1540 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); 1541 } 1542 } 1543 // Branch for accessing the actual lock object and set operation. This 1544 // branching is inevitable since this lock initialization does not follow the 1545 // normal dispatch path (lock table is not used). 1546 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1547 lck = (kmp_user_lock_p)lk; 1548 if (__kmp_env_consistency_check) { 1549 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1550 __kmp_map_hint_to_lock(hint)); 1551 } 1552 #if USE_ITT_BUILD 1553 __kmp_itt_critical_acquiring(lck); 1554 #endif 1555 #if OMPT_SUPPORT && OMPT_OPTIONAL 1556 if (ompt_enabled.enabled) { 1557 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1558 /* OMPT state update */ 1559 prev_state = ti.state; 1560 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1561 ti.state = ompt_state_wait_critical; 1562 1563 /* OMPT event callback */ 1564 if (ompt_enabled.ompt_callback_mutex_acquire) { 1565 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1566 ompt_mutex_critical, (unsigned int)hint, 1567 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1568 codeptr); 1569 } 1570 } 1571 #endif 1572 #if KMP_USE_INLINED_TAS 1573 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { 1574 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1575 } else 1576 #elif KMP_USE_INLINED_FUTEX 1577 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { 1578 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1579 } else 1580 #endif 1581 { 1582 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1583 } 1584 } else { 1585 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1586 lck = ilk->lock; 1587 if (__kmp_env_consistency_check) { 1588 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1589 __kmp_map_hint_to_lock(hint)); 1590 } 1591 #if USE_ITT_BUILD 1592 __kmp_itt_critical_acquiring(lck); 1593 #endif 1594 #if OMPT_SUPPORT && OMPT_OPTIONAL 1595 if (ompt_enabled.enabled) { 1596 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1597 /* OMPT state update */ 1598 prev_state = ti.state; 1599 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1600 ti.state = ompt_state_wait_critical; 1601 1602 /* OMPT event callback */ 1603 if (ompt_enabled.ompt_callback_mutex_acquire) { 1604 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1605 ompt_mutex_critical, (unsigned int)hint, 1606 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1607 codeptr); 1608 } 1609 } 1610 #endif 1611 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1612 } 1613 KMP_POP_PARTITIONED_TIMER(); 1614 1615 #if USE_ITT_BUILD 1616 __kmp_itt_critical_acquired(lck); 1617 #endif /* USE_ITT_BUILD */ 1618 #if OMPT_SUPPORT && OMPT_OPTIONAL 1619 if (ompt_enabled.enabled) { 1620 /* OMPT state update */ 1621 ti.state = prev_state; 1622 ti.wait_id = 0; 1623 1624 /* OMPT event callback */ 1625 if (ompt_enabled.ompt_callback_mutex_acquired) { 1626 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1627 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1628 } 1629 } 1630 #endif 1631 1632 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1633 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1634 } // __kmpc_critical_with_hint 1635 1636 #endif // 
KMP_USE_DYNAMIC_LOCK 1637 1638 /*! 1639 @ingroup WORK_SHARING 1640 @param loc source location information. 1641 @param global_tid global thread number . 1642 @param crit identity of the critical section. This could be a pointer to a lock 1643 associated with the critical section, or some other suitably unique value. 1644 1645 Leave a critical section, releasing any lock that was held during its execution. 1646 */ 1647 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1648 kmp_critical_name *crit) { 1649 kmp_user_lock_p lck; 1650 1651 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1652 1653 #if KMP_USE_DYNAMIC_LOCK 1654 int locktag = KMP_EXTRACT_D_TAG(crit); 1655 if (locktag) { 1656 lck = (kmp_user_lock_p)crit; 1657 KMP_ASSERT(lck != NULL); 1658 if (__kmp_env_consistency_check) { 1659 __kmp_pop_sync(global_tid, ct_critical, loc); 1660 } 1661 #if USE_ITT_BUILD 1662 __kmp_itt_critical_releasing(lck); 1663 #endif 1664 #if KMP_USE_INLINED_TAS 1665 if (locktag == locktag_tas && !__kmp_env_consistency_check) { 1666 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1667 } else 1668 #elif KMP_USE_INLINED_FUTEX 1669 if (locktag == locktag_futex && !__kmp_env_consistency_check) { 1670 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1671 } else 1672 #endif 1673 { 1674 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1675 } 1676 } else { 1677 kmp_indirect_lock_t *ilk = 1678 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1679 KMP_ASSERT(ilk != NULL); 1680 lck = ilk->lock; 1681 if (__kmp_env_consistency_check) { 1682 __kmp_pop_sync(global_tid, ct_critical, loc); 1683 } 1684 #if USE_ITT_BUILD 1685 __kmp_itt_critical_releasing(lck); 1686 #endif 1687 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1688 } 1689 1690 #else // KMP_USE_DYNAMIC_LOCK 1691 1692 if ((__kmp_user_lock_kind == lk_tas) && 1693 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1694 lck = (kmp_user_lock_p)crit; 1695 } 1696 #if KMP_USE_FUTEX 1697 else if ((__kmp_user_lock_kind == lk_futex) && 1698 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1699 lck = (kmp_user_lock_p)crit; 1700 } 1701 #endif 1702 else { // ticket, queuing or drdpa 1703 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1704 } 1705 1706 KMP_ASSERT(lck != NULL); 1707 1708 if (__kmp_env_consistency_check) 1709 __kmp_pop_sync(global_tid, ct_critical, loc); 1710 1711 #if USE_ITT_BUILD 1712 __kmp_itt_critical_releasing(lck); 1713 #endif /* USE_ITT_BUILD */ 1714 // Value of 'crit' should be good for using as a critical_id of the critical 1715 // section directive. 1716 __kmp_release_user_lock_with_checks(lck, global_tid); 1717 1718 #endif // KMP_USE_DYNAMIC_LOCK 1719 1720 #if OMPT_SUPPORT && OMPT_OPTIONAL 1721 /* OMPT release event triggers after lock is released; place here to trigger 1722 * for all #if branches */ 1723 OMPT_STORE_RETURN_ADDRESS(global_tid); 1724 if (ompt_enabled.ompt_callback_mutex_released) { 1725 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1726 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1727 OMPT_LOAD_RETURN_ADDRESS(0)); 1728 } 1729 #endif 1730 1731 KMP_POP_PARTITIONED_TIMER(); 1732 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1733 } 1734 1735 /*! 1736 @ingroup SYNCHRONIZATION 1737 @param loc source location information 1738 @param global_tid thread id. 1739 @return one if the thread should execute the master block, zero otherwise 1740 1741 Start execution of a combined barrier and master. The barrier is executed inside 1742 this function. 
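
A sketch of the expected usage (placeholder names, not actual compiler output):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... master-only code ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the threads still waiting
}
@endcode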
1743 */
1744 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1745 int status;
1746 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1747 __kmp_assert_valid_gtid(global_tid);
1748
1749 if (!TCR_4(__kmp_init_parallel))
1750 __kmp_parallel_initialize();
1751
1752 __kmp_resume_if_soft_paused();
1753
1754 if (__kmp_env_consistency_check)
1755 __kmp_check_barrier(global_tid, ct_barrier, loc);
1756
1757 #if OMPT_SUPPORT
1758 ompt_frame_t *ompt_frame;
1759 if (ompt_enabled.enabled) {
1760 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1761 if (ompt_frame->enter_frame.ptr == NULL)
1762 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1763 }
1764 OMPT_STORE_RETURN_ADDRESS(global_tid);
1765 #endif
1766 #if USE_ITT_NOTIFY
1767 __kmp_threads[global_tid]->th.th_ident = loc;
1768 #endif
1769 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1770 #if OMPT_SUPPORT && OMPT_OPTIONAL
1771 if (ompt_enabled.enabled) {
1772 ompt_frame->enter_frame = ompt_data_none;
1773 }
1774 #endif
1775
1776 return (status != 0) ? 0 : 1;
1777 }
1778
1779 /*!
1780 @ingroup SYNCHRONIZATION
1781 @param loc source location information
1782 @param global_tid thread id.
1783
1784 Complete the execution of a combined barrier and master. This function should
1785 only be called at the completion of the <tt>master</tt> code. Other threads will
1786 still be waiting at the barrier and this call releases them.
1787 */
1788 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1789 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1790 __kmp_assert_valid_gtid(global_tid);
1791 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1792 }
1793
1794 /*!
1795 @ingroup SYNCHRONIZATION
1796 @param loc source location information
1797 @param global_tid thread id.
1798 @return one if the thread should execute the master block, zero otherwise
1799
1800 Start execution of a combined barrier and master(nowait) construct.
1801 The barrier is executed inside this function.
1802 There is no equivalent "end" function, since the master block has nowait semantics: no synchronization is required at its end, and the bookkeeping an "end" call would otherwise perform is done inside this function.
1803 */
1804 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1805 kmp_int32 ret;
1806 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1807 __kmp_assert_valid_gtid(global_tid);
1808
1809 if (!TCR_4(__kmp_init_parallel))
1810 __kmp_parallel_initialize();
1811
1812 __kmp_resume_if_soft_paused();
1813
1814 if (__kmp_env_consistency_check) {
1815 if (loc == 0) {
1816 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
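// (Best-effort interpretation: a NULL ident here means the compiler supplied no
// source-location information, so consistency diagnostics for this construct
// cannot name a source position; execution itself is unaffected.)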
1817 } 1818 __kmp_check_barrier(global_tid, ct_barrier, loc); 1819 } 1820 1821 #if OMPT_SUPPORT 1822 ompt_frame_t *ompt_frame; 1823 if (ompt_enabled.enabled) { 1824 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1825 if (ompt_frame->enter_frame.ptr == NULL) 1826 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1827 } 1828 OMPT_STORE_RETURN_ADDRESS(global_tid); 1829 #endif 1830 #if USE_ITT_NOTIFY 1831 __kmp_threads[global_tid]->th.th_ident = loc; 1832 #endif 1833 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1834 #if OMPT_SUPPORT && OMPT_OPTIONAL 1835 if (ompt_enabled.enabled) { 1836 ompt_frame->enter_frame = ompt_data_none; 1837 } 1838 #endif 1839 1840 ret = __kmpc_master(loc, global_tid); 1841 1842 if (__kmp_env_consistency_check) { 1843 /* there's no __kmpc_end_master called; so the (stats) */ 1844 /* actions of __kmpc_end_master are done here */ 1845 if (ret) { 1846 /* only one thread should do the pop since only */ 1847 /* one did the push (see __kmpc_master()) */ 1848 __kmp_pop_sync(global_tid, ct_master, loc); 1849 } 1850 } 1851 1852 return (ret); 1853 } 1854 1855 /* The BARRIER for a SINGLE process section is always explicit */ 1856 /*! 1857 @ingroup WORK_SHARING 1858 @param loc source location information 1859 @param global_tid global thread number 1860 @return One if this thread should execute the single construct, zero otherwise. 1861 1862 Test whether to execute a <tt>single</tt> construct. 1863 There are no implicit barriers in the two "single" calls, rather the compiler 1864 should introduce an explicit barrier if it is required. 1865 */ 1866 1867 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1868 __kmp_assert_valid_gtid(global_tid); 1869 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1870 1871 if (rc) { 1872 // We are going to execute the single statement, so we should count it. 1873 KMP_COUNT_BLOCK(OMP_SINGLE); 1874 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1875 } 1876 1877 #if OMPT_SUPPORT && OMPT_OPTIONAL 1878 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1879 kmp_team_t *team = this_thr->th.th_team; 1880 int tid = __kmp_tid_from_gtid(global_tid); 1881 1882 if (ompt_enabled.enabled) { 1883 if (rc) { 1884 if (ompt_enabled.ompt_callback_work) { 1885 ompt_callbacks.ompt_callback(ompt_callback_work)( 1886 ompt_work_single_executor, ompt_scope_begin, 1887 &(team->t.ompt_team_info.parallel_data), 1888 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1889 1, OMPT_GET_RETURN_ADDRESS(0)); 1890 } 1891 } else { 1892 if (ompt_enabled.ompt_callback_work) { 1893 ompt_callbacks.ompt_callback(ompt_callback_work)( 1894 ompt_work_single_other, ompt_scope_begin, 1895 &(team->t.ompt_team_info.parallel_data), 1896 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1897 1, OMPT_GET_RETURN_ADDRESS(0)); 1898 ompt_callbacks.ompt_callback(ompt_callback_work)( 1899 ompt_work_single_other, ompt_scope_end, 1900 &(team->t.ompt_team_info.parallel_data), 1901 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1902 1, OMPT_GET_RETURN_ADDRESS(0)); 1903 } 1904 } 1905 } 1906 #endif 1907 1908 return rc; 1909 } 1910 1911 /*! 1912 @ingroup WORK_SHARING 1913 @param loc source location information 1914 @param global_tid global thread number 1915 1916 Mark the end of a <tt>single</tt> construct. This function should 1917 only be called by the thread that executed the block of code protected 1918 by the `single` construct. 
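For illustration only (the exact lowering is up to the compiler), a
<tt>single</tt> region without a <tt>nowait</tt> clause could be emitted as:
@code
if (__kmpc_single(loc, gtid)) {
  // body of the single region, executed by exactly one thread
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // explicit barrier, added by the compiler if required
@endcode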
1919 */ 1920 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1921 __kmp_assert_valid_gtid(global_tid); 1922 __kmp_exit_single(global_tid); 1923 KMP_POP_PARTITIONED_TIMER(); 1924 1925 #if OMPT_SUPPORT && OMPT_OPTIONAL 1926 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1927 kmp_team_t *team = this_thr->th.th_team; 1928 int tid = __kmp_tid_from_gtid(global_tid); 1929 1930 if (ompt_enabled.ompt_callback_work) { 1931 ompt_callbacks.ompt_callback(ompt_callback_work)( 1932 ompt_work_single_executor, ompt_scope_end, 1933 &(team->t.ompt_team_info.parallel_data), 1934 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1935 OMPT_GET_RETURN_ADDRESS(0)); 1936 } 1937 #endif 1938 } 1939 1940 /*! 1941 @ingroup WORK_SHARING 1942 @param loc Source location 1943 @param global_tid Global thread id 1944 1945 Mark the end of a statically scheduled loop. 1946 */ 1947 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1948 KMP_POP_PARTITIONED_TIMER(); 1949 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1950 1951 #if OMPT_SUPPORT && OMPT_OPTIONAL 1952 if (ompt_enabled.ompt_callback_work) { 1953 ompt_work_t ompt_work_type = ompt_work_loop; 1954 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1955 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1956 // Determine workshare type 1957 if (loc != NULL) { 1958 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1959 ompt_work_type = ompt_work_loop; 1960 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1961 ompt_work_type = ompt_work_sections; 1962 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1963 ompt_work_type = ompt_work_distribute; 1964 } else { 1965 // use default set above. 1966 // a warning about this case is provided in __kmpc_for_static_init 1967 } 1968 KMP_DEBUG_ASSERT(ompt_work_type); 1969 } 1970 ompt_callbacks.ompt_callback(ompt_callback_work)( 1971 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1972 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1973 } 1974 #endif 1975 if (__kmp_env_consistency_check) 1976 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1977 } 1978 1979 // User routines which take C-style arguments (call by value) 1980 // different from the Fortran equivalent routines 1981 1982 void ompc_set_num_threads(int arg) { 1983 // !!!!! TODO: check the per-task binding 1984 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1985 } 1986 1987 void ompc_set_dynamic(int flag) { 1988 kmp_info_t *thread; 1989 1990 /* For the thread-private implementation of the internal controls */ 1991 thread = __kmp_entry_thread(); 1992 1993 __kmp_save_internal_controls(thread); 1994 1995 set__dynamic(thread, flag ? true : false); 1996 } 1997 1998 void ompc_set_nested(int flag) { 1999 kmp_info_t *thread; 2000 2001 /* For the thread-private internal controls implementation */ 2002 thread = __kmp_entry_thread(); 2003 2004 __kmp_save_internal_controls(thread); 2005 2006 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 2007 } 2008 2009 void ompc_set_max_active_levels(int max_active_levels) { 2010 /* TO DO */ 2011 /* we want per-task implementation of this internal control */ 2012 2013 /* For the per-thread internal controls implementation */ 2014 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 2015 } 2016 2017 void ompc_set_schedule(omp_sched_t kind, int modifier) { 2018 // !!!!! 
TODO: check the per-task binding 2019 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 2020 } 2021 2022 int ompc_get_ancestor_thread_num(int level) { 2023 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 2024 } 2025 2026 int ompc_get_team_size(int level) { 2027 return __kmp_get_team_size(__kmp_entry_gtid(), level); 2028 } 2029 2030 /* OpenMP 5.0 Affinity Format API */ 2031 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 2032 if (!__kmp_init_serial) { 2033 __kmp_serial_initialize(); 2034 } 2035 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2036 format, KMP_STRLEN(format) + 1); 2037 } 2038 2039 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2040 size_t format_size; 2041 if (!__kmp_init_serial) { 2042 __kmp_serial_initialize(); 2043 } 2044 format_size = KMP_STRLEN(__kmp_affinity_format); 2045 if (buffer && size) { 2046 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2047 format_size + 1); 2048 } 2049 return format_size; 2050 } 2051 2052 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2053 int gtid; 2054 if (!TCR_4(__kmp_init_middle)) { 2055 __kmp_middle_initialize(); 2056 } 2057 __kmp_assign_root_init_mask(); 2058 gtid = __kmp_get_gtid(); 2059 #if KMP_AFFINITY_SUPPORTED 2060 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2061 __kmp_affinity.flags.reset) { 2062 __kmp_reset_root_init_mask(gtid); 2063 } 2064 #endif 2065 __kmp_aux_display_affinity(gtid, format); 2066 } 2067 2068 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2069 char const *format) { 2070 int gtid; 2071 size_t num_required; 2072 kmp_str_buf_t capture_buf; 2073 if (!TCR_4(__kmp_init_middle)) { 2074 __kmp_middle_initialize(); 2075 } 2076 __kmp_assign_root_init_mask(); 2077 gtid = __kmp_get_gtid(); 2078 #if KMP_AFFINITY_SUPPORTED 2079 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2080 __kmp_affinity.flags.reset) { 2081 __kmp_reset_root_init_mask(gtid); 2082 } 2083 #endif 2084 __kmp_str_buf_init(&capture_buf); 2085 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2086 if (buffer && buf_size) { 2087 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2088 capture_buf.used + 1); 2089 } 2090 __kmp_str_buf_free(&capture_buf); 2091 return num_required; 2092 } 2093 2094 void kmpc_set_stacksize(int arg) { 2095 // __kmp_aux_set_stacksize initializes the library if needed 2096 __kmp_aux_set_stacksize(arg); 2097 } 2098 2099 void kmpc_set_stacksize_s(size_t arg) { 2100 // __kmp_aux_set_stacksize initializes the library if needed 2101 __kmp_aux_set_stacksize(arg); 2102 } 2103 2104 void kmpc_set_blocktime(int arg) { 2105 int gtid, tid, bt = arg; 2106 kmp_info_t *thread; 2107 2108 gtid = __kmp_entry_gtid(); 2109 tid = __kmp_tid_from_gtid(gtid); 2110 thread = __kmp_thread_from_gtid(gtid); 2111 2112 __kmp_aux_convert_blocktime(&bt); 2113 __kmp_aux_set_blocktime(bt, thread, tid); 2114 } 2115 2116 void kmpc_set_library(int arg) { 2117 // __kmp_user_set_library initializes the library if needed 2118 __kmp_user_set_library((enum library_type)arg); 2119 } 2120 2121 void kmpc_set_defaults(char const *str) { 2122 // __kmp_aux_set_defaults initializes the library if needed 2123 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2124 } 2125 2126 void kmpc_set_disp_num_buffers(int arg) { 2127 // ignore after initialization because some teams have already 2128 // allocated dispatch buffers 2129 if (__kmp_init_serial == FALSE && arg >= 
KMP_MIN_DISP_NUM_BUFF && 2130 arg <= KMP_MAX_DISP_NUM_BUFF) { 2131 __kmp_dispatch_num_buffers = arg; 2132 } 2133 } 2134 2135 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2136 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2137 return -1; 2138 #else 2139 if (!TCR_4(__kmp_init_middle)) { 2140 __kmp_middle_initialize(); 2141 } 2142 __kmp_assign_root_init_mask(); 2143 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2144 #endif 2145 } 2146 2147 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2148 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2149 return -1; 2150 #else 2151 if (!TCR_4(__kmp_init_middle)) { 2152 __kmp_middle_initialize(); 2153 } 2154 __kmp_assign_root_init_mask(); 2155 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2156 #endif 2157 } 2158 2159 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2160 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2161 return -1; 2162 #else 2163 if (!TCR_4(__kmp_init_middle)) { 2164 __kmp_middle_initialize(); 2165 } 2166 __kmp_assign_root_init_mask(); 2167 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2168 #endif 2169 } 2170 2171 /* -------------------------------------------------------------------------- */ 2172 /*! 2173 @ingroup THREADPRIVATE 2174 @param loc source location information 2175 @param gtid global thread number 2176 @param cpy_size size of the cpy_data buffer 2177 @param cpy_data pointer to data to be copied 2178 @param cpy_func helper function to call for copying data 2179 @param didit flag variable: 1=single thread; 0=not single thread 2180 2181 __kmpc_copyprivate implements the interface for the private data broadcast 2182 needed for the copyprivate clause associated with a single region in an 2183 OpenMP<sup>*</sup> program (both C and Fortran). 2184 All threads participating in the parallel region call this routine. 2185 One of the threads (called the single thread) should have the <tt>didit</tt> 2186 variable set to 1 and all other threads should have that variable set to 0. 2187 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2188 2189 The OpenMP specification forbids the use of nowait on the single region when a 2190 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2191 barrier internally to avoid race conditions, so the code generation for the 2192 single region should avoid generating a barrier after the call to @ref 2193 __kmpc_copyprivate. 2194 2195 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2196 The <tt>loc</tt> parameter is a pointer to source location information. 2197 2198 Internal implementation: The single thread will first copy its descriptor 2199 address (cpy_data) to a team-private location, then the other threads will each 2200 call the function pointed to by the parameter cpy_func, which carries out the 2201 copy by copying the data using the cpy_data buffer. 2202 2203 The cpy_func routine used for the copy and the contents of the data area defined 2204 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2205 to be done. For instance, the cpy_data buffer can hold the actual data to be 2206 copied or it may hold a list of pointers to the data. The cpy_func routine must 2207 interpret the cpy_data buffer appropriately. 
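As a purely illustrative sketch (the type and function names below are
hypothetical and not part of the interface; the cpy_func signature is given
below), cpy_data could be a structure of pointers to the copyprivate variables
and cpy_func could copy through those pointers:
@code
typedef struct { int *a; double *b; } hypothetical_cpy_t; // built by each thread
static void hypothetical_cpy_func(void *dst, void *src) {
  hypothetical_cpy_t *d = (hypothetical_cpy_t *)dst;
  hypothetical_cpy_t *s = (hypothetical_cpy_t *)src; // the single thread's data
  *d->a = *s->a;
  *d->b = *s->b;
}
@endcode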
2208 2209 The interface to cpy_func is as follows: 2210 @code 2211 void cpy_func( void *destination, void *source ) 2212 @endcode 2213 where void *destination is the cpy_data pointer for the thread being copied to 2214 and void *source is the cpy_data pointer for the thread being copied from. 2215 */ 2216 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2217 void *cpy_data, void (*cpy_func)(void *, void *), 2218 kmp_int32 didit) { 2219 void **data_ptr; 2220 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2221 __kmp_assert_valid_gtid(gtid); 2222 2223 KMP_MB(); 2224 2225 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2226 2227 if (__kmp_env_consistency_check) { 2228 if (loc == 0) { 2229 KMP_WARNING(ConstructIdentInvalid); 2230 } 2231 } 2232 2233 // ToDo: Optimize the following two barriers into some kind of split barrier 2234 2235 if (didit) 2236 *data_ptr = cpy_data; 2237 2238 #if OMPT_SUPPORT 2239 ompt_frame_t *ompt_frame; 2240 if (ompt_enabled.enabled) { 2241 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2242 if (ompt_frame->enter_frame.ptr == NULL) 2243 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2244 } 2245 OMPT_STORE_RETURN_ADDRESS(gtid); 2246 #endif 2247 /* This barrier is not a barrier region boundary */ 2248 #if USE_ITT_NOTIFY 2249 __kmp_threads[gtid]->th.th_ident = loc; 2250 #endif 2251 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2252 2253 if (!didit) 2254 (*cpy_func)(cpy_data, *data_ptr); 2255 2256 // Consider next barrier a user-visible barrier for barrier region boundaries 2257 // Nesting checks are already handled by the single construct checks 2258 { 2259 #if OMPT_SUPPORT 2260 OMPT_STORE_RETURN_ADDRESS(gtid); 2261 #endif 2262 #if USE_ITT_NOTIFY 2263 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2264 // tasks can overwrite the location) 2265 #endif 2266 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2267 #if OMPT_SUPPORT && OMPT_OPTIONAL 2268 if (ompt_enabled.enabled) { 2269 ompt_frame->enter_frame = ompt_data_none; 2270 } 2271 #endif 2272 } 2273 } 2274 2275 /* --------------------------------------------------------------------------*/ 2276 /*! 2277 @ingroup THREADPRIVATE 2278 @param loc source location information 2279 @param gtid global thread number 2280 @param cpy_data pointer to the data to be saved/copied or 0 2281 @return the saved pointer to the data 2282 2283 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: 2284 __kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so 2285 coming from single), and returns that pointer in all calls (for single thread 2286 it's not needed). This version doesn't do any actual data copying. Data copying 2287 has to be done somewhere else, e.g. inline in the generated code. Due to this, 2288 this function doesn't have any barrier at the end of the function, like 2289 __kmpc_copyprivate does, so generated code needs barrier after copying of all 2290 data was done. 
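An illustrative usage pattern (not a specification of compiler output; the
variable names are hypothetical):
@code
// The thread that executed the single region passes its descriptor, others pass 0.
void *src = __kmpc_copyprivate_light(loc, gtid, executed_single ? my_data : NULL);
if (!executed_single) {
  // copy inline from the data referenced by src into this thread's variables
}
__kmpc_barrier(loc, gtid); // needed: the light version has no trailing barrier
@endcode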
2291 */ 2292 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { 2293 void **data_ptr; 2294 2295 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid)); 2296 2297 KMP_MB(); 2298 2299 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2300 2301 if (__kmp_env_consistency_check) { 2302 if (loc == 0) { 2303 KMP_WARNING(ConstructIdentInvalid); 2304 } 2305 } 2306 2307 // ToDo: Optimize the following barrier 2308 2309 if (cpy_data) 2310 *data_ptr = cpy_data; 2311 2312 #if OMPT_SUPPORT 2313 ompt_frame_t *ompt_frame; 2314 if (ompt_enabled.enabled) { 2315 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2316 if (ompt_frame->enter_frame.ptr == NULL) 2317 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2318 OMPT_STORE_RETURN_ADDRESS(gtid); 2319 } 2320 #endif 2321 /* This barrier is not a barrier region boundary */ 2322 #if USE_ITT_NOTIFY 2323 __kmp_threads[gtid]->th.th_ident = loc; 2324 #endif 2325 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2326 2327 return *data_ptr; 2328 } 2329 2330 /* -------------------------------------------------------------------------- */ 2331 2332 #define INIT_LOCK __kmp_init_user_lock_with_checks 2333 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2334 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2335 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2336 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2337 #define ACQUIRE_NESTED_LOCK_TIMED \ 2338 __kmp_acquire_nested_user_lock_with_checks_timed 2339 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2340 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2341 #define TEST_LOCK __kmp_test_user_lock_with_checks 2342 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2343 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2344 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2345 2346 // TODO: Make check abort messages use location info & pass it into 2347 // with_checks routines 2348 2349 #if KMP_USE_DYNAMIC_LOCK 2350 2351 // internal lock initializer 2352 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2353 kmp_dyna_lockseq_t seq) { 2354 if (KMP_IS_D_LOCK(seq)) { 2355 KMP_INIT_D_LOCK(lock, seq); 2356 #if USE_ITT_BUILD 2357 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2358 #endif 2359 } else { 2360 KMP_INIT_I_LOCK(lock, seq); 2361 #if USE_ITT_BUILD 2362 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2363 __kmp_itt_lock_creating(ilk->lock, loc); 2364 #endif 2365 } 2366 } 2367 2368 // internal nest lock initializer 2369 static __forceinline void 2370 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2371 kmp_dyna_lockseq_t seq) { 2372 #if KMP_USE_TSX 2373 // Don't have nested lock implementation for speculative locks 2374 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2375 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2376 seq = __kmp_user_lock_seq; 2377 #endif 2378 switch (seq) { 2379 case lockseq_tas: 2380 seq = lockseq_nested_tas; 2381 break; 2382 #if KMP_USE_FUTEX 2383 case lockseq_futex: 2384 seq = lockseq_nested_futex; 2385 break; 2386 #endif 2387 case lockseq_ticket: 2388 seq = lockseq_nested_ticket; 2389 break; 2390 case lockseq_queuing: 2391 seq = lockseq_nested_queuing; 2392 break; 2393 case lockseq_drdpa: 2394 seq = lockseq_nested_drdpa; 2395 break; 2396 default: 2397 seq = lockseq_nested_queuing; 2398 } 2399 
KMP_INIT_I_LOCK(lock, seq); 2400 #if USE_ITT_BUILD 2401 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2402 __kmp_itt_lock_creating(ilk->lock, loc); 2403 #endif 2404 } 2405 2406 /* initialize the lock with a hint */ 2407 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2408 uintptr_t hint) { 2409 KMP_DEBUG_ASSERT(__kmp_init_serial); 2410 if (__kmp_env_consistency_check && user_lock == NULL) { 2411 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2412 } 2413 2414 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2415 2416 #if OMPT_SUPPORT && OMPT_OPTIONAL 2417 // This is the case, if called from omp_init_lock_with_hint: 2418 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2419 if (!codeptr) 2420 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2421 if (ompt_enabled.ompt_callback_lock_init) { 2422 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2423 ompt_mutex_lock, (omp_lock_hint_t)hint, 2424 __ompt_get_mutex_impl_type(user_lock), 2425 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2426 } 2427 #endif 2428 } 2429 2430 /* initialize the lock with a hint */ 2431 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2432 void **user_lock, uintptr_t hint) { 2433 KMP_DEBUG_ASSERT(__kmp_init_serial); 2434 if (__kmp_env_consistency_check && user_lock == NULL) { 2435 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2436 } 2437 2438 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2439 2440 #if OMPT_SUPPORT && OMPT_OPTIONAL 2441 // This is the case, if called from omp_init_lock_with_hint: 2442 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2443 if (!codeptr) 2444 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2445 if (ompt_enabled.ompt_callback_lock_init) { 2446 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2447 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2448 __ompt_get_mutex_impl_type(user_lock), 2449 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2450 } 2451 #endif 2452 } 2453 2454 #endif // KMP_USE_DYNAMIC_LOCK 2455 2456 /* initialize the lock */ 2457 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2458 #if KMP_USE_DYNAMIC_LOCK 2459 2460 KMP_DEBUG_ASSERT(__kmp_init_serial); 2461 if (__kmp_env_consistency_check && user_lock == NULL) { 2462 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2463 } 2464 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2465 2466 #if OMPT_SUPPORT && OMPT_OPTIONAL 2467 // This is the case, if called from omp_init_lock_with_hint: 2468 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2469 if (!codeptr) 2470 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2471 if (ompt_enabled.ompt_callback_lock_init) { 2472 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2473 ompt_mutex_lock, omp_lock_hint_none, 2474 __ompt_get_mutex_impl_type(user_lock), 2475 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2476 } 2477 #endif 2478 2479 #else // KMP_USE_DYNAMIC_LOCK 2480 2481 static char const *const func = "omp_init_lock"; 2482 kmp_user_lock_p lck; 2483 KMP_DEBUG_ASSERT(__kmp_init_serial); 2484 2485 if (__kmp_env_consistency_check) { 2486 if (user_lock == NULL) { 2487 KMP_FATAL(LockIsUninitialized, func); 2488 } 2489 } 2490 2491 KMP_CHECK_USER_LOCK_INIT(); 2492 2493 if ((__kmp_user_lock_kind == lk_tas) && 2494 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2495 lck = (kmp_user_lock_p)user_lock; 2496 } 2497 #if KMP_USE_FUTEX 2498 else if ((__kmp_user_lock_kind == lk_futex) && 2499 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 
2500 lck = (kmp_user_lock_p)user_lock; 2501 } 2502 #endif 2503 else { 2504 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2505 } 2506 INIT_LOCK(lck); 2507 __kmp_set_user_lock_location(lck, loc); 2508 2509 #if OMPT_SUPPORT && OMPT_OPTIONAL 2510 // This is the case, if called from omp_init_lock_with_hint: 2511 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2512 if (!codeptr) 2513 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2514 if (ompt_enabled.ompt_callback_lock_init) { 2515 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2516 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2517 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2518 } 2519 #endif 2520 2521 #if USE_ITT_BUILD 2522 __kmp_itt_lock_creating(lck); 2523 #endif /* USE_ITT_BUILD */ 2524 2525 #endif // KMP_USE_DYNAMIC_LOCK 2526 } // __kmpc_init_lock 2527 2528 /* initialize the lock */ 2529 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2530 #if KMP_USE_DYNAMIC_LOCK 2531 2532 KMP_DEBUG_ASSERT(__kmp_init_serial); 2533 if (__kmp_env_consistency_check && user_lock == NULL) { 2534 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2535 } 2536 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2537 2538 #if OMPT_SUPPORT && OMPT_OPTIONAL 2539 // This is the case, if called from omp_init_lock_with_hint: 2540 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2541 if (!codeptr) 2542 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2543 if (ompt_enabled.ompt_callback_lock_init) { 2544 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2545 ompt_mutex_nest_lock, omp_lock_hint_none, 2546 __ompt_get_mutex_impl_type(user_lock), 2547 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2548 } 2549 #endif 2550 2551 #else // KMP_USE_DYNAMIC_LOCK 2552 2553 static char const *const func = "omp_init_nest_lock"; 2554 kmp_user_lock_p lck; 2555 KMP_DEBUG_ASSERT(__kmp_init_serial); 2556 2557 if (__kmp_env_consistency_check) { 2558 if (user_lock == NULL) { 2559 KMP_FATAL(LockIsUninitialized, func); 2560 } 2561 } 2562 2563 KMP_CHECK_USER_LOCK_INIT(); 2564 2565 if ((__kmp_user_lock_kind == lk_tas) && 2566 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2567 OMP_NEST_LOCK_T_SIZE)) { 2568 lck = (kmp_user_lock_p)user_lock; 2569 } 2570 #if KMP_USE_FUTEX 2571 else if ((__kmp_user_lock_kind == lk_futex) && 2572 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2573 OMP_NEST_LOCK_T_SIZE)) { 2574 lck = (kmp_user_lock_p)user_lock; 2575 } 2576 #endif 2577 else { 2578 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2579 } 2580 2581 INIT_NESTED_LOCK(lck); 2582 __kmp_set_user_lock_location(lck, loc); 2583 2584 #if OMPT_SUPPORT && OMPT_OPTIONAL 2585 // This is the case, if called from omp_init_lock_with_hint: 2586 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2587 if (!codeptr) 2588 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2589 if (ompt_enabled.ompt_callback_lock_init) { 2590 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2591 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2592 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2593 } 2594 #endif 2595 2596 #if USE_ITT_BUILD 2597 __kmp_itt_lock_creating(lck); 2598 #endif /* USE_ITT_BUILD */ 2599 2600 #endif // KMP_USE_DYNAMIC_LOCK 2601 } // __kmpc_init_nest_lock 2602 2603 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2604 #if KMP_USE_DYNAMIC_LOCK 2605 2606 #if USE_ITT_BUILD 2607 kmp_user_lock_p lck; 2608 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2609 lck = 
((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2610 } else { 2611 lck = (kmp_user_lock_p)user_lock; 2612 } 2613 __kmp_itt_lock_destroyed(lck); 2614 #endif 2615 #if OMPT_SUPPORT && OMPT_OPTIONAL 2616 // This is the case, if called from omp_init_lock_with_hint: 2617 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2618 if (!codeptr) 2619 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2620 if (ompt_enabled.ompt_callback_lock_destroy) { 2621 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2622 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2623 } 2624 #endif 2625 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2626 #else 2627 kmp_user_lock_p lck; 2628 2629 if ((__kmp_user_lock_kind == lk_tas) && 2630 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2631 lck = (kmp_user_lock_p)user_lock; 2632 } 2633 #if KMP_USE_FUTEX 2634 else if ((__kmp_user_lock_kind == lk_futex) && 2635 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2636 lck = (kmp_user_lock_p)user_lock; 2637 } 2638 #endif 2639 else { 2640 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2641 } 2642 2643 #if OMPT_SUPPORT && OMPT_OPTIONAL 2644 // This is the case, if called from omp_init_lock_with_hint: 2645 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2646 if (!codeptr) 2647 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2648 if (ompt_enabled.ompt_callback_lock_destroy) { 2649 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2650 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2651 } 2652 #endif 2653 2654 #if USE_ITT_BUILD 2655 __kmp_itt_lock_destroyed(lck); 2656 #endif /* USE_ITT_BUILD */ 2657 DESTROY_LOCK(lck); 2658 2659 if ((__kmp_user_lock_kind == lk_tas) && 2660 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2661 ; 2662 } 2663 #if KMP_USE_FUTEX 2664 else if ((__kmp_user_lock_kind == lk_futex) && 2665 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2666 ; 2667 } 2668 #endif 2669 else { 2670 __kmp_user_lock_free(user_lock, gtid, lck); 2671 } 2672 #endif // KMP_USE_DYNAMIC_LOCK 2673 } // __kmpc_destroy_lock 2674 2675 /* destroy the lock */ 2676 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2677 #if KMP_USE_DYNAMIC_LOCK 2678 2679 #if USE_ITT_BUILD 2680 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2681 __kmp_itt_lock_destroyed(ilk->lock); 2682 #endif 2683 #if OMPT_SUPPORT && OMPT_OPTIONAL 2684 // This is the case, if called from omp_init_lock_with_hint: 2685 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2686 if (!codeptr) 2687 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2688 if (ompt_enabled.ompt_callback_lock_destroy) { 2689 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2690 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2691 } 2692 #endif 2693 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2694 2695 #else // KMP_USE_DYNAMIC_LOCK 2696 2697 kmp_user_lock_p lck; 2698 2699 if ((__kmp_user_lock_kind == lk_tas) && 2700 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2701 OMP_NEST_LOCK_T_SIZE)) { 2702 lck = (kmp_user_lock_p)user_lock; 2703 } 2704 #if KMP_USE_FUTEX 2705 else if ((__kmp_user_lock_kind == lk_futex) && 2706 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2707 OMP_NEST_LOCK_T_SIZE)) { 2708 lck = (kmp_user_lock_p)user_lock; 2709 } 2710 #endif 2711 else { 2712 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2713 } 2714 2715 #if OMPT_SUPPORT && OMPT_OPTIONAL 2716 // This is the case, if called from 
omp_init_lock_with_hint: 2717 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2718 if (!codeptr) 2719 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2720 if (ompt_enabled.ompt_callback_lock_destroy) { 2721 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2722 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2723 } 2724 #endif 2725 2726 #if USE_ITT_BUILD 2727 __kmp_itt_lock_destroyed(lck); 2728 #endif /* USE_ITT_BUILD */ 2729 2730 DESTROY_NESTED_LOCK(lck); 2731 2732 if ((__kmp_user_lock_kind == lk_tas) && 2733 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2734 OMP_NEST_LOCK_T_SIZE)) { 2735 ; 2736 } 2737 #if KMP_USE_FUTEX 2738 else if ((__kmp_user_lock_kind == lk_futex) && 2739 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2740 OMP_NEST_LOCK_T_SIZE)) { 2741 ; 2742 } 2743 #endif 2744 else { 2745 __kmp_user_lock_free(user_lock, gtid, lck); 2746 } 2747 #endif // KMP_USE_DYNAMIC_LOCK 2748 } // __kmpc_destroy_nest_lock 2749 2750 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2751 KMP_COUNT_BLOCK(OMP_set_lock); 2752 #if KMP_USE_DYNAMIC_LOCK 2753 int tag = KMP_EXTRACT_D_TAG(user_lock); 2754 #if USE_ITT_BUILD 2755 __kmp_itt_lock_acquiring( 2756 (kmp_user_lock_p) 2757 user_lock); // itt function will get to the right lock object. 2758 #endif 2759 #if OMPT_SUPPORT && OMPT_OPTIONAL 2760 // This is the case, if called from omp_init_lock_with_hint: 2761 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2762 if (!codeptr) 2763 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2764 if (ompt_enabled.ompt_callback_mutex_acquire) { 2765 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2766 ompt_mutex_lock, omp_lock_hint_none, 2767 __ompt_get_mutex_impl_type(user_lock), 2768 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2769 } 2770 #endif 2771 #if KMP_USE_INLINED_TAS 2772 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2773 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2774 } else 2775 #elif KMP_USE_INLINED_FUTEX 2776 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2777 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2778 } else 2779 #endif 2780 { 2781 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2782 } 2783 #if USE_ITT_BUILD 2784 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2785 #endif 2786 #if OMPT_SUPPORT && OMPT_OPTIONAL 2787 if (ompt_enabled.ompt_callback_mutex_acquired) { 2788 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2789 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2790 } 2791 #endif 2792 2793 #else // KMP_USE_DYNAMIC_LOCK 2794 2795 kmp_user_lock_p lck; 2796 2797 if ((__kmp_user_lock_kind == lk_tas) && 2798 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2799 lck = (kmp_user_lock_p)user_lock; 2800 } 2801 #if KMP_USE_FUTEX 2802 else if ((__kmp_user_lock_kind == lk_futex) && 2803 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2804 lck = (kmp_user_lock_p)user_lock; 2805 } 2806 #endif 2807 else { 2808 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2809 } 2810 2811 #if USE_ITT_BUILD 2812 __kmp_itt_lock_acquiring(lck); 2813 #endif /* USE_ITT_BUILD */ 2814 #if OMPT_SUPPORT && OMPT_OPTIONAL 2815 // This is the case, if called from omp_init_lock_with_hint: 2816 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2817 if (!codeptr) 2818 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2819 if (ompt_enabled.ompt_callback_mutex_acquire) { 2820 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2821 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 2822 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2823 } 2824 #endif 2825 2826 ACQUIRE_LOCK(lck, gtid); 2827 2828 #if USE_ITT_BUILD 2829 __kmp_itt_lock_acquired(lck); 2830 #endif /* USE_ITT_BUILD */ 2831 2832 #if OMPT_SUPPORT && OMPT_OPTIONAL 2833 if (ompt_enabled.ompt_callback_mutex_acquired) { 2834 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2835 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2836 } 2837 #endif 2838 2839 #endif // KMP_USE_DYNAMIC_LOCK 2840 } 2841 2842 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2843 #if KMP_USE_DYNAMIC_LOCK 2844 2845 #if USE_ITT_BUILD 2846 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2847 #endif 2848 #if OMPT_SUPPORT && OMPT_OPTIONAL 2849 // This is the case, if called from omp_init_lock_with_hint: 2850 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2851 if (!codeptr) 2852 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2853 if (ompt_enabled.enabled) { 2854 if (ompt_enabled.ompt_callback_mutex_acquire) { 2855 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2856 ompt_mutex_nest_lock, omp_lock_hint_none, 2857 __ompt_get_mutex_impl_type(user_lock), 2858 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2859 } 2860 } 2861 #endif 2862 int acquire_status = 2863 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2864 (void)acquire_status; 2865 #if USE_ITT_BUILD 2866 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2867 #endif 2868 2869 #if OMPT_SUPPORT && OMPT_OPTIONAL 2870 if (ompt_enabled.enabled) { 2871 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2872 if (ompt_enabled.ompt_callback_mutex_acquired) { 2873 // lock_first 2874 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2875 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2876 codeptr); 2877 } 2878 } else { 2879 if (ompt_enabled.ompt_callback_nest_lock) { 2880 // lock_next 2881 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2882 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2883 } 2884 } 2885 } 2886 #endif 2887 2888 #else // KMP_USE_DYNAMIC_LOCK 2889 int acquire_status; 2890 kmp_user_lock_p lck; 2891 2892 if ((__kmp_user_lock_kind == lk_tas) && 2893 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2894 OMP_NEST_LOCK_T_SIZE)) { 2895 lck = (kmp_user_lock_p)user_lock; 2896 } 2897 #if KMP_USE_FUTEX 2898 else if ((__kmp_user_lock_kind == lk_futex) && 2899 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2900 OMP_NEST_LOCK_T_SIZE)) { 2901 lck = (kmp_user_lock_p)user_lock; 2902 } 2903 #endif 2904 else { 2905 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2906 } 2907 2908 #if USE_ITT_BUILD 2909 __kmp_itt_lock_acquiring(lck); 2910 #endif /* USE_ITT_BUILD */ 2911 #if OMPT_SUPPORT && OMPT_OPTIONAL 2912 // This is the case, if called from omp_init_lock_with_hint: 2913 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2914 if (!codeptr) 2915 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2916 if (ompt_enabled.enabled) { 2917 if (ompt_enabled.ompt_callback_mutex_acquire) { 2918 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2919 ompt_mutex_nest_lock, omp_lock_hint_none, 2920 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2921 codeptr); 2922 } 2923 } 2924 #endif 2925 2926 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2927 2928 #if USE_ITT_BUILD 2929 __kmp_itt_lock_acquired(lck); 2930 #endif /* USE_ITT_BUILD */ 2931 2932 #if OMPT_SUPPORT && OMPT_OPTIONAL 2933 if (ompt_enabled.enabled) 
{ 2934 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2935 if (ompt_enabled.ompt_callback_mutex_acquired) { 2936 // lock_first 2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2938 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2939 } 2940 } else { 2941 if (ompt_enabled.ompt_callback_nest_lock) { 2942 // lock_next 2943 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2944 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2945 } 2946 } 2947 } 2948 #endif 2949 2950 #endif // KMP_USE_DYNAMIC_LOCK 2951 } 2952 2953 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2954 #if KMP_USE_DYNAMIC_LOCK 2955 2956 int tag = KMP_EXTRACT_D_TAG(user_lock); 2957 #if USE_ITT_BUILD 2958 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2959 #endif 2960 #if KMP_USE_INLINED_TAS 2961 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2962 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2963 } else 2964 #elif KMP_USE_INLINED_FUTEX 2965 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2966 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2967 } else 2968 #endif 2969 { 2970 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2971 } 2972 2973 #if OMPT_SUPPORT && OMPT_OPTIONAL 2974 // This is the case, if called from omp_init_lock_with_hint: 2975 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2976 if (!codeptr) 2977 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2978 if (ompt_enabled.ompt_callback_mutex_released) { 2979 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2980 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2981 } 2982 #endif 2983 2984 #else // KMP_USE_DYNAMIC_LOCK 2985 2986 kmp_user_lock_p lck; 2987 2988 /* Can't use serial interval since not block structured */ 2989 /* release the lock */ 2990 2991 if ((__kmp_user_lock_kind == lk_tas) && 2992 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2993 #if KMP_OS_LINUX && \ 2994 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2995 // "fast" path implemented to fix customer performance issue 2996 #if USE_ITT_BUILD 2997 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2998 #endif /* USE_ITT_BUILD */ 2999 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 3000 KMP_MB(); 3001 3002 #if OMPT_SUPPORT && OMPT_OPTIONAL 3003 // This is the case, if called from omp_init_lock_with_hint: 3004 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3005 if (!codeptr) 3006 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3007 if (ompt_enabled.ompt_callback_mutex_released) { 3008 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3009 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3010 } 3011 #endif 3012 3013 return; 3014 #else 3015 lck = (kmp_user_lock_p)user_lock; 3016 #endif 3017 } 3018 #if KMP_USE_FUTEX 3019 else if ((__kmp_user_lock_kind == lk_futex) && 3020 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3021 lck = (kmp_user_lock_p)user_lock; 3022 } 3023 #endif 3024 else { 3025 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 3026 } 3027 3028 #if USE_ITT_BUILD 3029 __kmp_itt_lock_releasing(lck); 3030 #endif /* USE_ITT_BUILD */ 3031 3032 RELEASE_LOCK(lck, gtid); 3033 3034 #if OMPT_SUPPORT && OMPT_OPTIONAL 3035 // This is the case, if called from omp_init_lock_with_hint: 3036 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3037 if (!codeptr) 3038 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3039 if (ompt_enabled.ompt_callback_mutex_released) { 3040 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3041 ompt_mutex_lock, 
(ompt_wait_id_t)(uintptr_t)lck, codeptr); 3042 } 3043 #endif 3044 3045 #endif // KMP_USE_DYNAMIC_LOCK 3046 } 3047 3048 /* release the lock */ 3049 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3050 #if KMP_USE_DYNAMIC_LOCK 3051 3052 #if USE_ITT_BUILD 3053 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3054 #endif 3055 int release_status = 3056 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 3057 (void)release_status; 3058 3059 #if OMPT_SUPPORT && OMPT_OPTIONAL 3060 // This is the case, if called from omp_init_lock_with_hint: 3061 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3062 if (!codeptr) 3063 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3064 if (ompt_enabled.enabled) { 3065 if (release_status == KMP_LOCK_RELEASED) { 3066 if (ompt_enabled.ompt_callback_mutex_released) { 3067 // release_lock_last 3068 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3069 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3070 codeptr); 3071 } 3072 } else if (ompt_enabled.ompt_callback_nest_lock) { 3073 // release_lock_prev 3074 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3075 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3076 } 3077 } 3078 #endif 3079 3080 #else // KMP_USE_DYNAMIC_LOCK 3081 3082 kmp_user_lock_p lck; 3083 3084 /* Can't use serial interval since not block structured */ 3085 3086 if ((__kmp_user_lock_kind == lk_tas) && 3087 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3088 OMP_NEST_LOCK_T_SIZE)) { 3089 #if KMP_OS_LINUX && \ 3090 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 3091 // "fast" path implemented to fix customer performance issue 3092 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 3093 #if USE_ITT_BUILD 3094 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3095 #endif /* USE_ITT_BUILD */ 3096 3097 #if OMPT_SUPPORT && OMPT_OPTIONAL 3098 int release_status = KMP_LOCK_STILL_HELD; 3099 #endif 3100 3101 if (--(tl->lk.depth_locked) == 0) { 3102 TCW_4(tl->lk.poll, 0); 3103 #if OMPT_SUPPORT && OMPT_OPTIONAL 3104 release_status = KMP_LOCK_RELEASED; 3105 #endif 3106 } 3107 KMP_MB(); 3108 3109 #if OMPT_SUPPORT && OMPT_OPTIONAL 3110 // This is the case, if called from omp_init_lock_with_hint: 3111 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3112 if (!codeptr) 3113 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3114 if (ompt_enabled.enabled) { 3115 if (release_status == KMP_LOCK_RELEASED) { 3116 if (ompt_enabled.ompt_callback_mutex_released) { 3117 // release_lock_last 3118 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3119 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3120 } 3121 } else if (ompt_enabled.ompt_callback_nest_lock) { 3122 // release_lock_previous 3123 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3124 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3125 } 3126 } 3127 #endif 3128 3129 return; 3130 #else 3131 lck = (kmp_user_lock_p)user_lock; 3132 #endif 3133 } 3134 #if KMP_USE_FUTEX 3135 else if ((__kmp_user_lock_kind == lk_futex) && 3136 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3137 OMP_NEST_LOCK_T_SIZE)) { 3138 lck = (kmp_user_lock_p)user_lock; 3139 } 3140 #endif 3141 else { 3142 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3143 } 3144 3145 #if USE_ITT_BUILD 3146 __kmp_itt_lock_releasing(lck); 3147 #endif /* USE_ITT_BUILD */ 3148 3149 int release_status; 3150 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3151 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3152 // This is the case, if called from omp_init_lock_with_hint: 3153 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3154 if (!codeptr) 3155 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3156 if (ompt_enabled.enabled) { 3157 if (release_status == KMP_LOCK_RELEASED) { 3158 if (ompt_enabled.ompt_callback_mutex_released) { 3159 // release_lock_last 3160 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3161 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3162 } 3163 } else if (ompt_enabled.ompt_callback_nest_lock) { 3164 // release_lock_previous 3165 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3166 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3167 } 3168 } 3169 #endif 3170 3171 #endif // KMP_USE_DYNAMIC_LOCK 3172 } 3173 3174 /* try to acquire the lock */ 3175 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3176 KMP_COUNT_BLOCK(OMP_test_lock); 3177 3178 #if KMP_USE_DYNAMIC_LOCK 3179 int rc; 3180 int tag = KMP_EXTRACT_D_TAG(user_lock); 3181 #if USE_ITT_BUILD 3182 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3183 #endif 3184 #if OMPT_SUPPORT && OMPT_OPTIONAL 3185 // This is the case, if called from omp_init_lock_with_hint: 3186 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3187 if (!codeptr) 3188 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3189 if (ompt_enabled.ompt_callback_mutex_acquire) { 3190 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3191 ompt_mutex_test_lock, omp_lock_hint_none, 3192 __ompt_get_mutex_impl_type(user_lock), 3193 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3194 } 3195 #endif 3196 #if KMP_USE_INLINED_TAS 3197 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3198 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3199 } else 3200 #elif KMP_USE_INLINED_FUTEX 3201 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3202 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3203 } else 3204 #endif 3205 { 3206 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3207 } 3208 if (rc) { 3209 #if USE_ITT_BUILD 3210 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3211 #endif 3212 #if OMPT_SUPPORT && OMPT_OPTIONAL 3213 if (ompt_enabled.ompt_callback_mutex_acquired) { 3214 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3215 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3216 } 3217 #endif 3218 return FTN_TRUE; 3219 } else { 3220 #if USE_ITT_BUILD 3221 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3222 #endif 3223 return FTN_FALSE; 3224 } 3225 3226 #else // KMP_USE_DYNAMIC_LOCK 3227 3228 kmp_user_lock_p lck; 3229 int rc; 3230 3231 if ((__kmp_user_lock_kind == lk_tas) && 3232 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3233 lck = (kmp_user_lock_p)user_lock; 3234 } 3235 #if KMP_USE_FUTEX 3236 else if ((__kmp_user_lock_kind == lk_futex) && 3237 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3238 lck = (kmp_user_lock_p)user_lock; 3239 } 3240 #endif 3241 else { 3242 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3243 } 3244 3245 #if USE_ITT_BUILD 3246 __kmp_itt_lock_acquiring(lck); 3247 #endif /* USE_ITT_BUILD */ 3248 #if OMPT_SUPPORT && OMPT_OPTIONAL 3249 // This is the case, if called from omp_init_lock_with_hint: 3250 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3251 if (!codeptr) 3252 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3253 if (ompt_enabled.ompt_callback_mutex_acquire) { 3254 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3255 ompt_mutex_test_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(),
3256 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3257 }
3258 #endif
3259
3260 rc = TEST_LOCK(lck, gtid);
3261 #if USE_ITT_BUILD
3262 if (rc) {
3263 __kmp_itt_lock_acquired(lck);
3264 } else {
3265 __kmp_itt_lock_cancelled(lck);
3266 }
3267 #endif /* USE_ITT_BUILD */
3268 #if OMPT_SUPPORT && OMPT_OPTIONAL
3269 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3270 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3272 }
3273 #endif
3274
3275 return (rc ? FTN_TRUE : FTN_FALSE);
3276
3277 /* Can't use serial interval since not block structured */
3278
3279 #endif // KMP_USE_DYNAMIC_LOCK
3280 }
3281
3282 /* try to acquire the lock */
3283 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3284 #if KMP_USE_DYNAMIC_LOCK
3285 int rc;
3286 #if USE_ITT_BUILD
3287 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3288 #endif
3289 #if OMPT_SUPPORT && OMPT_OPTIONAL
3290 // This is the case, if called from omp_init_lock_with_hint:
3291 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3292 if (!codeptr)
3293 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3294 if (ompt_enabled.ompt_callback_mutex_acquire) {
3295 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3296 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3297 __ompt_get_mutex_impl_type(user_lock),
3298 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3299 }
3300 #endif
3301 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3302 #if USE_ITT_BUILD
3303 if (rc) {
3304 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3305 } else {
3306 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3307 }
3308 #endif
3309 #if OMPT_SUPPORT && OMPT_OPTIONAL
3310 if (ompt_enabled.enabled && rc) {
3311 if (rc == 1) {
3312 if (ompt_enabled.ompt_callback_mutex_acquired) {
3313 // lock_first
3314 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3315 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3316 codeptr);
3317 }
3318 } else {
3319 if (ompt_enabled.ompt_callback_nest_lock) {
3320 // lock_next
3321 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3322 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3323 }
3324 }
3325 }
3326 #endif
3327 return rc;
3328
3329 #else // KMP_USE_DYNAMIC_LOCK
3330
3331 kmp_user_lock_p lck;
3332 int rc;
3333
3334 if ((__kmp_user_lock_kind == lk_tas) &&
3335 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3336 OMP_NEST_LOCK_T_SIZE)) {
3337 lck = (kmp_user_lock_p)user_lock;
3338 }
3339 #if KMP_USE_FUTEX
3340 else if ((__kmp_user_lock_kind == lk_futex) &&
3341 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3342 OMP_NEST_LOCK_T_SIZE)) {
3343 lck = (kmp_user_lock_p)user_lock;
3344 }
3345 #endif
3346 else {
3347 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3348 }
3349
3350 #if USE_ITT_BUILD
3351 __kmp_itt_lock_acquiring(lck);
3352 #endif /* USE_ITT_BUILD */
3353
3354 #if OMPT_SUPPORT && OMPT_OPTIONAL
3355 // This is the case, if called from omp_init_lock_with_hint:
3356 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3357 if (!codeptr)
3358 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3359 if (ompt_enabled.enabled &&
3360 ompt_enabled.ompt_callback_mutex_acquire) {
3361 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3362 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3363 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3364 codeptr);
3365 }
3366 #endif
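// TEST_NESTED_LOCK returns 0 if the lock is held by another thread and the new
// nesting count on success, so rc == 1 below distinguishes a first acquisition
// (mutex_acquired / lock_first) from a re-acquisition (nest_lock / lock_next).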
3367 3368 rc = TEST_NESTED_LOCK(lck, gtid); 3369 #if USE_ITT_BUILD 3370 if (rc) { 3371 __kmp_itt_lock_acquired(lck); 3372 } else { 3373 __kmp_itt_lock_cancelled(lck); 3374 } 3375 #endif /* USE_ITT_BUILD */ 3376 #if OMPT_SUPPORT && OMPT_OPTIONAL 3377 if (ompt_enabled.enabled && rc) { 3378 if (rc == 1) { 3379 if (ompt_enabled.ompt_callback_mutex_acquired) { 3380 // lock_first 3381 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3382 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3383 } 3384 } else { 3385 if (ompt_enabled.ompt_callback_nest_lock) { 3386 // lock_next 3387 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3388 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3389 } 3390 } 3391 } 3392 #endif 3393 return rc; 3394 3395 /* Can't use serial interval since not block structured */ 3396 3397 #endif // KMP_USE_DYNAMIC_LOCK 3398 } 3399 3400 // Interface to fast scalable reduce methods routines 3401 3402 // keep the selected method in a thread local structure for cross-function 3403 // usage: will be used in __kmpc_end_reduce* functions; 3404 // another solution: to re-determine the method one more time in 3405 // __kmpc_end_reduce* functions (new prototype required then) 3406 // AT: which solution is better? 3407 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3408 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3409 3410 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3411 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3412 3413 // description of the packed_reduction_method variable: look at the macros in 3414 // kmp.h 3415 3416 // used in a critical section reduce block 3417 static __forceinline void 3418 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3419 kmp_critical_name *crit) { 3420 3421 // this lock was visible to a customer and to the threading profile tool as a 3422 // serial overhead span (although it's used for an internal purpose only) 3423 // why was it visible in previous implementation? 3424 // should we keep it visible in new reduce block? 3425 kmp_user_lock_p lck; 3426 3427 #if KMP_USE_DYNAMIC_LOCK 3428 3429 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3430 // Check if it is initialized. 3431 if (*lk == 0) { 3432 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3433 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3434 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3435 } else { 3436 __kmp_init_indirect_csptr(crit, loc, global_tid, 3437 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3438 } 3439 } 3440 // Branch for accessing the actual lock object and set operation. This 3441 // branching is inevitable since this lock initialization does not follow the 3442 // normal dispatch path (lock table is not used). 3443 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3444 lck = (kmp_user_lock_p)lk; 3445 KMP_DEBUG_ASSERT(lck != NULL); 3446 if (__kmp_env_consistency_check) { 3447 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3448 } 3449 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3450 } else { 3451 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3452 lck = ilk->lock; 3453 KMP_DEBUG_ASSERT(lck != NULL); 3454 if (__kmp_env_consistency_check) { 3455 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3456 } 3457 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3458 } 3459 3460 #else // KMP_USE_DYNAMIC_LOCK 3461 3462 // We know that the fast reduction code is only emitted by Intel compilers 3463 // with 32 byte critical sections. 
If there isn't enough space, then we 3464 // have to use a pointer. 3465 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3466 lck = (kmp_user_lock_p)crit; 3467 } else { 3468 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3469 } 3470 KMP_DEBUG_ASSERT(lck != NULL); 3471 3472 if (__kmp_env_consistency_check) 3473 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3474 3475 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3476 3477 #endif // KMP_USE_DYNAMIC_LOCK 3478 } 3479 3480 // used in a critical section reduce block 3481 static __forceinline void 3482 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3483 kmp_critical_name *crit) { 3484 3485 kmp_user_lock_p lck; 3486 3487 #if KMP_USE_DYNAMIC_LOCK 3488 3489 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3490 lck = (kmp_user_lock_p)crit; 3491 if (__kmp_env_consistency_check) 3492 __kmp_pop_sync(global_tid, ct_critical, loc); 3493 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3494 } else { 3495 kmp_indirect_lock_t *ilk = 3496 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3497 if (__kmp_env_consistency_check) 3498 __kmp_pop_sync(global_tid, ct_critical, loc); 3499 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3500 } 3501 3502 #else // KMP_USE_DYNAMIC_LOCK 3503 3504 // We know that the fast reduction code is only emitted by Intel compilers 3505 // with 32 byte critical sections. If there isn't enough space, then we have 3506 // to use a pointer. 3507 if (__kmp_base_user_lock_size > 32) { 3508 lck = *((kmp_user_lock_p *)crit); 3509 KMP_ASSERT(lck != NULL); 3510 } else { 3511 lck = (kmp_user_lock_p)crit; 3512 } 3513 3514 if (__kmp_env_consistency_check) 3515 __kmp_pop_sync(global_tid, ct_critical, loc); 3516 3517 __kmp_release_user_lock_with_checks(lck, global_tid); 3518 3519 #endif // KMP_USE_DYNAMIC_LOCK 3520 } // __kmp_end_critical_section_reduce_block 3521 3522 static __forceinline int 3523 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3524 int *task_state) { 3525 kmp_team_t *team; 3526 3527 // Check if we are inside the teams construct? 3528 if (th->th.th_teams_microtask) { 3529 *team_p = team = th->th.th_team; 3530 if (team->t.t_level == th->th.th_teams_level) { 3531 // This is reduction at teams construct. 3532 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3533 // Let's swap teams temporarily for the reduction. 3534 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3535 th->th.th_team = team->t.t_parent; 3536 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3537 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3538 *task_state = th->th.th_task_state; 3539 th->th.th_task_state = 0; 3540 3541 return 1; 3542 } 3543 } 3544 return 0; 3545 } 3546 3547 static __forceinline void 3548 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3549 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3550 th->th.th_info.ds.ds_tid = 0; 3551 th->th.th_team = team; 3552 th->th.th_team_nproc = team->t.t_nproc; 3553 th->th.th_task_team = team->t.t_task_team[task_state]; 3554 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3555 } 3556 3557 /* 2.a.i. Reduce Block without a terminating barrier */ 3558 /*! 
3559 @ingroup SYNCHRONIZATION 3560 @param loc source location information 3561 @param global_tid global thread number 3562 @param num_vars number of items (variables) to be reduced 3563 @param reduce_size size of data in bytes to be reduced 3564 @param reduce_data pointer to data to be reduced 3565 @param reduce_func callback function providing reduction operation on two 3566 operands and returning result of reduction in lhs_data 3567 @param lck pointer to the unique lock data structure 3568 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3569 threads if atomic reduction needed 3570 3571 The nowait version is used for a reduce clause with the nowait argument. 3572 */ 3573 kmp_int32 3574 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3575 size_t reduce_size, void *reduce_data, 3576 void (*reduce_func)(void *lhs_data, void *rhs_data), 3577 kmp_critical_name *lck) { 3578 3579 KMP_COUNT_BLOCK(REDUCE_nowait); 3580 int retval = 0; 3581 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3582 kmp_info_t *th; 3583 kmp_team_t *team; 3584 int teams_swapped = 0, task_state; 3585 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3586 __kmp_assert_valid_gtid(global_tid); 3587 3588 // why do we need this initialization here at all? 3589 // Reduction clause can not be used as a stand-alone directive. 3590 3591 // do not call __kmp_serial_initialize(), it will be called by 3592 // __kmp_parallel_initialize() if needed 3593 // possible detection of false-positive race by the threadchecker ??? 3594 if (!TCR_4(__kmp_init_parallel)) 3595 __kmp_parallel_initialize(); 3596 3597 __kmp_resume_if_soft_paused(); 3598 3599 // check correctness of reduce block nesting 3600 #if KMP_USE_DYNAMIC_LOCK 3601 if (__kmp_env_consistency_check) 3602 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3603 #else 3604 if (__kmp_env_consistency_check) 3605 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3606 #endif 3607 3608 th = __kmp_thread_from_gtid(global_tid); 3609 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3610 3611 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3612 // the value should be kept in a variable 3613 // the variable should be either a construct-specific or thread-specific 3614 // property, not a team specific property 3615 // (a thread can reach the next reduce block on the next construct, reduce 3616 // method may differ on the next construct) 3617 // an ident_t "loc" parameter could be used as a construct-specific property 3618 // (what if loc == 0?) 
3619   // (if both construct-specific and team-specific variables were shared,
3620   // then unnecessary extra syncs would be needed)
3621   // a thread-specific variable is better regarding two issues above (next
3622   // construct and extra syncs)
3623   // a thread-specific "th_local.reduction_method" variable is used currently
3624   // each thread executes 'determine' and 'set' lines (no need to execute by one
3625   // thread, to avoid unnecessary extra syncs)
3626
3627   packed_reduction_method = __kmp_determine_reduction_method(
3628       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3629   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3630
3631   OMPT_REDUCTION_DECL(th, global_tid);
3632   if (packed_reduction_method == critical_reduce_block) {
3633
3634     OMPT_REDUCTION_BEGIN;
3635
3636     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3637     retval = 1;
3638
3639   } else if (packed_reduction_method == empty_reduce_block) {
3640
3641     OMPT_REDUCTION_BEGIN;
3642
3643     // usage: if team size == 1, no synchronization is required ( Intel
3644     // platforms only )
3645     retval = 1;
3646
3647   } else if (packed_reduction_method == atomic_reduce_block) {
3648
3649     retval = 2;
3650
3651     // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3652     // won't be called by the code gen)
3653     // (it's not quite good, because the checking block has been closed by
3654     // this 'pop',
3655     // but the atomic operation has not been executed yet; it will be executed
3656     // slightly later, literally on the next instruction)
3657     if (__kmp_env_consistency_check)
3658       __kmp_pop_sync(global_tid, ct_reduce, loc);
3659
3660   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3661                                    tree_reduce_block)) {
3662
3663     // AT: performance issue: a real barrier here
3664     // AT: (if the primary thread is slow, other threads are blocked here waiting for
3665     // the primary thread to come and release them)
3666     // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3667     // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3668     // be confusing to a customer)
3669     // AT: another implementation of *barrier_gather*nowait() (or some other design)
3670     // might go faster and be more in line with the sense of NOWAIT
3671     // AT: TO DO: do epcc test and compare times
3672
3673     // this barrier should be invisible to a customer and to the threading profile
3674     // tool (it's neither a terminating barrier nor customer's code, it's
3675     // used for an internal purpose)
3676 #if OMPT_SUPPORT
3677     // JP: can this barrier potentially lead to task scheduling?
3678     // JP: as long as there is a barrier in the implementation, OMPT should and
3679     // will provide the barrier events
3680     // so we set up the necessary frame/return addresses.
3681     ompt_frame_t *ompt_frame;
3682     if (ompt_enabled.enabled) {
3683       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3684       if (ompt_frame->enter_frame.ptr == NULL)
3685         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3686     }
3687     OMPT_STORE_RETURN_ADDRESS(global_tid);
3688 #endif
3689 #if USE_ITT_NOTIFY
3690     __kmp_threads[global_tid]->th.th_ident = loc;
3691 #endif
3692     retval =
3693         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3694                       global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3695     retval = (retval != 0) ?
(0) : (1); 3696 #if OMPT_SUPPORT && OMPT_OPTIONAL 3697 if (ompt_enabled.enabled) { 3698 ompt_frame->enter_frame = ompt_data_none; 3699 } 3700 #endif 3701 3702 // all other workers except primary thread should do this pop here 3703 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3704 if (__kmp_env_consistency_check) { 3705 if (retval == 0) { 3706 __kmp_pop_sync(global_tid, ct_reduce, loc); 3707 } 3708 } 3709 3710 } else { 3711 3712 // should never reach this block 3713 KMP_ASSERT(0); // "unexpected method" 3714 } 3715 if (teams_swapped) { 3716 __kmp_restore_swapped_teams(th, team, task_state); 3717 } 3718 KA_TRACE( 3719 10, 3720 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3721 global_tid, packed_reduction_method, retval)); 3722 3723 return retval; 3724 } 3725 3726 /*! 3727 @ingroup SYNCHRONIZATION 3728 @param loc source location information 3729 @param global_tid global thread id. 3730 @param lck pointer to the unique lock data structure 3731 3732 Finish the execution of a reduce nowait. 3733 */ 3734 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3735 kmp_critical_name *lck) { 3736 3737 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3738 3739 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3740 __kmp_assert_valid_gtid(global_tid); 3741 3742 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3743 3744 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3745 3746 if (packed_reduction_method == critical_reduce_block) { 3747 3748 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3749 OMPT_REDUCTION_END; 3750 3751 } else if (packed_reduction_method == empty_reduce_block) { 3752 3753 // usage: if team size == 1, no synchronization is required ( on Intel 3754 // platforms only ) 3755 3756 OMPT_REDUCTION_END; 3757 3758 } else if (packed_reduction_method == atomic_reduce_block) { 3759 3760 // neither primary thread nor other workers should get here 3761 // (code gen does not generate this call in case 2: atomic reduce block) 3762 // actually it's better to remove this elseif at all; 3763 // after removal this value will checked by the 'else' and will assert 3764 3765 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3766 tree_reduce_block)) { 3767 3768 // only primary thread gets here 3769 // OMPT: tree reduction is annotated in the barrier code 3770 3771 } else { 3772 3773 // should never reach this block 3774 KMP_ASSERT(0); // "unexpected method" 3775 } 3776 3777 if (__kmp_env_consistency_check) 3778 __kmp_pop_sync(global_tid, ct_reduce, loc); 3779 3780 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3781 global_tid, packed_reduction_method)); 3782 3783 return; 3784 } 3785 3786 /* 2.a.ii. Reduce Block with a terminating barrier */ 3787 3788 /*! 3789 @ingroup SYNCHRONIZATION 3790 @param loc source location information 3791 @param global_tid global thread number 3792 @param num_vars number of items (variables) to be reduced 3793 @param reduce_size size of data in bytes to be reduced 3794 @param reduce_data pointer to data to be reduced 3795 @param reduce_func callback function providing reduction operation on two 3796 operands and returning result of reduction in lhs_data 3797 @param lck pointer to the unique lock data structure 3798 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3799 threads if atomic reduction needed 3800 3801 A blocking reduce that includes an implicit barrier. 
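
A rough sketch of the calling sequence a compiler might generate for this
entry point (illustrative only; the variable names are placeholders, the
reduction variable is assumed to be a double, and real code generation
differs between compilers):
@code
  int ret = __kmpc_reduce(loc, gtid, 1, sizeof(priv_sum), &priv_sum,
                          my_reduce_func, &crit);
  if (ret == 1) {        // combine the partial results, then close the region
    shared_sum += priv_sum;
    __kmpc_end_reduce(loc, gtid, &crit);
  } else if (ret == 2) { // atomic path: every thread updates the result atomically
    __kmpc_atomic_float8_add(loc, gtid, &shared_sum, priv_sum);
    __kmpc_end_reduce(loc, gtid, &crit);
  }
@endcode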
3802 */ 3803 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3804 size_t reduce_size, void *reduce_data, 3805 void (*reduce_func)(void *lhs_data, void *rhs_data), 3806 kmp_critical_name *lck) { 3807 KMP_COUNT_BLOCK(REDUCE_wait); 3808 int retval = 0; 3809 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3810 kmp_info_t *th; 3811 kmp_team_t *team; 3812 int teams_swapped = 0, task_state; 3813 3814 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3815 __kmp_assert_valid_gtid(global_tid); 3816 3817 // why do we need this initialization here at all? 3818 // Reduction clause can not be a stand-alone directive. 3819 3820 // do not call __kmp_serial_initialize(), it will be called by 3821 // __kmp_parallel_initialize() if needed 3822 // possible detection of false-positive race by the threadchecker ??? 3823 if (!TCR_4(__kmp_init_parallel)) 3824 __kmp_parallel_initialize(); 3825 3826 __kmp_resume_if_soft_paused(); 3827 3828 // check correctness of reduce block nesting 3829 #if KMP_USE_DYNAMIC_LOCK 3830 if (__kmp_env_consistency_check) 3831 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3832 #else 3833 if (__kmp_env_consistency_check) 3834 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3835 #endif 3836 3837 th = __kmp_thread_from_gtid(global_tid); 3838 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3839 3840 packed_reduction_method = __kmp_determine_reduction_method( 3841 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3842 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3843 3844 OMPT_REDUCTION_DECL(th, global_tid); 3845 3846 if (packed_reduction_method == critical_reduce_block) { 3847 3848 OMPT_REDUCTION_BEGIN; 3849 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3850 retval = 1; 3851 3852 } else if (packed_reduction_method == empty_reduce_block) { 3853 3854 OMPT_REDUCTION_BEGIN; 3855 // usage: if team size == 1, no synchronization is required ( Intel 3856 // platforms only ) 3857 retval = 1; 3858 3859 } else if (packed_reduction_method == atomic_reduce_block) { 3860 3861 retval = 2; 3862 3863 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3864 tree_reduce_block)) { 3865 3866 // case tree_reduce_block: 3867 // this barrier should be visible to a customer and to the threading profile 3868 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3869 #if OMPT_SUPPORT 3870 ompt_frame_t *ompt_frame; 3871 if (ompt_enabled.enabled) { 3872 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3873 if (ompt_frame->enter_frame.ptr == NULL) 3874 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3875 } 3876 OMPT_STORE_RETURN_ADDRESS(global_tid); 3877 #endif 3878 #if USE_ITT_NOTIFY 3879 __kmp_threads[global_tid]->th.th_ident = 3880 loc; // needed for correct notification of frames 3881 #endif 3882 retval = 3883 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3884 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3885 retval = (retval != 0) ? 
(0) : (1); 3886 #if OMPT_SUPPORT && OMPT_OPTIONAL 3887 if (ompt_enabled.enabled) { 3888 ompt_frame->enter_frame = ompt_data_none; 3889 } 3890 #endif 3891 3892 // all other workers except primary thread should do this pop here 3893 // (none of other workers except primary will enter __kmpc_end_reduce()) 3894 if (__kmp_env_consistency_check) { 3895 if (retval == 0) { // 0: all other workers; 1: primary thread 3896 __kmp_pop_sync(global_tid, ct_reduce, loc); 3897 } 3898 } 3899 3900 } else { 3901 3902 // should never reach this block 3903 KMP_ASSERT(0); // "unexpected method" 3904 } 3905 if (teams_swapped) { 3906 __kmp_restore_swapped_teams(th, team, task_state); 3907 } 3908 3909 KA_TRACE(10, 3910 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3911 global_tid, packed_reduction_method, retval)); 3912 return retval; 3913 } 3914 3915 /*! 3916 @ingroup SYNCHRONIZATION 3917 @param loc source location information 3918 @param global_tid global thread id. 3919 @param lck pointer to the unique lock data structure 3920 3921 Finish the execution of a blocking reduce. 3922 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3923 start function. 3924 */ 3925 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3926 kmp_critical_name *lck) { 3927 3928 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3929 kmp_info_t *th; 3930 kmp_team_t *team; 3931 int teams_swapped = 0, task_state; 3932 3933 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3934 __kmp_assert_valid_gtid(global_tid); 3935 3936 th = __kmp_thread_from_gtid(global_tid); 3937 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3938 3939 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3940 3941 // this barrier should be visible to a customer and to the threading profile 3942 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3943 OMPT_REDUCTION_DECL(th, global_tid); 3944 3945 if (packed_reduction_method == critical_reduce_block) { 3946 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3947 3948 OMPT_REDUCTION_END; 3949 3950 // TODO: implicit barrier: should be exposed 3951 #if OMPT_SUPPORT 3952 ompt_frame_t *ompt_frame; 3953 if (ompt_enabled.enabled) { 3954 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3955 if (ompt_frame->enter_frame.ptr == NULL) 3956 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3957 } 3958 OMPT_STORE_RETURN_ADDRESS(global_tid); 3959 #endif 3960 #if USE_ITT_NOTIFY 3961 __kmp_threads[global_tid]->th.th_ident = loc; 3962 #endif 3963 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3964 #if OMPT_SUPPORT && OMPT_OPTIONAL 3965 if (ompt_enabled.enabled) { 3966 ompt_frame->enter_frame = ompt_data_none; 3967 } 3968 #endif 3969 3970 } else if (packed_reduction_method == empty_reduce_block) { 3971 3972 OMPT_REDUCTION_END; 3973 3974 // usage: if team size==1, no synchronization is required (Intel platforms only) 3975 3976 // TODO: implicit barrier: should be exposed 3977 #if OMPT_SUPPORT 3978 ompt_frame_t *ompt_frame; 3979 if (ompt_enabled.enabled) { 3980 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3981 if (ompt_frame->enter_frame.ptr == NULL) 3982 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3983 } 3984 OMPT_STORE_RETURN_ADDRESS(global_tid); 3985 #endif 3986 #if USE_ITT_NOTIFY 3987 __kmp_threads[global_tid]->th.th_ident = loc; 3988 #endif 3989 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3990 #if OMPT_SUPPORT && OMPT_OPTIONAL 3991 if (ompt_enabled.enabled) { 3992 ompt_frame->enter_frame = ompt_data_none; 3993 } 3994 #endif 3995 3996 } else if (packed_reduction_method == atomic_reduce_block) { 3997 3998 #if OMPT_SUPPORT 3999 ompt_frame_t *ompt_frame; 4000 if (ompt_enabled.enabled) { 4001 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 4002 if (ompt_frame->enter_frame.ptr == NULL) 4003 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 4004 } 4005 OMPT_STORE_RETURN_ADDRESS(global_tid); 4006 #endif 4007 // TODO: implicit barrier: should be exposed 4008 #if USE_ITT_NOTIFY 4009 __kmp_threads[global_tid]->th.th_ident = loc; 4010 #endif 4011 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 4012 #if OMPT_SUPPORT && OMPT_OPTIONAL 4013 if (ompt_enabled.enabled) { 4014 ompt_frame->enter_frame = ompt_data_none; 4015 } 4016 #endif 4017 4018 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 4019 tree_reduce_block)) { 4020 4021 // only primary thread executes here (primary releases all other workers) 4022 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 4023 global_tid); 4024 4025 } else { 4026 4027 // should never reach this block 4028 KMP_ASSERT(0); // "unexpected method" 4029 } 4030 if (teams_swapped) { 4031 __kmp_restore_swapped_teams(th, team, task_state); 4032 } 4033 4034 if (__kmp_env_consistency_check) 4035 __kmp_pop_sync(global_tid, ct_reduce, loc); 4036 4037 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 4038 global_tid, packed_reduction_method)); 4039 4040 return; 4041 } 4042 4043 #undef __KMP_GET_REDUCTION_METHOD 4044 #undef __KMP_SET_REDUCTION_METHOD 4045 4046 /* end of interface to fast scalable reduce routines */ 4047 4048 kmp_uint64 __kmpc_get_taskid() { 4049 4050 kmp_int32 gtid; 4051 kmp_info_t *thread; 4052 4053 gtid = __kmp_get_gtid(); 4054 if (gtid < 0) { 4055 return 0; 4056 } 4057 thread = __kmp_thread_from_gtid(gtid); 4058 return thread->th.th_current_task->td_task_id; 4059 4060 } // __kmpc_get_taskid 4061 4062 kmp_uint64 __kmpc_get_parent_taskid() { 4063 4064 kmp_int32 gtid; 4065 kmp_info_t *thread; 4066 kmp_taskdata_t *parent_task; 4067 4068 gtid = __kmp_get_gtid(); 4069 if (gtid < 0) { 4070 return 0; 4071 } 4072 thread = __kmp_thread_from_gtid(gtid); 4073 parent_task = thread->th.th_current_task->td_parent; 4074 return (parent_task == NULL ? 0 : parent_task->td_task_id); 4075 4076 } // __kmpc_get_parent_taskid 4077 4078 /*! 4079 @ingroup WORK_SHARING 4080 @param loc source location information. 4081 @param gtid global thread number. 4082 @param num_dims number of associated doacross loops. 4083 @param dims info on loops bounds. 4084 4085 Initialize doacross loop information. 4086 Expect compiler send us inclusive bounds, 4087 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
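
As an illustration (compiler dependent; the names below are placeholders), a
doacross loop nest such as
@code
  #pragma omp for ordered(1)
  for (i = 2; i < 9; i += 2) {
    #pragma omp ordered depend(sink : i - 2)
    // ... loop body ...
    #pragma omp ordered depend(source)
  }
@endcode
is expected to be lowered to roughly the following runtime calls:
@code
  struct kmp_dim dim = {2, 8, 2}; // lo, up, st (inclusive bounds)
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (kmp_int64 i = chunk_lo; i <= chunk_up; i += 2) {
    kmp_int64 vec = i - 2;
    __kmpc_doacross_wait(loc, gtid, &vec); // depend(sink : i - 2)
    // ... loop body ...
    vec = i;
    __kmpc_doacross_post(loc, gtid, &vec); // depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);
@endcode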
4088 */ 4089 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 4090 const struct kmp_dim *dims) { 4091 __kmp_assert_valid_gtid(gtid); 4092 int j, idx; 4093 kmp_int64 last, trace_count; 4094 kmp_info_t *th = __kmp_threads[gtid]; 4095 kmp_team_t *team = th->th.th_team; 4096 kmp_uint32 *flags; 4097 kmp_disp_t *pr_buf = th->th.th_dispatch; 4098 dispatch_shared_info_t *sh_buf; 4099 4100 KA_TRACE( 4101 20, 4102 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4103 gtid, num_dims, !team->t.t_serialized)); 4104 KMP_DEBUG_ASSERT(dims != NULL); 4105 KMP_DEBUG_ASSERT(num_dims > 0); 4106 4107 if (team->t.t_serialized) { 4108 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4109 return; // no dependencies if team is serialized 4110 } 4111 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4112 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4113 // the next loop 4114 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4115 4116 // Save bounds info into allocated private buffer 4117 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4118 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4119 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4120 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4121 pr_buf->th_doacross_info[0] = 4122 (kmp_int64)num_dims; // first element is number of dimensions 4123 // Save also address of num_done in order to access it later without knowing 4124 // the buffer index 4125 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4126 pr_buf->th_doacross_info[2] = dims[0].lo; 4127 pr_buf->th_doacross_info[3] = dims[0].up; 4128 pr_buf->th_doacross_info[4] = dims[0].st; 4129 last = 5; 4130 for (j = 1; j < num_dims; ++j) { 4131 kmp_int64 4132 range_length; // To keep ranges of all dimensions but the first dims[0] 4133 if (dims[j].st == 1) { // most common case 4134 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4135 range_length = dims[j].up - dims[j].lo + 1; 4136 } else { 4137 if (dims[j].st > 0) { 4138 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4139 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4140 } else { // negative increment 4141 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4142 range_length = 4143 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4144 } 4145 } 4146 pr_buf->th_doacross_info[last++] = range_length; 4147 pr_buf->th_doacross_info[last++] = dims[j].lo; 4148 pr_buf->th_doacross_info[last++] = dims[j].up; 4149 pr_buf->th_doacross_info[last++] = dims[j].st; 4150 } 4151 4152 // Compute total trip count. 4153 // Start with range of dims[0] which we don't need to keep in the buffer. 
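  // For example, with the inclusive bounds from the function documentation
  // above (lo=2, up=8, st=2) this gives (8 - 2) / 2 + 1 = 4 iterations.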
4154 if (dims[0].st == 1) { // most common case 4155 trace_count = dims[0].up - dims[0].lo + 1; 4156 } else if (dims[0].st > 0) { 4157 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4158 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4159 } else { // negative increment 4160 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4161 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4162 } 4163 for (j = 1; j < num_dims; ++j) { 4164 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4165 } 4166 KMP_DEBUG_ASSERT(trace_count > 0); 4167 4168 // Check if shared buffer is not occupied by other loop (idx - 4169 // __kmp_dispatch_num_buffers) 4170 if (idx != sh_buf->doacross_buf_idx) { 4171 // Shared buffer is occupied, wait for it to be free 4172 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4173 __kmp_eq_4, NULL); 4174 } 4175 #if KMP_32_BIT_ARCH 4176 // Check if we are the first thread. After the CAS the first thread gets 0, 4177 // others get 1 if initialization is in progress, allocated pointer otherwise. 4178 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4179 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4180 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4181 #else 4182 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4183 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4184 #endif 4185 if (flags == NULL) { 4186 // we are the first thread, allocate the array of flags 4187 size_t size = 4188 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4189 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4190 KMP_MB(); 4191 sh_buf->doacross_flags = flags; 4192 } else if (flags == (kmp_uint32 *)1) { 4193 #if KMP_32_BIT_ARCH 4194 // initialization is still in progress, need to wait 4195 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4196 #else 4197 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4198 #endif 4199 KMP_YIELD(TRUE); 4200 KMP_MB(); 4201 } else { 4202 KMP_MB(); 4203 } 4204 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4205 pr_buf->th_doacross_flags = 4206 sh_buf->doacross_flags; // save private copy in order to not 4207 // touch shared buffer on each iteration 4208 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4209 } 4210 4211 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4212 __kmp_assert_valid_gtid(gtid); 4213 kmp_int64 shft; 4214 size_t num_dims, i; 4215 kmp_uint32 flag; 4216 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4217 kmp_info_t *th = __kmp_threads[gtid]; 4218 kmp_team_t *team = th->th.th_team; 4219 kmp_disp_t *pr_buf; 4220 kmp_int64 lo, up, st; 4221 4222 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4223 if (team->t.t_serialized) { 4224 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4225 return; // no dependencies if team is serialized 4226 } 4227 4228 // calculate sequential iteration number and check out-of-bounds condition 4229 pr_buf = th->th.th_dispatch; 4230 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4231 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4232 lo = pr_buf->th_doacross_info[2]; 4233 up = pr_buf->th_doacross_info[3]; 4234 st = pr_buf->th_doacross_info[4]; 4235 #if OMPT_SUPPORT && OMPT_OPTIONAL 4236 ompt_dependence_t deps[num_dims]; 4237 #endif 4238 if (st == 1) { // most common case 4239 if (vec[0] < lo || vec[0] > up) { 4240 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4241 "bounds [%lld,%lld]\n", 4242 gtid, vec[0], lo, up)); 4243 return; 4244 } 4245 iter_number = vec[0] - lo; 4246 } else if (st > 0) { 4247 if (vec[0] < lo || vec[0] > up) { 4248 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4249 "bounds [%lld,%lld]\n", 4250 gtid, vec[0], lo, up)); 4251 return; 4252 } 4253 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4254 } else { // negative increment 4255 if (vec[0] > lo || vec[0] < up) { 4256 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4257 "bounds [%lld,%lld]\n", 4258 gtid, vec[0], lo, up)); 4259 return; 4260 } 4261 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4262 } 4263 #if OMPT_SUPPORT && OMPT_OPTIONAL 4264 deps[0].variable.value = iter_number; 4265 deps[0].dependence_type = ompt_dependence_type_sink; 4266 #endif 4267 for (i = 1; i < num_dims; ++i) { 4268 kmp_int64 iter, ln; 4269 size_t j = i * 4; 4270 ln = pr_buf->th_doacross_info[j + 1]; 4271 lo = pr_buf->th_doacross_info[j + 2]; 4272 up = pr_buf->th_doacross_info[j + 3]; 4273 st = pr_buf->th_doacross_info[j + 4]; 4274 if (st == 1) { 4275 if (vec[i] < lo || vec[i] > up) { 4276 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4277 "bounds [%lld,%lld]\n", 4278 gtid, vec[i], lo, up)); 4279 return; 4280 } 4281 iter = vec[i] - lo; 4282 } else if (st > 0) { 4283 if (vec[i] < lo || vec[i] > up) { 4284 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4285 "bounds [%lld,%lld]\n", 4286 gtid, vec[i], lo, up)); 4287 return; 4288 } 4289 iter = (kmp_uint64)(vec[i] - lo) / st; 4290 } else { // st < 0 4291 if (vec[i] > lo || vec[i] < up) { 4292 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4293 "bounds [%lld,%lld]\n", 4294 gtid, vec[i], lo, up)); 4295 return; 4296 } 4297 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4298 } 4299 iter_number = iter + ln * iter_number; 4300 #if OMPT_SUPPORT && OMPT_OPTIONAL 4301 deps[i].variable.value = iter; 4302 deps[i].dependence_type = ompt_dependence_type_sink; 4303 #endif 4304 } 4305 shft = iter_number % 32; // use 32-bit granularity 4306 iter_number >>= 5; // divided by 32 4307 flag = 1 << shft; 4308 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4309 KMP_YIELD(TRUE); 4310 } 4311 KMP_MB(); 4312 #if OMPT_SUPPORT && OMPT_OPTIONAL 4313 if (ompt_enabled.ompt_callback_dependences) { 4314 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4315 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4316 } 4317 #endif 4318 KA_TRACE(20, 4319 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4320 gtid, (iter_number << 5) + shft)); 4321 } 4322 4323 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4324 __kmp_assert_valid_gtid(gtid); 4325 kmp_int64 shft; 4326 size_t num_dims, i; 4327 kmp_uint32 flag; 4328 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4329 kmp_info_t *th = __kmp_threads[gtid]; 4330 kmp_team_t *team = th->th.th_team; 4331 kmp_disp_t *pr_buf; 4332 kmp_int64 lo, st; 4333 4334 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4335 if (team->t.t_serialized) { 4336 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4337 return; // no dependencies if team is serialized 4338 } 4339 4340 // calculate sequential iteration number (same as in "wait" but no 4341 // out-of-bounds checks) 4342 pr_buf = th->th.th_dispatch; 4343 
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4344 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4345 lo = pr_buf->th_doacross_info[2]; 4346 st = pr_buf->th_doacross_info[4]; 4347 #if OMPT_SUPPORT && OMPT_OPTIONAL 4348 ompt_dependence_t deps[num_dims]; 4349 #endif 4350 if (st == 1) { // most common case 4351 iter_number = vec[0] - lo; 4352 } else if (st > 0) { 4353 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4354 } else { // negative increment 4355 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4356 } 4357 #if OMPT_SUPPORT && OMPT_OPTIONAL 4358 deps[0].variable.value = iter_number; 4359 deps[0].dependence_type = ompt_dependence_type_source; 4360 #endif 4361 for (i = 1; i < num_dims; ++i) { 4362 kmp_int64 iter, ln; 4363 size_t j = i * 4; 4364 ln = pr_buf->th_doacross_info[j + 1]; 4365 lo = pr_buf->th_doacross_info[j + 2]; 4366 st = pr_buf->th_doacross_info[j + 4]; 4367 if (st == 1) { 4368 iter = vec[i] - lo; 4369 } else if (st > 0) { 4370 iter = (kmp_uint64)(vec[i] - lo) / st; 4371 } else { // st < 0 4372 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4373 } 4374 iter_number = iter + ln * iter_number; 4375 #if OMPT_SUPPORT && OMPT_OPTIONAL 4376 deps[i].variable.value = iter; 4377 deps[i].dependence_type = ompt_dependence_type_source; 4378 #endif 4379 } 4380 #if OMPT_SUPPORT && OMPT_OPTIONAL 4381 if (ompt_enabled.ompt_callback_dependences) { 4382 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4383 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4384 } 4385 #endif 4386 shft = iter_number % 32; // use 32-bit granularity 4387 iter_number >>= 5; // divided by 32 4388 flag = 1 << shft; 4389 KMP_MB(); 4390 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4391 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4392 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4393 (iter_number << 5) + shft)); 4394 } 4395 4396 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4397 __kmp_assert_valid_gtid(gtid); 4398 kmp_int32 num_done; 4399 kmp_info_t *th = __kmp_threads[gtid]; 4400 kmp_team_t *team = th->th.th_team; 4401 kmp_disp_t *pr_buf = th->th.th_dispatch; 4402 4403 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4404 if (team->t.t_serialized) { 4405 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4406 return; // nothing to do 4407 } 4408 num_done = 4409 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4410 if (num_done == th->th.th_team_nproc) { 4411 // we are the last thread, need to free shared resources 4412 int idx = pr_buf->th_doacross_buf_idx - 1; 4413 dispatch_shared_info_t *sh_buf = 4414 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4415 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4416 (kmp_int64)&sh_buf->doacross_num_done); 4417 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4418 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4419 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4420 sh_buf->doacross_flags = NULL; 4421 sh_buf->doacross_num_done = 0; 4422 sh_buf->doacross_buf_idx += 4423 __kmp_dispatch_num_buffers; // free buffer for future re-use 4424 } 4425 // free private resources (need to keep buffer index forever) 4426 pr_buf->th_doacross_flags = NULL; 4427 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4428 pr_buf->th_doacross_info = NULL; 4429 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4430 } 4431 4432 /* OpenMP 5.1 Memory Management routines */ 4433 void 
*omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4434 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4435 } 4436 4437 void *omp_aligned_alloc(size_t align, size_t size, 4438 omp_allocator_handle_t allocator) { 4439 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4440 } 4441 4442 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4443 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4444 } 4445 4446 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4447 omp_allocator_handle_t allocator) { 4448 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4449 } 4450 4451 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4452 omp_allocator_handle_t free_allocator) { 4453 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4454 free_allocator); 4455 } 4456 4457 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4458 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4459 } 4460 /* end of OpenMP 5.1 Memory Management routines */ 4461 4462 int __kmpc_get_target_offload(void) { 4463 if (!__kmp_init_serial) { 4464 __kmp_serial_initialize(); 4465 } 4466 return __kmp_target_offload; 4467 } 4468 4469 int __kmpc_pause_resource(kmp_pause_status_t level) { 4470 if (!__kmp_init_serial) { 4471 return 1; // Can't pause if runtime is not initialized 4472 } 4473 return __kmp_pause_resource(level); 4474 } 4475 4476 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4477 if (!__kmp_init_serial) 4478 __kmp_serial_initialize(); 4479 4480 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4481 4482 #if OMPT_SUPPORT 4483 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4484 ompt_callbacks.ompt_callback(ompt_callback_error)( 4485 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4486 OMPT_GET_RETURN_ADDRESS(0)); 4487 } 4488 #endif // OMPT_SUPPORT 4489 4490 char *src_loc; 4491 if (loc && loc->psource) { 4492 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4493 src_loc = 4494 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col); 4495 __kmp_str_loc_free(&str_loc); 4496 } else { 4497 src_loc = __kmp_str_format("unknown"); 4498 } 4499 4500 if (severity == severity_warning) 4501 KMP_WARNING(UserDirectedWarning, src_loc, message); 4502 else 4503 KMP_FATAL(UserDirectedError, src_loc, message); 4504 4505 __kmp_str_free(&src_loc); 4506 } 4507 4508 // Mark begin of scope directive. 4509 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4510 // reserved is for extension of scope directive and not used. 4511 #if OMPT_SUPPORT && OMPT_OPTIONAL 4512 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4513 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4514 int tid = __kmp_tid_from_gtid(gtid); 4515 ompt_callbacks.ompt_callback(ompt_callback_work)( 4516 ompt_work_scope, ompt_scope_begin, 4517 &(team->t.ompt_team_info.parallel_data), 4518 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4519 OMPT_GET_RETURN_ADDRESS(0)); 4520 } 4521 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4522 } 4523 4524 // Mark end of scope directive 4525 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4526 // reserved is for extension of scope directive and not used. 
4527 #if OMPT_SUPPORT && OMPT_OPTIONAL 4528 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4529 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4530 int tid = __kmp_tid_from_gtid(gtid); 4531 ompt_callbacks.ompt_callback(ompt_callback_work)( 4532 ompt_work_scope, ompt_scope_end, 4533 &(team->t.ompt_team_info.parallel_data), 4534 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4535 OMPT_GET_RETURN_ADDRESS(0)); 4536 } 4537 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4538 } 4539 4540 #ifdef KMP_USE_VERSION_SYMBOLS 4541 // For GOMP compatibility there are two versions of each omp_* API. 4542 // One is the plain C symbol and one is the Fortran symbol with an appended 4543 // underscore. When we implement a specific ompc_* version of an omp_* 4544 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4545 // instead of the Fortran versions in kmp_ftn_entry.h 4546 extern "C" { 4547 // Have to undef these from omp.h so they aren't translated into 4548 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4549 #ifdef omp_set_affinity_format 4550 #undef omp_set_affinity_format 4551 #endif 4552 #ifdef omp_get_affinity_format 4553 #undef omp_get_affinity_format 4554 #endif 4555 #ifdef omp_display_affinity 4556 #undef omp_display_affinity 4557 #endif 4558 #ifdef omp_capture_affinity 4559 #undef omp_capture_affinity 4560 #endif 4561 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4562 "OMP_5.0"); 4563 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4564 "OMP_5.0"); 4565 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4566 "OMP_5.0"); 4567 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4568 "OMP_5.0"); 4569 } // extern "C" 4570 #endif 4571
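
// The OpenMP 5.1 memory management entry points defined earlier in this file
// are meant to be called directly by user code. An illustrative, sketch-only
// usage, assuming the predefined omp_default_mem_alloc allocator:
//
//   double *a = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
//                                           omp_default_mem_alloc);
//   double *b = (double *)omp_calloc(1024, sizeof(double),
//                                    omp_default_mem_alloc);
//   // ... use a and b ...
//   omp_free(a, omp_default_mem_alloc);
//   omp_free(b, omp_default_mem_alloc);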