1 /* 2 * kmp_gsupport.cpp 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "kmp.h" 14 #include "kmp_atomic.h" 15 16 #if OMPT_SUPPORT 17 #include "ompt-specific.h" 18 #endif 19 20 enum { 21 KMP_GOMP_TASK_UNTIED_FLAG = 1, 22 KMP_GOMP_TASK_FINAL_FLAG = 2, 23 KMP_GOMP_TASK_DEPENDS_FLAG = 8 24 }; 25 26 // This class helps convert gomp dependency info into 27 // kmp_depend_info_t structures 28 class kmp_gomp_depends_info_t { 29 void **depend; 30 kmp_int32 num_deps; 31 size_t num_out, num_mutexinout, num_in; 32 size_t offset; 33 34 public: 35 kmp_gomp_depends_info_t(void **depend) : depend(depend) { 36 size_t ndeps = (kmp_intptr_t)depend[0]; 37 size_t num_doable; 38 // GOMP taskdep structure: 39 // if depend[0] != 0: 40 // depend = [ ndeps | nout | &out | ... | &out | &in | ... | &in ] 41 // 42 // if depend[0] == 0: 43 // depend = [ 0 | ndeps | nout | nmtx | nin | &out | ... | &out | &mtx | 44 // ... | &mtx | &in | ... | &in | &depobj | ... | &depobj ] 45 if (ndeps) { 46 num_out = (kmp_intptr_t)depend[1]; 47 num_in = ndeps - num_out; 48 num_mutexinout = 0; 49 num_doable = ndeps; 50 offset = 2; 51 } else { 52 ndeps = (kmp_intptr_t)depend[1]; 53 num_out = (kmp_intptr_t)depend[2]; 54 num_mutexinout = (kmp_intptr_t)depend[3]; 55 num_in = (kmp_intptr_t)depend[4]; 56 num_doable = num_out + num_mutexinout + num_in; 57 offset = 5; 58 } 59 // TODO: Support gomp depobj 60 if (ndeps != num_doable) { 61 KMP_FATAL(GompFeatureNotSupported, "depobj"); 62 } 63 num_deps = static_cast<kmp_int32>(ndeps); 64 } 65 kmp_int32 get_num_deps() const { return num_deps; } 66 kmp_depend_info_t get_kmp_depend(size_t index) const { 67 kmp_depend_info_t retval; 68 memset(&retval, '\0', sizeof(retval)); 69 KMP_ASSERT(index < (size_t)num_deps); 70 retval.base_addr = (kmp_intptr_t)depend[offset + index]; 71 retval.len = 0; 72 // Because inout and out are logically equivalent, 73 // use inout and in dependency flags. GOMP does not provide a 74 // way to distinguish if user specified out vs. inout. 75 if (index < num_out) { 76 retval.flags.in = 1; 77 retval.flags.out = 1; 78 } else if (index >= num_out && index < (num_out + num_mutexinout)) { 79 retval.flags.mtx = 1; 80 } else { 81 retval.flags.in = 1; 82 } 83 return retval; 84 } 85 }; 86 87 #ifdef __cplusplus 88 extern "C" { 89 #endif // __cplusplus 90 91 #define MKLOC(loc, routine) \ 92 static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; 93 94 #include "kmp_ftn_os.h" 95 96 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) { 97 int gtid = __kmp_entry_gtid(); 98 MKLOC(loc, "GOMP_barrier"); 99 KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); 100 #if OMPT_SUPPORT && OMPT_OPTIONAL 101 ompt_frame_t *ompt_frame; 102 if (ompt_enabled.enabled) { 103 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 104 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 105 } 106 OMPT_STORE_RETURN_ADDRESS(gtid); 107 #endif 108 __kmpc_barrier(&loc, gtid); 109 #if OMPT_SUPPORT && OMPT_OPTIONAL 110 if (ompt_enabled.enabled) { 111 ompt_frame->enter_frame = ompt_data_none; 112 } 113 #endif 114 } 115 116 // Mutual exclusion 117 118 // The symbol that icc/ifort generates for unnamed for unnamed critical sections 119 // - .gomp_critical_user_ - is defined using .comm in any objects reference it. 120 // We can't reference it directly here in C code, as the symbol contains a ".". 121 // 122 // The RTL contains an assembly language definition of .gomp_critical_user_ 123 // with another symbol __kmp_unnamed_critical_addr initialized with it's 124 // address. 125 extern kmp_critical_name *__kmp_unnamed_critical_addr; 126 127 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) { 128 int gtid = __kmp_entry_gtid(); 129 MKLOC(loc, "GOMP_critical_start"); 130 KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); 131 #if OMPT_SUPPORT && OMPT_OPTIONAL 132 OMPT_STORE_RETURN_ADDRESS(gtid); 133 #endif 134 __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); 135 } 136 137 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) { 138 int gtid = __kmp_get_gtid(); 139 MKLOC(loc, "GOMP_critical_end"); 140 KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); 141 #if OMPT_SUPPORT && OMPT_OPTIONAL 142 OMPT_STORE_RETURN_ADDRESS(gtid); 143 #endif 144 __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); 145 } 146 147 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) { 148 int gtid = __kmp_entry_gtid(); 149 MKLOC(loc, "GOMP_critical_name_start"); 150 KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid)); 151 __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr); 152 } 153 154 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) { 155 int gtid = __kmp_get_gtid(); 156 MKLOC(loc, "GOMP_critical_name_end"); 157 KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid)); 158 __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr); 159 } 160 161 // The Gnu codegen tries to use locked operations to perform atomic updates 162 // inline. If it can't, then it calls GOMP_atomic_start() before performing 163 // the update and GOMP_atomic_end() afterward, regardless of the data type. 164 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) { 165 int gtid = __kmp_entry_gtid(); 166 KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); 167 168 #if OMPT_SUPPORT 169 __ompt_thread_assign_wait_id(0); 170 #endif 171 172 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 173 } 174 175 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) { 176 int gtid = __kmp_get_gtid(); 177 KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid)); 178 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 179 } 180 181 int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) { 182 int gtid = __kmp_entry_gtid(); 183 MKLOC(loc, "GOMP_single_start"); 184 KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid)); 185 186 if (!TCR_4(__kmp_init_parallel)) 187 __kmp_parallel_initialize(); 188 __kmp_resume_if_soft_paused(); 189 190 // 3rd parameter == FALSE prevents kmp_enter_single from pushing a 191 // workshare when USE_CHECKS is defined. We need to avoid the push, 192 // as there is no corresponding GOMP_single_end() call. 193 kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE); 194 195 #if OMPT_SUPPORT && OMPT_OPTIONAL 196 kmp_info_t *this_thr = __kmp_threads[gtid]; 197 kmp_team_t *team = this_thr->th.th_team; 198 int tid = __kmp_tid_from_gtid(gtid); 199 200 if (ompt_enabled.enabled) { 201 if (rc) { 202 if (ompt_enabled.ompt_callback_work) { 203 ompt_callbacks.ompt_callback(ompt_callback_work)( 204 ompt_work_single_executor, ompt_scope_begin, 205 &(team->t.ompt_team_info.parallel_data), 206 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 207 1, OMPT_GET_RETURN_ADDRESS(0)); 208 } 209 } else { 210 if (ompt_enabled.ompt_callback_work) { 211 ompt_callbacks.ompt_callback(ompt_callback_work)( 212 ompt_work_single_other, ompt_scope_begin, 213 &(team->t.ompt_team_info.parallel_data), 214 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 215 1, OMPT_GET_RETURN_ADDRESS(0)); 216 ompt_callbacks.ompt_callback(ompt_callback_work)( 217 ompt_work_single_other, ompt_scope_end, 218 &(team->t.ompt_team_info.parallel_data), 219 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 220 1, OMPT_GET_RETURN_ADDRESS(0)); 221 } 222 } 223 } 224 #endif 225 226 return rc; 227 } 228 229 void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { 230 void *retval; 231 int gtid = __kmp_entry_gtid(); 232 MKLOC(loc, "GOMP_single_copy_start"); 233 KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid)); 234 235 if (!TCR_4(__kmp_init_parallel)) 236 __kmp_parallel_initialize(); 237 __kmp_resume_if_soft_paused(); 238 239 // If this is the first thread to enter, return NULL. The generated code will 240 // then call GOMP_single_copy_end() for this thread only, with the 241 // copyprivate data pointer as an argument. 242 if (__kmp_enter_single(gtid, &loc, FALSE)) 243 return NULL; 244 245 // Wait for the first thread to set the copyprivate data pointer, 246 // and for all other threads to reach this point. 247 248 #if OMPT_SUPPORT && OMPT_OPTIONAL 249 ompt_frame_t *ompt_frame; 250 if (ompt_enabled.enabled) { 251 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 252 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 253 } 254 OMPT_STORE_RETURN_ADDRESS(gtid); 255 #endif 256 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 257 258 // Retrieve the value of the copyprivate data point, and wait for all 259 // threads to do likewise, then return. 260 retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; 261 { 262 #if OMPT_SUPPORT && OMPT_OPTIONAL 263 OMPT_STORE_RETURN_ADDRESS(gtid); 264 #endif 265 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 266 } 267 #if OMPT_SUPPORT && OMPT_OPTIONAL 268 if (ompt_enabled.enabled) { 269 ompt_frame->enter_frame = ompt_data_none; 270 } 271 #endif 272 return retval; 273 } 274 275 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { 276 int gtid = __kmp_get_gtid(); 277 KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid)); 278 279 // Set the copyprivate data pointer fo the team, then hit the barrier so that 280 // the other threads will continue on and read it. Hit another barrier before 281 // continuing, so that the know that the copyprivate data pointer has been 282 // propagated to all threads before trying to reuse the t_copypriv_data field. 283 __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; 284 #if OMPT_SUPPORT && OMPT_OPTIONAL 285 ompt_frame_t *ompt_frame; 286 if (ompt_enabled.enabled) { 287 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 288 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 289 } 290 OMPT_STORE_RETURN_ADDRESS(gtid); 291 #endif 292 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 293 { 294 #if OMPT_SUPPORT && OMPT_OPTIONAL 295 OMPT_STORE_RETURN_ADDRESS(gtid); 296 #endif 297 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 298 } 299 #if OMPT_SUPPORT && OMPT_OPTIONAL 300 if (ompt_enabled.enabled) { 301 ompt_frame->enter_frame = ompt_data_none; 302 } 303 #endif 304 } 305 306 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) { 307 int gtid = __kmp_entry_gtid(); 308 MKLOC(loc, "GOMP_ordered_start"); 309 KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); 310 #if OMPT_SUPPORT && OMPT_OPTIONAL 311 OMPT_STORE_RETURN_ADDRESS(gtid); 312 #endif 313 __kmpc_ordered(&loc, gtid); 314 } 315 316 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) { 317 int gtid = __kmp_get_gtid(); 318 MKLOC(loc, "GOMP_ordered_end"); 319 KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); 320 #if OMPT_SUPPORT && OMPT_OPTIONAL 321 OMPT_STORE_RETURN_ADDRESS(gtid); 322 #endif 323 __kmpc_end_ordered(&loc, gtid); 324 } 325 326 // Dispatch macro defs 327 // 328 // They come in two flavors: 64-bit unsigned, and either 32-bit signed 329 // (IA-32 architecture) or 64-bit signed (Intel(R) 64). 330 331 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS 332 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 333 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 334 #define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 335 #else 336 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8 337 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8 338 #define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8 339 #endif /* KMP_ARCH_X86 */ 340 341 #define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u 342 #define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u 343 #define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u 344 345 // The parallel construct 346 347 #ifndef KMP_DEBUG 348 static 349 #endif /* KMP_DEBUG */ 350 void 351 __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *), 352 void *data) { 353 #if OMPT_SUPPORT 354 kmp_info_t *thr; 355 ompt_frame_t *ompt_frame; 356 ompt_state_t enclosing_state; 357 358 if (ompt_enabled.enabled) { 359 // get pointer to thread data structure 360 thr = __kmp_threads[*gtid]; 361 362 // save enclosing task state; set current state for task 363 enclosing_state = thr->th.ompt_thread_info.state; 364 thr->th.ompt_thread_info.state = ompt_state_work_parallel; 365 366 // set task frame 367 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 368 ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 369 } 370 #endif 371 372 task(data); 373 374 #if OMPT_SUPPORT 375 if (ompt_enabled.enabled) { 376 // clear task frame 377 ompt_frame->exit_frame = ompt_data_none; 378 379 // restore enclosing state 380 thr->th.ompt_thread_info.state = enclosing_state; 381 } 382 #endif 383 } 384 385 #ifndef KMP_DEBUG 386 static 387 #endif /* KMP_DEBUG */ 388 void 389 __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr, 390 void (*task)(void *), void *data, 391 unsigned num_threads, ident_t *loc, 392 enum sched_type schedule, long start, 393 long end, long incr, 394 long chunk_size) { 395 // Initialize the loop worksharing construct. 396 397 KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, 398 schedule != kmp_sch_static); 399 400 #if OMPT_SUPPORT 401 kmp_info_t *thr; 402 ompt_frame_t *ompt_frame; 403 ompt_state_t enclosing_state; 404 405 if (ompt_enabled.enabled) { 406 thr = __kmp_threads[*gtid]; 407 // save enclosing task state; set current state for task 408 enclosing_state = thr->th.ompt_thread_info.state; 409 thr->th.ompt_thread_info.state = ompt_state_work_parallel; 410 411 // set task frame 412 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 413 ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 414 } 415 #endif 416 417 // Now invoke the microtask. 418 task(data); 419 420 #if OMPT_SUPPORT 421 if (ompt_enabled.enabled) { 422 // clear task frame 423 ompt_frame->exit_frame = ompt_data_none; 424 425 // reset enclosing state 426 thr->th.ompt_thread_info.state = enclosing_state; 427 } 428 #endif 429 } 430 431 static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads, 432 unsigned flags, void (*unwrapped_task)(void *), 433 microtask_t wrapper, int argc, ...) { 434 int rc; 435 kmp_info_t *thr = __kmp_threads[gtid]; 436 kmp_team_t *team = thr->th.th_team; 437 int tid = __kmp_tid_from_gtid(gtid); 438 439 va_list ap; 440 va_start(ap, argc); 441 442 if (num_threads != 0) 443 __kmp_push_num_threads(loc, gtid, num_threads); 444 if (flags != 0) 445 __kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags); 446 rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, 447 __kmp_invoke_task_func, kmp_va_addr_of(ap)); 448 449 va_end(ap); 450 451 if (rc) { 452 __kmp_run_before_invoked_task(gtid, tid, thr, team); 453 } 454 455 #if OMPT_SUPPORT 456 int ompt_team_size; 457 if (ompt_enabled.enabled) { 458 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 459 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 460 461 // implicit task callback 462 if (ompt_enabled.ompt_callback_implicit_task) { 463 ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; 464 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 465 ompt_scope_begin, &(team_info->parallel_data), 466 &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid), 467 ompt_task_implicit); // TODO: Can this be ompt_task_initial? 468 task_info->thread_num = __kmp_tid_from_gtid(gtid); 469 } 470 thr->th.ompt_thread_info.state = ompt_state_work_parallel; 471 } 472 #endif 473 } 474 475 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), 476 void *data, 477 unsigned num_threads) { 478 int gtid = __kmp_entry_gtid(); 479 480 #if OMPT_SUPPORT 481 ompt_frame_t *parent_frame, *frame; 482 483 if (ompt_enabled.enabled) { 484 __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); 485 parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 486 } 487 OMPT_STORE_RETURN_ADDRESS(gtid); 488 #endif 489 490 MKLOC(loc, "GOMP_parallel_start"); 491 KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); 492 __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, 493 (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, 494 data); 495 #if OMPT_SUPPORT 496 if (ompt_enabled.enabled) { 497 __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); 498 frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 499 } 500 #endif 501 #if OMPD_SUPPORT 502 if (ompd_state & OMPD_ENABLE_BP) 503 ompd_bp_parallel_begin(); 504 #endif 505 } 506 507 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) { 508 int gtid = __kmp_get_gtid(); 509 kmp_info_t *thr; 510 511 thr = __kmp_threads[gtid]; 512 513 MKLOC(loc, "GOMP_parallel_end"); 514 KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); 515 516 if (!thr->th.th_team->t.t_serialized) { 517 __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, 518 thr->th.th_team); 519 } 520 #if OMPT_SUPPORT 521 if (ompt_enabled.enabled) { 522 // Implicit task is finished here, in the barrier we might schedule 523 // deferred tasks, 524 // these don't see the implicit task on the stack 525 OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none; 526 } 527 #endif 528 529 __kmp_join_call(&loc, gtid 530 #if OMPT_SUPPORT 531 , 532 fork_context_gnu 533 #endif 534 ); 535 #if OMPD_SUPPORT 536 if (ompd_state & OMPD_ENABLE_BP) 537 ompd_bp_parallel_end(); 538 #endif 539 } 540 541 // Loop worksharing constructs 542 543 // The Gnu codegen passes in an exclusive upper bound for the overall range, 544 // but the libguide dispatch code expects an inclusive upper bound, hence the 545 // "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th 546 // argument to __kmp_GOMP_fork_call). 547 // 548 // Conversely, KMP_DISPATCH_NEXT returns and inclusive upper bound in *p_ub, 549 // but the Gnu codegen expects an exclusive upper bound, so the adjustment 550 // "*p_ub += stride" compensates for the discrepancy. 551 // 552 // Correction: the gnu codegen always adjusts the upper bound by +-1, not the 553 // stride value. We adjust the dispatch parameters accordingly (by +-1), but 554 // we still adjust p_ub by the actual stride value. 555 // 556 // The "runtime" versions do not take a chunk_sz parameter. 557 // 558 // The profile lib cannot support construct checking of unordered loops that 559 // are predetermined by the compiler to be statically scheduled, as the gcc 560 // codegen will not always emit calls to GOMP_loop_static_next() to get the 561 // next iteration. Instead, it emits inline code to call omp_get_thread_num() 562 // num and calculate the iteration space using the result. It doesn't do this 563 // with ordered static loop, so they can be checked. 564 565 #if OMPT_SUPPORT 566 #define IF_OMPT_SUPPORT(code) code 567 #else 568 #define IF_OMPT_SUPPORT(code) 569 #endif 570 571 #define LOOP_START(func, schedule) \ 572 int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \ 573 long *p_ub) { \ 574 int status; \ 575 long stride; \ 576 int gtid = __kmp_entry_gtid(); \ 577 MKLOC(loc, KMP_STR(func)); \ 578 KA_TRACE( \ 579 20, \ 580 (KMP_STR( \ 581 func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ 582 gtid, lb, ub, str, chunk_sz)); \ 583 \ 584 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 585 { \ 586 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 587 KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ 588 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 589 (schedule) != kmp_sch_static); \ 590 } \ 591 { \ 592 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 593 status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ 594 (kmp_int *)p_ub, (kmp_int *)&stride); \ 595 } \ 596 if (status) { \ 597 KMP_DEBUG_ASSERT(stride == str); \ 598 *p_ub += (str > 0) ? 1 : -1; \ 599 } \ 600 } else { \ 601 status = 0; \ 602 } \ 603 \ 604 KA_TRACE( \ 605 20, \ 606 (KMP_STR( \ 607 func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ 608 gtid, *p_lb, *p_ub, status)); \ 609 return status; \ 610 } 611 612 #define LOOP_RUNTIME_START(func, schedule) \ 613 int func(long lb, long ub, long str, long *p_lb, long *p_ub) { \ 614 int status; \ 615 long stride; \ 616 long chunk_sz = 0; \ 617 int gtid = __kmp_entry_gtid(); \ 618 MKLOC(loc, KMP_STR(func)); \ 619 KA_TRACE( \ 620 20, \ 621 (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ 622 gtid, lb, ub, str, chunk_sz)); \ 623 \ 624 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 625 { \ 626 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 627 KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ 628 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 629 TRUE); \ 630 } \ 631 { \ 632 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 633 status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ 634 (kmp_int *)p_ub, (kmp_int *)&stride); \ 635 } \ 636 if (status) { \ 637 KMP_DEBUG_ASSERT(stride == str); \ 638 *p_ub += (str > 0) ? 1 : -1; \ 639 } \ 640 } else { \ 641 status = 0; \ 642 } \ 643 \ 644 KA_TRACE( \ 645 20, \ 646 (KMP_STR( \ 647 func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ 648 gtid, *p_lb, *p_ub, status)); \ 649 return status; \ 650 } 651 652 #define KMP_DOACROSS_FINI(status, gtid) \ 653 if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) { \ 654 __kmpc_doacross_fini(NULL, gtid); \ 655 } 656 657 #define LOOP_NEXT(func, fini_code) \ 658 int func(long *p_lb, long *p_ub) { \ 659 int status; \ 660 long stride; \ 661 int gtid = __kmp_get_gtid(); \ 662 MKLOC(loc, KMP_STR(func)); \ 663 KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \ 664 \ 665 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 666 fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ 667 (kmp_int *)p_ub, (kmp_int *)&stride); \ 668 if (status) { \ 669 *p_ub += (stride > 0) ? 1 : -1; \ 670 } \ 671 KMP_DOACROSS_FINI(status, gtid) \ 672 \ 673 KA_TRACE( \ 674 20, \ 675 (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \ 676 "returning %d\n", \ 677 gtid, *p_lb, *p_ub, stride, status)); \ 678 return status; \ 679 } 680 681 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) 682 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) 683 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), 684 kmp_sch_dynamic_chunked) 685 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START), 686 kmp_sch_dynamic_chunked) 687 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) 688 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {}) 689 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START), 690 kmp_sch_guided_chunked) 691 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START), 692 kmp_sch_guided_chunked) 693 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) 694 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {}) 695 LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), 696 kmp_sch_runtime) 697 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) 698 LOOP_RUNTIME_START( 699 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START), 700 kmp_sch_runtime) 701 LOOP_RUNTIME_START( 702 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START), 703 kmp_sch_runtime) 704 LOOP_NEXT( 705 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT), {}) 706 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT), {}) 707 708 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), 709 kmp_ord_static) 710 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), 711 { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) 712 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), 713 kmp_ord_dynamic_chunked) 714 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), 715 { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) 716 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), 717 kmp_ord_guided_chunked) 718 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), 719 { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) 720 LOOP_RUNTIME_START( 721 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), 722 kmp_ord_runtime) 723 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), 724 { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) 725 726 #define LOOP_DOACROSS_START(func, schedule) \ 727 bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb, \ 728 long *p_ub) { \ 729 int status; \ 730 long stride, lb, ub, str; \ 731 int gtid = __kmp_entry_gtid(); \ 732 struct kmp_dim *dims = \ 733 (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ 734 MKLOC(loc, KMP_STR(func)); \ 735 for (unsigned i = 0; i < ncounts; ++i) { \ 736 dims[i].lo = 0; \ 737 dims[i].up = counts[i] - 1; \ 738 dims[i].st = 1; \ 739 } \ 740 __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ 741 lb = 0; \ 742 ub = counts[0]; \ 743 str = 1; \ 744 KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \ 745 "0x%lx, chunk_sz " \ 746 "0x%lx\n", \ 747 gtid, ncounts, lb, ub, str, chunk_sz)); \ 748 \ 749 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 750 KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ 751 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 752 (schedule) != kmp_sch_static); \ 753 status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ 754 (kmp_int *)p_ub, (kmp_int *)&stride); \ 755 if (status) { \ 756 KMP_DEBUG_ASSERT(stride == str); \ 757 *p_ub += (str > 0) ? 1 : -1; \ 758 } \ 759 } else { \ 760 status = 0; \ 761 } \ 762 KMP_DOACROSS_FINI(status, gtid); \ 763 \ 764 KA_TRACE( \ 765 20, \ 766 (KMP_STR( \ 767 func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ 768 gtid, *p_lb, *p_ub, status)); \ 769 __kmp_free(dims); \ 770 return status; \ 771 } 772 773 #define LOOP_DOACROSS_RUNTIME_START(func, schedule) \ 774 int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) { \ 775 int status; \ 776 long stride, lb, ub, str; \ 777 long chunk_sz = 0; \ 778 int gtid = __kmp_entry_gtid(); \ 779 struct kmp_dim *dims = \ 780 (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ 781 MKLOC(loc, KMP_STR(func)); \ 782 for (unsigned i = 0; i < ncounts; ++i) { \ 783 dims[i].lo = 0; \ 784 dims[i].up = counts[i] - 1; \ 785 dims[i].st = 1; \ 786 } \ 787 __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ 788 lb = 0; \ 789 ub = counts[0]; \ 790 str = 1; \ 791 KA_TRACE( \ 792 20, \ 793 (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ 794 gtid, lb, ub, str, chunk_sz)); \ 795 \ 796 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 797 KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ 798 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ 799 status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ 800 (kmp_int *)p_ub, (kmp_int *)&stride); \ 801 if (status) { \ 802 KMP_DEBUG_ASSERT(stride == str); \ 803 *p_ub += (str > 0) ? 1 : -1; \ 804 } \ 805 } else { \ 806 status = 0; \ 807 } \ 808 KMP_DOACROSS_FINI(status, gtid); \ 809 \ 810 KA_TRACE( \ 811 20, \ 812 (KMP_STR( \ 813 func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ 814 gtid, *p_lb, *p_ub, status)); \ 815 __kmp_free(dims); \ 816 return status; \ 817 } 818 819 LOOP_DOACROSS_START( 820 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START), 821 kmp_sch_static) 822 LOOP_DOACROSS_START( 823 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START), 824 kmp_sch_dynamic_chunked) 825 LOOP_DOACROSS_START( 826 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START), 827 kmp_sch_guided_chunked) 828 LOOP_DOACROSS_RUNTIME_START( 829 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START), 830 kmp_sch_runtime) 831 832 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) { 833 int gtid = __kmp_get_gtid(); 834 KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) 835 836 #if OMPT_SUPPORT && OMPT_OPTIONAL 837 ompt_frame_t *ompt_frame; 838 if (ompt_enabled.enabled) { 839 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 840 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 841 OMPT_STORE_RETURN_ADDRESS(gtid); 842 } 843 #endif 844 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 845 #if OMPT_SUPPORT && OMPT_OPTIONAL 846 if (ompt_enabled.enabled) { 847 ompt_frame->enter_frame = ompt_data_none; 848 } 849 #endif 850 851 KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) 852 } 853 854 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) { 855 KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) 856 } 857 858 // Unsigned long long loop worksharing constructs 859 // 860 // These are new with gcc 4.4 861 862 #define LOOP_START_ULL(func, schedule) \ 863 int func(int up, unsigned long long lb, unsigned long long ub, \ 864 unsigned long long str, unsigned long long chunk_sz, \ 865 unsigned long long *p_lb, unsigned long long *p_ub) { \ 866 int status; \ 867 long long str2 = up ? ((long long)str) : -((long long)str); \ 868 long long stride; \ 869 int gtid = __kmp_entry_gtid(); \ 870 MKLOC(loc, KMP_STR(func)); \ 871 \ 872 KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \ 873 "0x%llx, chunk_sz 0x%llx\n", \ 874 gtid, up, lb, ub, str, chunk_sz)); \ 875 \ 876 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 877 KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ 878 (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ 879 (schedule) != kmp_sch_static); \ 880 status = \ 881 KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ 882 (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ 883 if (status) { \ 884 KMP_DEBUG_ASSERT(stride == str2); \ 885 *p_ub += (str > 0) ? 1 : -1; \ 886 } \ 887 } else { \ 888 status = 0; \ 889 } \ 890 \ 891 KA_TRACE( \ 892 20, \ 893 (KMP_STR( \ 894 func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ 895 gtid, *p_lb, *p_ub, status)); \ 896 return status; \ 897 } 898 899 #define LOOP_RUNTIME_START_ULL(func, schedule) \ 900 int func(int up, unsigned long long lb, unsigned long long ub, \ 901 unsigned long long str, unsigned long long *p_lb, \ 902 unsigned long long *p_ub) { \ 903 int status; \ 904 long long str2 = up ? ((long long)str) : -((long long)str); \ 905 unsigned long long stride; \ 906 unsigned long long chunk_sz = 0; \ 907 int gtid = __kmp_entry_gtid(); \ 908 MKLOC(loc, KMP_STR(func)); \ 909 \ 910 KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \ 911 "0x%llx, chunk_sz 0x%llx\n", \ 912 gtid, up, lb, ub, str, chunk_sz)); \ 913 \ 914 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 915 KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ 916 (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ 917 TRUE); \ 918 status = \ 919 KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ 920 (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ 921 if (status) { \ 922 KMP_DEBUG_ASSERT((long long)stride == str2); \ 923 *p_ub += (str > 0) ? 1 : -1; \ 924 } \ 925 } else { \ 926 status = 0; \ 927 } \ 928 \ 929 KA_TRACE( \ 930 20, \ 931 (KMP_STR( \ 932 func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ 933 gtid, *p_lb, *p_ub, status)); \ 934 return status; \ 935 } 936 937 #define LOOP_NEXT_ULL(func, fini_code) \ 938 int func(unsigned long long *p_lb, unsigned long long *p_ub) { \ 939 int status; \ 940 long long stride; \ 941 int gtid = __kmp_get_gtid(); \ 942 MKLOC(loc, KMP_STR(func)); \ 943 KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \ 944 \ 945 fini_code status = \ 946 KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ 947 (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ 948 if (status) { \ 949 *p_ub += (stride > 0) ? 1 : -1; \ 950 } \ 951 \ 952 KA_TRACE( \ 953 20, \ 954 (KMP_STR( \ 955 func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \ 956 "returning %d\n", \ 957 gtid, *p_lb, *p_ub, stride, status)); \ 958 return status; \ 959 } 960 961 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), 962 kmp_sch_static) 963 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {}) 964 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), 965 kmp_sch_dynamic_chunked) 966 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) 967 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), 968 kmp_sch_guided_chunked) 969 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) 970 LOOP_START_ULL( 971 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START), 972 kmp_sch_dynamic_chunked) 973 LOOP_NEXT_ULL( 974 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {}) 975 LOOP_START_ULL( 976 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START), 977 kmp_sch_guided_chunked) 978 LOOP_NEXT_ULL( 979 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {}) 980 LOOP_RUNTIME_START_ULL( 981 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) 982 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) 983 LOOP_RUNTIME_START_ULL( 984 KMP_EXPAND_NAME( 985 KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START), 986 kmp_sch_runtime) 987 LOOP_RUNTIME_START_ULL( 988 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START), 989 kmp_sch_runtime) 990 LOOP_NEXT_ULL( 991 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT), 992 {}) 993 LOOP_NEXT_ULL( 994 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT), {}) 995 996 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), 997 kmp_ord_static) 998 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), 999 { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) 1000 LOOP_START_ULL( 1001 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), 1002 kmp_ord_dynamic_chunked) 1003 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), 1004 { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) 1005 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), 1006 kmp_ord_guided_chunked) 1007 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), 1008 { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) 1009 LOOP_RUNTIME_START_ULL( 1010 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), 1011 kmp_ord_runtime) 1012 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), 1013 { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) 1014 1015 #define LOOP_DOACROSS_START_ULL(func, schedule) \ 1016 int func(unsigned ncounts, unsigned long long *counts, \ 1017 unsigned long long chunk_sz, unsigned long long *p_lb, \ 1018 unsigned long long *p_ub) { \ 1019 int status; \ 1020 long long stride, str, lb, ub; \ 1021 int gtid = __kmp_entry_gtid(); \ 1022 struct kmp_dim *dims = \ 1023 (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ 1024 MKLOC(loc, KMP_STR(func)); \ 1025 for (unsigned i = 0; i < ncounts; ++i) { \ 1026 dims[i].lo = 0; \ 1027 dims[i].up = counts[i] - 1; \ 1028 dims[i].st = 1; \ 1029 } \ 1030 __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ 1031 lb = 0; \ 1032 ub = counts[0]; \ 1033 str = 1; \ 1034 \ 1035 KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \ 1036 "0x%llx, chunk_sz 0x%llx\n", \ 1037 gtid, lb, ub, str, chunk_sz)); \ 1038 \ 1039 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 1040 KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ 1041 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 1042 (schedule) != kmp_sch_static); \ 1043 status = \ 1044 KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ 1045 (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ 1046 if (status) { \ 1047 KMP_DEBUG_ASSERT(stride == str); \ 1048 *p_ub += (str > 0) ? 1 : -1; \ 1049 } \ 1050 } else { \ 1051 status = 0; \ 1052 } \ 1053 KMP_DOACROSS_FINI(status, gtid); \ 1054 \ 1055 KA_TRACE( \ 1056 20, \ 1057 (KMP_STR( \ 1058 func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ 1059 gtid, *p_lb, *p_ub, status)); \ 1060 __kmp_free(dims); \ 1061 return status; \ 1062 } 1063 1064 #define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule) \ 1065 int func(unsigned ncounts, unsigned long long *counts, \ 1066 unsigned long long *p_lb, unsigned long long *p_ub) { \ 1067 int status; \ 1068 unsigned long long stride, str, lb, ub; \ 1069 unsigned long long chunk_sz = 0; \ 1070 int gtid = __kmp_entry_gtid(); \ 1071 struct kmp_dim *dims = \ 1072 (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \ 1073 MKLOC(loc, KMP_STR(func)); \ 1074 for (unsigned i = 0; i < ncounts; ++i) { \ 1075 dims[i].lo = 0; \ 1076 dims[i].up = counts[i] - 1; \ 1077 dims[i].st = 1; \ 1078 } \ 1079 __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \ 1080 lb = 0; \ 1081 ub = counts[0]; \ 1082 str = 1; \ 1083 KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \ 1084 "0x%llx, chunk_sz 0x%llx\n", \ 1085 gtid, lb, ub, str, chunk_sz)); \ 1086 \ 1087 if ((str > 0) ? (lb < ub) : (lb > ub)) { \ 1088 KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ 1089 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 1090 TRUE); \ 1091 status = \ 1092 KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ 1093 (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ 1094 if (status) { \ 1095 KMP_DEBUG_ASSERT(stride == str); \ 1096 *p_ub += (str > 0) ? 1 : -1; \ 1097 } \ 1098 } else { \ 1099 status = 0; \ 1100 } \ 1101 KMP_DOACROSS_FINI(status, gtid); \ 1102 \ 1103 KA_TRACE( \ 1104 20, \ 1105 (KMP_STR( \ 1106 func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ 1107 gtid, *p_lb, *p_ub, status)); \ 1108 __kmp_free(dims); \ 1109 return status; \ 1110 } 1111 1112 LOOP_DOACROSS_START_ULL( 1113 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START), 1114 kmp_sch_static) 1115 LOOP_DOACROSS_START_ULL( 1116 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START), 1117 kmp_sch_dynamic_chunked) 1118 LOOP_DOACROSS_START_ULL( 1119 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START), 1120 kmp_sch_guided_chunked) 1121 LOOP_DOACROSS_RUNTIME_START_ULL( 1122 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START), 1123 kmp_sch_runtime) 1124 1125 // Combined parallel / loop worksharing constructs 1126 // 1127 // There are no ull versions (yet). 1128 1129 #define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \ 1130 void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ 1131 long ub, long str, long chunk_sz) { \ 1132 int gtid = __kmp_entry_gtid(); \ 1133 MKLOC(loc, KMP_STR(func)); \ 1134 KA_TRACE( \ 1135 20, \ 1136 (KMP_STR( \ 1137 func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ 1138 gtid, lb, ub, str, chunk_sz)); \ 1139 \ 1140 ompt_pre(); \ 1141 \ 1142 __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, \ 1143 (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ 1144 9, task, data, num_threads, &loc, (schedule), lb, \ 1145 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ 1146 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ 1147 \ 1148 KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ 1149 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 1150 (schedule) != kmp_sch_static); \ 1151 \ 1152 ompt_post(); \ 1153 \ 1154 KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \ 1155 } 1156 1157 #if OMPT_SUPPORT && OMPT_OPTIONAL 1158 1159 #define OMPT_LOOP_PRE() \ 1160 ompt_frame_t *parent_frame; \ 1161 if (ompt_enabled.enabled) { \ 1162 __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ 1163 parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \ 1164 OMPT_STORE_RETURN_ADDRESS(gtid); \ 1165 } 1166 1167 #define OMPT_LOOP_POST() \ 1168 if (ompt_enabled.enabled) { \ 1169 parent_frame->enter_frame = ompt_data_none; \ 1170 } 1171 1172 #else 1173 1174 #define OMPT_LOOP_PRE() 1175 1176 #define OMPT_LOOP_POST() 1177 1178 #endif 1179 1180 PARALLEL_LOOP_START( 1181 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), 1182 kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1183 PARALLEL_LOOP_START( 1184 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), 1185 kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1186 PARALLEL_LOOP_START( 1187 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), 1188 kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1189 PARALLEL_LOOP_START( 1190 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), 1191 kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1192 1193 // Tasking constructs 1194 1195 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, 1196 void (*copy_func)(void *, void *), 1197 long arg_size, long arg_align, 1198 bool if_cond, unsigned gomp_flags, 1199 void **depend) { 1200 MKLOC(loc, "GOMP_task"); 1201 int gtid = __kmp_entry_gtid(); 1202 kmp_int32 flags = 0; 1203 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; 1204 1205 KA_TRACE(20, ("GOMP_task: T#%d\n", gtid)); 1206 1207 // The low-order bit is the "untied" flag 1208 if (!(gomp_flags & KMP_GOMP_TASK_UNTIED_FLAG)) { 1209 input_flags->tiedness = 1; 1210 } 1211 // The second low-order bit is the "final" flag 1212 if (gomp_flags & KMP_GOMP_TASK_FINAL_FLAG) { 1213 input_flags->final = 1; 1214 } 1215 input_flags->native = 1; 1216 // __kmp_task_alloc() sets up all other flags 1217 1218 if (!if_cond) { 1219 arg_size = 0; 1220 } 1221 1222 kmp_task_t *task = __kmp_task_alloc( 1223 &loc, gtid, input_flags, sizeof(kmp_task_t), 1224 arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func); 1225 1226 if (arg_size > 0) { 1227 if (arg_align > 0) { 1228 task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) / 1229 arg_align * arg_align); 1230 } 1231 // else error?? 1232 1233 if (copy_func) { 1234 (*copy_func)(task->shareds, data); 1235 } else { 1236 KMP_MEMCPY(task->shareds, data, arg_size); 1237 } 1238 } 1239 1240 #if OMPT_SUPPORT 1241 kmp_taskdata_t *current_task; 1242 if (ompt_enabled.enabled) { 1243 current_task = __kmp_threads[gtid]->th.th_current_task; 1244 current_task->ompt_task_info.frame.enter_frame.ptr = 1245 OMPT_GET_FRAME_ADDRESS(0); 1246 } 1247 OMPT_STORE_RETURN_ADDRESS(gtid); 1248 #endif 1249 1250 if (if_cond) { 1251 if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) { 1252 KMP_ASSERT(depend); 1253 kmp_gomp_depends_info_t gomp_depends(depend); 1254 kmp_int32 ndeps = gomp_depends.get_num_deps(); 1255 kmp_depend_info_t dep_list[ndeps]; 1256 for (kmp_int32 i = 0; i < ndeps; i++) 1257 dep_list[i] = gomp_depends.get_kmp_depend(i); 1258 kmp_int32 ndeps_cnv; 1259 __kmp_type_convert(ndeps, &ndeps_cnv); 1260 __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps_cnv, dep_list, 0, NULL); 1261 } else { 1262 __kmpc_omp_task(&loc, gtid, task); 1263 } 1264 } else { 1265 #if OMPT_SUPPORT 1266 ompt_thread_info_t oldInfo; 1267 kmp_info_t *thread; 1268 kmp_taskdata_t *taskdata; 1269 if (ompt_enabled.enabled) { 1270 // Store the threads states and restore them after the task 1271 thread = __kmp_threads[gtid]; 1272 taskdata = KMP_TASK_TO_TASKDATA(task); 1273 oldInfo = thread->th.ompt_thread_info; 1274 thread->th.ompt_thread_info.wait_id = 0; 1275 thread->th.ompt_thread_info.state = ompt_state_work_parallel; 1276 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1277 } 1278 OMPT_STORE_RETURN_ADDRESS(gtid); 1279 #endif 1280 if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) { 1281 KMP_ASSERT(depend); 1282 kmp_gomp_depends_info_t gomp_depends(depend); 1283 kmp_int32 ndeps = gomp_depends.get_num_deps(); 1284 kmp_depend_info_t dep_list[ndeps]; 1285 for (kmp_int32 i = 0; i < ndeps; i++) 1286 dep_list[i] = gomp_depends.get_kmp_depend(i); 1287 __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL); 1288 } 1289 1290 __kmpc_omp_task_begin_if0(&loc, gtid, task); 1291 func(data); 1292 __kmpc_omp_task_complete_if0(&loc, gtid, task); 1293 1294 #if OMPT_SUPPORT 1295 if (ompt_enabled.enabled) { 1296 thread->th.ompt_thread_info = oldInfo; 1297 taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; 1298 } 1299 #endif 1300 } 1301 #if OMPT_SUPPORT 1302 if (ompt_enabled.enabled) { 1303 current_task->ompt_task_info.frame.enter_frame = ompt_data_none; 1304 } 1305 #endif 1306 1307 KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); 1308 } 1309 1310 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) { 1311 MKLOC(loc, "GOMP_taskwait"); 1312 int gtid = __kmp_entry_gtid(); 1313 1314 #if OMPT_SUPPORT 1315 OMPT_STORE_RETURN_ADDRESS(gtid); 1316 #endif 1317 1318 KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); 1319 1320 __kmpc_omp_taskwait(&loc, gtid); 1321 1322 KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid)); 1323 } 1324 1325 // Sections worksharing constructs 1326 // 1327 // For the sections construct, we initialize a dynamically scheduled loop 1328 // worksharing construct with lb 1 and stride 1, and use the iteration #'s 1329 // that its returns as sections ids. 1330 // 1331 // There are no special entry points for ordered sections, so we always use 1332 // the dynamically scheduled workshare, even if the sections aren't ordered. 1333 1334 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) { 1335 int status; 1336 kmp_int lb, ub, stride; 1337 int gtid = __kmp_entry_gtid(); 1338 MKLOC(loc, "GOMP_sections_start"); 1339 KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid)); 1340 1341 KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); 1342 1343 status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); 1344 if (status) { 1345 KMP_DEBUG_ASSERT(stride == 1); 1346 KMP_DEBUG_ASSERT(lb > 0); 1347 KMP_ASSERT(lb == ub); 1348 } else { 1349 lb = 0; 1350 } 1351 1352 KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid, 1353 (unsigned)lb)); 1354 return (unsigned)lb; 1355 } 1356 1357 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) { 1358 int status; 1359 kmp_int lb, ub, stride; 1360 int gtid = __kmp_get_gtid(); 1361 MKLOC(loc, "GOMP_sections_next"); 1362 KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid)); 1363 1364 #if OMPT_SUPPORT 1365 OMPT_STORE_RETURN_ADDRESS(gtid); 1366 #endif 1367 1368 status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); 1369 if (status) { 1370 KMP_DEBUG_ASSERT(stride == 1); 1371 KMP_DEBUG_ASSERT(lb > 0); 1372 KMP_ASSERT(lb == ub); 1373 } else { 1374 lb = 0; 1375 } 1376 1377 KA_TRACE( 1378 20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb)); 1379 return (unsigned)lb; 1380 } 1381 1382 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)( 1383 void (*task)(void *), void *data, unsigned num_threads, unsigned count) { 1384 int gtid = __kmp_entry_gtid(); 1385 1386 #if OMPT_SUPPORT 1387 ompt_frame_t *parent_frame; 1388 1389 if (ompt_enabled.enabled) { 1390 __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); 1391 parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1392 } 1393 OMPT_STORE_RETURN_ADDRESS(gtid); 1394 #endif 1395 1396 MKLOC(loc, "GOMP_parallel_sections_start"); 1397 KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); 1398 1399 __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, 1400 (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, 1401 task, data, num_threads, &loc, kmp_nm_dynamic_chunked, 1402 (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); 1403 1404 #if OMPT_SUPPORT 1405 if (ompt_enabled.enabled) { 1406 parent_frame->enter_frame = ompt_data_none; 1407 } 1408 #endif 1409 1410 KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); 1411 1412 KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); 1413 } 1414 1415 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) { 1416 int gtid = __kmp_get_gtid(); 1417 KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) 1418 1419 #if OMPT_SUPPORT 1420 ompt_frame_t *ompt_frame; 1421 if (ompt_enabled.enabled) { 1422 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1423 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1424 } 1425 OMPT_STORE_RETURN_ADDRESS(gtid); 1426 #endif 1427 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 1428 #if OMPT_SUPPORT 1429 if (ompt_enabled.enabled) { 1430 ompt_frame->enter_frame = ompt_data_none; 1431 } 1432 #endif 1433 1434 KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) 1435 } 1436 1437 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) { 1438 KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) 1439 } 1440 1441 // libgomp has an empty function for GOMP_taskyield as of 2013-10-10 1442 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) { 1443 KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid())) 1444 return; 1445 } 1446 1447 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), 1448 void *data, 1449 unsigned num_threads, 1450 unsigned int flags) { 1451 int gtid = __kmp_entry_gtid(); 1452 MKLOC(loc, "GOMP_parallel"); 1453 KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); 1454 1455 #if OMPT_SUPPORT 1456 ompt_task_info_t *parent_task_info, *task_info; 1457 if (ompt_enabled.enabled) { 1458 parent_task_info = __ompt_get_task_info_object(0); 1459 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1460 } 1461 OMPT_STORE_RETURN_ADDRESS(gtid); 1462 #endif 1463 __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, 1464 (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, 1465 data); 1466 #if OMPT_SUPPORT 1467 if (ompt_enabled.enabled) { 1468 task_info = __ompt_get_task_info_object(0); 1469 task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1470 } 1471 #endif 1472 task(data); 1473 { 1474 #if OMPT_SUPPORT 1475 OMPT_STORE_RETURN_ADDRESS(gtid); 1476 #endif 1477 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); 1478 } 1479 #if OMPT_SUPPORT 1480 if (ompt_enabled.enabled) { 1481 task_info->frame.exit_frame = ompt_data_none; 1482 parent_task_info->frame.enter_frame = ompt_data_none; 1483 } 1484 #endif 1485 } 1486 1487 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), 1488 void *data, 1489 unsigned num_threads, 1490 unsigned count, 1491 unsigned flags) { 1492 int gtid = __kmp_entry_gtid(); 1493 MKLOC(loc, "GOMP_parallel_sections"); 1494 KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); 1495 1496 #if OMPT_SUPPORT 1497 OMPT_STORE_RETURN_ADDRESS(gtid); 1498 #endif 1499 1500 __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, 1501 (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, 1502 task, data, num_threads, &loc, kmp_nm_dynamic_chunked, 1503 (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); 1504 1505 { 1506 #if OMPT_SUPPORT 1507 OMPT_STORE_RETURN_ADDRESS(gtid); 1508 #endif 1509 1510 KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); 1511 } 1512 task(data); 1513 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); 1514 KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); 1515 } 1516 1517 #define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \ 1518 void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ 1519 long ub, long str, long chunk_sz, unsigned flags) { \ 1520 int gtid = __kmp_entry_gtid(); \ 1521 MKLOC(loc, KMP_STR(func)); \ 1522 KA_TRACE( \ 1523 20, \ 1524 (KMP_STR( \ 1525 func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ 1526 gtid, lb, ub, str, chunk_sz)); \ 1527 \ 1528 ompt_pre(); \ 1529 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 1530 __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, \ 1531 (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ 1532 9, task, data, num_threads, &loc, (schedule), lb, \ 1533 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ 1534 \ 1535 { \ 1536 IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ 1537 KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ 1538 (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ 1539 (schedule) != kmp_sch_static); \ 1540 } \ 1541 task(data); \ 1542 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); \ 1543 ompt_post(); \ 1544 \ 1545 KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \ 1546 } 1547 1548 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), 1549 kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1550 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), 1551 kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1552 PARALLEL_LOOP( 1553 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED), 1554 kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1555 PARALLEL_LOOP( 1556 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC), 1557 kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1558 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), 1559 kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1560 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), 1561 kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1562 PARALLEL_LOOP( 1563 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME), 1564 kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1565 PARALLEL_LOOP( 1566 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME), 1567 kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) 1568 1569 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) { 1570 int gtid = __kmp_entry_gtid(); 1571 MKLOC(loc, "GOMP_taskgroup_start"); 1572 KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); 1573 1574 #if OMPT_SUPPORT 1575 OMPT_STORE_RETURN_ADDRESS(gtid); 1576 #endif 1577 1578 __kmpc_taskgroup(&loc, gtid); 1579 1580 return; 1581 } 1582 1583 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) { 1584 int gtid = __kmp_get_gtid(); 1585 MKLOC(loc, "GOMP_taskgroup_end"); 1586 KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); 1587 1588 #if OMPT_SUPPORT 1589 OMPT_STORE_RETURN_ADDRESS(gtid); 1590 #endif 1591 1592 __kmpc_end_taskgroup(&loc, gtid); 1593 1594 return; 1595 } 1596 1597 static kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) { 1598 kmp_int32 cncl_kind = 0; 1599 switch (gomp_kind) { 1600 case 1: 1601 cncl_kind = cancel_parallel; 1602 break; 1603 case 2: 1604 cncl_kind = cancel_loop; 1605 break; 1606 case 4: 1607 cncl_kind = cancel_sections; 1608 break; 1609 case 8: 1610 cncl_kind = cancel_taskgroup; 1611 break; 1612 } 1613 return cncl_kind; 1614 } 1615 1616 // Return true if cancellation should take place, false otherwise 1617 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) { 1618 int gtid = __kmp_get_gtid(); 1619 MKLOC(loc, "GOMP_cancellation_point"); 1620 KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which)); 1621 kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); 1622 return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); 1623 } 1624 1625 // Return true if cancellation should take place, false otherwise 1626 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) { 1627 int gtid = __kmp_get_gtid(); 1628 MKLOC(loc, "GOMP_cancel"); 1629 KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which, 1630 (int)do_cancel)); 1631 kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); 1632 1633 if (do_cancel == FALSE) { 1634 return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); 1635 } else { 1636 return __kmpc_cancel(&loc, gtid, cncl_kind); 1637 } 1638 } 1639 1640 // Return true if cancellation should take place, false otherwise 1641 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) { 1642 int gtid = __kmp_get_gtid(); 1643 KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid)); 1644 return __kmp_barrier_gomp_cancel(gtid); 1645 } 1646 1647 // Return true if cancellation should take place, false otherwise 1648 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) { 1649 int gtid = __kmp_get_gtid(); 1650 KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid)); 1651 return __kmp_barrier_gomp_cancel(gtid); 1652 } 1653 1654 // Return true if cancellation should take place, false otherwise 1655 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) { 1656 int gtid = __kmp_get_gtid(); 1657 KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid)); 1658 return __kmp_barrier_gomp_cancel(gtid); 1659 } 1660 1661 // All target functions are empty as of 2014-05-29 1662 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *), 1663 const void *openmp_target, 1664 size_t mapnum, void **hostaddrs, 1665 size_t *sizes, 1666 unsigned char *kinds) { 1667 return; 1668 } 1669 1670 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)( 1671 int device, const void *openmp_target, size_t mapnum, void **hostaddrs, 1672 size_t *sizes, unsigned char *kinds) { 1673 return; 1674 } 1675 1676 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; } 1677 1678 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)( 1679 int device, const void *openmp_target, size_t mapnum, void **hostaddrs, 1680 size_t *sizes, unsigned char *kinds) { 1681 return; 1682 } 1683 1684 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, 1685 unsigned int thread_limit) { 1686 return; 1687 } 1688 1689 // Task duplication function which copies src to dest (both are 1690 // preallocated task structures) 1691 static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src, 1692 kmp_int32 last_private) { 1693 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src); 1694 if (taskdata->td_copy_func) { 1695 (taskdata->td_copy_func)(dest->shareds, src->shareds); 1696 } 1697 } 1698 1699 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)( 1700 uintptr_t *); 1701 1702 #ifdef __cplusplus 1703 } // extern "C" 1704 #endif 1705 1706 template <typename T> 1707 void __GOMP_taskloop(void (*func)(void *), void *data, 1708 void (*copy_func)(void *, void *), long arg_size, 1709 long arg_align, unsigned gomp_flags, 1710 unsigned long num_tasks, int priority, T start, T end, 1711 T step) { 1712 typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); 1713 MKLOC(loc, "GOMP_taskloop"); 1714 int sched; 1715 T *loop_bounds; 1716 int gtid = __kmp_entry_gtid(); 1717 kmp_int32 flags = 0; 1718 int if_val = gomp_flags & (1u << 10); 1719 int nogroup = gomp_flags & (1u << 11); 1720 int up = gomp_flags & (1u << 8); 1721 int reductions = gomp_flags & (1u << 12); 1722 p_task_dup_t task_dup = NULL; 1723 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; 1724 #ifdef KMP_DEBUG 1725 { 1726 char *buff; 1727 buff = __kmp_str_format( 1728 "GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p " 1729 "arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu " 1730 "priority:%%d start:%%%s end:%%%s step:%%%s\n", 1731 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec); 1732 KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align, 1733 gomp_flags, num_tasks, priority, start, end, step)); 1734 __kmp_str_free(&buff); 1735 } 1736 #endif 1737 KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T)); 1738 KMP_ASSERT(arg_align > 0); 1739 // The low-order bit is the "untied" flag 1740 if (!(gomp_flags & 1)) { 1741 input_flags->tiedness = 1; 1742 } 1743 // The second low-order bit is the "final" flag 1744 if (gomp_flags & 2) { 1745 input_flags->final = 1; 1746 } 1747 // Negative step flag 1748 if (!up) { 1749 // If step is flagged as negative, but isn't properly sign extended 1750 // Then manually sign extend it. Could be a short, int, char embedded 1751 // in a long. So cannot assume any cast. 1752 if (step > 0) { 1753 for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) { 1754 // break at the first 1 bit 1755 if (step & ((T)1 << i)) 1756 break; 1757 step |= ((T)1 << i); 1758 } 1759 } 1760 } 1761 input_flags->native = 1; 1762 // Figure out if none/grainsize/num_tasks clause specified 1763 if (num_tasks > 0) { 1764 if (gomp_flags & (1u << 9)) 1765 sched = 1; // grainsize specified 1766 else 1767 sched = 2; // num_tasks specified 1768 // neither grainsize nor num_tasks specified 1769 } else { 1770 sched = 0; 1771 } 1772 1773 // __kmp_task_alloc() sets up all other flags 1774 kmp_task_t *task = 1775 __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t), 1776 arg_size + arg_align - 1, (kmp_routine_entry_t)func); 1777 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); 1778 taskdata->td_copy_func = copy_func; 1779 taskdata->td_size_loop_bounds = sizeof(T); 1780 1781 // re-align shareds if needed and setup firstprivate copy constructors 1782 // through the task_dup mechanism 1783 task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) / 1784 arg_align * arg_align); 1785 if (copy_func) { 1786 task_dup = __kmp_gomp_task_dup; 1787 } 1788 KMP_MEMCPY(task->shareds, data, arg_size); 1789 1790 loop_bounds = (T *)task->shareds; 1791 loop_bounds[0] = start; 1792 loop_bounds[1] = end + (up ? -1 : 1); 1793 1794 if (!nogroup) { 1795 #if OMPT_SUPPORT && OMPT_OPTIONAL 1796 OMPT_STORE_RETURN_ADDRESS(gtid); 1797 #endif 1798 __kmpc_taskgroup(&loc, gtid); 1799 if (reductions) { 1800 // The data pointer points to lb, ub, then reduction data 1801 struct data_t { 1802 T a, b; 1803 uintptr_t *d; 1804 }; 1805 uintptr_t *d = ((data_t *)data)->d; 1806 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(d); 1807 } 1808 } 1809 __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]), 1810 (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, 1, sched, 1811 (kmp_uint64)num_tasks, (void *)task_dup); 1812 if (!nogroup) { 1813 #if OMPT_SUPPORT && OMPT_OPTIONAL 1814 OMPT_STORE_RETURN_ADDRESS(gtid); 1815 #endif 1816 __kmpc_end_taskgroup(&loc, gtid); 1817 } 1818 } 1819 1820 // 4 byte version of GOMP_doacross_post 1821 // This verison needs to create a temporary array which converts 4 byte 1822 // integers into 8 byte integers 1823 template <typename T, bool need_conversion = (sizeof(long) == 4)> 1824 void __kmp_GOMP_doacross_post(T *count); 1825 1826 template <> void __kmp_GOMP_doacross_post<long, true>(long *count) { 1827 int gtid = __kmp_entry_gtid(); 1828 kmp_info_t *th = __kmp_threads[gtid]; 1829 MKLOC(loc, "GOMP_doacross_post"); 1830 kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0]; 1831 kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc( 1832 th, (size_t)(sizeof(kmp_int64) * num_dims)); 1833 for (kmp_int64 i = 0; i < num_dims; ++i) { 1834 vec[i] = (kmp_int64)count[i]; 1835 } 1836 __kmpc_doacross_post(&loc, gtid, vec); 1837 __kmp_thread_free(th, vec); 1838 } 1839 1840 // 8 byte versions of GOMP_doacross_post 1841 // This version can just pass in the count array directly instead of creating 1842 // a temporary array 1843 template <> void __kmp_GOMP_doacross_post<long, false>(long *count) { 1844 int gtid = __kmp_entry_gtid(); 1845 MKLOC(loc, "GOMP_doacross_post"); 1846 __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count)); 1847 } 1848 1849 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) { 1850 int gtid = __kmp_entry_gtid(); 1851 kmp_info_t *th = __kmp_threads[gtid]; 1852 MKLOC(loc, "GOMP_doacross_wait"); 1853 kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0]; 1854 kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc( 1855 th, (size_t)(sizeof(kmp_int64) * num_dims)); 1856 vec[0] = (kmp_int64)first; 1857 for (kmp_int64 i = 1; i < num_dims; ++i) { 1858 T item = va_arg(args, T); 1859 vec[i] = (kmp_int64)item; 1860 } 1861 __kmpc_doacross_wait(&loc, gtid, vec); 1862 __kmp_thread_free(th, vec); 1863 return; 1864 } 1865 1866 #ifdef __cplusplus 1867 extern "C" { 1868 #endif // __cplusplus 1869 1870 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)( 1871 void (*func)(void *), void *data, void (*copy_func)(void *, void *), 1872 long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks, 1873 int priority, long start, long end, long step) { 1874 __GOMP_taskloop<long>(func, data, copy_func, arg_size, arg_align, gomp_flags, 1875 num_tasks, priority, start, end, step); 1876 } 1877 1878 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)( 1879 void (*func)(void *), void *data, void (*copy_func)(void *, void *), 1880 long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks, 1881 int priority, unsigned long long start, unsigned long long end, 1882 unsigned long long step) { 1883 __GOMP_taskloop<unsigned long long>(func, data, copy_func, arg_size, 1884 arg_align, gomp_flags, num_tasks, 1885 priority, start, end, step); 1886 } 1887 1888 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) { 1889 __kmp_GOMP_doacross_post(count); 1890 } 1891 1892 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) { 1893 va_list args; 1894 va_start(args, first); 1895 __kmp_GOMP_doacross_wait<long>(first, args); 1896 va_end(args); 1897 } 1898 1899 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)( 1900 unsigned long long *count) { 1901 int gtid = __kmp_entry_gtid(); 1902 MKLOC(loc, "GOMP_doacross_ull_post"); 1903 __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count)); 1904 } 1905 1906 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)( 1907 unsigned long long first, ...) { 1908 va_list args; 1909 va_start(args, first); 1910 __kmp_GOMP_doacross_wait<unsigned long long>(first, args); 1911 va_end(args); 1912 } 1913 1914 // fn: the function each primary thread of new team will call 1915 // data: argument to fn 1916 // num_teams, thread_limit: max bounds on respective ICV 1917 // flags: unused 1918 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *), 1919 void *data, 1920 unsigned num_teams, 1921 unsigned thread_limit, 1922 unsigned flags) { 1923 MKLOC(loc, "GOMP_teams_reg"); 1924 int gtid = __kmp_entry_gtid(); 1925 KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n", 1926 gtid, num_teams, thread_limit, flags)); 1927 __kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit); 1928 __kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn, 1929 data); 1930 KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid)); 1931 } 1932 1933 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) { 1934 MKLOC(loc, "GOMP_taskwait_depend"); 1935 int gtid = __kmp_entry_gtid(); 1936 KA_TRACE(20, ("GOMP_taskwait_depend: T#%d\n", gtid)); 1937 kmp_gomp_depends_info_t gomp_depends(depend); 1938 kmp_int32 ndeps = gomp_depends.get_num_deps(); 1939 kmp_depend_info_t dep_list[ndeps]; 1940 for (kmp_int32 i = 0; i < ndeps; i++) 1941 dep_list[i] = gomp_depends.get_kmp_depend(i); 1942 #if OMPT_SUPPORT 1943 OMPT_STORE_RETURN_ADDRESS(gtid); 1944 #endif 1945 __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL); 1946 KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid)); 1947 } 1948 1949 static inline void 1950 __kmp_GOMP_taskgroup_reduction_register(uintptr_t *data, kmp_taskgroup_t *tg, 1951 int nthreads, 1952 uintptr_t *allocated = nullptr) { 1953 KMP_ASSERT(data); 1954 KMP_ASSERT(nthreads > 0); 1955 // Have private copy pointers point to previously allocated 1956 // reduction data or allocate new data here 1957 if (allocated) { 1958 data[2] = allocated[2]; 1959 data[6] = allocated[6]; 1960 } else { 1961 data[2] = (uintptr_t)__kmp_allocate(nthreads * data[1]); 1962 data[6] = data[2] + (nthreads * data[1]); 1963 } 1964 if (tg) 1965 tg->gomp_data = data; 1966 } 1967 1968 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)( 1969 uintptr_t *data) { 1970 int gtid = __kmp_entry_gtid(); 1971 KA_TRACE(20, ("GOMP_taskgroup_reduction_register: T#%d\n", gtid)); 1972 kmp_info_t *thread = __kmp_threads[gtid]; 1973 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup; 1974 int nthreads = thread->th.th_team_nproc; 1975 __kmp_GOMP_taskgroup_reduction_register(data, tg, nthreads); 1976 } 1977 1978 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)( 1979 uintptr_t *data) { 1980 KA_TRACE(20, 1981 ("GOMP_taskgroup_reduction_unregister: T#%d\n", __kmp_get_gtid())); 1982 KMP_ASSERT(data && data[2]); 1983 __kmp_free((void *)data[2]); 1984 } 1985 1986 // Search through reduction data and set ptrs[] elements 1987 // to proper privatized copy address 1988 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP)(size_t cnt, 1989 size_t cntorig, 1990 void **ptrs) { 1991 int gtid = __kmp_entry_gtid(); 1992 KA_TRACE(20, ("GOMP_task_reduction_remap: T#%d\n", gtid)); 1993 kmp_info_t *thread = __kmp_threads[gtid]; 1994 kmp_int32 tid = __kmp_get_tid(); 1995 for (size_t i = 0; i < cnt; ++i) { 1996 uintptr_t address = (uintptr_t)ptrs[i]; 1997 void *propagated_address = NULL; 1998 void *mapped_address = NULL; 1999 // Check taskgroups reduce data 2000 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup; 2001 while (tg) { 2002 uintptr_t *gomp_data = tg->gomp_data; 2003 if (!gomp_data) { 2004 tg = tg->parent; 2005 continue; 2006 } 2007 // Check the shared addresses list 2008 size_t num_vars = (size_t)gomp_data[0]; 2009 uintptr_t per_thread_size = gomp_data[1]; 2010 uintptr_t reduce_data = gomp_data[2]; 2011 uintptr_t end_reduce_data = gomp_data[6]; 2012 for (size_t j = 0; j < num_vars; ++j) { 2013 uintptr_t *entry = gomp_data + 7 + 3 * j; 2014 if (entry[0] == address) { 2015 uintptr_t offset = entry[1]; 2016 mapped_address = 2017 (void *)(reduce_data + tid * per_thread_size + offset); 2018 if (i < cntorig) 2019 propagated_address = (void *)entry[0]; 2020 break; 2021 } 2022 } 2023 if (mapped_address) 2024 break; 2025 // Check if address is within privatized copies range 2026 if (!mapped_address && address >= reduce_data && 2027 address < end_reduce_data) { 2028 uintptr_t offset = (address - reduce_data) % per_thread_size; 2029 mapped_address = (void *)(reduce_data + tid * per_thread_size + offset); 2030 if (i < cntorig) { 2031 for (size_t j = 0; j < num_vars; ++j) { 2032 uintptr_t *entry = gomp_data + 7 + 3 * j; 2033 if (entry[1] == offset) { 2034 propagated_address = (void *)entry[0]; 2035 break; 2036 } 2037 } 2038 } 2039 } 2040 if (mapped_address) 2041 break; 2042 tg = tg->parent; 2043 } 2044 KMP_ASSERT(mapped_address); 2045 ptrs[i] = mapped_address; 2046 if (i < cntorig) { 2047 KMP_ASSERT(propagated_address); 2048 ptrs[cnt + i] = propagated_address; 2049 } 2050 } 2051 } 2052 2053 static void __kmp_GOMP_init_reductions(int gtid, uintptr_t *data, int is_ws) { 2054 kmp_info_t *thr = __kmp_threads[gtid]; 2055 kmp_team_t *team = thr->th.th_team; 2056 // First start a taskgroup 2057 __kmpc_taskgroup(NULL, gtid); 2058 // Then setup reduction data 2059 void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]); 2060 if (reduce_data == NULL && 2061 __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data, 2062 (void *)1)) { 2063 // Single thread enters this block to initialize common reduction data 2064 KMP_DEBUG_ASSERT(reduce_data == NULL); 2065 __kmp_GOMP_taskgroup_reduction_register(data, NULL, thr->th.th_team_nproc); 2066 KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[is_ws], 0); 2067 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], (void *)data); 2068 } else { 2069 // Wait for task reduction initialization 2070 while ((reduce_data = KMP_ATOMIC_LD_ACQ( 2071 &team->t.t_tg_reduce_data[is_ws])) == (void *)1) { 2072 KMP_CPU_PAUSE(); 2073 } 2074 KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here 2075 } 2076 // For worksharing constructs, each thread has its own reduction structure. 2077 // Have each reduction structure point to same privatized copies of vars. 2078 // For parallel, each thread points to same reduction structure and privatized 2079 // copies of vars 2080 if (is_ws) { 2081 __kmp_GOMP_taskgroup_reduction_register( 2082 data, NULL, thr->th.th_team_nproc, 2083 (uintptr_t *)KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])); 2084 } 2085 kmp_taskgroup_t *tg = thr->th.th_current_task->td_taskgroup; 2086 tg->gomp_data = data; 2087 } 2088 2089 static unsigned 2090 __kmp_GOMP_par_reductions_microtask_wrapper(int *gtid, int *npr, 2091 void (*task)(void *), void *data) { 2092 kmp_info_t *thr = __kmp_threads[*gtid]; 2093 kmp_team_t *team = thr->th.th_team; 2094 uintptr_t *reduce_data = *(uintptr_t **)data; 2095 __kmp_GOMP_init_reductions(*gtid, reduce_data, 0); 2096 2097 #if OMPT_SUPPORT 2098 ompt_frame_t *ompt_frame; 2099 ompt_state_t enclosing_state; 2100 2101 if (ompt_enabled.enabled) { 2102 // save enclosing task state; set current state for task 2103 enclosing_state = thr->th.ompt_thread_info.state; 2104 thr->th.ompt_thread_info.state = ompt_state_work_parallel; 2105 2106 // set task frame 2107 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2108 ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2109 } 2110 #endif 2111 2112 task(data); 2113 2114 #if OMPT_SUPPORT 2115 if (ompt_enabled.enabled) { 2116 // clear task frame 2117 ompt_frame->exit_frame = ompt_data_none; 2118 2119 // restore enclosing state 2120 thr->th.ompt_thread_info.state = enclosing_state; 2121 } 2122 #endif 2123 __kmpc_end_taskgroup(NULL, *gtid); 2124 // if last thread out, then reset the team's reduce data 2125 // the GOMP_taskgroup_reduction_unregister() function will deallocate 2126 // private copies after reduction calculations take place. 2127 int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[0]); 2128 if (count == thr->th.th_team_nproc - 1) { 2129 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[0], NULL); 2130 KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[0], 0); 2131 } 2132 return (unsigned)thr->th.th_team_nproc; 2133 } 2134 2135 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS)( 2136 void (*task)(void *), void *data, unsigned num_threads, 2137 unsigned int flags) { 2138 MKLOC(loc, "GOMP_parallel_reductions"); 2139 int gtid = __kmp_entry_gtid(); 2140 KA_TRACE(20, ("GOMP_parallel_reductions: T#%d\n", gtid)); 2141 __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, 2142 (microtask_t)__kmp_GOMP_par_reductions_microtask_wrapper, 2143 2, task, data); 2144 unsigned retval = 2145 __kmp_GOMP_par_reductions_microtask_wrapper(>id, NULL, task, data); 2146 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); 2147 KA_TRACE(20, ("GOMP_parallel_reductions exit: T#%d\n", gtid)); 2148 return retval; 2149 } 2150 2151 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_START)( 2152 long start, long end, long incr, long sched, long chunk_size, long *istart, 2153 long *iend, uintptr_t *reductions, void **mem) { 2154 int status = 0; 2155 int gtid = __kmp_entry_gtid(); 2156 KA_TRACE(20, ("GOMP_loop_start: T#%d, reductions: %p\n", gtid, reductions)); 2157 if (reductions) 2158 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2159 if (mem) 2160 KMP_FATAL(GompFeatureNotSupported, "scan"); 2161 if (istart == NULL) 2162 return true; 2163 const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic); 2164 long monotonic = sched & MONOTONIC_FLAG; 2165 sched &= ~MONOTONIC_FLAG; 2166 if (sched == 0) { 2167 if (monotonic) 2168 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START)( 2169 start, end, incr, istart, iend); 2170 else 2171 status = KMP_EXPAND_NAME( 2172 KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START)( 2173 start, end, incr, istart, iend); 2174 } else if (sched == 1) { 2175 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START)( 2176 start, end, incr, chunk_size, istart, iend); 2177 } else if (sched == 2) { 2178 if (monotonic) 2179 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START)( 2180 start, end, incr, chunk_size, istart, iend); 2181 else 2182 status = 2183 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START)( 2184 start, end, incr, chunk_size, istart, iend); 2185 } else if (sched == 3) { 2186 if (monotonic) 2187 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START)( 2188 start, end, incr, chunk_size, istart, iend); 2189 else 2190 status = 2191 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START)( 2192 start, end, incr, chunk_size, istart, iend); 2193 } else if (sched == 4) { 2194 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START)( 2195 start, end, incr, istart, iend); 2196 } else { 2197 KMP_ASSERT(0); 2198 } 2199 return status; 2200 } 2201 2202 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_START)( 2203 bool up, unsigned long long start, unsigned long long end, 2204 unsigned long long incr, long sched, unsigned long long chunk_size, 2205 unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions, 2206 void **mem) { 2207 int status = 0; 2208 int gtid = __kmp_entry_gtid(); 2209 KA_TRACE(20, 2210 ("GOMP_loop_ull_start: T#%d, reductions: %p\n", gtid, reductions)); 2211 if (reductions) 2212 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2213 if (mem) 2214 KMP_FATAL(GompFeatureNotSupported, "scan"); 2215 if (istart == NULL) 2216 return true; 2217 const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic); 2218 long monotonic = sched & MONOTONIC_FLAG; 2219 sched &= ~MONOTONIC_FLAG; 2220 if (sched == 0) { 2221 if (monotonic) 2222 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START)( 2223 up, start, end, incr, istart, iend); 2224 else 2225 status = KMP_EXPAND_NAME( 2226 KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START)( 2227 up, start, end, incr, istart, iend); 2228 } else if (sched == 1) { 2229 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START)( 2230 up, start, end, incr, chunk_size, istart, iend); 2231 } else if (sched == 2) { 2232 if (monotonic) 2233 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START)( 2234 up, start, end, incr, chunk_size, istart, iend); 2235 else 2236 status = KMP_EXPAND_NAME( 2237 KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START)( 2238 up, start, end, incr, chunk_size, istart, iend); 2239 } else if (sched == 3) { 2240 if (monotonic) 2241 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START)( 2242 up, start, end, incr, chunk_size, istart, iend); 2243 else 2244 status = 2245 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START)( 2246 up, start, end, incr, chunk_size, istart, iend); 2247 } else if (sched == 4) { 2248 status = 2249 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START)( 2250 up, start, end, incr, istart, iend); 2251 } else { 2252 KMP_ASSERT(0); 2253 } 2254 return status; 2255 } 2256 2257 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_START)( 2258 unsigned ncounts, long *counts, long sched, long chunk_size, long *istart, 2259 long *iend, uintptr_t *reductions, void **mem) { 2260 int status = 0; 2261 int gtid = __kmp_entry_gtid(); 2262 KA_TRACE(20, ("GOMP_loop_doacross_start: T#%d, reductions: %p\n", gtid, 2263 reductions)); 2264 if (reductions) 2265 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2266 if (mem) 2267 KMP_FATAL(GompFeatureNotSupported, "scan"); 2268 if (istart == NULL) 2269 return true; 2270 // Ignore any monotonic flag 2271 const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic); 2272 sched &= ~MONOTONIC_FLAG; 2273 if (sched == 0) { 2274 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START)( 2275 ncounts, counts, istart, iend); 2276 } else if (sched == 1) { 2277 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START)( 2278 ncounts, counts, chunk_size, istart, iend); 2279 } else if (sched == 2) { 2280 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START)( 2281 ncounts, counts, chunk_size, istart, iend); 2282 } else if (sched == 3) { 2283 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START)( 2284 ncounts, counts, chunk_size, istart, iend); 2285 } else { 2286 KMP_ASSERT(0); 2287 } 2288 return status; 2289 } 2290 2291 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START)( 2292 unsigned ncounts, unsigned long long *counts, long sched, 2293 unsigned long long chunk_size, unsigned long long *istart, 2294 unsigned long long *iend, uintptr_t *reductions, void **mem) { 2295 int status = 0; 2296 int gtid = __kmp_entry_gtid(); 2297 KA_TRACE(20, ("GOMP_loop_ull_doacross_start: T#%d, reductions: %p\n", gtid, 2298 reductions)); 2299 if (reductions) 2300 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2301 if (mem) 2302 KMP_FATAL(GompFeatureNotSupported, "scan"); 2303 if (istart == NULL) 2304 return true; 2305 // Ignore any monotonic flag 2306 const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic); 2307 sched &= ~MONOTONIC_FLAG; 2308 if (sched == 0) { 2309 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START)( 2310 ncounts, counts, istart, iend); 2311 } else if (sched == 1) { 2312 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START)( 2313 ncounts, counts, chunk_size, istart, iend); 2314 } else if (sched == 2) { 2315 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START)( 2316 ncounts, counts, chunk_size, istart, iend); 2317 } else if (sched == 3) { 2318 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START)( 2319 ncounts, counts, chunk_size, istart, iend); 2320 } else { 2321 KMP_ASSERT(0); 2322 } 2323 return status; 2324 } 2325 2326 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_START)( 2327 long start, long end, long incr, long sched, long chunk_size, long *istart, 2328 long *iend, uintptr_t *reductions, void **mem) { 2329 int status = 0; 2330 int gtid = __kmp_entry_gtid(); 2331 KA_TRACE(20, ("GOMP_loop_ordered_start: T#%d, reductions: %p\n", gtid, 2332 reductions)); 2333 if (reductions) 2334 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2335 if (mem) 2336 KMP_FATAL(GompFeatureNotSupported, "scan"); 2337 if (istart == NULL) 2338 return true; 2339 // Ignore any monotonic flag 2340 const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic); 2341 sched &= ~MONOTONIC_FLAG; 2342 if (sched == 0) { 2343 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START)( 2344 start, end, incr, istart, iend); 2345 } else if (sched == 1) { 2346 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START)( 2347 start, end, incr, chunk_size, istart, iend); 2348 } else if (sched == 2) { 2349 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START)( 2350 start, end, incr, chunk_size, istart, iend); 2351 } else if (sched == 3) { 2352 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START)( 2353 start, end, incr, chunk_size, istart, iend); 2354 } else { 2355 KMP_ASSERT(0); 2356 } 2357 return status; 2358 } 2359 2360 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START)( 2361 bool up, unsigned long long start, unsigned long long end, 2362 unsigned long long incr, long sched, unsigned long long chunk_size, 2363 unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions, 2364 void **mem) { 2365 int status = 0; 2366 int gtid = __kmp_entry_gtid(); 2367 KA_TRACE(20, ("GOMP_loop_ull_ordered_start: T#%d, reductions: %p\n", gtid, 2368 reductions)); 2369 if (reductions) 2370 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2371 if (mem) 2372 KMP_FATAL(GompFeatureNotSupported, "scan"); 2373 if (istart == NULL) 2374 return true; 2375 // Ignore any monotonic flag 2376 const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic); 2377 sched &= ~MONOTONIC_FLAG; 2378 if (sched == 0) { 2379 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START)( 2380 up, start, end, incr, istart, iend); 2381 } else if (sched == 1) { 2382 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START)( 2383 up, start, end, incr, chunk_size, istart, iend); 2384 } else if (sched == 2) { 2385 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START)( 2386 up, start, end, incr, chunk_size, istart, iend); 2387 } else if (sched == 3) { 2388 status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START)( 2389 up, start, end, incr, chunk_size, istart, iend); 2390 } else { 2391 KMP_ASSERT(0); 2392 } 2393 return status; 2394 } 2395 2396 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS2_START)( 2397 unsigned count, uintptr_t *reductions, void **mem) { 2398 int gtid = __kmp_entry_gtid(); 2399 KA_TRACE(20, 2400 ("GOMP_sections2_start: T#%d, reductions: %p\n", gtid, reductions)); 2401 if (reductions) 2402 __kmp_GOMP_init_reductions(gtid, reductions, 1); 2403 if (mem) 2404 KMP_FATAL(GompFeatureNotSupported, "scan"); 2405 return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(count); 2406 } 2407 2408 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)( 2409 bool cancelled) { 2410 int gtid = __kmp_get_gtid(); 2411 MKLOC(loc, "GOMP_workshare_task_reduction_unregister"); 2412 KA_TRACE(20, ("GOMP_workshare_task_reduction_unregister: T#%d\n", gtid)); 2413 kmp_info_t *thr = __kmp_threads[gtid]; 2414 kmp_team_t *team = thr->th.th_team; 2415 __kmpc_end_taskgroup(NULL, gtid); 2416 // If last thread out of workshare, then reset the team's reduce data 2417 // the GOMP_taskgroup_reduction_unregister() function will deallocate 2418 // private copies after reduction calculations take place. 2419 int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[1]); 2420 if (count == thr->th.th_team_nproc - 1) { 2421 KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER) 2422 ((uintptr_t *)KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[1])); 2423 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[1], NULL); 2424 KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[1], 0); 2425 } 2426 if (!cancelled) { 2427 __kmpc_barrier(&loc, gtid); 2428 } 2429 } 2430 2431 /* The following sections of code create aliases for the GOMP_* functions, then 2432 create versioned symbols using the assembler directive .symver. This is only 2433 pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in 2434 kmp_os.h */ 2435 2436 #ifdef KMP_USE_VERSION_SYMBOLS 2437 // GOMP_1.0 versioned symbols 2438 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0"); 2439 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0"); 2440 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0"); 2441 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0"); 2442 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0"); 2443 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0"); 2444 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0"); 2445 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0"); 2446 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); 2447 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0"); 2448 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0"); 2449 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0"); 2450 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0"); 2451 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0"); 2452 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, 2453 "GOMP_1.0"); 2454 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0"); 2455 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0"); 2456 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0"); 2457 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, 2458 "GOMP_1.0"); 2459 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0"); 2460 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0"); 2461 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0"); 2462 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0"); 2463 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0"); 2464 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0"); 2465 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0"); 2466 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0"); 2467 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0"); 2468 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, 2469 "GOMP_1.0"); 2470 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, 2471 "GOMP_1.0"); 2472 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, 2473 "GOMP_1.0"); 2474 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, 2475 "GOMP_1.0"); 2476 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0"); 2477 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0"); 2478 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0"); 2479 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0"); 2480 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0"); 2481 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0"); 2482 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0"); 2483 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0"); 2484 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0"); 2485 2486 // GOMP_2.0 versioned symbols 2487 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0"); 2488 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0"); 2489 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0"); 2490 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0"); 2491 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0"); 2492 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0"); 2493 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, 2494 "GOMP_2.0"); 2495 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, 2496 "GOMP_2.0"); 2497 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, 2498 "GOMP_2.0"); 2499 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, 2500 "GOMP_2.0"); 2501 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, 2502 "GOMP_2.0"); 2503 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, 2504 "GOMP_2.0"); 2505 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, 2506 "GOMP_2.0"); 2507 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, 2508 "GOMP_2.0"); 2509 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0"); 2510 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0"); 2511 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0"); 2512 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0"); 2513 2514 // GOMP_3.0 versioned symbols 2515 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0"); 2516 2517 // GOMP_4.0 versioned symbols 2518 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0"); 2519 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0"); 2520 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0"); 2521 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0"); 2522 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0"); 2523 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0"); 2524 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0"); 2525 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0"); 2526 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0"); 2527 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0"); 2528 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0"); 2529 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0"); 2530 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0"); 2531 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0"); 2532 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0"); 2533 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0"); 2534 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0"); 2535 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0"); 2536 2537 // GOMP_4.5 versioned symbols 2538 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5"); 2539 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5"); 2540 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5"); 2541 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5"); 2542 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45, 2543 "GOMP_4.5"); 2544 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45, 2545 "GOMP_4.5"); 2546 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45, 2547 "GOMP_4.5"); 2548 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45, 2549 "GOMP_4.5"); 2550 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5"); 2551 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5"); 2552 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45, 2553 "GOMP_4.5"); 2554 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45, 2555 "GOMP_4.5"); 2556 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45, 2557 "GOMP_4.5"); 2558 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45, 2559 "GOMP_4.5"); 2560 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45, 2561 "GOMP_4.5"); 2562 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45, 2563 "GOMP_4.5"); 2564 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45, 2565 "GOMP_4.5"); 2566 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45, 2567 "GOMP_4.5"); 2568 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45, 2569 "GOMP_4.5"); 2570 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45, 2571 "GOMP_4.5"); 2572 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45, 2573 "GOMP_4.5"); 2574 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45, 2575 "GOMP_4.5"); 2576 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45, 2577 "GOMP_4.5"); 2578 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45, 2579 "GOMP_4.5"); 2580 2581 // GOMP_5.0 versioned symbols 2582 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 50, 2583 "GOMP_5.0"); 2584 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START, 50, 2585 "GOMP_5.0"); 2586 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT, 50, 2587 "GOMP_5.0"); 2588 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, 50, 2589 "GOMP_5.0"); 2590 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 2591 50, "GOMP_5.0"); 2592 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START, 2593 50, "GOMP_5.0"); 2594 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT, 50, 2595 "GOMP_5.0"); 2596 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START, 50, 2597 "GOMP_5.0"); 2598 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50, 2599 "GOMP_5.0"); 2600 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME, 2601 50, "GOMP_5.0"); 2602 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0"); 2603 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT_DEPEND, 50, "GOMP_5.0"); 2604 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER, 50, 2605 "GOMP_5.0"); 2606 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER, 50, 2607 "GOMP_5.0"); 2608 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP, 50, "GOMP_5.0"); 2609 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS, 50, "GOMP_5.0"); 2610 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_START, 50, "GOMP_5.0"); 2611 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_START, 50, "GOMP_5.0"); 2612 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_START, 50, "GOMP_5.0"); 2613 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START, 50, "GOMP_5.0"); 2614 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_START, 50, "GOMP_5.0"); 2615 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0"); 2616 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0"); 2617 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50, 2618 "GOMP_5.0"); 2619 #endif // KMP_USE_VERSION_SYMBOLS 2620 2621 #ifdef __cplusplus 2622 } // extern "C" 2623 #endif // __cplusplus 2624