xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_gsupport.cpp (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*
2  * kmp_gsupport.cpp
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_atomic.h"
15 
16 #if OMPT_SUPPORT
17 #include "ompt-specific.h"
18 #endif
19 
// Bit flags carried in the GOMP task "flags" argument.
enum {
  KMP_GOMP_TASK_UNTIED_FLAG = 1 << 0,
  KMP_GOMP_TASK_FINAL_FLAG = 1 << 1,
  KMP_GOMP_TASK_DEPENDS_FLAG = 1 << 3
};
25 
// Dependence kinds stored in the second slot of a GOMP depobj.
enum {
  KMP_GOMP_DEPOBJ_IN = 0x1,
  KMP_GOMP_DEPOBJ_OUT = 0x2,
  KMP_GOMP_DEPOBJ_INOUT = 0x3,
  KMP_GOMP_DEPOBJ_MTXINOUTSET = 0x4
};
32 
33 // This class helps convert gomp dependency info into
34 // kmp_depend_info_t structures
35 class kmp_gomp_depends_info_t {
36   void **depend;
37   kmp_int32 num_deps;
38   size_t num_out, num_mutexinout, num_in, num_depobj;
39   size_t offset;
40 
41 public:
42   kmp_gomp_depends_info_t(void **depend) : depend(depend) {
43     size_t ndeps = (kmp_intptr_t)depend[0];
44     // GOMP taskdep structure:
45     // if depend[0] != 0:
46     // depend =  [ ndeps | nout | &out | ... | &out | &in | ... | &in ]
47     //
48     // if depend[0] == 0:
49     // depend = [ 0 | ndeps | nout | nmtx | nin | &out | ... | &out | &mtx |
50     //            ... | &mtx | &in   | ...  | &in  | &depobj | ... | &depobj ]
51     if (ndeps) {
52       num_out = (kmp_intptr_t)depend[1];
53       num_in = ndeps - num_out;
54       num_mutexinout = num_depobj = 0;
55       offset = 2;
56     } else {
57       ndeps = (kmp_intptr_t)depend[1];
58       num_out = (kmp_intptr_t)depend[2];
59       num_mutexinout = (kmp_intptr_t)depend[3];
60       num_in = (kmp_intptr_t)depend[4];
61       num_depobj = ndeps - num_out - num_mutexinout - num_in;
62       KMP_ASSERT(num_depobj <= ndeps);
63       offset = 5;
64     }
65     num_deps = static_cast<kmp_int32>(ndeps);
66   }
67   kmp_int32 get_num_deps() const { return num_deps; }
68   kmp_depend_info_t get_kmp_depend(size_t index) const {
69     kmp_depend_info_t retval;
70     memset(&retval, '\0', sizeof(retval));
71     KMP_ASSERT(index < (size_t)num_deps);
72     retval.len = 0;
73     // Because inout and out are logically equivalent,
74     // use inout and in dependency flags. GOMP does not provide a
75     // way to distinguish if user specified out vs. inout.
76     if (index < num_out) {
77       retval.flags.in = 1;
78       retval.flags.out = 1;
79       retval.base_addr = (kmp_intptr_t)depend[offset + index];
80     } else if (index >= num_out && index < (num_out + num_mutexinout)) {
81       retval.flags.mtx = 1;
82       retval.base_addr = (kmp_intptr_t)depend[offset + index];
83     } else if (index >= (num_out + num_mutexinout) &&
84                index < (num_out + num_mutexinout + num_in)) {
85       retval.flags.in = 1;
86       retval.base_addr = (kmp_intptr_t)depend[offset + index];
87     } else {
88       // depobj is a two element array (size of elements are size of pointer)
89       // depobj[0] = base_addr
90       // depobj[1] = type (in, out, inout, mutexinoutset, etc.)
91       kmp_intptr_t *depobj = (kmp_intptr_t *)depend[offset + index];
92       retval.base_addr = depobj[0];
93       switch (depobj[1]) {
94       case KMP_GOMP_DEPOBJ_IN:
95         retval.flags.in = 1;
96         break;
97       case KMP_GOMP_DEPOBJ_OUT:
98         retval.flags.out = 1;
99         break;
100       case KMP_GOMP_DEPOBJ_INOUT:
101         retval.flags.in = 1;
102         retval.flags.out = 1;
103         break;
104       case KMP_GOMP_DEPOBJ_MTXINOUTSET:
105         retval.flags.mtx = 1;
106         break;
107       default:
108         KMP_FATAL(GompFeatureNotSupported, "Unknown depobj type");
109       }
110     }
111     return retval;
112   }
113 };
114 
115 #ifdef __cplusplus
116 extern "C" {
117 #endif // __cplusplus
118 
// MKLOC(loc, routine): define a static ident_t named `loc` with a placeholder
// source-location string. The `routine` argument is not used by the expansion.
#define MKLOC(loc, routine)                                                    \
  static ident_t loc = {                                                       \
      0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
121 
122 #include "kmp_ftn_os.h"
123 
124 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {
125   int gtid = __kmp_entry_gtid();
126   MKLOC(loc, "GOMP_barrier");
127   KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
128 #if OMPT_SUPPORT && OMPT_OPTIONAL
129   ompt_frame_t *ompt_frame;
130   if (ompt_enabled.enabled) {
131     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
132     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
133   }
134   OMPT_STORE_RETURN_ADDRESS(gtid);
135 #endif
136   __kmpc_barrier(&loc, gtid);
137 #if OMPT_SUPPORT && OMPT_OPTIONAL
138   if (ompt_enabled.enabled) {
139     ompt_frame->enter_frame = ompt_data_none;
140   }
141 #endif
142 }
143 
144 // Mutual exclusion
145 
// The symbol that icc/ifort generates for unnamed critical sections
// - .gomp_critical_user_ - is defined using .comm in any objects that
// reference it. We can't reference it directly here in C code, as the symbol
// contains a ".".
//
// The RTL contains an assembly language definition of .gomp_critical_user_
// with another symbol __kmp_unnamed_critical_addr initialized with its
// address.
153 extern kmp_critical_name *__kmp_unnamed_critical_addr;
154 
155 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) {
156   int gtid = __kmp_entry_gtid();
157   MKLOC(loc, "GOMP_critical_start");
158   KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
159 #if OMPT_SUPPORT && OMPT_OPTIONAL
160   OMPT_STORE_RETURN_ADDRESS(gtid);
161 #endif
162   __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
163 }
164 
165 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
166   int gtid = __kmp_get_gtid();
167   MKLOC(loc, "GOMP_critical_end");
168   KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
169 #if OMPT_SUPPORT && OMPT_OPTIONAL
170   OMPT_STORE_RETURN_ADDRESS(gtid);
171 #endif
172   __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
173 }
174 
175 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {
176   int gtid = __kmp_entry_gtid();
177   MKLOC(loc, "GOMP_critical_name_start");
178   KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
179   __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
180 }
181 
182 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {
183   int gtid = __kmp_get_gtid();
184   MKLOC(loc, "GOMP_critical_name_end");
185   KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
186   __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
187 }
188 
189 // The Gnu codegen tries to use locked operations to perform atomic updates
190 // inline.  If it can't, then it calls GOMP_atomic_start() before performing
191 // the update and GOMP_atomic_end() afterward, regardless of the data type.
192 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) {
193   int gtid = __kmp_entry_gtid();
194   KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
195 
196 #if OMPT_SUPPORT
197   __ompt_thread_assign_wait_id(0);
198 #endif
199 
200   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
201 }
202 
203 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) {
204   int gtid = __kmp_get_gtid();
205   KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
206   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
207 }
208 
209 int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) {
210   int gtid = __kmp_entry_gtid();
211   MKLOC(loc, "GOMP_single_start");
212   KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));
213 
214   if (!TCR_4(__kmp_init_parallel))
215     __kmp_parallel_initialize();
216   __kmp_resume_if_soft_paused();
217 
218   // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
219   // workshare when USE_CHECKS is defined.  We need to avoid the push,
220   // as there is no corresponding GOMP_single_end() call.
221   kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);
222 
223 #if OMPT_SUPPORT && OMPT_OPTIONAL
224   kmp_info_t *this_thr = __kmp_threads[gtid];
225   kmp_team_t *team = this_thr->th.th_team;
226   int tid = __kmp_tid_from_gtid(gtid);
227 
228   if (ompt_enabled.enabled) {
229     if (rc) {
230       if (ompt_enabled.ompt_callback_work) {
231         ompt_callbacks.ompt_callback(ompt_callback_work)(
232             ompt_work_single_executor, ompt_scope_begin,
233             &(team->t.ompt_team_info.parallel_data),
234             &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
235             1, OMPT_GET_RETURN_ADDRESS(0));
236       }
237     } else {
238       if (ompt_enabled.ompt_callback_work) {
239         ompt_callbacks.ompt_callback(ompt_callback_work)(
240             ompt_work_single_other, ompt_scope_begin,
241             &(team->t.ompt_team_info.parallel_data),
242             &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
243             1, OMPT_GET_RETURN_ADDRESS(0));
244         ompt_callbacks.ompt_callback(ompt_callback_work)(
245             ompt_work_single_other, ompt_scope_end,
246             &(team->t.ompt_team_info.parallel_data),
247             &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
248             1, OMPT_GET_RETURN_ADDRESS(0));
249       }
250     }
251   }
252 #endif
253 
254   return rc;
255 }
256 
257 void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
258   void *retval;
259   int gtid = __kmp_entry_gtid();
260   MKLOC(loc, "GOMP_single_copy_start");
261   KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));
262 
263   if (!TCR_4(__kmp_init_parallel))
264     __kmp_parallel_initialize();
265   __kmp_resume_if_soft_paused();
266 
267   // If this is the first thread to enter, return NULL.  The generated code will
268   // then call GOMP_single_copy_end() for this thread only, with the
269   // copyprivate data pointer as an argument.
270   if (__kmp_enter_single(gtid, &loc, FALSE))
271     return NULL;
272 
273     // Wait for the first thread to set the copyprivate data pointer,
274     // and for all other threads to reach this point.
275 
276 #if OMPT_SUPPORT && OMPT_OPTIONAL
277   ompt_frame_t *ompt_frame;
278   if (ompt_enabled.enabled) {
279     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
280     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
281   }
282   OMPT_STORE_RETURN_ADDRESS(gtid);
283 #endif
284   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
285 
286   // Retrieve the value of the copyprivate data point, and wait for all
287   // threads to do likewise, then return.
288   retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
289   {
290 #if OMPT_SUPPORT && OMPT_OPTIONAL
291     OMPT_STORE_RETURN_ADDRESS(gtid);
292 #endif
293     __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
294   }
295 #if OMPT_SUPPORT && OMPT_OPTIONAL
296   if (ompt_enabled.enabled) {
297     ompt_frame->enter_frame = ompt_data_none;
298   }
299 #endif
300   return retval;
301 }
302 
303 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
304   int gtid = __kmp_get_gtid();
305   KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));
306 
307   // Set the copyprivate data pointer fo the team, then hit the barrier so that
308   // the other threads will continue on and read it.  Hit another barrier before
309   // continuing, so that the know that the copyprivate data pointer has been
310   // propagated to all threads before trying to reuse the t_copypriv_data field.
311   __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
312 #if OMPT_SUPPORT && OMPT_OPTIONAL
313   ompt_frame_t *ompt_frame;
314   if (ompt_enabled.enabled) {
315     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
316     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
317   }
318   OMPT_STORE_RETURN_ADDRESS(gtid);
319 #endif
320   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
321   {
322 #if OMPT_SUPPORT && OMPT_OPTIONAL
323     OMPT_STORE_RETURN_ADDRESS(gtid);
324 #endif
325     __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
326   }
327 #if OMPT_SUPPORT && OMPT_OPTIONAL
328   if (ompt_enabled.enabled) {
329     ompt_frame->enter_frame = ompt_data_none;
330   }
331 #endif
332 }
333 
334 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) {
335   int gtid = __kmp_entry_gtid();
336   MKLOC(loc, "GOMP_ordered_start");
337   KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
338 #if OMPT_SUPPORT && OMPT_OPTIONAL
339   OMPT_STORE_RETURN_ADDRESS(gtid);
340 #endif
341   __kmpc_ordered(&loc, gtid);
342 }
343 
344 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
345   int gtid = __kmp_get_gtid();
346   MKLOC(loc, "GOMP_ordered_end");
347   KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
348 #if OMPT_SUPPORT && OMPT_OPTIONAL
349   OMPT_STORE_RETURN_ADDRESS(gtid);
350 #endif
351   __kmpc_end_ordered(&loc, gtid);
352 }
353 
354 // Dispatch macro defs
355 //
356 // They come in two flavors: 64-bit unsigned, and either 32-bit signed
357 // (IA-32 architecture) or 64-bit signed (Intel(R) 64).
358 
// Unsigned 64-bit dispatch routines (the same on every architecture).
#define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u

// Signed dispatch routines: the _4 variants on x86, ARM and MIPS, the _8
// variants everywhere else.
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
#else
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
#endif /* KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS */
372 
373 // The parallel construct
374 
// Microtask trampoline for GOMP parallel regions: invokes task(data).
// With OMPT enabled, the thread state is switched to ompt_state_work_parallel
// and the task's exit_frame is set around the call; both are restored after.
// `npr` is not used.
#ifndef KMP_DEBUG
static
#endif /* KMP_DEBUG */
    void
    __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
                                 void *data) {
#if OMPT_SUPPORT
  kmp_info_t *self;
  ompt_frame_t *task_frame;
  ompt_state_t saved_state;

  if (ompt_enabled.enabled) {
    self = __kmp_threads[*gtid];

    // Remember the enclosing state and mark this thread as working in a
    // parallel region.
    saved_state = self->th.ompt_thread_info.state;
    self->th.ompt_thread_info.state = ompt_state_work_parallel;

    // Record the exit frame of the current task.
    __ompt_get_task_info_internal(0, NULL, NULL, &task_frame, NULL, NULL);
    task_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  task(data);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // Undo the frame and state changes made before the call.
    task_frame->exit_frame = ompt_data_none;
    self->th.ompt_thread_info.state = saved_state;
  }
#endif
}
412 
413 #ifndef KMP_DEBUG
414 static
415 #endif /* KMP_DEBUG */
416     void
417     __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
418                                           void (*task)(void *), void *data,
419                                           unsigned num_threads, ident_t *loc,
420                                           enum sched_type schedule, long start,
421                                           long end, long incr,
422                                           long chunk_size) {
423   // Initialize the loop worksharing construct.
424 
425   KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
426                     schedule != kmp_sch_static);
427 
428 #if OMPT_SUPPORT
429   kmp_info_t *thr;
430   ompt_frame_t *ompt_frame;
431   ompt_state_t enclosing_state;
432 
433   if (ompt_enabled.enabled) {
434     thr = __kmp_threads[*gtid];
435     // save enclosing task state; set current state for task
436     enclosing_state = thr->th.ompt_thread_info.state;
437     thr->th.ompt_thread_info.state = ompt_state_work_parallel;
438 
439     // set task frame
440     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
441     ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
442   }
443 #endif
444 
445   // Now invoke the microtask.
446   task(data);
447 
448 #if OMPT_SUPPORT
449   if (ompt_enabled.enabled) {
450     // clear task frame
451     ompt_frame->exit_frame = ompt_data_none;
452 
453     // reset enclosing state
454     thr->th.ompt_thread_info.state = enclosing_state;
455   }
456 #endif
457 }
458 
459 static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads,
460                                  unsigned flags, void (*unwrapped_task)(void *),
461                                  microtask_t wrapper, int argc, ...) {
462   int rc;
463   kmp_info_t *thr = __kmp_threads[gtid];
464   kmp_team_t *team = thr->th.th_team;
465   int tid = __kmp_tid_from_gtid(gtid);
466 
467   va_list ap;
468   va_start(ap, argc);
469 
470   if (num_threads != 0)
471     __kmp_push_num_threads(loc, gtid, num_threads);
472   if (flags != 0)
473     __kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags);
474   rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
475                        __kmp_invoke_task_func, kmp_va_addr_of(ap));
476 
477   va_end(ap);
478 
479   if (rc) {
480     __kmp_run_before_invoked_task(gtid, tid, thr, team);
481   }
482 
483 #if OMPT_SUPPORT
484   int ompt_team_size;
485   if (ompt_enabled.enabled) {
486     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
487     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
488 
489     // implicit task callback
490     if (ompt_enabled.ompt_callback_implicit_task) {
491       ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
492       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
493           ompt_scope_begin, &(team_info->parallel_data),
494           &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid),
495           ompt_task_implicit); // TODO: Can this be ompt_task_initial?
496       task_info->thread_num = __kmp_tid_from_gtid(gtid);
497     }
498     thr->th.ompt_thread_info.state = ompt_state_work_parallel;
499   }
500 #endif
501 }
502 
503 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
504                                                        void *data,
505                                                        unsigned num_threads) {
506   int gtid = __kmp_entry_gtid();
507 
508 #if OMPT_SUPPORT
509   ompt_frame_t *parent_frame, *frame;
510 
511   if (ompt_enabled.enabled) {
512     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
513     parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
514   }
515   OMPT_STORE_RETURN_ADDRESS(gtid);
516 #endif
517 
518   MKLOC(loc, "GOMP_parallel_start");
519   KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
520   __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
521                        (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
522                        data);
523 #if OMPT_SUPPORT
524   if (ompt_enabled.enabled) {
525     __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
526     frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
527   }
528 #endif
529 #if OMPD_SUPPORT
530   if (ompd_state & OMPD_ENABLE_BP)
531     ompd_bp_parallel_begin();
532 #endif
533 }
534 
535 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
536   int gtid = __kmp_get_gtid();
537   kmp_info_t *thr;
538 
539   thr = __kmp_threads[gtid];
540 
541   MKLOC(loc, "GOMP_parallel_end");
542   KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
543 
544   if (!thr->th.th_team->t.t_serialized) {
545     __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
546                                  thr->th.th_team);
547   }
548 #if OMPT_SUPPORT
549   if (ompt_enabled.enabled) {
550     // Implicit task is finished here, in the barrier we might schedule
551     // deferred tasks,
552     // these don't see the implicit task on the stack
553     OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
554   }
555 #endif
556 
557   __kmp_join_call(&loc, gtid
558 #if OMPT_SUPPORT
559                   ,
560                   fork_context_gnu
561 #endif
562   );
563 #if OMPD_SUPPORT
564   if (ompd_state & OMPD_ENABLE_BP)
565     ompd_bp_parallel_end();
566 #endif
567 }
568 
569 // Loop worksharing constructs
570 
571 // The Gnu codegen passes in an exclusive upper bound for the overall range,
572 // but the libguide dispatch code expects an inclusive upper bound, hence the
573 // "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th
574 // argument to __kmp_GOMP_fork_call).
575 //
// Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
577 // but the Gnu codegen expects an exclusive upper bound, so the adjustment
578 // "*p_ub += stride" compensates for the discrepancy.
579 //
580 // Correction: the gnu codegen always adjusts the upper bound by +-1, not the
581 // stride value.  We adjust the dispatch parameters accordingly (by +-1), but
582 // we still adjust p_ub by the actual stride value.
583 //
584 // The "runtime" versions do not take a chunk_sz parameter.
585 //
586 // The profile lib cannot support construct checking of unordered loops that
587 // are predetermined by the compiler to be statically scheduled, as the gcc
588 // codegen will not always emit calls to GOMP_loop_static_next() to get the
// next iteration.  Instead, it emits inline code to call omp_get_thread_num()
// and calculate the iteration space using the result.  It doesn't do this
// with ordered static loops, so they can be checked.
592 
// IF_OMPT_SUPPORT(code) expands to `code` when OMPT_SUPPORT is enabled and to
// nothing otherwise.
#if !OMPT_SUPPORT
#define IF_OMPT_SUPPORT(code)
#else
#define IF_OMPT_SUPPORT(code) code
#endif
598 
// LOOP_START(func, schedule): define the GOMP loop-start entry point `func`
// for loops that pass an explicit chunk size. For a non-empty range it
// initializes the dispatcher with an inclusive upper bound, fetches the first
// chunk into *p_lb / *p_ub, and converts *p_ub back to an exclusive bound.
// `func` returns nonzero when a chunk was obtained, 0 otherwise.
#define LOOP_START(func, schedule)                                             \
  int func(long lb, long ub, long str, long chunk_sz, long *p_lb,              \
           long *p_ub) {                                                       \
    int status = 0;                                                            \
    long stride;                                                               \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    /* The dispatcher is only involved when the range is non-empty. */         \
    const bool nonempty = (str > 0) ? (lb < ub) : (lb > ub);                   \
    if (nonempty) {                                                            \
      /* Inclusive bound handed to the dispatcher. */                          \
      const long last = (str > 0) ? (ub - 1) : (ub + 1);                       \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, last, str, chunk_sz,     \
                          (schedule) != kmp_sch_static);                       \
      }                                                                        \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,          \
                                   (kmp_int *)p_ub, (kmp_int *)&stride);       \
      }                                                                        \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        /* Back to the exclusive bound expected by the caller. */              \
        if (str > 0)                                                           \
          *p_ub += 1;                                                          \
        else                                                                   \
          *p_ub += -1;                                                         \
      }                                                                        \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
639 
// LOOP_RUNTIME_START(func, schedule): define the GOMP loop-start entry point
// `func` for runtime-scheduled loops, which take no chunk-size argument (a
// chunk size of 0 is passed to the dispatcher). For a non-empty range it
// initializes the dispatcher with an inclusive upper bound, fetches the first
// chunk into *p_lb / *p_ub, and converts *p_ub back to an exclusive bound.
// `func` returns nonzero when a chunk was obtained, 0 otherwise.
//
// The entry trace prints chunk_sz with a `long` conversion (0x%lx); it used
// %d, which does not match the `long` argument.
#define LOOP_RUNTIME_START(func, schedule)                                     \
  int func(long lb, long ub, long str, long *p_lb, long *p_ub) {               \
    int status;                                                                \
    long stride;                                                               \
    long chunk_sz = 0;                                                         \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                          \
                          (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,      \
                          TRUE);                                               \
      }                                                                        \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,          \
                                   (kmp_int *)p_ub, (kmp_int *)&stride);       \
      }                                                                        \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        /* Back to the exclusive bound expected by the caller. */              \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
679 
// KMP_DOACROSS_FINI(status, gtid): when `status` is zero and the thread's
// dispatch structure has th_doacross_flags set, call __kmpc_doacross_fini.
// Arguments are parenthesized so that expression arguments expand safely.
// The expansion is a complete `if` statement and is used without a trailing
// semicolon.
#define KMP_DOACROSS_FINI(status, gtid)                                        \
  if (!(status) &&                                                             \
      __kmp_threads[(gtid)]->th.th_dispatch->th_doacross_flags) {              \
    __kmpc_doacross_fini(NULL, (gtid));                                        \
  }
684 
// LOOP_NEXT(func, fini_code): define the GOMP loop-next entry point `func`.
// `fini_code` is emitted immediately before the dispatcher call and may refer
// to the locals `loc` and `gtid`. On success *p_ub is converted from the
// dispatcher's inclusive bound to an exclusive one. `func` returns the status
// of KMP_DISPATCH_NEXT.
#define LOOP_NEXT(func, fini_code)                                             \
  int func(long *p_lb, long *p_ub) {                                           \
    int status;                                                                \
    long stride;                                                               \
    int gtid = __kmp_get_gtid();                                               \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid));                            \
                                                                               \
    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
    fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,    \
                                         (kmp_int *)p_ub, (kmp_int *)&stride); \
    if (status) {                                                              \
      /* Back to the exclusive bound expected by the caller. */                \
      if (stride > 0)                                                          \
        *p_ub += 1;                                                            \
      else                                                                     \
        *p_ub += -1;                                                           \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid)                                            \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
                       "returning %d\n",                                       \
         gtid, *p_lb, *p_ub, stride, status));                                 \
    return status;                                                             \
  }
708 
709 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
710 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
711 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
712            kmp_sch_dynamic_chunked)
713 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),
714            kmp_sch_dynamic_chunked)
715 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
716 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})
717 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
718            kmp_sch_guided_chunked)
719 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),
720            kmp_sch_guided_chunked)
721 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
722 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
723 LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
724                    kmp_sch_runtime)
725 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
726 LOOP_RUNTIME_START(
727     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START),
728     kmp_sch_runtime)
729 LOOP_RUNTIME_START(
730     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START),
731     kmp_sch_runtime)
732 LOOP_NEXT(
733     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT), {})
734 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT), {})
735 
736 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START),
737            kmp_ord_static)
738 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT),
739           { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
740 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START),
741            kmp_ord_dynamic_chunked)
742 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT),
743           { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
744 LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START),
745            kmp_ord_guided_chunked)
746 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT),
747           { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
748 LOOP_RUNTIME_START(
749     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START),
750     kmp_ord_runtime)
751 LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT),
752           { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
753 
// LOOP_DOACROSS_START(func, schedule)
//
// Defines `func`, a doacross loop-start entry point that takes an explicit
// chunk size. The generated function:
//   1. describes each of the `ncounts` dimensions as the range
//      [0, counts[i] - 1] with step 1 and passes them to __kmpc_doacross_init;
//   2. when counts[0] is positive, initializes dispatch over
//      0 .. counts[0] - 1 with `schedule` and `chunk_sz`, then requests the
//      first chunk into *p_lb / *p_ub;
//   3. on success bumps *p_ub by one (the dispatcher was handed ub - 1 as its
//      upper bound);
//   4. invokes KMP_DOACROSS_FINI with the status, frees the dimension array
//      and returns the status (zero when no chunk was obtained).
#define LOOP_DOACROSS_START(func, schedule)                                    \
  bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb,         \
            long *p_ub) {                                                      \
    int status = 0;                                                            \
    long stride, lb, ub, str;                                                  \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned d = 0; d != ncounts; d++) {                                  \
      struct kmp_dim *dim = dims + d;                                          \
      dim->lo = 0;                                                             \
      dim->up = counts[d] - 1;                                                 \
      dim->st = 1;                                                             \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \
                                "0x%lx, chunk_sz "                             \
                                "0x%lx\n",                                     \
                  gtid, ncounts, lb, ub, str, chunk_sz));                      \
                                                                               \
    /* Skip dispatch when the outermost range is empty. */                     \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      long last_iter = (str > 0) ? (ub - 1) : (ub + 1);                        \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, last_iter, str, chunk_sz,  \
                        (schedule) != kmp_sch_static);                         \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        if (str > 0) {                                                         \
          *p_ub += 1;                                                          \
        } else {                                                               \
          *p_ub -= 1;                                                          \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, "      \
                                "returning %d\n",                              \
                  gtid, *p_lb, *p_ub, status));                                \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
800 
// LOOP_DOACROSS_RUNTIME_START(func, schedule)
//
// Defines `func`, a doacross loop-start entry point without a chunk-size
// parameter; a local chunk_sz of 0 is handed to the dispatcher instead. The
// generated function:
//   1. describes each of the `ncounts` dimensions as the range
//      [0, counts[i] - 1] with step 1 and passes them to __kmpc_doacross_init;
//   2. when counts[0] is positive, initializes dispatch over
//      0 .. counts[0] - 1 with `schedule`, then requests the first chunk into
//      *p_lb / *p_ub;
//   3. on success bumps *p_ub by one (the dispatcher was handed ub - 1 as its
//      upper bound);
//   4. invokes KMP_DOACROSS_FINI with the status, frees the dimension array
//      and returns the status (zero when no chunk was obtained).
//
// The entry trace prints chunk_sz with 0x%lx: it is a `long`, so the former
// %d conversion did not match the argument type.
#define LOOP_DOACROSS_RUNTIME_START(func, schedule)                            \
  int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) {           \
    int status;                                                                \
    long stride, lb, ub, str;                                                  \
    long chunk_sz = 0;                                                         \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, "      \
                                "chunk_sz 0x%lx\n",                            \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
846 
847 LOOP_DOACROSS_START(
848     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START),
849     kmp_sch_static)
850 LOOP_DOACROSS_START(
851     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START),
852     kmp_sch_dynamic_chunked)
853 LOOP_DOACROSS_START(
854     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START),
855     kmp_sch_guided_chunked)
856 LOOP_DOACROSS_RUNTIME_START(
857     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START),
858     kmp_sch_runtime)
859 
860 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) {
861   int gtid = __kmp_get_gtid();
862   KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
863 
864 #if OMPT_SUPPORT && OMPT_OPTIONAL
865   ompt_frame_t *ompt_frame;
866   if (ompt_enabled.enabled) {
867     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
868     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
869     OMPT_STORE_RETURN_ADDRESS(gtid);
870   }
871 #endif
872   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
873 #if OMPT_SUPPORT && OMPT_OPTIONAL
874   if (ompt_enabled.enabled) {
875     ompt_frame->enter_frame = ompt_data_none;
876   }
877 #endif
878 
879   KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
880 }
881 
882 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) {
883   KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
884 }
885 
886 // Unsigned long long loop worksharing constructs
887 //
888 // These are new with gcc 4.4
889 
// LOOP_START_ULL(func, schedule)
//
// Defines `func`, a loop-start entry point for `unsigned long long` iteration
// spaces with an explicit chunk size.
//
//   up        nonzero: the signed stride str2 is (long long)str;
//             zero:    str2 is -(long long)str.
//   lb, ub    loop bounds; the dispatcher receives lb and ub - 1 (str2 > 0)
//             or ub + 1 (otherwise) as its bounds.
//   str       stride magnitude as passed by the caller (unsigned).
//   chunk_sz  chunk size forwarded to the dispatcher.
//   p_lb/p_ub receive the first chunk; on success *p_ub is moved one step
//             further in the direction of str2.
//
// Returns the dispatcher status, or 0 without dispatching when the range is
// empty for the direction given by str2.
//
// All direction tests use the signed str2. `str` is unsigned, so comparing it
// against zero cannot distinguish an upward from a downward loop, and it
// would disagree with the stride the dispatcher is initialized with.
#define LOOP_START_ULL(func, schedule)                                         \
  int func(int up, unsigned long long lb, unsigned long long ub,               \
           unsigned long long str, unsigned long long chunk_sz,                \
           unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    long long str2 = up ? ((long long)str) : -((long long)str);                \
    long long stride;                                                          \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str "    \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, up, lb, ub, str, chunk_sz));                           \
                                                                               \
    /* Use the signed stride str2 for direction tests; str is unsigned. */     \
    if ((str2 > 0) ? (lb < ub) : (lb > ub)) {                                  \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz,  \
                            (schedule) != kmp_sch_static);                     \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str2);                                      \
        *p_ub += (str2 > 0) ? 1 : -1;                                          \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
926 
// LOOP_RUNTIME_START_ULL(func, schedule)
//
// Defines `func`, a loop-start entry point for `unsigned long long` iteration
// spaces without a chunk-size parameter; a local chunk_sz of 0 is handed to
// the dispatcher instead.
//
//   up        nonzero: the signed stride str2 is (long long)str;
//             zero:    str2 is -(long long)str.
//   lb, ub    loop bounds; the dispatcher receives lb and ub - 1 (str2 > 0)
//             or ub + 1 (otherwise) as its bounds.
//   str       stride magnitude as passed by the caller (unsigned).
//   p_lb/p_ub receive the first chunk; on success *p_ub is moved one step
//             further in the direction of str2.
//
// Returns the dispatcher status, or 0 without dispatching when the range is
// empty for the direction given by str2.
//
// All direction tests use the signed str2. `str` is unsigned, so comparing it
// against zero cannot distinguish an upward from a downward loop. `stride` is
// declared signed because the dispatcher writes it through a kmp_int64
// pointer and it is compared with the signed str2.
#define LOOP_RUNTIME_START_ULL(func, schedule)                                 \
  int func(int up, unsigned long long lb, unsigned long long ub,               \
           unsigned long long str, unsigned long long *p_lb,                   \
           unsigned long long *p_ub) {                                         \
    int status;                                                                \
    long long str2 = up ? ((long long)str) : -((long long)str);                \
    long long stride;                                                          \
    unsigned long long chunk_sz = 0;                                           \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str "    \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, up, lb, ub, str, chunk_sz));                           \
                                                                               \
    /* Use the signed stride str2 for direction tests; str is unsigned. */     \
    if ((str2 > 0) ? (lb < ub) : (lb > ub)) {                                  \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz,  \
                            TRUE);                                             \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str2);                                      \
        *p_ub += (str2 > 0) ? 1 : -1;                                          \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
964 
// LOOP_NEXT_ULL(func, fini_code)
//
// Defines `func`, the "next chunk" entry point for `unsigned long long`
// loops. `fini_code` is a brace-enclosed block pasted in front of the
// dispatch call; it may refer to the locals `loc` and `gtid`. The generated
// function requests the next chunk into *p_lb / *p_ub and, on success, moves
// *p_ub one step further in the direction of the stride reported by the
// dispatcher. Returns the dispatcher status.
#define LOOP_NEXT_ULL(func, fini_code)                                         \
  int func(unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    long long stride;                                                          \
    int gtid = __kmp_get_gtid();                                               \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid));                            \
                                                                               \
    fini_code                                                                  \
    status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,       \
                                   (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);  \
    if (status) {                                                              \
      if (stride > 0) {                                                        \
        *p_ub += 1;                                                            \
      } else {                                                                 \
        *p_ub -= 1;                                                            \
      }                                                                        \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, "  \
                   "returning %d\n",                                           \
         gtid, *p_lb, *p_ub, stride, status));                                 \
    return status;                                                             \
  }
988 
989 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START),
990                kmp_sch_static)
991 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
992 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START),
993                kmp_sch_dynamic_chunked)
994 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
995 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
996                kmp_sch_guided_chunked)
997 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
998 LOOP_START_ULL(
999     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),
1000     kmp_sch_dynamic_chunked)
1001 LOOP_NEXT_ULL(
1002     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})
1003 LOOP_START_ULL(
1004     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),
1005     kmp_sch_guided_chunked)
1006 LOOP_NEXT_ULL(
1007     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})
1008 LOOP_RUNTIME_START_ULL(
1009     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
1010 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
1011 LOOP_RUNTIME_START_ULL(
1012     KMP_EXPAND_NAME(
1013         KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START),
1014     kmp_sch_runtime)
1015 LOOP_RUNTIME_START_ULL(
1016     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START),
1017     kmp_sch_runtime)
1018 LOOP_NEXT_ULL(
1019     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT),
1020     {})
1021 LOOP_NEXT_ULL(
1022     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT), {})
1023 
1024 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START),
1025                kmp_ord_static)
1026 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT),
1027               { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1028 LOOP_START_ULL(
1029     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START),
1030     kmp_ord_dynamic_chunked)
1031 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT),
1032               { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1033 LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START),
1034                kmp_ord_guided_chunked)
1035 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT),
1036               { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1037 LOOP_RUNTIME_START_ULL(
1038     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START),
1039     kmp_ord_runtime)
1040 LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),
1041               { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1042 
// LOOP_DOACROSS_START_ULL(func, schedule)
//
// Defines `func`, the `unsigned long long` doacross loop-start entry point
// that takes an explicit chunk size. The generated function:
//   1. describes each of the `ncounts` dimensions as the range
//      [0, counts[i] - 1] with step 1 and passes them to __kmpc_doacross_init;
//   2. when counts[0], read into a signed long long, is positive, initializes
//      dispatch over 0 .. counts[0] - 1 with `schedule` and `chunk_sz`, then
//      requests the first chunk into *p_lb / *p_ub;
//   3. on success bumps *p_ub by one (the dispatcher was handed ub - 1 as its
//      upper bound);
//   4. invokes KMP_DOACROSS_FINI with the status, frees the dimension array
//      and returns the status (zero when no chunk was obtained).
#define LOOP_DOACROSS_START_ULL(func, schedule)                                \
  int func(unsigned ncounts, unsigned long long *counts,                       \
           unsigned long long chunk_sz, unsigned long long *p_lb,              \
           unsigned long long *p_ub) {                                         \
    int status = 0;                                                            \
    long long stride, str, lb, ub;                                             \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned d = 0; d != ncounts; d++) {                                  \
      struct kmp_dim *dim = dims + d;                                          \
      dim->lo = 0;                                                             \
      dim->up = counts[d] - 1;                                                 \
      dim->st = 1;                                                             \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str "           \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    /* Skip dispatch when the outermost range is empty. */                     \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      long long last_iter = (str > 0) ? (ub - 1) : (ub + 1);                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, last_iter, str,        \
                            chunk_sz, (schedule) != kmp_sch_static);           \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        if (str > 0) {                                                         \
          *p_ub += 1;                                                          \
        } else {                                                               \
          *p_ub -= 1;                                                          \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, "    \
                                "returning %d\n",                              \
                  gtid, *p_lb, *p_ub, status));                                \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
1091 
// LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule)
//
// Defines `func`, the `unsigned long long` doacross loop-start entry point
// without a chunk-size parameter; a local chunk_sz of 0 is handed to the
// dispatcher instead. The generated function:
//   1. describes each of the `ncounts` dimensions as the range
//      [0, counts[i] - 1] with step 1 and passes them to __kmpc_doacross_init;
//   2. when counts[0] is nonzero, initializes dispatch over
//      0 .. counts[0] - 1 with `schedule`, then requests the first chunk into
//      *p_lb / *p_ub;
//   3. on success bumps *p_ub by one (the dispatcher was handed ub - 1 as its
//      upper bound);
//   4. invokes KMP_DOACROSS_FINI with the status, frees the dimension array
//      and returns the status (zero when no chunk was obtained).
#define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule)                        \
  int func(unsigned ncounts, unsigned long long *counts,                       \
           unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status = 0;                                                            \
    unsigned long long stride, str, lb, ub;                                    \
    unsigned long long chunk_sz = 0;                                           \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned d = 0; d != ncounts; d++) {                                  \
      struct kmp_dim *dim = dims + d;                                          \
      dim->lo = 0;                                                             \
      dim->up = counts[d] - 1;                                                 \
      dim->st = 1;                                                             \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str "           \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    /* Skip dispatch when the outermost range is empty. */                     \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      unsigned long long last_iter = (str > 0) ? (ub - 1) : (ub + 1);          \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, last_iter, str,        \
                            chunk_sz, TRUE);                                   \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        if (str > 0) {                                                         \
          *p_ub += 1;                                                          \
        } else {                                                               \
          *p_ub -= 1;                                                          \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, "    \
                                "returning %d\n",                              \
                  gtid, *p_lb, *p_ub, status));                                \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
1139 
1140 LOOP_DOACROSS_START_ULL(
1141     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START),
1142     kmp_sch_static)
1143 LOOP_DOACROSS_START_ULL(
1144     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START),
1145     kmp_sch_dynamic_chunked)
1146 LOOP_DOACROSS_START_ULL(
1147     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START),
1148     kmp_sch_guided_chunked)
1149 LOOP_DOACROSS_RUNTIME_START_ULL(
1150     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START),
1151     kmp_sch_runtime)
1152 
1153 // Combined parallel / loop worksharing constructs
1154 //
1155 // There are no ull versions (yet).
1156 
// PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post)
//
// Defines `func`, a combined parallel + loop start entry point. The generated
// function
//   1. expands ompt_pre();
//   2. calls __kmp_GOMP_fork_call with __kmp_GOMP_parallel_microtask_wrapper
//      and nine trailing arguments: task, data, num_threads, &loc, the
//      schedule, lb, the last iteration, str and chunk_sz;
//   3. initializes dispatch for the calling thread over the same bounds;
//   4. expands ompt_post().
// The last iteration is ub - 1 for a positive stride and ub + 1 otherwise.
#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post)               \
  void func(void (*task)(void *), void *data, unsigned num_threads, long lb,   \
            long ub, long str, long chunk_sz) {                                \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, "      \
                                "chunk_sz 0x%lx\n",                            \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    /* Shared by the fork call and the dispatch initialization below. */       \
    long last_iter = (str > 0) ? (ub - 1) : (ub + 1);                          \
                                                                               \
    ompt_pre();                                                                \
                                                                               \
    __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,                    \
                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,   \
                         9, task, data, num_threads, &loc, (schedule), lb,     \
                         last_iter, str, chunk_sz);                            \
    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                          \
                                                                               \
    KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, last_iter, str, chunk_sz,    \
                      (schedule) != kmp_sch_static);                           \
                                                                               \
    ompt_post();                                                               \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid));                       \
  }
1184 
// OMPT hooks plugged into the PARALLEL_LOOP* macros as their ompt_pre and
// ompt_post arguments. They expand to nothing unless both OMPT_SUPPORT and
// OMPT_OPTIONAL are set.
1185 #if OMPT_SUPPORT && OMPT_OPTIONAL
1186 
// OMPT_LOOP_PRE declares `parent_frame` in the enclosing function. When OMPT
// is enabled it fetches the frame via __ompt_get_task_info_internal(0, ...),
// records the current frame address as its enter_frame and stores the return
// address. The expansion site must have a variable named `gtid` in scope.
1187 #define OMPT_LOOP_PRE()                                                        \
1188   ompt_frame_t *parent_frame;                                                  \
1189   if (ompt_enabled.enabled) {                                                  \
1190     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);   \
1191     parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);                 \
1192     OMPT_STORE_RETURN_ADDRESS(gtid);                                           \
1193   }
1194 
// OMPT_LOOP_POST resets the enter_frame written by OMPT_LOOP_PRE. It uses the
// `parent_frame` variable that OMPT_LOOP_PRE declared, so both must be
// expanded in the same function.
1195 #define OMPT_LOOP_POST()                                                       \
1196   if (ompt_enabled.enabled) {                                                  \
1197     parent_frame->enter_frame = ompt_data_none;                                \
1198   }
1199 
1200 #else
1201 
1202 #define OMPT_LOOP_PRE()
1203 
1204 #define OMPT_LOOP_POST()
1205 
1206 #endif
1207 
// Instantiate one PARALLEL_LOOP_START entry point per schedule kind: static,
// dynamic (chunked), guided (chunked) and runtime. All of them use the
// OMPT_LOOP_PRE / OMPT_LOOP_POST hooks.
1208 PARALLEL_LOOP_START(
1209     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
1210     kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1211 PARALLEL_LOOP_START(
1212     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
1213     kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1214 PARALLEL_LOOP_START(
1215     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
1216     kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1217 PARALLEL_LOOP_START(
1218     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
1219     kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1220 
1221 // Tasking constructs
1222 
// GOMP_task entry point.
//
// Allocates a task for `func` and then either hands it to the tasking layer
// (if_cond true) or runs func(data) immediately on the calling thread
// (if_cond false).
//
//   func       task body
//   data       caller's argument block; copied into task->shareds when
//              arg_size > 0
//   copy_func  when non-NULL, called as copy_func(task->shareds, data)
//              instead of a plain KMP_MEMCPY
//   arg_size,  size and alignment of the argument block; the allocation is
//   arg_align  padded by arg_align - 1 so that shareds can be rounded up
//   if_cond    false selects the immediate-execution path
//   gomp_flags tested against KMP_GOMP_TASK_UNTIED_FLAG,
//              KMP_GOMP_TASK_FINAL_FLAG and KMP_GOMP_TASK_DEPENDS_FLAG
//   depend     GOMP dependence array; asserted non-NULL when the depends flag
//              is set
1223 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
1224                                              void (*copy_func)(void *, void *),
1225                                              long arg_size, long arg_align,
1226                                              bool if_cond, unsigned gomp_flags,
1227                                              void **depend) {
1228   MKLOC(loc, "GOMP_task");
1229   int gtid = __kmp_entry_gtid();
1230   kmp_int32 flags = 0;
1231   kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1232 
1233   KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));
1234 
1235   // The low-order bit is the "untied" flag
1236   if (!(gomp_flags & KMP_GOMP_TASK_UNTIED_FLAG)) {
1237     input_flags->tiedness = TASK_TIED;
1238   }
1239   // The second low-order bit is the "final" flag
1240   if (gomp_flags & KMP_GOMP_TASK_FINAL_FLAG) {
1241     input_flags->final = 1;
1242   }
1243   input_flags->native = 1;
1244   // __kmp_task_alloc() sets up all other flags
1245 
  // With if_cond false the body is run below as func(data), straight from the
  // caller's buffer, so no argument storage is requested and no copy is made.
1246   if (!if_cond) {
1247     arg_size = 0;
1248   }
1249 
1250   kmp_task_t *task = __kmp_task_alloc(
1251       &loc, gtid, input_flags, sizeof(kmp_task_t),
1252       arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func);
1253 
1254   if (arg_size > 0) {
1255     if (arg_align > 0) {
      // Round shareds up to the next multiple of arg_align.
1256       task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
1257                                arg_align * arg_align);
1258     }
1259     // else error??
1260 
1261     if (copy_func) {
1262       (*copy_func)(task->shareds, data);
1263     } else {
1264       KMP_MEMCPY(task->shareds, data, arg_size);
1265     }
1266   }
1267 
1268 #if OMPT_SUPPORT
  // current_task is only assigned, and only read again at the end of the
  // function, when ompt_enabled.enabled is set.
1269   kmp_taskdata_t *current_task;
1270   if (ompt_enabled.enabled) {
1271     current_task = __kmp_threads[gtid]->th.th_current_task;
1272     current_task->ompt_task_info.frame.enter_frame.ptr =
1273         OMPT_GET_FRAME_ADDRESS(0);
1274   }
1275   OMPT_STORE_RETURN_ADDRESS(gtid);
1276 #endif
1277 
1278   if (if_cond) {
    // Deferred path: submit the task, with its dependences converted to
    // kmp_depend_info_t when the depends flag is set.
1279     if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
1280       KMP_ASSERT(depend);
1281       kmp_gomp_depends_info_t gomp_depends(depend);
1282       kmp_int32 ndeps = gomp_depends.get_num_deps();
      // dep_list is sized at run time (ndeps entries).
1283       kmp_depend_info_t dep_list[ndeps];
1284       for (kmp_int32 i = 0; i < ndeps; i++)
1285         dep_list[i] = gomp_depends.get_kmp_depend(i);
1286       kmp_int32 ndeps_cnv;
1287       __kmp_type_convert(ndeps, &ndeps_cnv);
1288       __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps_cnv, dep_list, 0, NULL);
1289     } else {
1290       __kmpc_omp_task(&loc, gtid, task);
1291     }
1292   } else {
    // Immediate path: wait for any dependences, then run func(data) on this
    // thread between __kmpc_omp_task_begin_if0 and
    // __kmpc_omp_task_complete_if0.
1293 #if OMPT_SUPPORT
1294     ompt_thread_info_t oldInfo;
1295     kmp_info_t *thread;
1296     kmp_taskdata_t *taskdata;
1297     if (ompt_enabled.enabled) {
1298       // Store the threads states and restore them after the task
1299       thread = __kmp_threads[gtid];
1300       taskdata = KMP_TASK_TO_TASKDATA(task);
1301       oldInfo = thread->th.ompt_thread_info;
1302       thread->th.ompt_thread_info.wait_id = 0;
1303       thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1304       taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1305     }
1306     OMPT_STORE_RETURN_ADDRESS(gtid);
1307 #endif
1308     if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
1309       KMP_ASSERT(depend);
1310       kmp_gomp_depends_info_t gomp_depends(depend);
1311       kmp_int32 ndeps = gomp_depends.get_num_deps();
1312       kmp_depend_info_t dep_list[ndeps];
1313       for (kmp_int32 i = 0; i < ndeps; i++)
1314         dep_list[i] = gomp_depends.get_kmp_depend(i);
1315       __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
1316     }
1317 
1318     __kmpc_omp_task_begin_if0(&loc, gtid, task);
    // The body receives the caller's `data`, not task->shareds.
1319     func(data);
1320     __kmpc_omp_task_complete_if0(&loc, gtid, task);
1321 
1322 #if OMPT_SUPPORT
1323     if (ompt_enabled.enabled) {
1324       thread->th.ompt_thread_info = oldInfo;
1325       taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1326     }
1327 #endif
1328   }
1329 #if OMPT_SUPPORT
  // Clear the enter_frame recorded before the task was submitted or run.
1330   if (ompt_enabled.enabled) {
1331     current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
1332   }
1333 #endif
1334 
1335   KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
1336 }
1337 
1338 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) {
1339   MKLOC(loc, "GOMP_taskwait");
1340   int gtid = __kmp_entry_gtid();
1341 
1342 #if OMPT_SUPPORT
1343   OMPT_STORE_RETURN_ADDRESS(gtid);
1344 #endif
1345 
1346   KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
1347 
1348   __kmpc_omp_taskwait(&loc, gtid);
1349 
1350   KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
1351 }
1352 
1353 // Sections worksharing constructs
1354 //
1355 // For the sections construct, we initialize a dynamically scheduled loop
1356 // worksharing construct with lb 1 and stride 1, and use the iteration #'s
1357 // that it returns as section ids.
1358 //
1359 // There are no special entry points for ordered sections, so we always use
1360 // the dynamically scheduled workshare, even if the sections aren't ordered.
1361 
1362 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) {
1363   int status;
1364   kmp_int lb, ub, stride;
1365   int gtid = __kmp_entry_gtid();
1366   MKLOC(loc, "GOMP_sections_start");
1367   KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
1368 
1369   KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1370 
1371   status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1372   if (status) {
1373     KMP_DEBUG_ASSERT(stride == 1);
1374     KMP_DEBUG_ASSERT(lb > 0);
1375     KMP_ASSERT(lb == ub);
1376   } else {
1377     lb = 0;
1378   }
1379 
1380   KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
1381                 (unsigned)lb));
1382   return (unsigned)lb;
1383 }
1384 
1385 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) {
1386   int status;
1387   kmp_int lb, ub, stride;
1388   int gtid = __kmp_get_gtid();
1389   MKLOC(loc, "GOMP_sections_next");
1390   KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
1391 
1392 #if OMPT_SUPPORT
1393   OMPT_STORE_RETURN_ADDRESS(gtid);
1394 #endif
1395 
1396   status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1397   if (status) {
1398     KMP_DEBUG_ASSERT(stride == 1);
1399     KMP_DEBUG_ASSERT(lb > 0);
1400     KMP_ASSERT(lb == ub);
1401   } else {
1402     lb = 0;
1403   }
1404 
1405   KA_TRACE(
1406       20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb));
1407   return (unsigned)lb;
1408 }
1409 
// GOMP_parallel_sections_start entry point.
//
// Forks a team via __kmp_GOMP_fork_call(), handing task, data, num_threads,
// &loc and a dynamic-chunked loop description (1..count, stride 1, chunk 1)
// on for __kmp_GOMP_parallel_microtask_wrapper, then initialises the same
// sections loop for the calling thread with KMP_DISPATCH_INIT().
// It does not call `task` itself.
//
//   task        sections body
//   data        argument for task
//   num_threads requested team size, forwarded unchanged
//   count       number of sections
1410 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
1411     void (*task)(void *), void *data, unsigned num_threads, unsigned count) {
1412   int gtid = __kmp_entry_gtid();
1413 
1414 #if OMPT_SUPPORT
  // parent_frame is only assigned and used when ompt_enabled.enabled is set.
1415   ompt_frame_t *parent_frame;
1416 
1417   if (ompt_enabled.enabled) {
1418     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
1419     parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1420   }
1421   OMPT_STORE_RETURN_ADDRESS(gtid);
1422 #endif
1423 
1424   MKLOC(loc, "GOMP_parallel_sections_start");
1425   KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
1426 
1427   __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
1428                        (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1429                        task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1430                        (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1431 
1432 #if OMPT_SUPPORT
  // The enter_frame is cleared right after the fork, before the dispatch
  // init below.
1433   if (ompt_enabled.enabled) {
1434     parent_frame->enter_frame = ompt_data_none;
1435   }
1436 #endif
1437 
1438   KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1439 
1440   KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
1441 }
1442 
1443 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
1444   int gtid = __kmp_get_gtid();
1445   KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
1446 
1447 #if OMPT_SUPPORT
1448   ompt_frame_t *ompt_frame;
1449   if (ompt_enabled.enabled) {
1450     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1451     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1452   }
1453   OMPT_STORE_RETURN_ADDRESS(gtid);
1454 #endif
1455   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1456 #if OMPT_SUPPORT
1457   if (ompt_enabled.enabled) {
1458     ompt_frame->enter_frame = ompt_data_none;
1459   }
1460 #endif
1461 
1462   KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
1463 }
1464 
1465 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) {
1466   KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
1467 }
1468 
1469 // libgomp has an empty function for GOMP_taskyield as of 2013-10-10
1470 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) {
1471   KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))
1472   return;
1473 }
1474 
// GOMP_parallel entry point (fork, body and end in a single call).
//
// Forks a team via __kmp_GOMP_fork_call(), handing `task` and `data` on for
// __kmp_GOMP_microtask_wrapper. The calling thread then runs task(data)
// itself and finishes the region by calling the GOMP_parallel_end entry
// point.
//
//   task        parallel region body
//   data        argument for task
//   num_threads requested team size, forwarded unchanged
//   flags       forwarded unchanged to __kmp_GOMP_fork_call()
1475 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
1476                                                  void *data,
1477                                                  unsigned num_threads,
1478                                                  unsigned int flags) {
1479   int gtid = __kmp_entry_gtid();
1480   MKLOC(loc, "GOMP_parallel");
1481   KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));
1482 
1483 #if OMPT_SUPPORT
  // Both pointers are only assigned and used when ompt_enabled.enabled is set.
1484   ompt_task_info_t *parent_task_info, *task_info;
1485   if (ompt_enabled.enabled) {
1486     parent_task_info = __ompt_get_task_info_object(0);
1487     parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1488   }
1489   OMPT_STORE_RETURN_ADDRESS(gtid);
1490 #endif
1491   __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
1492                        (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
1493                        data);
1494 #if OMPT_SUPPORT
  // The task info object is queried again after the fork and kept in a
  // separate pointer from the one obtained before it.
1495   if (ompt_enabled.enabled) {
1496     task_info = __ompt_get_task_info_object(0);
1497     task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1498   }
1499 #endif
1500   task(data);
  // NOTE(review): the extra braces presumably give the second
  // OMPT_STORE_RETURN_ADDRESS expansion its own scope - confirm against the
  // macro definition.
1501   {
1502 #if OMPT_SUPPORT
1503     OMPT_STORE_RETURN_ADDRESS(gtid);
1504 #endif
1505     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
1506   }
1507 #if OMPT_SUPPORT
1508   if (ompt_enabled.enabled) {
1509     task_info->frame.exit_frame = ompt_data_none;
1510     parent_task_info->frame.enter_frame = ompt_data_none;
1511   }
1512 #endif
1513 }
1514 
// GOMP_parallel_sections entry point (fork, body and end in a single call).
//
// Forks a team via __kmp_GOMP_fork_call() with a dynamic-chunked loop
// description (1..count, stride 1, chunk 1), initialises the same loop for
// the calling thread, runs task(data) on the calling thread and then calls
// the GOMP_parallel_end entry point.
//
//   task        sections body
//   data        argument for task
//   num_threads requested team size, forwarded unchanged
//   count       number of sections
//   flags       forwarded unchanged to __kmp_GOMP_fork_call()
1515 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
1516                                                           void *data,
1517                                                           unsigned num_threads,
1518                                                           unsigned count,
1519                                                           unsigned flags) {
1520   int gtid = __kmp_entry_gtid();
1521   MKLOC(loc, "GOMP_parallel_sections");
1522   KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
1523 
1524 #if OMPT_SUPPORT
  // task_frame and thr are only assigned and used when ompt_enabled.enabled
  // is set.
1525   ompt_frame_t *task_frame;
1526   kmp_info_t *thr;
1527   if (ompt_enabled.enabled) {
1528     thr = __kmp_threads[gtid];
1529     task_frame = &(thr->th.th_current_task->ompt_task_info.frame);
1530     task_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1531   }
1532   OMPT_STORE_RETURN_ADDRESS(gtid);
1533 #endif
1534 
1535   __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
1536                        (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1537                        task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1538                        (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1539 
  // NOTE(review): the extra braces presumably give the second
  // OMPT_STORE_RETURN_ADDRESS expansion its own scope - confirm against the
  // macro definition.
1540   {
1541 #if OMPT_SUPPORT
1542     OMPT_STORE_RETURN_ADDRESS(gtid);
1543 #endif
1544 
1545     KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1546   }
1547 
1548 #if OMPT_SUPPORT
  // th_current_task is read again after the fork and kept in a separate
  // pointer. NOTE(review): presumably it refers to a different task than
  // before the fork - confirm.
1549   ompt_frame_t *child_frame;
1550   if (ompt_enabled.enabled) {
1551     child_frame = &(thr->th.th_current_task->ompt_task_info.frame);
1552     child_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1553   }
1554 #endif
1555 
1556   task(data);
1557 
1558 #if OMPT_SUPPORT
1559   if (ompt_enabled.enabled) {
1560     child_frame->exit_frame = ompt_data_none;
1561   }
1562 #endif
1563 
1564   KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
1565   KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
1566 
1567 #if OMPT_SUPPORT
1568   if (ompt_enabled.enabled) {
1569     task_frame->enter_frame = ompt_data_none;
1570   }
1571 #endif
1572 }
1573 
// PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post)
//
// Generates a function named `func` with the signature
//   void func(void (*task)(void *), void *data, unsigned num_threads,
//             long lb, long ub, long str, long chunk_sz, unsigned flags)
// The generated function:
//   1. expands ompt_pre(),
//   2. forks a team via __kmp_GOMP_fork_call(), forwarding `flags` and
//      handing 9 arguments (task, data, num_threads, &loc and the loop
//      description) on for __kmp_GOMP_parallel_microtask_wrapper,
//   3. calls KMP_DISPATCH_INIT() for the calling thread with the same
//      schedule and bounds,
//   4. runs task(data) on the calling thread,
//   5. calls the GOMP_parallel_end entry point,
//   6. expands ompt_post().
//
// The upper bound handed on is ub - 1 when str > 0 and ub + 1 otherwise.
// The last KMP_DISPATCH_INIT argument is true for every schedule except
// kmp_sch_static.
1574 #define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post)                     \
1575   void func(void (*task)(void *), void *data, unsigned num_threads, long lb,   \
1576             long ub, long str, long chunk_sz, unsigned flags) {                \
1577     int gtid = __kmp_entry_gtid();                                             \
1578     MKLOC(loc, KMP_STR(func));                                                 \
1579     KA_TRACE(                                                                  \
1580         20,                                                                    \
1581         (KMP_STR(                                                              \
1582              func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
1583          gtid, lb, ub, str, chunk_sz));                                        \
1584                                                                                \
1585     ompt_pre();                                                                \
1586     IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
1587     __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,                 \
1588                          (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,   \
1589                          9, task, data, num_threads, &loc, (schedule), lb,     \
1590                          (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);      \
1591                                                                                \
1592     {                                                                          \
1593       IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
1594       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
1595                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,        \
1596                         (schedule) != kmp_sch_static);                         \
1597     }                                                                          \
1598     task(data);                                                                \
1599     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();                         \
1600     ompt_post();                                                               \
1601                                                                                \
1602     KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid));                       \
1603   }
1604 
// Instantiate one PARALLEL_LOOP entry point per GOMP loop flavour.
// Several flavours share a schedule kind:
//   - GUIDED and NONMONOTONIC_GUIDED both use kmp_sch_guided_chunked,
//   - DYNAMIC and NONMONOTONIC_DYNAMIC both use kmp_sch_dynamic_chunked,
//   - RUNTIME, MAYBE_NONMONOTONIC_RUNTIME and NONMONOTONIC_RUNTIME all use
//     kmp_sch_runtime.
1605 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
1606               kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1607 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
1608               kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1609 PARALLEL_LOOP(
1610     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),
1611     kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1612 PARALLEL_LOOP(
1613     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),
1614     kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1615 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
1616               kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1617 PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
1618               kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1619 PARALLEL_LOOP(
1620     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME),
1621     kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1622 PARALLEL_LOOP(
1623     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME),
1624     kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1625 
1626 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
1627   int gtid = __kmp_entry_gtid();
1628   MKLOC(loc, "GOMP_taskgroup_start");
1629   KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
1630 
1631 #if OMPT_SUPPORT
1632   OMPT_STORE_RETURN_ADDRESS(gtid);
1633 #endif
1634 
1635   __kmpc_taskgroup(&loc, gtid);
1636 
1637   return;
1638 }
1639 
1640 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {
1641   int gtid = __kmp_get_gtid();
1642   MKLOC(loc, "GOMP_taskgroup_end");
1643   KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
1644 
1645 #if OMPT_SUPPORT
1646   OMPT_STORE_RETURN_ADDRESS(gtid);
1647 #endif
1648 
1649   __kmpc_end_taskgroup(&loc, gtid);
1650 
1651   return;
1652 }
1653 
1654 static kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {
1655   kmp_int32 cncl_kind = 0;
1656   switch (gomp_kind) {
1657   case 1:
1658     cncl_kind = cancel_parallel;
1659     break;
1660   case 2:
1661     cncl_kind = cancel_loop;
1662     break;
1663   case 4:
1664     cncl_kind = cancel_sections;
1665     break;
1666   case 8:
1667     cncl_kind = cancel_taskgroup;
1668     break;
1669   }
1670   return cncl_kind;
1671 }
1672 
1673 // Return true if cancellation should take place, false otherwise
1674 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) {
1675   int gtid = __kmp_get_gtid();
1676   MKLOC(loc, "GOMP_cancellation_point");
1677   KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which));
1678   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1679   return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1680 }
1681 
1682 // Return true if cancellation should take place, false otherwise
1683 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) {
1684   int gtid = __kmp_get_gtid();
1685   MKLOC(loc, "GOMP_cancel");
1686   KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which,
1687                 (int)do_cancel));
1688   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1689 
1690   if (do_cancel == FALSE) {
1691     return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1692   } else {
1693     return __kmpc_cancel(&loc, gtid, cncl_kind);
1694   }
1695 }
1696 
1697 // Return true if cancellation should take place, false otherwise
1698 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) {
1699   int gtid = __kmp_get_gtid();
1700   KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));
1701   return __kmp_barrier_gomp_cancel(gtid);
1702 }
1703 
1704 // Return true if cancellation should take place, false otherwise
1705 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) {
1706   int gtid = __kmp_get_gtid();
1707   KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));
1708   return __kmp_barrier_gomp_cancel(gtid);
1709 }
1710 
1711 // Return true if cancellation should take place, false otherwise
1712 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) {
1713   int gtid = __kmp_get_gtid();
1714   KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));
1715   return __kmp_barrier_gomp_cancel(gtid);
1716 }
1717 
1718 // All target functions are empty as of 2014-05-29
1719 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *),
1720                                                const void *openmp_target,
1721                                                size_t mapnum, void **hostaddrs,
1722                                                size_t *sizes,
1723                                                unsigned char *kinds) {
1724   return;
1725 }
1726 
1727 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)(
1728     int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
1729     size_t *sizes, unsigned char *kinds) {
1730   return;
1731 }
1732 
1733 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; }
1734 
1735 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)(
1736     int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
1737     size_t *sizes, unsigned char *kinds) {
1738   return;
1739 }
1740 
1741 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams,
1742                                               unsigned int thread_limit) {
1743   return;
1744 }
1745 
1746 // Task duplication function which copies src to dest (both are
1747 // preallocated task structures)
1748 static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src,
1749                                 kmp_int32 last_private) {
1750   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src);
1751   if (taskdata->td_copy_func) {
1752     (taskdata->td_copy_func)(dest->shareds, src->shareds);
1753   }
1754 }
1755 
// Forward declaration: __GOMP_taskloop below calls this entry point before
// its definition appears in the file.
1756 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
1757     uintptr_t *);
1758 
1759 #ifdef __cplusplus
1760 } // extern "C"
1761 #endif
1762 
// Shared implementation of the GOMP_taskloop entry points; T is the type of
// the loop bounds.
//
//   func       task body
//   data       caller's argument block; asserted to hold at least 2 * sizeof(T)
//              bytes. Its first two T slots in the copy are overwritten with
//              the loop bounds.
//   copy_func  recorded in the taskdata; when non-NULL, __kmp_gomp_task_dup
//              is passed to __kmpc_taskloop as the duplication routine
//   arg_size,  size and alignment of the argument block (arg_align is
//   arg_align  asserted > 0)
//   gomp_flags bits tested here: 0 = untied, 1 = final, 8 = "up" (positive
//              step), 9 = grainsize, 10 = if clause value, 11 = nogroup,
//              12 = reductions
//   num_tasks  0 selects sched 0; otherwise sched is 1 when bit 9 is set and
//              2 when it is not
//   priority   only used in the trace message
//   start, end, step  loop description; end is adjusted by -1 (up) or +1
//              (down) before being stored
1763 template <typename T>
1764 void __GOMP_taskloop(void (*func)(void *), void *data,
1765                      void (*copy_func)(void *, void *), long arg_size,
1766                      long arg_align, unsigned gomp_flags,
1767                      unsigned long num_tasks, int priority, T start, T end,
1768                      T step) {
1769   typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
1770   MKLOC(loc, "GOMP_taskloop");
1771   int sched;
1772   T *loop_bounds;
1773   int gtid = __kmp_entry_gtid();
1774   kmp_int32 flags = 0;
  // These hold the masked flag bit itself (zero or non-zero), not 0/1.
1775   int if_val = gomp_flags & (1u << 10);
1776   int nogroup = gomp_flags & (1u << 11);
1777   int up = gomp_flags & (1u << 8);
1778   int reductions = gomp_flags & (1u << 12);
1779   p_task_dup_t task_dup = NULL;
1780   kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1781 #ifdef KMP_DEBUG
1782   {
    // The format string is built at run time because the conversion
    // specifier for start/end/step depends on T.
1783     char *buff;
1784     buff = __kmp_str_format(
1785         "GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p "
1786         "arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu "
1787         "priority:%%d start:%%%s end:%%%s step:%%%s\n",
1788         traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
1789     KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align,
1790                   gomp_flags, num_tasks, priority, start, end, step));
1791     __kmp_str_free(&buff);
1792   }
1793 #endif
1794   KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T));
1795   KMP_ASSERT(arg_align > 0);
1796   // The low-order bit is the "untied" flag
1797   if (!(gomp_flags & 1)) {
1798     input_flags->tiedness = TASK_TIED;
1799   }
1800   // The second low-order bit is the "final" flag
1801   if (gomp_flags & 2) {
1802     input_flags->final = 1;
1803   }
1804   // Negative step flag
1805   if (!up) {
1806     // If step is flagged as negative, but isn't properly sign extended
1807     // Then manually sign extend it.  Could be a short, int, char embedded
1808     // in a long.  So cannot assume any cast.
1809     if (step > 0) {
      // Walk down from the top bit, setting every bit above the highest 1
      // bit already present in step.
1810       for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) {
1811         // break at the first 1 bit
1812         if (step & ((T)1 << i))
1813           break;
1814         step |= ((T)1 << i);
1815       }
1816     }
1817   }
1818   input_flags->native = 1;
1819   // Figure out if none/grainsize/num_tasks clause specified
1820   if (num_tasks > 0) {
1821     if (gomp_flags & (1u << 9))
1822       sched = 1; // grainsize specified
1823     else
1824       sched = 2; // num_tasks specified
1825     // neither grainsize nor num_tasks specified
1826   } else {
1827     sched = 0;
1828   }
1829 
1830   // __kmp_task_alloc() sets up all other flags
1831   kmp_task_t *task =
1832       __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t),
1833                        arg_size + arg_align - 1, (kmp_routine_entry_t)func);
1834   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1835   taskdata->td_copy_func = copy_func;
1836   taskdata->td_size_loop_bounds = sizeof(T);
1837 
1838   // re-align shareds if needed and setup firstprivate copy constructors
1839   // through the task_dup mechanism
1840   task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
1841                            arg_align * arg_align);
1842   if (copy_func) {
1843     task_dup = __kmp_gomp_task_dup;
1844   }
  // The argument block is always copied bytewise here, whether or not a
  // copy_func was supplied.
1845   KMP_MEMCPY(task->shareds, data, arg_size);
1846 
  // The first two T slots of the copied block become the loop bounds.
1847   loop_bounds = (T *)task->shareds;
1848   loop_bounds[0] = start;
1849   loop_bounds[1] = end + (up ? -1 : 1);
1850 
1851   if (!nogroup) {
1852 #if OMPT_SUPPORT && OMPT_OPTIONAL
1853     OMPT_STORE_RETURN_ADDRESS(gtid);
1854 #endif
1855     __kmpc_taskgroup(&loc, gtid);
1856     if (reductions) {
1857       // The data pointer points to lb, ub, then reduction data
1858       struct data_t {
1859         T a, b;
1860         uintptr_t *d;
1861       };
1862       uintptr_t *d = ((data_t *)data)->d;
1863       KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(d);
1864     }
1865   }
1866   __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
1867                   (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, 1, sched,
1868                   (kmp_uint64)num_tasks, (void *)task_dup);
1869   if (!nogroup) {
1870 #if OMPT_SUPPORT && OMPT_OPTIONAL
1871     OMPT_STORE_RETURN_ADDRESS(gtid);
1872 #endif
1873     __kmpc_end_taskgroup(&loc, gtid);
1874   }
1875 }
1876 
1877 // 4 byte version of GOMP_doacross_post
1878 // This verison needs to create a temporary array which converts 4 byte
1879 // integers into 8 byte integers
1880 template <typename T, bool need_conversion = (sizeof(long) == 4)>
1881 void __kmp_GOMP_doacross_post(T *count);
1882 
1883 template <> void __kmp_GOMP_doacross_post<long, true>(long *count) {
1884   int gtid = __kmp_entry_gtid();
1885   kmp_info_t *th = __kmp_threads[gtid];
1886   MKLOC(loc, "GOMP_doacross_post");
1887   kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1888   kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(
1889       th, (size_t)(sizeof(kmp_int64) * num_dims));
1890   for (kmp_int64 i = 0; i < num_dims; ++i) {
1891     vec[i] = (kmp_int64)count[i];
1892   }
1893   __kmpc_doacross_post(&loc, gtid, vec);
1894   __kmp_thread_free(th, vec);
1895 }
1896 
1897 // 8 byte versions of GOMP_doacross_post
1898 // This version can just pass in the count array directly instead of creating
1899 // a temporary array
1900 template <> void __kmp_GOMP_doacross_post<long, false>(long *count) {
1901   int gtid = __kmp_entry_gtid();
1902   MKLOC(loc, "GOMP_doacross_post");
1903   __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1904 }
1905 
1906 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
1907   int gtid = __kmp_entry_gtid();
1908   kmp_info_t *th = __kmp_threads[gtid];
1909   MKLOC(loc, "GOMP_doacross_wait");
1910   kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1911   kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(
1912       th, (size_t)(sizeof(kmp_int64) * num_dims));
1913   vec[0] = (kmp_int64)first;
1914   for (kmp_int64 i = 1; i < num_dims; ++i) {
1915     T item = va_arg(args, T);
1916     vec[i] = (kmp_int64)item;
1917   }
1918   __kmpc_doacross_wait(&loc, gtid, vec);
1919   __kmp_thread_free(th, vec);
1920   return;
1921 }
1922 
1923 #ifdef __cplusplus
1924 extern "C" {
1925 #endif // __cplusplus
1926 
1927 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)(
1928     void (*func)(void *), void *data, void (*copy_func)(void *, void *),
1929     long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
1930     int priority, long start, long end, long step) {
1931   __GOMP_taskloop<long>(func, data, copy_func, arg_size, arg_align, gomp_flags,
1932                         num_tasks, priority, start, end, step);
1933 }
1934 
1935 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)(
1936     void (*func)(void *), void *data, void (*copy_func)(void *, void *),
1937     long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
1938     int priority, unsigned long long start, unsigned long long end,
1939     unsigned long long step) {
1940   __GOMP_taskloop<unsigned long long>(func, data, copy_func, arg_size,
1941                                       arg_align, gomp_flags, num_tasks,
1942                                       priority, start, end, step);
1943 }
1944 
1945 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) {
1946   __kmp_GOMP_doacross_post(count);
1947 }
1948 
1949 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) {
1950   va_list args;
1951   va_start(args, first);
1952   __kmp_GOMP_doacross_wait<long>(first, args);
1953   va_end(args);
1954 }
1955 
1956 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)(
1957     unsigned long long *count) {
1958   int gtid = __kmp_entry_gtid();
1959   MKLOC(loc, "GOMP_doacross_ull_post");
1960   __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1961 }
1962 
1963 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
1964     unsigned long long first, ...) {
1965   va_list args;
1966   va_start(args, first);
1967   __kmp_GOMP_doacross_wait<unsigned long long>(first, args);
1968   va_end(args);
1969 }
1970 
1971 // fn: the function each primary thread of new team will call
1972 // data: argument to fn
1973 // num_teams, thread_limit: max bounds on respective ICV
1974 // flags: unused
1975 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *),
1976                                                   void *data,
1977                                                   unsigned num_teams,
1978                                                   unsigned thread_limit,
1979                                                   unsigned flags) {
1980   MKLOC(loc, "GOMP_teams_reg");
1981   int gtid = __kmp_entry_gtid();
1982   KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n",
1983                 gtid, num_teams, thread_limit, flags));
1984   __kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit);
1985   __kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn,
1986                     data);
1987   KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid));
1988 }
1989 
1990 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) {
1991   MKLOC(loc, "GOMP_taskwait_depend");
1992   int gtid = __kmp_entry_gtid();
1993   KA_TRACE(20, ("GOMP_taskwait_depend: T#%d\n", gtid));
1994   kmp_gomp_depends_info_t gomp_depends(depend);
1995   kmp_int32 ndeps = gomp_depends.get_num_deps();
1996   kmp_depend_info_t dep_list[ndeps];
1997   for (kmp_int32 i = 0; i < ndeps; i++)
1998     dep_list[i] = gomp_depends.get_kmp_depend(i);
1999 #if OMPT_SUPPORT
2000   OMPT_STORE_RETURN_ADDRESS(gtid);
2001 #endif
2002   __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
2003   KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));
2004 }
2005 
2006 static inline void
2007 __kmp_GOMP_taskgroup_reduction_register(uintptr_t *data, kmp_taskgroup_t *tg,
2008                                         int nthreads,
2009                                         uintptr_t *allocated = nullptr) {
2010   KMP_ASSERT(data);
2011   KMP_ASSERT(nthreads > 0);
2012   // Have private copy pointers point to previously allocated
2013   // reduction data or allocate new data here
2014   if (allocated) {
2015     data[2] = allocated[2];
2016     data[6] = allocated[6];
2017   } else {
2018     data[2] = (uintptr_t)__kmp_allocate(nthreads * data[1]);
2019     data[6] = data[2] + (nthreads * data[1]);
2020   }
2021   if (tg)
2022     tg->gomp_data = data;
2023 }
2024 
2025 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
2026     uintptr_t *data) {
2027   int gtid = __kmp_entry_gtid();
2028   KA_TRACE(20, ("GOMP_taskgroup_reduction_register: T#%d\n", gtid));
2029   kmp_info_t *thread = __kmp_threads[gtid];
2030   kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2031   int nthreads = thread->th.th_team_nproc;
2032   __kmp_GOMP_taskgroup_reduction_register(data, tg, nthreads);
2033 }
2034 
2035 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)(
2036     uintptr_t *data) {
2037   KA_TRACE(20,
2038            ("GOMP_taskgroup_reduction_unregister: T#%d\n", __kmp_get_gtid()));
2039   KMP_ASSERT(data && data[2]);
2040   __kmp_free((void *)data[2]);
2041 }
2042 
2043 // Search through reduction data and set ptrs[] elements
2044 // to proper privatized copy address
2045 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP)(size_t cnt,
2046                                                              size_t cntorig,
2047                                                              void **ptrs) {
2048   int gtid = __kmp_entry_gtid();
2049   KA_TRACE(20, ("GOMP_task_reduction_remap: T#%d\n", gtid));
2050   kmp_info_t *thread = __kmp_threads[gtid];
2051   kmp_int32 tid = __kmp_get_tid();
2052   for (size_t i = 0; i < cnt; ++i) {
2053     uintptr_t address = (uintptr_t)ptrs[i];
2054     void *propagated_address = NULL;
2055     void *mapped_address = NULL;
2056     // Check taskgroups reduce data
2057     kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2058     while (tg) {
2059       uintptr_t *gomp_data = tg->gomp_data;
2060       if (!gomp_data) {
2061         tg = tg->parent;
2062         continue;
2063       }
2064       // Check the shared addresses list
2065       size_t num_vars = (size_t)gomp_data[0];
2066       uintptr_t per_thread_size = gomp_data[1];
2067       uintptr_t reduce_data = gomp_data[2];
2068       uintptr_t end_reduce_data = gomp_data[6];
2069       for (size_t j = 0; j < num_vars; ++j) {
2070         uintptr_t *entry = gomp_data + 7 + 3 * j;
2071         if (entry[0] == address) {
2072           uintptr_t offset = entry[1];
2073           mapped_address =
2074               (void *)(reduce_data + tid * per_thread_size + offset);
2075           if (i < cntorig)
2076             propagated_address = (void *)entry[0];
2077           break;
2078         }
2079       }
2080       if (mapped_address)
2081         break;
2082       // Check if address is within privatized copies range
2083       if (!mapped_address && address >= reduce_data &&
2084           address < end_reduce_data) {
2085         uintptr_t offset = (address - reduce_data) % per_thread_size;
2086         mapped_address = (void *)(reduce_data + tid * per_thread_size + offset);
2087         if (i < cntorig) {
2088           for (size_t j = 0; j < num_vars; ++j) {
2089             uintptr_t *entry = gomp_data + 7 + 3 * j;
2090             if (entry[1] == offset) {
2091               propagated_address = (void *)entry[0];
2092               break;
2093             }
2094           }
2095         }
2096       }
2097       if (mapped_address)
2098         break;
2099       tg = tg->parent;
2100     }
2101     KMP_ASSERT(mapped_address);
2102     ptrs[i] = mapped_address;
2103     if (i < cntorig) {
2104       KMP_ASSERT(propagated_address);
2105       ptrs[cnt + i] = propagated_address;
2106     }
2107   }
2108 }
2109 
2110 static void __kmp_GOMP_init_reductions(int gtid, uintptr_t *data, int is_ws) {
2111   kmp_info_t *thr = __kmp_threads[gtid];
2112   kmp_team_t *team = thr->th.th_team;
2113   // First start a taskgroup
2114   __kmpc_taskgroup(NULL, gtid);
2115   // Then setup reduction data
2116   void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
2117   if (reduce_data == NULL &&
2118       __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2119                                  (void *)1)) {
2120     // Single thread enters this block to initialize common reduction data
2121     KMP_DEBUG_ASSERT(reduce_data == NULL);
2122     __kmp_GOMP_taskgroup_reduction_register(data, NULL, thr->th.th_team_nproc);
2123     KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[is_ws], 0);
2124     KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], (void *)data);
2125   } else {
2126     // Wait for task reduction initialization
2127     while ((reduce_data = KMP_ATOMIC_LD_ACQ(
2128                 &team->t.t_tg_reduce_data[is_ws])) == (void *)1) {
2129       KMP_CPU_PAUSE();
2130     }
2131     KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
2132   }
2133   // For worksharing constructs, each thread has its own reduction structure.
2134   // Have each reduction structure point to same privatized copies of vars.
2135   // For parallel, each thread points to same reduction structure and privatized
2136   // copies of vars
2137   if (is_ws) {
2138     __kmp_GOMP_taskgroup_reduction_register(
2139         data, NULL, thr->th.th_team_nproc,
2140         (uintptr_t *)KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws]));
2141   }
2142   kmp_taskgroup_t *tg = thr->th.th_current_task->td_taskgroup;
2143   tg->gomp_data = data;
2144 }
2145 
2146 static unsigned
2147 __kmp_GOMP_par_reductions_microtask_wrapper(int *gtid, int *npr,
2148                                             void (*task)(void *), void *data) {
2149   kmp_info_t *thr = __kmp_threads[*gtid];
2150   kmp_team_t *team = thr->th.th_team;
2151   uintptr_t *reduce_data = *(uintptr_t **)data;
2152   __kmp_GOMP_init_reductions(*gtid, reduce_data, 0);
2153 
2154 #if OMPT_SUPPORT
2155   ompt_frame_t *ompt_frame;
2156   ompt_state_t enclosing_state;
2157 
2158   if (ompt_enabled.enabled) {
2159     // save enclosing task state; set current state for task
2160     enclosing_state = thr->th.ompt_thread_info.state;
2161     thr->th.ompt_thread_info.state = ompt_state_work_parallel;
2162 
2163     // set task frame
2164     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2165     ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2166   }
2167 #endif
2168 
2169   task(data);
2170 
2171 #if OMPT_SUPPORT
2172   if (ompt_enabled.enabled) {
2173     // clear task frame
2174     ompt_frame->exit_frame = ompt_data_none;
2175 
2176     // restore enclosing state
2177     thr->th.ompt_thread_info.state = enclosing_state;
2178   }
2179 #endif
2180   __kmpc_end_taskgroup(NULL, *gtid);
2181   // if last thread out, then reset the team's reduce data
2182   // the GOMP_taskgroup_reduction_unregister() function will deallocate
2183   // private copies after reduction calculations take place.
2184   int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[0]);
2185   if (count == thr->th.th_team_nproc - 1) {
2186     KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[0], NULL);
2187     KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[0], 0);
2188   }
2189   return (unsigned)thr->th.th_team_nproc;
2190 }
2191 
2192 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS)(
2193     void (*task)(void *), void *data, unsigned num_threads,
2194     unsigned int flags) {
2195   MKLOC(loc, "GOMP_parallel_reductions");
2196   int gtid = __kmp_entry_gtid();
2197   KA_TRACE(20, ("GOMP_parallel_reductions: T#%d\n", gtid));
2198   __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
2199                        (microtask_t)__kmp_GOMP_par_reductions_microtask_wrapper,
2200                        2, task, data);
2201   unsigned retval =
2202       __kmp_GOMP_par_reductions_microtask_wrapper(&gtid, NULL, task, data);
2203   KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
2204   KA_TRACE(20, ("GOMP_parallel_reductions exit: T#%d\n", gtid));
2205   return retval;
2206 }
2207 
2208 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_START)(
2209     long start, long end, long incr, long sched, long chunk_size, long *istart,
2210     long *iend, uintptr_t *reductions, void **mem) {
2211   int status = 0;
2212   int gtid = __kmp_entry_gtid();
2213   KA_TRACE(20, ("GOMP_loop_start: T#%d, reductions: %p\n", gtid, reductions));
2214   if (reductions)
2215     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2216   if (mem)
2217     KMP_FATAL(GompFeatureNotSupported, "scan");
2218   if (istart == NULL)
2219     return true;
2220   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2221   long monotonic = sched & MONOTONIC_FLAG;
2222   sched &= ~MONOTONIC_FLAG;
2223   if (sched == 0) {
2224     if (monotonic)
2225       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START)(
2226           start, end, incr, istart, iend);
2227     else
2228       status = KMP_EXPAND_NAME(
2229           KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START)(
2230           start, end, incr, istart, iend);
2231   } else if (sched == 1) {
2232     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START)(
2233         start, end, incr, chunk_size, istart, iend);
2234   } else if (sched == 2) {
2235     if (monotonic)
2236       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START)(
2237           start, end, incr, chunk_size, istart, iend);
2238     else
2239       status =
2240           KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START)(
2241               start, end, incr, chunk_size, istart, iend);
2242   } else if (sched == 3) {
2243     if (monotonic)
2244       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START)(
2245           start, end, incr, chunk_size, istart, iend);
2246     else
2247       status =
2248           KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START)(
2249               start, end, incr, chunk_size, istart, iend);
2250   } else if (sched == 4) {
2251     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START)(
2252         start, end, incr, istart, iend);
2253   } else {
2254     KMP_ASSERT(0);
2255   }
2256   return status;
2257 }
2258 
2259 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_START)(
2260     bool up, unsigned long long start, unsigned long long end,
2261     unsigned long long incr, long sched, unsigned long long chunk_size,
2262     unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
2263     void **mem) {
2264   int status = 0;
2265   int gtid = __kmp_entry_gtid();
2266   KA_TRACE(20,
2267            ("GOMP_loop_ull_start: T#%d, reductions: %p\n", gtid, reductions));
2268   if (reductions)
2269     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2270   if (mem)
2271     KMP_FATAL(GompFeatureNotSupported, "scan");
2272   if (istart == NULL)
2273     return true;
2274   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2275   long monotonic = sched & MONOTONIC_FLAG;
2276   sched &= ~MONOTONIC_FLAG;
2277   if (sched == 0) {
2278     if (monotonic)
2279       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START)(
2280           up, start, end, incr, istart, iend);
2281     else
2282       status = KMP_EXPAND_NAME(
2283           KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START)(
2284           up, start, end, incr, istart, iend);
2285   } else if (sched == 1) {
2286     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START)(
2287         up, start, end, incr, chunk_size, istart, iend);
2288   } else if (sched == 2) {
2289     if (monotonic)
2290       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START)(
2291           up, start, end, incr, chunk_size, istart, iend);
2292     else
2293       status = KMP_EXPAND_NAME(
2294           KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START)(
2295           up, start, end, incr, chunk_size, istart, iend);
2296   } else if (sched == 3) {
2297     if (monotonic)
2298       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START)(
2299           up, start, end, incr, chunk_size, istart, iend);
2300     else
2301       status =
2302           KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START)(
2303               up, start, end, incr, chunk_size, istart, iend);
2304   } else if (sched == 4) {
2305     status =
2306         KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START)(
2307             up, start, end, incr, istart, iend);
2308   } else {
2309     KMP_ASSERT(0);
2310   }
2311   return status;
2312 }
2313 
2314 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_START)(
2315     unsigned ncounts, long *counts, long sched, long chunk_size, long *istart,
2316     long *iend, uintptr_t *reductions, void **mem) {
2317   int status = 0;
2318   int gtid = __kmp_entry_gtid();
2319   KA_TRACE(20, ("GOMP_loop_doacross_start: T#%d, reductions: %p\n", gtid,
2320                 reductions));
2321   if (reductions)
2322     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2323   if (mem)
2324     KMP_FATAL(GompFeatureNotSupported, "scan");
2325   if (istart == NULL)
2326     return true;
2327   // Ignore any monotonic flag
2328   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2329   sched &= ~MONOTONIC_FLAG;
2330   if (sched == 0) {
2331     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START)(
2332         ncounts, counts, istart, iend);
2333   } else if (sched == 1) {
2334     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START)(
2335         ncounts, counts, chunk_size, istart, iend);
2336   } else if (sched == 2) {
2337     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START)(
2338         ncounts, counts, chunk_size, istart, iend);
2339   } else if (sched == 3) {
2340     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START)(
2341         ncounts, counts, chunk_size, istart, iend);
2342   } else {
2343     KMP_ASSERT(0);
2344   }
2345   return status;
2346 }
2347 
2348 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START)(
2349     unsigned ncounts, unsigned long long *counts, long sched,
2350     unsigned long long chunk_size, unsigned long long *istart,
2351     unsigned long long *iend, uintptr_t *reductions, void **mem) {
2352   int status = 0;
2353   int gtid = __kmp_entry_gtid();
2354   KA_TRACE(20, ("GOMP_loop_ull_doacross_start: T#%d, reductions: %p\n", gtid,
2355                 reductions));
2356   if (reductions)
2357     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2358   if (mem)
2359     KMP_FATAL(GompFeatureNotSupported, "scan");
2360   if (istart == NULL)
2361     return true;
2362   // Ignore any monotonic flag
2363   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2364   sched &= ~MONOTONIC_FLAG;
2365   if (sched == 0) {
2366     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START)(
2367         ncounts, counts, istart, iend);
2368   } else if (sched == 1) {
2369     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START)(
2370         ncounts, counts, chunk_size, istart, iend);
2371   } else if (sched == 2) {
2372     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START)(
2373         ncounts, counts, chunk_size, istart, iend);
2374   } else if (sched == 3) {
2375     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START)(
2376         ncounts, counts, chunk_size, istart, iend);
2377   } else {
2378     KMP_ASSERT(0);
2379   }
2380   return status;
2381 }
2382 
2383 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_START)(
2384     long start, long end, long incr, long sched, long chunk_size, long *istart,
2385     long *iend, uintptr_t *reductions, void **mem) {
2386   int status = 0;
2387   int gtid = __kmp_entry_gtid();
2388   KA_TRACE(20, ("GOMP_loop_ordered_start: T#%d, reductions: %p\n", gtid,
2389                 reductions));
2390   if (reductions)
2391     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2392   if (mem)
2393     KMP_FATAL(GompFeatureNotSupported, "scan");
2394   if (istart == NULL)
2395     return true;
2396   // Ignore any monotonic flag
2397   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2398   sched &= ~MONOTONIC_FLAG;
2399   if (sched == 0) {
2400     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START)(
2401         start, end, incr, istart, iend);
2402   } else if (sched == 1) {
2403     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START)(
2404         start, end, incr, chunk_size, istart, iend);
2405   } else if (sched == 2) {
2406     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START)(
2407         start, end, incr, chunk_size, istart, iend);
2408   } else if (sched == 3) {
2409     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START)(
2410         start, end, incr, chunk_size, istart, iend);
2411   } else {
2412     KMP_ASSERT(0);
2413   }
2414   return status;
2415 }
2416 
2417 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START)(
2418     bool up, unsigned long long start, unsigned long long end,
2419     unsigned long long incr, long sched, unsigned long long chunk_size,
2420     unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
2421     void **mem) {
2422   int status = 0;
2423   int gtid = __kmp_entry_gtid();
2424   KA_TRACE(20, ("GOMP_loop_ull_ordered_start: T#%d, reductions: %p\n", gtid,
2425                 reductions));
2426   if (reductions)
2427     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2428   if (mem)
2429     KMP_FATAL(GompFeatureNotSupported, "scan");
2430   if (istart == NULL)
2431     return true;
2432   // Ignore any monotonic flag
2433   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2434   sched &= ~MONOTONIC_FLAG;
2435   if (sched == 0) {
2436     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START)(
2437         up, start, end, incr, istart, iend);
2438   } else if (sched == 1) {
2439     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START)(
2440         up, start, end, incr, chunk_size, istart, iend);
2441   } else if (sched == 2) {
2442     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START)(
2443         up, start, end, incr, chunk_size, istart, iend);
2444   } else if (sched == 3) {
2445     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START)(
2446         up, start, end, incr, chunk_size, istart, iend);
2447   } else {
2448     KMP_ASSERT(0);
2449   }
2450   return status;
2451 }
2452 
2453 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS2_START)(
2454     unsigned count, uintptr_t *reductions, void **mem) {
2455   int gtid = __kmp_entry_gtid();
2456   KA_TRACE(20,
2457            ("GOMP_sections2_start: T#%d, reductions: %p\n", gtid, reductions));
2458   if (reductions)
2459     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2460   if (mem)
2461     KMP_FATAL(GompFeatureNotSupported, "scan");
2462   return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(count);
2463 }
2464 
2465 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)(
2466     bool cancelled) {
2467   int gtid = __kmp_get_gtid();
2468   MKLOC(loc, "GOMP_workshare_task_reduction_unregister");
2469   KA_TRACE(20, ("GOMP_workshare_task_reduction_unregister: T#%d\n", gtid));
2470   kmp_info_t *thr = __kmp_threads[gtid];
2471   kmp_team_t *team = thr->th.th_team;
2472   __kmpc_end_taskgroup(NULL, gtid);
2473   // If last thread out of workshare, then reset the team's reduce data
2474   // the GOMP_taskgroup_reduction_unregister() function will deallocate
2475   // private copies after reduction calculations take place.
2476   int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[1]);
2477   if (count == thr->th.th_team_nproc - 1) {
2478     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)
2479     ((uintptr_t *)KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[1]));
2480     KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[1], NULL);
2481     KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[1], 0);
2482   }
2483   if (!cancelled) {
2484     __kmpc_barrier(&loc, gtid);
2485   }
2486 }
2487 
2488 // allocator construct
2489 void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ALLOC)(size_t alignment, size_t size,
2490                                                uintptr_t allocator) {
2491   int gtid = __kmp_entry_gtid();
2492   KA_TRACE(20, ("GOMP_alloc: T#%d\n", gtid));
2493 #if OMPT_SUPPORT && OMPT_OPTIONAL
2494   OMPT_STORE_RETURN_ADDRESS(gtid);
2495 #endif
2496   return __kmp_alloc(gtid, alignment, size, (omp_allocator_handle_t)allocator);
2497 }
2498 
2499 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_FREE)(void *ptr, uintptr_t allocator) {
2500   int gtid = __kmp_entry_gtid();
2501   KA_TRACE(20, ("GOMP_free: T#%d\n", gtid));
2502 #if OMPT_SUPPORT && OMPT_OPTIONAL
2503   OMPT_STORE_RETURN_ADDRESS(gtid);
2504 #endif
2505   return ___kmpc_free(gtid, ptr, (omp_allocator_handle_t)allocator);
2506 }
2507 
/* The following sections of code create aliases for the GOMP_* functions and
   then create versioned symbols using the assembler directive .symver. This
   is only relevant when building an ELF shared library (.so). The
   KMP_VERSION_SYMBOL macro is defined in kmp_os.h. */
2512 
2513 #ifdef KMP_USE_VERSION_SYMBOLS
2514 // GOMP_1.0 versioned symbols
2515 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
2516 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
2517 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
2518 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
2519 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
2520 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
2521 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
2522 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
2523 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
2524 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
2525 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
2526 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
2527 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
2528 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
2529 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10,
2530                    "GOMP_1.0");
2531 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
2532 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
2533 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
2534 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10,
2535                    "GOMP_1.0");
2536 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
2537 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
2538 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
2539 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
2540 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
2541 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
2542 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
2543 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
2544 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
2545 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10,
2546                    "GOMP_1.0");
2547 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10,
2548                    "GOMP_1.0");
2549 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10,
2550                    "GOMP_1.0");
2551 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10,
2552                    "GOMP_1.0");
2553 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
2554 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
2555 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
2556 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
2557 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
2558 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
2559 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
2560 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
2561 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
2562 
2563 // GOMP_2.0 versioned symbols
2564 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
2565 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
2566 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
2567 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
2568 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
2569 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
2570 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20,
2571                    "GOMP_2.0");
2572 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20,
2573                    "GOMP_2.0");
2574 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20,
2575                    "GOMP_2.0");
2576 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20,
2577                    "GOMP_2.0");
2578 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20,
2579                    "GOMP_2.0");
2580 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20,
2581                    "GOMP_2.0");
2582 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20,
2583                    "GOMP_2.0");
2584 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20,
2585                    "GOMP_2.0");
2586 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
2587 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
2588 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
2589 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
2590 
2591 // GOMP_3.0 versioned symbols
2592 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
2593 
2594 // GOMP_4.0 versioned symbols
2595 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");
2596 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");
2597 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");
2598 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");
2599 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");
2600 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");
2601 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");
2602 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");
2603 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");
2604 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");
2605 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");
2606 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");
2607 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");
2608 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");
2609 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");
2610 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");
2611 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");
2612 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");
2613 
2614 // GOMP_4.5 versioned symbols
2615 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5");
// GOMP_4.5 entries: every invocation from here to the next section header
// passes the numeric tag 45 together with the version string "GOMP_4.5" (the
// tag is the version string's digits with the dot removed).
// NOTE(review): KMP_VERSION_SYMBOL is defined outside this section; presumably
// it binds the named GOMP entry point to that version node -- confirm against
// the macro definition before adding or reordering entries.
2616 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5");
// Doacross post/wait plus the static/dynamic/guided/runtime doacross starts.
2617 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5");
2618 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5");
2619 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45,
2620                    "GOMP_4.5");
2621 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45,
2622                    "GOMP_4.5");
2623 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45,
2624                    "GOMP_4.5");
2625 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45,
2626                    "GOMP_4.5");
// The same doacross set again under the ULL-named entry points.
2627 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5");
2628 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5");
2629 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45,
2630                    "GOMP_4.5");
2631 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45,
2632                    "GOMP_4.5");
2633 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
2634                    "GOMP_4.5");
2635 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
2636                    "GOMP_4.5");
// Nonmonotonic dynamic/guided START/NEXT pairs, then their ULL-named twins.
2637 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,
2638                    "GOMP_4.5");
2639 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,
2640                    "GOMP_4.5");
2641 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,
2642                    "GOMP_4.5");
2643 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,
2644                    "GOMP_4.5");
2645 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,
2646                    "GOMP_4.5");
2647 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,
2648                    "GOMP_4.5");
2649 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,
2650                    "GOMP_4.5");
2651 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,
2652                    "GOMP_4.5");
// Parallel-loop nonmonotonic dynamic and guided entry points.
2653 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
2654                    "GOMP_4.5");
2655 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
2656                    "GOMP_4.5");
2657 
2658 // GOMP_5.0 versioned symbols
// Every invocation in this group passes the numeric tag 50 together with the
// version string "GOMP_5.0" (the tag is the version string's digits with the
// dot removed). The first argument is always a KMP_API_NAME_GOMP_* macro.
// NOTE(review): KMP_VERSION_SYMBOL is defined outside this section; presumably
// it binds the named GOMP entry point to that version node -- confirm against
// the macro definition before adding or reordering entries.
//
// Runtime-schedule loop NEXT/START pairs (maybe-nonmonotonic and nonmonotonic),
// then the ULL-named counterparts, then the two parallel-loop variants.
2659 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 50,
2660                    "GOMP_5.0");
2661 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START, 50,
2662                    "GOMP_5.0");
2663 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT, 50,
2664                    "GOMP_5.0");
2665 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, 50,
2666                    "GOMP_5.0");
2667 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT,
2668                    50, "GOMP_5.0");
2669 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START,
2670                    50, "GOMP_5.0");
2671 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT, 50,
2672                    "GOMP_5.0");
2673 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START, 50,
2674                    "GOMP_5.0");
2675 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,
2676                    "GOMP_5.0");
2677 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
2678                    50, "GOMP_5.0");
// Teams registration, taskwait-with-depend, and the task/taskgroup reduction
// entry points.
2679 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");
2680 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT_DEPEND, 50, "GOMP_5.0");
2681 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER, 50,
2682                    "GOMP_5.0");
2683 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER, 50,
2684                    "GOMP_5.0");
2685 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP, 50, "GOMP_5.0");
2686 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS, 50, "GOMP_5.0");
// Loop/sections START entry points (plain, doacross, ordered; each loop one
// also in a ULL-named form) and the workshare task-reduction unregister.
2687 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_START, 50, "GOMP_5.0");
2688 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_START, 50, "GOMP_5.0");
2689 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_START, 50, "GOMP_5.0");
2690 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START, 50, "GOMP_5.0");
2691 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_START, 50, "GOMP_5.0");
2692 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0");
2693 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0");
2694 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50,
2695                    "GOMP_5.0");
2696 
2697 // GOMP_5.0.1 versioned symbols
// Only the GOMP_ALLOC and GOMP_FREE API names are listed for this version.
// Both pass the numeric tag 501 with the version string "GOMP_5.0.1" (the tag
// is the version string's digits with the dots removed).
// NOTE(review): KMP_VERSION_SYMBOL is defined outside this section; presumably
// it binds the named GOMP entry point to that version node -- confirm against
// the macro definition.
2698 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ALLOC, 501, "GOMP_5.0.1");
2699 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_FREE, 501, "GOMP_5.0.1");
2700 #endif // KMP_USE_VERSION_SYMBOLS
2701 
2702 #ifdef __cplusplus
2703 } // extern "C"
2704 #endif // __cplusplus
2705