/* xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7) */
/*! \file */
/*
 * kmp.h -- KPTS runtime header file.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_H
#define KMP_H

#include "kmp_config.h"

/* #define BUILD_PARALLEL_ORDERED 1 */

/* This fix replaces gettimeofday with clock_gettime for better scalability on
   the Altix.  Requires user code to be linked with -lrt. */
//#define FIX_SGI_CLOCK

/* Defines for OpenMP 3.0 tasking and auto scheduling */

#ifndef KMP_STATIC_STEAL_ENABLED
#define KMP_STATIC_STEAL_ENABLED 1
#endif

#define TASK_CURRENT_NOT_QUEUED 0
#define TASK_CURRENT_QUEUED 1

#ifdef BUILD_TIED_TASK_STACK
#define TASK_STACK_EMPTY 0 // entries when the stack is empty
#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
// Number of entries in each task stack array
#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
// Mask for determining index into stack block
#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
#endif // BUILD_TIED_TASK_STACK

#define TASK_NOT_PUSHED 1
#define TASK_SUCCESSFULLY_PUSHED 0
#define TASK_TIED 1
#define TASK_UNTIED 0
#define TASK_EXPLICIT 1
#define TASK_IMPLICIT 0
#define TASK_PROXY 1
#define TASK_FULL 0
#define TASK_DETACHABLE 1
#define TASK_UNDETACHABLE 0

#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR

// Android does not have pthread_cancel.  Undefine KMP_CANCEL_THREADS if being
// built on Android
#if defined(__ANDROID__)
#undef KMP_CANCEL_THREADS
#endif

#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Do not include <ctype.h>; it causes problems with /MD on Windows* OS due to
   a bad Microsoft library. Some macros are provided below to replace these
   functions. */
#ifndef __ABSOFT_WIN
#include <sys/types.h>
#endif
#include <limits.h>
#include <time.h>

#include <errno.h>

#include "kmp_os.h"

#include "kmp_safe_c_api.h"

#if KMP_STATS_ENABLED
class kmp_stats_list;
#endif

#if KMP_USE_HIER_SCHED
// Only include hierarchical scheduling if affinity is supported
#undef KMP_USE_HIER_SCHED
#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED
#endif

#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
#include "hwloc.h"
#ifndef HWLOC_OBJ_NUMANODE
#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
#endif
#ifndef HWLOC_OBJ_PACKAGE
#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
#endif
#if HWLOC_API_VERSION >= 0x00020000
// hwloc 2.0 changed type of depth of object from unsigned to int
typedef int kmp_hwloc_depth_t;
#else
typedef unsigned int kmp_hwloc_depth_t;
#endif
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif

#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
#include "kmp_i18n.h"

#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)

#include "kmp_wrapper_malloc.h"
#if KMP_OS_UNIX
#include <unistd.h>
#if !defined NSIG && defined _NSIG
#define NSIG _NSIG
#endif
#endif

#if KMP_OS_LINUX
#pragma weak clock_gettime
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#endif

// Affinity format function
#include "kmp_str.h"

// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
// 3 - fast allocation using sync, non-sync free lists of any size, non-self
// free lists of limited size.
#ifndef USE_FAST_MEMORY
#define USE_FAST_MEMORY 3
#endif

#ifndef KMP_NESTED_HOT_TEAMS
#define KMP_NESTED_HOT_TEAMS 0
#define USE_NESTED_HOT_ARG(x)
#else
#if KMP_NESTED_HOT_TEAMS
#define USE_NESTED_HOT_ARG(x) , x
#else
#define USE_NESTED_HOT_ARG(x)
#endif
#endif
// Assume BGET uses a compare_exchange instruction instead of a lock by
// default.
#ifndef USE_CMP_XCHG_FOR_BGET
#define USE_CMP_XCHG_FOR_BGET 1
#endif

// Test to see if queuing lock is better than bootstrap lock for bget
// #ifndef USE_QUEUING_LOCK_FOR_BGET
// #define USE_QUEUING_LOCK_FOR_BGET
// #endif

#define KMP_NSEC_PER_SEC 1000000000L
#define KMP_USEC_PER_SEC 1000000L

/*!
@ingroup BASIC_TYPES
@{
*/

/*!
Values for bit flags used in the ident_t to describe the fields.
*/
enum {
  /*! Use trampoline for internal microtasks */
  KMP_IDENT_IMB = 0x01,
  /*! Use c-style ident structure */
  KMP_IDENT_KMPC = 0x02,
  /* 0x04 is no longer used */
  /*! Entry point generated by auto-parallelization */
  KMP_IDENT_AUTOPAR = 0x08,
  /*! Compiler generates atomic reduction option for kmpc_reduce* */
  KMP_IDENT_ATOMIC_REDUCE = 0x10,
  /*! To mark a 'barrier' directive in user code */
  KMP_IDENT_BARRIER_EXPL = 0x20,
  /*! To mark implicit barriers. */
  KMP_IDENT_BARRIER_IMPL = 0x0040,
  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,

  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,

  /*! To mark a static loop in OMPT callbacks */
  KMP_IDENT_WORK_LOOP = 0x200,
  /*! To mark a sections directive in OMPT callbacks */
  KMP_IDENT_WORK_SECTIONS = 0x400,
  /*! To mark a distribute construct in OMPT callbacks */
  KMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
      not currently used. If one day we need more bits, then we can use
      an invalid combination of hints to mean that another, larger field
      should be used in a different flag. */
  KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
};

/*!
 * The ident structure that describes a source location.
 */
typedef struct ident {
  kmp_int32 reserved_1; /**<  might be used in Fortran; see above  */
  kmp_int32 flags; /**<  also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
                      identifies this union member  */
  kmp_int32 reserved_2; /**<  not really used in Fortran any more; see above */
#if USE_ITT_BUILD
/*  but currently used for storing region-specific ITT */
/*  contextual information. */
#endif /* USE_ITT_BUILD */
  kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++  */
  char const *psource; /**< String describing the source location.
                       The string is composed of semi-colon separated fields
                       which describe the source file, the function and a pair
                       of line numbers that delimit the construct. */
} ident_t;
/*!
@}
*/
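
// Illustrative sketch (not part of the runtime): how a compiler-emitted
// ident_t might be initialized and its flags tested. The field values and the
// psource string here are hypothetical, following the format described above.
//
//   static ident_t loc = {0, KMP_IDENT_KMPC | KMP_IDENT_ATOMIC_REDUCE, 0, 0,
//                         ";file.c;foo;10;12;;"};
//   if (loc.flags & KMP_IDENT_ATOMIC_REDUCE) {
//     /* compiler generated an atomic reduction option for kmpc_reduce* */
//   }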

// Some forward declarations.
typedef union kmp_team kmp_team_t;
typedef struct kmp_taskdata kmp_taskdata_t;
typedef union kmp_task_team kmp_task_team_t;
typedef union kmp_team kmp_team_p;
typedef union kmp_info kmp_info_p;
typedef union kmp_root kmp_root_p;

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */

/* Pack two 32-bit signed integers into a 64-bit signed integer */
/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32, LOW_32)                                           \
  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
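
// Worked example (sketch): KMP_PACK_64(1, 2) == 0x0000000100000002. The
// integer value itself is endianness-independent; the ToDo above concerns
// code that reinterprets the packed value as a pair of 32-bit words.
//
//   kmp_int64 packed = KMP_PACK_64(1, 2); // 0x0000000100000002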

// Generic string manipulation macros. Assume that _x is of type char *
#define SKIP_WS(_x)                                                            \
  {                                                                            \
    while (*(_x) == ' ' || *(_x) == '\t')                                      \
      (_x)++;                                                                  \
  }
#define SKIP_DIGITS(_x)                                                        \
  {                                                                            \
    while (*(_x) >= '0' && *(_x) <= '9')                                       \
      (_x)++;                                                                  \
  }
#define SKIP_TOKEN(_x)                                                         \
  {                                                                            \
    while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
           (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_')                     \
      (_x)++;                                                                  \
  }
#define SKIP_TO(_x, _c)                                                        \
  {                                                                            \
    while (*(_x) != '\0' && *(_x) != (_c))                                     \
      (_x)++;                                                                  \
  }
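
// Usage sketch (assumes _x points into a NUL-terminated buffer; the input
// string is hypothetical):
//
//   char buf[] = "  chunk_42, rest";
//   char *p = buf;
//   SKIP_WS(p);      // p -> "chunk_42, rest"
//   SKIP_TOKEN(p);   // p -> ", rest" (token chars: [0-9a-zA-Z_])
//   SKIP_TO(p, ','); // p stays at ',' (already there)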

/* ------------------------------------------------------------------------ */

#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
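
// Note that, as with any naive min/max macro, both arguments are evaluated
// twice, so arguments with side effects (e.g. KMP_MAX(i++, j)) must be
// avoided.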

/* ------------------------------------------------------------------------ */
/* Enumeration types */

enum kmp_state_timer {
  ts_stop,
  ts_start,
  ts_pause,

  ts_last_state
};

enum dynamic_mode {
  dynamic_default,
#ifdef USE_LOAD_BALANCE
  dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
  dynamic_random,
  dynamic_thread_limit,
  dynamic_max
};

/* External schedule constants. Duplicates enum omp_sched from omp.h so that
   omp.h need not be included here. */
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
  kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
  // Note: need to adjust __kmp_sch_map global array in case enum is changed
  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked           (33)
  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked          (35)
  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked           (36)
  kmp_sched_auto = 4, // mapped to kmp_sch_auto                     (38)
  kmp_sched_upper_std = 5, // upper bound for standard schedules
  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
#if KMP_STATIC_STEAL_ENABLED
  kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
  kmp_sched_upper,
  kmp_sched_default = kmp_sched_static, // default scheduling
  kmp_sched_monotonic = 0x80000000
} kmp_sched_t;
#endif

/*!
 @ingroup WORK_SHARING
 * Describes the loop schedule to be used for a parallel for loop.
 */
enum sched_type : kmp_int32 {
  kmp_sch_lower = 32, /**< lower bound for unordered values */
  kmp_sch_static_chunked = 33,
  kmp_sch_static = 34, /**< static unspecialized */
  kmp_sch_dynamic_chunked = 35,
  kmp_sch_guided_chunked = 36, /**< guided unspecialized */
  kmp_sch_runtime = 37,
  kmp_sch_auto = 38, /**< auto */
  kmp_sch_trapezoidal = 39,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_greedy = 40,
  kmp_sch_static_balanced = 41,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_guided_iterative_chunked = 42,
  kmp_sch_guided_analytical_chunked = 43,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_steal = 44,

  /* static with chunk adjustment (e.g., simd) */
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
  kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_upper, /**< upper bound for unordered values */

  kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
  kmp_ord_static_chunked = 65,
  kmp_ord_static = 66, /**< ordered static unspecialized */
  kmp_ord_dynamic_chunked = 67,
  kmp_ord_guided_chunked = 68,
  kmp_ord_runtime = 69,
  kmp_ord_auto = 70, /**< ordered auto */
  kmp_ord_trapezoidal = 71,
  kmp_ord_upper, /**< upper bound for ordered values */

  /* Schedules for Distribute construct */
  kmp_distribute_static_chunked = 91, /**< distribute static chunked */
  kmp_distribute_static = 92, /**< distribute static unspecialized */

  /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
     single iteration/chunk, even if the loop is serialized. For the schedule
     types listed above, the entire iteration vector is returned if the loop is
     serialized. This doesn't work for gcc/gcomp sections. */
  kmp_nm_lower = 160, /**< lower bound for nomerge values */

  kmp_nm_static_chunked =
      (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
  kmp_nm_static = 162, /**< static unspecialized */
  kmp_nm_dynamic_chunked = 163,
  kmp_nm_guided_chunked = 164, /**< guided unspecialized */
  kmp_nm_runtime = 165,
  kmp_nm_auto = 166, /**< auto */
  kmp_nm_trapezoidal = 167,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_static_greedy = 168,
  kmp_nm_static_balanced = 169,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_guided_iterative_chunked = 170,
  kmp_nm_guided_analytical_chunked = 171,
  kmp_nm_static_steal =
      172, /* accessible only through OMP_SCHEDULE environment variable */

  kmp_nm_ord_static_chunked = 193,
  kmp_nm_ord_static = 194, /**< ordered static unspecialized */
  kmp_nm_ord_dynamic_chunked = 195,
  kmp_nm_ord_guided_chunked = 196,
  kmp_nm_ord_runtime = 197,
  kmp_nm_ord_auto = 198, /**< auto */
  kmp_nm_ord_trapezoidal = 199,
  kmp_nm_upper, /**< upper bound for nomerge values */

  /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
     Since we need to distinguish the three possible cases (no modifier,
     monotonic modifier, nonmonotonic modifier), we need separate bits for each
     modifier. The absence of monotonic does not imply nonmonotonic, especially
     since 4.5 says that the behaviour of the "no modifier" case is
     implementation defined, but will become "nonmonotonic" in 5.0.

     Since we're passing a full 32 bit value, we can use a couple of high bits
     for these flags; out of paranoia we avoid the sign bit.

     These modifiers can be or-ed into non-static schedules by the compiler to
     pass the additional information. They will be stripped early in the
     processing in __kmp_dispatch_init when setting up schedules, so most of
     the code won't ever see schedules with these bits set.  */
  kmp_sch_modifier_monotonic =
      (1 << 29), /**< Set if the monotonic schedule modifier was present */
  kmp_sch_modifier_nonmonotonic =
      (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */

#define SCHEDULE_WITHOUT_MODIFIERS(s)                                          \
  (enum sched_type)(                                                           \
      (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
#define SCHEDULE_HAS_NO_MODIFIERS(s)                                           \
  (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
#define SCHEDULE_GET_MODIFIERS(s)                                              \
  ((enum sched_type)(                                                          \
      (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
#define SCHEDULE_SET_MODIFIERS(s, m)                                           \
  (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
#define SCHEDULE_NONMONOTONIC 0
#define SCHEDULE_MONOTONIC 1

  kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
};
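
// Sketch of how the modifier bits compose and strip (combination chosen for
// illustration only):
//
//   enum sched_type s = (enum sched_type)(kmp_sch_dynamic_chunked |
//                                         kmp_sch_modifier_nonmonotonic);
//   SCHEDULE_HAS_NONMONOTONIC(s);  // != 0
//   SCHEDULE_WITHOUT_MODIFIERS(s); // == kmp_sch_dynamic_chunked (35)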

// Apply modifiers on internal kind to standard kind
static inline void
__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
                               enum sched_type internal_kind) {
  if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
    *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
  }
}

// Apply modifiers on standard kind to internal kind
static inline void
__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
                               enum sched_type *internal_kind) {
  if ((int)kind & (int)kmp_sched_monotonic) {
    *internal_kind = (enum sched_type)((int)*internal_kind |
                                       (int)kmp_sch_modifier_monotonic);
  }
}

// Get standard schedule without modifiers
static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
  return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
}

/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef union kmp_r_sched {
  struct {
    enum sched_type r_sched_type;
    int chunk;
  };
  kmp_int64 sched;
} kmp_r_sched_t;
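
// The anonymous struct and the 64-bit 'sched' member occupy the same storage,
// so a (type, chunk) pair can be read or compared as one word. A minimal
// sketch:
//
//   kmp_r_sched_t r;
//   r.r_sched_type = kmp_sch_static;
//   r.chunk = 0;
//   kmp_int64 snapshot = r.sched; // whole schedule in a single load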

extern enum sched_type __kmp_sch_map[]; // maps OMP 3.0 schedule types to our
// internal schedule types

enum library_type {
  library_none,
  library_serial,
  library_turnaround,
  library_throughput
};

#if KMP_OS_LINUX
enum clock_function_type {
  clock_function_gettimeofday,
  clock_function_clock_gettime
};
#endif /* KMP_OS_LINUX */

#if KMP_MIC_SUPPORTED
enum mic_type { non_mic, mic1, mic2, mic3, dummy };
#endif

/* -- fast reduction stuff ------------------------------------------------ */

#undef KMP_FAST_REDUCTION_BARRIER
#define KMP_FAST_REDUCTION_BARRIER 1

#undef KMP_FAST_REDUCTION_CORE_DUO
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_FAST_REDUCTION_CORE_DUO 1
#endif

enum _reduction_method {
  reduction_method_not_defined = 0,
  critical_reduce_block = (1 << 8),
  atomic_reduce_block = (2 << 8),
  tree_reduce_block = (3 << 8),
  empty_reduce_block = (4 << 8)
};

// Description of the packed_reduction_method variable:
// The packed_reduction_method variable consists of two enum values packed
// together into byte 0 and byte 1:
// byte 0: (packed_reduction_method & 0x000000FF) is an 'enum barrier_type'
// value, the barrier that will be used in fast reduction: bs_plain_barrier or
// bs_reduction_barrier.
// byte 1: (packed_reduction_method & 0x0000FF00) is the reduction method that
// will be used in fast reduction.
// The reduction method is of 'enum _reduction_method' type and is defined so
// that the bits of byte 0 are empty; no shift instruction is needed while
// packing/unpacking.

#if KMP_FAST_REDUCTION_BARRIER
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)      \
  ((reduction_method) | (barrier_type))

#define UNPACK_REDUCTION_METHOD(packed_reduction_method)                       \
  ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method)                      \
  ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
#else
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)      \
  (reduction_method)

#define UNPACK_REDUCTION_METHOD(packed_reduction_method)                       \
  (packed_reduction_method)

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
#endif

#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block)  \
  ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) ==                       \
   (which_reduction_block))

#if KMP_FAST_REDUCTION_BARRIER
#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER                               \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))

#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER                                   \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
#endif

typedef int PACKED_REDUCTION_METHOD_T;
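
// Packing sketch, assuming KMP_FAST_REDUCTION_BARRIER and an
// 'enum barrier_type' value (declared elsewhere) that fits in the low byte as
// described above:
//
//   PACKED_REDUCTION_METHOD_T m = PACK_REDUCTION_METHOD_AND_BARRIER(
//       tree_reduce_block, bs_reduction_barrier);
//   UNPACK_REDUCTION_METHOD(m);  // == tree_reduce_block    (byte 1)
//   UNPACK_REDUCTION_BARRIER(m); // == bs_reduction_barrier (byte 0)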

/* -- end of fast reduction stuff ----------------------------------------- */

#if KMP_OS_WINDOWS
#define USE_CBLKDATA
#if KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable : 271 310)
#endif
#include <windows.h>
#if KMP_MSVC_COMPAT
#pragma warning(pop)
#endif
#endif

#if KMP_OS_UNIX
#include <dlfcn.h>
#include <pthread.h>
#endif

/* Only Linux* OS and Windows* OS support thread affinity. */
#if KMP_AFFINITY_SUPPORTED

// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
#if KMP_OS_WINDOWS
#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
typedef struct GROUP_AFFINITY {
  KAFFINITY Mask;
  WORD Group;
  WORD Reserved[3];
} GROUP_AFFINITY;
#endif /* _MSC_VER < 1600 */
#if KMP_GROUP_AFFINITY
extern int __kmp_num_proc_groups;
#else
static const int __kmp_num_proc_groups = 1;
#endif /* KMP_GROUP_AFFINITY */
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;

typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;

typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;

typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
                                             GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
#endif /* KMP_OS_WINDOWS */

#if KMP_USE_HWLOC
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
extern int __kmp_numa_detected;
extern int __kmp_tile_depth;
#endif

extern size_t __kmp_affin_mask_size;
#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
#define KMP_CPU_SET_ITERATE(i, mask)                                           \
  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
#define KMP_CPU_SET(i, mask) (mask)->set(i)
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
#define KMP_CPU_ALLOC_ARRAY(arr, n)                                            \
  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
#define KMP_CPU_FREE_ARRAY(arr, n)                                             \
  __kmp_affinity_dispatch->deallocate_mask_array(arr)
#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
#define __kmp_get_system_affinity(mask, abort_bool)                            \
  (mask)->get_system_affinity(abort_bool)
#define __kmp_set_system_affinity(mask, abort_bool)                            \
  (mask)->set_system_affinity(abort_bool)
#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
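
// Typical mask lifecycle using the macros above (sketch; the proc number is
// hypothetical):
//
//   kmp_affin_mask_t *mask;
//   KMP_CPU_ALLOC(mask);
//   KMP_CPU_ZERO(mask);
//   KMP_CPU_SET(3, mask);
//   int i;
//   KMP_CPU_SET_ITERATE(i, mask) {
//     /* visits each set bit; here only i == 3 */
//   }
//   KMP_CPU_FREE(mask);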

class KMPAffinity {
public:
  class Mask {
  public:
    void *operator new(size_t n);
    void operator delete(void *p);
    void *operator new[](size_t n);
    void operator delete[](void *p);
    virtual ~Mask() {}
    // Set bit i to 1
    virtual void set(int i) {}
    // Return bit i
    virtual bool is_set(int i) const { return false; }
    // Set bit i to 0
    virtual void clear(int i) {}
    // Zero out entire mask
    virtual void zero() {}
    // Copy src into this mask
    virtual void copy(const Mask *src) {}
    // this &= rhs
    virtual void bitwise_and(const Mask *rhs) {}
    // this |= rhs
    virtual void bitwise_or(const Mask *rhs) {}
    // this = ~this
    virtual void bitwise_not() {}
    // API for iterating over an affinity mask
    // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
    virtual int begin() const { return 0; }
    virtual int end() const { return 0; }
    virtual int next(int previous) const { return 0; }
    // Set the system's affinity to this affinity mask's value
    virtual int set_system_affinity(bool abort_on_error) const { return -1; }
    // Set this affinity mask to the current system affinity
    virtual int get_system_affinity(bool abort_on_error) { return -1; }
    // Only 1 DWORD in the mask should have any procs set.
    // Return the appropriate index, or -1 for an invalid mask.
    virtual int get_proc_group() const { return -1; }
  };
  void *operator new(size_t n);
  void operator delete(void *p);
  // Need virtual destructor
  virtual ~KMPAffinity() = default;
  // Determine if affinity is capable
  virtual void determine_capable(const char *env_var) {}
  // Bind the current thread to os proc
  virtual void bind_thread(int proc) {}
  // Factory functions to allocate/deallocate a mask
  virtual Mask *allocate_mask() { return nullptr; }
  virtual void deallocate_mask(Mask *m) {}
  virtual Mask *allocate_mask_array(int num) { return nullptr; }
  virtual void deallocate_mask_array(Mask *m) {}
  virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
  static void pick_api();
  static void destroy_api();
  enum api_type {
    NATIVE_OS
#if KMP_USE_HWLOC
    ,
    HWLOC
#endif
  };
  virtual api_type get_api_type() const {
    KMP_ASSERT(0);
    return NATIVE_OS;
  }

private:
  static bool picked_api;
};

typedef KMPAffinity::Mask kmp_affin_mask_t;
extern KMPAffinity *__kmp_affinity_dispatch;

// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
#define KMP_AFFIN_MASK_PRINT_LEN 1024

enum affinity_type {
  affinity_none = 0,
  affinity_physical,
  affinity_logical,
  affinity_compact,
  affinity_scatter,
  affinity_explicit,
  affinity_balanced,
  affinity_disabled, // not used outside the env var parser
  affinity_default
};

enum affinity_gran {
  affinity_gran_fine = 0,
  affinity_gran_thread,
  affinity_gran_core,
  affinity_gran_tile,
  affinity_gran_numa,
  affinity_gran_package,
  affinity_gran_node,
#if KMP_GROUP_AFFINITY
  // The "group" granularity isn't necessarily coarser than all of the
  // other levels, but we put it last in the enum.
  affinity_gran_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_gran_default
};

enum affinity_top_method {
  affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  affinity_top_method_apicid,
  affinity_top_method_x2apicid,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
  affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_top_method_flat,
#if KMP_USE_HWLOC
  affinity_top_method_hwloc,
#endif
  affinity_top_method_default
};

#define affinity_respect_mask_default (-1)

extern enum affinity_type __kmp_affinity_type; /* Affinity type */
extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
extern int __kmp_affinity_gran_levels; /* corresponding int value */
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
extern int __kmp_affinity_compact; /* Affinity 'compact' value */
extern int __kmp_affinity_offset; /* Affinity offset value  */
extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
extern char *__kmp_affinity_proclist; /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks;
extern unsigned __kmp_affinity_num_masks;
extern void __kmp_affinity_bind_thread(int which);

extern kmp_affin_mask_t *__kmp_affin_fullMask;
extern char *__kmp_cpuinfo_file;

#endif /* KMP_AFFINITY_SUPPORTED */

// This needs to be kept in sync with the values in omp.h !!!
typedef enum kmp_proc_bind_t {
  proc_bind_false = 0,
  proc_bind_true,
  proc_bind_master,
  proc_bind_close,
  proc_bind_spread,
  proc_bind_intel, // use KMP_AFFINITY interface
  proc_bind_default
} kmp_proc_bind_t;

typedef struct kmp_nested_proc_bind_t {
  kmp_proc_bind_t *bind_types;
  int size;
  int used;
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;

extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;

#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)
// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
#define KMP_AFFINITY_NON_PROC_BIND                                             \
  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false ||                 \
    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) &&                \
   (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))
#endif /* KMP_AFFINITY_SUPPORTED */

extern int __kmp_affinity_num_places;

typedef enum kmp_cancel_kind_t {
  cancel_noreq = 0,
  cancel_parallel = 1,
  cancel_loop = 2,
  cancel_sections = 3,
  cancel_taskgroup = 4
} kmp_cancel_kind_t;

// KMP_HW_SUBSET support:
typedef struct kmp_hws_item {
  int num;
  int offset;
} kmp_hws_item_t;

extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested

/* ------------------------------------------------------------------------ */

#define KMP_PAD(type, sz)                                                      \
  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
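
// KMP_PAD rounds sizeof(type) up to the next multiple of sz; e.g. for a
// 20-byte type and sz == 64: 20 + (64 - ((20 - 1) % 64) - 1) == 64. A size
// that is already a multiple of sz is left unchanged.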

// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
#define KMP_GTID_DNE (-2) /* Does not exist */
#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
#define KMP_GTID_UNKNOWN (-5) /* Is not known */
#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */

/* OpenMP 5.0 Memory Management support */

#ifndef __OMP_H
// Duplicate type definitions from omp.h
typedef uintptr_t omp_uintptr_t;

typedef enum {
  OMP_ATK_THREADMODEL = 1,
  OMP_ATK_ALIGNMENT = 2,
  OMP_ATK_ACCESS = 3,
  OMP_ATK_POOL_SIZE = 4,
  OMP_ATK_FALLBACK = 5,
  OMP_ATK_FB_DATA = 6,
  OMP_ATK_PINNED = 7,
  OMP_ATK_PARTITION = 8
} omp_alloctrait_key_t;

typedef enum {
  OMP_ATV_FALSE = 0,
  OMP_ATV_TRUE = 1,
  OMP_ATV_DEFAULT = 2,
  OMP_ATV_CONTENDED = 3,
  OMP_ATV_UNCONTENDED = 4,
  OMP_ATV_SEQUENTIAL = 5,
  OMP_ATV_PRIVATE = 6,
  OMP_ATV_ALL = 7,
  OMP_ATV_THREAD = 8,
  OMP_ATV_PTEAM = 9,
  OMP_ATV_CGROUP = 10,
  OMP_ATV_DEFAULT_MEM_FB = 11,
  OMP_ATV_NULL_FB = 12,
  OMP_ATV_ABORT_FB = 13,
  OMP_ATV_ALLOCATOR_FB = 14,
  OMP_ATV_ENVIRONMENT = 15,
  OMP_ATV_NEAREST = 16,
  OMP_ATV_BLOCKED = 17,
  OMP_ATV_INTERLEAVED = 18
} omp_alloctrait_value_t;

typedef void *omp_memspace_handle_t;
extern omp_memspace_handle_t const omp_default_mem_space;
extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;

typedef struct {
  omp_alloctrait_key_t key;
  omp_uintptr_t value;
} omp_alloctrait_t;

typedef void *omp_allocator_handle_t;
extern omp_allocator_handle_t const omp_null_allocator;
extern omp_allocator_handle_t const omp_default_mem_alloc;
extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
extern omp_allocator_handle_t const omp_const_mem_alloc;
extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;
extern omp_allocator_handle_t const kmp_max_mem_alloc;
extern omp_allocator_handle_t __kmp_def_allocator;

932*0b57cec5SDimitry Andric // end of duplicate type definitions from omp.h
933*0b57cec5SDimitry Andric #endif
934*0b57cec5SDimitry Andric 
935*0b57cec5SDimitry Andric extern int __kmp_memkind_available;
936*0b57cec5SDimitry Andric 
937*0b57cec5SDimitry Andric typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
938*0b57cec5SDimitry Andric 
939*0b57cec5SDimitry Andric typedef struct kmp_allocator_t {
940*0b57cec5SDimitry Andric   omp_memspace_handle_t memspace;
941*0b57cec5SDimitry Andric   void **memkind; // pointer to memkind
942*0b57cec5SDimitry Andric   int alignment;
943*0b57cec5SDimitry Andric   omp_alloctrait_value_t fb;
944*0b57cec5SDimitry Andric   kmp_allocator_t *fb_data;
945*0b57cec5SDimitry Andric   kmp_uint64 pool_size;
946*0b57cec5SDimitry Andric   kmp_uint64 pool_used;
947*0b57cec5SDimitry Andric } kmp_allocator_t;
948*0b57cec5SDimitry Andric 
949*0b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
950*0b57cec5SDimitry Andric                                                     omp_memspace_handle_t,
951*0b57cec5SDimitry Andric                                                     int ntraits,
952*0b57cec5SDimitry Andric                                                     omp_alloctrait_t traits[]);
953*0b57cec5SDimitry Andric extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
954*0b57cec5SDimitry Andric extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
955*0b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
956*0b57cec5SDimitry Andric extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
957*0b57cec5SDimitry Andric extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
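/* Example (illustrative sketch of how a compiler or a test might drive this
   API; the gtid is assumed to come from the runtime, e.g. via
   __kmp_entry_gtid()):
   omp_alloctrait_t traits[2] = {{OMP_ATK_ALIGNMENT, 64},
                                 {OMP_ATK_FALLBACK, OMP_ATV_DEFAULT_MEM_FB}};
   omp_allocator_handle_t al =
       __kmpc_init_allocator(gtid, omp_default_mem_space, 2, traits);
   void *p = __kmpc_alloc(gtid, 1024, al); // 1 KiB, 64-byte aligned
   __kmpc_free(gtid, p, al);
   __kmpc_destroy_allocator(gtid, al);
*/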
958*0b57cec5SDimitry Andric 
959*0b57cec5SDimitry Andric extern void __kmp_init_memkind();
960*0b57cec5SDimitry Andric extern void __kmp_fini_memkind();
961*0b57cec5SDimitry Andric 
962*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
963*0b57cec5SDimitry Andric 
964*0b57cec5SDimitry Andric #define KMP_UINT64_MAX                                                         \
965*0b57cec5SDimitry Andric   (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
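// Note: the expression above clears only the most-significant bit, so it
// evaluates to 0x7fffffffffffffff (the largest value with the sign bit clear).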
966*0b57cec5SDimitry Andric 
967*0b57cec5SDimitry Andric #define KMP_MIN_NTH 1
968*0b57cec5SDimitry Andric 
969*0b57cec5SDimitry Andric #ifndef KMP_MAX_NTH
970*0b57cec5SDimitry Andric #if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
971*0b57cec5SDimitry Andric #define KMP_MAX_NTH PTHREAD_THREADS_MAX
972*0b57cec5SDimitry Andric #else
973*0b57cec5SDimitry Andric #define KMP_MAX_NTH INT_MAX
974*0b57cec5SDimitry Andric #endif
975*0b57cec5SDimitry Andric #endif /* KMP_MAX_NTH */
976*0b57cec5SDimitry Andric 
977*0b57cec5SDimitry Andric #ifdef PTHREAD_STACK_MIN
978*0b57cec5SDimitry Andric #define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
979*0b57cec5SDimitry Andric #else
980*0b57cec5SDimitry Andric #define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
981*0b57cec5SDimitry Andric #endif
982*0b57cec5SDimitry Andric 
983*0b57cec5SDimitry Andric #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
984*0b57cec5SDimitry Andric 
985*0b57cec5SDimitry Andric #if KMP_ARCH_X86
986*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
987*0b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
988*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
989*0b57cec5SDimitry Andric #define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
990*0b57cec5SDimitry Andric #else
991*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
992*0b57cec5SDimitry Andric #endif
993*0b57cec5SDimitry Andric 
994*0b57cec5SDimitry Andric #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
995*0b57cec5SDimitry Andric #define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
996*0b57cec5SDimitry Andric #define KMP_MAX_MALLOC_POOL_INCR                                               \
997*0b57cec5SDimitry Andric   (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
998*0b57cec5SDimitry Andric 
999*0b57cec5SDimitry Andric #define KMP_MIN_STKOFFSET (0)
1000*0b57cec5SDimitry Andric #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
1001*0b57cec5SDimitry Andric #if KMP_OS_DARWIN
1002*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
1003*0b57cec5SDimitry Andric #else
1004*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET CACHE_LINE
1005*0b57cec5SDimitry Andric #endif
1006*0b57cec5SDimitry Andric 
1007*0b57cec5SDimitry Andric #define KMP_MIN_STKPADDING (0)
1008*0b57cec5SDimitry Andric #define KMP_MAX_STKPADDING (2 * 1024 * 1024)
1009*0b57cec5SDimitry Andric 
1010*0b57cec5SDimitry Andric #define KMP_BLOCKTIME_MULTIPLIER                                               \
1011*0b57cec5SDimitry Andric   (1000) /* number of blocktime units per second */
1012*0b57cec5SDimitry Andric #define KMP_MIN_BLOCKTIME (0)
1013*0b57cec5SDimitry Andric #define KMP_MAX_BLOCKTIME                                                      \
1014*0b57cec5SDimitry Andric   (INT_MAX) /* Must be this for the "infinite" setting to work */
1015*0b57cec5SDimitry Andric #define KMP_DEFAULT_BLOCKTIME (200) /*  __kmp_blocktime is in milliseconds  */
1016*0b57cec5SDimitry Andric 
1017*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
1018*0b57cec5SDimitry Andric #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
1019*0b57cec5SDimitry Andric #define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
1020*0b57cec5SDimitry Andric #define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
1021*0b57cec5SDimitry Andric 
1022*0b57cec5SDimitry Andric /* Calculate the new number of monitor wakeups for a specific block time based
1023*0b57cec5SDimitry Andric    on the previous monitor_wakeups. Only allow increasing the number of wakeups. */
1024*0b57cec5SDimitry Andric #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups)                 \
1025*0b57cec5SDimitry Andric   (((blocktime) == KMP_MAX_BLOCKTIME)                                          \
1026*0b57cec5SDimitry Andric        ? (monitor_wakeups)                                                     \
1027*0b57cec5SDimitry Andric        : ((blocktime) == KMP_MIN_BLOCKTIME)                                    \
1028*0b57cec5SDimitry Andric              ? KMP_MAX_MONITOR_WAKEUPS                                         \
1029*0b57cec5SDimitry Andric              : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))  \
1030*0b57cec5SDimitry Andric                    ? (monitor_wakeups)                                         \
1031*0b57cec5SDimitry Andric                    : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
1032*0b57cec5SDimitry Andric 
1033*0b57cec5SDimitry Andric /* Calculate number of intervals for a specific block time based on
1034*0b57cec5SDimitry Andric    monitor_wakeups */
1035*0b57cec5SDimitry Andric #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups)               \
1036*0b57cec5SDimitry Andric   (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) /        \
1037*0b57cec5SDimitry Andric    (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
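/* Worked example (illustrative): with the default 200 ms blocktime and
   monitor_wakeups == 1, KMP_WAKEUPS_FROM_BLOCKTIME(200, 1) yields
   1000 / 200 == 5 wakeups per second; with 5 wakeups each period is
   1000 / 5 == 200 ms, so KMP_INTERVALS_FROM_BLOCKTIME(200, 5) ==
   (200 + 200 - 1) / 200 == 1 interval. */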
1038*0b57cec5SDimitry Andric #else
1039*0b57cec5SDimitry Andric #define KMP_BLOCKTIME(team, tid)                                               \
1040*0b57cec5SDimitry Andric   (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
1041*0b57cec5SDimitry Andric #if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1042*0b57cec5SDimitry Andric // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
1043*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_ticks_per_msec;
1044*0b57cec5SDimitry Andric #if KMP_COMPILER_ICC
1045*0b57cec5SDimitry Andric #define KMP_NOW() ((kmp_uint64)_rdtsc())
1046*0b57cec5SDimitry Andric #else
1047*0b57cec5SDimitry Andric #define KMP_NOW() __kmp_hardware_timestamp()
1048*0b57cec5SDimitry Andric #endif
1049*0b57cec5SDimitry Andric #define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
1050*0b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid)                                      \
1051*0b57cec5SDimitry Andric   (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
1052*0b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
1053*0b57cec5SDimitry Andric #else
1054*0b57cec5SDimitry Andric // System time is retrieved sporadically while blocking.
1055*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_now_nsec();
1056*0b57cec5SDimitry Andric #define KMP_NOW() __kmp_now_nsec()
1057*0b57cec5SDimitry Andric #define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
1058*0b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid)                                      \
1059*0b57cec5SDimitry Andric   (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
1060*0b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
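// The || short-circuits, so KMP_NOW() is evaluated at most once per 1000
// iterations of the caller's spin loop.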
1061*0b57cec5SDimitry Andric #endif
1062*0b57cec5SDimitry Andric #endif // KMP_USE_MONITOR
1063*0b57cec5SDimitry Andric 
1064*0b57cec5SDimitry Andric #define KMP_MIN_STATSCOLS 40
1065*0b57cec5SDimitry Andric #define KMP_MAX_STATSCOLS 4096
1066*0b57cec5SDimitry Andric #define KMP_DEFAULT_STATSCOLS 80
1067*0b57cec5SDimitry Andric 
1068*0b57cec5SDimitry Andric #define KMP_MIN_INTERVAL 0
1069*0b57cec5SDimitry Andric #define KMP_MAX_INTERVAL (INT_MAX - 1)
1070*0b57cec5SDimitry Andric #define KMP_DEFAULT_INTERVAL 0
1071*0b57cec5SDimitry Andric 
1072*0b57cec5SDimitry Andric #define KMP_MIN_CHUNK 1
1073*0b57cec5SDimitry Andric #define KMP_MAX_CHUNK (INT_MAX - 1)
1074*0b57cec5SDimitry Andric #define KMP_DEFAULT_CHUNK 1
1075*0b57cec5SDimitry Andric 
1076*0b57cec5SDimitry Andric #define KMP_DFLT_DISP_NUM_BUFF 7
1077*0b57cec5SDimitry Andric #define KMP_MAX_ORDERED 8
1078*0b57cec5SDimitry Andric 
1079*0b57cec5SDimitry Andric #define KMP_MAX_FIELDS 32
1080*0b57cec5SDimitry Andric 
1081*0b57cec5SDimitry Andric #define KMP_MAX_BRANCH_BITS 31
1082*0b57cec5SDimitry Andric 
1083*0b57cec5SDimitry Andric #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
1084*0b57cec5SDimitry Andric 
1085*0b57cec5SDimitry Andric #define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX
1086*0b57cec5SDimitry Andric 
1087*0b57cec5SDimitry Andric #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
1088*0b57cec5SDimitry Andric 
1089*0b57cec5SDimitry Andric /* Minimum number of threads before switch to TLS gtid (experimentally
1090*0b57cec5SDimitry Andric    determined) */
1091*0b57cec5SDimitry Andric /* josh TODO: what about OS X* tuning? */
1092*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1093*0b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN 5
1094*0b57cec5SDimitry Andric #else
1095*0b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN INT_MAX
1096*0b57cec5SDimitry Andric #endif
1097*0b57cec5SDimitry Andric 
1098*0b57cec5SDimitry Andric #define KMP_MASTER_TID(tid) ((tid) == 0)
1099*0b57cec5SDimitry Andric #define KMP_WORKER_TID(tid) ((tid) != 0)
1100*0b57cec5SDimitry Andric 
1101*0b57cec5SDimitry Andric #define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0)
1102*0b57cec5SDimitry Andric #define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0)
1103*0b57cec5SDimitry Andric #define KMP_INITIAL_GTID(gtid) ((gtid) == 0)
1104*0b57cec5SDimitry Andric 
1105*0b57cec5SDimitry Andric #ifndef TRUE
1106*0b57cec5SDimitry Andric #define FALSE 0
1107*0b57cec5SDimitry Andric #define TRUE (!FALSE)
1108*0b57cec5SDimitry Andric #endif
1109*0b57cec5SDimitry Andric 
1110*0b57cec5SDimitry Andric /* NOTE: all of the following constants must be even */
1111*0b57cec5SDimitry Andric 
1112*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1113*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 64U /* initial number of spin-tests   */
1114*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
1115*0b57cec5SDimitry Andric #elif KMP_OS_CNK
1116*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 16U /* initial number of spin-tests   */
1117*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
1118*0b57cec5SDimitry Andric #elif KMP_OS_LINUX
1119*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1120*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1121*0b57cec5SDimitry Andric #elif KMP_OS_DARWIN
1122*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DARWIN */
1123*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1124*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1125*0b57cec5SDimitry Andric #elif KMP_OS_DRAGONFLY
1126*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DRAGONFLY */
1127*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1128*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1129*0b57cec5SDimitry Andric #elif KMP_OS_FREEBSD
1130*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_FREEBSD */
1131*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1132*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1133*0b57cec5SDimitry Andric #elif KMP_OS_NETBSD
1134*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_NETBSD */
1135*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1136*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1137*0b57cec5SDimitry Andric #elif KMP_OS_HURD
1138*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_HURD */
1139*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1140*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1141*0b57cec5SDimitry Andric #elif KMP_OS_OPENBSD
1142*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_OPENBSD */
1143*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
1144*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1145*0b57cec5SDimitry Andric #endif
1146*0b57cec5SDimitry Andric 
1147*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1148*0b57cec5SDimitry Andric typedef struct kmp_cpuid {
1149*0b57cec5SDimitry Andric   kmp_uint32 eax;
1150*0b57cec5SDimitry Andric   kmp_uint32 ebx;
1151*0b57cec5SDimitry Andric   kmp_uint32 ecx;
1152*0b57cec5SDimitry Andric   kmp_uint32 edx;
1153*0b57cec5SDimitry Andric } kmp_cpuid_t;
1154*0b57cec5SDimitry Andric 
1155*0b57cec5SDimitry Andric typedef struct kmp_cpuinfo {
1156*0b57cec5SDimitry Andric   int initialized; // If 0, other fields are not initialized.
1157*0b57cec5SDimitry Andric   int signature; // CPUID(1).EAX
1158*0b57cec5SDimitry Andric   int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
1159*0b57cec5SDimitry Andric   int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
1160*0b57cec5SDimitry Andric   // Model << 4 ) + Model)
1161*0b57cec5SDimitry Andric   int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1162*0b57cec5SDimitry Andric   int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
1163*0b57cec5SDimitry Andric   int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
1164*0b57cec5SDimitry Andric   int cpu_stackoffset;
1165*0b57cec5SDimitry Andric   int apic_id;
1166*0b57cec5SDimitry Andric   int physical_id;
1167*0b57cec5SDimitry Andric   int logical_id;
1168*0b57cec5SDimitry Andric   kmp_uint64 frequency; // Nominal CPU frequency in Hz.
1169*0b57cec5SDimitry Andric   char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
1170*0b57cec5SDimitry Andric } kmp_cpuinfo_t;
1171*0b57cec5SDimitry Andric 
1172*0b57cec5SDimitry Andric extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
1173*0b57cec5SDimitry Andric 
1174*0b57cec5SDimitry Andric #if KMP_OS_UNIX
1175*0b57cec5SDimitry Andric // subleaf is only needed for cache and topology discovery and can be set to
1176*0b57cec5SDimitry Andric // zero in most cases
1177*0b57cec5SDimitry Andric static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
1178*0b57cec5SDimitry Andric   __asm__ __volatile__("cpuid"
1179*0b57cec5SDimitry Andric                        : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
1180*0b57cec5SDimitry Andric                        : "a"(leaf), "c"(subleaf));
1181*0b57cec5SDimitry Andric }
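/* Example (illustrative): query the vendor identification string.
   kmp_cpuid_t buf;
   __kmp_x86_cpuid(0, 0, &buf);
   // buf.eax holds the maximum supported leaf; buf.ebx, buf.edx and buf.ecx
   // hold the 12-byte vendor string (e.g. "GenuineIntel").
*/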
1182*0b57cec5SDimitry Andric // Load p into FPU control word
1183*0b57cec5SDimitry Andric static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
1184*0b57cec5SDimitry Andric   __asm__ __volatile__("fldcw %0" : : "m"(*p));
1185*0b57cec5SDimitry Andric }
1186*0b57cec5SDimitry Andric // Store FPU control word into p
1187*0b57cec5SDimitry Andric static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
1188*0b57cec5SDimitry Andric   __asm__ __volatile__("fstcw %0" : "=m"(*p));
1189*0b57cec5SDimitry Andric }
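/* Example (illustrative): bracket code that may change rounding/precision
   bits so the floating-point environment is preserved:
   kmp_int16 cw;
   __kmp_store_x87_fpu_control_word(&cw);
   // ... code that may modify the x87 control word ...
   __kmp_load_x87_fpu_control_word(&cw);
*/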
1190*0b57cec5SDimitry Andric static inline void __kmp_clear_x87_fpu_status_word() {
1191*0b57cec5SDimitry Andric #if KMP_MIC
1192*0b57cec5SDimitry Andric   // 32-bit protected mode x87 FPU state
1193*0b57cec5SDimitry Andric   struct x87_fpu_state {
1194*0b57cec5SDimitry Andric     unsigned cw;
1195*0b57cec5SDimitry Andric     unsigned sw;
1196*0b57cec5SDimitry Andric     unsigned tw;
1197*0b57cec5SDimitry Andric     unsigned fip;
1198*0b57cec5SDimitry Andric     unsigned fips;
1199*0b57cec5SDimitry Andric     unsigned fdp;
1200*0b57cec5SDimitry Andric     unsigned fds;
1201*0b57cec5SDimitry Andric   };
1202*0b57cec5SDimitry Andric   struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
1203*0b57cec5SDimitry Andric   __asm__ __volatile__("fstenv %0\n\t" // store FP env
1204*0b57cec5SDimitry Andric                        "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
1205*0b57cec5SDimitry Andric                        "fldenv %0\n\t" // load FP env back
1206*0b57cec5SDimitry Andric                        : "+m"(fpu_state), "+m"(fpu_state.sw));
1207*0b57cec5SDimitry Andric #else
1208*0b57cec5SDimitry Andric   __asm__ __volatile__("fnclex");
1209*0b57cec5SDimitry Andric #endif // KMP_MIC
1210*0b57cec5SDimitry Andric }
1211*0b57cec5SDimitry Andric #if __SSE__
1212*0b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1213*0b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1214*0b57cec5SDimitry Andric #else
1215*0b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
1216*0b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
1217*0b57cec5SDimitry Andric #endif
1218*0b57cec5SDimitry Andric #else
1219*0b57cec5SDimitry Andric // Windows still has these as external functions in an assembly file
1220*0b57cec5SDimitry Andric extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
1221*0b57cec5SDimitry Andric extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
1222*0b57cec5SDimitry Andric extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
1223*0b57cec5SDimitry Andric extern void __kmp_clear_x87_fpu_status_word();
1224*0b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1225*0b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1226*0b57cec5SDimitry Andric #endif // KMP_OS_UNIX
1227*0b57cec5SDimitry Andric 
1228*0b57cec5SDimitry Andric #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
1229*0b57cec5SDimitry Andric 
1230*0b57cec5SDimitry Andric #if KMP_ARCH_X86
1231*0b57cec5SDimitry Andric extern void __kmp_x86_pause(void);
1232*0b57cec5SDimitry Andric #elif KMP_MIC
1233*0b57cec5SDimitry Andric // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
1234*0b57cec5SDimitry Andric // a regression after removal of the extra PAUSE from spin loops. Changing
1235*0b57cec5SDimitry Andric // the delay from 100 to 300 showed even better performance than double PAUSE
1236*0b57cec5SDimitry Andric // on Spec OMP2001 and LCPC tasking tests, with no regressions on EPCC.
1237*0b57cec5SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
1238*0b57cec5SDimitry Andric #else
1239*0b57cec5SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_pause(); }
1240*0b57cec5SDimitry Andric #endif
1241*0b57cec5SDimitry Andric #define KMP_CPU_PAUSE() __kmp_x86_pause()
1242*0b57cec5SDimitry Andric #elif KMP_ARCH_PPC64
1243*0b57cec5SDimitry Andric #define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
1244*0b57cec5SDimitry Andric #define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
1245*0b57cec5SDimitry Andric #define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
1246*0b57cec5SDimitry Andric #define KMP_CPU_PAUSE()                                                        \
1247*0b57cec5SDimitry Andric   do {                                                                         \
1248*0b57cec5SDimitry Andric     KMP_PPC64_PRI_LOW();                                                       \
1249*0b57cec5SDimitry Andric     KMP_PPC64_PRI_MED();                                                       \
1250*0b57cec5SDimitry Andric     KMP_PPC64_PRI_LOC_MB();                                                    \
1251*0b57cec5SDimitry Andric   } while (0)
1252*0b57cec5SDimitry Andric #else
1253*0b57cec5SDimitry Andric #define KMP_CPU_PAUSE() /* nothing to do */
1254*0b57cec5SDimitry Andric #endif
1255*0b57cec5SDimitry Andric 
1256*0b57cec5SDimitry Andric #define KMP_INIT_YIELD(count)                                                  \
1257*0b57cec5SDimitry Andric   { (count) = __kmp_yield_init; }
1258*0b57cec5SDimitry Andric 
1259*0b57cec5SDimitry Andric #define KMP_OVERSUBSCRIBED                                                     \
1260*0b57cec5SDimitry Andric   (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
1261*0b57cec5SDimitry Andric 
1262*0b57cec5SDimitry Andric #define KMP_TRY_YIELD                                                          \
1263*0b57cec5SDimitry Andric   ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
1264*0b57cec5SDimitry Andric 
1265*0b57cec5SDimitry Andric #define KMP_TRY_YIELD_OVERSUB                                                  \
1266*0b57cec5SDimitry Andric   ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
1267*0b57cec5SDimitry Andric 
1268*0b57cec5SDimitry Andric #define KMP_YIELD(cond)                                                        \
1269*0b57cec5SDimitry Andric   {                                                                            \
1270*0b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
1271*0b57cec5SDimitry Andric     if ((cond) && (KMP_TRY_YIELD))                                             \
1272*0b57cec5SDimitry Andric       __kmp_yield();                                                           \
1273*0b57cec5SDimitry Andric   }
1274*0b57cec5SDimitry Andric 
1275*0b57cec5SDimitry Andric #define KMP_YIELD_OVERSUB()                                                    \
1276*0b57cec5SDimitry Andric   {                                                                            \
1277*0b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
1278*0b57cec5SDimitry Andric     if ((KMP_TRY_YIELD_OVERSUB))                                               \
1279*0b57cec5SDimitry Andric       __kmp_yield();                                                           \
1280*0b57cec5SDimitry Andric   }
1281*0b57cec5SDimitry Andric 
1282*0b57cec5SDimitry Andric // Note the decrement of 2 in the following macros. With KMP_LIBRARY=turnaround
1283*0b57cec5SDimitry Andric // there should be no yielding, since the initial value from KMP_INIT_YIELD() is odd.
1284*0b57cec5SDimitry Andric #define KMP_YIELD_SPIN(count)                                                  \
1285*0b57cec5SDimitry Andric   {                                                                            \
1286*0b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
1287*0b57cec5SDimitry Andric     if (KMP_TRY_YIELD) {                                                       \
1288*0b57cec5SDimitry Andric       (count) -= 2;                                                            \
1289*0b57cec5SDimitry Andric       if (!(count)) {                                                          \
1290*0b57cec5SDimitry Andric         __kmp_yield();                                                         \
1291*0b57cec5SDimitry Andric         (count) = __kmp_yield_next;                                            \
1292*0b57cec5SDimitry Andric       }                                                                        \
1293*0b57cec5SDimitry Andric     }                                                                          \
1294*0b57cec5SDimitry Andric   }
1295*0b57cec5SDimitry Andric 
1296*0b57cec5SDimitry Andric #define KMP_YIELD_OVERSUB_ELSE_SPIN(count)                                     \
1297*0b57cec5SDimitry Andric   {                                                                            \
1298*0b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
1299*0b57cec5SDimitry Andric     if ((KMP_TRY_YIELD_OVERSUB))                                               \
1300*0b57cec5SDimitry Andric       __kmp_yield();                                                           \
1301*0b57cec5SDimitry Andric     else if (__kmp_use_yield == 1) {                                           \
1302*0b57cec5SDimitry Andric       (count) -= 2;                                                            \
1303*0b57cec5SDimitry Andric       if (!(count)) {                                                          \
1304*0b57cec5SDimitry Andric         __kmp_yield();                                                         \
1305*0b57cec5SDimitry Andric         (count) = __kmp_yield_next;                                            \
1306*0b57cec5SDimitry Andric       }                                                                        \
1307*0b57cec5SDimitry Andric     }                                                                          \
1308*0b57cec5SDimitry Andric   }
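/* Example (illustrative sketch of a spin-wait loop built from these macros;
   'flag' is a hypothetical shared variable):
   kmp_uint32 spins;
   KMP_INIT_YIELD(spins);
   while (!TCR_4(flag)) {
     KMP_YIELD_SPIN(spins); // pause; yield once the spin budget is spent
   }
*/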
1309*0b57cec5SDimitry Andric 
1310*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
1311*0b57cec5SDimitry Andric /* Support datatypes for the orphaned construct nesting checks.             */
1312*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
1313*0b57cec5SDimitry Andric 
1314*0b57cec5SDimitry Andric enum cons_type {
1315*0b57cec5SDimitry Andric   ct_none,
1316*0b57cec5SDimitry Andric   ct_parallel,
1317*0b57cec5SDimitry Andric   ct_pdo,
1318*0b57cec5SDimitry Andric   ct_pdo_ordered,
1319*0b57cec5SDimitry Andric   ct_psections,
1320*0b57cec5SDimitry Andric   ct_psingle,
1321*0b57cec5SDimitry Andric   ct_critical,
1322*0b57cec5SDimitry Andric   ct_ordered_in_parallel,
1323*0b57cec5SDimitry Andric   ct_ordered_in_pdo,
1324*0b57cec5SDimitry Andric   ct_master,
1325*0b57cec5SDimitry Andric   ct_reduce,
1326*0b57cec5SDimitry Andric   ct_barrier
1327*0b57cec5SDimitry Andric };
1328*0b57cec5SDimitry Andric 
1329*0b57cec5SDimitry Andric #define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
1330*0b57cec5SDimitry Andric 
1331*0b57cec5SDimitry Andric struct cons_data {
1332*0b57cec5SDimitry Andric   ident_t const *ident;
1333*0b57cec5SDimitry Andric   enum cons_type type;
1334*0b57cec5SDimitry Andric   int prev;
1335*0b57cec5SDimitry Andric   kmp_user_lock_p
1336*0b57cec5SDimitry Andric       name; /* address exclusively for critical section name comparison */
1337*0b57cec5SDimitry Andric };
1338*0b57cec5SDimitry Andric 
1339*0b57cec5SDimitry Andric struct cons_header {
1340*0b57cec5SDimitry Andric   int p_top, w_top, s_top;
1341*0b57cec5SDimitry Andric   int stack_size, stack_top;
1342*0b57cec5SDimitry Andric   struct cons_data *stack_data;
1343*0b57cec5SDimitry Andric };
1344*0b57cec5SDimitry Andric 
1345*0b57cec5SDimitry Andric struct kmp_region_info {
1346*0b57cec5SDimitry Andric   char *text;
1347*0b57cec5SDimitry Andric   int offset[KMP_MAX_FIELDS];
1348*0b57cec5SDimitry Andric   int length[KMP_MAX_FIELDS];
1349*0b57cec5SDimitry Andric };
1350*0b57cec5SDimitry Andric 
1351*0b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */
1352*0b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */
1353*0b57cec5SDimitry Andric 
1354*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1355*0b57cec5SDimitry Andric typedef HANDLE kmp_thread_t;
1356*0b57cec5SDimitry Andric typedef DWORD kmp_key_t;
1357*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1358*0b57cec5SDimitry Andric 
1359*0b57cec5SDimitry Andric #if KMP_OS_UNIX
1360*0b57cec5SDimitry Andric typedef pthread_t kmp_thread_t;
1361*0b57cec5SDimitry Andric typedef pthread_key_t kmp_key_t;
1362*0b57cec5SDimitry Andric #endif
1363*0b57cec5SDimitry Andric 
1364*0b57cec5SDimitry Andric extern kmp_key_t __kmp_gtid_threadprivate_key;
1365*0b57cec5SDimitry Andric 
1366*0b57cec5SDimitry Andric typedef struct kmp_sys_info {
1367*0b57cec5SDimitry Andric   long maxrss; /* the maximum resident set size utilized (in kilobytes)     */
1368*0b57cec5SDimitry Andric   long minflt; /* the number of page faults serviced without any I/O        */
1369*0b57cec5SDimitry Andric   long majflt; /* the number of page faults serviced that required I/O      */
1370*0b57cec5SDimitry Andric   long nswap; /* the number of times a process was "swapped" out of memory */
1371*0b57cec5SDimitry Andric   long inblock; /* the number of times the file system had to perform input  */
1372*0b57cec5SDimitry Andric   long oublock; /* the number of times the file system had to perform output */
1373*0b57cec5SDimitry Andric   long nvcsw; /* the number of voluntary context switches performed      */
1374*0b57cec5SDimitry Andric   long nivcsw; /* the number of involuntary (forced) context switches    */
1375*0b57cec5SDimitry Andric } kmp_sys_info_t;
1376*0b57cec5SDimitry Andric 
1377*0b57cec5SDimitry Andric #if USE_ITT_BUILD
1378*0b57cec5SDimitry Andric // We cannot include "kmp_itt.h" due to a circular dependency. Declare the only
1379*0b57cec5SDimitry Andric // required type here. Later we will check that the type meets the requirements.
1380*0b57cec5SDimitry Andric typedef int kmp_itt_mark_t;
1381*0b57cec5SDimitry Andric #define KMP_ITT_DEBUG 0
1382*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
1383*0b57cec5SDimitry Andric 
1384*0b57cec5SDimitry Andric typedef kmp_int32 kmp_critical_name[8];
1385*0b57cec5SDimitry Andric 
1386*0b57cec5SDimitry Andric /*!
1387*0b57cec5SDimitry Andric @ingroup PARALLEL
1388*0b57cec5SDimitry Andric The type for a microtask which gets passed to @ref __kmpc_fork_call().
1389*0b57cec5SDimitry Andric The arguments to the outlined function are
1390*0b57cec5SDimitry Andric @param global_tid the global thread identity of the thread executing the
1391*0b57cec5SDimitry Andric function.
1392*0b57cec5SDimitry Andric @param bound_tid  the local identity of the thread executing the function
1393*0b57cec5SDimitry Andric @param ... pointers to shared variables accessed by the function.
1394*0b57cec5SDimitry Andric */
1395*0b57cec5SDimitry Andric typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
1396*0b57cec5SDimitry Andric typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
1397*0b57cec5SDimitry Andric                                  ...);
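/* Example (illustrative sketch of a compiler-outlined microtask; the name
   and the shared-variable parameter are hypothetical):
   void outlined_body(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                      int *shared_counter) {
     // body of the parallel region; *global_tid identifies the thread
   }
   // The compiler passes (kmpc_micro)outlined_body to __kmpc_fork_call().
*/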
1398*0b57cec5SDimitry Andric 
1399*0b57cec5SDimitry Andric /*!
1400*0b57cec5SDimitry Andric @ingroup THREADPRIVATE
1401*0b57cec5SDimitry Andric @{
1402*0b57cec5SDimitry Andric */
1403*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
1405*0b57cec5SDimitry Andric /* Threadprivate initialization/finalization function declarations */
1406*0b57cec5SDimitry Andric 
1407*0b57cec5SDimitry Andric /*  for non-array objects:  __kmpc_threadprivate_register()  */
1408*0b57cec5SDimitry Andric 
1409*0b57cec5SDimitry Andric /*!
1410*0b57cec5SDimitry Andric  Pointer to the constructor function.
1411*0b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer
1412*0b57cec5SDimitry Andric */
1413*0b57cec5SDimitry Andric typedef void *(*kmpc_ctor)(void *);
1414*0b57cec5SDimitry Andric 
1415*0b57cec5SDimitry Andric /*!
1416*0b57cec5SDimitry Andric  Pointer to the destructor function.
1417*0b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer
1418*0b57cec5SDimitry Andric */
1419*0b57cec5SDimitry Andric typedef void (*kmpc_dtor)(
1420*0b57cec5SDimitry Andric     void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
1421*0b57cec5SDimitry Andric                               compiler */
1422*0b57cec5SDimitry Andric /*!
1423*0b57cec5SDimitry Andric  Pointer to an alternate constructor.
1424*0b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer.
1425*0b57cec5SDimitry Andric */
1426*0b57cec5SDimitry Andric typedef void *(*kmpc_cctor)(void *, void *);
1427*0b57cec5SDimitry Andric 
1428*0b57cec5SDimitry Andric /* for array objects: __kmpc_threadprivate_register_vec() */
1429*0b57cec5SDimitry Andric /* First arg: "this" pointer */
1430*0b57cec5SDimitry Andric /* Last arg: number of array elements */
1431*0b57cec5SDimitry Andric /*!
1432*0b57cec5SDimitry Andric  Array constructor.
1433*0b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer;
1434*0b57cec5SDimitry Andric  the second argument is the number of array elements.
1435*0b57cec5SDimitry Andric */
1436*0b57cec5SDimitry Andric typedef void *(*kmpc_ctor_vec)(void *, size_t);
1437*0b57cec5SDimitry Andric /*!
1438*0b57cec5SDimitry Andric  Pointer to the array destructor function.
1439*0b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer
1440*0b57cec5SDimitry Andric  The second argument is the number of array elements.
1441*0b57cec5SDimitry Andric */
1442*0b57cec5SDimitry Andric typedef void (*kmpc_dtor_vec)(void *, size_t);
1443*0b57cec5SDimitry Andric /*!
1444*0b57cec5SDimitry Andric  Array constructor.
1445*0b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer;
1446*0b57cec5SDimitry Andric  the third argument is the number of array elements.
1447*0b57cec5SDimitry Andric */
1448*0b57cec5SDimitry Andric typedef void *(*kmpc_cctor_vec)(void *, void *,
1449*0b57cec5SDimitry Andric                                 size_t); /* function unused by compiler */
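/* Example (illustrative sketch of a hypothetical constructor/destructor
   pair a compiler might register, via __kmpc_threadprivate_register(), for
   a non-array threadprivate object):
   void *my_ctor(void *this_ptr) {
     // construct the per-thread copy in place
     return this_ptr;
   }
   void my_dtor(void *this_ptr) {
     // tear down the per-thread copy
   }
*/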
1450*0b57cec5SDimitry Andric 
1451*0b57cec5SDimitry Andric /*!
1452*0b57cec5SDimitry Andric @}
1453*0b57cec5SDimitry Andric */
1454*0b57cec5SDimitry Andric 
1455*0b57cec5SDimitry Andric /* keeps track of threadprivate cache allocations for cleanup later */
1456*0b57cec5SDimitry Andric typedef struct kmp_cached_addr {
1457*0b57cec5SDimitry Andric   void **addr; /* address of allocated cache */
1458*0b57cec5SDimitry Andric   void ***compiler_cache; /* pointer to compiler's cache */
1459*0b57cec5SDimitry Andric   void *data; /* pointer to global data */
1460*0b57cec5SDimitry Andric   struct kmp_cached_addr *next; /* pointer to next cached address */
1461*0b57cec5SDimitry Andric } kmp_cached_addr_t;
1462*0b57cec5SDimitry Andric 
1463*0b57cec5SDimitry Andric struct private_data {
1464*0b57cec5SDimitry Andric   struct private_data *next; /* The next descriptor in the list      */
1465*0b57cec5SDimitry Andric   void *data; /* The data buffer for this descriptor  */
1466*0b57cec5SDimitry Andric   int more; /* The repeat count for this descriptor */
1467*0b57cec5SDimitry Andric   size_t size; /* The data size for this descriptor    */
1468*0b57cec5SDimitry Andric };
1469*0b57cec5SDimitry Andric 
1470*0b57cec5SDimitry Andric struct private_common {
1471*0b57cec5SDimitry Andric   struct private_common *next;
1472*0b57cec5SDimitry Andric   struct private_common *link;
1473*0b57cec5SDimitry Andric   void *gbl_addr;
1474*0b57cec5SDimitry Andric   void *par_addr; /* par_addr == gbl_addr for MASTER thread */
1475*0b57cec5SDimitry Andric   size_t cmn_size;
1476*0b57cec5SDimitry Andric };
1477*0b57cec5SDimitry Andric 
1478*0b57cec5SDimitry Andric struct shared_common {
1479*0b57cec5SDimitry Andric   struct shared_common *next;
1480*0b57cec5SDimitry Andric   struct private_data *pod_init;
1481*0b57cec5SDimitry Andric   void *obj_init;
1482*0b57cec5SDimitry Andric   void *gbl_addr;
1483*0b57cec5SDimitry Andric   union {
1484*0b57cec5SDimitry Andric     kmpc_ctor ctor;
1485*0b57cec5SDimitry Andric     kmpc_ctor_vec ctorv;
1486*0b57cec5SDimitry Andric   } ct;
1487*0b57cec5SDimitry Andric   union {
1488*0b57cec5SDimitry Andric     kmpc_cctor cctor;
1489*0b57cec5SDimitry Andric     kmpc_cctor_vec cctorv;
1490*0b57cec5SDimitry Andric   } cct;
1491*0b57cec5SDimitry Andric   union {
1492*0b57cec5SDimitry Andric     kmpc_dtor dtor;
1493*0b57cec5SDimitry Andric     kmpc_dtor_vec dtorv;
1494*0b57cec5SDimitry Andric   } dt;
1495*0b57cec5SDimitry Andric   size_t vec_len;
1496*0b57cec5SDimitry Andric   int is_vec;
1497*0b57cec5SDimitry Andric   size_t cmn_size;
1498*0b57cec5SDimitry Andric };
1499*0b57cec5SDimitry Andric 
1500*0b57cec5SDimitry Andric #define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
1501*0b57cec5SDimitry Andric #define KMP_HASH_TABLE_SIZE                                                    \
1502*0b57cec5SDimitry Andric   (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
1503*0b57cec5SDimitry Andric #define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
1504*0b57cec5SDimitry Andric #define KMP_HASH(x)                                                            \
1505*0b57cec5SDimitry Andric   ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
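/* Worked example (illustrative): for a global address x == 0x7f001238,
   KMP_HASH(x) == (0x7f001238 >> 3) & (512 - 1) == 0x0fe00247 & 0x1ff
   == 0x47, i.e. bucket 71 of the 512-entry table. */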
1506*0b57cec5SDimitry Andric 
1507*0b57cec5SDimitry Andric struct common_table {
1508*0b57cec5SDimitry Andric   struct private_common *data[KMP_HASH_TABLE_SIZE];
1509*0b57cec5SDimitry Andric };
1510*0b57cec5SDimitry Andric 
1511*0b57cec5SDimitry Andric struct shared_table {
1512*0b57cec5SDimitry Andric   struct shared_common *data[KMP_HASH_TABLE_SIZE];
1513*0b57cec5SDimitry Andric };
1514*0b57cec5SDimitry Andric 
1515*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
1516*0b57cec5SDimitry Andric 
1517*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
1518*0b57cec5SDimitry Andric // Shared barrier data that exists inside a single unit of the scheduling
1519*0b57cec5SDimitry Andric // hierarchy
1520*0b57cec5SDimitry Andric typedef struct kmp_hier_private_bdata_t {
1521*0b57cec5SDimitry Andric   kmp_int32 num_active;
1522*0b57cec5SDimitry Andric   kmp_uint64 index;
1523*0b57cec5SDimitry Andric   kmp_uint64 wait_val[2];
1524*0b57cec5SDimitry Andric } kmp_hier_private_bdata_t;
1525*0b57cec5SDimitry Andric #endif
1526*0b57cec5SDimitry Andric 
1527*0b57cec5SDimitry Andric typedef struct kmp_sched_flags {
1528*0b57cec5SDimitry Andric   unsigned ordered : 1;
1529*0b57cec5SDimitry Andric   unsigned nomerge : 1;
1530*0b57cec5SDimitry Andric   unsigned contains_last : 1;
1531*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
1532*0b57cec5SDimitry Andric   unsigned use_hier : 1;
1533*0b57cec5SDimitry Andric   unsigned unused : 28;
1534*0b57cec5SDimitry Andric #else
1535*0b57cec5SDimitry Andric   unsigned unused : 29;
1536*0b57cec5SDimitry Andric #endif
1537*0b57cec5SDimitry Andric } kmp_sched_flags_t;
1538*0b57cec5SDimitry Andric 
1539*0b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
1540*0b57cec5SDimitry Andric 
1541*0b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED
1542*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1543*0b57cec5SDimitry Andric   kmp_int32 count;
1544*0b57cec5SDimitry Andric   kmp_int32 ub;
1545*0b57cec5SDimitry Andric   /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1546*0b57cec5SDimitry Andric   kmp_int32 lb;
1547*0b57cec5SDimitry Andric   kmp_int32 st;
1548*0b57cec5SDimitry Andric   kmp_int32 tc;
1549*0b57cec5SDimitry Andric   kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
1550*0b57cec5SDimitry Andric                                      after ub */
1551*0b57cec5SDimitry Andric 
1552*0b57cec5SDimitry Andric   // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
1553*0b57cec5SDimitry Andric   //    a) parm3 is properly aligned and
1554*0b57cec5SDimitry Andric   //    b) all parm1-4 are in the same cache line.
1555*0b57cec5SDimitry Andric   // Because parm1-4 are used together, performance seems to be better
1556*0b57cec5SDimitry Andric   // if they are in the same line (not measured though).
1557*0b57cec5SDimitry Andric 
1558*0b57cec5SDimitry Andric   struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
1559*0b57cec5SDimitry Andric     kmp_int32 parm1; //     structures in kmp_dispatch.cpp. This should
1560*0b57cec5SDimitry Andric     kmp_int32 parm2; //     make no real change at least while padding is off.
1561*0b57cec5SDimitry Andric     kmp_int32 parm3;
1562*0b57cec5SDimitry Andric     kmp_int32 parm4;
1563*0b57cec5SDimitry Andric   };
1564*0b57cec5SDimitry Andric 
1565*0b57cec5SDimitry Andric   kmp_uint32 ordered_lower;
1566*0b57cec5SDimitry Andric   kmp_uint32 ordered_upper;
1567*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1568*0b57cec5SDimitry Andric   // This var can be placed in the hole between 'tc' and 'parm1', instead of
1569*0b57cec5SDimitry Andric   // 'static_steal_counter'. It would be nice to measure execution times.
1570*0b57cec5SDimitry Andric   // The conditional #if/#endif could then be removed altogether.
1571*0b57cec5SDimitry Andric   kmp_int32 last_upper;
1572*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1573*0b57cec5SDimitry Andric } dispatch_private_info32_t;
1574*0b57cec5SDimitry Andric 
1575*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1576*0b57cec5SDimitry Andric   kmp_int64 count; // current chunk number for static & static-steal scheduling
1577*0b57cec5SDimitry Andric   kmp_int64 ub; /* upper-bound */
1578*0b57cec5SDimitry Andric   /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1579*0b57cec5SDimitry Andric   kmp_int64 lb; /* lower-bound */
1580*0b57cec5SDimitry Andric   kmp_int64 st; /* stride */
1581*0b57cec5SDimitry Andric   kmp_int64 tc; /* trip count (number of iterations) */
1582*0b57cec5SDimitry Andric   kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
1583*0b57cec5SDimitry Andric                                      after ub */
1584*0b57cec5SDimitry Andric 
1585*0b57cec5SDimitry Andric   /* parm[1-4] are used in different ways by different scheduling algorithms */
1586*0b57cec5SDimitry Andric 
1587*0b57cec5SDimitry Andric   // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1588*0b57cec5SDimitry Andric   //    a) parm3 is properly aligned and
1589*0b57cec5SDimitry Andric   //    b) all parm1-4 are in the same cache line.
1590*0b57cec5SDimitry Andric   // Because parm1-4 are used together, performance seems to be better
1591*0b57cec5SDimitry Andric   // if they are in the same line (not measured though).
1592*0b57cec5SDimitry Andric 
1593*0b57cec5SDimitry Andric   struct KMP_ALIGN(32) {
1594*0b57cec5SDimitry Andric     kmp_int64 parm1;
1595*0b57cec5SDimitry Andric     kmp_int64 parm2;
1596*0b57cec5SDimitry Andric     kmp_int64 parm3;
1597*0b57cec5SDimitry Andric     kmp_int64 parm4;
1598*0b57cec5SDimitry Andric   };
1599*0b57cec5SDimitry Andric 
1600*0b57cec5SDimitry Andric   kmp_uint64 ordered_lower;
1601*0b57cec5SDimitry Andric   kmp_uint64 ordered_upper;
1602*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1603*0b57cec5SDimitry Andric   // This var can be placed in the hole between 'tc' and 'parm1', instead of
1604*0b57cec5SDimitry Andric   // 'static_steal_counter'. It would be nice to measure execution times.
1605*0b57cec5SDimitry Andric   // The conditional #if/#endif could then be removed altogether.
1606*0b57cec5SDimitry Andric   kmp_int64 last_upper;
1607*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1608*0b57cec5SDimitry Andric } dispatch_private_info64_t;
1609*0b57cec5SDimitry Andric #else /* KMP_STATIC_STEAL_ENABLED */
1610*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1611*0b57cec5SDimitry Andric   kmp_int32 lb;
1612*0b57cec5SDimitry Andric   kmp_int32 ub;
1613*0b57cec5SDimitry Andric   kmp_int32 st;
1614*0b57cec5SDimitry Andric   kmp_int32 tc;
1615*0b57cec5SDimitry Andric 
1616*0b57cec5SDimitry Andric   kmp_int32 parm1;
1617*0b57cec5SDimitry Andric   kmp_int32 parm2;
1618*0b57cec5SDimitry Andric   kmp_int32 parm3;
1619*0b57cec5SDimitry Andric   kmp_int32 parm4;
1620*0b57cec5SDimitry Andric 
1621*0b57cec5SDimitry Andric   kmp_int32 count;
1622*0b57cec5SDimitry Andric 
1623*0b57cec5SDimitry Andric   kmp_uint32 ordered_lower;
1624*0b57cec5SDimitry Andric   kmp_uint32 ordered_upper;
1625*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1626*0b57cec5SDimitry Andric   kmp_int32 last_upper;
1627*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1628*0b57cec5SDimitry Andric } dispatch_private_info32_t;
1629*0b57cec5SDimitry Andric 
1630*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1631*0b57cec5SDimitry Andric   kmp_int64 lb; /* lower-bound */
1632*0b57cec5SDimitry Andric   kmp_int64 ub; /* upper-bound */
1633*0b57cec5SDimitry Andric   kmp_int64 st; /* stride */
1634*0b57cec5SDimitry Andric   kmp_int64 tc; /* trip count (number of iterations) */
1635*0b57cec5SDimitry Andric 
1636*0b57cec5SDimitry Andric   /* parm[1-4] are used in different ways by different scheduling algorithms */
1637*0b57cec5SDimitry Andric   kmp_int64 parm1;
1638*0b57cec5SDimitry Andric   kmp_int64 parm2;
1639*0b57cec5SDimitry Andric   kmp_int64 parm3;
1640*0b57cec5SDimitry Andric   kmp_int64 parm4;
1641*0b57cec5SDimitry Andric 
1642*0b57cec5SDimitry Andric   kmp_int64 count; /* current chunk number for static scheduling */
1643*0b57cec5SDimitry Andric 
1644*0b57cec5SDimitry Andric   kmp_uint64 ordered_lower;
1645*0b57cec5SDimitry Andric   kmp_uint64 ordered_upper;
1646*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1647*0b57cec5SDimitry Andric   kmp_int64 last_upper;
1648*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1649*0b57cec5SDimitry Andric } dispatch_private_info64_t;
1650*0b57cec5SDimitry Andric #endif /* KMP_STATIC_STEAL_ENABLED */
1651*0b57cec5SDimitry Andric 
1652*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info {
1653*0b57cec5SDimitry Andric   union private_info {
1654*0b57cec5SDimitry Andric     dispatch_private_info32_t p32;
1655*0b57cec5SDimitry Andric     dispatch_private_info64_t p64;
1656*0b57cec5SDimitry Andric   } u;
1657*0b57cec5SDimitry Andric   enum sched_type schedule; /* scheduling algorithm */
1658*0b57cec5SDimitry Andric   kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1659*0b57cec5SDimitry Andric   kmp_int32 ordered_bumped;
1660*0b57cec5SDimitry Andric   // To retain the structure size after making ordered_iteration scalar
1661*0b57cec5SDimitry Andric   kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
1662*0b57cec5SDimitry Andric   // Stack of buffers for nest of serial regions
1663*0b57cec5SDimitry Andric   struct dispatch_private_info *next;
1664*0b57cec5SDimitry Andric   kmp_int32 type_size; /* the size of types in private_info */
1665*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
1666*0b57cec5SDimitry Andric   kmp_int32 hier_id;
1667*0b57cec5SDimitry Andric   void *parent; /* hierarchical scheduling parent pointer */
1668*0b57cec5SDimitry Andric #endif
1669*0b57cec5SDimitry Andric   enum cons_type pushed_ws;
1670*0b57cec5SDimitry Andric } dispatch_private_info_t;
1671*0b57cec5SDimitry Andric 
1672*0b57cec5SDimitry Andric typedef struct dispatch_shared_info32 {
1673*0b57cec5SDimitry Andric   /* chunk index under dynamic, number of idle threads under static-steal;
1674*0b57cec5SDimitry Andric      iteration index otherwise */
1675*0b57cec5SDimitry Andric   volatile kmp_uint32 iteration;
1676*0b57cec5SDimitry Andric   volatile kmp_uint32 num_done;
1677*0b57cec5SDimitry Andric   volatile kmp_uint32 ordered_iteration;
1678*0b57cec5SDimitry Andric   // Dummy to retain the structure size after making ordered_iteration scalar
1679*0b57cec5SDimitry Andric   kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
1680*0b57cec5SDimitry Andric } dispatch_shared_info32_t;
1681*0b57cec5SDimitry Andric 
1682*0b57cec5SDimitry Andric typedef struct dispatch_shared_info64 {
1683*0b57cec5SDimitry Andric   /* chunk index under dynamic, number of idle threads under static-steal;
1684*0b57cec5SDimitry Andric      iteration index otherwise */
1685*0b57cec5SDimitry Andric   volatile kmp_uint64 iteration;
1686*0b57cec5SDimitry Andric   volatile kmp_uint64 num_done;
1687*0b57cec5SDimitry Andric   volatile kmp_uint64 ordered_iteration;
1688*0b57cec5SDimitry Andric   // Dummy to retain the structure size after making ordered_iteration scalar
1689*0b57cec5SDimitry Andric   kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
1690*0b57cec5SDimitry Andric } dispatch_shared_info64_t;
1691*0b57cec5SDimitry Andric 
1692*0b57cec5SDimitry Andric typedef struct dispatch_shared_info {
1693*0b57cec5SDimitry Andric   union shared_info {
1694*0b57cec5SDimitry Andric     dispatch_shared_info32_t s32;
1695*0b57cec5SDimitry Andric     dispatch_shared_info64_t s64;
1696*0b57cec5SDimitry Andric   } u;
1697*0b57cec5SDimitry Andric   volatile kmp_uint32 buffer_index;
1698*0b57cec5SDimitry Andric   volatile kmp_int32 doacross_buf_idx; // teamwise index
1699*0b57cec5SDimitry Andric   volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1700*0b57cec5SDimitry Andric   kmp_int32 doacross_num_done; // count finished threads
1701*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
1702*0b57cec5SDimitry Andric   void *hier;
1703*0b57cec5SDimitry Andric #endif
1704*0b57cec5SDimitry Andric #if KMP_USE_HWLOC
1705*0b57cec5SDimitry Andric   // When linking with libhwloc, the ORDERED EPCC test slows down on big
1706*0b57cec5SDimitry Andric   // machines (> 48 cores). Performance analysis showed that a cache thrash
1707*0b57cec5SDimitry Andric   // was occurring and this padding helps alleviate the problem.
1708*0b57cec5SDimitry Andric   char padding[64];
1709*0b57cec5SDimitry Andric #endif
1710*0b57cec5SDimitry Andric } dispatch_shared_info_t;
1711*0b57cec5SDimitry Andric 
1712*0b57cec5SDimitry Andric typedef struct kmp_disp {
1713*0b57cec5SDimitry Andric   /* Vector for ORDERED SECTION */
1714*0b57cec5SDimitry Andric   void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1715*0b57cec5SDimitry Andric   /* Vector for END ORDERED SECTION */
1716*0b57cec5SDimitry Andric   void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1717*0b57cec5SDimitry Andric 
1718*0b57cec5SDimitry Andric   dispatch_shared_info_t *th_dispatch_sh_current;
1719*0b57cec5SDimitry Andric   dispatch_private_info_t *th_dispatch_pr_current;
1720*0b57cec5SDimitry Andric 
1721*0b57cec5SDimitry Andric   dispatch_private_info_t *th_disp_buffer;
1722*0b57cec5SDimitry Andric   kmp_int32 th_disp_index;
1723*0b57cec5SDimitry Andric   kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1724*0b57cec5SDimitry Andric   volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1725*0b57cec5SDimitry Andric   union { // we can use a union here because doacross cannot be used in
1726*0b57cec5SDimitry Andric     // nonmonotonic loops
1727*0b57cec5SDimitry Andric     kmp_int64 *th_doacross_info; // info on loop bounds
1728*0b57cec5SDimitry Andric     kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
1729*0b57cec5SDimitry Andric   };
1730*0b57cec5SDimitry Andric #if KMP_USE_INTERNODE_ALIGNMENT
1731*0b57cec5SDimitry Andric   char more_padding[INTERNODE_CACHE_LINE];
1732*0b57cec5SDimitry Andric #endif
1733*0b57cec5SDimitry Andric } kmp_disp_t;
1734*0b57cec5SDimitry Andric 
1735*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
1736*0b57cec5SDimitry Andric /* Barrier stuff */
1737*0b57cec5SDimitry Andric 
1738*0b57cec5SDimitry Andric /* constants for barrier state update */
1739*0b57cec5SDimitry Andric #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1740*0b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1741*0b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
1742*0b57cec5SDimitry Andric #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1743*0b57cec5SDimitry Andric 
1744*0b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1745*0b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
1746*0b57cec5SDimitry Andric #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
1747*0b57cec5SDimitry Andric 
1748*0b57cec5SDimitry Andric #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1749*0b57cec5SDimitry Andric #error "Barrier sleep bit must be smaller than barrier bump bit"
1750*0b57cec5SDimitry Andric #endif
1751*0b57cec5SDimitry Andric #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1752*0b57cec5SDimitry Andric #error "Barrier unused bit must be smaller than barrier bump bit"
1753*0b57cec5SDimitry Andric #endif
1754*0b57cec5SDimitry Andric 
1755*0b57cec5SDimitry Andric // Constants for release barrier wait state: currently, hierarchical only
1756*0b57cec5SDimitry Andric #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1757*0b57cec5SDimitry Andric #define KMP_BARRIER_OWN_FLAG                                                   \
1758*0b57cec5SDimitry Andric   1 // Normal state; worker waiting on own b_go flag in release
1759*0b57cec5SDimitry Andric #define KMP_BARRIER_PARENT_FLAG                                                \
1760*0b57cec5SDimitry Andric   2 // Special state; worker waiting on parent's b_go flag in release
1761*0b57cec5SDimitry Andric #define KMP_BARRIER_SWITCH_TO_OWN_FLAG                                         \
1762*0b57cec5SDimitry Andric   3 // Special state; tells worker to shift from parent to own b_go
1763*0b57cec5SDimitry Andric #define KMP_BARRIER_SWITCHING                                                  \
1764*0b57cec5SDimitry Andric   4 // Special state; worker resets appropriate flag on wake-up
1765*0b57cec5SDimitry Andric 
1766*0b57cec5SDimitry Andric #define KMP_NOT_SAFE_TO_REAP                                                   \
1767*0b57cec5SDimitry Andric   0 // Thread th_reap_state: not safe to reap (tasking)
1768*0b57cec5SDimitry Andric #define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
1769*0b57cec5SDimitry Andric 
1770*0b57cec5SDimitry Andric enum barrier_type {
1771*0b57cec5SDimitry Andric   bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
1772*0b57cec5SDimitry Andric                            barriers if enabled) */
1773*0b57cec5SDimitry Andric   bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1774*0b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER
1775*0b57cec5SDimitry Andric   bs_reduction_barrier, /* 2, All barriers that are used in reduction */
1776*0b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER
1777*0b57cec5SDimitry Andric   bs_last_barrier /* Just a placeholder to mark the end */
1778*0b57cec5SDimitry Andric };
1779*0b57cec5SDimitry Andric 
1780*0b57cec5SDimitry Andric // to work with reduction barriers just like with plain barriers
1781*0b57cec5SDimitry Andric #if !KMP_FAST_REDUCTION_BARRIER
1782*0b57cec5SDimitry Andric #define bs_reduction_barrier bs_plain_barrier
1783*0b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER
1784*0b57cec5SDimitry Andric 
1785*0b57cec5SDimitry Andric typedef enum kmp_bar_pat { /* Barrier communication patterns */
1786*0b57cec5SDimitry Andric   bp_linear_bar = 0, /* Single level (degenerate) tree */
1787*0b57cec5SDimitry Andric   bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
1788*0b57cec5SDimitry Andric   bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
1789*0b57cec5SDimitry Andric   bp_hierarchical_bar = 3, /* Machine hierarchy tree */
1790*0b57cec5SDimitry Andric   bp_last_bar /* Placeholder to mark the end */
1791*0b57cec5SDimitry Andric } kmp_bar_pat_e;
1796*0b57cec5SDimitry Andric 
1797*0b57cec5SDimitry Andric #define KMP_BARRIER_ICV_PUSH 1
1798*0b57cec5SDimitry Andric 
1799*0b57cec5SDimitry Andric /* Record for holding the values of the internal controls stack records */
1800*0b57cec5SDimitry Andric typedef struct kmp_internal_control {
1801*0b57cec5SDimitry Andric   int serial_nesting_level; /* corresponds to the value of the
1802*0b57cec5SDimitry Andric                                th_team_serialized field */
1803*0b57cec5SDimitry Andric   kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
1804*0b57cec5SDimitry Andric                        thread) */
1805*0b57cec5SDimitry Andric   kmp_int8
1806*0b57cec5SDimitry Andric       bt_set; /* internal control for whether blocktime is explicitly set */
1807*0b57cec5SDimitry Andric   int blocktime; /* internal control for blocktime */
1808*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
1809*0b57cec5SDimitry Andric   int bt_intervals; /* internal control for blocktime intervals */
1810*0b57cec5SDimitry Andric #endif
1811*0b57cec5SDimitry Andric   int nproc; /* internal control for #threads for next parallel region (per
1812*0b57cec5SDimitry Andric                 thread) */
1813*0b57cec5SDimitry Andric   int thread_limit; /* internal control for thread-limit-var */
1814*0b57cec5SDimitry Andric   int max_active_levels; /* internal control for max_active_levels */
1815*0b57cec5SDimitry Andric   kmp_r_sched_t
1816*0b57cec5SDimitry Andric       sched; /* internal control for runtime schedule {sched,chunk} pair */
1817*0b57cec5SDimitry Andric   kmp_proc_bind_t proc_bind; /* internal control for affinity  */
1818*0b57cec5SDimitry Andric   kmp_int32 default_device; /* internal control for default device */
1819*0b57cec5SDimitry Andric   struct kmp_internal_control *next;
1820*0b57cec5SDimitry Andric } kmp_internal_control_t;
1821*0b57cec5SDimitry Andric 
1822*0b57cec5SDimitry Andric static inline void copy_icvs(kmp_internal_control_t *dst,
1823*0b57cec5SDimitry Andric                              kmp_internal_control_t *src) {
1824*0b57cec5SDimitry Andric   *dst = *src;
1825*0b57cec5SDimitry Andric }
1826*0b57cec5SDimitry Andric 
1827*0b57cec5SDimitry Andric /* Thread barrier needs volatile barrier fields */
1828*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_bstate {
1829*0b57cec5SDimitry Andric   // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
1830*0b57cec5SDimitry Andric   // uses of it). It is not explicitly aligned below, because we *don't* want
1831*0b57cec5SDimitry Andric   // it to be padded -- instead, we fit b_go into the same cache line with
1832*0b57cec5SDimitry Andric   // th_fixed_icvs, enabling NGO cache-line stores in the hierarchical barrier.
1833*0b57cec5SDimitry Andric   kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
1834*0b57cec5SDimitry Andric   // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
1835*0b57cec5SDimitry Andric   // same NGO store
1836*0b57cec5SDimitry Andric   volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
1837*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE volatile kmp_uint64
1838*0b57cec5SDimitry Andric       b_arrived; // STATE => task reached synch point.
1839*0b57cec5SDimitry Andric   kmp_uint32 *skip_per_level;
1840*0b57cec5SDimitry Andric   kmp_uint32 my_level;
1841*0b57cec5SDimitry Andric   kmp_int32 parent_tid;
1842*0b57cec5SDimitry Andric   kmp_int32 old_tid;
1843*0b57cec5SDimitry Andric   kmp_uint32 depth;
1844*0b57cec5SDimitry Andric   struct kmp_bstate *parent_bar;
1845*0b57cec5SDimitry Andric   kmp_team_t *team;
1846*0b57cec5SDimitry Andric   kmp_uint64 leaf_state;
1847*0b57cec5SDimitry Andric   kmp_uint32 nproc;
1848*0b57cec5SDimitry Andric   kmp_uint8 base_leaf_kids;
1849*0b57cec5SDimitry Andric   kmp_uint8 leaf_kids;
1850*0b57cec5SDimitry Andric   kmp_uint8 offset;
1851*0b57cec5SDimitry Andric   kmp_uint8 wait_flag;
1852*0b57cec5SDimitry Andric   kmp_uint8 use_oncore_barrier;
1853*0b57cec5SDimitry Andric #if USE_DEBUGGER
1854*0b57cec5SDimitry Andric   // The following field is intended for the debugger solely. Only the worker
1855*0b57cec5SDimitry Andric   // thread itself accesses this field: the worker increases it by 1 when it
1856*0b57cec5SDimitry Andric   // arrives at a barrier.
1857*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
1858*0b57cec5SDimitry Andric #endif /* USE_DEBUGGER */
1859*0b57cec5SDimitry Andric } kmp_bstate_t;
1860*0b57cec5SDimitry Andric 
1861*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_union {
1862*0b57cec5SDimitry Andric   double b_align; /* use worst case alignment */
1863*0b57cec5SDimitry Andric   char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
1864*0b57cec5SDimitry Andric   kmp_bstate_t bb;
1865*0b57cec5SDimitry Andric };
1866*0b57cec5SDimitry Andric 
1867*0b57cec5SDimitry Andric typedef union kmp_barrier_union kmp_balign_t;
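
// The union-with-pad idiom above rounds the barrier state up to whole cache
// lines (assuming KMP_PAD rounds sizeof(kmp_bstate_t) up to a multiple of
// CACHE_LINE); an illustrative compile-time check in the style used elsewhere
// in this file:
//   KMP_BUILD_ASSERT(sizeof(kmp_balign_t) % CACHE_LINE == 0);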
1868*0b57cec5SDimitry Andric 
1869*0b57cec5SDimitry Andric /* Team barrier needs only non-volatile arrived counter */
1870*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_team_union {
1871*0b57cec5SDimitry Andric   double b_align; /* use worst case alignment */
1872*0b57cec5SDimitry Andric   char b_pad[CACHE_LINE];
1873*0b57cec5SDimitry Andric   struct {
1874*0b57cec5SDimitry Andric     kmp_uint64 b_arrived; /* STATE => task reached synch point. */
1875*0b57cec5SDimitry Andric #if USE_DEBUGGER
1876*0b57cec5SDimitry Andric     // The following two fields are intended for the debugger solely. Only the
1877*0b57cec5SDimitry Andric     // master of the team accesses these fields: the first one is increased by
1878*0b57cec5SDimitry Andric     // 1 when the master arrives at a barrier, the second one is increased by
1879*0b57cec5SDimitry Andric     // one when all the threads have arrived.
1880*0b57cec5SDimitry Andric     kmp_uint b_master_arrived;
1881*0b57cec5SDimitry Andric     kmp_uint b_team_arrived;
1882*0b57cec5SDimitry Andric #endif
1883*0b57cec5SDimitry Andric   };
1884*0b57cec5SDimitry Andric };
1885*0b57cec5SDimitry Andric 
1886*0b57cec5SDimitry Andric typedef union kmp_barrier_team_union kmp_balign_team_t;
1887*0b57cec5SDimitry Andric 
1888*0b57cec5SDimitry Andric /* Padding for Linux* OS pthreads condition variables and mutexes used to signal
1889*0b57cec5SDimitry Andric    threads when a condition changes.  This works around an NPTL bug where
1890*0b57cec5SDimitry Andric    padding added to pthread_cond_t caused the initialization routine to write
1891*0b57cec5SDimitry Andric    outside of the structure when compiled against pre-NPTL threads.  */
1892*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1893*0b57cec5SDimitry Andric typedef struct kmp_win32_mutex {
1894*0b57cec5SDimitry Andric   /* The Lock */
1895*0b57cec5SDimitry Andric   CRITICAL_SECTION cs;
1896*0b57cec5SDimitry Andric } kmp_win32_mutex_t;
1897*0b57cec5SDimitry Andric 
1898*0b57cec5SDimitry Andric typedef struct kmp_win32_cond {
1899*0b57cec5SDimitry Andric   /* Count of the number of waiters. */
1900*0b57cec5SDimitry Andric   int waiters_count_;
1901*0b57cec5SDimitry Andric 
1902*0b57cec5SDimitry Andric   /* Serialize access to <waiters_count_> */
1903*0b57cec5SDimitry Andric   kmp_win32_mutex_t waiters_count_lock_;
1904*0b57cec5SDimitry Andric 
1905*0b57cec5SDimitry Andric   /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
1906*0b57cec5SDimitry Andric   int release_count_;
1907*0b57cec5SDimitry Andric 
1908*0b57cec5SDimitry Andric   /* Keeps track of the current "generation" so that we don't allow */
1909*0b57cec5SDimitry Andric   /* one thread to steal all the "releases" from the broadcast. */
1910*0b57cec5SDimitry Andric   int wait_generation_count_;
1911*0b57cec5SDimitry Andric 
1912*0b57cec5SDimitry Andric   /* A manual-reset event that's used to block and release waiting threads. */
1913*0b57cec5SDimitry Andric   HANDLE event_;
1914*0b57cec5SDimitry Andric } kmp_win32_cond_t;
1915*0b57cec5SDimitry Andric #endif
1916*0b57cec5SDimitry Andric 
1917*0b57cec5SDimitry Andric #if KMP_OS_UNIX
1918*0b57cec5SDimitry Andric 
1919*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_cond_union {
1920*0b57cec5SDimitry Andric   double c_align;
1921*0b57cec5SDimitry Andric   char c_pad[CACHE_LINE];
1922*0b57cec5SDimitry Andric   pthread_cond_t c_cond;
1923*0b57cec5SDimitry Andric };
1924*0b57cec5SDimitry Andric 
1925*0b57cec5SDimitry Andric typedef union kmp_cond_union kmp_cond_align_t;
1926*0b57cec5SDimitry Andric 
1927*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_mutex_union {
1928*0b57cec5SDimitry Andric   double m_align;
1929*0b57cec5SDimitry Andric   char m_pad[CACHE_LINE];
1930*0b57cec5SDimitry Andric   pthread_mutex_t m_mutex;
1931*0b57cec5SDimitry Andric };
1932*0b57cec5SDimitry Andric 
1933*0b57cec5SDimitry Andric typedef union kmp_mutex_union kmp_mutex_align_t;
1934*0b57cec5SDimitry Andric 
1935*0b57cec5SDimitry Andric #endif /* KMP_OS_UNIX */
1936*0b57cec5SDimitry Andric 
1937*0b57cec5SDimitry Andric typedef struct kmp_desc_base {
1938*0b57cec5SDimitry Andric   void *ds_stackbase;
1939*0b57cec5SDimitry Andric   size_t ds_stacksize;
1940*0b57cec5SDimitry Andric   int ds_stackgrow;
1941*0b57cec5SDimitry Andric   kmp_thread_t ds_thread;
1942*0b57cec5SDimitry Andric   volatile int ds_tid;
1943*0b57cec5SDimitry Andric   int ds_gtid;
1944*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1945*0b57cec5SDimitry Andric   volatile int ds_alive;
1946*0b57cec5SDimitry Andric   DWORD ds_thread_id;
1947*0b57cec5SDimitry Andric /* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
1948*0b57cec5SDimitry Andric    purposes. However, debugger support (libomp_db) cannot work with handles,
1949*0b57cec5SDimitry Andric    because they are not comparable. For example, the debugger requests info
1950*0b57cec5SDimitry Andric    about a thread with handle h; h is valid within the debugger process but
1951*0b57cec5SDimitry Andric    meaningless within the debuggee process. Even if h is duplicated via
1952*0b57cec5SDimitry Andric    DuplicateHandle(), the resulting h' is valid within the debuggee process
1953*0b57cec5SDimitry Andric    but is a *new* handle which does *not* equal any other handle in the
1954*0b57cec5SDimitry Andric    debuggee... The only way to compare handles is to convert them to
1955*0b57cec5SDimitry Andric    system-wide ids. The GetThreadId() function is available only in Longhorn
1956*0b57cec5SDimitry Andric    and Server 2003. :-( In contrast, GetCurrentThreadId() works on all
1957*0b57cec5SDimitry Andric    Windows* OS flavours (including Windows* 95). Thus, we call it from
1958*0b57cec5SDimitry Andric    within the thread and save the id to let libomp_db identify threads.  */
1959*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1960*0b57cec5SDimitry Andric } kmp_desc_base_t;
1961*0b57cec5SDimitry Andric 
1962*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_desc {
1963*0b57cec5SDimitry Andric   double ds_align; /* use worst case alignment */
1964*0b57cec5SDimitry Andric   char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
1965*0b57cec5SDimitry Andric   kmp_desc_base_t ds;
1966*0b57cec5SDimitry Andric } kmp_desc_t;
1967*0b57cec5SDimitry Andric 
1968*0b57cec5SDimitry Andric typedef struct kmp_local {
1969*0b57cec5SDimitry Andric   volatile int this_construct; /* count of single's encountered by thread */
1970*0b57cec5SDimitry Andric   void *reduce_data;
1971*0b57cec5SDimitry Andric #if KMP_USE_BGET
1972*0b57cec5SDimitry Andric   void *bget_data;
1973*0b57cec5SDimitry Andric   void *bget_list;
1974*0b57cec5SDimitry Andric #if !USE_CMP_XCHG_FOR_BGET
1975*0b57cec5SDimitry Andric #ifdef USE_QUEUING_LOCK_FOR_BGET
1976*0b57cec5SDimitry Andric   kmp_lock_t bget_lock; /* Lock for accessing bget free list */
1977*0b57cec5SDimitry Andric #else
1978*0b57cec5SDimitry Andric   kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must
1979*0b57cec5SDimitry Andric   // be a bootstrap lock so we can use it at
1980*0b57cec5SDimitry Andric   // library shutdown.
1981*0b57cec5SDimitry Andric #endif /* USE_QUEUING_LOCK_FOR_BGET */
1982*0b57cec5SDimitry Andric #endif /* ! USE_CMP_XCHG_FOR_BGET */
1983*0b57cec5SDimitry Andric #endif /* KMP_USE_BGET */
1984*0b57cec5SDimitry Andric 
1985*0b57cec5SDimitry Andric   PACKED_REDUCTION_METHOD_T
1986*0b57cec5SDimitry Andric   packed_reduction_method; /* stored by __kmpc_reduce*(), used by
1987*0b57cec5SDimitry Andric                               __kmpc_end_reduce*() */
1988*0b57cec5SDimitry Andric 
1989*0b57cec5SDimitry Andric } kmp_local_t;
1990*0b57cec5SDimitry Andric 
1991*0b57cec5SDimitry Andric #define KMP_CHECK_UPDATE(a, b)                                                 \
1992*0b57cec5SDimitry Andric   if ((a) != (b))                                                              \
1993*0b57cec5SDimitry Andric   (a) = (b)
1994*0b57cec5SDimitry Andric #define KMP_CHECK_UPDATE_SYNC(a, b)                                            \
1995*0b57cec5SDimitry Andric   if ((a) != (b))                                                              \
1996*0b57cec5SDimitry Andric   TCW_SYNC_PTR((a), (b))
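
// These guards skip redundant stores so hot shared cache lines are not
// needlessly dirtied (and invalidated in other cores' caches). Illustrative
// use, with a hypothetical new value loc:
//   KMP_CHECK_UPDATE(team->t.t_ident, loc); // store only if the value changed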
1997*0b57cec5SDimitry Andric 
1998*0b57cec5SDimitry Andric #define get__blocktime(xteam, xtid)                                            \
1999*0b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
2000*0b57cec5SDimitry Andric #define get__bt_set(xteam, xtid)                                               \
2001*0b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
2002*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2003*0b57cec5SDimitry Andric #define get__bt_intervals(xteam, xtid)                                         \
2004*0b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
2005*0b57cec5SDimitry Andric #endif
2006*0b57cec5SDimitry Andric 
2007*0b57cec5SDimitry Andric #define get__dynamic_2(xteam, xtid)                                            \
2008*0b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
2009*0b57cec5SDimitry Andric #define get__nproc_2(xteam, xtid)                                              \
2010*0b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
2011*0b57cec5SDimitry Andric #define get__sched_2(xteam, xtid)                                              \
2012*0b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
2013*0b57cec5SDimitry Andric 
2014*0b57cec5SDimitry Andric #define set__blocktime_team(xteam, xtid, xval)                                 \
2015*0b57cec5SDimitry Andric   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) =     \
2016*0b57cec5SDimitry Andric        (xval))
2017*0b57cec5SDimitry Andric 
2018*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2019*0b57cec5SDimitry Andric #define set__bt_intervals_team(xteam, xtid, xval)                              \
2020*0b57cec5SDimitry Andric   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) =  \
2021*0b57cec5SDimitry Andric        (xval))
2022*0b57cec5SDimitry Andric #endif
2023*0b57cec5SDimitry Andric 
2024*0b57cec5SDimitry Andric #define set__bt_set_team(xteam, xtid, xval)                                    \
2025*0b57cec5SDimitry Andric   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
2026*0b57cec5SDimitry Andric 
2027*0b57cec5SDimitry Andric #define set__dynamic(xthread, xval)                                            \
2028*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
2029*0b57cec5SDimitry Andric #define get__dynamic(xthread)                                                  \
2030*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
2031*0b57cec5SDimitry Andric 
2032*0b57cec5SDimitry Andric #define set__nproc(xthread, xval)                                              \
2033*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
2034*0b57cec5SDimitry Andric 
2035*0b57cec5SDimitry Andric #define set__thread_limit(xthread, xval)                                       \
2036*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
2037*0b57cec5SDimitry Andric 
2038*0b57cec5SDimitry Andric #define set__max_active_levels(xthread, xval)                                  \
2039*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
2040*0b57cec5SDimitry Andric 
2041*0b57cec5SDimitry Andric #define get__max_active_levels(xthread)                                        \
2042*0b57cec5SDimitry Andric   ((xthread)->th.th_current_task->td_icvs.max_active_levels)
2043*0b57cec5SDimitry Andric 
2044*0b57cec5SDimitry Andric #define set__sched(xthread, xval)                                              \
2045*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
2046*0b57cec5SDimitry Andric 
2047*0b57cec5SDimitry Andric #define set__proc_bind(xthread, xval)                                          \
2048*0b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
2049*0b57cec5SDimitry Andric #define get__proc_bind(xthread)                                                \
2050*0b57cec5SDimitry Andric   ((xthread)->th.th_current_task->td_icvs.proc_bind)
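
// Illustrative use of the ICV accessors above (this_thr/team/tid are
// hypothetical names here); all of them resolve to the current task's td_icvs:
//   set__nproc(this_thr, 8); // request 8 threads for the next parallel region
//   int bt = get__blocktime(team, tid); // read the blocktime ICV via the team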
2051*0b57cec5SDimitry Andric 
2052*0b57cec5SDimitry Andric // OpenMP tasking data structures
2053*0b57cec5SDimitry Andric 
2054*0b57cec5SDimitry Andric typedef enum kmp_tasking_mode {
2055*0b57cec5SDimitry Andric   tskm_immediate_exec = 0,
2056*0b57cec5SDimitry Andric   tskm_extra_barrier = 1,
2057*0b57cec5SDimitry Andric   tskm_task_teams = 2,
2058*0b57cec5SDimitry Andric   tskm_max = 2
2059*0b57cec5SDimitry Andric } kmp_tasking_mode_t;
2060*0b57cec5SDimitry Andric 
2061*0b57cec5SDimitry Andric extern kmp_tasking_mode_t
2062*0b57cec5SDimitry Andric     __kmp_tasking_mode; /* determines how/when to execute tasks */
2063*0b57cec5SDimitry Andric extern int __kmp_task_stealing_constraint;
2064*0b57cec5SDimitry Andric extern int __kmp_enable_task_throttling;
2065*0b57cec5SDimitry Andric extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
2066*0b57cec5SDimitry Andric // specified, defaults to 0 otherwise
2067*0b57cec5SDimitry Andric // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2068*0b57cec5SDimitry Andric extern kmp_int32 __kmp_max_task_priority;
2069*0b57cec5SDimitry Andric // Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
2070*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_taskloop_min_tasks;
2071*0b57cec5SDimitry Andric 
2072*0b57cec5SDimitry Andric /* NOTE: kmp_taskdata_t and kmp_task_t structures are allocated in a single
2073*0b57cec5SDimitry Andric    block, with the taskdata first */
2074*0b57cec5SDimitry Andric #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
2075*0b57cec5SDimitry Andric #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
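
// Layout sketch implied by the NOTE above: one allocation holds the taskdata
// header immediately followed by the compiler-visible task, so both macros
// are one-element pointer arithmetic:
//   [ kmp_taskdata_t | kmp_task_t | shareds/privates... ]
//   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); // == task - 1
//   kmp_task_t *t = KMP_TASKDATA_TO_TASK(td);        // == td + 1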
2076*0b57cec5SDimitry Andric 
2077*0b57cec5SDimitry Andric // The tt_found_tasks flag is a signal to all threads in the team that tasks
2078*0b57cec5SDimitry Andric // were spawned and queued since the previous barrier release.
2079*0b57cec5SDimitry Andric #define KMP_TASKING_ENABLED(task_team)                                         \
2080*0b57cec5SDimitry Andric   (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
2081*0b57cec5SDimitry Andric /*!
2082*0b57cec5SDimitry Andric @ingroup BASIC_TYPES
2083*0b57cec5SDimitry Andric @{
2084*0b57cec5SDimitry Andric */
2085*0b57cec5SDimitry Andric 
2086*0b57cec5SDimitry Andric /*!
2087*0b57cec5SDimitry Andric  */
2088*0b57cec5SDimitry Andric typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2089*0b57cec5SDimitry Andric 
2090*0b57cec5SDimitry Andric typedef union kmp_cmplrdata {
2091*0b57cec5SDimitry Andric   kmp_int32 priority; /**< priority specified by user for the task */
2092*0b57cec5SDimitry Andric   kmp_routine_entry_t
2093*0b57cec5SDimitry Andric       destructors; /* pointer to function to invoke destructors of
2094*0b57cec5SDimitry Andric                       firstprivate C++ objects */
2095*0b57cec5SDimitry Andric   /* future data */
2096*0b57cec5SDimitry Andric } kmp_cmplrdata_t;
2097*0b57cec5SDimitry Andric 
2098*0b57cec5SDimitry Andric /*  sizeof_kmp_task_t passed as arg to kmpc_omp_task call  */
2099*0b57cec5SDimitry Andric /*!
2100*0b57cec5SDimitry Andric  */
2101*0b57cec5SDimitry Andric typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2102*0b57cec5SDimitry Andric   void *shareds; /**< pointer to block of pointers to shared vars   */
2103*0b57cec5SDimitry Andric   kmp_routine_entry_t
2104*0b57cec5SDimitry Andric       routine; /**< pointer to routine to call for executing task */
2105*0b57cec5SDimitry Andric   kmp_int32 part_id; /**< part id for the task                          */
2106*0b57cec5SDimitry Andric   kmp_cmplrdata_t
2107*0b57cec5SDimitry Andric       data1; /* Two known optional additions: destructors and priority */
2108*0b57cec5SDimitry Andric   kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2109*0b57cec5SDimitry Andric   /* future data */
2110*0b57cec5SDimitry Andric   /*  private vars  */
2111*0b57cec5SDimitry Andric } kmp_task_t;
2112*0b57cec5SDimitry Andric 
2113*0b57cec5SDimitry Andric /*!
2114*0b57cec5SDimitry Andric @}
2115*0b57cec5SDimitry Andric */
2116*0b57cec5SDimitry Andric 
2117*0b57cec5SDimitry Andric typedef struct kmp_taskgroup {
2118*0b57cec5SDimitry Andric   std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
2119*0b57cec5SDimitry Andric   std::atomic<kmp_int32>
2120*0b57cec5SDimitry Andric       cancel_request; // request for cancellation of this taskgroup
2121*0b57cec5SDimitry Andric   struct kmp_taskgroup *parent; // parent taskgroup
2122*0b57cec5SDimitry Andric   // Block of data to perform task reduction
2123*0b57cec5SDimitry Andric   void *reduce_data; // reduction related info
2124*0b57cec5SDimitry Andric   kmp_int32 reduce_num_data; // number of data items to reduce
2125*0b57cec5SDimitry Andric } kmp_taskgroup_t;
2126*0b57cec5SDimitry Andric 
2127*0b57cec5SDimitry Andric // forward declarations
2128*0b57cec5SDimitry Andric typedef union kmp_depnode kmp_depnode_t;
2129*0b57cec5SDimitry Andric typedef struct kmp_depnode_list kmp_depnode_list_t;
2130*0b57cec5SDimitry Andric typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2131*0b57cec5SDimitry Andric 
2132*0b57cec5SDimitry Andric // Compiler sends us this info:
2133*0b57cec5SDimitry Andric typedef struct kmp_depend_info {
2134*0b57cec5SDimitry Andric   kmp_intptr_t base_addr;
2135*0b57cec5SDimitry Andric   size_t len;
2136*0b57cec5SDimitry Andric   struct {
2137*0b57cec5SDimitry Andric     bool in : 1;
2138*0b57cec5SDimitry Andric     bool out : 1;
2139*0b57cec5SDimitry Andric     bool mtx : 1;
2140*0b57cec5SDimitry Andric   } flags;
2141*0b57cec5SDimitry Andric } kmp_depend_info_t;
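
// Illustrative mapping from OpenMP depend clauses to the bits above; the
// exact encoding is chosen by the compiler:
//   depend(in: x)            -> in
//   depend(out/inout: x)     -> in | out
//   depend(mutexinoutset: x) -> mtx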
2142*0b57cec5SDimitry Andric 
2143*0b57cec5SDimitry Andric // Internal structures to work with task dependencies:
2144*0b57cec5SDimitry Andric struct kmp_depnode_list {
2145*0b57cec5SDimitry Andric   kmp_depnode_t *node;
2146*0b57cec5SDimitry Andric   kmp_depnode_list_t *next;
2147*0b57cec5SDimitry Andric };
2148*0b57cec5SDimitry Andric 
2149*0b57cec5SDimitry Andric // Max number of mutexinoutset dependencies per node
2150*0b57cec5SDimitry Andric #define MAX_MTX_DEPS 4
2151*0b57cec5SDimitry Andric 
2152*0b57cec5SDimitry Andric typedef struct kmp_base_depnode {
2153*0b57cec5SDimitry Andric   kmp_depnode_list_t *successors; /* used under lock */
2154*0b57cec5SDimitry Andric   kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
2155*0b57cec5SDimitry Andric   kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
2156*0b57cec5SDimitry Andric   kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
2157*0b57cec5SDimitry Andric   kmp_lock_t lock; /* guards shared fields: task, successors */
2158*0b57cec5SDimitry Andric #if KMP_SUPPORT_GRAPH_OUTPUT
2159*0b57cec5SDimitry Andric   kmp_uint32 id;
2160*0b57cec5SDimitry Andric #endif
2161*0b57cec5SDimitry Andric   std::atomic<kmp_int32> npredecessors;
2162*0b57cec5SDimitry Andric   std::atomic<kmp_int32> nrefs;
2163*0b57cec5SDimitry Andric } kmp_base_depnode_t;
2164*0b57cec5SDimitry Andric 
2165*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_depnode {
2166*0b57cec5SDimitry Andric   double dn_align; /* use worst case alignment */
2167*0b57cec5SDimitry Andric   char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
2168*0b57cec5SDimitry Andric   kmp_base_depnode_t dn;
2169*0b57cec5SDimitry Andric };
2170*0b57cec5SDimitry Andric 
2171*0b57cec5SDimitry Andric struct kmp_dephash_entry {
2172*0b57cec5SDimitry Andric   kmp_intptr_t addr;
2173*0b57cec5SDimitry Andric   kmp_depnode_t *last_out;
2174*0b57cec5SDimitry Andric   kmp_depnode_list_t *last_ins;
2175*0b57cec5SDimitry Andric   kmp_depnode_list_t *last_mtxs;
2176*0b57cec5SDimitry Andric   kmp_int32 last_flag;
2177*0b57cec5SDimitry Andric   kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
2178*0b57cec5SDimitry Andric   kmp_dephash_entry_t *next_in_bucket;
2179*0b57cec5SDimitry Andric };
2180*0b57cec5SDimitry Andric 
2181*0b57cec5SDimitry Andric typedef struct kmp_dephash {
2182*0b57cec5SDimitry Andric   kmp_dephash_entry_t **buckets;
2183*0b57cec5SDimitry Andric   size_t size;
2184*0b57cec5SDimitry Andric #ifdef KMP_DEBUG
2185*0b57cec5SDimitry Andric   kmp_uint32 nelements;
2186*0b57cec5SDimitry Andric   kmp_uint32 nconflicts;
2187*0b57cec5SDimitry Andric #endif
2188*0b57cec5SDimitry Andric } kmp_dephash_t;
2189*0b57cec5SDimitry Andric 
2190*0b57cec5SDimitry Andric typedef struct kmp_task_affinity_info {
2191*0b57cec5SDimitry Andric   kmp_intptr_t base_addr;
2192*0b57cec5SDimitry Andric   size_t len;
2193*0b57cec5SDimitry Andric   struct {
2194*0b57cec5SDimitry Andric     bool flag1 : 1;
2195*0b57cec5SDimitry Andric     bool flag2 : 1;
2196*0b57cec5SDimitry Andric     kmp_int32 reserved : 30;
2197*0b57cec5SDimitry Andric   } flags;
2198*0b57cec5SDimitry Andric } kmp_task_affinity_info_t;
2199*0b57cec5SDimitry Andric 
2200*0b57cec5SDimitry Andric typedef enum kmp_event_type_t {
2201*0b57cec5SDimitry Andric   KMP_EVENT_UNINITIALIZED = 0,
2202*0b57cec5SDimitry Andric   KMP_EVENT_ALLOW_COMPLETION = 1
2203*0b57cec5SDimitry Andric } kmp_event_type_t;
2204*0b57cec5SDimitry Andric 
2205*0b57cec5SDimitry Andric typedef struct {
2206*0b57cec5SDimitry Andric   kmp_event_type_t type;
2207*0b57cec5SDimitry Andric   kmp_tas_lock_t lock;
2208*0b57cec5SDimitry Andric   union {
2209*0b57cec5SDimitry Andric     kmp_task_t *task;
2210*0b57cec5SDimitry Andric   } ed;
2211*0b57cec5SDimitry Andric } kmp_event_t;
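
// Sketch of the user-level feature this serves: OpenMP 5.0 detachable tasks.
// The event starts in KMP_EVENT_ALLOW_COMPLETION, and the task may not finish
// until the standard omp_fulfill_event() is called, possibly from another
// thread; start_async_io() below is a hypothetical task body:
//   omp_event_handle_t ev;
//   #pragma omp task detach(ev)
//   { start_async_io(); } // body returns before the logical task completes
//   /* later, from a completion callback: */ omp_fulfill_event(ev);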
2212*0b57cec5SDimitry Andric 
2213*0b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
2214*0b57cec5SDimitry Andric 
2215*0b57cec5SDimitry Andric /* Tied Task stack definitions */
2216*0b57cec5SDimitry Andric typedef struct kmp_stack_block {
2217*0b57cec5SDimitry Andric   kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
2218*0b57cec5SDimitry Andric   struct kmp_stack_block *sb_next;
2219*0b57cec5SDimitry Andric   struct kmp_stack_block *sb_prev;
2220*0b57cec5SDimitry Andric } kmp_stack_block_t;
2221*0b57cec5SDimitry Andric 
2222*0b57cec5SDimitry Andric typedef struct kmp_task_stack {
2223*0b57cec5SDimitry Andric   kmp_stack_block_t ts_first_block; // first block of stack entries
2224*0b57cec5SDimitry Andric   kmp_taskdata_t **ts_top; // pointer to the top of stack
2225*0b57cec5SDimitry Andric   kmp_int32 ts_entries; // number of entries on the stack
2226*0b57cec5SDimitry Andric } kmp_task_stack_t;
2227*0b57cec5SDimitry Andric 
2228*0b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
2229*0b57cec5SDimitry Andric 
2230*0b57cec5SDimitry Andric typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2231*0b57cec5SDimitry Andric   /* Compiler flags */ /* Total compiler flags must be 16 bits */
2232*0b57cec5SDimitry Andric   unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2233*0b57cec5SDimitry Andric   unsigned final : 1; /* task is final(1) so execute immediately */
2234*0b57cec5SDimitry Andric   unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
2235*0b57cec5SDimitry Andric                               code path */
2236*0b57cec5SDimitry Andric   unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
2237*0b57cec5SDimitry Andric                                      invoke destructors from the runtime */
2238*0b57cec5SDimitry Andric   unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
2239*0b57cec5SDimitry Andric                          context of the RTL) */
2240*0b57cec5SDimitry Andric   unsigned priority_specified : 1; /* set if the compiler provides priority
2241*0b57cec5SDimitry Andric                                       setting for the task */
2242*0b57cec5SDimitry Andric   unsigned detachable : 1; /* 1 == can detach */
2243*0b57cec5SDimitry Andric   unsigned reserved : 9; /* reserved for compiler use */
2244*0b57cec5SDimitry Andric 
2245*0b57cec5SDimitry Andric   /* Library flags */ /* Total library flags must be 16 bits */
2246*0b57cec5SDimitry Andric   unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2247*0b57cec5SDimitry Andric   unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
2248*0b57cec5SDimitry Andric   unsigned tasking_ser : 1; // all tasks in team are either executed immediately
2249*0b57cec5SDimitry Andric   // (1) or may be deferred (0)
2250*0b57cec5SDimitry Andric   unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
2251*0b57cec5SDimitry Andric   // (0) [>= 2 threads]
2252*0b57cec5SDimitry Andric   /* If either team_serial or tasking_ser is set, task team may be NULL */
2253*0b57cec5SDimitry Andric   /* Task State Flags: */
2254*0b57cec5SDimitry Andric   unsigned started : 1; /* 1==started, 0==not started     */
2255*0b57cec5SDimitry Andric   unsigned executing : 1; /* 1==executing, 0==not executing */
2256*0b57cec5SDimitry Andric   unsigned complete : 1; /* 1==complete, 0==not complete   */
2257*0b57cec5SDimitry Andric   unsigned freed : 1; /* 1==freed, 0==allocated         */
2258*0b57cec5SDimitry Andric   unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2259*0b57cec5SDimitry Andric   unsigned reserved31 : 7; /* reserved for library use */
2260*0b57cec5SDimitry Andric 
2261*0b57cec5SDimitry Andric } kmp_tasking_flags_t;
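
// The bit widths above must total exactly 32 (16 compiler + 16 library); an
// illustrative compile-time check in the style used elsewhere in this file:
//   KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);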
2262*0b57cec5SDimitry Andric 
2263*0b57cec5SDimitry Andric struct kmp_taskdata { /* aligned during dynamic allocation       */
2264*0b57cec5SDimitry Andric   kmp_int32 td_task_id; /* id, assigned by debugger                */
2265*0b57cec5SDimitry Andric   kmp_tasking_flags_t td_flags; /* task flags                              */
2266*0b57cec5SDimitry Andric   kmp_team_t *td_team; /* team for this task                      */
2267*0b57cec5SDimitry Andric   kmp_info_p *td_alloc_thread; /* thread that allocated data structures   */
2268*0b57cec5SDimitry Andric   /* Currently not used except for perhaps IDB */
2269*0b57cec5SDimitry Andric   kmp_taskdata_t *td_parent; /* parent task                             */
2270*0b57cec5SDimitry Andric   kmp_int32 td_level; /* task nesting level                      */
2271*0b57cec5SDimitry Andric   std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
2272*0b57cec5SDimitry Andric   ident_t *td_ident; /* task identifier                         */
2273*0b57cec5SDimitry Andric   // Taskwait data.
2274*0b57cec5SDimitry Andric   ident_t *td_taskwait_ident;
2275*0b57cec5SDimitry Andric   kmp_uint32 td_taskwait_counter;
2276*0b57cec5SDimitry Andric   kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2277*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_internal_control_t
2278*0b57cec5SDimitry Andric       td_icvs; /* Internal control variables for the task */
2279*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE std::atomic<kmp_int32>
2280*0b57cec5SDimitry Andric       td_allocated_child_tasks; /* Child tasks (+ current task) not yet
2281*0b57cec5SDimitry Andric                                    deallocated */
2282*0b57cec5SDimitry Andric   std::atomic<kmp_int32>
2283*0b57cec5SDimitry Andric       td_incomplete_child_tasks; /* Child tasks not yet complete */
2284*0b57cec5SDimitry Andric   kmp_taskgroup_t
2285*0b57cec5SDimitry Andric       *td_taskgroup; // Each task keeps pointer to its current taskgroup
2286*0b57cec5SDimitry Andric   kmp_dephash_t
2287*0b57cec5SDimitry Andric       *td_dephash; // Dependencies for children tasks are tracked from here
2288*0b57cec5SDimitry Andric   kmp_depnode_t
2289*0b57cec5SDimitry Andric       *td_depnode; // Pointer to graph node if this task has dependencies
2290*0b57cec5SDimitry Andric   kmp_task_team_t *td_task_team;
2291*0b57cec5SDimitry Andric   kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
2292*0b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
2293*0b57cec5SDimitry Andric   // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
2294*0b57cec5SDimitry Andric   kmp_int32 td_size_loop_bounds;
2295*0b57cec5SDimitry Andric #endif
2296*0b57cec5SDimitry Andric   kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
2297*0b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
2298*0b57cec5SDimitry Andric   // GOMP sends in a copy function for copy constructors
2299*0b57cec5SDimitry Andric   void (*td_copy_func)(void *, void *);
2300*0b57cec5SDimitry Andric #endif
2301*0b57cec5SDimitry Andric   kmp_event_t td_allow_completion_event;
2302*0b57cec5SDimitry Andric #if OMPT_SUPPORT
2303*0b57cec5SDimitry Andric   ompt_task_info_t ompt_task_info;
2304*0b57cec5SDimitry Andric #endif
2305*0b57cec5SDimitry Andric }; // struct kmp_taskdata
2306*0b57cec5SDimitry Andric 
2307*0b57cec5SDimitry Andric // Make sure padding above worked
2308*0b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
2309*0b57cec5SDimitry Andric 
2310*0b57cec5SDimitry Andric // Data for task team but per thread
2311*0b57cec5SDimitry Andric typedef struct kmp_base_thread_data {
2312*0b57cec5SDimitry Andric   kmp_info_p *td_thr; // Pointer back to thread info
2313*0b57cec5SDimitry Andric   // Used only in __kmp_execute_tasks_template; maybe not available until a
2314*0b57cec5SDimitry Andric   // task is queued?
2315*0b57cec5SDimitry Andric   kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2316*0b57cec5SDimitry Andric   kmp_taskdata_t *
2317*0b57cec5SDimitry Andric       *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2318*0b57cec5SDimitry Andric   kmp_int32 td_deque_size; // Size of deque
2319*0b57cec5SDimitry Andric   kmp_uint32 td_deque_head; // Head of deque (will wrap)
2320*0b57cec5SDimitry Andric   kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2321*0b57cec5SDimitry Andric   kmp_int32 td_deque_ntasks; // Number of tasks in deque
2322*0b57cec5SDimitry Andric   // GEH: shouldn't this be volatile since used in while-spin?
2323*0b57cec5SDimitry Andric   kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2324*0b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
2325*0b57cec5SDimitry Andric   kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
2326*0b57cec5SDimitry Andric // scheduling constraint
2327*0b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
2328*0b57cec5SDimitry Andric } kmp_base_thread_data_t;
2329*0b57cec5SDimitry Andric 
2330*0b57cec5SDimitry Andric #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2331*0b57cec5SDimitry Andric #define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
2332*0b57cec5SDimitry Andric 
2333*0b57cec5SDimitry Andric #define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
2334*0b57cec5SDimitry Andric #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
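
// The deque is a power-of-two ring buffer, so indices wrap by masking rather
// than by modulo. Illustrative push-side arithmetic on a kmp_base_thread_data_t
// named td (a sketch only; it ignores locking and the grow-on-full path):
//   td.td_deque[td.td_deque_tail] = taskdata;
//   td.td_deque_tail = (td.td_deque_tail + 1) & TASK_DEQUE_MASK(td);
//   td.td_deque_ntasks++;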
2335*0b57cec5SDimitry Andric 
2336*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_thread_data {
2337*0b57cec5SDimitry Andric   kmp_base_thread_data_t td;
2338*0b57cec5SDimitry Andric   double td_align; /* use worst case alignment */
2339*0b57cec5SDimitry Andric   char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
2340*0b57cec5SDimitry Andric } kmp_thread_data_t;
2341*0b57cec5SDimitry Andric 
2342*0b57cec5SDimitry Andric // Data for task teams which are used when tasking is enabled for the team
2343*0b57cec5SDimitry Andric typedef struct kmp_base_task_team {
2344*0b57cec5SDimitry Andric   kmp_bootstrap_lock_t
2345*0b57cec5SDimitry Andric       tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2346*0b57cec5SDimitry Andric   /* must be bootstrap lock since used at library shutdown*/
2347*0b57cec5SDimitry Andric   kmp_task_team_t *tt_next; /* For linking the task team free list */
2348*0b57cec5SDimitry Andric   kmp_thread_data_t
2349*0b57cec5SDimitry Andric       *tt_threads_data; /* Array of per-thread structures for task team */
2350*0b57cec5SDimitry Andric   /* Data survives task team deallocation */
2351*0b57cec5SDimitry Andric   kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
2352*0b57cec5SDimitry Andric                                executing this team? */
2353*0b57cec5SDimitry Andric   /* TRUE means tt_threads_data is set up and initialized */
2354*0b57cec5SDimitry Andric   kmp_int32 tt_nproc; /* #threads in team           */
2355*0b57cec5SDimitry Andric   kmp_int32 tt_max_threads; // # entries allocated for threads_data array
2356*0b57cec5SDimitry Andric   kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
2357*0b57cec5SDimitry Andric   kmp_int32 tt_untied_task_encountered;
2358*0b57cec5SDimitry Andric 
2359*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE
2360*0b57cec5SDimitry Andric   std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
2361*0b57cec5SDimitry Andric 
2362*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE
2363*0b57cec5SDimitry Andric   volatile kmp_uint32
2364*0b57cec5SDimitry Andric       tt_active; /* is the team still actively executing tasks */
2365*0b57cec5SDimitry Andric } kmp_base_task_team_t;
2366*0b57cec5SDimitry Andric 
2367*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_task_team {
2368*0b57cec5SDimitry Andric   kmp_base_task_team_t tt;
2369*0b57cec5SDimitry Andric   double tt_align; /* use worst case alignment */
2370*0b57cec5SDimitry Andric   char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
2371*0b57cec5SDimitry Andric };
2372*0b57cec5SDimitry Andric 
2373*0b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2374*0b57cec5SDimitry Andric // Free lists keep same-size free memory slots for fast memory allocation
2375*0b57cec5SDimitry Andric // routines
2376*0b57cec5SDimitry Andric typedef struct kmp_free_list {
2377*0b57cec5SDimitry Andric   void *th_free_list_self; // Self-allocated tasks free list
2378*0b57cec5SDimitry Andric   void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
2379*0b57cec5SDimitry Andric   // threads
2380*0b57cec5SDimitry Andric   void *th_free_list_other; // Non-self free list (to be returned to owner's
2381*0b57cec5SDimitry Andric   // sync list)
2382*0b57cec5SDimitry Andric } kmp_free_list_t;
2383*0b57cec5SDimitry Andric #endif
2384*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
2385*0b57cec5SDimitry Andric // Hot teams array keeps hot teams and their sizes for given thread. Hot teams
2386*0b57cec5SDimitry Andric // are not put in teams pool, and they don't put threads in threads pool.
2387*0b57cec5SDimitry Andric typedef struct kmp_hot_team_ptr {
2388*0b57cec5SDimitry Andric   kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2389*0b57cec5SDimitry Andric   kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2390*0b57cec5SDimitry Andric } kmp_hot_team_ptr_t;
2391*0b57cec5SDimitry Andric #endif
2392*0b57cec5SDimitry Andric typedef struct kmp_teams_size {
2393*0b57cec5SDimitry Andric   kmp_int32 nteams; // number of teams in a league
2394*0b57cec5SDimitry Andric   kmp_int32 nth; // number of threads in each team of the league
2395*0b57cec5SDimitry Andric } kmp_teams_size_t;
2396*0b57cec5SDimitry Andric 
2397*0b57cec5SDimitry Andric // This struct stores a thread that acts as a "root" for a contention
2398*0b57cec5SDimitry Andric // group. Contention groups are rooted at kmp_root threads, but also at
2399*0b57cec5SDimitry Andric // each master thread of each team created in the teams construct.
2400*0b57cec5SDimitry Andric // This struct therefore also stores a thread_limit associated with
2401*0b57cec5SDimitry Andric // that contention group, and a counter to track the number of threads
2402*0b57cec5SDimitry Andric // active in that contention group. Each thread has a list of these: CG
2403*0b57cec5SDimitry Andric // root threads have an entry in their list in which cg_root refers to
2404*0b57cec5SDimitry Andric // the thread itself, whereas other workers in the CG will have a
2405*0b57cec5SDimitry Andric // single entry where cg_root is same as the entry containing their CG
2406*0b57cec5SDimitry Andric // single entry where cg_root is the same as in the entry of their CG
2407*0b57cec5SDimitry Andric // entry to the front of its list, because it now roots a new CG.
2408*0b57cec5SDimitry Andric typedef struct kmp_cg_root {
2409*0b57cec5SDimitry Andric   kmp_info_p *cg_root; // "root" thread for a contention group
2410*0b57cec5SDimitry Andric   // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2411*0b57cec5SDimitry Andric   // thread_limit clause for teams masters
2412*0b57cec5SDimitry Andric   kmp_int32 cg_thread_limit;
2413*0b57cec5SDimitry Andric   kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
2414*0b57cec5SDimitry Andric   struct kmp_cg_root *up; // pointer to higher level CG root in list
2415*0b57cec5SDimitry Andric } kmp_cg_root_t;
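
// Shape sketch for the list described above (illustrative): after a thread M
// encounters a teams construct under root thread R, the front of M's list
// roots the new contention group:
//   M->th.th_cg_roots -> { cg_root = M, cg_thread_limit, up } ->
//                        { cg_root = R, up = NULL }
// Workers in M's CG see an entry whose cg_root is M (their CG root).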
2416*0b57cec5SDimitry Andric 
2417*0b57cec5SDimitry Andric // OpenMP thread data structures
2418*0b57cec5SDimitry Andric 
2419*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_info {
2420*0b57cec5SDimitry Andric   /* Start with the read-only data, which is cache aligned and padded. It is
2421*0b57cec5SDimitry Andric      written by the master before the thread starts working. Uber masters may
2422*0b57cec5SDimitry Andric      update themselves later. Usage does not consider serialized regions.  */
2423*0b57cec5SDimitry Andric   kmp_desc_t th_info;
2424*0b57cec5SDimitry Andric   kmp_team_p *th_team; /* team we belong to */
2425*0b57cec5SDimitry Andric   kmp_root_p *th_root; /* pointer to root of task hierarchy */
2426*0b57cec5SDimitry Andric   kmp_info_p *th_next_pool; /* next available thread in the pool */
2427*0b57cec5SDimitry Andric   kmp_disp_t *th_dispatch; /* thread's dispatch data */
2428*0b57cec5SDimitry Andric   int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2429*0b57cec5SDimitry Andric 
2430*0b57cec5SDimitry Andric   /* The following are cached from the team info structure */
2431*0b57cec5SDimitry Andric   /* TODO use these in more places as determined to be needed via profiling */
2432*0b57cec5SDimitry Andric   int th_team_nproc; /* number of threads in a team */
2433*0b57cec5SDimitry Andric   kmp_info_p *th_team_master; /* the team's master thread */
2434*0b57cec5SDimitry Andric   int th_team_serialized; /* team is serialized */
2435*0b57cec5SDimitry Andric   microtask_t th_teams_microtask; /* save entry address for teams construct */
2436*0b57cec5SDimitry Andric   int th_teams_level; /* save initial level of teams construct */
2437*0b57cec5SDimitry Andric /* it is 0 on the device but may be any value on the host */
2438*0b57cec5SDimitry Andric 
2439*0b57cec5SDimitry Andric /* The blocktime info is copied from the team struct to the thread struct */
2440*0b57cec5SDimitry Andric /* at the start of a barrier, and the values stored in the team are used */
2441*0b57cec5SDimitry Andric /* at points in the code where the team struct is no longer guaranteed   */
2442*0b57cec5SDimitry Andric /* to exist (from the POV of worker threads).                            */
2443*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2444*0b57cec5SDimitry Andric   int th_team_bt_intervals;
2445*0b57cec5SDimitry Andric   int th_team_bt_set;
2446*0b57cec5SDimitry Andric #else
2447*0b57cec5SDimitry Andric   kmp_uint64 th_team_bt_intervals;
2448*0b57cec5SDimitry Andric #endif
2449*0b57cec5SDimitry Andric 
2450*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
2451*0b57cec5SDimitry Andric   kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2452*0b57cec5SDimitry Andric #endif
2453*0b57cec5SDimitry Andric   omp_allocator_handle_t th_def_allocator; /* default allocator */
2454*0b57cec5SDimitry Andric   /* The data set by the master at reinit, then R/W by the worker */
2455*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE int
2456*0b57cec5SDimitry Andric       th_set_nproc; /* if > 0, then only use this request for the next fork */
2457*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
2458*0b57cec5SDimitry Andric   kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2459*0b57cec5SDimitry Andric #endif
2460*0b57cec5SDimitry Andric   kmp_proc_bind_t
2461*0b57cec5SDimitry Andric       th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2462*0b57cec5SDimitry Andric   kmp_teams_size_t
2463*0b57cec5SDimitry Andric       th_teams_size; /* number of teams/threads in teams construct */
2464*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
2465*0b57cec5SDimitry Andric   int th_current_place; /* place currently bound to */
2466*0b57cec5SDimitry Andric   int th_new_place; /* place to bind to in par reg */
2467*0b57cec5SDimitry Andric   int th_first_place; /* first place in partition */
2468*0b57cec5SDimitry Andric   int th_last_place; /* last place in partition */
2469*0b57cec5SDimitry Andric #endif
2470*0b57cec5SDimitry Andric   int th_prev_level; /* previous level for affinity format */
2471*0b57cec5SDimitry Andric   int th_prev_num_threads; /* previous num_threads for affinity format */
2472*0b57cec5SDimitry Andric #if USE_ITT_BUILD
2473*0b57cec5SDimitry Andric   kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2474*0b57cec5SDimitry Andric   kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2475*0b57cec5SDimitry Andric   kmp_uint64 th_frame_time; /* frame timestamp */
2476*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
2477*0b57cec5SDimitry Andric   kmp_local_t th_local;
2478*0b57cec5SDimitry Andric   struct private_common *th_pri_head;
2479*0b57cec5SDimitry Andric 
2480*0b57cec5SDimitry Andric   /* Now the data only used by the worker (after initial allocation) */
2481*0b57cec5SDimitry Andric   /* TODO the first serial team should actually be stored in the info_t
2482*0b57cec5SDimitry Andric      structure.  this will help reduce initial allocation overhead */
2483*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_team_p
2484*0b57cec5SDimitry Andric       *th_serial_team; /*serialized team held in reserve*/
2485*0b57cec5SDimitry Andric 
2486*0b57cec5SDimitry Andric #if OMPT_SUPPORT
2487*0b57cec5SDimitry Andric   ompt_thread_info_t ompt_thread_info;
2488*0b57cec5SDimitry Andric #endif
2489*0b57cec5SDimitry Andric 
2490*0b57cec5SDimitry Andric   /* The following are also read by the master during reinit */
2491*0b57cec5SDimitry Andric   struct common_table *th_pri_common;
2492*0b57cec5SDimitry Andric 
2493*0b57cec5SDimitry Andric   volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2494*0b57cec5SDimitry Andric   /* while awaiting queuing lock acquire */
2495*0b57cec5SDimitry Andric 
2496*0b57cec5SDimitry Andric   volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2497*0b57cec5SDimitry Andric 
2498*0b57cec5SDimitry Andric   ident_t *th_ident;
2499*0b57cec5SDimitry Andric   unsigned th_x; // Random number generator data
2500*0b57cec5SDimitry Andric   unsigned th_a; // Random number generator data
2501*0b57cec5SDimitry Andric 
2502*0b57cec5SDimitry Andric   /* Tasking-related data for the thread */
2503*0b57cec5SDimitry Andric   kmp_task_team_t *th_task_team; // Task team struct
2504*0b57cec5SDimitry Andric   kmp_taskdata_t *th_current_task; // Innermost Task being executed
2505*0b57cec5SDimitry Andric   kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2506*0b57cec5SDimitry Andric   kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2507*0b57cec5SDimitry Andric   // at nested levels
2508*0b57cec5SDimitry Andric   kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2509*0b57cec5SDimitry Andric   kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2510*0b57cec5SDimitry Andric   kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2511*0b57cec5SDimitry Andric   // tasking, thus safe to reap
2512*0b57cec5SDimitry Andric 
2513*0b57cec5SDimitry Andric   /* More stuff for keeping track of active/sleeping threads (this part is
2514*0b57cec5SDimitry Andric      written by the worker thread) */
2515*0b57cec5SDimitry Andric   kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2516*0b57cec5SDimitry Andric   int th_active; // ! sleeping; 32 bits for TCR/TCW
2517*0b57cec5SDimitry Andric   struct cons_header *th_cons; // used for consistency check
2518*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
2519*0b57cec5SDimitry Andric   // used for hierarchical scheduling
2520*0b57cec5SDimitry Andric   kmp_hier_private_bdata_t *th_hier_bar_data;
2521*0b57cec5SDimitry Andric #endif
2522*0b57cec5SDimitry Andric 
2523*0b57cec5SDimitry Andric   /* Add the synchronizing data, which is cache aligned and padded. */
2524*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
2525*0b57cec5SDimitry Andric 
2526*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE volatile kmp_int32
2527*0b57cec5SDimitry Andric       th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2528*0b57cec5SDimitry Andric 
2529*0b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2530*0b57cec5SDimitry Andric #define NUM_LISTS 4
2531*0b57cec5SDimitry Andric   kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
2532*0b57cec5SDimitry Andric // allocation routines
2533*0b57cec5SDimitry Andric #endif
2534*0b57cec5SDimitry Andric 
2535*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
2536*0b57cec5SDimitry Andric   kmp_win32_cond_t th_suspend_cv;
2537*0b57cec5SDimitry Andric   kmp_win32_mutex_t th_suspend_mx;
2538*0b57cec5SDimitry Andric   std::atomic<int> th_suspend_init;
2539*0b57cec5SDimitry Andric #endif
2540*0b57cec5SDimitry Andric #if KMP_OS_UNIX
2541*0b57cec5SDimitry Andric   kmp_cond_align_t th_suspend_cv;
2542*0b57cec5SDimitry Andric   kmp_mutex_align_t th_suspend_mx;
2543*0b57cec5SDimitry Andric   std::atomic<int> th_suspend_init_count;
2544*0b57cec5SDimitry Andric #endif
2545*0b57cec5SDimitry Andric 
2546*0b57cec5SDimitry Andric #if USE_ITT_BUILD
2547*0b57cec5SDimitry Andric   kmp_itt_mark_t th_itt_mark_single;
2548*0b57cec5SDimitry Andric // alignment ???
2549*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
2550*0b57cec5SDimitry Andric #if KMP_STATS_ENABLED
2551*0b57cec5SDimitry Andric   kmp_stats_list *th_stats;
2552*0b57cec5SDimitry Andric #endif
2553*0b57cec5SDimitry Andric #if KMP_OS_UNIX
2554*0b57cec5SDimitry Andric   std::atomic<bool> th_blocking;
2555*0b57cec5SDimitry Andric #endif
2556*0b57cec5SDimitry Andric   kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2557*0b57cec5SDimitry Andric } kmp_base_info_t;
2558*0b57cec5SDimitry Andric 
2559*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_info {
2560*0b57cec5SDimitry Andric   double th_align; /* use worst case alignment */
2561*0b57cec5SDimitry Andric   char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
2562*0b57cec5SDimitry Andric   kmp_base_info_t th;
2563*0b57cec5SDimitry Andric } kmp_info_t;
2564*0b57cec5SDimitry Andric 
2565*0b57cec5SDimitry Andric // OpenMP thread team data structures
2566*0b57cec5SDimitry Andric 
2567*0b57cec5SDimitry Andric typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
2568*0b57cec5SDimitry Andric 
2569*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_sleep_team {
2570*0b57cec5SDimitry Andric   double dt_align; /* use worst case alignment */
2571*0b57cec5SDimitry Andric   char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2572*0b57cec5SDimitry Andric   kmp_base_data_t dt;
2573*0b57cec5SDimitry Andric } kmp_sleep_team_t;
2574*0b57cec5SDimitry Andric 
2575*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_ordered_team {
2576*0b57cec5SDimitry Andric   double dt_align; /* use worst case alignment */
2577*0b57cec5SDimitry Andric   char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2578*0b57cec5SDimitry Andric   kmp_base_data_t dt;
2579*0b57cec5SDimitry Andric } kmp_ordered_team_t;
2580*0b57cec5SDimitry Andric 
2581*0b57cec5SDimitry Andric typedef int (*launch_t)(int gtid);
2582*0b57cec5SDimitry Andric 
2583*0b57cec5SDimitry Andric /* Minimum number of ARGV entries to malloc if necessary */
2584*0b57cec5SDimitry Andric #define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2585*0b57cec5SDimitry Andric 
2586*0b57cec5SDimitry Andric // Set up how many argv pointers will fit in cache lines containing
2587*0b57cec5SDimitry Andric // t_inline_argv. Historically, we have supported at least 96 bytes. Using a
2588*0b57cec5SDimitry Andric // larger value to put more space between the master-write/worker-read section
2589*0b57cec5SDimitry Andric // and the read-write-by-all section seems to buy more performance on EPCC PARALLEL.
2590*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2591*0b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES                                                  \
2592*0b57cec5SDimitry Andric   (4 * CACHE_LINE -                                                            \
2593*0b57cec5SDimitry Andric    ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) +               \
2594*0b57cec5SDimitry Andric      sizeof(kmp_int16) + sizeof(kmp_uint32)) %                                 \
2595*0b57cec5SDimitry Andric     CACHE_LINE))
2596*0b57cec5SDimitry Andric #else
2597*0b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES                                                  \
2598*0b57cec5SDimitry Andric   (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
2599*0b57cec5SDimitry Andric #endif
2600*0b57cec5SDimitry Andric #define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
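// Worked example (illustrative, assuming CACHE_LINE == 64 and 8-byte
// pointers, i.e. KMP_PTR_SKIP == 8, on x86_64): the non-argv members counted
// above occupy 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so
//   KMP_INLINE_ARGV_BYTES   = 4*64 - (40 % 64) = 216
//   KMP_INLINE_ARGV_ENTRIES = 216 / 8          = 27
// and the 40 bytes of members plus 216 bytes of inline argv fill exactly
// four cache lines.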
2601*0b57cec5SDimitry Andric 
2602*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_team {
2603*0b57cec5SDimitry Andric   // Synchronization Data
2604*0b57cec5SDimitry Andric   // ---------------------------------------------------------------------------
2605*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
2606*0b57cec5SDimitry Andric   kmp_balign_team_t t_bar[bs_last_barrier];
2607*0b57cec5SDimitry Andric   std::atomic<int> t_construct; // count of single directive encountered by team
2608*0b57cec5SDimitry Andric   char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
2609*0b57cec5SDimitry Andric 
2610*0b57cec5SDimitry Andric   // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
2611*0b57cec5SDimitry Andric   std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
2612*0b57cec5SDimitry Andric   std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
2613*0b57cec5SDimitry Andric 
2614*0b57cec5SDimitry Andric   // Master only
2615*0b57cec5SDimitry Andric   // ---------------------------------------------------------------------------
2616*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
2617*0b57cec5SDimitry Andric   int t_master_this_cons; // "this_construct" single counter of master in parent
2618*0b57cec5SDimitry Andric   // team
2619*0b57cec5SDimitry Andric   ident_t *t_ident; // if volatile, have to change too much other crud to
2620*0b57cec5SDimitry Andric   // volatile too
2621*0b57cec5SDimitry Andric   kmp_team_p *t_parent; // parent team
2622*0b57cec5SDimitry Andric   kmp_team_p *t_next_pool; // next free team in the team pool
2623*0b57cec5SDimitry Andric   kmp_disp_t *t_dispatch; // thread's dispatch data
2624*0b57cec5SDimitry Andric   kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2625*0b57cec5SDimitry Andric   kmp_proc_bind_t t_proc_bind; // bind type for par region
2626*0b57cec5SDimitry Andric #if USE_ITT_BUILD
2627*0b57cec5SDimitry Andric   kmp_uint64 t_region_time; // region begin timestamp
2628*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
2629*0b57cec5SDimitry Andric 
2630*0b57cec5SDimitry Andric   // Master write, workers read
2631*0b57cec5SDimitry Andric   // --------------------------------------------------------------------------
2632*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE void **t_argv;
2633*0b57cec5SDimitry Andric   int t_argc;
2634*0b57cec5SDimitry Andric   int t_nproc; // number of threads in team
2635*0b57cec5SDimitry Andric   microtask_t t_pkfn;
2636*0b57cec5SDimitry Andric   launch_t t_invoke; // procedure to launch the microtask
2637*0b57cec5SDimitry Andric 
2638*0b57cec5SDimitry Andric #if OMPT_SUPPORT
2639*0b57cec5SDimitry Andric   ompt_team_info_t ompt_team_info;
2640*0b57cec5SDimitry Andric   ompt_lw_taskteam_t *ompt_serialized_team_info;
2641*0b57cec5SDimitry Andric #endif
2642*0b57cec5SDimitry Andric 
2643*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644*0b57cec5SDimitry Andric   kmp_int8 t_fp_control_saved;
2645*0b57cec5SDimitry Andric   kmp_int8 t_pad2b;
2646*0b57cec5SDimitry Andric   kmp_int16 t_x87_fpu_control_word; // FP control regs
2647*0b57cec5SDimitry Andric   kmp_uint32 t_mxcsr;
2648*0b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2649*0b57cec5SDimitry Andric 
2650*0b57cec5SDimitry Andric   void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
2651*0b57cec5SDimitry Andric 
2652*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_info_t **t_threads;
2653*0b57cec5SDimitry Andric   kmp_taskdata_t
2654*0b57cec5SDimitry Andric       *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2655*0b57cec5SDimitry Andric   int t_level; // nested parallel level
2656*0b57cec5SDimitry Andric 
2657*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE int t_max_argc;
2658*0b57cec5SDimitry Andric   int t_max_nproc; // max threads this team can handle (dynamically expandable)
2659*0b57cec5SDimitry Andric   int t_serialized; // levels deep of serialized teams
2660*0b57cec5SDimitry Andric   dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2661*0b57cec5SDimitry Andric   int t_id; // team's id, assigned by debugger.
2662*0b57cec5SDimitry Andric   int t_active_level; // nested active parallel level
2663*0b57cec5SDimitry Andric   kmp_r_sched_t t_sched; // run-time schedule for the team
2664*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
2665*0b57cec5SDimitry Andric   int t_first_place; // first & last place in parent thread's partition.
2666*0b57cec5SDimitry Andric   int t_last_place; // Restore these values to master after par region.
2667*0b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
2668*0b57cec5SDimitry Andric   int t_display_affinity;
2669*0b57cec5SDimitry Andric   int t_size_changed; // was team size changed? 0: no, 1: yes, -1: changed via
2670*0b57cec5SDimitry Andric   // omp_set_num_threads() call
2671*0b57cec5SDimitry Andric   omp_allocator_handle_t t_def_allocator; /* default allocator */
2672*0b57cec5SDimitry Andric 
2673*0b57cec5SDimitry Andric // Read/write by workers as well
2674*0b57cec5SDimitry Andric #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
2675*0b57cec5SDimitry Andric   // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
2676*0b57cec5SDimitry Andric   // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
2677*0b57cec5SDimitry Andric   // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
2678*0b57cec5SDimitry Andric   // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2679*0b57cec5SDimitry Andric   char dummy_padding[1024];
2680*0b57cec5SDimitry Andric #endif
2681*0b57cec5SDimitry Andric   // Internal control stack for additional nested teams.
2682*0b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
2683*0b57cec5SDimitry Andric   // for SERIALIZED teams nested 2 or more levels deep
2684*0b57cec5SDimitry Andric   // typed flag to store request state of cancellation
2685*0b57cec5SDimitry Andric   std::atomic<kmp_int32> t_cancel_request;
2686*0b57cec5SDimitry Andric   int t_master_active; // save on fork, restore on join
2687*0b57cec5SDimitry Andric   void *t_copypriv_data; // team specific pointer to copyprivate data array
2688*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
2689*0b57cec5SDimitry Andric   std::atomic<kmp_uint32> t_copyin_counter;
2690*0b57cec5SDimitry Andric #endif
2691*0b57cec5SDimitry Andric #if USE_ITT_BUILD
2692*0b57cec5SDimitry Andric   void *t_stack_id; // team specific stack stitching id (for ittnotify)
2693*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
2694*0b57cec5SDimitry Andric } kmp_base_team_t;
2695*0b57cec5SDimitry Andric 
2696*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_team {
2697*0b57cec5SDimitry Andric   kmp_base_team_t t;
2698*0b57cec5SDimitry Andric   double t_align; /* use worst case alignment */
2699*0b57cec5SDimitry Andric   char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
2700*0b57cec5SDimitry Andric };
2701*0b57cec5SDimitry Andric 
2702*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_time_global {
2703*0b57cec5SDimitry Andric   double dt_align; /* use worst case alignment */
2704*0b57cec5SDimitry Andric   char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2705*0b57cec5SDimitry Andric   kmp_base_data_t dt;
2706*0b57cec5SDimitry Andric } kmp_time_global_t;
2707*0b57cec5SDimitry Andric 
2708*0b57cec5SDimitry Andric typedef struct kmp_base_global {
2709*0b57cec5SDimitry Andric   /* cache-aligned */
2710*0b57cec5SDimitry Andric   kmp_time_global_t g_time;
2711*0b57cec5SDimitry Andric 
2712*0b57cec5SDimitry Andric   /* non cache-aligned */
2713*0b57cec5SDimitry Andric   volatile int g_abort;
2714*0b57cec5SDimitry Andric   volatile int g_done;
2715*0b57cec5SDimitry Andric 
2716*0b57cec5SDimitry Andric   int g_dynamic;
2717*0b57cec5SDimitry Andric   enum dynamic_mode g_dynamic_mode;
2718*0b57cec5SDimitry Andric } kmp_base_global_t;
2719*0b57cec5SDimitry Andric 
2720*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_global {
2721*0b57cec5SDimitry Andric   kmp_base_global_t g;
2722*0b57cec5SDimitry Andric   double g_align; /* use worst case alignment */
2723*0b57cec5SDimitry Andric   char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
2724*0b57cec5SDimitry Andric } kmp_global_t;
2725*0b57cec5SDimitry Andric 
2726*0b57cec5SDimitry Andric typedef struct kmp_base_root {
2727*0b57cec5SDimitry Andric   // TODO: GEH - combine r_active with r_in_parallel then r_active ==
2728*0b57cec5SDimitry Andric   // (r_in_parallel >= 0)
2729*0b57cec5SDimitry Andric   // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
2730*0b57cec5SDimitry Andric   // the synch overhead of keeping r_active
2731*0b57cec5SDimitry Andric   volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
2732*0b57cec5SDimitry Andric   // keeps a count of active parallel regions per root
2733*0b57cec5SDimitry Andric   std::atomic<int> r_in_parallel;
2734*0b57cec5SDimitry Andric   // GEH: This is misnamed, should be r_active_levels
2735*0b57cec5SDimitry Andric   kmp_team_t *r_root_team;
2736*0b57cec5SDimitry Andric   kmp_team_t *r_hot_team;
2737*0b57cec5SDimitry Andric   kmp_info_t *r_uber_thread;
2738*0b57cec5SDimitry Andric   kmp_lock_t r_begin_lock;
2739*0b57cec5SDimitry Andric   volatile int r_begin;
2740*0b57cec5SDimitry Andric   int r_blocktime; /* blocktime for this root and descendants */
2741*0b57cec5SDimitry Andric } kmp_base_root_t;
2742*0b57cec5SDimitry Andric 
2743*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_root {
2744*0b57cec5SDimitry Andric   kmp_base_root_t r;
2745*0b57cec5SDimitry Andric   double r_align; /* use worst case alignment */
2746*0b57cec5SDimitry Andric   char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
2747*0b57cec5SDimitry Andric } kmp_root_t;
2748*0b57cec5SDimitry Andric 
2749*0b57cec5SDimitry Andric struct fortran_inx_info {
2750*0b57cec5SDimitry Andric   kmp_int32 data;
2751*0b57cec5SDimitry Andric };
2752*0b57cec5SDimitry Andric 
2753*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
2754*0b57cec5SDimitry Andric 
2755*0b57cec5SDimitry Andric extern int __kmp_settings;
2756*0b57cec5SDimitry Andric extern int __kmp_duplicate_library_ok;
2757*0b57cec5SDimitry Andric #if USE_ITT_BUILD
2758*0b57cec5SDimitry Andric extern int __kmp_forkjoin_frames;
2759*0b57cec5SDimitry Andric extern int __kmp_forkjoin_frames_mode;
2760*0b57cec5SDimitry Andric #endif
2761*0b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
2762*0b57cec5SDimitry Andric extern int __kmp_determ_red;
2763*0b57cec5SDimitry Andric 
2764*0b57cec5SDimitry Andric #ifdef KMP_DEBUG
2765*0b57cec5SDimitry Andric extern int kmp_a_debug;
2766*0b57cec5SDimitry Andric extern int kmp_b_debug;
2767*0b57cec5SDimitry Andric extern int kmp_c_debug;
2768*0b57cec5SDimitry Andric extern int kmp_d_debug;
2769*0b57cec5SDimitry Andric extern int kmp_e_debug;
2770*0b57cec5SDimitry Andric extern int kmp_f_debug;
2771*0b57cec5SDimitry Andric #endif /* KMP_DEBUG */
2772*0b57cec5SDimitry Andric 
2773*0b57cec5SDimitry Andric /* For debug information logging using rotating buffer */
2774*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_INIT 512
2775*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_MIN 1
2776*0b57cec5SDimitry Andric 
2777*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_INIT 128
2778*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_MIN 2
2779*0b57cec5SDimitry Andric 
2780*0b57cec5SDimitry Andric extern int
2781*0b57cec5SDimitry Andric     __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
2782*0b57cec5SDimitry Andric extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
2783*0b57cec5SDimitry Andric extern int
2784*0b57cec5SDimitry Andric     __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
2785*0b57cec5SDimitry Andric extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
2786*0b57cec5SDimitry Andric                                       entry pointer */
2787*0b57cec5SDimitry Andric 
2788*0b57cec5SDimitry Andric extern char *__kmp_debug_buffer; /* Debug buffer itself */
2789*0b57cec5SDimitry Andric extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
2790*0b57cec5SDimitry Andric                                               printed in buffer so far */
2791*0b57cec5SDimitry Andric extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
2792*0b57cec5SDimitry Andric                                           recommended in warnings */
2793*0b57cec5SDimitry Andric /* end rotating debug buffer */
2794*0b57cec5SDimitry Andric 
2795*0b57cec5SDimitry Andric #ifdef KMP_DEBUG
2796*0b57cec5SDimitry Andric extern int __kmp_par_range; /* +1 => only go par for constructs in range */
2797*0b57cec5SDimitry Andric 
2798*0b57cec5SDimitry Andric #define KMP_PAR_RANGE_ROUTINE_LEN 1024
2799*0b57cec5SDimitry Andric extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
2800*0b57cec5SDimitry Andric #define KMP_PAR_RANGE_FILENAME_LEN 1024
2801*0b57cec5SDimitry Andric extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
2802*0b57cec5SDimitry Andric extern int __kmp_par_range_lb;
2803*0b57cec5SDimitry Andric extern int __kmp_par_range_ub;
2804*0b57cec5SDimitry Andric #endif
2805*0b57cec5SDimitry Andric 
2806*0b57cec5SDimitry Andric /* For printing out dynamic storage map for threads and teams */
2807*0b57cec5SDimitry Andric extern int
2808*0b57cec5SDimitry Andric     __kmp_storage_map; /* True means print storage map for threads and teams */
2809*0b57cec5SDimitry Andric extern int __kmp_storage_map_verbose; /* True means storage map includes
2810*0b57cec5SDimitry Andric                                          placement info */
2811*0b57cec5SDimitry Andric extern int __kmp_storage_map_verbose_specified;
2812*0b57cec5SDimitry Andric 
2813*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2814*0b57cec5SDimitry Andric extern kmp_cpuinfo_t __kmp_cpuinfo;
2815*0b57cec5SDimitry Andric #endif
2816*0b57cec5SDimitry Andric 
2817*0b57cec5SDimitry Andric extern volatile int __kmp_init_serial;
2818*0b57cec5SDimitry Andric extern volatile int __kmp_init_gtid;
2819*0b57cec5SDimitry Andric extern volatile int __kmp_init_common;
2820*0b57cec5SDimitry Andric extern volatile int __kmp_init_middle;
2821*0b57cec5SDimitry Andric extern volatile int __kmp_init_parallel;
2822*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2823*0b57cec5SDimitry Andric extern volatile int __kmp_init_monitor;
2824*0b57cec5SDimitry Andric #endif
2825*0b57cec5SDimitry Andric extern volatile int __kmp_init_user_locks;
2826*0b57cec5SDimitry Andric extern int __kmp_init_counter;
2827*0b57cec5SDimitry Andric extern int __kmp_root_counter;
2828*0b57cec5SDimitry Andric extern int __kmp_version;
2829*0b57cec5SDimitry Andric 
2830*0b57cec5SDimitry Andric /* list of addresses of allocated caches for commons */
2831*0b57cec5SDimitry Andric extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
2832*0b57cec5SDimitry Andric 
2833*0b57cec5SDimitry Andric /* Barrier algorithm types and options */
2834*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
2835*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_bb_dflt;
2836*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
2837*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
2838*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
2839*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
2840*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
2841*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
2842*0b57cec5SDimitry Andric extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
2843*0b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
2844*0b57cec5SDimitry Andric extern char const *__kmp_barrier_type_name[bs_last_barrier];
2845*0b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_name[bp_last_bar];
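// Illustrative sketch (an assumption about how the branch-bit tables are
// consumed elsewhere in the runtime, not something this header defines):
// treating each branch-bit value as log2 of the barrier-tree fanout gives
#if 0
static inline kmp_uint32 example_gather_fanout(enum barrier_type bt) {
  return 1U << __kmp_barrier_gather_branch_bits[bt]; // e.g. bits=2 -> 4-way
}
#endif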
2846*0b57cec5SDimitry Andric 
2847*0b57cec5SDimitry Andric /* Global Locks */
2848*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
2849*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
2850*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_task_team_lock;
2851*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
2852*0b57cec5SDimitry Andric     __kmp_exit_lock; /* exit() is not always thread-safe */
2853*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2854*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
2855*0b57cec5SDimitry Andric     __kmp_monitor_lock; /* control monitor thread creation */
2856*0b57cec5SDimitry Andric #endif
2857*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
2858*0b57cec5SDimitry Andric     __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
2859*0b57cec5SDimitry Andric                              __kmp_threads expansion to co-exist */
2860*0b57cec5SDimitry Andric 
2861*0b57cec5SDimitry Andric extern kmp_lock_t __kmp_global_lock; /* control OS/global access  */
2862*0b57cec5SDimitry Andric extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access  */
2863*0b57cec5SDimitry Andric extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
2864*0b57cec5SDimitry Andric 
2865*0b57cec5SDimitry Andric extern enum library_type __kmp_library;
2866*0b57cec5SDimitry Andric 
2867*0b57cec5SDimitry Andric extern enum sched_type __kmp_sched; /* default runtime scheduling */
2868*0b57cec5SDimitry Andric extern enum sched_type __kmp_static; /* default static scheduling method */
2869*0b57cec5SDimitry Andric extern enum sched_type __kmp_guided; /* default guided scheduling method */
2870*0b57cec5SDimitry Andric extern enum sched_type __kmp_auto; /* default auto scheduling method */
2871*0b57cec5SDimitry Andric extern int __kmp_chunk; /* default runtime chunk size */
2872*0b57cec5SDimitry Andric 
2873*0b57cec5SDimitry Andric extern size_t __kmp_stksize; /* stack size per thread         */
2874*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2875*0b57cec5SDimitry Andric extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
2876*0b57cec5SDimitry Andric #endif
2877*0b57cec5SDimitry Andric extern size_t __kmp_stkoffset; /* stack offset per thread       */
2878*0b57cec5SDimitry Andric extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
2879*0b57cec5SDimitry Andric 
2880*0b57cec5SDimitry Andric extern size_t
2881*0b57cec5SDimitry Andric     __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
2882*0b57cec5SDimitry Andric extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
2883*0b57cec5SDimitry Andric extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
2884*0b57cec5SDimitry Andric extern int __kmp_env_checks; /* was KMP_CHECKS specified?    */
2885*0b57cec5SDimitry Andric extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
2886*0b57cec5SDimitry Andric extern int __kmp_generate_warnings; /* should we issue warnings? */
2887*0b57cec5SDimitry Andric extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
2888*0b57cec5SDimitry Andric 
2889*0b57cec5SDimitry Andric #ifdef DEBUG_SUSPEND
2890*0b57cec5SDimitry Andric extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
2891*0b57cec5SDimitry Andric #endif
2892*0b57cec5SDimitry Andric 
2893*0b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield;
2894*0b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield_exp_set;
2895*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_init;
2896*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_next;
2897*0b57cec5SDimitry Andric 
2898*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
2899*0b57cec5SDimitry Andric extern int __kmp_allThreadsSpecified;
2900*0b57cec5SDimitry Andric 
2901*0b57cec5SDimitry Andric extern size_t __kmp_align_alloc;
2902*0b57cec5SDimitry Andric /* following data protected by initialization routines */
2903*0b57cec5SDimitry Andric extern int __kmp_xproc; /* number of processors in the system */
2904*0b57cec5SDimitry Andric extern int __kmp_avail_proc; /* number of processors available to the process */
2905*0b57cec5SDimitry Andric extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
2906*0b57cec5SDimitry Andric extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
2907*0b57cec5SDimitry Andric // maximum total number of concurrently-existing threads on device
2908*0b57cec5SDimitry Andric extern int __kmp_max_nth;
2909*0b57cec5SDimitry Andric // maximum total number of concurrently-existing threads in a contention group
2910*0b57cec5SDimitry Andric extern int __kmp_cg_max_nth;
2911*0b57cec5SDimitry Andric extern int __kmp_teams_max_nth; // max threads used in a teams construct
2912*0b57cec5SDimitry Andric extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
2913*0b57cec5SDimitry Andric                                       __kmp_root */
2914*0b57cec5SDimitry Andric extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
2915*0b57cec5SDimitry Andric                                    region a la OMP_NUM_THREADS */
2916*0b57cec5SDimitry Andric extern int __kmp_dflt_team_nth_ub; /* upper bound on __kmp_dflt_team_nth
2917*0b57cec5SDimitry Andric                                       determined at serial initialization */
2918*0b57cec5SDimitry Andric extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
2919*0b57cec5SDimitry Andric                                  used (fixed) */
2920*0b57cec5SDimitry Andric extern int __kmp_tp_cached; /* whether threadprivate cache has been created
2921*0b57cec5SDimitry Andric                                (__kmpc_threadprivate_cached()) */
2922*0b57cec5SDimitry Andric extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
2923*0b57cec5SDimitry Andric                                     blocking (env setting) */
2924*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
2925*0b57cec5SDimitry Andric extern int
2926*0b57cec5SDimitry Andric     __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
2927*0b57cec5SDimitry Andric extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
2928*0b57cec5SDimitry Andric                                   blocking */
2929*0b57cec5SDimitry Andric #endif
2930*0b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME
2931*0b57cec5SDimitry Andric extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
2932*0b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */
2933*0b57cec5SDimitry Andric #ifdef KMP_DFLT_NTH_CORES
2934*0b57cec5SDimitry Andric extern int __kmp_ncores; /* Total number of cores for threads placement */
2935*0b57cec5SDimitry Andric #endif
2936*0b57cec5SDimitry Andric /* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
2937*0b57cec5SDimitry Andric extern int __kmp_abort_delay;
2938*0b57cec5SDimitry Andric 
2939*0b57cec5SDimitry Andric extern int __kmp_need_register_atfork_specified;
2940*0b57cec5SDimitry Andric extern int
2941*0b57cec5SDimitry Andric     __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
2942*0b57cec5SDimitry Andric                                    install fork handler */
2943*0b57cec5SDimitry Andric extern int __kmp_gtid_mode; /* Method of getting gtid, values:
2944*0b57cec5SDimitry Andric                                0 - not set, will be set at runtime
2945*0b57cec5SDimitry Andric                                1 - using stack search
2946*0b57cec5SDimitry Andric                                2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
2947*0b57cec5SDimitry Andric                                    X*) or TlsGetValue(Windows* OS))
2948*0b57cec5SDimitry Andric                                3 - static TLS (__declspec(thread) __kmp_gtid),
2949*0b57cec5SDimitry Andric                                    Linux* OS .so only.  */
2950*0b57cec5SDimitry Andric extern int
2951*0b57cec5SDimitry Andric     __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
2952*0b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID
2953*0b57cec5SDimitry Andric extern KMP_THREAD_LOCAL int __kmp_gtid;
2954*0b57cec5SDimitry Andric #endif
2955*0b57cec5SDimitry Andric extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
2956*0b57cec5SDimitry Andric extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
2957*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2958*0b57cec5SDimitry Andric extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
2959*0b57cec5SDimitry Andric extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
2960*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
2961*0b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2962*0b57cec5SDimitry Andric 
2963*0b57cec5SDimitry Andric // default max_active_levels when nested parallelism is enabled via
2964*0b57cec5SDimitry Andric // OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, or OMP_PROC_BIND
2965*0b57cec5SDimitry Andric extern int __kmp_dflt_max_active_levels;
2966*0b57cec5SDimitry Andric // Indicates whether value of __kmp_dflt_max_active_levels was already
2967*0b57cec5SDimitry Andric // explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
2968*0b57cec5SDimitry Andric extern bool __kmp_dflt_max_active_levels_set;
2969*0b57cec5SDimitry Andric extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
2970*0b57cec5SDimitry Andric                                           concurrent execution per team */
2971*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
2972*0b57cec5SDimitry Andric extern int __kmp_hot_teams_mode;
2973*0b57cec5SDimitry Andric extern int __kmp_hot_teams_max_level;
2974*0b57cec5SDimitry Andric #endif
2975*0b57cec5SDimitry Andric 
2976*0b57cec5SDimitry Andric #if KMP_OS_LINUX
2977*0b57cec5SDimitry Andric extern enum clock_function_type __kmp_clock_function;
2978*0b57cec5SDimitry Andric extern int __kmp_clock_function_param;
2979*0b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */
2980*0b57cec5SDimitry Andric 
2981*0b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED
2982*0b57cec5SDimitry Andric extern enum mic_type __kmp_mic_type;
2983*0b57cec5SDimitry Andric #endif
2984*0b57cec5SDimitry Andric 
2985*0b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE
2986*0b57cec5SDimitry Andric extern double __kmp_load_balance_interval; // load balance algorithm interval
2987*0b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */
2988*0b57cec5SDimitry Andric 
2989*0b57cec5SDimitry Andric // OpenMP 3.1 - Nested num threads array
2990*0b57cec5SDimitry Andric typedef struct kmp_nested_nthreads_t {
2991*0b57cec5SDimitry Andric   int *nth;
2992*0b57cec5SDimitry Andric   int size;
2993*0b57cec5SDimitry Andric   int used;
2994*0b57cec5SDimitry Andric } kmp_nested_nthreads_t;
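// Illustrative sketch (hypothetical values): OMP_NUM_THREADS=4,3,2 would be
// represented with nth pointing at {4, 3, 2}, used == 3, and size >= used;
// nesting level i then defaults to nth[i] threads.
#if 0
static int example_nth[] = {4, 3, 2};
static kmp_nested_nthreads_t example_nested_nth = {example_nth, /*size=*/3,
                                                   /*used=*/3};
#endif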
2995*0b57cec5SDimitry Andric 
2996*0b57cec5SDimitry Andric extern kmp_nested_nthreads_t __kmp_nested_nth;
2997*0b57cec5SDimitry Andric 
2998*0b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS
2999*0b57cec5SDimitry Andric 
3000*0b57cec5SDimitry Andric // Parameters for the speculative lock backoff system.
3001*0b57cec5SDimitry Andric struct kmp_adaptive_backoff_params_t {
3002*0b57cec5SDimitry Andric   // Number of soft retries before it counts as a hard retry.
3003*0b57cec5SDimitry Andric   kmp_uint32 max_soft_retries;
3004*0b57cec5SDimitry Andric   // Badness is a bit mask: 0, 1, 3, 7, 15, ...; each hard failure moves us
3005*0b57cec5SDimitry Andric   // one step further along this sequence.
3006*0b57cec5SDimitry Andric   kmp_uint32 max_badness;
3007*0b57cec5SDimitry Andric };
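// Illustrative sketch (an assumption drawn from the comment above, not the
// runtime's actual update code): each hard failure shifts one more bit into
// the badness mask, saturating at max_badness.
#if 0
static inline kmp_uint32 example_step_badness(kmp_uint32 badness,
                                              kmp_uint32 max_badness) {
  kmp_uint32 next = (badness << 1) | 1; // 0 -> 1 -> 3 -> 7 -> 15 -> ...
  return next > max_badness ? max_badness : next;
}
#endif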
3008*0b57cec5SDimitry Andric 
3009*0b57cec5SDimitry Andric extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
3010*0b57cec5SDimitry Andric 
3011*0b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS
3012*0b57cec5SDimitry Andric extern const char *__kmp_speculative_statsfile;
3013*0b57cec5SDimitry Andric #endif
3014*0b57cec5SDimitry Andric 
3015*0b57cec5SDimitry Andric #endif // KMP_USE_ADAPTIVE_LOCKS
3016*0b57cec5SDimitry Andric 
3017*0b57cec5SDimitry Andric extern int __kmp_display_env; /* TRUE or FALSE */
3018*0b57cec5SDimitry Andric extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3019*0b57cec5SDimitry Andric extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3020*0b57cec5SDimitry Andric 
3021*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
3022*0b57cec5SDimitry Andric 
3023*0b57cec5SDimitry Andric /* the following are protected by the fork/join lock */
3024*0b57cec5SDimitry Andric /* write: lock  read: anytime */
3025*0b57cec5SDimitry Andric extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3026*0b57cec5SDimitry Andric /* read/write: lock */
3027*0b57cec5SDimitry Andric extern volatile kmp_team_t *__kmp_team_pool;
3028*0b57cec5SDimitry Andric extern volatile kmp_info_t *__kmp_thread_pool;
3029*0b57cec5SDimitry Andric extern kmp_info_t *__kmp_thread_pool_insert_pt;
3030*0b57cec5SDimitry Andric 
3031*0b57cec5SDimitry Andric // total number of threads reachable from some root thread, including all root threads
3032*0b57cec5SDimitry Andric extern volatile int __kmp_nth;
3033*0b57cec5SDimitry Andric /* total number of threads reachable from some root thread including all root
3034*0b57cec5SDimitry Andric    threads, and those in the thread pool */
3035*0b57cec5SDimitry Andric extern volatile int __kmp_all_nth;
3036*0b57cec5SDimitry Andric extern std::atomic<int> __kmp_thread_pool_active_nth;
3037*0b57cec5SDimitry Andric 
3038*0b57cec5SDimitry Andric extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3039*0b57cec5SDimitry Andric /* end data protected by fork/join lock */
3040*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
3041*0b57cec5SDimitry Andric 
3042*0b57cec5SDimitry Andric #define __kmp_get_gtid() __kmp_get_global_thread_id()
3043*0b57cec5SDimitry Andric #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
3044*0b57cec5SDimitry Andric #define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
3045*0b57cec5SDimitry Andric #define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
3046*0b57cec5SDimitry Andric #define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
3047*0b57cec5SDimitry Andric 
3048*0b57cec5SDimitry Andric // AT: Which way is correct?
3049*0b57cec5SDimitry Andric // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
3050*0b57cec5SDimitry Andric // AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
3051*0b57cec5SDimitry Andric #define __kmp_get_team_num_threads(gtid)                                       \
3052*0b57cec5SDimitry Andric   (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3053*0b57cec5SDimitry Andric 
3054*0b57cec5SDimitry Andric static inline bool KMP_UBER_GTID(int gtid) {
3055*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3056*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3057*0b57cec5SDimitry Andric   return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3058*0b57cec5SDimitry Andric           __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3059*0b57cec5SDimitry Andric }
3060*0b57cec5SDimitry Andric 
3061*0b57cec5SDimitry Andric static inline int __kmp_tid_from_gtid(int gtid) {
3062*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
3063*0b57cec5SDimitry Andric   return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3064*0b57cec5SDimitry Andric }
3065*0b57cec5SDimitry Andric 
3066*0b57cec5SDimitry Andric static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3067*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tid >= 0 && team);
3068*0b57cec5SDimitry Andric   return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3069*0b57cec5SDimitry Andric }
3070*0b57cec5SDimitry Andric 
3071*0b57cec5SDimitry Andric static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3072*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thr);
3073*0b57cec5SDimitry Andric   return thr->th.th_info.ds.ds_gtid;
3074*0b57cec5SDimitry Andric }
3075*0b57cec5SDimitry Andric 
3076*0b57cec5SDimitry Andric static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3077*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
3078*0b57cec5SDimitry Andric   return __kmp_threads[gtid];
3079*0b57cec5SDimitry Andric }
3080*0b57cec5SDimitry Andric 
3081*0b57cec5SDimitry Andric static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3082*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
3083*0b57cec5SDimitry Andric   return __kmp_threads[gtid]->th.th_team;
3084*0b57cec5SDimitry Andric }
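// Illustrative usage sketch of the accessors above: starting from a global
// thread id, recover the thread descriptor, its team, and its team-local tid.
#if 0
static inline void example_identify(int gtid) {
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  int tid = __kmp_tid_from_gtid(gtid); // tid 0 is the team's master
  KMP_DEBUG_ASSERT(__kmp_gtid_from_tid(tid, team) == gtid);
  KMP_DEBUG_ASSERT(thr == team->t.t_threads[tid]);
}
#endif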
3085*0b57cec5SDimitry Andric 
3086*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
3087*0b57cec5SDimitry Andric 
3088*0b57cec5SDimitry Andric extern kmp_global_t __kmp_global; /* global status */
3089*0b57cec5SDimitry Andric 
3090*0b57cec5SDimitry Andric extern kmp_info_t __kmp_monitor;
3091*0b57cec5SDimitry Andric // For Debugging Support Library
3092*0b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_team_counter;
3093*0b57cec5SDimitry Andric // For Debugging Support Library
3094*0b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_task_counter;
3095*0b57cec5SDimitry Andric 
3096*0b57cec5SDimitry Andric #if USE_DEBUGGER
3097*0b57cec5SDimitry Andric #define _KMP_GEN_ID(counter)                                                   \
3098*0b57cec5SDimitry Andric   (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
3099*0b57cec5SDimitry Andric #else
3100*0b57cec5SDimitry Andric #define _KMP_GEN_ID(counter) (~0)
3101*0b57cec5SDimitry Andric #endif /* USE_DEBUGGER */
3102*0b57cec5SDimitry Andric 
3103*0b57cec5SDimitry Andric #define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
3104*0b57cec5SDimitry Andric #define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
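// Illustrative sketch: when __kmp_debugging is true each expansion produces
// a fresh positive id (1, 2, 3, ...); otherwise ids are the constant ~0.
#if 0
kmp_int32 example_team_id = KMP_GEN_TEAM_ID();
#endif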
3105*0b57cec5SDimitry Andric 
3106*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3107*0b57cec5SDimitry Andric 
3108*0b57cec5SDimitry Andric extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3109*0b57cec5SDimitry Andric                                          size_t size, char const *format, ...);
3110*0b57cec5SDimitry Andric 
3111*0b57cec5SDimitry Andric extern void __kmp_serial_initialize(void);
3112*0b57cec5SDimitry Andric extern void __kmp_middle_initialize(void);
3113*0b57cec5SDimitry Andric extern void __kmp_parallel_initialize(void);
3114*0b57cec5SDimitry Andric 
3115*0b57cec5SDimitry Andric extern void __kmp_internal_begin(void);
3116*0b57cec5SDimitry Andric extern void __kmp_internal_end_library(int gtid);
3117*0b57cec5SDimitry Andric extern void __kmp_internal_end_thread(int gtid);
3118*0b57cec5SDimitry Andric extern void __kmp_internal_end_atexit(void);
3119*0b57cec5SDimitry Andric extern void __kmp_internal_end_fini(void);
3120*0b57cec5SDimitry Andric extern void __kmp_internal_end_dtor(void);
3121*0b57cec5SDimitry Andric extern void __kmp_internal_end_dest(void *);
3122*0b57cec5SDimitry Andric 
3123*0b57cec5SDimitry Andric extern int __kmp_register_root(int initial_thread);
3124*0b57cec5SDimitry Andric extern void __kmp_unregister_root(int gtid);
3125*0b57cec5SDimitry Andric 
3126*0b57cec5SDimitry Andric extern int __kmp_ignore_mppbeg(void);
3127*0b57cec5SDimitry Andric extern int __kmp_ignore_mppend(void);
3128*0b57cec5SDimitry Andric 
3129*0b57cec5SDimitry Andric extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3130*0b57cec5SDimitry Andric extern void __kmp_exit_single(int gtid);
3131*0b57cec5SDimitry Andric 
3132*0b57cec5SDimitry Andric extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3133*0b57cec5SDimitry Andric extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3134*0b57cec5SDimitry Andric 
3135*0b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE
3136*0b57cec5SDimitry Andric extern int __kmp_get_load_balance(int);
3137*0b57cec5SDimitry Andric #endif
3138*0b57cec5SDimitry Andric 
3139*0b57cec5SDimitry Andric extern int __kmp_get_global_thread_id(void);
3140*0b57cec5SDimitry Andric extern int __kmp_get_global_thread_id_reg(void);
3141*0b57cec5SDimitry Andric extern void __kmp_exit_thread(int exit_status);
3142*0b57cec5SDimitry Andric extern void __kmp_abort(char const *format, ...);
3143*0b57cec5SDimitry Andric extern void __kmp_abort_thread(void);
3144*0b57cec5SDimitry Andric KMP_NORETURN extern void __kmp_abort_process(void);
3145*0b57cec5SDimitry Andric extern void __kmp_warn(char const *format, ...);
3146*0b57cec5SDimitry Andric 
3147*0b57cec5SDimitry Andric extern void __kmp_set_num_threads(int new_nth, int gtid);
3148*0b57cec5SDimitry Andric 
3149*0b57cec5SDimitry Andric // Returns current thread (pointer to kmp_info_t). Current thread *must* be
3150*0b57cec5SDimitry Andric // registered.
3151*0b57cec5SDimitry Andric static inline kmp_info_t *__kmp_entry_thread() {
3152*0b57cec5SDimitry Andric   int gtid = __kmp_entry_gtid();
3153*0b57cec5SDimitry Andric 
3154*0b57cec5SDimitry Andric   return __kmp_threads[gtid];
3155*0b57cec5SDimitry Andric }
3156*0b57cec5SDimitry Andric 
3157*0b57cec5SDimitry Andric extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3158*0b57cec5SDimitry Andric extern int __kmp_get_max_active_levels(int gtid);
3159*0b57cec5SDimitry Andric extern int __kmp_get_ancestor_thread_num(int gtid, int level);
3160*0b57cec5SDimitry Andric extern int __kmp_get_team_size(int gtid, int level);
3161*0b57cec5SDimitry Andric extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3162*0b57cec5SDimitry Andric extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3163*0b57cec5SDimitry Andric 
3164*0b57cec5SDimitry Andric extern unsigned short __kmp_get_random(kmp_info_t *thread);
3165*0b57cec5SDimitry Andric extern void __kmp_init_random(kmp_info_t *thread);
3166*0b57cec5SDimitry Andric 
3167*0b57cec5SDimitry Andric extern kmp_r_sched_t __kmp_get_schedule_global(void);
3168*0b57cec5SDimitry Andric extern void __kmp_adjust_num_threads(int new_nproc);
3169*0b57cec5SDimitry Andric extern void __kmp_check_stksize(size_t *val);
3170*0b57cec5SDimitry Andric 
3171*0b57cec5SDimitry Andric extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
3172*0b57cec5SDimitry Andric extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
3173*0b57cec5SDimitry Andric extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
3174*0b57cec5SDimitry Andric #define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
3175*0b57cec5SDimitry Andric #define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
3176*0b57cec5SDimitry Andric #define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
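// Illustrative sketch (KMP_SRC_LOC_DECL/KMP_SRC_LOC_CURR are defined
// elsewhere in the runtime; the assumption here is that in debug builds they
// pass the caller's __FILE__/__LINE__ so allocations can be attributed to
// their call sites).
#if 0
static inline void example_tracked_alloc() {
  void *block = __kmp_allocate(128); // records this call site when
                                     // source-location tracking is enabled
  __kmp_free(block);
}
#endif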
3177*0b57cec5SDimitry Andric 
3178*0b57cec5SDimitry Andric #if USE_FAST_MEMORY
3179*0b57cec5SDimitry Andric extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3180*0b57cec5SDimitry Andric                                   size_t size KMP_SRC_LOC_DECL);
3181*0b57cec5SDimitry Andric extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3182*0b57cec5SDimitry Andric extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3183*0b57cec5SDimitry Andric extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3184*0b57cec5SDimitry Andric #define __kmp_fast_allocate(this_thr, size)                                    \
3185*0b57cec5SDimitry Andric   ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3186*0b57cec5SDimitry Andric #define __kmp_fast_free(this_thr, ptr)                                         \
3187*0b57cec5SDimitry Andric   ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3188*0b57cec5SDimitry Andric #endif
3189*0b57cec5SDimitry Andric 
3190*0b57cec5SDimitry Andric extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3191*0b57cec5SDimitry Andric extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3192*0b57cec5SDimitry Andric                                   size_t elsize KMP_SRC_LOC_DECL);
3193*0b57cec5SDimitry Andric extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3194*0b57cec5SDimitry Andric                                    size_t size KMP_SRC_LOC_DECL);
3195*0b57cec5SDimitry Andric extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3196*0b57cec5SDimitry Andric #define __kmp_thread_malloc(th, size)                                          \
3197*0b57cec5SDimitry Andric   ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3198*0b57cec5SDimitry Andric #define __kmp_thread_calloc(th, nelem, elsize)                                 \
3199*0b57cec5SDimitry Andric   ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3200*0b57cec5SDimitry Andric #define __kmp_thread_realloc(th, ptr, size)                                    \
3201*0b57cec5SDimitry Andric   ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3202*0b57cec5SDimitry Andric #define __kmp_thread_free(th, ptr)                                             \
3203*0b57cec5SDimitry Andric   ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3204*0b57cec5SDimitry Andric 
3205*0b57cec5SDimitry Andric #define KMP_INTERNAL_MALLOC(sz) malloc(sz)
3206*0b57cec5SDimitry Andric #define KMP_INTERNAL_FREE(p) free(p)
3207*0b57cec5SDimitry Andric #define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
3208*0b57cec5SDimitry Andric #define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
3209*0b57cec5SDimitry Andric 
3210*0b57cec5SDimitry Andric extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3211*0b57cec5SDimitry Andric 
3212*0b57cec5SDimitry Andric extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3213*0b57cec5SDimitry Andric                                  kmp_proc_bind_t proc_bind);
3214*0b57cec5SDimitry Andric extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3215*0b57cec5SDimitry Andric                                  int num_threads);
3216*0b57cec5SDimitry Andric 
3217*0b57cec5SDimitry Andric extern void __kmp_yield();
3218*0b57cec5SDimitry Andric 
3219*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3220*0b57cec5SDimitry Andric                                    enum sched_type schedule, kmp_int32 lb,
3221*0b57cec5SDimitry Andric                                    kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3222*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3223*0b57cec5SDimitry Andric                                     enum sched_type schedule, kmp_uint32 lb,
3224*0b57cec5SDimitry Andric                                     kmp_uint32 ub, kmp_int32 st,
3225*0b57cec5SDimitry Andric                                     kmp_int32 chunk);
3226*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3227*0b57cec5SDimitry Andric                                    enum sched_type schedule, kmp_int64 lb,
3228*0b57cec5SDimitry Andric                                    kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3229*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3230*0b57cec5SDimitry Andric                                     enum sched_type schedule, kmp_uint64 lb,
3231*0b57cec5SDimitry Andric                                     kmp_uint64 ub, kmp_int64 st,
3232*0b57cec5SDimitry Andric                                     kmp_int64 chunk);
3233*0b57cec5SDimitry Andric 
3234*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3235*0b57cec5SDimitry Andric                                   kmp_int32 *p_last, kmp_int32 *p_lb,
3236*0b57cec5SDimitry Andric                                   kmp_int32 *p_ub, kmp_int32 *p_st);
3237*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3238*0b57cec5SDimitry Andric                                    kmp_int32 *p_last, kmp_uint32 *p_lb,
3239*0b57cec5SDimitry Andric                                    kmp_uint32 *p_ub, kmp_int32 *p_st);
3240*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3241*0b57cec5SDimitry Andric                                   kmp_int32 *p_last, kmp_int64 *p_lb,
3242*0b57cec5SDimitry Andric                                   kmp_int64 *p_ub, kmp_int64 *p_st);
3243*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3244*0b57cec5SDimitry Andric                                    kmp_int32 *p_last, kmp_uint64 *p_lb,
3245*0b57cec5SDimitry Andric                                    kmp_uint64 *p_ub, kmp_int64 *p_st);
3246*0b57cec5SDimitry Andric 
3247*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3248*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3249*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3250*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
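// Illustrative sketch of how a compiler typically drives these entry points
// for a dynamically scheduled loop (kmp_sch_dynamic_chunked comes from this
// header's sched_type enum; loc/gtid are supplied by the caller).
#if 0
static void example_dynamic_loop(ident_t *loc, kmp_int32 gtid, kmp_int32 n) {
  kmp_int32 last, lb, ub, st;
  __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, 0, n - 1, 1,
                         /*chunk=*/4);
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
    for (kmp_int32 i = lb; i <= ub; i += st) {
      /* loop body for iteration i */
    }
  }
}
#endif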
3251*0b57cec5SDimitry Andric 
3252*0b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT
3253*0b57cec5SDimitry Andric 
3254*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3255*0b57cec5SDimitry Andric                                       enum sched_type schedule, kmp_int32 lb,
3256*0b57cec5SDimitry Andric                                       kmp_int32 ub, kmp_int32 st,
3257*0b57cec5SDimitry Andric                                       kmp_int32 chunk, int push_ws);
3258*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3259*0b57cec5SDimitry Andric                                        enum sched_type schedule, kmp_uint32 lb,
3260*0b57cec5SDimitry Andric                                        kmp_uint32 ub, kmp_int32 st,
3261*0b57cec5SDimitry Andric                                        kmp_int32 chunk, int push_ws);
3262*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3263*0b57cec5SDimitry Andric                                       enum sched_type schedule, kmp_int64 lb,
3264*0b57cec5SDimitry Andric                                       kmp_int64 ub, kmp_int64 st,
3265*0b57cec5SDimitry Andric                                       kmp_int64 chunk, int push_ws);
3266*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3267*0b57cec5SDimitry Andric                                        enum sched_type schedule, kmp_uint64 lb,
3268*0b57cec5SDimitry Andric                                        kmp_uint64 ub, kmp_int64 st,
3269*0b57cec5SDimitry Andric                                        kmp_int64 chunk, int push_ws);
3270*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3271*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3272*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3273*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3274*0b57cec5SDimitry Andric 
3275*0b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */
3276*0b57cec5SDimitry Andric 
3277*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3278*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3279*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3280*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3281*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3282*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3283*0b57cec5SDimitry Andric                                kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3284*0b57cec5SDimitry Andric                                void *obj);
3285*0b57cec5SDimitry Andric extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3286*0b57cec5SDimitry Andric                              kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
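// Illustrative sketch: __kmp_wait_4 spins until pred(*spinner, checker)
// holds, so pairing it with __kmp_eq_4 (declared above) waits for a flag to
// reach a given value.
#if 0
static inline void example_wait_for_flag(volatile kmp_uint32 *flag) {
  kmp_uint32 seen = __kmp_wait_4(flag, 1, __kmp_eq_4, NULL); // blocks until
  (void)seen;                                                // *flag == 1
}
#endif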
3287*0b57cec5SDimitry Andric 
3288*0b57cec5SDimitry Andric class kmp_flag_32;
3289*0b57cec5SDimitry Andric class kmp_flag_64;
3290*0b57cec5SDimitry Andric class kmp_flag_oncore;
3291*0b57cec5SDimitry Andric extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
3292*0b57cec5SDimitry Andric                           int final_spin
3293*0b57cec5SDimitry Andric #if USE_ITT_BUILD
3294*0b57cec5SDimitry Andric                           ,
3295*0b57cec5SDimitry Andric                           void *itt_sync_obj
3296*0b57cec5SDimitry Andric #endif
3297*0b57cec5SDimitry Andric                           );
3298*0b57cec5SDimitry Andric extern void __kmp_release_64(kmp_flag_64 *flag);
3299*0b57cec5SDimitry Andric 
3300*0b57cec5SDimitry Andric extern void __kmp_infinite_loop(void);
3301*0b57cec5SDimitry Andric 
3302*0b57cec5SDimitry Andric extern void __kmp_cleanup(void);
3303*0b57cec5SDimitry Andric 
3304*0b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS
3305*0b57cec5SDimitry Andric extern int __kmp_handle_signals;
3306*0b57cec5SDimitry Andric extern void __kmp_install_signals(int parallel_init);
3307*0b57cec5SDimitry Andric extern void __kmp_remove_signals(void);
3308*0b57cec5SDimitry Andric #endif
3309*0b57cec5SDimitry Andric 
3310*0b57cec5SDimitry Andric extern void __kmp_clear_system_time(void);
3311*0b57cec5SDimitry Andric extern void __kmp_read_system_time(double *delta);
3312*0b57cec5SDimitry Andric 
3313*0b57cec5SDimitry Andric extern void __kmp_check_stack_overlap(kmp_info_t *thr);
3314*0b57cec5SDimitry Andric 
3315*0b57cec5SDimitry Andric extern void __kmp_expand_host_name(char *buffer, size_t size);
3316*0b57cec5SDimitry Andric extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3317*0b57cec5SDimitry Andric 
3318*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3319*0b57cec5SDimitry Andric extern void
3320*0b57cec5SDimitry Andric __kmp_initialize_system_tick(void); /* Initialize timer tick value */
3321*0b57cec5SDimitry Andric #endif
3322*0b57cec5SDimitry Andric 
3323*0b57cec5SDimitry Andric extern void
3324*0b57cec5SDimitry Andric __kmp_runtime_initialize(void); /* machine specific initialization */
3325*0b57cec5SDimitry Andric extern void __kmp_runtime_destroy(void);
3326*0b57cec5SDimitry Andric 
3327*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
3328*0b57cec5SDimitry Andric extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3329*0b57cec5SDimitry Andric                                        kmp_affin_mask_t *mask);
3330*0b57cec5SDimitry Andric extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3331*0b57cec5SDimitry Andric                                                   kmp_affin_mask_t *mask);
3332*0b57cec5SDimitry Andric extern void __kmp_affinity_initialize(void);
3333*0b57cec5SDimitry Andric extern void __kmp_affinity_uninitialize(void);
3334*0b57cec5SDimitry Andric extern void __kmp_affinity_set_init_mask(
3335*0b57cec5SDimitry Andric     int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3336*0b57cec5SDimitry Andric extern void __kmp_affinity_set_place(int gtid);
3337*0b57cec5SDimitry Andric extern void __kmp_affinity_determine_capable(const char *env_var);
3338*0b57cec5SDimitry Andric extern int __kmp_aux_set_affinity(void **mask);
3339*0b57cec5SDimitry Andric extern int __kmp_aux_get_affinity(void **mask);
3340*0b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_max_proc();
3341*0b57cec5SDimitry Andric extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3342*0b57cec5SDimitry Andric extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3343*0b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3344*0b57cec5SDimitry Andric extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3345*0b57cec5SDimitry Andric #if KMP_OS_LINUX
3346*0b57cec5SDimitry Andric extern int kmp_set_thread_affinity_mask_initial(void);
3347*0b57cec5SDimitry Andric #endif
3348*0b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
3349*0b57cec5SDimitry Andric // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3350*0b57cec5SDimitry Andric // format string is for affinity, so platforms that do not support
3351*0b57cec5SDimitry Andric // affinity can still use the other fields, e.g., %n for num_threads
3352*0b57cec5SDimitry Andric extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3353*0b57cec5SDimitry Andric                                          kmp_str_buf_t *buffer);
3354*0b57cec5SDimitry Andric extern void __kmp_aux_display_affinity(int gtid, const char *format);
3355*0b57cec5SDimitry Andric 
3356*0b57cec5SDimitry Andric extern void __kmp_cleanup_hierarchy();
3357*0b57cec5SDimitry Andric extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3358*0b57cec5SDimitry Andric 
3359*0b57cec5SDimitry Andric #if KMP_USE_FUTEX
3360*0b57cec5SDimitry Andric 
3361*0b57cec5SDimitry Andric extern int __kmp_futex_determine_capable(void);
3362*0b57cec5SDimitry Andric 
3363*0b57cec5SDimitry Andric #endif // KMP_USE_FUTEX
3364*0b57cec5SDimitry Andric 
3365*0b57cec5SDimitry Andric extern void __kmp_gtid_set_specific(int gtid);
3366*0b57cec5SDimitry Andric extern int __kmp_gtid_get_specific(void);
3367*0b57cec5SDimitry Andric 
3368*0b57cec5SDimitry Andric extern double __kmp_read_cpu_time(void);
3369*0b57cec5SDimitry Andric 
3370*0b57cec5SDimitry Andric extern int __kmp_read_system_info(struct kmp_sys_info *info);
3371*0b57cec5SDimitry Andric 
3372*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
3373*0b57cec5SDimitry Andric extern void __kmp_create_monitor(kmp_info_t *th);
3374*0b57cec5SDimitry Andric #endif
3375*0b57cec5SDimitry Andric 
3376*0b57cec5SDimitry Andric extern void *__kmp_launch_thread(kmp_info_t *thr);
3377*0b57cec5SDimitry Andric 
3378*0b57cec5SDimitry Andric extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3379*0b57cec5SDimitry Andric 
3380*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
3381*0b57cec5SDimitry Andric extern int __kmp_still_running(kmp_info_t *th);
3382*0b57cec5SDimitry Andric extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3383*0b57cec5SDimitry Andric extern void __kmp_free_handle(kmp_thread_t tHandle);
3384*0b57cec5SDimitry Andric #endif
3385*0b57cec5SDimitry Andric 
3386*0b57cec5SDimitry Andric #if KMP_USE_MONITOR
3387*0b57cec5SDimitry Andric extern void __kmp_reap_monitor(kmp_info_t *th);
3388*0b57cec5SDimitry Andric #endif
3389*0b57cec5SDimitry Andric extern void __kmp_reap_worker(kmp_info_t *th);
3390*0b57cec5SDimitry Andric extern void __kmp_terminate_thread(int gtid);
3391*0b57cec5SDimitry Andric 
3392*0b57cec5SDimitry Andric extern int __kmp_try_suspend_mx(kmp_info_t *th);
3393*0b57cec5SDimitry Andric extern void __kmp_lock_suspend_mx(kmp_info_t *th);
3394*0b57cec5SDimitry Andric extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3395*0b57cec5SDimitry Andric 
3396*0b57cec5SDimitry Andric extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
3397*0b57cec5SDimitry Andric extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
3398*0b57cec5SDimitry Andric extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
3399*0b57cec5SDimitry Andric extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
3400*0b57cec5SDimitry Andric extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
3401*0b57cec5SDimitry Andric extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
3402*0b57cec5SDimitry Andric 
3403*0b57cec5SDimitry Andric extern void __kmp_elapsed(double *);
3404*0b57cec5SDimitry Andric extern void __kmp_elapsed_tick(double *);
3405*0b57cec5SDimitry Andric 
3406*0b57cec5SDimitry Andric extern void __kmp_enable(int old_state);
3407*0b57cec5SDimitry Andric extern void __kmp_disable(int *old_state);
3408*0b57cec5SDimitry Andric 
3409*0b57cec5SDimitry Andric extern void __kmp_thread_sleep(int millis);
3410*0b57cec5SDimitry Andric 
3411*0b57cec5SDimitry Andric extern void __kmp_common_initialize(void);
3412*0b57cec5SDimitry Andric extern void __kmp_common_destroy(void);
3413*0b57cec5SDimitry Andric extern void __kmp_common_destroy_gtid(int gtid);
3414*0b57cec5SDimitry Andric 
3415*0b57cec5SDimitry Andric #if KMP_OS_UNIX
3416*0b57cec5SDimitry Andric extern void __kmp_register_atfork(void);
3417*0b57cec5SDimitry Andric #endif
3418*0b57cec5SDimitry Andric extern void __kmp_suspend_initialize(void);
3419*0b57cec5SDimitry Andric extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3420*0b57cec5SDimitry Andric extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3421*0b57cec5SDimitry Andric 
3422*0b57cec5SDimitry Andric extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3423*0b57cec5SDimitry Andric                                          int tid);
3424*0b57cec5SDimitry Andric extern kmp_team_t *
3425*0b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3426*0b57cec5SDimitry Andric #if OMPT_SUPPORT
3427*0b57cec5SDimitry Andric                     ompt_data_t ompt_parallel_data,
3428*0b57cec5SDimitry Andric #endif
3429*0b57cec5SDimitry Andric                     kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3430*0b57cec5SDimitry Andric                     int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3431*0b57cec5SDimitry Andric extern void __kmp_free_thread(kmp_info_t *);
3432*0b57cec5SDimitry Andric extern void __kmp_free_team(kmp_root_t *,
3433*0b57cec5SDimitry Andric                             kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3434*0b57cec5SDimitry Andric extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3435*0b57cec5SDimitry Andric 
3436*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3437*0b57cec5SDimitry Andric 
3438*0b57cec5SDimitry Andric extern void __kmp_initialize_bget(kmp_info_t *th);
3439*0b57cec5SDimitry Andric extern void __kmp_finalize_bget(kmp_info_t *th);
3440*0b57cec5SDimitry Andric 
3441*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_malloc(size_t size);
3442*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
3443*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
3444*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
3445*0b57cec5SDimitry Andric KMP_EXPORT void kmpc_free(void *ptr);
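// Hedged usage sketch: these entry points mirror malloc/calloc/realloc/free
// but route through the runtime's per-thread bget allocator (see the
// __kmp_initialize_bget declarations above), so memory obtained here must be
// released with kmpc_free, not libc free. Sizes and names are illustrative:
//
//   double *a = (double *)kmpc_malloc(n * sizeof(double));
//   double *b = (double *)kmpc_aligned_malloc(n * sizeof(double), 64);
//   ...
//   kmpc_free(b);
//   kmpc_free(a);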
3446*0b57cec5SDimitry Andric 
3447*0b57cec5SDimitry Andric /* declarations for internal use */
3448*0b57cec5SDimitry Andric 
3449*0b57cec5SDimitry Andric extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3450*0b57cec5SDimitry Andric                          size_t reduce_size, void *reduce_data,
3451*0b57cec5SDimitry Andric                          void (*reduce)(void *, void *));
3452*0b57cec5SDimitry Andric extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3453*0b57cec5SDimitry Andric extern int __kmp_barrier_gomp_cancel(int gtid);
3454*0b57cec5SDimitry Andric 
/*!
 * Tells the fork call which compiler generated it, and therefore how the
 * call must be handled.
 */
3459*0b57cec5SDimitry Andric enum fork_context_e {
3460*0b57cec5SDimitry Andric   fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
3461*0b57cec5SDimitry Andric                        microtask internally. */
3462*0b57cec5SDimitry Andric   fork_context_intel, /**< Called from Intel generated code.  */
3463*0b57cec5SDimitry Andric   fork_context_last
3464*0b57cec5SDimitry Andric };
3465*0b57cec5SDimitry Andric extern int __kmp_fork_call(ident_t *loc, int gtid,
3466*0b57cec5SDimitry Andric                            enum fork_context_e fork_context, kmp_int32 argc,
3467*0b57cec5SDimitry Andric                            microtask_t microtask, launch_t invoker,
3468*0b57cec5SDimitry Andric /* TODO: revert workaround for Intel(R) 64 tracker #96 */
3469*0b57cec5SDimitry Andric #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
3470*0b57cec5SDimitry Andric                            va_list *ap
3471*0b57cec5SDimitry Andric #else
3472*0b57cec5SDimitry Andric                            va_list ap
3473*0b57cec5SDimitry Andric #endif
3474*0b57cec5SDimitry Andric                            );
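// Hedged sketch of how the two contexts differ in practice (consistent with
// the enum comments above; argument names are illustrative):
//
//   // Intel/clang-style entry (__kmpc_fork_call): the runtime invokes the
//   // microtask itself.
//   __kmp_fork_call(loc, gtid, fork_context_intel, argc, microtask, invoker,
//                   ap);
//   // GNU-style entry (GOMP_parallel): the caller runs the microtask after
//   // the fork returns, so the runtime must not invoke it internally.
//   __kmp_fork_call(loc, gtid, fork_context_gnu, argc, microtask, invoker,
//                   ap);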
3475*0b57cec5SDimitry Andric 
3476*0b57cec5SDimitry Andric extern void __kmp_join_call(ident_t *loc, int gtid
3477*0b57cec5SDimitry Andric #if OMPT_SUPPORT
3478*0b57cec5SDimitry Andric                             ,
3479*0b57cec5SDimitry Andric                             enum fork_context_e fork_context
3480*0b57cec5SDimitry Andric #endif
3481*0b57cec5SDimitry Andric                             ,
3482*0b57cec5SDimitry Andric                             int exit_teams = 0);
3483*0b57cec5SDimitry Andric 
3484*0b57cec5SDimitry Andric extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3485*0b57cec5SDimitry Andric extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3486*0b57cec5SDimitry Andric extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3487*0b57cec5SDimitry Andric extern int __kmp_invoke_task_func(int gtid);
3488*0b57cec5SDimitry Andric extern void __kmp_run_before_invoked_task(int gtid, int tid,
3489*0b57cec5SDimitry Andric                                           kmp_info_t *this_thr,
3490*0b57cec5SDimitry Andric                                           kmp_team_t *team);
3491*0b57cec5SDimitry Andric extern void __kmp_run_after_invoked_task(int gtid, int tid,
3492*0b57cec5SDimitry Andric                                          kmp_info_t *this_thr,
3493*0b57cec5SDimitry Andric                                          kmp_team_t *team);
3494*0b57cec5SDimitry Andric 
3495*0b57cec5SDimitry Andric // should never have been exported
3496*0b57cec5SDimitry Andric KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
3497*0b57cec5SDimitry Andric extern int __kmp_invoke_teams_master(int gtid);
3498*0b57cec5SDimitry Andric extern void __kmp_teams_master(int gtid);
3499*0b57cec5SDimitry Andric extern int __kmp_aux_get_team_num();
3500*0b57cec5SDimitry Andric extern int __kmp_aux_get_num_teams();
3501*0b57cec5SDimitry Andric extern void __kmp_save_internal_controls(kmp_info_t *thread);
3502*0b57cec5SDimitry Andric extern void __kmp_user_set_library(enum library_type arg);
3503*0b57cec5SDimitry Andric extern void __kmp_aux_set_library(enum library_type arg);
3504*0b57cec5SDimitry Andric extern void __kmp_aux_set_stacksize(size_t arg);
3505*0b57cec5SDimitry Andric extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3506*0b57cec5SDimitry Andric extern void __kmp_aux_set_defaults(char const *str, int len);
3507*0b57cec5SDimitry Andric 
3508*0b57cec5SDimitry Andric /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3509*0b57cec5SDimitry Andric void kmpc_set_blocktime(int arg);
3510*0b57cec5SDimitry Andric void ompc_set_nested(int flag);
3511*0b57cec5SDimitry Andric void ompc_set_dynamic(int flag);
3512*0b57cec5SDimitry Andric void ompc_set_num_threads(int arg);
3513*0b57cec5SDimitry Andric 
3514*0b57cec5SDimitry Andric extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3515*0b57cec5SDimitry Andric                                               kmp_team_t *team, int tid);
3516*0b57cec5SDimitry Andric extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3517*0b57cec5SDimitry Andric extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3518*0b57cec5SDimitry Andric                                     kmp_tasking_flags_t *flags,
3519*0b57cec5SDimitry Andric                                     size_t sizeof_kmp_task_t,
3520*0b57cec5SDimitry Andric                                     size_t sizeof_shareds,
3521*0b57cec5SDimitry Andric                                     kmp_routine_entry_t task_entry);
3522*0b57cec5SDimitry Andric extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3523*0b57cec5SDimitry Andric                                      kmp_team_t *team, int tid,
3524*0b57cec5SDimitry Andric                                      int set_curr_task);
3525*0b57cec5SDimitry Andric extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3526*0b57cec5SDimitry Andric extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3527*0b57cec5SDimitry Andric 
3528*0b57cec5SDimitry Andric extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3529*0b57cec5SDimitry Andric                                                        int gtid,
3530*0b57cec5SDimitry Andric                                                        kmp_task_t *task);
3531*0b57cec5SDimitry Andric extern void __kmp_fulfill_event(kmp_event_t *event);
3532*0b57cec5SDimitry Andric 
3533*0b57cec5SDimitry Andric int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
3534*0b57cec5SDimitry Andric                            kmp_flag_32 *flag, int final_spin,
3535*0b57cec5SDimitry Andric                            int *thread_finished,
3536*0b57cec5SDimitry Andric #if USE_ITT_BUILD
3537*0b57cec5SDimitry Andric                            void *itt_sync_obj,
3538*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
3539*0b57cec5SDimitry Andric                            kmp_int32 is_constrained);
3540*0b57cec5SDimitry Andric int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
3541*0b57cec5SDimitry Andric                            kmp_flag_64 *flag, int final_spin,
3542*0b57cec5SDimitry Andric                            int *thread_finished,
3543*0b57cec5SDimitry Andric #if USE_ITT_BUILD
3544*0b57cec5SDimitry Andric                            void *itt_sync_obj,
3545*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
3546*0b57cec5SDimitry Andric                            kmp_int32 is_constrained);
3547*0b57cec5SDimitry Andric int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
3548*0b57cec5SDimitry Andric                                kmp_flag_oncore *flag, int final_spin,
3549*0b57cec5SDimitry Andric                                int *thread_finished,
3550*0b57cec5SDimitry Andric #if USE_ITT_BUILD
3551*0b57cec5SDimitry Andric                                void *itt_sync_obj,
3552*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
3553*0b57cec5SDimitry Andric                                kmp_int32 is_constrained);
3554*0b57cec5SDimitry Andric 
3555*0b57cec5SDimitry Andric extern void __kmp_free_task_team(kmp_info_t *thread,
3556*0b57cec5SDimitry Andric                                  kmp_task_team_t *task_team);
3557*0b57cec5SDimitry Andric extern void __kmp_reap_task_teams(void);
3558*0b57cec5SDimitry Andric extern void __kmp_wait_to_unref_task_teams(void);
3559*0b57cec5SDimitry Andric extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3560*0b57cec5SDimitry Andric                                   int always);
3561*0b57cec5SDimitry Andric extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3562*0b57cec5SDimitry Andric extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3563*0b57cec5SDimitry Andric #if USE_ITT_BUILD
3564*0b57cec5SDimitry Andric                                  ,
3565*0b57cec5SDimitry Andric                                  void *itt_sync_obj
3566*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
3567*0b57cec5SDimitry Andric                                  ,
3568*0b57cec5SDimitry Andric                                  int wait = 1);
3569*0b57cec5SDimitry Andric extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3570*0b57cec5SDimitry Andric                                   int gtid);
3571*0b57cec5SDimitry Andric 
3572*0b57cec5SDimitry Andric extern int __kmp_is_address_mapped(void *addr);
3573*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_hardware_timestamp(void);
3574*0b57cec5SDimitry Andric 
3575*0b57cec5SDimitry Andric #if KMP_OS_UNIX
3576*0b57cec5SDimitry Andric extern int __kmp_read_from_file(char const *path, char const *format, ...);
3577*0b57cec5SDimitry Andric #endif
3578*0b57cec5SDimitry Andric 
3579*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3580*0b57cec5SDimitry Andric //
3581*0b57cec5SDimitry Andric // Assembly routines that have no compiler intrinsic replacement
3582*0b57cec5SDimitry Andric //
3583*0b57cec5SDimitry Andric 
3584*0b57cec5SDimitry Andric extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3585*0b57cec5SDimitry Andric                                   void *argv[]
3586*0b57cec5SDimitry Andric #if OMPT_SUPPORT
3587*0b57cec5SDimitry Andric                                   ,
3588*0b57cec5SDimitry Andric                                   void **exit_frame_ptr
3589*0b57cec5SDimitry Andric #endif
3590*0b57cec5SDimitry Andric                                   );
3591*0b57cec5SDimitry Andric 
3592*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3593*0b57cec5SDimitry Andric 
3594*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
3595*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end(ident_t *);
3596*0b57cec5SDimitry Andric 
3597*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3598*0b57cec5SDimitry Andric                                                   kmpc_ctor_vec ctor,
3599*0b57cec5SDimitry Andric                                                   kmpc_cctor_vec cctor,
3600*0b57cec5SDimitry Andric                                                   kmpc_dtor_vec dtor,
3601*0b57cec5SDimitry Andric                                                   size_t vector_length);
3602*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
3603*0b57cec5SDimitry Andric                                               kmpc_ctor ctor, kmpc_cctor cctor,
3604*0b57cec5SDimitry Andric                                               kmpc_dtor dtor);
3605*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3606*0b57cec5SDimitry Andric                                       void *data, size_t size);
3607*0b57cec5SDimitry Andric 
3608*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
3609*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
3610*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
3611*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
3612*0b57cec5SDimitry Andric 
3613*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
3614*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3615*0b57cec5SDimitry Andric                                  kmpc_micro microtask, ...);
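// Hedged sketch of the usual lowering of "#pragma omp parallel" with two
// shared variables (the outlined function and variable names are
// illustrative, not actual generated code):
//
//   void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x, int *y) {...}
//   ...
//   __kmpc_fork_call(&loc, 2, (kmpc_micro)outlined, &x, &y);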
3616*0b57cec5SDimitry Andric 
3617*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3618*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3619*0b57cec5SDimitry Andric 
3620*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_flush(ident_t *);
3621*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3622*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3623*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3624*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3625*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3626*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3627*0b57cec5SDimitry Andric                                 kmp_critical_name *);
3628*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3629*0b57cec5SDimitry Andric                                     kmp_critical_name *);
3630*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3631*0b57cec5SDimitry Andric                                           kmp_critical_name *, uint32_t hint);
3632*0b57cec5SDimitry Andric 
3633*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3634*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3635*0b57cec5SDimitry Andric 
3636*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3637*0b57cec5SDimitry Andric                                                   kmp_int32 global_tid);
3638*0b57cec5SDimitry Andric 
3639*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3640*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3641*0b57cec5SDimitry Andric 
3642*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3643*0b57cec5SDimitry Andric                                      kmp_int32 schedtype, kmp_int32 *plastiter,
3644*0b57cec5SDimitry Andric                                      kmp_int *plower, kmp_int *pupper,
3645*0b57cec5SDimitry Andric                                      kmp_int *pstride, kmp_int incr,
3646*0b57cec5SDimitry Andric                                      kmp_int chunk);
3647*0b57cec5SDimitry Andric 
3648*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3649*0b57cec5SDimitry Andric 
3650*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3651*0b57cec5SDimitry Andric                                    size_t cpy_size, void *cpy_data,
3652*0b57cec5SDimitry Andric                                    void (*cpy_func)(void *, void *),
3653*0b57cec5SDimitry Andric                                    kmp_int32 didit);
3654*0b57cec5SDimitry Andric 
3655*0b57cec5SDimitry Andric extern void KMPC_SET_NUM_THREADS(int arg);
3656*0b57cec5SDimitry Andric extern void KMPC_SET_DYNAMIC(int flag);
3657*0b57cec5SDimitry Andric extern void KMPC_SET_NESTED(int flag);
3658*0b57cec5SDimitry Andric 
3659*0b57cec5SDimitry Andric /* OMP 3.0 tasking interface routines */
3660*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
3661*0b57cec5SDimitry Andric                                      kmp_task_t *new_task);
3662*0b57cec5SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3663*0b57cec5SDimitry Andric                                              kmp_int32 flags,
3664*0b57cec5SDimitry Andric                                              size_t sizeof_kmp_task_t,
3665*0b57cec5SDimitry Andric                                              size_t sizeof_shareds,
3666*0b57cec5SDimitry Andric                                              kmp_routine_entry_t task_entry);
KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(
    ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
    size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    kmp_routine_entry_t task_entry, kmp_int64 device_id);
3673*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
3674*0b57cec5SDimitry Andric                                           kmp_task_t *task);
3675*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
3676*0b57cec5SDimitry Andric                                              kmp_task_t *task);
3677*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
3678*0b57cec5SDimitry Andric                                            kmp_task_t *new_task);
3679*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
3680*0b57cec5SDimitry Andric 
3681*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
3682*0b57cec5SDimitry Andric                                           int end_part);
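// Hedged sketch of explicit-task lowering using the entries above (flags = 1
// requests a tied task, matching TASK_TIED; names and sizes illustrative):
//
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/1,
//                                         sizeof_task_plus_privates,
//                                         sizeof_shareds, task_entry);
//   ... // fill in t->shareds before spawning
//   __kmpc_omp_task(&loc, gtid, t);  // enqueue, or execute if serialized
//   __kmpc_omp_taskwait(&loc, gtid); // wait for child tasks to complete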
3683*0b57cec5SDimitry Andric 
3684*0b57cec5SDimitry Andric #if TASK_UNUSED
3685*0b57cec5SDimitry Andric void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
3686*0b57cec5SDimitry Andric void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
3687*0b57cec5SDimitry Andric                               kmp_task_t *task);
3688*0b57cec5SDimitry Andric #endif // TASK_UNUSED
3689*0b57cec5SDimitry Andric 
3690*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3691*0b57cec5SDimitry Andric 
3692*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
3693*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);
3694*0b57cec5SDimitry Andric 
3695*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
3696*0b57cec5SDimitry Andric     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
3697*0b57cec5SDimitry Andric     kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
3698*0b57cec5SDimitry Andric     kmp_depend_info_t *noalias_dep_list);
3699*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
3700*0b57cec5SDimitry Andric                                      kmp_int32 ndeps,
3701*0b57cec5SDimitry Andric                                      kmp_depend_info_t *dep_list,
3702*0b57cec5SDimitry Andric                                      kmp_int32 ndeps_noalias,
3703*0b57cec5SDimitry Andric                                      kmp_depend_info_t *noalias_dep_list);
3704*0b57cec5SDimitry Andric extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
3705*0b57cec5SDimitry Andric                                 bool serialize_immediate);
3706*0b57cec5SDimitry Andric 
3707*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
3708*0b57cec5SDimitry Andric                                    kmp_int32 cncl_kind);
3709*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
3710*0b57cec5SDimitry Andric                                               kmp_int32 cncl_kind);
3711*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
3712*0b57cec5SDimitry Andric KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
3713*0b57cec5SDimitry Andric 
3714*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
3715*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
3716*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
3717*0b57cec5SDimitry Andric                                 kmp_int32 if_val, kmp_uint64 *lb,
3718*0b57cec5SDimitry Andric                                 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
3719*0b57cec5SDimitry Andric                                 kmp_int32 sched, kmp_uint64 grainsize,
3720*0b57cec5SDimitry Andric                                 void *task_dup);
3721*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
3722*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
3723*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
3724*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
3725*0b57cec5SDimitry Andric                                                      int is_ws, int num,
3726*0b57cec5SDimitry Andric                                                      void *data);
3727*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
3728*0b57cec5SDimitry Andric                                               int num, void *data);
3729*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
3730*0b57cec5SDimitry Andric                                                     int is_ws);
3731*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
3732*0b57cec5SDimitry Andric     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
3733*0b57cec5SDimitry Andric     kmp_task_affinity_info_t *affin_list);
3734*0b57cec5SDimitry Andric 
3735*0b57cec5SDimitry Andric /* Lock interface routines (fast versions with gtid passed in) */
3736*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
3737*0b57cec5SDimitry Andric                                  void **user_lock);
3738*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
3739*0b57cec5SDimitry Andric                                       void **user_lock);
3740*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
3741*0b57cec5SDimitry Andric                                     void **user_lock);
3742*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
3743*0b57cec5SDimitry Andric                                          void **user_lock);
3744*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
3745*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
3746*0b57cec5SDimitry Andric                                      void **user_lock);
3747*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
3748*0b57cec5SDimitry Andric                                   void **user_lock);
3749*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
3750*0b57cec5SDimitry Andric                                        void **user_lock);
3751*0b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
3752*0b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
3753*0b57cec5SDimitry Andric                                      void **user_lock);
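// Hedged sketch of how omp_lock_t operations map onto the fast entries above
// (lock storage, loc, and gtid are illustrative):
//
//   void *lck = NULL; // storage the runtime initializes
//   __kmpc_init_lock(&loc, gtid, &lck);
//   if (__kmpc_test_lock(&loc, gtid, &lck)) { // nonzero when acquired
//     ... // critical work
//     __kmpc_unset_lock(&loc, gtid, &lck);
//   }
//   __kmpc_destroy_lock(&loc, gtid, &lck);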
3754*0b57cec5SDimitry Andric 
3755*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
3756*0b57cec5SDimitry Andric                                            void **user_lock, uintptr_t hint);
3757*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
3758*0b57cec5SDimitry Andric                                                 void **user_lock,
3759*0b57cec5SDimitry Andric                                                 uintptr_t hint);
3760*0b57cec5SDimitry Andric 
3761*0b57cec5SDimitry Andric /* Interface to fast scalable reduce methods routines */
3762*0b57cec5SDimitry Andric 
3763*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
3764*0b57cec5SDimitry Andric     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
3765*0b57cec5SDimitry Andric     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3766*0b57cec5SDimitry Andric     kmp_critical_name *lck);
3767*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3768*0b57cec5SDimitry Andric                                          kmp_critical_name *lck);
3769*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce(
3770*0b57cec5SDimitry Andric     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
3771*0b57cec5SDimitry Andric     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3772*0b57cec5SDimitry Andric     kmp_critical_name *lck);
3773*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3774*0b57cec5SDimitry Andric                                   kmp_critical_name *lck);
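// Hedged sketch of the reduction handshake (return convention as documented
// with the implementation: 1 = this thread combines the partial results and
// must call the matching end_reduce, 2 = use atomic updates, 0 = nothing):
//
//   switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(sum), &sum,
//                                reduce_func, &crit)) {
//   case 1:
//     ... // combine reduce_data from all threads, then:
//     __kmpc_end_reduce_nowait(&loc, gtid, &crit);
//     break;
//   case 2:
//     ... // add this thread's contribution atomically
//     break;
//   }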
3775*0b57cec5SDimitry Andric 
3776*0b57cec5SDimitry Andric /* Internal fast reduction routines */
3777*0b57cec5SDimitry Andric 
3778*0b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
3779*0b57cec5SDimitry Andric     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
3780*0b57cec5SDimitry Andric     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3781*0b57cec5SDimitry Andric     kmp_critical_name *lck);
3782*0b57cec5SDimitry Andric 
3783*0b57cec5SDimitry Andric // this function is for testing set/get/determine reduce method
// This function is for testing the set/get/determine reduce-method logic.
3785*0b57cec5SDimitry Andric 
3786*0b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
3787*0b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
3788*0b57cec5SDimitry Andric 
// C++ port: these declarations were missing 'extern "C"'.
3791*0b57cec5SDimitry Andric 
3792*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
3793*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
3794*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
3795*0b57cec5SDimitry Andric                                         kmp_int32 num_threads);
3796*0b57cec5SDimitry Andric 
3797*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
3798*0b57cec5SDimitry Andric                                       int proc_bind);
3799*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
3800*0b57cec5SDimitry Andric                                       kmp_int32 num_teams,
3801*0b57cec5SDimitry Andric                                       kmp_int32 num_threads);
3802*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
3803*0b57cec5SDimitry Andric                                   kmpc_micro microtask, ...);
struct kmp_dim { // loop bounds info cast to kmp_int64
  kmp_int64 lo; // lower bound
  kmp_int64 up; // upper bound
  kmp_int64 st; // stride
3808*0b57cec5SDimitry Andric };
3809*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
3810*0b57cec5SDimitry Andric                                      kmp_int32 num_dims,
3811*0b57cec5SDimitry Andric                                      const struct kmp_dim *dims);
3812*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
3813*0b57cec5SDimitry Andric                                      const kmp_int64 *vec);
3814*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
3815*0b57cec5SDimitry Andric                                      const kmp_int64 *vec);
3816*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
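// Hedged sketch of how a compiler might drive the doacross entries for
// "#pragma omp for ordered(1)" (bounds and names are illustrative):
//
//   struct kmp_dim d = {/*lo=*/0, /*up=*/N - 1, /*st=*/1};
//   __kmpc_doacross_init(&loc, gtid, 1, &d);
//   for (kmp_int64 i = lb; i <= ub; ++i) {
//     kmp_int64 vec = i - 1;
//     __kmpc_doacross_wait(&loc, gtid, &vec); // depend(sink: i - 1)
//     ... // loop body
//     vec = i;
//     __kmpc_doacross_post(&loc, gtid, &vec); // depend(source)
//   }
//   __kmpc_doacross_fini(&loc, gtid);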
3817*0b57cec5SDimitry Andric 
3818*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
3819*0b57cec5SDimitry Andric                                              void *data, size_t size,
3820*0b57cec5SDimitry Andric                                              void ***cache);
3821*0b57cec5SDimitry Andric 
// Symbols for link-time mutual detection with the Microsoft OpenMP library.
3823*0b57cec5SDimitry Andric extern int _You_must_link_with_exactly_one_OpenMP_library;
3824*0b57cec5SDimitry Andric extern int _You_must_link_with_Intel_OpenMP_library;
3825*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4)
3826*0b57cec5SDimitry Andric extern int _You_must_link_with_Microsoft_OpenMP_library;
3827*0b57cec5SDimitry Andric #endif
3828*0b57cec5SDimitry Andric 
3829*0b57cec5SDimitry Andric // The routines below are not exported.
3830*0b57cec5SDimitry Andric // Consider making them 'static' in corresponding source files.
3831*0b57cec5SDimitry Andric void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
3832*0b57cec5SDimitry Andric                                            void *data_addr, size_t pc_size);
3833*0b57cec5SDimitry Andric struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
3834*0b57cec5SDimitry Andric                                                 void *data_addr,
3835*0b57cec5SDimitry Andric                                                 size_t pc_size);
3836*0b57cec5SDimitry Andric void __kmp_threadprivate_resize_cache(int newCapacity);
3837*0b57cec5SDimitry Andric void __kmp_cleanup_threadprivate_caches();
3838*0b57cec5SDimitry Andric 
3839*0b57cec5SDimitry Andric // ompc_, kmpc_ entries moved from omp.h.
3840*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
3841*0b57cec5SDimitry Andric #define KMPC_CONVENTION __cdecl
3842*0b57cec5SDimitry Andric #else
3843*0b57cec5SDimitry Andric #define KMPC_CONVENTION
3844*0b57cec5SDimitry Andric #endif
3845*0b57cec5SDimitry Andric 
3846*0b57cec5SDimitry Andric #ifndef __OMP_H
3847*0b57cec5SDimitry Andric typedef enum omp_sched_t {
3848*0b57cec5SDimitry Andric   omp_sched_static = 1,
3849*0b57cec5SDimitry Andric   omp_sched_dynamic = 2,
3850*0b57cec5SDimitry Andric   omp_sched_guided = 3,
3851*0b57cec5SDimitry Andric   omp_sched_auto = 4
3852*0b57cec5SDimitry Andric } omp_sched_t;
3853*0b57cec5SDimitry Andric typedef void *kmp_affinity_mask_t;
3854*0b57cec5SDimitry Andric #endif
3855*0b57cec5SDimitry Andric 
3856*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
3857*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
3858*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
3859*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
3860*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
3861*0b57cec5SDimitry Andric kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
3862*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
3863*0b57cec5SDimitry Andric kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
3864*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
3865*0b57cec5SDimitry Andric kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
3866*0b57cec5SDimitry Andric 
3867*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
3868*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
3869*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
3870*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
3871*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
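// Hedged usage sketch for the setters above (values illustrative; the
// kmpc_set_defaults argument is assumed to use "NAME=value" settings syntax):
//
//   ompc_set_schedule(omp_sched_dynamic, 4); // omp_set_schedule(kind, chunk)
//   kmpc_set_stacksize_s((size_t)4 << 20);   // 4 MiB worker stacks
//   kmpc_set_defaults("KMP_BLOCKTIME=0");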
3872*0b57cec5SDimitry Andric 
3873*0b57cec5SDimitry Andric enum kmp_target_offload_kind {
3874*0b57cec5SDimitry Andric   tgt_disabled = 0,
3875*0b57cec5SDimitry Andric   tgt_default = 1,
3876*0b57cec5SDimitry Andric   tgt_mandatory = 2
3877*0b57cec5SDimitry Andric };
3878*0b57cec5SDimitry Andric typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
// Set via OMP_TARGET_OFFLOAD if specified; defaults to tgt_default otherwise.
3880*0b57cec5SDimitry Andric extern kmp_target_offload_kind_t __kmp_target_offload;
3881*0b57cec5SDimitry Andric extern int __kmpc_get_target_offload();
3882*0b57cec5SDimitry Andric 
3883*0b57cec5SDimitry Andric // Constants used in libomptarget
3884*0b57cec5SDimitry Andric #define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
3885*0b57cec5SDimitry Andric #define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure.
3886*0b57cec5SDimitry Andric #define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
3887*0b57cec5SDimitry Andric 
3888*0b57cec5SDimitry Andric // OMP Pause Resource
3889*0b57cec5SDimitry Andric 
3890*0b57cec5SDimitry Andric // The following enum is used both to set the status in __kmp_pause_status, and
3891*0b57cec5SDimitry Andric // as the internal equivalent of the externally-visible omp_pause_resource_t.
3892*0b57cec5SDimitry Andric typedef enum kmp_pause_status_t {
  kmp_not_paused = 0, // status is not paused, or requesting resume
  kmp_soft_paused = 1, // status is soft-paused, or requesting soft pause
  kmp_hard_paused = 2 // status is hard-paused, or requesting hard pause
3896*0b57cec5SDimitry Andric } kmp_pause_status_t;
3897*0b57cec5SDimitry Andric 
3898*0b57cec5SDimitry Andric // This stores the pause state of the runtime
3899*0b57cec5SDimitry Andric extern kmp_pause_status_t __kmp_pause_status;
3900*0b57cec5SDimitry Andric extern int __kmpc_pause_resource(kmp_pause_status_t level);
3901*0b57cec5SDimitry Andric extern int __kmp_pause_resource(kmp_pause_status_t level);
// Soft resume sets __kmp_pause_status and wakes up all threads.
extern void __kmp_resume_if_soft_paused();
// Hard resume simply resets the status to not paused. The library will appear
// to be uninitialized after a hard pause; OMP constructs trigger the required
// re-initialization.
3907*0b57cec5SDimitry Andric static inline void __kmp_resume_if_hard_paused() {
3908*0b57cec5SDimitry Andric   if (__kmp_pause_status == kmp_hard_paused) {
3909*0b57cec5SDimitry Andric     __kmp_pause_status = kmp_not_paused;
3910*0b57cec5SDimitry Andric   }
3911*0b57cec5SDimitry Andric }
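// Hedged sketch tying pause and resume together (per the enum comments
// above; a zero return is assumed to mean the request succeeded):
//
//   if (__kmpc_pause_resource(kmp_soft_paused) == 0) {
//     ... // worker threads sleep and release CPU resources
//     __kmpc_pause_resource(kmp_not_paused); // request resume
//   }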
3912*0b57cec5SDimitry Andric 
3913*0b57cec5SDimitry Andric #ifdef __cplusplus
3914*0b57cec5SDimitry Andric }
3915*0b57cec5SDimitry Andric #endif
3916*0b57cec5SDimitry Andric 
3917*0b57cec5SDimitry Andric #endif /* KMP_H */