/*! \file */
/*
 * kmp.h -- KPTS runtime header file.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_H
#define KMP_H

#include "kmp_config.h"

/* #define BUILD_PARALLEL_ORDERED 1 */

/* This fix replaces gettimeofday with clock_gettime for better scalability on
   the Altix.  Requires user code to be linked with -lrt. */
//#define FIX_SGI_CLOCK

/* Defines for OpenMP 3.0 tasking and auto scheduling */

#ifndef KMP_STATIC_STEAL_ENABLED
#define KMP_STATIC_STEAL_ENABLED 1
#endif

#define TASK_CURRENT_NOT_QUEUED 0
#define TASK_CURRENT_QUEUED 1

#ifdef BUILD_TIED_TASK_STACK
#define TASK_STACK_EMPTY 0 // entries when the stack is empty
#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
// Number of entries in each task stack array
#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
// Mask for determining index into stack block
#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
#endif // BUILD_TIED_TASK_STACK

#define TASK_NOT_PUSHED 1
#define TASK_SUCCESSFULLY_PUSHED 0
#define TASK_TIED 1
#define TASK_UNTIED 0
#define TASK_EXPLICIT 1
#define TASK_IMPLICIT 0
#define TASK_PROXY 1
#define TASK_FULL 0
#define TASK_DETACHABLE 1
#define TASK_UNDETACHABLE 0

#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR

// Android does not have pthread_cancel.  Undefine KMP_CANCEL_THREADS if being
// built on Android
#if defined(__ANDROID__)
#undef KMP_CANCEL_THREADS
#endif

#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Don't include <ctype.h>; it causes problems with /MD on Windows* OS NT due
   to a bad Microsoft library. Some macros are provided below to replace these
   functions. */
#ifndef __ABSOFT_WIN
#include <sys/types.h>
#endif
#include <limits.h>
#include <time.h>

#include <errno.h>

#include "kmp_os.h"

#include "kmp_safe_c_api.h"

#if KMP_STATS_ENABLED
class kmp_stats_list;
#endif

#if KMP_USE_HIER_SCHED
// Only include hierarchical scheduling if affinity is supported
#undef KMP_USE_HIER_SCHED
#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED
#endif

#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
#include "hwloc.h"
#ifndef HWLOC_OBJ_NUMANODE
#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
#endif
#ifndef HWLOC_OBJ_PACKAGE
#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
#endif
#if HWLOC_API_VERSION >= 0x00020000
// hwloc 2.0 changed type of depth of object from unsigned to int
typedef int kmp_hwloc_depth_t;
#else
typedef unsigned int kmp_hwloc_depth_t;
#endif
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif

#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
#include "kmp_i18n.h"

#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)

#include "kmp_wrapper_malloc.h"
#if KMP_OS_UNIX
#include <unistd.h>
#if !defined NSIG && defined _NSIG
#define NSIG _NSIG
#endif
#endif

#if KMP_OS_LINUX
#pragma weak clock_gettime
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#endif

// Affinity format function
#include "kmp_str.h"

// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
// 3 - fast allocation using sync, non-sync free lists of any size, non-self
// free lists of limited size.
#ifndef USE_FAST_MEMORY
#define USE_FAST_MEMORY 3
#endif

#ifndef KMP_NESTED_HOT_TEAMS
#define KMP_NESTED_HOT_TEAMS 0
#define USE_NESTED_HOT_ARG(x)
#else
#if KMP_NESTED_HOT_TEAMS
#define USE_NESTED_HOT_ARG(x) , x
#else
#define USE_NESTED_HOT_ARG(x)
#endif
#endif

// Use a compare_exchange instruction instead of a lock for BGET by default.
#ifndef USE_CMP_XCHG_FOR_BGET
#define USE_CMP_XCHG_FOR_BGET 1
#endif

// Test to see if queuing lock is better than bootstrap lock for bget
// #ifndef USE_QUEUING_LOCK_FOR_BGET
// #define USE_QUEUING_LOCK_FOR_BGET
// #endif

#define KMP_NSEC_PER_SEC 1000000000L
#define KMP_USEC_PER_SEC 1000000L

/*!
@ingroup BASIC_TYPES
@{
*/

/*!
Values for bit flags used in the ident_t to describe the fields.
*/
enum {
  /*! Use trampoline for internal microtasks */
  KMP_IDENT_IMB = 0x01,
  /*! Use c-style ident structure */
  KMP_IDENT_KMPC = 0x02,
  /* 0x04 is no longer used */
  /*! Entry point generated by auto-parallelization */
  KMP_IDENT_AUTOPAR = 0x08,
  /*! Compiler generates atomic reduction option for kmpc_reduce* */
  KMP_IDENT_ATOMIC_REDUCE = 0x10,
  /*! To mark a 'barrier' directive in user code */
  KMP_IDENT_BARRIER_EXPL = 0x20,
  /*! To mark implicit barriers. */
  KMP_IDENT_BARRIER_IMPL = 0x0040,
  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,

  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,

  /*! To mark a static loop in OMPT callbacks */
  KMP_IDENT_WORK_LOOP = 0x200,
  /*! To mark a sections directive in OMPT callbacks */
  KMP_IDENT_WORK_SECTIONS = 0x400,
  /*! To mark a distribute construct in OMPT callbacks */
  KMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
      not currently used. If one day we need more bits, then we can use
      an invalid combination of hints to mean that another, larger field
      should be used in a different flag. */
  KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
};

/*!
 * The ident structure that describes a source location.
 */
typedef struct ident {
  kmp_int32 reserved_1; /**<  might be used in Fortran; see above  */
  kmp_int32 flags; /**<  also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
                      identifies this union member  */
  kmp_int32 reserved_2; /**<  not really used in Fortran any more; see above */
#if USE_ITT_BUILD
/*  but currently used for storing region-specific ITT */
/*  contextual information. */
#endif /* USE_ITT_BUILD */
  kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++  */
  char const *psource; /**< String describing the source location.
                       The string is composed of semi-colon separated fields
                       which describe the source file, the function and a pair
                       of line numbers that delimit the construct. */
} ident_t;
/*!
@}
*/
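
/* Illustrative example (an assumption, not text emitted by any particular
   compiler): for a construct spanning lines 10-14 of file.c inside function
   foo(), psource could look like ";file.c;foo;10;14;;".  Compilers may emit
   ";unknown;unknown;0;0;;" when precise location information is unavailable. */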

// Some forward declarations.
typedef union kmp_team kmp_team_t;
typedef struct kmp_taskdata kmp_taskdata_t;
typedef union kmp_task_team kmp_task_team_t;
typedef union kmp_team kmp_team_p;
typedef union kmp_info kmp_info_p;
typedef union kmp_root kmp_root_p;

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */

/* Pack two 32-bit signed integers into a 64-bit signed integer */
/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32, LOW_32)                                           \
  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
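
// Worked example (illustrative only): KMP_PACK_64(1, 2) yields
// 0x0000000100000002LL, i.e. HIGH_32 lands in the upper 32 bits and LOW_32 in
// the lower 32 bits.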

// Generic string manipulation macros. Assume that _x is of type char *
#define SKIP_WS(_x)                                                            \
  {                                                                            \
    while (*(_x) == ' ' || *(_x) == '\t')                                      \
      (_x)++;                                                                  \
  }
#define SKIP_DIGITS(_x)                                                        \
  {                                                                            \
    while (*(_x) >= '0' && *(_x) <= '9')                                       \
      (_x)++;                                                                  \
  }
#define SKIP_TOKEN(_x)                                                         \
  {                                                                            \
    while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
           (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_')                     \
      (_x)++;                                                                  \
  }
#define SKIP_TO(_x, _c)                                                        \
  {                                                                            \
    while (*(_x) != '\0' && *(_x) != (_c))                                     \
      (_x)++;                                                                  \
  }
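
/* Usage sketch for the macros above (illustrative only; the buffer contents
   are an assumption, not code from the runtime):
     char buf[] = " 16,cores";
     char *p = buf;
     SKIP_WS(p);      // p now points at '1'
     SKIP_DIGITS(p);  // p now points at ','
     p++;
     SKIP_TOKEN(p);   // consumes "cores"; p now points at the terminating '\0'
*/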

/* ------------------------------------------------------------------------ */

#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */
/* Enumeration types */

enum kmp_state_timer {
  ts_stop,
  ts_start,
  ts_pause,

  ts_last_state
};

enum dynamic_mode {
  dynamic_default,
#ifdef USE_LOAD_BALANCE
  dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
  dynamic_random,
  dynamic_thread_limit,
  dynamic_max
};

/* external schedule constants, duplicate enum omp_sched in omp.h in order to
 * not include it here */
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
  kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
  // Note: need to adjust __kmp_sch_map global array in case enum is changed
  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked           (33)
  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked          (35)
  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked           (36)
  kmp_sched_auto = 4, // mapped to kmp_sch_auto                     (38)
  kmp_sched_upper_std = 5, // upper bound for standard schedules
  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
#if KMP_STATIC_STEAL_ENABLED
  kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
  kmp_sched_upper,
  kmp_sched_default = kmp_sched_static, // default scheduling
  kmp_sched_monotonic = 0x80000000
} kmp_sched_t;
#endif

/*!
 @ingroup WORK_SHARING
 * Describes the loop schedule to be used for a parallel for loop.
 */
enum sched_type : kmp_int32 {
  kmp_sch_lower = 32, /**< lower bound for unordered values */
  kmp_sch_static_chunked = 33,
  kmp_sch_static = 34, /**< static unspecialized */
  kmp_sch_dynamic_chunked = 35,
  kmp_sch_guided_chunked = 36, /**< guided unspecialized */
  kmp_sch_runtime = 37,
  kmp_sch_auto = 38, /**< auto */
  kmp_sch_trapezoidal = 39,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_greedy = 40,
  kmp_sch_static_balanced = 41,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_guided_iterative_chunked = 42,
  kmp_sch_guided_analytical_chunked = 43,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_steal = 44,

  /* static with chunk adjustment (e.g., simd) */
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
  kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_upper, /**< upper bound for unordered values */

  kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
  kmp_ord_static_chunked = 65,
  kmp_ord_static = 66, /**< ordered static unspecialized */
  kmp_ord_dynamic_chunked = 67,
  kmp_ord_guided_chunked = 68,
  kmp_ord_runtime = 69,
  kmp_ord_auto = 70, /**< ordered auto */
  kmp_ord_trapezoidal = 71,
  kmp_ord_upper, /**< upper bound for ordered values */

  /* Schedules for Distribute construct */
  kmp_distribute_static_chunked = 91, /**< distribute static chunked */
  kmp_distribute_static = 92, /**< distribute static unspecialized */

  /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
     single iteration/chunk, even if the loop is serialized. For the schedule
     types listed above, the entire iteration vector is returned if the loop is
     serialized. This doesn't work for gcc/gcomp sections. */
  kmp_nm_lower = 160, /**< lower bound for nomerge values */

  kmp_nm_static_chunked =
      (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
  kmp_nm_static = 162, /**< static unspecialized */
  kmp_nm_dynamic_chunked = 163,
  kmp_nm_guided_chunked = 164, /**< guided unspecialized */
  kmp_nm_runtime = 165,
  kmp_nm_auto = 166, /**< auto */
  kmp_nm_trapezoidal = 167,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_static_greedy = 168,
  kmp_nm_static_balanced = 169,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_guided_iterative_chunked = 170,
  kmp_nm_guided_analytical_chunked = 171,
  kmp_nm_static_steal =
      172, /* accessible only through OMP_SCHEDULE environment variable */

  kmp_nm_ord_static_chunked = 193,
  kmp_nm_ord_static = 194, /**< ordered static unspecialized */
  kmp_nm_ord_dynamic_chunked = 195,
  kmp_nm_ord_guided_chunked = 196,
  kmp_nm_ord_runtime = 197,
  kmp_nm_ord_auto = 198, /**< auto */
  kmp_nm_ord_trapezoidal = 199,
  kmp_nm_upper, /**< upper bound for nomerge values */

  /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
     we need to distinguish the three possible cases (no modifier, monotonic
     modifier, nonmonotonic modifier), we need separate bits for each modifier.
     The absence of monotonic does not imply nonmonotonic, especially since 4.5
     says that the behaviour of the "no modifier" case is implementation defined
     in 4.5, but will become "nonmonotonic" in 5.0.

     Since we're passing a full 32 bit value, we can use a couple of high bits
     for these flags; out of paranoia we avoid the sign bit.

     These modifiers can be or-ed into non-static schedules by the compiler to
     pass the additional information. They will be stripped early in the
     processing in __kmp_dispatch_init when setting up schedules, so most of the
     code won't ever see schedules with these bits set.  */
  kmp_sch_modifier_monotonic =
      (1 << 29), /**< Set if the monotonic schedule modifier was present */
  kmp_sch_modifier_nonmonotonic =
      (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */

#define SCHEDULE_WITHOUT_MODIFIERS(s)                                          \
  (enum sched_type)(                                                           \
      (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
#define SCHEDULE_HAS_NO_MODIFIERS(s)                                           \
  (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
#define SCHEDULE_GET_MODIFIERS(s)                                              \
  ((enum sched_type)(                                                          \
      (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
#define SCHEDULE_SET_MODIFIERS(s, m)                                           \
  (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
#define SCHEDULE_NONMONOTONIC 0
#define SCHEDULE_MONOTONIC 1

  kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
};
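
// Worked example (illustrative only): how a schedule kind carrying the
// nonmonotonic modifier decomposes under the macros above.
//   enum sched_type s = (enum sched_type)(kmp_sch_dynamic_chunked |
//                                         kmp_sch_modifier_nonmonotonic);
//   SCHEDULE_HAS_NONMONOTONIC(s);  // != 0
//   SCHEDULE_HAS_MONOTONIC(s);     // == 0
//   SCHEDULE_WITHOUT_MODIFIERS(s); // == kmp_sch_dynamic_chunked (35)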

// Apply modifiers on internal kind to standard kind
static inline void
__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
                               enum sched_type internal_kind) {
  if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
    *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
  }
}

// Apply modifiers on standard kind to internal kind
static inline void
__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
                               enum sched_type *internal_kind) {
  if ((int)kind & (int)kmp_sched_monotonic) {
    *internal_kind = (enum sched_type)((int)*internal_kind |
                                       (int)kmp_sch_modifier_monotonic);
  }
}

// Get standard schedule without modifiers
static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
  return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
}

/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef union kmp_r_sched {
  struct {
    enum sched_type r_sched_type;
    int chunk;
  };
  kmp_int64 sched;
} kmp_r_sched_t;

extern enum sched_type __kmp_sch_map[]; // maps OMP 3.0 schedule types to our
// internal schedule types

enum library_type {
  library_none,
  library_serial,
  library_turnaround,
  library_throughput
};

#if KMP_OS_LINUX
enum clock_function_type {
  clock_function_gettimeofday,
  clock_function_clock_gettime
};
#endif /* KMP_OS_LINUX */

#if KMP_MIC_SUPPORTED
enum mic_type { non_mic, mic1, mic2, mic3, dummy };
#endif

/* -- fast reduction stuff ------------------------------------------------ */

#undef KMP_FAST_REDUCTION_BARRIER
#define KMP_FAST_REDUCTION_BARRIER 1

#undef KMP_FAST_REDUCTION_CORE_DUO
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_FAST_REDUCTION_CORE_DUO 1
#endif

enum _reduction_method {
  reduction_method_not_defined = 0,
  critical_reduce_block = (1 << 8),
  atomic_reduce_block = (2 << 8),
  tree_reduce_block = (3 << 8),
  empty_reduce_block = (4 << 8)
};

// Description of the packed_reduction_method variable:
// The packed_reduction_method variable packs two enum values into its two
// lowest bytes:
// byte 0: (packed_reduction_method & 0x000000FF) is the 'enum barrier_type'
// value of the barrier to be used in fast reduction: bs_plain_barrier or
// bs_reduction_barrier
// byte 1: (packed_reduction_method & 0x0000FF00) is the reduction method to
// be used in fast reduction;
// The reduction method is of 'enum _reduction_method' type and is defined so
// that the bits of byte 0 are empty, so no shift instruction is needed when
// packing/unpacking

#if KMP_FAST_REDUCTION_BARRIER
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)      \
  ((reduction_method) | (barrier_type))

#define UNPACK_REDUCTION_METHOD(packed_reduction_method)                       \
  ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method)                      \
  ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
#else
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)      \
  (reduction_method)

#define UNPACK_REDUCTION_METHOD(packed_reduction_method)                       \
  (packed_reduction_method)

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
#endif

#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block)  \
  ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) ==                       \
   (which_reduction_block))

#if KMP_FAST_REDUCTION_BARRIER
#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER                               \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))

#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER                                   \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
#endif

typedef int PACKED_REDUCTION_METHOD_T;
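
// Worked example (illustrative only, assuming KMP_FAST_REDUCTION_BARRIER):
//   PACKED_REDUCTION_METHOD_T p = PACK_REDUCTION_METHOD_AND_BARRIER(
//       atomic_reduce_block, bs_reduction_barrier);
//   // atomic_reduce_block == (2 << 8), so byte 1 of p holds the method and
//   // byte 0 holds the barrier type; no shift is needed to pack or unpack.
//   UNPACK_REDUCTION_METHOD(p);  // == atomic_reduce_block
//   UNPACK_REDUCTION_BARRIER(p); // == bs_reduction_barrier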

/* -- end of fast reduction stuff ----------------------------------------- */

#if KMP_OS_WINDOWS
#define USE_CBLKDATA
#if KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable : 271 310)
#endif
#include <windows.h>
#if KMP_MSVC_COMPAT
#pragma warning(pop)
#endif
#endif

#if KMP_OS_UNIX
#include <dlfcn.h>
#include <pthread.h>
#endif

/* Only Linux* OS and Windows* OS support thread affinity. */
#if KMP_AFFINITY_SUPPORTED

// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
#if KMP_OS_WINDOWS
#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
typedef struct GROUP_AFFINITY {
  KAFFINITY Mask;
  WORD Group;
  WORD Reserved[3];
} GROUP_AFFINITY;
#endif /* _MSC_VER < 1600 */
#if KMP_GROUP_AFFINITY
extern int __kmp_num_proc_groups;
#else
static const int __kmp_num_proc_groups = 1;
#endif /* KMP_GROUP_AFFINITY */
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;

typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;

typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;

typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
                                             GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
#endif /* KMP_OS_WINDOWS */

#if KMP_USE_HWLOC
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
extern int __kmp_numa_detected;
extern int __kmp_tile_depth;
#endif

extern size_t __kmp_affin_mask_size;
#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
#define KMP_CPU_SET_ITERATE(i, mask)                                           \
  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
#define KMP_CPU_SET(i, mask) (mask)->set(i)
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
#define KMP_CPU_ALLOC_ARRAY(arr, n)                                            \
  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
#define KMP_CPU_FREE_ARRAY(arr, n)                                             \
  __kmp_affinity_dispatch->deallocate_mask_array(arr)
#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
#define __kmp_get_system_affinity(mask, abort_bool)                            \
  (mask)->get_system_affinity(abort_bool)
#define __kmp_set_system_affinity(mask, abort_bool)                            \
  (mask)->set_system_affinity(abort_bool)
#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
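
// Typical use of the mask macros above (illustrative sketch only; the proc
// number and the lack of error handling are assumptions):
//   kmp_affin_mask_t *mask;
//   KMP_CPU_ALLOC(mask);
//   KMP_CPU_ZERO(mask);
//   KMP_CPU_SET(3, mask);                  // add OS proc 3 to the mask
//   __kmp_set_system_affinity(mask, TRUE); // bind the calling thread
//   KMP_CPU_FREE(mask);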

class KMPAffinity {
public:
  class Mask {
  public:
    void *operator new(size_t n);
    void operator delete(void *p);
    void *operator new[](size_t n);
    void operator delete[](void *p);
    virtual ~Mask() {}
    // Set bit i to 1
    virtual void set(int i) {}
    // Return bit i
    virtual bool is_set(int i) const { return false; }
    // Set bit i to 0
    virtual void clear(int i) {}
    // Zero out entire mask
    virtual void zero() {}
    // Copy src into this mask
    virtual void copy(const Mask *src) {}
    // this &= rhs
    virtual void bitwise_and(const Mask *rhs) {}
    // this |= rhs
    virtual void bitwise_or(const Mask *rhs) {}
    // this = ~this
    virtual void bitwise_not() {}
    // API for iterating over an affinity mask
    // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
    virtual int begin() const { return 0; }
    virtual int end() const { return 0; }
    virtual int next(int previous) const { return 0; }
    // Set the system's affinity to this affinity mask's value
    virtual int set_system_affinity(bool abort_on_error) const { return -1; }
    // Set this affinity mask to the current system affinity
    virtual int get_system_affinity(bool abort_on_error) { return -1; }
    // Only 1 DWORD in the mask should have any procs set.
    // Return the appropriate index, or -1 for an invalid mask.
    virtual int get_proc_group() const { return -1; }
  };
  void *operator new(size_t n);
  void operator delete(void *p);
  // Need virtual destructor
  virtual ~KMPAffinity() = default;
  // Determine if affinity is capable
  virtual void determine_capable(const char *env_var) {}
  // Bind the current thread to os proc
  virtual void bind_thread(int proc) {}
  // Factory functions to allocate/deallocate a mask
  virtual Mask *allocate_mask() { return nullptr; }
  virtual void deallocate_mask(Mask *m) {}
  virtual Mask *allocate_mask_array(int num) { return nullptr; }
  virtual void deallocate_mask_array(Mask *m) {}
  virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
  static void pick_api();
  static void destroy_api();
  enum api_type {
    NATIVE_OS
#if KMP_USE_HWLOC
    ,
    HWLOC
#endif
  };
  virtual api_type get_api_type() const {
    KMP_ASSERT(0);
    return NATIVE_OS;
  }

private:
  static bool picked_api;
};

typedef KMPAffinity::Mask kmp_affin_mask_t;
extern KMPAffinity *__kmp_affinity_dispatch;

// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
#define KMP_AFFIN_MASK_PRINT_LEN 1024

enum affinity_type {
  affinity_none = 0,
  affinity_physical,
  affinity_logical,
  affinity_compact,
  affinity_scatter,
  affinity_explicit,
  affinity_balanced,
  affinity_disabled, // not used outside the env var parser
  affinity_default
};

enum affinity_gran {
  affinity_gran_fine = 0,
  affinity_gran_thread,
  affinity_gran_core,
  affinity_gran_tile,
  affinity_gran_numa,
  affinity_gran_package,
  affinity_gran_node,
#if KMP_GROUP_AFFINITY
  // The "group" granularity isn't necessarily coarser than all of the
  // other levels, but we put it last in the enum.
  affinity_gran_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_gran_default
};

enum affinity_top_method {
  affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  affinity_top_method_apicid,
  affinity_top_method_x2apicid,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
  affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_top_method_flat,
#if KMP_USE_HWLOC
  affinity_top_method_hwloc,
#endif
  affinity_top_method_default
};

#define affinity_respect_mask_default (-1)

extern enum affinity_type __kmp_affinity_type; /* Affinity type */
extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
extern int __kmp_affinity_gran_levels; /* corresponding int value */
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
extern int __kmp_affinity_compact; /* Affinity 'compact' value */
extern int __kmp_affinity_offset; /* Affinity offset value  */
extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
extern char *__kmp_affinity_proclist; /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks;
extern unsigned __kmp_affinity_num_masks;
extern void __kmp_affinity_bind_thread(int which);

extern kmp_affin_mask_t *__kmp_affin_fullMask;
extern char *__kmp_cpuinfo_file;

#endif /* KMP_AFFINITY_SUPPORTED */

// This needs to be kept in sync with the values in omp.h !!!
typedef enum kmp_proc_bind_t {
  proc_bind_false = 0,
  proc_bind_true,
  proc_bind_master,
  proc_bind_close,
  proc_bind_spread,
  proc_bind_intel, // use KMP_AFFINITY interface
  proc_bind_default
} kmp_proc_bind_t;

typedef struct kmp_nested_proc_bind_t {
  kmp_proc_bind_t *bind_types;
  int size;
  int used;
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;

extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;

#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)
// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
#define KMP_AFFINITY_NON_PROC_BIND                                             \
  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false ||                 \
    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) &&                \
   (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))
#endif /* KMP_AFFINITY_SUPPORTED */

extern int __kmp_affinity_num_places;

typedef enum kmp_cancel_kind_t {
  cancel_noreq = 0,
  cancel_parallel = 1,
  cancel_loop = 2,
  cancel_sections = 3,
  cancel_taskgroup = 4
} kmp_cancel_kind_t;

// KMP_HW_SUBSET support:
typedef struct kmp_hws_item {
  int num;
  int offset;
} kmp_hws_item_t;

extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested

/* ------------------------------------------------------------------------ */

#define KMP_PAD(type, sz)                                                      \
  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
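
// e.g., KMP_PAD(kmp_int32, 64) == 64: the type's size is rounded up to a
// multiple of the requested alignment (illustrative arithmetic only).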
8590b57cec5SDimitry Andric 
8600b57cec5SDimitry Andric // We need to avoid using -1 as a GTID as +1 is added to the gtid
8610b57cec5SDimitry Andric // when storing it in a lock, and the value 0 is reserved.
8620b57cec5SDimitry Andric #define KMP_GTID_DNE (-2) /* Does not exist */
8630b57cec5SDimitry Andric #define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
8640b57cec5SDimitry Andric #define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
8650b57cec5SDimitry Andric #define KMP_GTID_UNKNOWN (-5) /* Is not known */
8660b57cec5SDimitry Andric #define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
8670b57cec5SDimitry Andric 
8680b57cec5SDimitry Andric /* OpenMP 5.0 Memory Management support */
8690b57cec5SDimitry Andric 
8700b57cec5SDimitry Andric #ifndef __OMP_H
871480093f4SDimitry Andric // Duplicate type definitions from omp.h
8720b57cec5SDimitry Andric typedef uintptr_t omp_uintptr_t;
8730b57cec5SDimitry Andric 
8740b57cec5SDimitry Andric typedef enum {
875*5ffd83dbSDimitry Andric   omp_atk_threadmodel = 1,
876*5ffd83dbSDimitry Andric   omp_atk_alignment = 2,
877*5ffd83dbSDimitry Andric   omp_atk_access = 3,
878*5ffd83dbSDimitry Andric   omp_atk_pool_size = 4,
879*5ffd83dbSDimitry Andric   omp_atk_fallback = 5,
880*5ffd83dbSDimitry Andric   omp_atk_fb_data = 6,
881*5ffd83dbSDimitry Andric   omp_atk_pinned = 7,
882*5ffd83dbSDimitry Andric   omp_atk_partition = 8
8830b57cec5SDimitry Andric } omp_alloctrait_key_t;
8840b57cec5SDimitry Andric 
8850b57cec5SDimitry Andric typedef enum {
886*5ffd83dbSDimitry Andric   omp_atv_false = 0,
887*5ffd83dbSDimitry Andric   omp_atv_true = 1,
888*5ffd83dbSDimitry Andric   omp_atv_default = 2,
889*5ffd83dbSDimitry Andric   omp_atv_contended = 3,
890*5ffd83dbSDimitry Andric   omp_atv_uncontended = 4,
891*5ffd83dbSDimitry Andric   omp_atv_sequential = 5,
892*5ffd83dbSDimitry Andric   omp_atv_private = 6,
893*5ffd83dbSDimitry Andric   omp_atv_all = 7,
894*5ffd83dbSDimitry Andric   omp_atv_thread = 8,
895*5ffd83dbSDimitry Andric   omp_atv_pteam = 9,
896*5ffd83dbSDimitry Andric   omp_atv_cgroup = 10,
897*5ffd83dbSDimitry Andric   omp_atv_default_mem_fb = 11,
898*5ffd83dbSDimitry Andric   omp_atv_null_fb = 12,
899*5ffd83dbSDimitry Andric   omp_atv_abort_fb = 13,
900*5ffd83dbSDimitry Andric   omp_atv_allocator_fb = 14,
901*5ffd83dbSDimitry Andric   omp_atv_environment = 15,
902*5ffd83dbSDimitry Andric   omp_atv_nearest = 16,
903*5ffd83dbSDimitry Andric   omp_atv_blocked = 17,
904*5ffd83dbSDimitry Andric   omp_atv_interleaved = 18
9050b57cec5SDimitry Andric } omp_alloctrait_value_t;
9060b57cec5SDimitry Andric 
9070b57cec5SDimitry Andric typedef void *omp_memspace_handle_t;
9080b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_default_mem_space;
9090b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_large_cap_mem_space;
9100b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_const_mem_space;
9110b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_high_bw_mem_space;
9120b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_low_lat_mem_space;
9130b57cec5SDimitry Andric 
9140b57cec5SDimitry Andric typedef struct {
9150b57cec5SDimitry Andric   omp_alloctrait_key_t key;
9160b57cec5SDimitry Andric   omp_uintptr_t value;
9170b57cec5SDimitry Andric } omp_alloctrait_t;
9180b57cec5SDimitry Andric 
9190b57cec5SDimitry Andric typedef void *omp_allocator_handle_t;
9200b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_null_allocator;
9210b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_default_mem_alloc;
9220b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
9230b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_const_mem_alloc;
9240b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
9250b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
9260b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
9270b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_pteam_mem_alloc;
9280b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_thread_mem_alloc;
9290b57cec5SDimitry Andric extern omp_allocator_handle_t const kmp_max_mem_alloc;
9300b57cec5SDimitry Andric extern omp_allocator_handle_t __kmp_def_allocator;
9310b57cec5SDimitry Andric 
932480093f4SDimitry Andric // end of duplicate type definitions from omp.h
9330b57cec5SDimitry Andric #endif
9340b57cec5SDimitry Andric 
9350b57cec5SDimitry Andric extern int __kmp_memkind_available;
9360b57cec5SDimitry Andric 
9370b57cec5SDimitry Andric typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
9380b57cec5SDimitry Andric 
9390b57cec5SDimitry Andric typedef struct kmp_allocator_t {
9400b57cec5SDimitry Andric   omp_memspace_handle_t memspace;
9410b57cec5SDimitry Andric   void **memkind; // pointer to memkind
9420b57cec5SDimitry Andric   int alignment;
9430b57cec5SDimitry Andric   omp_alloctrait_value_t fb;
9440b57cec5SDimitry Andric   kmp_allocator_t *fb_data;
9450b57cec5SDimitry Andric   kmp_uint64 pool_size;
9460b57cec5SDimitry Andric   kmp_uint64 pool_used;
9470b57cec5SDimitry Andric } kmp_allocator_t;
9480b57cec5SDimitry Andric 
9490b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
9500b57cec5SDimitry Andric                                                     omp_memspace_handle_t,
9510b57cec5SDimitry Andric                                                     int ntraits,
9520b57cec5SDimitry Andric                                                     omp_alloctrait_t traits[]);
9530b57cec5SDimitry Andric extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
9540b57cec5SDimitry Andric extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
9550b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
9560b57cec5SDimitry Andric extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
9570b57cec5SDimitry Andric extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
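// A minimal illustrative sketch (not a definition from this header) of how a
// caller might drive these entry points; 'gtid' and the trait values below are
// placeholders chosen for the example:
//   omp_alloctrait_t traits[2] = {{omp_atk_alignment, 64},
//                                 {omp_atk_fallback, omp_atv_default_mem_fb}};
//   omp_allocator_handle_t al =
//       __kmpc_init_allocator(gtid, omp_default_mem_space, 2, traits);
//   void *p = __kmpc_alloc(gtid, 1024, al);
//   __kmpc_free(gtid, p, al);
//   __kmpc_destroy_allocator(gtid, al);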
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric extern void __kmp_init_memkind();
9600b57cec5SDimitry Andric extern void __kmp_fini_memkind();
9610b57cec5SDimitry Andric 
9620b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
9630b57cec5SDimitry Andric 
9640b57cec5SDimitry Andric #define KMP_UINT64_MAX                                                         \
9650b57cec5SDimitry Andric   (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
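// For a 64-bit kmp_uint64 this expands to ~((kmp_uint64)1 << 63), i.e.
// 0x7fffffffffffffff (all bits set except the most significant one).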
9660b57cec5SDimitry Andric 
9670b57cec5SDimitry Andric #define KMP_MIN_NTH 1
9680b57cec5SDimitry Andric 
9690b57cec5SDimitry Andric #ifndef KMP_MAX_NTH
9700b57cec5SDimitry Andric #if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
9710b57cec5SDimitry Andric #define KMP_MAX_NTH PTHREAD_THREADS_MAX
9720b57cec5SDimitry Andric #else
9730b57cec5SDimitry Andric #define KMP_MAX_NTH INT_MAX
9740b57cec5SDimitry Andric #endif
9750b57cec5SDimitry Andric #endif /* KMP_MAX_NTH */
9760b57cec5SDimitry Andric 
9770b57cec5SDimitry Andric #ifdef PTHREAD_STACK_MIN
9780b57cec5SDimitry Andric #define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
9790b57cec5SDimitry Andric #else
9800b57cec5SDimitry Andric #define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
9810b57cec5SDimitry Andric #endif
9820b57cec5SDimitry Andric 
9830b57cec5SDimitry Andric #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
9840b57cec5SDimitry Andric 
9850b57cec5SDimitry Andric #if KMP_ARCH_X86
9860b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
9870b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
9880b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
9890b57cec5SDimitry Andric #define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
9900b57cec5SDimitry Andric #else
9910b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
9920b57cec5SDimitry Andric #endif
9930b57cec5SDimitry Andric 
9940b57cec5SDimitry Andric #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
9950b57cec5SDimitry Andric #define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
9960b57cec5SDimitry Andric #define KMP_MAX_MALLOC_POOL_INCR                                               \
9970b57cec5SDimitry Andric   (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
9980b57cec5SDimitry Andric 
9990b57cec5SDimitry Andric #define KMP_MIN_STKOFFSET (0)
10000b57cec5SDimitry Andric #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
10010b57cec5SDimitry Andric #if KMP_OS_DARWIN
10020b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
10030b57cec5SDimitry Andric #else
10040b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET CACHE_LINE
10050b57cec5SDimitry Andric #endif
10060b57cec5SDimitry Andric 
10070b57cec5SDimitry Andric #define KMP_MIN_STKPADDING (0)
10080b57cec5SDimitry Andric #define KMP_MAX_STKPADDING (2 * 1024 * 1024)
10090b57cec5SDimitry Andric 
10100b57cec5SDimitry Andric #define KMP_BLOCKTIME_MULTIPLIER                                               \
10110b57cec5SDimitry Andric   (1000) /* number of blocktime units per second */
10120b57cec5SDimitry Andric #define KMP_MIN_BLOCKTIME (0)
10130b57cec5SDimitry Andric #define KMP_MAX_BLOCKTIME                                                      \
10140b57cec5SDimitry Andric   (INT_MAX) /* Must be this for the "infinite" setting to work */
10150b57cec5SDimitry Andric #define KMP_DEFAULT_BLOCKTIME (200) /*  __kmp_blocktime is in milliseconds  */
10160b57cec5SDimitry Andric 
10170b57cec5SDimitry Andric #if KMP_USE_MONITOR
10180b57cec5SDimitry Andric #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
10190b57cec5SDimitry Andric #define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
10200b57cec5SDimitry Andric #define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
10210b57cec5SDimitry Andric 
10220b57cec5SDimitry Andric /* Calculate the new number of monitor wakeups for a specific block time, based
10230b57cec5SDimitry Andric    on the previous monitor_wakeups. Only allow the number of wakeups to increase. */
10240b57cec5SDimitry Andric #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups)                 \
10250b57cec5SDimitry Andric   (((blocktime) == KMP_MAX_BLOCKTIME)                                          \
10260b57cec5SDimitry Andric        ? (monitor_wakeups)                                                     \
10270b57cec5SDimitry Andric        : ((blocktime) == KMP_MIN_BLOCKTIME)                                    \
10280b57cec5SDimitry Andric              ? KMP_MAX_MONITOR_WAKEUPS                                         \
10290b57cec5SDimitry Andric              : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))  \
10300b57cec5SDimitry Andric                    ? (monitor_wakeups)                                         \
10310b57cec5SDimitry Andric                    : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
10320b57cec5SDimitry Andric 
10330b57cec5SDimitry Andric /* Calculate number of intervals for a specific block time based on
10340b57cec5SDimitry Andric    monitor_wakeups */
10350b57cec5SDimitry Andric #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups)               \
10360b57cec5SDimitry Andric   (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) /        \
10370b57cec5SDimitry Andric    (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
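/* Worked example (KMP_BLOCKTIME_MULTIPLIER == 1000): with blocktime = 200 ms
   and monitor_wakeups = 1, KMP_WAKEUPS_FROM_BLOCKTIME yields 1000 / 200 = 5
   wakeups per second (the larger of the current value and 1000/blocktime), and
   KMP_INTERVALS_FROM_BLOCKTIME(200, 5) = (200 + 199) / 200 = 1 interval. */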
10380b57cec5SDimitry Andric #else
10390b57cec5SDimitry Andric #define KMP_BLOCKTIME(team, tid)                                               \
10400b57cec5SDimitry Andric   (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
10410b57cec5SDimitry Andric #if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
10420b57cec5SDimitry Andric // The HW TSC is used to reduce overhead (clock ticks instead of nanoseconds).
10430b57cec5SDimitry Andric extern kmp_uint64 __kmp_ticks_per_msec;
10440b57cec5SDimitry Andric #if KMP_COMPILER_ICC
10450b57cec5SDimitry Andric #define KMP_NOW() ((kmp_uint64)_rdtsc())
10460b57cec5SDimitry Andric #else
10470b57cec5SDimitry Andric #define KMP_NOW() __kmp_hardware_timestamp()
10480b57cec5SDimitry Andric #endif
10490b57cec5SDimitry Andric #define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
10500b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid)                                      \
10510b57cec5SDimitry Andric   (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
10520b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
10530b57cec5SDimitry Andric #else
10540b57cec5SDimitry Andric // System time is retrieved sporadically while blocking.
10550b57cec5SDimitry Andric extern kmp_uint64 __kmp_now_nsec();
10560b57cec5SDimitry Andric #define KMP_NOW() __kmp_now_nsec()
10570b57cec5SDimitry Andric #define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
10580b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid)                                      \
10590b57cec5SDimitry Andric   (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
10600b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
10610b57cec5SDimitry Andric #endif
10620b57cec5SDimitry Andric #endif // KMP_USE_MONITOR
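// Illustrative blocking-wait sketch built from the non-monitor macros above
// (hypothetical caller, not code from this header; 'flag_not_set' is a
// placeholder condition, KMP_CPU_PAUSE is defined further below):
//   kmp_uint64 goal = KMP_NOW() + KMP_BLOCKTIME_INTERVAL(team, tid);
//   kmp_uint32 count = 0;
//   while (flag_not_set && KMP_BLOCKING(goal, ++count)) {
//     KMP_CPU_PAUSE(); // spin until the blocktime budget is exhausted
//   }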
10630b57cec5SDimitry Andric 
10640b57cec5SDimitry Andric #define KMP_MIN_STATSCOLS 40
10650b57cec5SDimitry Andric #define KMP_MAX_STATSCOLS 4096
10660b57cec5SDimitry Andric #define KMP_DEFAULT_STATSCOLS 80
10670b57cec5SDimitry Andric 
10680b57cec5SDimitry Andric #define KMP_MIN_INTERVAL 0
10690b57cec5SDimitry Andric #define KMP_MAX_INTERVAL (INT_MAX - 1)
10700b57cec5SDimitry Andric #define KMP_DEFAULT_INTERVAL 0
10710b57cec5SDimitry Andric 
10720b57cec5SDimitry Andric #define KMP_MIN_CHUNK 1
10730b57cec5SDimitry Andric #define KMP_MAX_CHUNK (INT_MAX - 1)
10740b57cec5SDimitry Andric #define KMP_DEFAULT_CHUNK 1
10750b57cec5SDimitry Andric 
10760b57cec5SDimitry Andric #define KMP_DFLT_DISP_NUM_BUFF 7
10770b57cec5SDimitry Andric #define KMP_MAX_ORDERED 8
10780b57cec5SDimitry Andric 
10790b57cec5SDimitry Andric #define KMP_MAX_FIELDS 32
10800b57cec5SDimitry Andric 
10810b57cec5SDimitry Andric #define KMP_MAX_BRANCH_BITS 31
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
10840b57cec5SDimitry Andric 
10850b57cec5SDimitry Andric #define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX
10860b57cec5SDimitry Andric 
10870b57cec5SDimitry Andric #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
10880b57cec5SDimitry Andric 
10890b57cec5SDimitry Andric /* Minimum number of threads before switch to TLS gtid (experimentally
10900b57cec5SDimitry Andric    determined) */
10910b57cec5SDimitry Andric /* josh TODO: what about OS X* tuning? */
10920b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
10930b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN 5
10940b57cec5SDimitry Andric #else
10950b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN INT_MAX
10960b57cec5SDimitry Andric #endif
10970b57cec5SDimitry Andric 
10980b57cec5SDimitry Andric #define KMP_MASTER_TID(tid) ((tid) == 0)
10990b57cec5SDimitry Andric #define KMP_WORKER_TID(tid) ((tid) != 0)
11000b57cec5SDimitry Andric 
11010b57cec5SDimitry Andric #define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0)
11020b57cec5SDimitry Andric #define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0)
11030b57cec5SDimitry Andric #define KMP_INITIAL_GTID(gtid) ((gtid) == 0)
11040b57cec5SDimitry Andric 
11050b57cec5SDimitry Andric #ifndef TRUE
11060b57cec5SDimitry Andric #define FALSE 0
11070b57cec5SDimitry Andric #define TRUE (!FALSE)
11080b57cec5SDimitry Andric #endif
11090b57cec5SDimitry Andric 
11100b57cec5SDimitry Andric /* NOTE: all of the following constants must be even */
11110b57cec5SDimitry Andric 
11120b57cec5SDimitry Andric #if KMP_OS_WINDOWS
11130b57cec5SDimitry Andric #define KMP_INIT_WAIT 64U /* initial number of spin-tests   */
11140b57cec5SDimitry Andric #define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
11150b57cec5SDimitry Andric #elif KMP_OS_CNK
11160b57cec5SDimitry Andric #define KMP_INIT_WAIT 16U /* initial number of spin-tests   */
11170b57cec5SDimitry Andric #define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
11180b57cec5SDimitry Andric #elif KMP_OS_LINUX
11190b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11200b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11210b57cec5SDimitry Andric #elif KMP_OS_DARWIN
11220b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DARWIN */
11230b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11240b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11250b57cec5SDimitry Andric #elif KMP_OS_DRAGONFLY
11260b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DRAGONFLY */
11270b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11280b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11290b57cec5SDimitry Andric #elif KMP_OS_FREEBSD
11300b57cec5SDimitry Andric /* TODO: tune for KMP_OS_FREEBSD */
11310b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11320b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11330b57cec5SDimitry Andric #elif KMP_OS_NETBSD
11340b57cec5SDimitry Andric /* TODO: tune for KMP_OS_NETBSD */
11350b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11360b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11370b57cec5SDimitry Andric #elif KMP_OS_HURD
11380b57cec5SDimitry Andric /* TODO: tune for KMP_OS_HURD */
11390b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11400b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11410b57cec5SDimitry Andric #elif KMP_OS_OPENBSD
11420b57cec5SDimitry Andric /* TODO: tune for KMP_OS_OPENBSD */
11430b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
11440b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
11450b57cec5SDimitry Andric #endif
11460b57cec5SDimitry Andric 
11470b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
11480b57cec5SDimitry Andric typedef struct kmp_cpuid {
11490b57cec5SDimitry Andric   kmp_uint32 eax;
11500b57cec5SDimitry Andric   kmp_uint32 ebx;
11510b57cec5SDimitry Andric   kmp_uint32 ecx;
11520b57cec5SDimitry Andric   kmp_uint32 edx;
11530b57cec5SDimitry Andric } kmp_cpuid_t;
11540b57cec5SDimitry Andric 
11550b57cec5SDimitry Andric typedef struct kmp_cpuinfo {
11560b57cec5SDimitry Andric   int initialized; // If 0, other fields are not initialized.
11570b57cec5SDimitry Andric   int signature; // CPUID(1).EAX
11580b57cec5SDimitry Andric   int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
11590b57cec5SDimitry Andric   int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
11600b57cec5SDimitry Andric   // Model << 4 ) + Model)
11610b57cec5SDimitry Andric   int stepping; // CPUID(1).EAX[3:0] ( Stepping )
11620b57cec5SDimitry Andric   int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
11630b57cec5SDimitry Andric   int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
11640b57cec5SDimitry Andric   int cpu_stackoffset;
11650b57cec5SDimitry Andric   int apic_id;
11660b57cec5SDimitry Andric   int physical_id;
11670b57cec5SDimitry Andric   int logical_id;
11680b57cec5SDimitry Andric   kmp_uint64 frequency; // Nominal CPU frequency in Hz.
11690b57cec5SDimitry Andric   char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
11700b57cec5SDimitry Andric } kmp_cpuinfo_t;
11710b57cec5SDimitry Andric 
11720b57cec5SDimitry Andric extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
11730b57cec5SDimitry Andric 
11740b57cec5SDimitry Andric #if KMP_OS_UNIX
11750b57cec5SDimitry Andric // subleaf is only needed for cache and topology discovery and can be set to
11760b57cec5SDimitry Andric // zero in most cases
11770b57cec5SDimitry Andric static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
11780b57cec5SDimitry Andric   __asm__ __volatile__("cpuid"
11790b57cec5SDimitry Andric                        : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
11800b57cec5SDimitry Andric                        : "a"(leaf), "c"(subleaf));
11810b57cec5SDimitry Andric }
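// Example (illustrative): query leaf 1 and test the SSE2 feature bit
// (CPUID.01H:EDX bit 26):
//   struct kmp_cpuid info;
//   __kmp_x86_cpuid(1, 0, &info);
//   int has_sse2 = (info.edx >> 26) & 1;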
11820b57cec5SDimitry Andric // Load p into FPU control word
11830b57cec5SDimitry Andric static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
11840b57cec5SDimitry Andric   __asm__ __volatile__("fldcw %0" : : "m"(*p));
11850b57cec5SDimitry Andric }
11860b57cec5SDimitry Andric // Store FPU control word into p
11870b57cec5SDimitry Andric static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
11880b57cec5SDimitry Andric   __asm__ __volatile__("fstcw %0" : "=m"(*p));
11890b57cec5SDimitry Andric }
11900b57cec5SDimitry Andric static inline void __kmp_clear_x87_fpu_status_word() {
11910b57cec5SDimitry Andric #if KMP_MIC
11920b57cec5SDimitry Andric   // 32-bit protected mode x87 FPU state
11930b57cec5SDimitry Andric   struct x87_fpu_state {
11940b57cec5SDimitry Andric     unsigned cw;
11950b57cec5SDimitry Andric     unsigned sw;
11960b57cec5SDimitry Andric     unsigned tw;
11970b57cec5SDimitry Andric     unsigned fip;
11980b57cec5SDimitry Andric     unsigned fips;
11990b57cec5SDimitry Andric     unsigned fdp;
12000b57cec5SDimitry Andric     unsigned fds;
12010b57cec5SDimitry Andric   };
12020b57cec5SDimitry Andric   struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
12030b57cec5SDimitry Andric   __asm__ __volatile__("fstenv %0\n\t" // store FP env
12040b57cec5SDimitry Andric                        "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
12050b57cec5SDimitry Andric                        "fldenv %0\n\t" // load FP env back
12060b57cec5SDimitry Andric                        : "+m"(fpu_state), "+m"(fpu_state.sw));
12070b57cec5SDimitry Andric #else
12080b57cec5SDimitry Andric   __asm__ __volatile__("fnclex");
12090b57cec5SDimitry Andric #endif // KMP_MIC
12100b57cec5SDimitry Andric }
12110b57cec5SDimitry Andric #if __SSE__
12120b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
12130b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
12140b57cec5SDimitry Andric #else
12150b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
12160b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
12170b57cec5SDimitry Andric #endif
12180b57cec5SDimitry Andric #else
12190b57cec5SDimitry Andric // Windows still has these as external functions in assembly file
12200b57cec5SDimitry Andric extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
12210b57cec5SDimitry Andric extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
12220b57cec5SDimitry Andric extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
12230b57cec5SDimitry Andric extern void __kmp_clear_x87_fpu_status_word();
12240b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
12250b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
12260b57cec5SDimitry Andric #endif // KMP_OS_UNIX
12270b57cec5SDimitry Andric 
12280b57cec5SDimitry Andric #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
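// Illustrative use of the mask (a sketch, not code from this header): compare
// only the control bits of two MXCSR values, ignoring the sticky status flags:
//   kmp_uint32 a, b; // e.g. this thread's and another thread's MXCSR values
//   __kmp_store_mxcsr(&a);
//   /* ... obtain b elsewhere ... */
//   int same_ctrl = ((a & KMP_X86_MXCSR_MASK) == (b & KMP_X86_MXCSR_MASK));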
12290b57cec5SDimitry Andric 
12300b57cec5SDimitry Andric #if KMP_ARCH_X86
12310b57cec5SDimitry Andric extern void __kmp_x86_pause(void);
12320b57cec5SDimitry Andric #elif KMP_MIC
12330b57cec5SDimitry Andric // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
12340b57cec5SDimitry Andric // regression after removal of extra PAUSE from spin loops. Changing
12350b57cec5SDimitry Andric // the delay from 100 to 300 showed even better performance than double PAUSE
12360b57cec5SDimitry Andric // on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
12370b57cec5SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
12380b57cec5SDimitry Andric #else
12390b57cec5SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_pause(); }
12400b57cec5SDimitry Andric #endif
12410b57cec5SDimitry Andric #define KMP_CPU_PAUSE() __kmp_x86_pause()
12420b57cec5SDimitry Andric #elif KMP_ARCH_PPC64
12430b57cec5SDimitry Andric #define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
12440b57cec5SDimitry Andric #define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
12450b57cec5SDimitry Andric #define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
12460b57cec5SDimitry Andric #define KMP_CPU_PAUSE()                                                        \
12470b57cec5SDimitry Andric   do {                                                                         \
12480b57cec5SDimitry Andric     KMP_PPC64_PRI_LOW();                                                       \
12490b57cec5SDimitry Andric     KMP_PPC64_PRI_MED();                                                       \
12500b57cec5SDimitry Andric     KMP_PPC64_PRI_LOC_MB();                                                    \
12510b57cec5SDimitry Andric   } while (0)
12520b57cec5SDimitry Andric #else
12530b57cec5SDimitry Andric #define KMP_CPU_PAUSE() /* nothing to do */
12540b57cec5SDimitry Andric #endif
12550b57cec5SDimitry Andric 
12560b57cec5SDimitry Andric #define KMP_INIT_YIELD(count)                                                  \
12570b57cec5SDimitry Andric   { (count) = __kmp_yield_init; }
12580b57cec5SDimitry Andric 
12590b57cec5SDimitry Andric #define KMP_OVERSUBSCRIBED                                                     \
12600b57cec5SDimitry Andric   (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
12610b57cec5SDimitry Andric 
12620b57cec5SDimitry Andric #define KMP_TRY_YIELD                                                          \
12630b57cec5SDimitry Andric   ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
12640b57cec5SDimitry Andric 
12650b57cec5SDimitry Andric #define KMP_TRY_YIELD_OVERSUB                                                  \
12660b57cec5SDimitry Andric   ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
12670b57cec5SDimitry Andric 
12680b57cec5SDimitry Andric #define KMP_YIELD(cond)                                                        \
12690b57cec5SDimitry Andric   {                                                                            \
12700b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
12710b57cec5SDimitry Andric     if ((cond) && (KMP_TRY_YIELD))                                             \
12720b57cec5SDimitry Andric       __kmp_yield();                                                           \
12730b57cec5SDimitry Andric   }
12740b57cec5SDimitry Andric 
12750b57cec5SDimitry Andric #define KMP_YIELD_OVERSUB()                                                    \
12760b57cec5SDimitry Andric   {                                                                            \
12770b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
12780b57cec5SDimitry Andric     if ((KMP_TRY_YIELD_OVERSUB))                                               \
12790b57cec5SDimitry Andric       __kmp_yield();                                                           \
12800b57cec5SDimitry Andric   }
12810b57cec5SDimitry Andric 
12820b57cec5SDimitry Andric // Note the decrement by 2 in the following macros. With KMP_LIBRARY=turnaround,
12830b57cec5SDimitry Andric // there should be no yielding, since the initial value from KMP_INIT_YIELD() is odd.
12840b57cec5SDimitry Andric #define KMP_YIELD_SPIN(count)                                                  \
12850b57cec5SDimitry Andric   {                                                                            \
12860b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
12870b57cec5SDimitry Andric     if (KMP_TRY_YIELD) {                                                       \
12880b57cec5SDimitry Andric       (count) -= 2;                                                            \
12890b57cec5SDimitry Andric       if (!(count)) {                                                          \
12900b57cec5SDimitry Andric         __kmp_yield();                                                         \
12910b57cec5SDimitry Andric         (count) = __kmp_yield_next;                                            \
12920b57cec5SDimitry Andric       }                                                                        \
12930b57cec5SDimitry Andric     }                                                                          \
12940b57cec5SDimitry Andric   }
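// Illustrative use of KMP_YIELD_SPIN in a spin-wait loop (hypothetical caller;
// 'flag_ready' is a placeholder condition, not defined in this header):
//   kmp_uint32 spins;
//   KMP_INIT_YIELD(spins);
//   while (!flag_ready) {
//     KMP_YIELD_SPIN(spins); // pause each iteration; yield once the budget is spent
//   }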
12950b57cec5SDimitry Andric 
12960b57cec5SDimitry Andric #define KMP_YIELD_OVERSUB_ELSE_SPIN(count)                                     \
12970b57cec5SDimitry Andric   {                                                                            \
12980b57cec5SDimitry Andric     KMP_CPU_PAUSE();                                                           \
12990b57cec5SDimitry Andric     if ((KMP_TRY_YIELD_OVERSUB))                                               \
13000b57cec5SDimitry Andric       __kmp_yield();                                                           \
13010b57cec5SDimitry Andric     else if (__kmp_use_yield == 1) {                                           \
13020b57cec5SDimitry Andric       (count) -= 2;                                                            \
13030b57cec5SDimitry Andric       if (!(count)) {                                                          \
13040b57cec5SDimitry Andric         __kmp_yield();                                                         \
13050b57cec5SDimitry Andric         (count) = __kmp_yield_next;                                            \
13060b57cec5SDimitry Andric       }                                                                        \
13070b57cec5SDimitry Andric     }                                                                          \
13080b57cec5SDimitry Andric   }
13090b57cec5SDimitry Andric 
13100b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
13110b57cec5SDimitry Andric /* Support datatypes for the orphaned construct nesting checks.             */
13120b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
13130b57cec5SDimitry Andric 
13140b57cec5SDimitry Andric enum cons_type {
13150b57cec5SDimitry Andric   ct_none,
13160b57cec5SDimitry Andric   ct_parallel,
13170b57cec5SDimitry Andric   ct_pdo,
13180b57cec5SDimitry Andric   ct_pdo_ordered,
13190b57cec5SDimitry Andric   ct_psections,
13200b57cec5SDimitry Andric   ct_psingle,
13210b57cec5SDimitry Andric   ct_critical,
13220b57cec5SDimitry Andric   ct_ordered_in_parallel,
13230b57cec5SDimitry Andric   ct_ordered_in_pdo,
13240b57cec5SDimitry Andric   ct_master,
13250b57cec5SDimitry Andric   ct_reduce,
13260b57cec5SDimitry Andric   ct_barrier
13270b57cec5SDimitry Andric };
13280b57cec5SDimitry Andric 
13290b57cec5SDimitry Andric #define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
13300b57cec5SDimitry Andric 
13310b57cec5SDimitry Andric struct cons_data {
13320b57cec5SDimitry Andric   ident_t const *ident;
13330b57cec5SDimitry Andric   enum cons_type type;
13340b57cec5SDimitry Andric   int prev;
13350b57cec5SDimitry Andric   kmp_user_lock_p
13360b57cec5SDimitry Andric       name; /* address exclusively for critical section name comparison */
13370b57cec5SDimitry Andric };
13380b57cec5SDimitry Andric 
13390b57cec5SDimitry Andric struct cons_header {
13400b57cec5SDimitry Andric   int p_top, w_top, s_top;
13410b57cec5SDimitry Andric   int stack_size, stack_top;
13420b57cec5SDimitry Andric   struct cons_data *stack_data;
13430b57cec5SDimitry Andric };
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric struct kmp_region_info {
13460b57cec5SDimitry Andric   char *text;
13470b57cec5SDimitry Andric   int offset[KMP_MAX_FIELDS];
13480b57cec5SDimitry Andric   int length[KMP_MAX_FIELDS];
13490b57cec5SDimitry Andric };
13500b57cec5SDimitry Andric 
13510b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */
13520b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */
13530b57cec5SDimitry Andric 
13540b57cec5SDimitry Andric #if KMP_OS_WINDOWS
13550b57cec5SDimitry Andric typedef HANDLE kmp_thread_t;
13560b57cec5SDimitry Andric typedef DWORD kmp_key_t;
13570b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
13580b57cec5SDimitry Andric 
13590b57cec5SDimitry Andric #if KMP_OS_UNIX
13600b57cec5SDimitry Andric typedef pthread_t kmp_thread_t;
13610b57cec5SDimitry Andric typedef pthread_key_t kmp_key_t;
13620b57cec5SDimitry Andric #endif
13630b57cec5SDimitry Andric 
13640b57cec5SDimitry Andric extern kmp_key_t __kmp_gtid_threadprivate_key;
13650b57cec5SDimitry Andric 
13660b57cec5SDimitry Andric typedef struct kmp_sys_info {
13670b57cec5SDimitry Andric   long maxrss; /* the maximum resident set size utilized (in kilobytes)     */
13680b57cec5SDimitry Andric   long minflt; /* the number of page faults serviced without any I/O        */
13690b57cec5SDimitry Andric   long majflt; /* the number of page faults serviced that required I/O      */
13700b57cec5SDimitry Andric   long nswap; /* the number of times a process was "swapped" out of memory */
13710b57cec5SDimitry Andric   long inblock; /* the number of times the file system had to perform input  */
13720b57cec5SDimitry Andric   long oublock; /* the number of times the file system had to perform output */
13730b57cec5SDimitry Andric   long nvcsw; /* the number of times a context switch was performed voluntarily */
13740b57cec5SDimitry Andric   long nivcsw; /* the number of times a context switch was forced           */
13750b57cec5SDimitry Andric } kmp_sys_info_t;
13760b57cec5SDimitry Andric 
13770b57cec5SDimitry Andric #if USE_ITT_BUILD
13780b57cec5SDimitry Andric // We cannot include "kmp_itt.h" due to circular dependency. Declare the only
13790b57cec5SDimitry Andric // required type here. Later we will check the type meets requirements.
13800b57cec5SDimitry Andric typedef int kmp_itt_mark_t;
13810b57cec5SDimitry Andric #define KMP_ITT_DEBUG 0
13820b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
13830b57cec5SDimitry Andric 
13840b57cec5SDimitry Andric typedef kmp_int32 kmp_critical_name[8];
13850b57cec5SDimitry Andric 
13860b57cec5SDimitry Andric /*!
13870b57cec5SDimitry Andric @ingroup PARALLEL
13880b57cec5SDimitry Andric The type for a microtask which gets passed to @ref __kmpc_fork_call().
13890b57cec5SDimitry Andric The arguments to the outlined function are
13900b57cec5SDimitry Andric @param global_tid the global thread identity of the thread executing the
13910b57cec5SDimitry Andric function.
1392480093f4SDimitry Andric @param bound_tid  the local identity of the thread executing the function
13930b57cec5SDimitry Andric @param ... pointers to shared variables accessed by the function.
13940b57cec5SDimitry Andric */
13950b57cec5SDimitry Andric typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
13960b57cec5SDimitry Andric typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
13970b57cec5SDimitry Andric                                  ...);
13980b57cec5SDimitry Andric 
13990b57cec5SDimitry Andric /*!
14000b57cec5SDimitry Andric @ingroup THREADPRIVATE
14010b57cec5SDimitry Andric @{
14020b57cec5SDimitry Andric */
14030b57cec5SDimitry Andric /* ---------------------------------------------------------------------------
14040b57cec5SDimitry Andric  */
14050b57cec5SDimitry Andric /* Threadprivate initialization/finalization function declarations */
14060b57cec5SDimitry Andric 
14070b57cec5SDimitry Andric /*  for non-array objects:  __kmpc_threadprivate_register()  */
14080b57cec5SDimitry Andric 
14090b57cec5SDimitry Andric /*!
14100b57cec5SDimitry Andric  Pointer to the constructor function.
14110b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer
14120b57cec5SDimitry Andric */
14130b57cec5SDimitry Andric typedef void *(*kmpc_ctor)(void *);
14140b57cec5SDimitry Andric 
14150b57cec5SDimitry Andric /*!
14160b57cec5SDimitry Andric  Pointer to the destructor function.
14170b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer
14180b57cec5SDimitry Andric */
14190b57cec5SDimitry Andric typedef void (*kmpc_dtor)(
14200b57cec5SDimitry Andric     void * /*, size_t */); /* 2nd arg: magic number for KCC, unused by the
14210b57cec5SDimitry Andric                               Intel compiler */
14220b57cec5SDimitry Andric /*!
14230b57cec5SDimitry Andric  Pointer to an alternate constructor.
14240b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer.
14250b57cec5SDimitry Andric */
14260b57cec5SDimitry Andric typedef void *(*kmpc_cctor)(void *, void *);
14270b57cec5SDimitry Andric 
14280b57cec5SDimitry Andric /* for array objects: __kmpc_threadprivate_register_vec() */
14290b57cec5SDimitry Andric /* First arg: "this" pointer */
14300b57cec5SDimitry Andric /* Last arg: number of array elements */
14310b57cec5SDimitry Andric /*!
14320b57cec5SDimitry Andric  Array constructor.
14330b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer,
14340b57cec5SDimitry Andric  the second argument is the number of array elements.
14350b57cec5SDimitry Andric */
14360b57cec5SDimitry Andric typedef void *(*kmpc_ctor_vec)(void *, size_t);
14370b57cec5SDimitry Andric /*!
14380b57cec5SDimitry Andric  Pointer to the array destructor function.
14390b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer,
14400b57cec5SDimitry Andric  the second argument is the number of array elements.
14410b57cec5SDimitry Andric */
14420b57cec5SDimitry Andric typedef void (*kmpc_dtor_vec)(void *, size_t);
14430b57cec5SDimitry Andric /*!
14440b57cec5SDimitry Andric  Pointer to an alternate array constructor.
14450b57cec5SDimitry Andric  The first argument is the <tt>this</tt> pointer,
14460b57cec5SDimitry Andric  the third argument is the number of array elements.
14470b57cec5SDimitry Andric */
14480b57cec5SDimitry Andric typedef void *(*kmpc_cctor_vec)(void *, void *,
14490b57cec5SDimitry Andric                                 size_t); /* function unused by compiler */
14500b57cec5SDimitry Andric 
14510b57cec5SDimitry Andric /*!
14520b57cec5SDimitry Andric @}
14530b57cec5SDimitry Andric */
14540b57cec5SDimitry Andric 
14550b57cec5SDimitry Andric /* keeps track of threadprivate cache allocations for cleanup later */
14560b57cec5SDimitry Andric typedef struct kmp_cached_addr {
14570b57cec5SDimitry Andric   void **addr; /* address of allocated cache */
14580b57cec5SDimitry Andric   void ***compiler_cache; /* pointer to compiler's cache */
14590b57cec5SDimitry Andric   void *data; /* pointer to global data */
14600b57cec5SDimitry Andric   struct kmp_cached_addr *next; /* pointer to next cached address */
14610b57cec5SDimitry Andric } kmp_cached_addr_t;
14620b57cec5SDimitry Andric 
14630b57cec5SDimitry Andric struct private_data {
14640b57cec5SDimitry Andric   struct private_data *next; /* The next descriptor in the list      */
14650b57cec5SDimitry Andric   void *data; /* The data buffer for this descriptor  */
14660b57cec5SDimitry Andric   int more; /* The repeat count for this descriptor */
14670b57cec5SDimitry Andric   size_t size; /* The data size for this descriptor    */
14680b57cec5SDimitry Andric };
14690b57cec5SDimitry Andric 
14700b57cec5SDimitry Andric struct private_common {
14710b57cec5SDimitry Andric   struct private_common *next;
14720b57cec5SDimitry Andric   struct private_common *link;
14730b57cec5SDimitry Andric   void *gbl_addr;
14740b57cec5SDimitry Andric   void *par_addr; /* par_addr == gbl_addr for MASTER thread */
14750b57cec5SDimitry Andric   size_t cmn_size;
14760b57cec5SDimitry Andric };
14770b57cec5SDimitry Andric 
14780b57cec5SDimitry Andric struct shared_common {
14790b57cec5SDimitry Andric   struct shared_common *next;
14800b57cec5SDimitry Andric   struct private_data *pod_init;
14810b57cec5SDimitry Andric   void *obj_init;
14820b57cec5SDimitry Andric   void *gbl_addr;
14830b57cec5SDimitry Andric   union {
14840b57cec5SDimitry Andric     kmpc_ctor ctor;
14850b57cec5SDimitry Andric     kmpc_ctor_vec ctorv;
14860b57cec5SDimitry Andric   } ct;
14870b57cec5SDimitry Andric   union {
14880b57cec5SDimitry Andric     kmpc_cctor cctor;
14890b57cec5SDimitry Andric     kmpc_cctor_vec cctorv;
14900b57cec5SDimitry Andric   } cct;
14910b57cec5SDimitry Andric   union {
14920b57cec5SDimitry Andric     kmpc_dtor dtor;
14930b57cec5SDimitry Andric     kmpc_dtor_vec dtorv;
14940b57cec5SDimitry Andric   } dt;
14950b57cec5SDimitry Andric   size_t vec_len;
14960b57cec5SDimitry Andric   int is_vec;
14970b57cec5SDimitry Andric   size_t cmn_size;
14980b57cec5SDimitry Andric };
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric #define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
15010b57cec5SDimitry Andric #define KMP_HASH_TABLE_SIZE                                                    \
15020b57cec5SDimitry Andric   (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
15030b57cec5SDimitry Andric #define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
15040b57cec5SDimitry Andric #define KMP_HASH(x)                                                            \
15050b57cec5SDimitry Andric   ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
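// Example: for a (hypothetical) global address 0x1000, KMP_HASH(0x1000) ==
// ((0x1000 >> 3) & 511) == 0, i.e. bucket 0 of the 512-entry table; addresses
// differing only in their low KMP_HASH_SHIFT bits map to the same bucket.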
15060b57cec5SDimitry Andric 
15070b57cec5SDimitry Andric struct common_table {
15080b57cec5SDimitry Andric   struct private_common *data[KMP_HASH_TABLE_SIZE];
15090b57cec5SDimitry Andric };
15100b57cec5SDimitry Andric 
15110b57cec5SDimitry Andric struct shared_table {
15120b57cec5SDimitry Andric   struct shared_common *data[KMP_HASH_TABLE_SIZE];
15130b57cec5SDimitry Andric };
15140b57cec5SDimitry Andric 
15150b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
15160b57cec5SDimitry Andric 
15170b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
15180b57cec5SDimitry Andric // Shared barrier data that exists inside a single unit of the scheduling
15190b57cec5SDimitry Andric // hierarchy
15200b57cec5SDimitry Andric typedef struct kmp_hier_private_bdata_t {
15210b57cec5SDimitry Andric   kmp_int32 num_active;
15220b57cec5SDimitry Andric   kmp_uint64 index;
15230b57cec5SDimitry Andric   kmp_uint64 wait_val[2];
15240b57cec5SDimitry Andric } kmp_hier_private_bdata_t;
15250b57cec5SDimitry Andric #endif
15260b57cec5SDimitry Andric 
15270b57cec5SDimitry Andric typedef struct kmp_sched_flags {
15280b57cec5SDimitry Andric   unsigned ordered : 1;
15290b57cec5SDimitry Andric   unsigned nomerge : 1;
15300b57cec5SDimitry Andric   unsigned contains_last : 1;
15310b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
15320b57cec5SDimitry Andric   unsigned use_hier : 1;
15330b57cec5SDimitry Andric   unsigned unused : 28;
15340b57cec5SDimitry Andric #else
15350b57cec5SDimitry Andric   unsigned unused : 29;
15360b57cec5SDimitry Andric #endif
15370b57cec5SDimitry Andric } kmp_sched_flags_t;
15380b57cec5SDimitry Andric 
15390b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
15400b57cec5SDimitry Andric 
15410b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED
15420b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
15430b57cec5SDimitry Andric   kmp_int32 count;
15440b57cec5SDimitry Andric   kmp_int32 ub;
15450b57cec5SDimitry Andric   /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
15460b57cec5SDimitry Andric   kmp_int32 lb;
15470b57cec5SDimitry Andric   kmp_int32 st;
15480b57cec5SDimitry Andric   kmp_int32 tc;
15490b57cec5SDimitry Andric   kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
15500b57cec5SDimitry Andric                                      after ub */
1551*5ffd83dbSDimitry Andric   kmp_lock_t *th_steal_lock; // lock used for chunk stealing
15520b57cec5SDimitry Andric   // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
15530b57cec5SDimitry Andric   //    a) parm3 is properly aligned and
15540b57cec5SDimitry Andric   //    b) all parm1-4 are in the same cache line.
15550b57cec5SDimitry Andric   // Because parm1-4 are used together, performance seems to be better
15560b57cec5SDimitry Andric   // if they are in the same cache line (not measured, though).
15570b57cec5SDimitry Andric 
15580b57cec5SDimitry Andric   struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
15590b57cec5SDimitry Andric     kmp_int32 parm1; //     structures in kmp_dispatch.cpp. This should
15600b57cec5SDimitry Andric     kmp_int32 parm2; //     make no real change at least while padding is off.
15610b57cec5SDimitry Andric     kmp_int32 parm3;
15620b57cec5SDimitry Andric     kmp_int32 parm4;
15630b57cec5SDimitry Andric   };
15640b57cec5SDimitry Andric 
15650b57cec5SDimitry Andric   kmp_uint32 ordered_lower;
15660b57cec5SDimitry Andric   kmp_uint32 ordered_upper;
15670b57cec5SDimitry Andric #if KMP_OS_WINDOWS
15680b57cec5SDimitry Andric   // This var can be placed in the hole between 'tc' and 'parm1', instead of
15690b57cec5SDimitry Andric   // 'static_steal_counter'. It would be nice to measure execution times.
15700b57cec5SDimitry Andric   // The conditional if/endif could then be removed entirely.
15710b57cec5SDimitry Andric   kmp_int32 last_upper;
15720b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
15730b57cec5SDimitry Andric } dispatch_private_info32_t;
15740b57cec5SDimitry Andric 
15750b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
15760b57cec5SDimitry Andric   kmp_int64 count; // current chunk number for static & static-steal scheduling
15770b57cec5SDimitry Andric   kmp_int64 ub; /* upper-bound */
15780b57cec5SDimitry Andric   /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
15790b57cec5SDimitry Andric   kmp_int64 lb; /* lower-bound */
15800b57cec5SDimitry Andric   kmp_int64 st; /* stride */
15810b57cec5SDimitry Andric   kmp_int64 tc; /* trip count (number of iterations) */
15820b57cec5SDimitry Andric   kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
15830b57cec5SDimitry Andric                                      after ub */
1584*5ffd83dbSDimitry Andric   kmp_lock_t *th_steal_lock; // lock used for chunk stealing
15850b57cec5SDimitry Andric   /* parm[1-4] are used in different ways by different scheduling algorithms */
15860b57cec5SDimitry Andric 
15870b57cec5SDimitry Andric   // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
15880b57cec5SDimitry Andric   //    a) parm3 is properly aligned and
15890b57cec5SDimitry Andric   //    b) all parm1-4 are in the same cache line.
15900b57cec5SDimitry Andric   // Because parm1-4 are used together, performance seems to be better
15910b57cec5SDimitry Andric   // if they are in the same cache line (not measured, though).
15920b57cec5SDimitry Andric 
15930b57cec5SDimitry Andric   struct KMP_ALIGN(32) {
15940b57cec5SDimitry Andric     kmp_int64 parm1;
15950b57cec5SDimitry Andric     kmp_int64 parm2;
15960b57cec5SDimitry Andric     kmp_int64 parm3;
15970b57cec5SDimitry Andric     kmp_int64 parm4;
15980b57cec5SDimitry Andric   };
15990b57cec5SDimitry Andric 
16000b57cec5SDimitry Andric   kmp_uint64 ordered_lower;
16010b57cec5SDimitry Andric   kmp_uint64 ordered_upper;
16020b57cec5SDimitry Andric #if KMP_OS_WINDOWS
16030b57cec5SDimitry Andric   // This var can be placed in the hole between 'tc' and 'parm1', instead of
16040b57cec5SDimitry Andric   // 'static_steal_counter'. It would be nice to measure execution times.
16050b57cec5SDimitry Andric   // The conditional if/endif could then be removed entirely.
16060b57cec5SDimitry Andric   kmp_int64 last_upper;
16070b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
16080b57cec5SDimitry Andric } dispatch_private_info64_t;
16090b57cec5SDimitry Andric #else /* KMP_STATIC_STEAL_ENABLED */
16100b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
16110b57cec5SDimitry Andric   kmp_int32 lb;
16120b57cec5SDimitry Andric   kmp_int32 ub;
16130b57cec5SDimitry Andric   kmp_int32 st;
16140b57cec5SDimitry Andric   kmp_int32 tc;
16150b57cec5SDimitry Andric 
16160b57cec5SDimitry Andric   kmp_int32 parm1;
16170b57cec5SDimitry Andric   kmp_int32 parm2;
16180b57cec5SDimitry Andric   kmp_int32 parm3;
16190b57cec5SDimitry Andric   kmp_int32 parm4;
16200b57cec5SDimitry Andric 
16210b57cec5SDimitry Andric   kmp_int32 count;
16220b57cec5SDimitry Andric 
16230b57cec5SDimitry Andric   kmp_uint32 ordered_lower;
16240b57cec5SDimitry Andric   kmp_uint32 ordered_upper;
16250b57cec5SDimitry Andric #if KMP_OS_WINDOWS
16260b57cec5SDimitry Andric   kmp_int32 last_upper;
16270b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
16280b57cec5SDimitry Andric } dispatch_private_info32_t;
16290b57cec5SDimitry Andric 
16300b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
16310b57cec5SDimitry Andric   kmp_int64 lb; /* lower-bound */
16320b57cec5SDimitry Andric   kmp_int64 ub; /* upper-bound */
16330b57cec5SDimitry Andric   kmp_int64 st; /* stride */
16340b57cec5SDimitry Andric   kmp_int64 tc; /* trip count (number of iterations) */
16350b57cec5SDimitry Andric 
16360b57cec5SDimitry Andric   /* parm[1-4] are used in different ways by different scheduling algorithms */
16370b57cec5SDimitry Andric   kmp_int64 parm1;
16380b57cec5SDimitry Andric   kmp_int64 parm2;
16390b57cec5SDimitry Andric   kmp_int64 parm3;
16400b57cec5SDimitry Andric   kmp_int64 parm4;
16410b57cec5SDimitry Andric 
16420b57cec5SDimitry Andric   kmp_int64 count; /* current chunk number for static scheduling */
16430b57cec5SDimitry Andric 
16440b57cec5SDimitry Andric   kmp_uint64 ordered_lower;
16450b57cec5SDimitry Andric   kmp_uint64 ordered_upper;
16460b57cec5SDimitry Andric #if KMP_OS_WINDOWS
16470b57cec5SDimitry Andric   kmp_int64 last_upper;
16480b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
16490b57cec5SDimitry Andric } dispatch_private_info64_t;
16500b57cec5SDimitry Andric #endif /* KMP_STATIC_STEAL_ENABLED */
16510b57cec5SDimitry Andric 
16520b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info {
16530b57cec5SDimitry Andric   union private_info {
16540b57cec5SDimitry Andric     dispatch_private_info32_t p32;
16550b57cec5SDimitry Andric     dispatch_private_info64_t p64;
16560b57cec5SDimitry Andric   } u;
16570b57cec5SDimitry Andric   enum sched_type schedule; /* scheduling algorithm */
16580b57cec5SDimitry Andric   kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
16590b57cec5SDimitry Andric   kmp_int32 ordered_bumped;
16600b57cec5SDimitry Andric   // To retain the structure size after making ordered_iteration scalar
16610b57cec5SDimitry Andric   kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
16620b57cec5SDimitry Andric   // Stack of buffers for nest of serial regions
16630b57cec5SDimitry Andric   struct dispatch_private_info *next;
16640b57cec5SDimitry Andric   kmp_int32 type_size; /* the size of types in private_info */
16650b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
16660b57cec5SDimitry Andric   kmp_int32 hier_id;
16670b57cec5SDimitry Andric   void *parent; /* hierarchical scheduling parent pointer */
16680b57cec5SDimitry Andric #endif
16690b57cec5SDimitry Andric   enum cons_type pushed_ws;
16700b57cec5SDimitry Andric } dispatch_private_info_t;
16710b57cec5SDimitry Andric 
16720b57cec5SDimitry Andric typedef struct dispatch_shared_info32 {
16730b57cec5SDimitry Andric   /* chunk index under dynamic, number of idle threads under static-steal;
16740b57cec5SDimitry Andric      iteration index otherwise */
16750b57cec5SDimitry Andric   volatile kmp_uint32 iteration;
16760b57cec5SDimitry Andric   volatile kmp_uint32 num_done;
16770b57cec5SDimitry Andric   volatile kmp_uint32 ordered_iteration;
16780b57cec5SDimitry Andric   // Dummy to retain the structure size after making ordered_iteration scalar
16790b57cec5SDimitry Andric   kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
16800b57cec5SDimitry Andric } dispatch_shared_info32_t;
16810b57cec5SDimitry Andric 
16820b57cec5SDimitry Andric typedef struct dispatch_shared_info64 {
16830b57cec5SDimitry Andric   /* chunk index under dynamic, number of idle threads under static-steal;
16840b57cec5SDimitry Andric      iteration index otherwise */
16850b57cec5SDimitry Andric   volatile kmp_uint64 iteration;
16860b57cec5SDimitry Andric   volatile kmp_uint64 num_done;
16870b57cec5SDimitry Andric   volatile kmp_uint64 ordered_iteration;
16880b57cec5SDimitry Andric   // Dummy to retain the structure size after making ordered_iteration scalar
16890b57cec5SDimitry Andric   kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
16900b57cec5SDimitry Andric } dispatch_shared_info64_t;
16910b57cec5SDimitry Andric 
16920b57cec5SDimitry Andric typedef struct dispatch_shared_info {
16930b57cec5SDimitry Andric   union shared_info {
16940b57cec5SDimitry Andric     dispatch_shared_info32_t s32;
16950b57cec5SDimitry Andric     dispatch_shared_info64_t s64;
16960b57cec5SDimitry Andric   } u;
16970b57cec5SDimitry Andric   volatile kmp_uint32 buffer_index;
16980b57cec5SDimitry Andric   volatile kmp_int32 doacross_buf_idx; // teamwise index
16990b57cec5SDimitry Andric   volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
17000b57cec5SDimitry Andric   kmp_int32 doacross_num_done; // count finished threads
17010b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
17020b57cec5SDimitry Andric   void *hier;
17030b57cec5SDimitry Andric #endif
17040b57cec5SDimitry Andric #if KMP_USE_HWLOC
17050b57cec5SDimitry Andric   // When linking with libhwloc, the ORDERED EPCC test slows down on big
17060b57cec5SDimitry Andric   // machines (> 48 cores). Performance analysis showed that a cache thrash
17070b57cec5SDimitry Andric   // was occurring and this padding helps alleviate the problem.
17080b57cec5SDimitry Andric   char padding[64];
17090b57cec5SDimitry Andric #endif
17100b57cec5SDimitry Andric } dispatch_shared_info_t;
17110b57cec5SDimitry Andric 
17120b57cec5SDimitry Andric typedef struct kmp_disp {
17130b57cec5SDimitry Andric   /* Vector for ORDERED SECTION */
17140b57cec5SDimitry Andric   void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
17150b57cec5SDimitry Andric   /* Vector for END ORDERED SECTION */
17160b57cec5SDimitry Andric   void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
17170b57cec5SDimitry Andric 
17180b57cec5SDimitry Andric   dispatch_shared_info_t *th_dispatch_sh_current;
17190b57cec5SDimitry Andric   dispatch_private_info_t *th_dispatch_pr_current;
17200b57cec5SDimitry Andric 
17210b57cec5SDimitry Andric   dispatch_private_info_t *th_disp_buffer;
17220b57cec5SDimitry Andric   kmp_int32 th_disp_index;
17230b57cec5SDimitry Andric   kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
17240b57cec5SDimitry Andric   volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
17250b57cec5SDimitry Andric   kmp_int64 *th_doacross_info; // info on loop bounds
17260b57cec5SDimitry Andric #if KMP_USE_INTERNODE_ALIGNMENT
17270b57cec5SDimitry Andric   char more_padding[INTERNODE_CACHE_LINE];
17280b57cec5SDimitry Andric #endif
17290b57cec5SDimitry Andric } kmp_disp_t;
17300b57cec5SDimitry Andric 
17310b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
17320b57cec5SDimitry Andric /* Barrier stuff */
17330b57cec5SDimitry Andric 
17340b57cec5SDimitry Andric /* constants for barrier state update */
17350b57cec5SDimitry Andric #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
17360b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
17370b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
17380b57cec5SDimitry Andric #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
17390b57cec5SDimitry Andric 
17400b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
17410b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
17420b57cec5SDimitry Andric #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
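// With the bit assignments above, KMP_BARRIER_SLEEP_STATE == 1,
// KMP_BARRIER_UNUSED_STATE == 2 and KMP_BARRIER_STATE_BUMP == 4, so a
// go/arrived state advances in steps of 4 while the sleep bit remains
// usable as an independent flag.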
17430b57cec5SDimitry Andric 
17440b57cec5SDimitry Andric #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
17450b57cec5SDimitry Andric #error "Barrier sleep bit must be smaller than barrier bump bit"
17460b57cec5SDimitry Andric #endif
17470b57cec5SDimitry Andric #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
17480b57cec5SDimitry Andric #error "Barrier unused bit must be smaller than barrier bump bit"
17490b57cec5SDimitry Andric #endif
17500b57cec5SDimitry Andric 
17510b57cec5SDimitry Andric // Constants for release barrier wait state: currently, hierarchical only
17520b57cec5SDimitry Andric #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
17530b57cec5SDimitry Andric #define KMP_BARRIER_OWN_FLAG                                                   \
17540b57cec5SDimitry Andric   1 // Normal state; worker waiting on own b_go flag in release
17550b57cec5SDimitry Andric #define KMP_BARRIER_PARENT_FLAG                                                \
17560b57cec5SDimitry Andric   2 // Special state; worker waiting on parent's b_go flag in release
17570b57cec5SDimitry Andric #define KMP_BARRIER_SWITCH_TO_OWN_FLAG                                         \
17580b57cec5SDimitry Andric   3 // Special state; tells worker to shift from parent to own b_go
17590b57cec5SDimitry Andric #define KMP_BARRIER_SWITCHING                                                  \
17600b57cec5SDimitry Andric   4 // Special state; worker resets appropriate flag on wake-up
17610b57cec5SDimitry Andric 
17620b57cec5SDimitry Andric #define KMP_NOT_SAFE_TO_REAP                                                   \
17630b57cec5SDimitry Andric   0 // Thread th_reap_state: not safe to reap (tasking)
17640b57cec5SDimitry Andric #define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
17650b57cec5SDimitry Andric 
17660b57cec5SDimitry Andric enum barrier_type {
17670b57cec5SDimitry Andric   bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
17680b57cec5SDimitry Andric                            barriers if enabled) */
17690b57cec5SDimitry Andric   bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
17700b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER
17710b57cec5SDimitry Andric   bs_reduction_barrier, /* 2, All barriers that are used in reduction */
17720b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER
17730b57cec5SDimitry Andric   bs_last_barrier /* Just a placeholder to mark the end */
17740b57cec5SDimitry Andric };
17750b57cec5SDimitry Andric 
17760b57cec5SDimitry Andric // to work with reduction barriers just like with plain barriers
17770b57cec5SDimitry Andric #if !KMP_FAST_REDUCTION_BARRIER
17780b57cec5SDimitry Andric #define bs_reduction_barrier bs_plain_barrier
17790b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER
17800b57cec5SDimitry Andric 
17810b57cec5SDimitry Andric typedef enum kmp_bar_pat { /* Barrier communication patterns */
17820b57cec5SDimitry Andric                            bp_linear_bar =
17830b57cec5SDimitry Andric                                0, /* Single level (degenerate) tree */
17840b57cec5SDimitry Andric                            bp_tree_bar =
17850b57cec5SDimitry Andric                                1, /* Balanced tree with branching factor 2^n */
17860b57cec5SDimitry Andric                            bp_hyper_bar =
17870b57cec5SDimitry Andric                                2, /* Hypercube-embedded tree with min branching
17880b57cec5SDimitry Andric                                      factor 2^n */
17890b57cec5SDimitry Andric                            bp_hierarchical_bar = 3, /* Machine hierarchy tree */
17900b57cec5SDimitry Andric                            bp_last_bar /* Placeholder to mark the end */
17910b57cec5SDimitry Andric } kmp_bar_pat_e;
17920b57cec5SDimitry Andric 
17930b57cec5SDimitry Andric #define KMP_BARRIER_ICV_PUSH 1
17940b57cec5SDimitry Andric 
17950b57cec5SDimitry Andric /* Record holding the values of the internal controls (ICVs); also used as the
17950b57cec5SDimitry Andric    record type for the internal control stack */
17960b57cec5SDimitry Andric typedef struct kmp_internal_control {
17970b57cec5SDimitry Andric   int serial_nesting_level; /* corresponds to the value of the
17980b57cec5SDimitry Andric                                th_team_serialized field */
17990b57cec5SDimitry Andric   kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
18000b57cec5SDimitry Andric                        thread) */
18010b57cec5SDimitry Andric   kmp_int8
18020b57cec5SDimitry Andric       bt_set; /* internal control for whether blocktime is explicitly set */
18030b57cec5SDimitry Andric   int blocktime; /* internal control for blocktime */
18040b57cec5SDimitry Andric #if KMP_USE_MONITOR
18050b57cec5SDimitry Andric   int bt_intervals; /* internal control for blocktime intervals */
18060b57cec5SDimitry Andric #endif
18070b57cec5SDimitry Andric   int nproc; /* internal control for #threads for next parallel region (per
18080b57cec5SDimitry Andric                 thread) */
18090b57cec5SDimitry Andric   int thread_limit; /* internal control for thread-limit-var */
18100b57cec5SDimitry Andric   int max_active_levels; /* internal control for max_active_levels */
18110b57cec5SDimitry Andric   kmp_r_sched_t
18120b57cec5SDimitry Andric       sched; /* internal control for runtime schedule {sched,chunk} pair */
18130b57cec5SDimitry Andric   kmp_proc_bind_t proc_bind; /* internal control for affinity  */
18140b57cec5SDimitry Andric   kmp_int32 default_device; /* internal control for default device */
18150b57cec5SDimitry Andric   struct kmp_internal_control *next;
18160b57cec5SDimitry Andric } kmp_internal_control_t;
18170b57cec5SDimitry Andric 
18180b57cec5SDimitry Andric static inline void copy_icvs(kmp_internal_control_t *dst,
18190b57cec5SDimitry Andric                              kmp_internal_control_t *src) {
18200b57cec5SDimitry Andric   *dst = *src;
18210b57cec5SDimitry Andric }
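
// Illustrative sketch (not runtime code): how an ICV record might be copied
// and chained when pushing new internal controls. The helper name and the use
// of malloc() are assumptions for illustration only.
//
//   static void push_icvs_example(kmp_internal_control_t **stack_top,
//                                 kmp_internal_control_t *current) {
//     kmp_internal_control_t *copy =
//         (kmp_internal_control_t *)malloc(sizeof(*copy));
//     copy_icvs(copy, current); // structure copy of all ICV fields
//     copy->next = *stack_top;  // chain onto the internal control stack
//     *stack_top = copy;
//   }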
18220b57cec5SDimitry Andric 
18230b57cec5SDimitry Andric /* Thread barrier needs volatile barrier fields */
18240b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_bstate {
18250b57cec5SDimitry Andric   // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
18260b57cec5SDimitry Andric   // uses of it). It is not explicitly aligned below, because we *don't* want
18270b57cec5SDimitry Andric   // it to be padded -- instead, we fit b_go into the same cache line with
18280b57cec5SDimitry Andric   // th_fixed_icvs, enabling NGO cache-line stores in the hierarchical barrier.
18290b57cec5SDimitry Andric   kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
18300b57cec5SDimitry Andric   // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
18310b57cec5SDimitry Andric   // same NGO store
18320b57cec5SDimitry Andric   volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
18330b57cec5SDimitry Andric   KMP_ALIGN_CACHE volatile kmp_uint64
18340b57cec5SDimitry Andric       b_arrived; // STATE => task reached synch point.
18350b57cec5SDimitry Andric   kmp_uint32 *skip_per_level;
18360b57cec5SDimitry Andric   kmp_uint32 my_level;
18370b57cec5SDimitry Andric   kmp_int32 parent_tid;
18380b57cec5SDimitry Andric   kmp_int32 old_tid;
18390b57cec5SDimitry Andric   kmp_uint32 depth;
18400b57cec5SDimitry Andric   struct kmp_bstate *parent_bar;
18410b57cec5SDimitry Andric   kmp_team_t *team;
18420b57cec5SDimitry Andric   kmp_uint64 leaf_state;
18430b57cec5SDimitry Andric   kmp_uint32 nproc;
18440b57cec5SDimitry Andric   kmp_uint8 base_leaf_kids;
18450b57cec5SDimitry Andric   kmp_uint8 leaf_kids;
18460b57cec5SDimitry Andric   kmp_uint8 offset;
18470b57cec5SDimitry Andric   kmp_uint8 wait_flag;
18480b57cec5SDimitry Andric   kmp_uint8 use_oncore_barrier;
18490b57cec5SDimitry Andric #if USE_DEBUGGER
18500b57cec5SDimitry Andric   // The following field is intended solely for the debugger. Only the worker
18510b57cec5SDimitry Andric   // thread itself accesses this field: the worker increments it by 1 when it
18520b57cec5SDimitry Andric   // arrives at a barrier.
18530b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
18540b57cec5SDimitry Andric #endif /* USE_DEBUGGER */
18550b57cec5SDimitry Andric } kmp_bstate_t;
18560b57cec5SDimitry Andric 
18570b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_union {
18580b57cec5SDimitry Andric   double b_align; /* use worst case alignment */
18590b57cec5SDimitry Andric   char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
18600b57cec5SDimitry Andric   kmp_bstate_t bb;
18610b57cec5SDimitry Andric };
18620b57cec5SDimitry Andric 
18630b57cec5SDimitry Andric typedef union kmp_barrier_union kmp_balign_t;
18640b57cec5SDimitry Andric 
18650b57cec5SDimitry Andric /* Team barrier needs only non-volatile arrived counter */
18660b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_team_union {
18670b57cec5SDimitry Andric   double b_align; /* use worst case alignment */
18680b57cec5SDimitry Andric   char b_pad[CACHE_LINE];
18690b57cec5SDimitry Andric   struct {
18700b57cec5SDimitry Andric     kmp_uint64 b_arrived; /* STATE => task reached synch point. */
18710b57cec5SDimitry Andric #if USE_DEBUGGER
18720b57cec5SDimitry Andric     // The following two fields are intended solely for the debugger. Only the
18730b57cec5SDimitry Andric     // master of the team accesses these fields: the first is incremented by 1
18740b57cec5SDimitry Andric     // when the master arrives at a barrier, the second is incremented by 1
18750b57cec5SDimitry Andric     // when all the threads have arrived.
18760b57cec5SDimitry Andric     kmp_uint b_master_arrived;
18770b57cec5SDimitry Andric     kmp_uint b_team_arrived;
18780b57cec5SDimitry Andric #endif
18790b57cec5SDimitry Andric   };
18800b57cec5SDimitry Andric };
18810b57cec5SDimitry Andric 
18820b57cec5SDimitry Andric typedef union kmp_barrier_team_union kmp_balign_team_t;
18830b57cec5SDimitry Andric 
18840b57cec5SDimitry Andric /* Padding for Linux* OS pthreads condition variables and mutexes used to signal
18850b57cec5SDimitry Andric    threads when a condition changes.  This works around an NPTL bug where
18860b57cec5SDimitry Andric    padding was added to pthread_cond_t, which caused the initialization routine
18870b57cec5SDimitry Andric    to write outside of the structure if compiled against pre-NPTL threads.  */
18880b57cec5SDimitry Andric #if KMP_OS_WINDOWS
18890b57cec5SDimitry Andric typedef struct kmp_win32_mutex {
18900b57cec5SDimitry Andric   /* The Lock */
18910b57cec5SDimitry Andric   CRITICAL_SECTION cs;
18920b57cec5SDimitry Andric } kmp_win32_mutex_t;
18930b57cec5SDimitry Andric 
18940b57cec5SDimitry Andric typedef struct kmp_win32_cond {
18950b57cec5SDimitry Andric   /* Count of the number of waiters. */
18960b57cec5SDimitry Andric   int waiters_count_;
18970b57cec5SDimitry Andric 
18980b57cec5SDimitry Andric   /* Serialize access to <waiters_count_> */
18990b57cec5SDimitry Andric   kmp_win32_mutex_t waiters_count_lock_;
19000b57cec5SDimitry Andric 
19010b57cec5SDimitry Andric   /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
19020b57cec5SDimitry Andric   int release_count_;
19030b57cec5SDimitry Andric 
19040b57cec5SDimitry Andric   /* Keeps track of the current "generation" so that we don't allow */
19050b57cec5SDimitry Andric   /* one thread to steal all the "releases" from the broadcast. */
19060b57cec5SDimitry Andric   int wait_generation_count_;
19070b57cec5SDimitry Andric 
19080b57cec5SDimitry Andric   /* A manual-reset event that's used to block and release waiting threads. */
19090b57cec5SDimitry Andric   HANDLE event_;
19100b57cec5SDimitry Andric } kmp_win32_cond_t;
19110b57cec5SDimitry Andric #endif
19120b57cec5SDimitry Andric 
19130b57cec5SDimitry Andric #if KMP_OS_UNIX
19140b57cec5SDimitry Andric 
19150b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_cond_union {
19160b57cec5SDimitry Andric   double c_align;
19170b57cec5SDimitry Andric   char c_pad[CACHE_LINE];
19180b57cec5SDimitry Andric   pthread_cond_t c_cond;
19190b57cec5SDimitry Andric };
19200b57cec5SDimitry Andric 
19210b57cec5SDimitry Andric typedef union kmp_cond_union kmp_cond_align_t;
19220b57cec5SDimitry Andric 
19230b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_mutex_union {
19240b57cec5SDimitry Andric   double m_align;
19250b57cec5SDimitry Andric   char m_pad[CACHE_LINE];
19260b57cec5SDimitry Andric   pthread_mutex_t m_mutex;
19270b57cec5SDimitry Andric };
19280b57cec5SDimitry Andric 
19290b57cec5SDimitry Andric typedef union kmp_mutex_union kmp_mutex_align_t;
19300b57cec5SDimitry Andric 
19310b57cec5SDimitry Andric #endif /* KMP_OS_UNIX */
19320b57cec5SDimitry Andric 
19330b57cec5SDimitry Andric typedef struct kmp_desc_base {
19340b57cec5SDimitry Andric   void *ds_stackbase;
19350b57cec5SDimitry Andric   size_t ds_stacksize;
19360b57cec5SDimitry Andric   int ds_stackgrow;
19370b57cec5SDimitry Andric   kmp_thread_t ds_thread;
19380b57cec5SDimitry Andric   volatile int ds_tid;
19390b57cec5SDimitry Andric   int ds_gtid;
19400b57cec5SDimitry Andric #if KMP_OS_WINDOWS
19410b57cec5SDimitry Andric   volatile int ds_alive;
19420b57cec5SDimitry Andric   DWORD ds_thread_id;
19430b57cec5SDimitry Andric /* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
19440b57cec5SDimitry Andric    purposes. However, debugger support (libomp_db) cannot work with handles,
19450b57cec5SDimitry Andric    because they are not comparable. For example, the debugger requests info
19460b57cec5SDimitry Andric    about a thread with handle h; h is valid within the debugger process but
19470b57cec5SDimitry Andric    meaningless within the debuggee process. Even if h is duplicated via
19480b57cec5SDimitry Andric    DuplicateHandle() so that the result h' is valid in the debuggee process,
19490b57cec5SDimitry Andric    h' is a *new* handle which does *not* equal any other handle in the
19500b57cec5SDimitry Andric    debuggee... The only way to compare handles is to convert them to
19510b57cec5SDimitry Andric    system-wide ids. GetThreadId() is available only on Longhorn and Server
19520b57cec5SDimitry Andric    2003. :-( In contrast, GetCurrentThreadId() is available on all Windows*
19530b57cec5SDimitry Andric    OS flavours (including Windows* 95). Thus, we call GetCurrentThreadId()
19540b57cec5SDimitry Andric    from within the thread and save the id to let libomp_db identify threads.  */
19550b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
19560b57cec5SDimitry Andric } kmp_desc_base_t;
19570b57cec5SDimitry Andric 
19580b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_desc {
19590b57cec5SDimitry Andric   double ds_align; /* use worst case alignment */
19600b57cec5SDimitry Andric   char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
19610b57cec5SDimitry Andric   kmp_desc_base_t ds;
19620b57cec5SDimitry Andric } kmp_desc_t;
19630b57cec5SDimitry Andric 
19640b57cec5SDimitry Andric typedef struct kmp_local {
19650b57cec5SDimitry Andric   volatile int this_construct; /* count of single's encountered by thread */
19660b57cec5SDimitry Andric   void *reduce_data;
19670b57cec5SDimitry Andric #if KMP_USE_BGET
19680b57cec5SDimitry Andric   void *bget_data;
19690b57cec5SDimitry Andric   void *bget_list;
19700b57cec5SDimitry Andric #if !USE_CMP_XCHG_FOR_BGET
19710b57cec5SDimitry Andric #ifdef USE_QUEUING_LOCK_FOR_BGET
19720b57cec5SDimitry Andric   kmp_lock_t bget_lock; /* Lock for accessing bget free list */
19730b57cec5SDimitry Andric #else
19740b57cec5SDimitry Andric   kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
19750b57cec5SDimitry Andric // bootstrap lock so we can use it at library
19760b57cec5SDimitry Andric // shutdown.
19770b57cec5SDimitry Andric #endif /* USE_QUEUING_LOCK_FOR_BGET */
19780b57cec5SDimitry Andric #endif /* ! USE_CMP_XCHG_FOR_BGET */
19790b57cec5SDimitry Andric #endif /* KMP_USE_BGET */
19800b57cec5SDimitry Andric 
19810b57cec5SDimitry Andric   PACKED_REDUCTION_METHOD_T
19820b57cec5SDimitry Andric   packed_reduction_method; /* stored by __kmpc_reduce*(), used by
19830b57cec5SDimitry Andric                               __kmpc_end_reduce*() */
19840b57cec5SDimitry Andric 
19850b57cec5SDimitry Andric } kmp_local_t;
19860b57cec5SDimitry Andric 
19870b57cec5SDimitry Andric #define KMP_CHECK_UPDATE(a, b)                                                 \
19880b57cec5SDimitry Andric   if ((a) != (b))                                                              \
19890b57cec5SDimitry Andric   (a) = (b)
19900b57cec5SDimitry Andric #define KMP_CHECK_UPDATE_SYNC(a, b)                                            \
19910b57cec5SDimitry Andric   if ((a) != (b))                                                              \
19920b57cec5SDimitry Andric   TCW_SYNC_PTR((a), (b))
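
// Illustrative use (sketch only): these macros skip the store when the value
// is already current, so a shared cache line is not dirtied needlessly. The
// variables below are assumptions for illustration.
//
//   KMP_CHECK_UPDATE(team->t.t_argc, argc);           // plain conditional store
//   KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); // synchronized pointer store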
19930b57cec5SDimitry Andric 
19940b57cec5SDimitry Andric #define get__blocktime(xteam, xtid)                                            \
19950b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
19960b57cec5SDimitry Andric #define get__bt_set(xteam, xtid)                                               \
19970b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
19980b57cec5SDimitry Andric #if KMP_USE_MONITOR
19990b57cec5SDimitry Andric #define get__bt_intervals(xteam, xtid)                                         \
20000b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
20010b57cec5SDimitry Andric #endif
20020b57cec5SDimitry Andric 
20030b57cec5SDimitry Andric #define get__dynamic_2(xteam, xtid)                                            \
20040b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
20050b57cec5SDimitry Andric #define get__nproc_2(xteam, xtid)                                              \
20060b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
20070b57cec5SDimitry Andric #define get__sched_2(xteam, xtid)                                              \
20080b57cec5SDimitry Andric   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
20090b57cec5SDimitry Andric 
20100b57cec5SDimitry Andric #define set__blocktime_team(xteam, xtid, xval)                                 \
20110b57cec5SDimitry Andric   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) =     \
20120b57cec5SDimitry Andric        (xval))
20130b57cec5SDimitry Andric 
20140b57cec5SDimitry Andric #if KMP_USE_MONITOR
20150b57cec5SDimitry Andric #define set__bt_intervals_team(xteam, xtid, xval)                              \
20160b57cec5SDimitry Andric   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) =  \
20170b57cec5SDimitry Andric        (xval))
20180b57cec5SDimitry Andric #endif
20190b57cec5SDimitry Andric 
20200b57cec5SDimitry Andric #define set__bt_set_team(xteam, xtid, xval)                                    \
20210b57cec5SDimitry Andric   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
20220b57cec5SDimitry Andric 
20230b57cec5SDimitry Andric #define set__dynamic(xthread, xval)                                            \
20240b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
20250b57cec5SDimitry Andric #define get__dynamic(xthread)                                                  \
20260b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
20270b57cec5SDimitry Andric 
20280b57cec5SDimitry Andric #define set__nproc(xthread, xval)                                              \
20290b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
20300b57cec5SDimitry Andric 
20310b57cec5SDimitry Andric #define set__thread_limit(xthread, xval)                                       \
20320b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
20330b57cec5SDimitry Andric 
20340b57cec5SDimitry Andric #define set__max_active_levels(xthread, xval)                                  \
20350b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
20360b57cec5SDimitry Andric 
20370b57cec5SDimitry Andric #define get__max_active_levels(xthread)                                        \
20380b57cec5SDimitry Andric   ((xthread)->th.th_current_task->td_icvs.max_active_levels)
20390b57cec5SDimitry Andric 
20400b57cec5SDimitry Andric #define set__sched(xthread, xval)                                              \
20410b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
20420b57cec5SDimitry Andric 
20430b57cec5SDimitry Andric #define set__proc_bind(xthread, xval)                                          \
20440b57cec5SDimitry Andric   (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
20450b57cec5SDimitry Andric #define get__proc_bind(xthread)                                                \
20460b57cec5SDimitry Andric   ((xthread)->th.th_current_task->td_icvs.proc_bind)
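
// Illustrative use (sketch only): the get__*/set__* macros above read and write
// the ICVs stored in a thread's current (implicit) task; the locals below are
// assumptions for illustration.
//
//   kmp_info_t *thr = /* some thread descriptor */;
//   set__nproc(thr, 8);                  // request 8 threads for the next region
//   if (get__dynamic(thr) == FTN_TRUE) {
//     /* dynamic adjustment of threads is enabled */
//   }
//   int bt = get__blocktime(team, tid);  // per-member blocktime via the team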
20470b57cec5SDimitry Andric 
20480b57cec5SDimitry Andric // OpenMP tasking data structures
20490b57cec5SDimitry Andric 
20500b57cec5SDimitry Andric typedef enum kmp_tasking_mode {
20510b57cec5SDimitry Andric   tskm_immediate_exec = 0,
20520b57cec5SDimitry Andric   tskm_extra_barrier = 1,
20530b57cec5SDimitry Andric   tskm_task_teams = 2,
20540b57cec5SDimitry Andric   tskm_max = 2
20550b57cec5SDimitry Andric } kmp_tasking_mode_t;
20560b57cec5SDimitry Andric 
20570b57cec5SDimitry Andric extern kmp_tasking_mode_t
20580b57cec5SDimitry Andric     __kmp_tasking_mode; /* determines how/when to execute tasks */
20590b57cec5SDimitry Andric extern int __kmp_task_stealing_constraint;
20600b57cec5SDimitry Andric extern int __kmp_enable_task_throttling;
20610b57cec5SDimitry Andric extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
20620b57cec5SDimitry Andric // specified, defaults to 0 otherwise
20630b57cec5SDimitry Andric // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
20640b57cec5SDimitry Andric extern kmp_int32 __kmp_max_task_priority;
20650b57cec5SDimitry Andric // Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
20660b57cec5SDimitry Andric extern kmp_uint64 __kmp_taskloop_min_tasks;
20670b57cec5SDimitry Andric 
20680b57cec5SDimitry Andric /* NOTE: kmp_taskdata_t and kmp_task_t structures are allocated in a single
20690b57cec5SDimitry Andric    block, with the taskdata first */
20700b57cec5SDimitry Andric #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
20710b57cec5SDimitry Andric #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
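
// Illustrative sketch of the single-block layout assumed by the two macros
// above (not runtime code; the variable names are assumptions):
//
//   [ kmp_taskdata_t | kmp_task_t | private/shared task data ... ]
//                     ^-- pointer handed around as the kmp_task_t *
//
//   kmp_task_t *task = /* ... */;
//   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); // step back one taskdata
//   // KMP_TASKDATA_TO_TASK(td) yields the original task pointer again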
20720b57cec5SDimitry Andric 
20730b57cec5SDimitry Andric // The tt_found_tasks flag is a signal to all threads in the team that tasks
20740b57cec5SDimitry Andric // were spawned and queued since the previous barrier release.
20750b57cec5SDimitry Andric #define KMP_TASKING_ENABLED(task_team)                                         \
20760b57cec5SDimitry Andric   (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
20770b57cec5SDimitry Andric /*!
20780b57cec5SDimitry Andric @ingroup BASIC_TYPES
20790b57cec5SDimitry Andric @{
20800b57cec5SDimitry Andric */
20810b57cec5SDimitry Andric 
20820b57cec5SDimitry Andric /*!
20830b57cec5SDimitry Andric  */
20840b57cec5SDimitry Andric typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
20850b57cec5SDimitry Andric 
20860b57cec5SDimitry Andric typedef union kmp_cmplrdata {
20870b57cec5SDimitry Andric   kmp_int32 priority; /**< priority specified by user for the task */
20880b57cec5SDimitry Andric   kmp_routine_entry_t
20890b57cec5SDimitry Andric       destructors; /* pointer to function to invoke destructors of
20900b57cec5SDimitry Andric                       firstprivate C++ objects */
20910b57cec5SDimitry Andric   /* future data */
20920b57cec5SDimitry Andric } kmp_cmplrdata_t;
20930b57cec5SDimitry Andric 
20940b57cec5SDimitry Andric /*  sizeof_kmp_task_t passed as arg to kmpc_omp_task call  */
20950b57cec5SDimitry Andric /*!
20960b57cec5SDimitry Andric  */
20970b57cec5SDimitry Andric typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
20980b57cec5SDimitry Andric   void *shareds; /**< pointer to block of pointers to shared vars   */
20990b57cec5SDimitry Andric   kmp_routine_entry_t
21000b57cec5SDimitry Andric       routine; /**< pointer to routine to call for executing task */
21010b57cec5SDimitry Andric   kmp_int32 part_id; /**< part id for the task                          */
21020b57cec5SDimitry Andric   kmp_cmplrdata_t
21030b57cec5SDimitry Andric       data1; /* Two known optional additions: destructors and priority */
21040b57cec5SDimitry Andric   kmp_cmplrdata_t data2; /* Process destructors first, priority second */
21050b57cec5SDimitry Andric   /* future data */
21060b57cec5SDimitry Andric   /*  private vars  */
21070b57cec5SDimitry Andric } kmp_task_t;
21080b57cec5SDimitry Andric 
21090b57cec5SDimitry Andric /*!
21100b57cec5SDimitry Andric @}
21110b57cec5SDimitry Andric */
21120b57cec5SDimitry Andric 
21130b57cec5SDimitry Andric typedef struct kmp_taskgroup {
21140b57cec5SDimitry Andric   std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
21150b57cec5SDimitry Andric   std::atomic<kmp_int32>
21160b57cec5SDimitry Andric       cancel_request; // request for cancellation of this taskgroup
21170b57cec5SDimitry Andric   struct kmp_taskgroup *parent; // parent taskgroup
21180b57cec5SDimitry Andric   // Block of data to perform task reduction
21190b57cec5SDimitry Andric   void *reduce_data; // reduction related info
21200b57cec5SDimitry Andric   kmp_int32 reduce_num_data; // number of data items to reduce
21210b57cec5SDimitry Andric } kmp_taskgroup_t;
21220b57cec5SDimitry Andric 
21230b57cec5SDimitry Andric // forward declarations
21240b57cec5SDimitry Andric typedef union kmp_depnode kmp_depnode_t;
21250b57cec5SDimitry Andric typedef struct kmp_depnode_list kmp_depnode_list_t;
21260b57cec5SDimitry Andric typedef struct kmp_dephash_entry kmp_dephash_entry_t;
21270b57cec5SDimitry Andric 
21280b57cec5SDimitry Andric // Compiler sends us this info:
21290b57cec5SDimitry Andric typedef struct kmp_depend_info {
21300b57cec5SDimitry Andric   kmp_intptr_t base_addr;
21310b57cec5SDimitry Andric   size_t len;
21320b57cec5SDimitry Andric   struct {
21330b57cec5SDimitry Andric     bool in : 1;
21340b57cec5SDimitry Andric     bool out : 1;
21350b57cec5SDimitry Andric     bool mtx : 1;
21360b57cec5SDimitry Andric   } flags;
21370b57cec5SDimitry Andric } kmp_depend_info_t;
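
// Illustrative sketch (not compiler-generated code): how one dependence record
// might be filled for depend(inout: x); the concrete values are assumptions.
//
//   int x;
//   kmp_depend_info_t dep;
//   dep.base_addr = (kmp_intptr_t)&x; // the address identifies the dependence
//   dep.len = sizeof(x);
//   dep.flags.in = true;              // inout == both in and out
//   dep.flags.out = true;
//   dep.flags.mtx = false;            // not a mutexinoutset dependence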
21380b57cec5SDimitry Andric 
21390b57cec5SDimitry Andric // Internal structures to work with task dependencies:
21400b57cec5SDimitry Andric struct kmp_depnode_list {
21410b57cec5SDimitry Andric   kmp_depnode_t *node;
21420b57cec5SDimitry Andric   kmp_depnode_list_t *next;
21430b57cec5SDimitry Andric };
21440b57cec5SDimitry Andric 
21450b57cec5SDimitry Andric // Max number of mutexinoutset dependencies per node
21460b57cec5SDimitry Andric #define MAX_MTX_DEPS 4
21470b57cec5SDimitry Andric 
21480b57cec5SDimitry Andric typedef struct kmp_base_depnode {
21490b57cec5SDimitry Andric   kmp_depnode_list_t *successors; /* used under lock */
21500b57cec5SDimitry Andric   kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
21510b57cec5SDimitry Andric   kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
21520b57cec5SDimitry Andric   kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
21530b57cec5SDimitry Andric   kmp_lock_t lock; /* guards shared fields: task, successors */
21540b57cec5SDimitry Andric #if KMP_SUPPORT_GRAPH_OUTPUT
21550b57cec5SDimitry Andric   kmp_uint32 id;
21560b57cec5SDimitry Andric #endif
21570b57cec5SDimitry Andric   std::atomic<kmp_int32> npredecessors;
21580b57cec5SDimitry Andric   std::atomic<kmp_int32> nrefs;
21590b57cec5SDimitry Andric } kmp_base_depnode_t;
21600b57cec5SDimitry Andric 
21610b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_depnode {
21620b57cec5SDimitry Andric   double dn_align; /* use worst case alignment */
21630b57cec5SDimitry Andric   char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
21640b57cec5SDimitry Andric   kmp_base_depnode_t dn;
21650b57cec5SDimitry Andric };
21660b57cec5SDimitry Andric 
21670b57cec5SDimitry Andric struct kmp_dephash_entry {
21680b57cec5SDimitry Andric   kmp_intptr_t addr;
21690b57cec5SDimitry Andric   kmp_depnode_t *last_out;
21700b57cec5SDimitry Andric   kmp_depnode_list_t *last_ins;
21710b57cec5SDimitry Andric   kmp_depnode_list_t *last_mtxs;
21720b57cec5SDimitry Andric   kmp_int32 last_flag;
21730b57cec5SDimitry Andric   kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
21740b57cec5SDimitry Andric   kmp_dephash_entry_t *next_in_bucket;
21750b57cec5SDimitry Andric };
21760b57cec5SDimitry Andric 
21770b57cec5SDimitry Andric typedef struct kmp_dephash {
21780b57cec5SDimitry Andric   kmp_dephash_entry_t **buckets;
21790b57cec5SDimitry Andric   size_t size;
2180489b1cf2SDimitry Andric   size_t generation;
21810b57cec5SDimitry Andric   kmp_uint32 nelements;
21820b57cec5SDimitry Andric   kmp_uint32 nconflicts;
21830b57cec5SDimitry Andric } kmp_dephash_t;
21840b57cec5SDimitry Andric 
21850b57cec5SDimitry Andric typedef struct kmp_task_affinity_info {
21860b57cec5SDimitry Andric   kmp_intptr_t base_addr;
21870b57cec5SDimitry Andric   size_t len;
21880b57cec5SDimitry Andric   struct {
21890b57cec5SDimitry Andric     bool flag1 : 1;
21900b57cec5SDimitry Andric     bool flag2 : 1;
21910b57cec5SDimitry Andric     kmp_int32 reserved : 30;
21920b57cec5SDimitry Andric   } flags;
21930b57cec5SDimitry Andric } kmp_task_affinity_info_t;
21940b57cec5SDimitry Andric 
21950b57cec5SDimitry Andric typedef enum kmp_event_type_t {
21960b57cec5SDimitry Andric   KMP_EVENT_UNINITIALIZED = 0,
21970b57cec5SDimitry Andric   KMP_EVENT_ALLOW_COMPLETION = 1
21980b57cec5SDimitry Andric } kmp_event_type_t;
21990b57cec5SDimitry Andric 
22000b57cec5SDimitry Andric typedef struct {
22010b57cec5SDimitry Andric   kmp_event_type_t type;
22020b57cec5SDimitry Andric   kmp_tas_lock_t lock;
22030b57cec5SDimitry Andric   union {
22040b57cec5SDimitry Andric     kmp_task_t *task;
22050b57cec5SDimitry Andric   } ed;
22060b57cec5SDimitry Andric } kmp_event_t;
22070b57cec5SDimitry Andric 
22080b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
22090b57cec5SDimitry Andric 
22100b57cec5SDimitry Andric /* Tied Task stack definitions */
22110b57cec5SDimitry Andric typedef struct kmp_stack_block {
22120b57cec5SDimitry Andric   kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
22130b57cec5SDimitry Andric   struct kmp_stack_block *sb_next;
22140b57cec5SDimitry Andric   struct kmp_stack_block *sb_prev;
22150b57cec5SDimitry Andric } kmp_stack_block_t;
22160b57cec5SDimitry Andric 
22170b57cec5SDimitry Andric typedef struct kmp_task_stack {
22180b57cec5SDimitry Andric   kmp_stack_block_t ts_first_block; // first block of stack entries
22190b57cec5SDimitry Andric   kmp_taskdata_t **ts_top; // pointer to the top of stack
22200b57cec5SDimitry Andric   kmp_int32 ts_entries; // number of entries on the stack
22210b57cec5SDimitry Andric } kmp_task_stack_t;
22220b57cec5SDimitry Andric 
22230b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
22240b57cec5SDimitry Andric 
22250b57cec5SDimitry Andric typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
22260b57cec5SDimitry Andric   /* Compiler flags */ /* Total compiler flags must be 16 bits */
22270b57cec5SDimitry Andric   unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
22280b57cec5SDimitry Andric   unsigned final : 1; /* task is final(1) so execute immediately */
22290b57cec5SDimitry Andric   unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
22300b57cec5SDimitry Andric                               code path */
22310b57cec5SDimitry Andric   unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
22320b57cec5SDimitry Andric                                      invoke destructors from the runtime */
22330b57cec5SDimitry Andric   unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
22340b57cec5SDimitry Andric                          context of the RTL) */
22350b57cec5SDimitry Andric   unsigned priority_specified : 1; /* set if the compiler provides priority
22360b57cec5SDimitry Andric                                       setting for the task */
22370b57cec5SDimitry Andric   unsigned detachable : 1; /* 1 == can detach */
22380b57cec5SDimitry Andric   unsigned reserved : 9; /* reserved for compiler use */
22390b57cec5SDimitry Andric 
22400b57cec5SDimitry Andric   /* Library flags */ /* Total library flags must be 16 bits */
22410b57cec5SDimitry Andric   unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
22420b57cec5SDimitry Andric   unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
22430b57cec5SDimitry Andric   unsigned tasking_ser : 1; // all tasks in team are either executed immediately
22440b57cec5SDimitry Andric   // (1) or may be deferred (0)
22450b57cec5SDimitry Andric   unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
22460b57cec5SDimitry Andric   // (0) [>= 2 threads]
22470b57cec5SDimitry Andric   /* If either team_serial or tasking_ser is set, task team may be NULL */
22480b57cec5SDimitry Andric   /* Task State Flags: */
22490b57cec5SDimitry Andric   unsigned started : 1; /* 1==started, 0==not started     */
22500b57cec5SDimitry Andric   unsigned executing : 1; /* 1==executing, 0==not executing */
22510b57cec5SDimitry Andric   unsigned complete : 1; /* 1==complete, 0==not complete   */
2252480093f4SDimitry Andric   unsigned freed : 1; /* 1==freed, 0==allocated        */
22530b57cec5SDimitry Andric   unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
22540b57cec5SDimitry Andric   unsigned reserved31 : 7; /* reserved for library use */
22550b57cec5SDimitry Andric 
22560b57cec5SDimitry Andric } kmp_tasking_flags_t;
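
// The bit-field widths above are meant to total exactly 32 bits (16 compiler
// bits + 16 library bits). A compile-time check in the spirit of the
// KMP_BUILD_ASSERT used below for kmp_taskdata_t could look like this (a
// sketch; not claimed to be present in the runtime):
//
//   KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == sizeof(kmp_int32));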
22570b57cec5SDimitry Andric 
22580b57cec5SDimitry Andric struct kmp_taskdata { /* aligned during dynamic allocation       */
22590b57cec5SDimitry Andric   kmp_int32 td_task_id; /* id, assigned by debugger                */
22600b57cec5SDimitry Andric   kmp_tasking_flags_t td_flags; /* task flags                              */
22610b57cec5SDimitry Andric   kmp_team_t *td_team; /* team for this task                      */
22620b57cec5SDimitry Andric   kmp_info_p *td_alloc_thread; /* thread that allocated data structures   */
22630b57cec5SDimitry Andric   /* Currently not used except for perhaps IDB */
22640b57cec5SDimitry Andric   kmp_taskdata_t *td_parent; /* parent task                             */
22650b57cec5SDimitry Andric   kmp_int32 td_level; /* task nesting level                      */
22660b57cec5SDimitry Andric   std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
22670b57cec5SDimitry Andric   ident_t *td_ident; /* task identifier                         */
22680b57cec5SDimitry Andric   // Taskwait data.
22690b57cec5SDimitry Andric   ident_t *td_taskwait_ident;
22700b57cec5SDimitry Andric   kmp_uint32 td_taskwait_counter;
22710b57cec5SDimitry Andric   kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
22720b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_internal_control_t
22730b57cec5SDimitry Andric       td_icvs; /* Internal control variables for the task */
22740b57cec5SDimitry Andric   KMP_ALIGN_CACHE std::atomic<kmp_int32>
22750b57cec5SDimitry Andric       td_allocated_child_tasks; /* Child tasks (+ current task) not yet
22760b57cec5SDimitry Andric                                    deallocated */
22770b57cec5SDimitry Andric   std::atomic<kmp_int32>
22780b57cec5SDimitry Andric       td_incomplete_child_tasks; /* Child tasks not yet complete */
22790b57cec5SDimitry Andric   kmp_taskgroup_t
22800b57cec5SDimitry Andric       *td_taskgroup; // Each task keeps pointer to its current taskgroup
22810b57cec5SDimitry Andric   kmp_dephash_t
22820b57cec5SDimitry Andric       *td_dephash; // Dependencies for children tasks are tracked from here
22830b57cec5SDimitry Andric   kmp_depnode_t
22840b57cec5SDimitry Andric       *td_depnode; // Pointer to graph node if this task has dependencies
22850b57cec5SDimitry Andric   kmp_task_team_t *td_task_team;
22860b57cec5SDimitry Andric   kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
22870b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
22880b57cec5SDimitry Andric   // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
22890b57cec5SDimitry Andric   kmp_int32 td_size_loop_bounds;
22900b57cec5SDimitry Andric #endif
22910b57cec5SDimitry Andric   kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
22920b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
22930b57cec5SDimitry Andric   // GOMP sends in a copy function for copy constructors
22940b57cec5SDimitry Andric   void (*td_copy_func)(void *, void *);
22950b57cec5SDimitry Andric #endif
22960b57cec5SDimitry Andric   kmp_event_t td_allow_completion_event;
22970b57cec5SDimitry Andric #if OMPT_SUPPORT
22980b57cec5SDimitry Andric   ompt_task_info_t ompt_task_info;
22990b57cec5SDimitry Andric #endif
23000b57cec5SDimitry Andric }; // struct kmp_taskdata
23010b57cec5SDimitry Andric 
23020b57cec5SDimitry Andric // Make sure padding above worked
23030b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
23040b57cec5SDimitry Andric 
23050b57cec5SDimitry Andric // Data for task team but per thread
23060b57cec5SDimitry Andric typedef struct kmp_base_thread_data {
23070b57cec5SDimitry Andric   kmp_info_p *td_thr; // Pointer back to thread info
23080b57cec5SDimitry Andric   // Used only in __kmp_execute_tasks_template, maybe not avail until task is
23090b57cec5SDimitry Andric   // queued?
23100b57cec5SDimitry Andric   kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
23110b57cec5SDimitry Andric   kmp_taskdata_t *
23120b57cec5SDimitry Andric       *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
23130b57cec5SDimitry Andric   kmp_int32 td_deque_size; // Size of deque
23140b57cec5SDimitry Andric   kmp_uint32 td_deque_head; // Head of deque (will wrap)
23150b57cec5SDimitry Andric   kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
23160b57cec5SDimitry Andric   kmp_int32 td_deque_ntasks; // Number of tasks in deque
23170b57cec5SDimitry Andric   // GEH: shouldn't this be volatile since used in while-spin?
23180b57cec5SDimitry Andric   kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
23190b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
23200b57cec5SDimitry Andric   kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
23210b57cec5SDimitry Andric // scheduling constraint
23220b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
23230b57cec5SDimitry Andric } kmp_base_thread_data_t;
23240b57cec5SDimitry Andric 
23250b57cec5SDimitry Andric #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
23260b57cec5SDimitry Andric #define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
23270b57cec5SDimitry Andric 
23280b57cec5SDimitry Andric #define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
23290b57cec5SDimitry Andric #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
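
// Illustrative sketch (not runtime code): the deque size is kept a power of
// two, so head/tail indices wrap with TASK_DEQUE_MASK instead of a modulo;
// the variable names below are assumptions.
//
//   kmp_base_thread_data_t *td = /* per-thread task data */;
//   kmp_uint32 slot = td->td_deque_tail & TASK_DEQUE_MASK(*td); // wrapped index
//   td->td_deque[slot] = new_taskdata;                          // enqueue at tail
//   td->td_deque_tail = (td->td_deque_tail + 1) & TASK_DEQUE_MASK(*td);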
23300b57cec5SDimitry Andric 
23310b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_thread_data {
23320b57cec5SDimitry Andric   kmp_base_thread_data_t td;
23330b57cec5SDimitry Andric   double td_align; /* use worst case alignment */
23340b57cec5SDimitry Andric   char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
23350b57cec5SDimitry Andric } kmp_thread_data_t;
23360b57cec5SDimitry Andric 
23370b57cec5SDimitry Andric // Data for task teams which are used when tasking is enabled for the team
23380b57cec5SDimitry Andric typedef struct kmp_base_task_team {
23390b57cec5SDimitry Andric   kmp_bootstrap_lock_t
23400b57cec5SDimitry Andric       tt_threads_lock; /* Lock used to allocate per-thread part of task team */
23410b57cec5SDimitry Andric   /* must be bootstrap lock since used at library shutdown*/
23420b57cec5SDimitry Andric   kmp_task_team_t *tt_next; /* For linking the task team free list */
23430b57cec5SDimitry Andric   kmp_thread_data_t
23440b57cec5SDimitry Andric       *tt_threads_data; /* Array of per-thread structures for task team */
23450b57cec5SDimitry Andric   /* Data survives task team deallocation */
23460b57cec5SDimitry Andric   kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
23470b57cec5SDimitry Andric                                executing this team? */
23480b57cec5SDimitry Andric   /* TRUE means tt_threads_data is set up and initialized */
23490b57cec5SDimitry Andric   kmp_int32 tt_nproc; /* #threads in team           */
23500b57cec5SDimitry Andric   kmp_int32 tt_max_threads; // # entries allocated for threads_data array
23510b57cec5SDimitry Andric   kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
23520b57cec5SDimitry Andric   kmp_int32 tt_untied_task_encountered;
23530b57cec5SDimitry Andric 
23540b57cec5SDimitry Andric   KMP_ALIGN_CACHE
23550b57cec5SDimitry Andric   std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
23560b57cec5SDimitry Andric 
23570b57cec5SDimitry Andric   KMP_ALIGN_CACHE
23580b57cec5SDimitry Andric   volatile kmp_uint32
23590b57cec5SDimitry Andric       tt_active; /* is the team still actively executing tasks */
23600b57cec5SDimitry Andric } kmp_base_task_team_t;
23610b57cec5SDimitry Andric 
23620b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_task_team {
23630b57cec5SDimitry Andric   kmp_base_task_team_t tt;
23640b57cec5SDimitry Andric   double tt_align; /* use worst case alignment */
23650b57cec5SDimitry Andric   char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
23660b57cec5SDimitry Andric };
23670b57cec5SDimitry Andric 
23680b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
23690b57cec5SDimitry Andric // Free lists keep same-size free memory slots for fast memory allocation
23700b57cec5SDimitry Andric // routines
23710b57cec5SDimitry Andric typedef struct kmp_free_list {
23720b57cec5SDimitry Andric   void *th_free_list_self; // Self-allocated tasks free list
23730b57cec5SDimitry Andric   void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
23740b57cec5SDimitry Andric   // threads
23750b57cec5SDimitry Andric   void *th_free_list_other; // Non-self free list (to be returned to owner's
23760b57cec5SDimitry Andric   // sync list)
23770b57cec5SDimitry Andric } kmp_free_list_t;
23780b57cec5SDimitry Andric #endif
23790b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
23800b57cec5SDimitry Andric // Hot teams array keeps hot teams and their sizes for given thread. Hot teams
23810b57cec5SDimitry Andric // are not put in teams pool, and they don't put threads in threads pool.
23820b57cec5SDimitry Andric typedef struct kmp_hot_team_ptr {
23830b57cec5SDimitry Andric   kmp_team_p *hot_team; // pointer to hot_team of given nesting level
23840b57cec5SDimitry Andric   kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
23850b57cec5SDimitry Andric } kmp_hot_team_ptr_t;
23860b57cec5SDimitry Andric #endif
23870b57cec5SDimitry Andric typedef struct kmp_teams_size {
23880b57cec5SDimitry Andric   kmp_int32 nteams; // number of teams in a league
23890b57cec5SDimitry Andric   kmp_int32 nth; // number of threads in each team of the league
23900b57cec5SDimitry Andric } kmp_teams_size_t;
23910b57cec5SDimitry Andric 
23920b57cec5SDimitry Andric // This struct stores a thread that acts as a "root" for a contention
23930b57cec5SDimitry Andric // group. Contention groups are rooted at kmp_root threads, but also at
23940b57cec5SDimitry Andric // each master thread of each team created in the teams construct.
23950b57cec5SDimitry Andric // This struct therefore also stores a thread_limit associated with
23960b57cec5SDimitry Andric // that contention group, and a counter to track the number of threads
23970b57cec5SDimitry Andric // active in that contention group. Each thread has a list of these: CG
23980b57cec5SDimitry Andric // root threads have an entry in their list in which cg_root refers to
23990b57cec5SDimitry Andric // the thread itself, whereas other workers in the CG will have a
24000b57cec5SDimitry Andric // single entry whose cg_root points to that same CG root thread.
24010b57cec5SDimitry Andric // When a thread encounters a teams construct, it will add a new
24020b57cec5SDimitry Andric // entry to the front of its list, because it now roots a new CG.
24030b57cec5SDimitry Andric typedef struct kmp_cg_root {
24040b57cec5SDimitry Andric   kmp_info_p *cg_root; // "root" thread for a contention group
24050b57cec5SDimitry Andric   // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
24060b57cec5SDimitry Andric   // thread_limit clause for teams masters
24070b57cec5SDimitry Andric   kmp_int32 cg_thread_limit;
24080b57cec5SDimitry Andric   kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
24090b57cec5SDimitry Andric   struct kmp_cg_root *up; // pointer to higher level CG root in list
24100b57cec5SDimitry Andric } kmp_cg_root_t;
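
// Illustrative sketch of the list shape described above (not runtime code):
// a worker whose contention group is rooted at the initial root thread has a
// single entry; once that worker encounters a teams construct and becomes a
// CG root itself, a new entry is pushed to the front of its list:
//
//   before:  th_cg_roots -> [ cg_root = initial root thread, up = NULL ]
//   after:   th_cg_roots -> [ cg_root = this thread, up ]
//                              -> [ cg_root = initial root thread, up = NULL ]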
24110b57cec5SDimitry Andric 
24120b57cec5SDimitry Andric // OpenMP thread data structures
24130b57cec5SDimitry Andric 
24140b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_info {
24150b57cec5SDimitry Andric   /* Start with the read-only data, which is cache aligned and padded. This is
24160b57cec5SDimitry Andric      written by the master before the thread starts working. Uber masters may
24170b57cec5SDimitry Andric      update themselves later. Usage does not consider serialized regions.  */
24180b57cec5SDimitry Andric   kmp_desc_t th_info;
24190b57cec5SDimitry Andric   kmp_team_p *th_team; /* team we belong to */
24200b57cec5SDimitry Andric   kmp_root_p *th_root; /* pointer to root of task hierarchy */
24210b57cec5SDimitry Andric   kmp_info_p *th_next_pool; /* next available thread in the pool */
24220b57cec5SDimitry Andric   kmp_disp_t *th_dispatch; /* thread's dispatch data */
24230b57cec5SDimitry Andric   int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
24240b57cec5SDimitry Andric 
24250b57cec5SDimitry Andric   /* The following are cached from the team info structure */
24260b57cec5SDimitry Andric   /* TODO use these in more places as determined to be needed via profiling */
24270b57cec5SDimitry Andric   int th_team_nproc; /* number of threads in a team */
24280b57cec5SDimitry Andric   kmp_info_p *th_team_master; /* the team's master thread */
24290b57cec5SDimitry Andric   int th_team_serialized; /* team is serialized */
24300b57cec5SDimitry Andric   microtask_t th_teams_microtask; /* save entry address for teams construct */
24310b57cec5SDimitry Andric   int th_teams_level; /* save initial level of teams construct */
24320b57cec5SDimitry Andric /* it is 0 on a device but may be any value on the host */
24330b57cec5SDimitry Andric 
2434*5ffd83dbSDimitry Andric /* The blocktime info is copied from the team struct to the thread struct */
24350b57cec5SDimitry Andric /* at the start of a barrier, and the values stored in the team are used  */
24360b57cec5SDimitry Andric /* at points in the code where the team struct is no longer guaranteed    */
24370b57cec5SDimitry Andric /* to exist (from the POV of worker threads).                             */
24380b57cec5SDimitry Andric #if KMP_USE_MONITOR
24390b57cec5SDimitry Andric   int th_team_bt_intervals;
24400b57cec5SDimitry Andric   int th_team_bt_set;
24410b57cec5SDimitry Andric #else
24420b57cec5SDimitry Andric   kmp_uint64 th_team_bt_intervals;
24430b57cec5SDimitry Andric #endif
24440b57cec5SDimitry Andric 
24450b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
24460b57cec5SDimitry Andric   kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
24470b57cec5SDimitry Andric #endif
24480b57cec5SDimitry Andric   omp_allocator_handle_t th_def_allocator; /* default allocator */
24490b57cec5SDimitry Andric   /* The data set by the master at reinit, then R/W by the worker */
24500b57cec5SDimitry Andric   KMP_ALIGN_CACHE int
24510b57cec5SDimitry Andric       th_set_nproc; /* if > 0, then only use this request for the next fork */
24520b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
24530b57cec5SDimitry Andric   kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
24540b57cec5SDimitry Andric #endif
24550b57cec5SDimitry Andric   kmp_proc_bind_t
24560b57cec5SDimitry Andric       th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
24570b57cec5SDimitry Andric   kmp_teams_size_t
24580b57cec5SDimitry Andric       th_teams_size; /* number of teams/threads in teams construct */
24590b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
24600b57cec5SDimitry Andric   int th_current_place; /* place currently bound to */
24610b57cec5SDimitry Andric   int th_new_place; /* place to bind to in par reg */
24620b57cec5SDimitry Andric   int th_first_place; /* first place in partition */
24630b57cec5SDimitry Andric   int th_last_place; /* last place in partition */
24640b57cec5SDimitry Andric #endif
24650b57cec5SDimitry Andric   int th_prev_level; /* previous level for affinity format */
24660b57cec5SDimitry Andric   int th_prev_num_threads; /* previous num_threads for affinity format */
24670b57cec5SDimitry Andric #if USE_ITT_BUILD
24680b57cec5SDimitry Andric   kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
24690b57cec5SDimitry Andric   kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
24700b57cec5SDimitry Andric   kmp_uint64 th_frame_time; /* frame timestamp */
24710b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
24720b57cec5SDimitry Andric   kmp_local_t th_local;
24730b57cec5SDimitry Andric   struct private_common *th_pri_head;
24740b57cec5SDimitry Andric 
24750b57cec5SDimitry Andric   /* Now the data only used by the worker (after initial allocation) */
24760b57cec5SDimitry Andric   /* TODO the first serial team should actually be stored in the info_t
24770b57cec5SDimitry Andric      structure.  this will help reduce initial allocation overhead */
24780b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_team_p
24790b57cec5SDimitry Andric       *th_serial_team; /*serialized team held in reserve*/
24800b57cec5SDimitry Andric 
24810b57cec5SDimitry Andric #if OMPT_SUPPORT
24820b57cec5SDimitry Andric   ompt_thread_info_t ompt_thread_info;
24830b57cec5SDimitry Andric #endif
24840b57cec5SDimitry Andric 
24850b57cec5SDimitry Andric   /* The following are also read by the master during reinit */
24860b57cec5SDimitry Andric   struct common_table *th_pri_common;
24870b57cec5SDimitry Andric 
24880b57cec5SDimitry Andric   volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
24890b57cec5SDimitry Andric   /* while awaiting queuing lock acquire */
24900b57cec5SDimitry Andric 
24910b57cec5SDimitry Andric   volatile void *th_sleep_loc; // this points at a kmp_flag<T>
24920b57cec5SDimitry Andric 
24930b57cec5SDimitry Andric   ident_t *th_ident;
24940b57cec5SDimitry Andric   unsigned th_x; // Random number generator data
24950b57cec5SDimitry Andric   unsigned th_a; // Random number generator data
24960b57cec5SDimitry Andric 
24970b57cec5SDimitry Andric   /* Tasking-related data for the thread */
24980b57cec5SDimitry Andric   kmp_task_team_t *th_task_team; // Task team struct
24990b57cec5SDimitry Andric   kmp_taskdata_t *th_current_task; // Innermost Task being executed
25000b57cec5SDimitry Andric   kmp_uint8 th_task_state; // alternating 0/1 for task team identification
25010b57cec5SDimitry Andric   kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
25020b57cec5SDimitry Andric   // at nested levels
25030b57cec5SDimitry Andric   kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
25040b57cec5SDimitry Andric   kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
25050b57cec5SDimitry Andric   kmp_uint32 th_reap_state; // Non-zero indicates thread is not
25060b57cec5SDimitry Andric   // tasking, thus safe to reap
25070b57cec5SDimitry Andric 
25080b57cec5SDimitry Andric   /* More stuff for keeping track of active/sleeping threads (this part is
25090b57cec5SDimitry Andric      written by the worker thread) */
25100b57cec5SDimitry Andric   kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
25110b57cec5SDimitry Andric   int th_active; // ! sleeping; 32 bits for TCR/TCW
25120b57cec5SDimitry Andric   struct cons_header *th_cons; // used for consistency check
25130b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
25140b57cec5SDimitry Andric   // used for hierarchical scheduling
25150b57cec5SDimitry Andric   kmp_hier_private_bdata_t *th_hier_bar_data;
25160b57cec5SDimitry Andric #endif
25170b57cec5SDimitry Andric 
25180b57cec5SDimitry Andric   /* Add the synchronizing data, which is cache aligned and padded. */
25190b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
25200b57cec5SDimitry Andric 
25210b57cec5SDimitry Andric   KMP_ALIGN_CACHE volatile kmp_int32
25220b57cec5SDimitry Andric       th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
25230b57cec5SDimitry Andric 
25240b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
25250b57cec5SDimitry Andric #define NUM_LISTS 4
25260b57cec5SDimitry Andric   kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
25270b57cec5SDimitry Andric // allocation routines
25280b57cec5SDimitry Andric #endif
25290b57cec5SDimitry Andric 
25300b57cec5SDimitry Andric #if KMP_OS_WINDOWS
25310b57cec5SDimitry Andric   kmp_win32_cond_t th_suspend_cv;
25320b57cec5SDimitry Andric   kmp_win32_mutex_t th_suspend_mx;
25330b57cec5SDimitry Andric   std::atomic<int> th_suspend_init;
25340b57cec5SDimitry Andric #endif
25350b57cec5SDimitry Andric #if KMP_OS_UNIX
25360b57cec5SDimitry Andric   kmp_cond_align_t th_suspend_cv;
25370b57cec5SDimitry Andric   kmp_mutex_align_t th_suspend_mx;
25380b57cec5SDimitry Andric   std::atomic<int> th_suspend_init_count;
25390b57cec5SDimitry Andric #endif
25400b57cec5SDimitry Andric 
25410b57cec5SDimitry Andric #if USE_ITT_BUILD
25420b57cec5SDimitry Andric   kmp_itt_mark_t th_itt_mark_single;
25430b57cec5SDimitry Andric // alignment ???
25440b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
25450b57cec5SDimitry Andric #if KMP_STATS_ENABLED
25460b57cec5SDimitry Andric   kmp_stats_list *th_stats;
25470b57cec5SDimitry Andric #endif
25480b57cec5SDimitry Andric #if KMP_OS_UNIX
25490b57cec5SDimitry Andric   std::atomic<bool> th_blocking;
25500b57cec5SDimitry Andric #endif
25510b57cec5SDimitry Andric   kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
25520b57cec5SDimitry Andric } kmp_base_info_t;
25530b57cec5SDimitry Andric 
25540b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_info {
25550b57cec5SDimitry Andric   double th_align; /* use worst case alignment */
25560b57cec5SDimitry Andric   char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
25570b57cec5SDimitry Andric   kmp_base_info_t th;
25580b57cec5SDimitry Andric } kmp_info_t;
25590b57cec5SDimitry Andric 
25600b57cec5SDimitry Andric // OpenMP thread team data structures
25610b57cec5SDimitry Andric 
25620b57cec5SDimitry Andric typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
25630b57cec5SDimitry Andric 
25640b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_sleep_team {
25650b57cec5SDimitry Andric   double dt_align; /* use worst case alignment */
25660b57cec5SDimitry Andric   char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
25670b57cec5SDimitry Andric   kmp_base_data_t dt;
25680b57cec5SDimitry Andric } kmp_sleep_team_t;
25690b57cec5SDimitry Andric 
25700b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_ordered_team {
25710b57cec5SDimitry Andric   double dt_align; /* use worst case alignment */
25720b57cec5SDimitry Andric   char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
25730b57cec5SDimitry Andric   kmp_base_data_t dt;
25740b57cec5SDimitry Andric } kmp_ordered_team_t;
25750b57cec5SDimitry Andric 
25760b57cec5SDimitry Andric typedef int (*launch_t)(int gtid);
25770b57cec5SDimitry Andric 
25780b57cec5SDimitry Andric /* Minimum number of ARGV entries to malloc if necessary */
25790b57cec5SDimitry Andric #define KMP_MIN_MALLOC_ARGV_ENTRIES 100
25800b57cec5SDimitry Andric 
25810b57cec5SDimitry Andric // Set up how many argv pointers will fit in cache lines containing
25820b57cec5SDimitry Andric // t_inline_argv. Historically, we have supported at least 96 bytes. Using a
25830b57cec5SDimitry Andric // larger value to put more space between the master-write/worker-read section
25840b57cec5SDimitry Andric // and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
25850b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
25860b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES                                                  \
25870b57cec5SDimitry Andric   (4 * CACHE_LINE -                                                            \
25880b57cec5SDimitry Andric    ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) +               \
25890b57cec5SDimitry Andric      sizeof(kmp_int16) + sizeof(kmp_uint32)) %                                 \
25900b57cec5SDimitry Andric     CACHE_LINE))
25910b57cec5SDimitry Andric #else
25920b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES                                                  \
25930b57cec5SDimitry Andric   (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
25940b57cec5SDimitry Andric #endif
25950b57cec5SDimitry Andric #define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
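
// Worked example (illustrative only; assumes x86_64 with CACHE_LINE == 64 and
// KMP_PTR_SKIP == 8, i.e. 8-byte pointers): the fixed team fields counted in
// the formula occupy 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so
//   KMP_INLINE_ARGV_BYTES   = 4*64 - (40 % 64) = 256 - 40 = 216
//   KMP_INLINE_ARGV_ENTRIES = 216 / 8          = 27 argv pointers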
25960b57cec5SDimitry Andric 
25970b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_team {
25980b57cec5SDimitry Andric   // Synchronization Data
25990b57cec5SDimitry Andric   // ---------------------------------------------------------------------------
26000b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
26010b57cec5SDimitry Andric   kmp_balign_team_t t_bar[bs_last_barrier];
26020b57cec5SDimitry Andric   std::atomic<int> t_construct; // count of single directive encountered by team
26030b57cec5SDimitry Andric   char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
26040b57cec5SDimitry Andric 
26050b57cec5SDimitry Andric   // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
26060b57cec5SDimitry Andric   std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
26070b57cec5SDimitry Andric   std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
26080b57cec5SDimitry Andric 
26090b57cec5SDimitry Andric   // Master only
26100b57cec5SDimitry Andric   // ---------------------------------------------------------------------------
26110b57cec5SDimitry Andric   KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
26120b57cec5SDimitry Andric   int t_master_this_cons; // "this_construct" single counter of master in parent
26130b57cec5SDimitry Andric   // team
26140b57cec5SDimitry Andric   ident_t *t_ident; // if volatile, have to change too much other crud to
26150b57cec5SDimitry Andric   // volatile too
26160b57cec5SDimitry Andric   kmp_team_p *t_parent; // parent team
26170b57cec5SDimitry Andric   kmp_team_p *t_next_pool; // next free team in the team pool
26180b57cec5SDimitry Andric   kmp_disp_t *t_dispatch; // thread's dispatch data
26190b57cec5SDimitry Andric   kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
26200b57cec5SDimitry Andric   kmp_proc_bind_t t_proc_bind; // bind type for par region
26210b57cec5SDimitry Andric #if USE_ITT_BUILD
26220b57cec5SDimitry Andric   kmp_uint64 t_region_time; // region begin timestamp
26230b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
26240b57cec5SDimitry Andric 
26250b57cec5SDimitry Andric   // Master write, workers read
26260b57cec5SDimitry Andric   // --------------------------------------------------------------------------
26270b57cec5SDimitry Andric   KMP_ALIGN_CACHE void **t_argv;
26280b57cec5SDimitry Andric   int t_argc;
26290b57cec5SDimitry Andric   int t_nproc; // number of threads in team
26300b57cec5SDimitry Andric   microtask_t t_pkfn;
26310b57cec5SDimitry Andric   launch_t t_invoke; // procedure to launch the microtask
26320b57cec5SDimitry Andric 
26330b57cec5SDimitry Andric #if OMPT_SUPPORT
26340b57cec5SDimitry Andric   ompt_team_info_t ompt_team_info;
26350b57cec5SDimitry Andric   ompt_lw_taskteam_t *ompt_serialized_team_info;
26360b57cec5SDimitry Andric #endif
26370b57cec5SDimitry Andric 
26380b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
26390b57cec5SDimitry Andric   kmp_int8 t_fp_control_saved;
26400b57cec5SDimitry Andric   kmp_int8 t_pad2b;
26410b57cec5SDimitry Andric   kmp_int16 t_x87_fpu_control_word; // FP control regs
26420b57cec5SDimitry Andric   kmp_uint32 t_mxcsr;
26430b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
26440b57cec5SDimitry Andric 
26450b57cec5SDimitry Andric   void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
26460b57cec5SDimitry Andric 
26470b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_info_t **t_threads;
26480b57cec5SDimitry Andric   kmp_taskdata_t
26490b57cec5SDimitry Andric       *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
26500b57cec5SDimitry Andric   int t_level; // nested parallel level
26510b57cec5SDimitry Andric 
26520b57cec5SDimitry Andric   KMP_ALIGN_CACHE int t_max_argc;
2653480093f4SDimitry Andric   int t_max_nproc; // max threads this team can handle (dynamically expandable)
26540b57cec5SDimitry Andric   int t_serialized; // levels deep of serialized teams
26550b57cec5SDimitry Andric   dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
26560b57cec5SDimitry Andric   int t_id; // team's id, assigned by debugger.
26570b57cec5SDimitry Andric   int t_active_level; // nested active parallel level
26580b57cec5SDimitry Andric   kmp_r_sched_t t_sched; // run-time schedule for the team
26590b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
26600b57cec5SDimitry Andric   int t_first_place; // first & last place in parent thread's partition.
26610b57cec5SDimitry Andric   int t_last_place; // Restore these values to master after par region.
26620b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
26630b57cec5SDimitry Andric   int t_display_affinity;
26640b57cec5SDimitry Andric   int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
26650b57cec5SDimitry Andric   // omp_set_num_threads() call
26660b57cec5SDimitry Andric   omp_allocator_handle_t t_def_allocator; /* default allocator */
26670b57cec5SDimitry Andric 
26680b57cec5SDimitry Andric // Read/write by workers as well
26690b57cec5SDimitry Andric #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
26700b57cec5SDimitry Andric   // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
26710b57cec5SDimitry Andric   // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
26720b57cec5SDimitry Andric   // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
26730b57cec5SDimitry Andric   // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
26740b57cec5SDimitry Andric   char dummy_padding[1024];
26750b57cec5SDimitry Andric #endif
26760b57cec5SDimitry Andric   // Internal control stack for additional nested teams.
26770b57cec5SDimitry Andric   KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
26780b57cec5SDimitry Andric   // for SERIALIZED teams nested 2 or more levels deep
26790b57cec5SDimitry Andric   // typed flag to store request state of cancellation
26800b57cec5SDimitry Andric   std::atomic<kmp_int32> t_cancel_request;
26810b57cec5SDimitry Andric   int t_master_active; // save on fork, restore on join
26820b57cec5SDimitry Andric   void *t_copypriv_data; // team specific pointer to copyprivate data array
26830b57cec5SDimitry Andric #if KMP_OS_WINDOWS
26840b57cec5SDimitry Andric   std::atomic<kmp_uint32> t_copyin_counter;
26850b57cec5SDimitry Andric #endif
26860b57cec5SDimitry Andric #if USE_ITT_BUILD
26870b57cec5SDimitry Andric   void *t_stack_id; // team specific stack stitching id (for ittnotify)
26880b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
26890b57cec5SDimitry Andric } kmp_base_team_t;
26900b57cec5SDimitry Andric 
26910b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_team {
26920b57cec5SDimitry Andric   kmp_base_team_t t;
26930b57cec5SDimitry Andric   double t_align; /* use worst case alignment */
26940b57cec5SDimitry Andric   char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
26950b57cec5SDimitry Andric };
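/* Note (descriptive, added for clarity): the union-with-padding idiom above is
   repeated for kmp_global and kmp_root below. The double member forces
   worst-case scalar alignment, and the char array sized with KMP_PAD rounds the
   union up, the intent (as the macro names suggest) being that consecutive
   objects do not share a cache line. */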
26960b57cec5SDimitry Andric 
26970b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_time_global {
26980b57cec5SDimitry Andric   double dt_align; /* use worst case alignment */
26990b57cec5SDimitry Andric   char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
27000b57cec5SDimitry Andric   kmp_base_data_t dt;
27010b57cec5SDimitry Andric } kmp_time_global_t;
27020b57cec5SDimitry Andric 
27030b57cec5SDimitry Andric typedef struct kmp_base_global {
27040b57cec5SDimitry Andric   /* cache-aligned */
27050b57cec5SDimitry Andric   kmp_time_global_t g_time;
27060b57cec5SDimitry Andric 
27070b57cec5SDimitry Andric   /* non cache-aligned */
27080b57cec5SDimitry Andric   volatile int g_abort;
27090b57cec5SDimitry Andric   volatile int g_done;
27100b57cec5SDimitry Andric 
27110b57cec5SDimitry Andric   int g_dynamic;
27120b57cec5SDimitry Andric   enum dynamic_mode g_dynamic_mode;
27130b57cec5SDimitry Andric } kmp_base_global_t;
27140b57cec5SDimitry Andric 
27150b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_global {
27160b57cec5SDimitry Andric   kmp_base_global_t g;
27170b57cec5SDimitry Andric   double g_align; /* use worst case alignment */
27180b57cec5SDimitry Andric   char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
27190b57cec5SDimitry Andric } kmp_global_t;
27200b57cec5SDimitry Andric 
27210b57cec5SDimitry Andric typedef struct kmp_base_root {
27220b57cec5SDimitry Andric   // TODO: GEH - combine r_active with r_in_parallel then r_active ==
27230b57cec5SDimitry Andric   // (r_in_parallel>= 0)
27240b57cec5SDimitry Andric   // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
27250b57cec5SDimitry Andric   // the synch overhead or keeping r_active
27260b57cec5SDimitry Andric   volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
27270b57cec5SDimitry Andric   // keeps a count of active parallel regions per root
27280b57cec5SDimitry Andric   std::atomic<int> r_in_parallel;
27290b57cec5SDimitry Andric   // GEH: This is misnamed, should be r_active_levels
27300b57cec5SDimitry Andric   kmp_team_t *r_root_team;
27310b57cec5SDimitry Andric   kmp_team_t *r_hot_team;
27320b57cec5SDimitry Andric   kmp_info_t *r_uber_thread;
27330b57cec5SDimitry Andric   kmp_lock_t r_begin_lock;
27340b57cec5SDimitry Andric   volatile int r_begin;
27350b57cec5SDimitry Andric   int r_blocktime; /* blocktime for this root and descendants */
27360b57cec5SDimitry Andric } kmp_base_root_t;
27370b57cec5SDimitry Andric 
27380b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_root {
27390b57cec5SDimitry Andric   kmp_base_root_t r;
27400b57cec5SDimitry Andric   double r_align; /* use worst case alignment */
27410b57cec5SDimitry Andric   char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
27420b57cec5SDimitry Andric } kmp_root_t;
27430b57cec5SDimitry Andric 
27440b57cec5SDimitry Andric struct fortran_inx_info {
27450b57cec5SDimitry Andric   kmp_int32 data;
27460b57cec5SDimitry Andric };
27470b57cec5SDimitry Andric 
27480b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
27490b57cec5SDimitry Andric 
27500b57cec5SDimitry Andric extern int __kmp_settings;
27510b57cec5SDimitry Andric extern int __kmp_duplicate_library_ok;
27520b57cec5SDimitry Andric #if USE_ITT_BUILD
27530b57cec5SDimitry Andric extern int __kmp_forkjoin_frames;
27540b57cec5SDimitry Andric extern int __kmp_forkjoin_frames_mode;
27550b57cec5SDimitry Andric #endif
27560b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
27570b57cec5SDimitry Andric extern int __kmp_determ_red;
27580b57cec5SDimitry Andric 
27590b57cec5SDimitry Andric #ifdef KMP_DEBUG
27600b57cec5SDimitry Andric extern int kmp_a_debug;
27610b57cec5SDimitry Andric extern int kmp_b_debug;
27620b57cec5SDimitry Andric extern int kmp_c_debug;
27630b57cec5SDimitry Andric extern int kmp_d_debug;
27640b57cec5SDimitry Andric extern int kmp_e_debug;
27650b57cec5SDimitry Andric extern int kmp_f_debug;
27660b57cec5SDimitry Andric #endif /* KMP_DEBUG */
27670b57cec5SDimitry Andric 
27680b57cec5SDimitry Andric /* For debug information logging using rotating buffer */
27690b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_INIT 512
27700b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_MIN 1
27710b57cec5SDimitry Andric 
27720b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_INIT 128
27730b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_MIN 2
27740b57cec5SDimitry Andric 
27750b57cec5SDimitry Andric extern int
27760b57cec5SDimitry Andric     __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
27770b57cec5SDimitry Andric extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
27780b57cec5SDimitry Andric extern int
27790b57cec5SDimitry Andric     __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
27800b57cec5SDimitry Andric extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
27810b57cec5SDimitry Andric                                       entry pointer */
27820b57cec5SDimitry Andric 
27830b57cec5SDimitry Andric extern char *__kmp_debug_buffer; /* Debug buffer itself */
27840b57cec5SDimitry Andric extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
27850b57cec5SDimitry Andric                                               printed in buffer so far */
27860b57cec5SDimitry Andric extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
27870b57cec5SDimitry Andric                                           recommended in warnings */
27880b57cec5SDimitry Andric /* end rotating debug buffer */
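/* Sizing/usage sketch (assumptions for illustration, not definitions): with
   the defaults above the rotating buffer would hold
   KMP_DEBUG_BUF_LINES_INIT * KMP_DEBUG_BUF_CHARS_INIT = 512 * 128 bytes
   (64 KiB). A writer would typically claim a slot roughly like this:
     int slot   = KMP_ATOMIC_INC(&__kmp_debug_count) % __kmp_debug_buf_lines;
     char *line = __kmp_debug_buffer + slot * __kmp_debug_buf_chars;
   wrapping back onto the oldest line once the buffer is full. */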
27890b57cec5SDimitry Andric 
27900b57cec5SDimitry Andric #ifdef KMP_DEBUG
27910b57cec5SDimitry Andric extern int __kmp_par_range; /* +1 => only go par for constructs in range */
27920b57cec5SDimitry Andric 
27930b57cec5SDimitry Andric #define KMP_PAR_RANGE_ROUTINE_LEN 1024
27940b57cec5SDimitry Andric extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
27950b57cec5SDimitry Andric #define KMP_PAR_RANGE_FILENAME_LEN 1024
27960b57cec5SDimitry Andric extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
27970b57cec5SDimitry Andric extern int __kmp_par_range_lb;
27980b57cec5SDimitry Andric extern int __kmp_par_range_ub;
27990b57cec5SDimitry Andric #endif
28000b57cec5SDimitry Andric 
28010b57cec5SDimitry Andric /* For printing out dynamic storage map for threads and teams */
28020b57cec5SDimitry Andric extern int
28030b57cec5SDimitry Andric     __kmp_storage_map; /* True means print storage map for threads and teams */
28040b57cec5SDimitry Andric extern int __kmp_storage_map_verbose; /* True means storage map includes
28050b57cec5SDimitry Andric                                          placement info */
28060b57cec5SDimitry Andric extern int __kmp_storage_map_verbose_specified;
28070b57cec5SDimitry Andric 
28080b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
28090b57cec5SDimitry Andric extern kmp_cpuinfo_t __kmp_cpuinfo;
28100b57cec5SDimitry Andric #endif
28110b57cec5SDimitry Andric 
28120b57cec5SDimitry Andric extern volatile int __kmp_init_serial;
28130b57cec5SDimitry Andric extern volatile int __kmp_init_gtid;
28140b57cec5SDimitry Andric extern volatile int __kmp_init_common;
28150b57cec5SDimitry Andric extern volatile int __kmp_init_middle;
28160b57cec5SDimitry Andric extern volatile int __kmp_init_parallel;
28170b57cec5SDimitry Andric #if KMP_USE_MONITOR
28180b57cec5SDimitry Andric extern volatile int __kmp_init_monitor;
28190b57cec5SDimitry Andric #endif
28200b57cec5SDimitry Andric extern volatile int __kmp_init_user_locks;
28210b57cec5SDimitry Andric extern int __kmp_init_counter;
28220b57cec5SDimitry Andric extern int __kmp_root_counter;
28230b57cec5SDimitry Andric extern int __kmp_version;
28240b57cec5SDimitry Andric 
28250b57cec5SDimitry Andric /* list of addresses of allocated caches for commons */
28260b57cec5SDimitry Andric extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
28270b57cec5SDimitry Andric 
28280b57cec5SDimitry Andric /* Barrier algorithm types and options */
28290b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
28300b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_bb_dflt;
28310b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
28320b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
28330b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
28340b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
28350b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
28360b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
28370b57cec5SDimitry Andric extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
28380b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
28390b57cec5SDimitry Andric extern char const *__kmp_barrier_type_name[bs_last_barrier];
28400b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_name[bp_last_bar];
28410b57cec5SDimitry Andric 
28420b57cec5SDimitry Andric /* Global Locks */
28430b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
28440b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
28450b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_task_team_lock;
28460b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
28470b57cec5SDimitry Andric     __kmp_exit_lock; /* exit() is not always thread-safe */
28480b57cec5SDimitry Andric #if KMP_USE_MONITOR
28490b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
28500b57cec5SDimitry Andric     __kmp_monitor_lock; /* control monitor thread creation */
28510b57cec5SDimitry Andric #endif
28520b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
28530b57cec5SDimitry Andric     __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
28540b57cec5SDimitry Andric                              __kmp_threads expansion to co-exist */
28550b57cec5SDimitry Andric 
28560b57cec5SDimitry Andric extern kmp_lock_t __kmp_global_lock; /* control OS/global access  */
28570b57cec5SDimitry Andric extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access  */
28580b57cec5SDimitry Andric extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
28590b57cec5SDimitry Andric 
28600b57cec5SDimitry Andric extern enum library_type __kmp_library;
28610b57cec5SDimitry Andric 
28620b57cec5SDimitry Andric extern enum sched_type __kmp_sched; /* default runtime scheduling */
28630b57cec5SDimitry Andric extern enum sched_type __kmp_static; /* default static scheduling method */
28640b57cec5SDimitry Andric extern enum sched_type __kmp_guided; /* default guided scheduling method */
28650b57cec5SDimitry Andric extern enum sched_type __kmp_auto; /* default auto scheduling method */
28660b57cec5SDimitry Andric extern int __kmp_chunk; /* default runtime chunk size */
28670b57cec5SDimitry Andric 
28680b57cec5SDimitry Andric extern size_t __kmp_stksize; /* stack size per thread         */
28690b57cec5SDimitry Andric #if KMP_USE_MONITOR
28700b57cec5SDimitry Andric extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
28710b57cec5SDimitry Andric #endif
28720b57cec5SDimitry Andric extern size_t __kmp_stkoffset; /* stack offset per thread       */
28730b57cec5SDimitry Andric extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
28740b57cec5SDimitry Andric 
28750b57cec5SDimitry Andric extern size_t
28760b57cec5SDimitry Andric     __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
28770b57cec5SDimitry Andric extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
28780b57cec5SDimitry Andric extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
28790b57cec5SDimitry Andric extern int __kmp_env_checks; /* was KMP_CHECKS specified?    */
28800b57cec5SDimitry Andric extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
28810b57cec5SDimitry Andric extern int __kmp_generate_warnings; /* should we issue warnings? */
28820b57cec5SDimitry Andric extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
28830b57cec5SDimitry Andric 
28840b57cec5SDimitry Andric #ifdef DEBUG_SUSPEND
28850b57cec5SDimitry Andric extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
28860b57cec5SDimitry Andric #endif
28870b57cec5SDimitry Andric 
28880b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield;
28890b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield_exp_set;
28900b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_init;
28910b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_next;
28920b57cec5SDimitry Andric 
28930b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
28940b57cec5SDimitry Andric extern int __kmp_allThreadsSpecified;
28950b57cec5SDimitry Andric 
28960b57cec5SDimitry Andric extern size_t __kmp_align_alloc;
28970b57cec5SDimitry Andric /* following data protected by initialization routines */
28980b57cec5SDimitry Andric extern int __kmp_xproc; /* number of processors in the system */
28990b57cec5SDimitry Andric extern int __kmp_avail_proc; /* number of processors available to the process */
29000b57cec5SDimitry Andric extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
29010b57cec5SDimitry Andric extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
29020b57cec5SDimitry Andric // maximum total number of concurrently-existing threads on device
29030b57cec5SDimitry Andric extern int __kmp_max_nth;
29040b57cec5SDimitry Andric // maximum total number of concurrently-existing threads in a contention group
29050b57cec5SDimitry Andric extern int __kmp_cg_max_nth;
29060b57cec5SDimitry Andric extern int __kmp_teams_max_nth; // max threads used in a teams construct
29070b57cec5SDimitry Andric extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
29080b57cec5SDimitry Andric                                       __kmp_root */
29090b57cec5SDimitry Andric extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
29100b57cec5SDimitry Andric                                    region a la OMP_NUM_THREADS */
29110b57cec5SDimitry Andric extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
29120b57cec5SDimitry Andric                                       initialization */
29130b57cec5SDimitry Andric extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
29140b57cec5SDimitry Andric                                  used (fixed) */
29150b57cec5SDimitry Andric extern int __kmp_tp_cached; /* whether threadprivate cache has been created
29160b57cec5SDimitry Andric                                (__kmpc_threadprivate_cached()) */
29170b57cec5SDimitry Andric extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
29180b57cec5SDimitry Andric                                     blocking (env setting) */
29190b57cec5SDimitry Andric #if KMP_USE_MONITOR
29200b57cec5SDimitry Andric extern int
29210b57cec5SDimitry Andric     __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
29220b57cec5SDimitry Andric extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
29230b57cec5SDimitry Andric                                   blocking */
29240b57cec5SDimitry Andric #endif
29250b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME
29260b57cec5SDimitry Andric extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
29270b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */
29280b57cec5SDimitry Andric #ifdef KMP_DFLT_NTH_CORES
29290b57cec5SDimitry Andric extern int __kmp_ncores; /* Total number of cores for threads placement */
29300b57cec5SDimitry Andric #endif
29310b57cec5SDimitry Andric /* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
29320b57cec5SDimitry Andric extern int __kmp_abort_delay;
29330b57cec5SDimitry Andric 
29340b57cec5SDimitry Andric extern int __kmp_need_register_atfork_specified;
29350b57cec5SDimitry Andric extern int
29360b57cec5SDimitry Andric     __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
29370b57cec5SDimitry Andric                                    install fork handler */
29380b57cec5SDimitry Andric extern int __kmp_gtid_mode; /* Method of getting gtid, values:
29390b57cec5SDimitry Andric                                0 - not set, will be set at runtime
29400b57cec5SDimitry Andric                                1 - using stack search
29410b57cec5SDimitry Andric                                2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
29420b57cec5SDimitry Andric                                    X*) or TlsGetValue(Windows* OS))
29430b57cec5SDimitry Andric                                3 - static TLS (__declspec(thread) __kmp_gtid),
29440b57cec5SDimitry Andric                                    Linux* OS .so only.  */
29450b57cec5SDimitry Andric extern int
29460b57cec5SDimitry Andric     __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
29470b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID
29480b57cec5SDimitry Andric extern KMP_THREAD_LOCAL int __kmp_gtid;
29490b57cec5SDimitry Andric #endif
29500b57cec5SDimitry Andric extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
29510b57cec5SDimitry Andric extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
29520b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
29530b57cec5SDimitry Andric extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
29540b57cec5SDimitry Andric extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
29550b57cec5SDimitry Andric extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
29560b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
29570b57cec5SDimitry Andric 
29580b57cec5SDimitry Andric // max_active_levels for nested parallelism enabled by default via
29590b57cec5SDimitry Andric // OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
29600b57cec5SDimitry Andric extern int __kmp_dflt_max_active_levels;
29610b57cec5SDimitry Andric // Indicates whether value of __kmp_dflt_max_active_levels was already
29620b57cec5SDimitry Andric // explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
29630b57cec5SDimitry Andric extern bool __kmp_dflt_max_active_levels_set;
29640b57cec5SDimitry Andric extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
29650b57cec5SDimitry Andric                                           concurrent execution per team */
29660b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
29670b57cec5SDimitry Andric extern int __kmp_hot_teams_mode;
29680b57cec5SDimitry Andric extern int __kmp_hot_teams_max_level;
29690b57cec5SDimitry Andric #endif
29700b57cec5SDimitry Andric 
29710b57cec5SDimitry Andric #if KMP_OS_LINUX
29720b57cec5SDimitry Andric extern enum clock_function_type __kmp_clock_function;
29730b57cec5SDimitry Andric extern int __kmp_clock_function_param;
29740b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */
29750b57cec5SDimitry Andric 
29760b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED
29770b57cec5SDimitry Andric extern enum mic_type __kmp_mic_type;
29780b57cec5SDimitry Andric #endif
29790b57cec5SDimitry Andric 
29800b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE
29810b57cec5SDimitry Andric extern double __kmp_load_balance_interval; // load balance algorithm interval
29820b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */
29830b57cec5SDimitry Andric 
29840b57cec5SDimitry Andric // OpenMP 3.1 - Nested num threads array
29850b57cec5SDimitry Andric typedef struct kmp_nested_nthreads_t {
29860b57cec5SDimitry Andric   int *nth;
29870b57cec5SDimitry Andric   int size;
29880b57cec5SDimitry Andric   int used;
29890b57cec5SDimitry Andric } kmp_nested_nthreads_t;
29900b57cec5SDimitry Andric 
29910b57cec5SDimitry Andric extern kmp_nested_nthreads_t __kmp_nested_nth;
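/* Example (illustrative assumption): OMP_NUM_THREADS=4,2 would be parsed into
   __kmp_nested_nth roughly as nth = {4, 2}, used = 2, size >= 2, requesting 4
   threads at the outermost parallel level and 2 at the next nesting level. */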
29920b57cec5SDimitry Andric 
29930b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS
29940b57cec5SDimitry Andric 
29950b57cec5SDimitry Andric // Parameters for the speculative lock backoff system.
29960b57cec5SDimitry Andric struct kmp_adaptive_backoff_params_t {
29970b57cec5SDimitry Andric   // Number of soft retries before it counts as a hard retry.
29980b57cec5SDimitry Andric   kmp_uint32 max_soft_retries;
29990b57cec5SDimitry Andric   // Badness is a bit mask: 0,1,3,7,15,...; on each hard failure we move one
30000b57cec5SDimitry Andric   // step to the right along this sequence
30010b57cec5SDimitry Andric   kmp_uint32 max_badness;
30020b57cec5SDimitry Andric };
30030b57cec5SDimitry Andric 
30040b57cec5SDimitry Andric extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
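/* Sketch of how the badness mask steps through 0,1,3,7,15,... (assumed update
   rule, shown only to illustrate the sequence; the real logic lives in the
   adaptive lock implementation, not in this header):
     badness = (badness << 1) | 1; // one more low bit per hard failure
     if (badness > __kmp_adaptive_backoff_params.max_badness)
       badness = __kmp_adaptive_backoff_params.max_badness;
*/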
30050b57cec5SDimitry Andric 
30060b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS
30070b57cec5SDimitry Andric extern const char *__kmp_speculative_statsfile;
30080b57cec5SDimitry Andric #endif
30090b57cec5SDimitry Andric 
30100b57cec5SDimitry Andric #endif // KMP_USE_ADAPTIVE_LOCKS
30110b57cec5SDimitry Andric 
30120b57cec5SDimitry Andric extern int __kmp_display_env; /* TRUE or FALSE */
30130b57cec5SDimitry Andric extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
30140b57cec5SDimitry Andric extern int __kmp_omp_cancellation; /* TRUE or FALSE */
30150b57cec5SDimitry Andric 
30160b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
30170b57cec5SDimitry Andric 
30180b57cec5SDimitry Andric /* the following are protected by the fork/join lock */
30190b57cec5SDimitry Andric /* write: lock  read: anytime */
30200b57cec5SDimitry Andric extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
30210b57cec5SDimitry Andric /* read/write: lock */
30220b57cec5SDimitry Andric extern volatile kmp_team_t *__kmp_team_pool;
30230b57cec5SDimitry Andric extern volatile kmp_info_t *__kmp_thread_pool;
30240b57cec5SDimitry Andric extern kmp_info_t *__kmp_thread_pool_insert_pt;
30250b57cec5SDimitry Andric 
30260b57cec5SDimitry Andric // total num threads reachable from some root thread including all root threads
30270b57cec5SDimitry Andric extern volatile int __kmp_nth;
30280b57cec5SDimitry Andric /* total number of threads reachable from some root thread including all root
30290b57cec5SDimitry Andric    threads, and those in the thread pool */
30300b57cec5SDimitry Andric extern volatile int __kmp_all_nth;
30310b57cec5SDimitry Andric extern std::atomic<int> __kmp_thread_pool_active_nth;
30320b57cec5SDimitry Andric 
30330b57cec5SDimitry Andric extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
30340b57cec5SDimitry Andric /* end data protected by fork/join lock */
30350b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
30360b57cec5SDimitry Andric 
30370b57cec5SDimitry Andric #define __kmp_get_gtid() __kmp_get_global_thread_id()
30380b57cec5SDimitry Andric #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
30390b57cec5SDimitry Andric #define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
30400b57cec5SDimitry Andric #define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
30410b57cec5SDimitry Andric #define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
30420b57cec5SDimitry Andric 
30430b57cec5SDimitry Andric // AT: Which way is correct?
30440b57cec5SDimitry Andric // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
30450b57cec5SDimitry Andric // AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
30460b57cec5SDimitry Andric #define __kmp_get_team_num_threads(gtid)                                       \
30470b57cec5SDimitry Andric   (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
30480b57cec5SDimitry Andric 
30490b57cec5SDimitry Andric static inline bool KMP_UBER_GTID(int gtid) {
30500b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
30510b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
30520b57cec5SDimitry Andric   return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
30530b57cec5SDimitry Andric           __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
30540b57cec5SDimitry Andric }
30550b57cec5SDimitry Andric 
30560b57cec5SDimitry Andric static inline int __kmp_tid_from_gtid(int gtid) {
30570b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
30580b57cec5SDimitry Andric   return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
30590b57cec5SDimitry Andric }
30600b57cec5SDimitry Andric 
30610b57cec5SDimitry Andric static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
30620b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(tid >= 0 && team);
30630b57cec5SDimitry Andric   return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
30640b57cec5SDimitry Andric }
30650b57cec5SDimitry Andric 
30660b57cec5SDimitry Andric static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
30670b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thr);
30680b57cec5SDimitry Andric   return thr->th.th_info.ds.ds_gtid;
30690b57cec5SDimitry Andric }
30700b57cec5SDimitry Andric 
30710b57cec5SDimitry Andric static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
30720b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
30730b57cec5SDimitry Andric   return __kmp_threads[gtid];
30740b57cec5SDimitry Andric }
30750b57cec5SDimitry Andric 
30760b57cec5SDimitry Andric static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
30770b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(gtid >= 0);
30780b57cec5SDimitry Andric   return __kmp_threads[gtid]->th.th_team;
30790b57cec5SDimitry Andric }
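/* Usage sketch (illustrative; assumes the calling thread is, or becomes,
   registered with the runtime):
     int gtid = __kmp_entry_gtid();                  // registering entry point
     kmp_info_t *thr  = __kmp_thread_from_gtid(gtid);
     kmp_team_t *team = __kmp_team_from_gtid(gtid);
     int tid   = __kmp_tid_from_gtid(gtid);          // rank within the team
     int nproc = __kmp_get_team_num_threads(gtid);   // current team size
*/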
30800b57cec5SDimitry Andric 
30810b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
30820b57cec5SDimitry Andric 
30830b57cec5SDimitry Andric extern kmp_global_t __kmp_global; /* global status */
30840b57cec5SDimitry Andric 
30850b57cec5SDimitry Andric extern kmp_info_t __kmp_monitor;
30860b57cec5SDimitry Andric // For Debugging Support Library
30870b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_team_counter;
30880b57cec5SDimitry Andric // For Debugging Support Library
30890b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_task_counter;
30900b57cec5SDimitry Andric 
30910b57cec5SDimitry Andric #if USE_DEBUGGER
30920b57cec5SDimitry Andric #define _KMP_GEN_ID(counter)                                                   \
30930b57cec5SDimitry Andric   (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
30940b57cec5SDimitry Andric #else
30950b57cec5SDimitry Andric #define _KMP_GEN_ID(counter) (~0)
30960b57cec5SDimitry Andric #endif /* USE_DEBUGGER */
30970b57cec5SDimitry Andric 
30980b57cec5SDimitry Andric #define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
30990b57cec5SDimitry Andric #define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
31000b57cec5SDimitry Andric 
31010b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
31020b57cec5SDimitry Andric 
31030b57cec5SDimitry Andric extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
31040b57cec5SDimitry Andric                                          size_t size, char const *format, ...);
31050b57cec5SDimitry Andric 
31060b57cec5SDimitry Andric extern void __kmp_serial_initialize(void);
31070b57cec5SDimitry Andric extern void __kmp_middle_initialize(void);
31080b57cec5SDimitry Andric extern void __kmp_parallel_initialize(void);
31090b57cec5SDimitry Andric 
31100b57cec5SDimitry Andric extern void __kmp_internal_begin(void);
31110b57cec5SDimitry Andric extern void __kmp_internal_end_library(int gtid);
31120b57cec5SDimitry Andric extern void __kmp_internal_end_thread(int gtid);
31130b57cec5SDimitry Andric extern void __kmp_internal_end_atexit(void);
31140b57cec5SDimitry Andric extern void __kmp_internal_end_dtor(void);
31150b57cec5SDimitry Andric extern void __kmp_internal_end_dest(void *);
31160b57cec5SDimitry Andric 
31170b57cec5SDimitry Andric extern int __kmp_register_root(int initial_thread);
31180b57cec5SDimitry Andric extern void __kmp_unregister_root(int gtid);
31190b57cec5SDimitry Andric 
31200b57cec5SDimitry Andric extern int __kmp_ignore_mppbeg(void);
31210b57cec5SDimitry Andric extern int __kmp_ignore_mppend(void);
31220b57cec5SDimitry Andric 
31230b57cec5SDimitry Andric extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
31240b57cec5SDimitry Andric extern void __kmp_exit_single(int gtid);
31250b57cec5SDimitry Andric 
31260b57cec5SDimitry Andric extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
31270b57cec5SDimitry Andric extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
31280b57cec5SDimitry Andric 
31290b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE
31300b57cec5SDimitry Andric extern int __kmp_get_load_balance(int);
31310b57cec5SDimitry Andric #endif
31320b57cec5SDimitry Andric 
31330b57cec5SDimitry Andric extern int __kmp_get_global_thread_id(void);
31340b57cec5SDimitry Andric extern int __kmp_get_global_thread_id_reg(void);
31350b57cec5SDimitry Andric extern void __kmp_exit_thread(int exit_status);
31360b57cec5SDimitry Andric extern void __kmp_abort(char const *format, ...);
31370b57cec5SDimitry Andric extern void __kmp_abort_thread(void);
31380b57cec5SDimitry Andric KMP_NORETURN extern void __kmp_abort_process(void);
31390b57cec5SDimitry Andric extern void __kmp_warn(char const *format, ...);
31400b57cec5SDimitry Andric 
31410b57cec5SDimitry Andric extern void __kmp_set_num_threads(int new_nth, int gtid);
31420b57cec5SDimitry Andric 
31430b57cec5SDimitry Andric // Returns current thread (pointer to kmp_info_t). Current thread *must* be
31440b57cec5SDimitry Andric // registered.
31450b57cec5SDimitry Andric static inline kmp_info_t *__kmp_entry_thread() {
31460b57cec5SDimitry Andric   int gtid = __kmp_entry_gtid();
31470b57cec5SDimitry Andric 
31480b57cec5SDimitry Andric   return __kmp_threads[gtid];
31490b57cec5SDimitry Andric }
31500b57cec5SDimitry Andric 
31510b57cec5SDimitry Andric extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
31520b57cec5SDimitry Andric extern int __kmp_get_max_active_levels(int gtid);
31530b57cec5SDimitry Andric extern int __kmp_get_ancestor_thread_num(int gtid, int level);
31540b57cec5SDimitry Andric extern int __kmp_get_team_size(int gtid, int level);
31550b57cec5SDimitry Andric extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
31560b57cec5SDimitry Andric extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
31570b57cec5SDimitry Andric 
31580b57cec5SDimitry Andric extern unsigned short __kmp_get_random(kmp_info_t *thread);
31590b57cec5SDimitry Andric extern void __kmp_init_random(kmp_info_t *thread);
31600b57cec5SDimitry Andric 
31610b57cec5SDimitry Andric extern kmp_r_sched_t __kmp_get_schedule_global(void);
31620b57cec5SDimitry Andric extern void __kmp_adjust_num_threads(int new_nproc);
31630b57cec5SDimitry Andric extern void __kmp_check_stksize(size_t *val);
31640b57cec5SDimitry Andric 
31650b57cec5SDimitry Andric extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
31660b57cec5SDimitry Andric extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
31670b57cec5SDimitry Andric extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
31680b57cec5SDimitry Andric #define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
31690b57cec5SDimitry Andric #define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
31700b57cec5SDimitry Andric #define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
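/* Usage sketch (illustrative): the wrapper macros forward the caller's source
   location via KMP_SRC_LOC_CURR where that expands to one.
     void *buf = __kmp_allocate(256); // internal runtime allocation
     ...
     __kmp_free(buf);
*/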
31710b57cec5SDimitry Andric 
31720b57cec5SDimitry Andric #if USE_FAST_MEMORY
31730b57cec5SDimitry Andric extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
31740b57cec5SDimitry Andric                                   size_t size KMP_SRC_LOC_DECL);
31750b57cec5SDimitry Andric extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
31760b57cec5SDimitry Andric extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
31770b57cec5SDimitry Andric extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
31780b57cec5SDimitry Andric #define __kmp_fast_allocate(this_thr, size)                                    \
31790b57cec5SDimitry Andric   ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
31800b57cec5SDimitry Andric #define __kmp_fast_free(this_thr, ptr)                                         \
31810b57cec5SDimitry Andric   ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
31820b57cec5SDimitry Andric #endif
31830b57cec5SDimitry Andric 
31840b57cec5SDimitry Andric extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
31850b57cec5SDimitry Andric extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
31860b57cec5SDimitry Andric                                   size_t elsize KMP_SRC_LOC_DECL);
31870b57cec5SDimitry Andric extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
31880b57cec5SDimitry Andric                                    size_t size KMP_SRC_LOC_DECL);
31890b57cec5SDimitry Andric extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
31900b57cec5SDimitry Andric #define __kmp_thread_malloc(th, size)                                          \
31910b57cec5SDimitry Andric   ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
31920b57cec5SDimitry Andric #define __kmp_thread_calloc(th, nelem, elsize)                                 \
31930b57cec5SDimitry Andric   ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
31940b57cec5SDimitry Andric #define __kmp_thread_realloc(th, ptr, size)                                    \
31950b57cec5SDimitry Andric   ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
31960b57cec5SDimitry Andric #define __kmp_thread_free(th, ptr)                                             \
31970b57cec5SDimitry Andric   ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
31980b57cec5SDimitry Andric 
31990b57cec5SDimitry Andric #define KMP_INTERNAL_MALLOC(sz) malloc(sz)
32000b57cec5SDimitry Andric #define KMP_INTERNAL_FREE(p) free(p)
32010b57cec5SDimitry Andric #define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
32020b57cec5SDimitry Andric #define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
32030b57cec5SDimitry Andric 
32040b57cec5SDimitry Andric extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
32050b57cec5SDimitry Andric 
32060b57cec5SDimitry Andric extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
32070b57cec5SDimitry Andric                                  kmp_proc_bind_t proc_bind);
32080b57cec5SDimitry Andric extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
32090b57cec5SDimitry Andric                                  int num_threads);
32100b57cec5SDimitry Andric 
32110b57cec5SDimitry Andric extern void __kmp_yield();
32120b57cec5SDimitry Andric 
32130b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
32140b57cec5SDimitry Andric                                    enum sched_type schedule, kmp_int32 lb,
32150b57cec5SDimitry Andric                                    kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
32160b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
32170b57cec5SDimitry Andric                                     enum sched_type schedule, kmp_uint32 lb,
32180b57cec5SDimitry Andric                                     kmp_uint32 ub, kmp_int32 st,
32190b57cec5SDimitry Andric                                     kmp_int32 chunk);
32200b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
32210b57cec5SDimitry Andric                                    enum sched_type schedule, kmp_int64 lb,
32220b57cec5SDimitry Andric                                    kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
32230b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
32240b57cec5SDimitry Andric                                     enum sched_type schedule, kmp_uint64 lb,
32250b57cec5SDimitry Andric                                     kmp_uint64 ub, kmp_int64 st,
32260b57cec5SDimitry Andric                                     kmp_int64 chunk);
32270b57cec5SDimitry Andric 
32280b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
32290b57cec5SDimitry Andric                                   kmp_int32 *p_last, kmp_int32 *p_lb,
32300b57cec5SDimitry Andric                                   kmp_int32 *p_ub, kmp_int32 *p_st);
32310b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
32320b57cec5SDimitry Andric                                    kmp_int32 *p_last, kmp_uint32 *p_lb,
32330b57cec5SDimitry Andric                                    kmp_uint32 *p_ub, kmp_int32 *p_st);
32340b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
32350b57cec5SDimitry Andric                                   kmp_int32 *p_last, kmp_int64 *p_lb,
32360b57cec5SDimitry Andric                                   kmp_int64 *p_ub, kmp_int64 *p_st);
32370b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
32380b57cec5SDimitry Andric                                    kmp_int32 *p_last, kmp_uint64 *p_lb,
32390b57cec5SDimitry Andric                                    kmp_uint64 *p_ub, kmp_int64 *p_st);
32400b57cec5SDimitry Andric 
32410b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
32420b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
32430b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
32440b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
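/* Sketch of the dispatch protocol these entry points implement (illustrative
   shape of compiler-generated code for a dynamically scheduled signed 32-bit
   loop; N, chunk, and body() are hypothetical placeholders):
     kmp_int32 lb = 0, ub = N - 1, st = 1, last = 0;
     __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, lb, ub, st,
                            chunk);
     while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
       for (kmp_int32 i = lb; i <= ub; i += st)
         body(i);
     }
*/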
32450b57cec5SDimitry Andric 
32460b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT
32470b57cec5SDimitry Andric 
32480b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
32490b57cec5SDimitry Andric                                       enum sched_type schedule, kmp_int32 lb,
32500b57cec5SDimitry Andric                                       kmp_int32 ub, kmp_int32 st,
32510b57cec5SDimitry Andric                                       kmp_int32 chunk, int push_ws);
32520b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
32530b57cec5SDimitry Andric                                        enum sched_type schedule, kmp_uint32 lb,
32540b57cec5SDimitry Andric                                        kmp_uint32 ub, kmp_int32 st,
32550b57cec5SDimitry Andric                                        kmp_int32 chunk, int push_ws);
32560b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
32570b57cec5SDimitry Andric                                       enum sched_type schedule, kmp_int64 lb,
32580b57cec5SDimitry Andric                                       kmp_int64 ub, kmp_int64 st,
32590b57cec5SDimitry Andric                                       kmp_int64 chunk, int push_ws);
32600b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
32610b57cec5SDimitry Andric                                        enum sched_type schedule, kmp_uint64 lb,
32620b57cec5SDimitry Andric                                        kmp_uint64 ub, kmp_int64 st,
32630b57cec5SDimitry Andric                                        kmp_int64 chunk, int push_ws);
32640b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
32650b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
32660b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
32670b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
32680b57cec5SDimitry Andric 
32690b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */
32700b57cec5SDimitry Andric 
32710b57cec5SDimitry Andric extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
32720b57cec5SDimitry Andric extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
32730b57cec5SDimitry Andric extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
32740b57cec5SDimitry Andric extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
32750b57cec5SDimitry Andric extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
32760b57cec5SDimitry Andric extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
32770b57cec5SDimitry Andric                                kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
32780b57cec5SDimitry Andric                                void *obj);
32790b57cec5SDimitry Andric extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
32800b57cec5SDimitry Andric                              kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
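/* Example (illustrative): spin until a shared 32-bit counter reaches a
   threshold, using one of the predicate helpers declared above
   (ready_count and n_expected are hypothetical names):
     __kmp_wait_4(&ready_count, n_expected, __kmp_ge_4, NULL);
*/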
32810b57cec5SDimitry Andric 
32820b57cec5SDimitry Andric class kmp_flag_32;
32830b57cec5SDimitry Andric class kmp_flag_64;
32840b57cec5SDimitry Andric class kmp_flag_oncore;
32850b57cec5SDimitry Andric extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
32860b57cec5SDimitry Andric                           int final_spin
32870b57cec5SDimitry Andric #if USE_ITT_BUILD
32880b57cec5SDimitry Andric                           ,
32890b57cec5SDimitry Andric                           void *itt_sync_obj
32900b57cec5SDimitry Andric #endif
32910b57cec5SDimitry Andric                           );
32920b57cec5SDimitry Andric extern void __kmp_release_64(kmp_flag_64 *flag);
32930b57cec5SDimitry Andric 
32940b57cec5SDimitry Andric extern void __kmp_infinite_loop(void);
32950b57cec5SDimitry Andric 
32960b57cec5SDimitry Andric extern void __kmp_cleanup(void);
32970b57cec5SDimitry Andric 
32980b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS
32990b57cec5SDimitry Andric extern int __kmp_handle_signals;
33000b57cec5SDimitry Andric extern void __kmp_install_signals(int parallel_init);
33010b57cec5SDimitry Andric extern void __kmp_remove_signals(void);
33020b57cec5SDimitry Andric #endif
33030b57cec5SDimitry Andric 
33040b57cec5SDimitry Andric extern void __kmp_clear_system_time(void);
33050b57cec5SDimitry Andric extern void __kmp_read_system_time(double *delta);
33060b57cec5SDimitry Andric 
33070b57cec5SDimitry Andric extern void __kmp_check_stack_overlap(kmp_info_t *thr);
33080b57cec5SDimitry Andric 
33090b57cec5SDimitry Andric extern void __kmp_expand_host_name(char *buffer, size_t size);
33100b57cec5SDimitry Andric extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
33110b57cec5SDimitry Andric 
33120b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
33130b57cec5SDimitry Andric extern void
33140b57cec5SDimitry Andric __kmp_initialize_system_tick(void); /* Initialize timer tick value */
33150b57cec5SDimitry Andric #endif
33160b57cec5SDimitry Andric 
33170b57cec5SDimitry Andric extern void
33180b57cec5SDimitry Andric __kmp_runtime_initialize(void); /* machine specific initialization */
33190b57cec5SDimitry Andric extern void __kmp_runtime_destroy(void);
33200b57cec5SDimitry Andric 
33210b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
33220b57cec5SDimitry Andric extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
33230b57cec5SDimitry Andric                                        kmp_affin_mask_t *mask);
33240b57cec5SDimitry Andric extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
33250b57cec5SDimitry Andric                                                   kmp_affin_mask_t *mask);
33260b57cec5SDimitry Andric extern void __kmp_affinity_initialize(void);
33270b57cec5SDimitry Andric extern void __kmp_affinity_uninitialize(void);
33280b57cec5SDimitry Andric extern void __kmp_affinity_set_init_mask(
33290b57cec5SDimitry Andric     int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
33300b57cec5SDimitry Andric extern void __kmp_affinity_set_place(int gtid);
33310b57cec5SDimitry Andric extern void __kmp_affinity_determine_capable(const char *env_var);
33320b57cec5SDimitry Andric extern int __kmp_aux_set_affinity(void **mask);
33330b57cec5SDimitry Andric extern int __kmp_aux_get_affinity(void **mask);
33340b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_max_proc();
33350b57cec5SDimitry Andric extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
33360b57cec5SDimitry Andric extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
33370b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
33380b57cec5SDimitry Andric extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3339489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD
33400b57cec5SDimitry Andric extern int kmp_set_thread_affinity_mask_initial(void);
33410b57cec5SDimitry Andric #endif
33420b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
33430b57cec5SDimitry Andric // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
33440b57cec5SDimitry Andric // format string is for affinity, so platforms that do not support
33450b57cec5SDimitry Andric // affinity can still use the other fields, e.g., %n for num_threads
33460b57cec5SDimitry Andric extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
33470b57cec5SDimitry Andric                                          kmp_str_buf_t *buffer);
33480b57cec5SDimitry Andric extern void __kmp_aux_display_affinity(int gtid, const char *format);
33490b57cec5SDimitry Andric 
33500b57cec5SDimitry Andric extern void __kmp_cleanup_hierarchy();
33510b57cec5SDimitry Andric extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
33520b57cec5SDimitry Andric 
33530b57cec5SDimitry Andric #if KMP_USE_FUTEX
33540b57cec5SDimitry Andric 
33550b57cec5SDimitry Andric extern int __kmp_futex_determine_capable(void);
33560b57cec5SDimitry Andric 
33570b57cec5SDimitry Andric #endif // KMP_USE_FUTEX
33580b57cec5SDimitry Andric 
33590b57cec5SDimitry Andric extern void __kmp_gtid_set_specific(int gtid);
33600b57cec5SDimitry Andric extern int __kmp_gtid_get_specific(void);
33610b57cec5SDimitry Andric 
33620b57cec5SDimitry Andric extern double __kmp_read_cpu_time(void);
33630b57cec5SDimitry Andric 
33640b57cec5SDimitry Andric extern int __kmp_read_system_info(struct kmp_sys_info *info);
33650b57cec5SDimitry Andric 
33660b57cec5SDimitry Andric #if KMP_USE_MONITOR
33670b57cec5SDimitry Andric extern void __kmp_create_monitor(kmp_info_t *th);
33680b57cec5SDimitry Andric #endif
33690b57cec5SDimitry Andric 
33700b57cec5SDimitry Andric extern void *__kmp_launch_thread(kmp_info_t *thr);
33710b57cec5SDimitry Andric 
33720b57cec5SDimitry Andric extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
33730b57cec5SDimitry Andric 
33740b57cec5SDimitry Andric #if KMP_OS_WINDOWS
33750b57cec5SDimitry Andric extern int __kmp_still_running(kmp_info_t *th);
33760b57cec5SDimitry Andric extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
33770b57cec5SDimitry Andric extern void __kmp_free_handle(kmp_thread_t tHandle);
33780b57cec5SDimitry Andric #endif
33790b57cec5SDimitry Andric 
33800b57cec5SDimitry Andric #if KMP_USE_MONITOR
33810b57cec5SDimitry Andric extern void __kmp_reap_monitor(kmp_info_t *th);
33820b57cec5SDimitry Andric #endif
33830b57cec5SDimitry Andric extern void __kmp_reap_worker(kmp_info_t *th);
33840b57cec5SDimitry Andric extern void __kmp_terminate_thread(int gtid);
33850b57cec5SDimitry Andric 
33860b57cec5SDimitry Andric extern int __kmp_try_suspend_mx(kmp_info_t *th);
33870b57cec5SDimitry Andric extern void __kmp_lock_suspend_mx(kmp_info_t *th);
33880b57cec5SDimitry Andric extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
33890b57cec5SDimitry Andric 
33900b57cec5SDimitry Andric extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
33910b57cec5SDimitry Andric extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
33920b57cec5SDimitry Andric extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
33930b57cec5SDimitry Andric extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
33940b57cec5SDimitry Andric extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
33950b57cec5SDimitry Andric extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
33960b57cec5SDimitry Andric 
33970b57cec5SDimitry Andric extern void __kmp_elapsed(double *);
33980b57cec5SDimitry Andric extern void __kmp_elapsed_tick(double *);
33990b57cec5SDimitry Andric 
34000b57cec5SDimitry Andric extern void __kmp_enable(int old_state);
34010b57cec5SDimitry Andric extern void __kmp_disable(int *old_state);
34020b57cec5SDimitry Andric 
34030b57cec5SDimitry Andric extern void __kmp_thread_sleep(int millis);
34040b57cec5SDimitry Andric 
34050b57cec5SDimitry Andric extern void __kmp_common_initialize(void);
34060b57cec5SDimitry Andric extern void __kmp_common_destroy(void);
34070b57cec5SDimitry Andric extern void __kmp_common_destroy_gtid(int gtid);
34080b57cec5SDimitry Andric 
34090b57cec5SDimitry Andric #if KMP_OS_UNIX
34100b57cec5SDimitry Andric extern void __kmp_register_atfork(void);
34110b57cec5SDimitry Andric #endif
34120b57cec5SDimitry Andric extern void __kmp_suspend_initialize(void);
34130b57cec5SDimitry Andric extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
34140b57cec5SDimitry Andric extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
34150b57cec5SDimitry Andric 
34160b57cec5SDimitry Andric extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
34170b57cec5SDimitry Andric                                          int tid);
34180b57cec5SDimitry Andric extern kmp_team_t *
34190b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
34200b57cec5SDimitry Andric #if OMPT_SUPPORT
34210b57cec5SDimitry Andric                     ompt_data_t ompt_parallel_data,
34220b57cec5SDimitry Andric #endif
34230b57cec5SDimitry Andric                     kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
34240b57cec5SDimitry Andric                     int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
34250b57cec5SDimitry Andric extern void __kmp_free_thread(kmp_info_t *);
34260b57cec5SDimitry Andric extern void __kmp_free_team(kmp_root_t *,
34270b57cec5SDimitry Andric                             kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
34280b57cec5SDimitry Andric extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
34290b57cec5SDimitry Andric 
34300b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
34310b57cec5SDimitry Andric 
34320b57cec5SDimitry Andric extern void __kmp_initialize_bget(kmp_info_t *th);
34330b57cec5SDimitry Andric extern void __kmp_finalize_bget(kmp_info_t *th);
34340b57cec5SDimitry Andric 
34350b57cec5SDimitry Andric KMP_EXPORT void *kmpc_malloc(size_t size);
34360b57cec5SDimitry Andric KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
34370b57cec5SDimitry Andric KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
34380b57cec5SDimitry Andric KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
34390b57cec5SDimitry Andric KMP_EXPORT void kmpc_free(void *ptr);
34400b57cec5SDimitry Andric 
34410b57cec5SDimitry Andric /* declarations for internal use */
34420b57cec5SDimitry Andric 
34430b57cec5SDimitry Andric extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
34440b57cec5SDimitry Andric                          size_t reduce_size, void *reduce_data,
34450b57cec5SDimitry Andric                          void (*reduce)(void *, void *));
34460b57cec5SDimitry Andric extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
34470b57cec5SDimitry Andric extern int __kmp_barrier_gomp_cancel(int gtid);
34480b57cec5SDimitry Andric 
34490b57cec5SDimitry Andric /*!
34500b57cec5SDimitry Andric  * Tells the fork call which compiler generated it, and therefore how the
34510b57cec5SDimitry Andric  * call must be handled.
34520b57cec5SDimitry Andric  */
34530b57cec5SDimitry Andric enum fork_context_e {
34540b57cec5SDimitry Andric   fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
34550b57cec5SDimitry Andric                        microtask internally. */
34560b57cec5SDimitry Andric   fork_context_intel, /**< Called from Intel generated code.  */
34570b57cec5SDimitry Andric   fork_context_last
34580b57cec5SDimitry Andric };
34590b57cec5SDimitry Andric extern int __kmp_fork_call(ident_t *loc, int gtid,
34600b57cec5SDimitry Andric                            enum fork_context_e fork_context, kmp_int32 argc,
34610b57cec5SDimitry Andric                            microtask_t microtask, launch_t invoker,
34620b57cec5SDimitry Andric /* TODO: revert workaround for Intel(R) 64 tracker #96 */
34630b57cec5SDimitry Andric #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
34640b57cec5SDimitry Andric                            va_list *ap
34650b57cec5SDimitry Andric #else
34660b57cec5SDimitry Andric                            va_list ap
34670b57cec5SDimitry Andric #endif
34680b57cec5SDimitry Andric                            );
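
/* Illustrative sketch (not part of this header): the compiler-facing entry
   points are expected to forward here with the matching context, e.g. the
   Intel/LLVM-style entry passes fork_context_intel and lets the runtime
   invoke the microtask, while the GOMP compatibility layer passes
   fork_context_gnu and invokes the parallel body itself.  Roughly:

     __kmp_fork_call(loc, gtid, fork_context_intel, argc, microtask,
                     __kmp_invoke_task_func, ap);
*/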
34690b57cec5SDimitry Andric 
34700b57cec5SDimitry Andric extern void __kmp_join_call(ident_t *loc, int gtid
34710b57cec5SDimitry Andric #if OMPT_SUPPORT
34720b57cec5SDimitry Andric                             ,
34730b57cec5SDimitry Andric                             enum fork_context_e fork_context
34740b57cec5SDimitry Andric #endif
34750b57cec5SDimitry Andric                             ,
34760b57cec5SDimitry Andric                             int exit_teams = 0);
34770b57cec5SDimitry Andric 
34780b57cec5SDimitry Andric extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
34790b57cec5SDimitry Andric extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
34800b57cec5SDimitry Andric extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
34810b57cec5SDimitry Andric extern int __kmp_invoke_task_func(int gtid);
34820b57cec5SDimitry Andric extern void __kmp_run_before_invoked_task(int gtid, int tid,
34830b57cec5SDimitry Andric                                           kmp_info_t *this_thr,
34840b57cec5SDimitry Andric                                           kmp_team_t *team);
34850b57cec5SDimitry Andric extern void __kmp_run_after_invoked_task(int gtid, int tid,
34860b57cec5SDimitry Andric                                          kmp_info_t *this_thr,
34870b57cec5SDimitry Andric                                          kmp_team_t *team);
34880b57cec5SDimitry Andric 
34890b57cec5SDimitry Andric // should never have been exported
34900b57cec5SDimitry Andric KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
34910b57cec5SDimitry Andric extern int __kmp_invoke_teams_master(int gtid);
34920b57cec5SDimitry Andric extern void __kmp_teams_master(int gtid);
34930b57cec5SDimitry Andric extern int __kmp_aux_get_team_num();
34940b57cec5SDimitry Andric extern int __kmp_aux_get_num_teams();
34950b57cec5SDimitry Andric extern void __kmp_save_internal_controls(kmp_info_t *thread);
34960b57cec5SDimitry Andric extern void __kmp_user_set_library(enum library_type arg);
34970b57cec5SDimitry Andric extern void __kmp_aux_set_library(enum library_type arg);
34980b57cec5SDimitry Andric extern void __kmp_aux_set_stacksize(size_t arg);
34990b57cec5SDimitry Andric extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
35000b57cec5SDimitry Andric extern void __kmp_aux_set_defaults(char const *str, int len);
35010b57cec5SDimitry Andric 
35020b57cec5SDimitry Andric /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
35030b57cec5SDimitry Andric void kmpc_set_blocktime(int arg);
35040b57cec5SDimitry Andric void ompc_set_nested(int flag);
35050b57cec5SDimitry Andric void ompc_set_dynamic(int flag);
35060b57cec5SDimitry Andric void ompc_set_num_threads(int arg);
35070b57cec5SDimitry Andric 
35080b57cec5SDimitry Andric extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
35090b57cec5SDimitry Andric                                               kmp_team_t *team, int tid);
35100b57cec5SDimitry Andric extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
35110b57cec5SDimitry Andric extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
35120b57cec5SDimitry Andric                                     kmp_tasking_flags_t *flags,
35130b57cec5SDimitry Andric                                     size_t sizeof_kmp_task_t,
35140b57cec5SDimitry Andric                                     size_t sizeof_shareds,
35150b57cec5SDimitry Andric                                     kmp_routine_entry_t task_entry);
35160b57cec5SDimitry Andric extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
35170b57cec5SDimitry Andric                                      kmp_team_t *team, int tid,
35180b57cec5SDimitry Andric                                      int set_curr_task);
35190b57cec5SDimitry Andric extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
35200b57cec5SDimitry Andric extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
35210b57cec5SDimitry Andric 
35220b57cec5SDimitry Andric extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
35230b57cec5SDimitry Andric                                                        int gtid,
35240b57cec5SDimitry Andric                                                        kmp_task_t *task);
35250b57cec5SDimitry Andric extern void __kmp_fulfill_event(kmp_event_t *event);
35260b57cec5SDimitry Andric 
35270b57cec5SDimitry Andric int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
35280b57cec5SDimitry Andric                            kmp_flag_32 *flag, int final_spin,
35290b57cec5SDimitry Andric                            int *thread_finished,
35300b57cec5SDimitry Andric #if USE_ITT_BUILD
35310b57cec5SDimitry Andric                            void *itt_sync_obj,
35320b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
35330b57cec5SDimitry Andric                            kmp_int32 is_constrained);
35340b57cec5SDimitry Andric int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
35350b57cec5SDimitry Andric                            kmp_flag_64 *flag, int final_spin,
35360b57cec5SDimitry Andric                            int *thread_finished,
35370b57cec5SDimitry Andric #if USE_ITT_BUILD
35380b57cec5SDimitry Andric                            void *itt_sync_obj,
35390b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
35400b57cec5SDimitry Andric                            kmp_int32 is_constrained);
35410b57cec5SDimitry Andric int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
35420b57cec5SDimitry Andric                                kmp_flag_oncore *flag, int final_spin,
35430b57cec5SDimitry Andric                                int *thread_finished,
35440b57cec5SDimitry Andric #if USE_ITT_BUILD
35450b57cec5SDimitry Andric                                void *itt_sync_obj,
35460b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
35470b57cec5SDimitry Andric                                kmp_int32 is_constrained);
35480b57cec5SDimitry Andric 
35490b57cec5SDimitry Andric extern void __kmp_free_task_team(kmp_info_t *thread,
35500b57cec5SDimitry Andric                                  kmp_task_team_t *task_team);
35510b57cec5SDimitry Andric extern void __kmp_reap_task_teams(void);
35520b57cec5SDimitry Andric extern void __kmp_wait_to_unref_task_teams(void);
35530b57cec5SDimitry Andric extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
35540b57cec5SDimitry Andric                                   int always);
35550b57cec5SDimitry Andric extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
35560b57cec5SDimitry Andric extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
35570b57cec5SDimitry Andric #if USE_ITT_BUILD
35580b57cec5SDimitry Andric                                  ,
35590b57cec5SDimitry Andric                                  void *itt_sync_obj
35600b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
35610b57cec5SDimitry Andric                                  ,
35620b57cec5SDimitry Andric                                  int wait = 1);
35630b57cec5SDimitry Andric extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
35640b57cec5SDimitry Andric                                   int gtid);
35650b57cec5SDimitry Andric 
35660b57cec5SDimitry Andric extern int __kmp_is_address_mapped(void *addr);
35670b57cec5SDimitry Andric extern kmp_uint64 __kmp_hardware_timestamp(void);
35680b57cec5SDimitry Andric 
35690b57cec5SDimitry Andric #if KMP_OS_UNIX
35700b57cec5SDimitry Andric extern int __kmp_read_from_file(char const *path, char const *format, ...);
35710b57cec5SDimitry Andric #endif
35720b57cec5SDimitry Andric 
35730b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
35740b57cec5SDimitry Andric //
35750b57cec5SDimitry Andric // Assembly routines that have no compiler intrinsic replacement
35760b57cec5SDimitry Andric //
35770b57cec5SDimitry Andric 
35780b57cec5SDimitry Andric extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
35790b57cec5SDimitry Andric                                   void *argv[]
35800b57cec5SDimitry Andric #if OMPT_SUPPORT
35810b57cec5SDimitry Andric                                   ,
35820b57cec5SDimitry Andric                                   void **exit_frame_ptr
35830b57cec5SDimitry Andric #endif
35840b57cec5SDimitry Andric                                   );
35850b57cec5SDimitry Andric 
35860b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
35870b57cec5SDimitry Andric 
35880b57cec5SDimitry Andric KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
35890b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end(ident_t *);
35900b57cec5SDimitry Andric 
35910b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
35920b57cec5SDimitry Andric                                                   kmpc_ctor_vec ctor,
35930b57cec5SDimitry Andric                                                   kmpc_cctor_vec cctor,
35940b57cec5SDimitry Andric                                                   kmpc_dtor_vec dtor,
35950b57cec5SDimitry Andric                                                   size_t vector_length);
35960b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
35970b57cec5SDimitry Andric                                               kmpc_ctor ctor, kmpc_cctor cctor,
35980b57cec5SDimitry Andric                                               kmpc_dtor dtor);
35990b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
36000b57cec5SDimitry Andric                                       void *data, size_t size);
36010b57cec5SDimitry Andric 
36020b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
36030b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
36040b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
36050b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
36060b57cec5SDimitry Andric 
36070b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
36080b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
36090b57cec5SDimitry Andric                                  kmpc_micro microtask, ...);
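
/* Illustrative sketch (not part of this header): roughly how a compiler
   lowers "#pragma omp parallel" onto __kmpc_fork_call.  The outlined
   function, the ident_t "loc" and the shared variable "n" are hypothetical
   names:

     static void __outlined(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
       // body of the parallel region, executed once per team thread
     }

     int n = 100;
     __kmpc_fork_call(&loc, 1, (kmpc_micro)__outlined, &n);
*/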
36100b57cec5SDimitry Andric 
36110b57cec5SDimitry Andric KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
36120b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
36130b57cec5SDimitry Andric 
36140b57cec5SDimitry Andric KMP_EXPORT void __kmpc_flush(ident_t *);
36150b57cec5SDimitry Andric KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
36160b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
36170b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
36180b57cec5SDimitry Andric KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
36190b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
36200b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
36210b57cec5SDimitry Andric                                 kmp_critical_name *);
36220b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
36230b57cec5SDimitry Andric                                     kmp_critical_name *);
36240b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
36250b57cec5SDimitry Andric                                           kmp_critical_name *, uint32_t hint);
36260b57cec5SDimitry Andric 
36270b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
36280b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
36290b57cec5SDimitry Andric 
36300b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
36310b57cec5SDimitry Andric                                                   kmp_int32 global_tid);
36320b57cec5SDimitry Andric 
36330b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
36340b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
36350b57cec5SDimitry Andric 
36360b57cec5SDimitry Andric KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
36370b57cec5SDimitry Andric                                      kmp_int32 schedtype, kmp_int32 *plastiter,
36380b57cec5SDimitry Andric                                      kmp_int *plower, kmp_int *pupper,
36390b57cec5SDimitry Andric                                      kmp_int *pstride, kmp_int incr,
36400b57cec5SDimitry Andric                                      kmp_int chunk);
36410b57cec5SDimitry Andric 
36420b57cec5SDimitry Andric KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
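
/* Illustrative sketch (not part of this header): a "#pragma omp for
   schedule(static)" loop is typically lowered to an init/fini pair around
   the chunk owned by the calling thread (locals are hypothetical; real
   compilers call size-specific variants such as __kmpc_for_static_init_4):

     kmp_int lower = 0, upper = n - 1, stride = 1;
     kmp_int32 lastiter = 0;
     KMPC_FOR_STATIC_INIT(&loc, gtid, kmp_sch_static, &lastiter, &lower,
                          &upper, &stride, 1, 0);
     for (kmp_int i = lower; i <= upper; ++i) {
       // loop body for this thread's chunk
     }
     __kmpc_for_static_fini(&loc, gtid);
*/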
36430b57cec5SDimitry Andric 
36440b57cec5SDimitry Andric KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
36450b57cec5SDimitry Andric                                    size_t cpy_size, void *cpy_data,
36460b57cec5SDimitry Andric                                    void (*cpy_func)(void *, void *),
36470b57cec5SDimitry Andric                                    kmp_int32 didit);
36480b57cec5SDimitry Andric 
36490b57cec5SDimitry Andric extern void KMPC_SET_NUM_THREADS(int arg);
36500b57cec5SDimitry Andric extern void KMPC_SET_DYNAMIC(int flag);
36510b57cec5SDimitry Andric extern void KMPC_SET_NESTED(int flag);
36520b57cec5SDimitry Andric 
36530b57cec5SDimitry Andric /* OMP 3.0 tasking interface routines */
36540b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
36550b57cec5SDimitry Andric                                      kmp_task_t *new_task);
36560b57cec5SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
36570b57cec5SDimitry Andric                                              kmp_int32 flags,
36580b57cec5SDimitry Andric                                              size_t sizeof_kmp_task_t,
36590b57cec5SDimitry Andric                                              size_t sizeof_shareds,
36600b57cec5SDimitry Andric                                              kmp_routine_entry_t task_entry);
36610b57cec5SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
36620b57cec5SDimitry Andric                                                     kmp_int32 flags,
36630b57cec5SDimitry Andric                                                     size_t sizeof_kmp_task_t,
36640b57cec5SDimitry Andric                                                     size_t sizeof_shareds,
36650b57cec5SDimitry Andric                                                     kmp_routine_entry_t task_entry,
36660b57cec5SDimitry Andric                                                     kmp_int64 device_id);
36670b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
36680b57cec5SDimitry Andric                                           kmp_task_t *task);
36690b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
36700b57cec5SDimitry Andric                                              kmp_task_t *task);
36710b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
36720b57cec5SDimitry Andric                                            kmp_task_t *new_task);
36730b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
36740b57cec5SDimitry Andric 
36750b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
36760b57cec5SDimitry Andric                                           int end_part);
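
/* Illustrative sketch (not part of this header): roughly how "#pragma omp
   task" maps onto the allocation + enqueue pair above.  The entry routine
   and locals are hypothetical; flags = 1 requests a tied task (TASK_TIED):

     static kmp_int32 __task_entry(kmp_int32 gtid, void *part) {
       // task body; "part" points at the kmp_task_t allocated below
       return 0;
     }

     kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, 1 /* tied */,
                                           sizeof(kmp_task_t),
                                           /* sizeof_shareds = */ 0,
                                           __task_entry);
     __kmpc_omp_task(&loc, gtid, t); // queue it (or execute immediately)
*/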
36770b57cec5SDimitry Andric 
36780b57cec5SDimitry Andric #if TASK_UNUSED
36790b57cec5SDimitry Andric void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
36800b57cec5SDimitry Andric void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
36810b57cec5SDimitry Andric                               kmp_task_t *task);
36820b57cec5SDimitry Andric #endif // TASK_UNUSED
36830b57cec5SDimitry Andric 
36840b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
36850b57cec5SDimitry Andric 
36860b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
36870b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);
36880b57cec5SDimitry Andric 
36890b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
36900b57cec5SDimitry Andric     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
36910b57cec5SDimitry Andric     kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
36920b57cec5SDimitry Andric     kmp_depend_info_t *noalias_dep_list);
36930b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
36940b57cec5SDimitry Andric                                      kmp_int32 ndeps,
36950b57cec5SDimitry Andric                                      kmp_depend_info_t *dep_list,
36960b57cec5SDimitry Andric                                      kmp_int32 ndeps_noalias,
36970b57cec5SDimitry Andric                                      kmp_depend_info_t *noalias_dep_list);
36980b57cec5SDimitry Andric extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
36990b57cec5SDimitry Andric                                 bool serialize_immediate);
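
/* Illustrative sketch (not part of this header): an inout dependence on a
   single variable "x", using the kmp_depend_info_t layout defined earlier
   in this file (assumed field names base_addr / len / flags):

     kmp_depend_info_t dep;
     dep.base_addr = (kmp_intptr_t)&x;
     dep.len = sizeof(x);
     dep.flags.in = 1;
     dep.flags.out = 1;
     __kmpc_omp_task_with_deps(&loc, gtid, t, 1, &dep, 0, NULL);
*/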
37000b57cec5SDimitry Andric 
37010b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
37020b57cec5SDimitry Andric                                    kmp_int32 cncl_kind);
37030b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
37040b57cec5SDimitry Andric                                               kmp_int32 cncl_kind);
37050b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
37060b57cec5SDimitry Andric KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
37070b57cec5SDimitry Andric 
37080b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
37090b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
37100b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
37110b57cec5SDimitry Andric                                 kmp_int32 if_val, kmp_uint64 *lb,
37120b57cec5SDimitry Andric                                 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
37130b57cec5SDimitry Andric                                 kmp_int32 sched, kmp_uint64 grainsize,
37140b57cec5SDimitry Andric                                 void *task_dup);
37150b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
37160b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
37170b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
37180b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
37190b57cec5SDimitry Andric                                                      int is_ws, int num,
37200b57cec5SDimitry Andric                                                      void *data);
37210b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
37220b57cec5SDimitry Andric                                               int num, void *data);
37230b57cec5SDimitry Andric KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
37240b57cec5SDimitry Andric                                                     int is_ws);
37250b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
37260b57cec5SDimitry Andric     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
37270b57cec5SDimitry Andric     kmp_task_affinity_info_t *affin_list);
37280b57cec5SDimitry Andric 
37290b57cec5SDimitry Andric /* Lock interface routines (fast versions with gtid passed in) */
37300b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
37310b57cec5SDimitry Andric                                  void **user_lock);
37320b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
37330b57cec5SDimitry Andric                                       void **user_lock);
37340b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
37350b57cec5SDimitry Andric                                     void **user_lock);
37360b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
37370b57cec5SDimitry Andric                                          void **user_lock);
37380b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
37390b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
37400b57cec5SDimitry Andric                                      void **user_lock);
37410b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
37420b57cec5SDimitry Andric                                   void **user_lock);
37430b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
37440b57cec5SDimitry Andric                                        void **user_lock);
37450b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
37460b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
37470b57cec5SDimitry Andric                                      void **user_lock);
37480b57cec5SDimitry Andric 
37490b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
37500b57cec5SDimitry Andric                                            void **user_lock, uintptr_t hint);
37510b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
37520b57cec5SDimitry Andric                                                 void **user_lock,
37530b57cec5SDimitry Andric                                                 uintptr_t hint);
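
/* Illustrative sketch (not part of this header): the lowering of
   omp_init_lock / omp_set_lock / omp_unset_lock onto the fast
   (gtid-passing) entry points; "lck" stands in for the user's lock storage:

     void *lck = NULL;
     __kmpc_init_lock(&loc, gtid, &lck);
     __kmpc_set_lock(&loc, gtid, &lck);
     // ... protected region ...
     __kmpc_unset_lock(&loc, gtid, &lck);
     __kmpc_destroy_lock(&loc, gtid, &lck);
*/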
37540b57cec5SDimitry Andric 
37550b57cec5SDimitry Andric /* Interface to fast scalable reduce methods routines */
37560b57cec5SDimitry Andric 
37570b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
37580b57cec5SDimitry Andric     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
37590b57cec5SDimitry Andric     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
37600b57cec5SDimitry Andric     kmp_critical_name *lck);
37610b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
37620b57cec5SDimitry Andric                                          kmp_critical_name *lck);
37630b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce(
37640b57cec5SDimitry Andric     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
37650b57cec5SDimitry Andric     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
37660b57cec5SDimitry Andric     kmp_critical_name *lck);
37670b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
37680b57cec5SDimitry Andric                                   kmp_critical_name *lck);
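
/* Illustrative sketch (not part of this header): the usual compiler pattern
   around __kmpc_reduce_nowait for "reduction(+:sum)".  The return value
   selects the combining strategy: 1 = combine via reduce_data/reduce_func
   and finish with __kmpc_end_reduce_nowait, 2 = combine with atomics,
   0 = nothing to do on this thread.  "reduce_func" and "crit_name" are
   hypothetical compiler-emitted objects:

     switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(local_sum),
                                  &local_sum, reduce_func, &crit_name)) {
     case 1:
       sum += local_sum;
       __kmpc_end_reduce_nowait(&loc, gtid, &crit_name);
       break;
     case 2:
       // atomically add local_sum into sum
       break;
     default:
       break;
     }
*/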
37690b57cec5SDimitry Andric 
37700b57cec5SDimitry Andric /* Internal fast reduction routines */
37710b57cec5SDimitry Andric 
37720b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
37730b57cec5SDimitry Andric     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
37740b57cec5SDimitry Andric     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
37750b57cec5SDimitry Andric     kmp_critical_name *lck);
37760b57cec5SDimitry Andric 
37770b57cec5SDimitry Andric // This function is used to test the set/get/determine reduction method logic.
37780b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);
37790b57cec5SDimitry Andric 
37800b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
37810b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
37820b57cec5SDimitry Andric 
37830b57cec5SDimitry Andric // C++ port:
37840b57cec5SDimitry Andric // declarations below are missing 'extern "C"'
37850b57cec5SDimitry Andric 
37860b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
37870b57cec5SDimitry Andric KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
37880b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
37890b57cec5SDimitry Andric                                         kmp_int32 num_threads);
37900b57cec5SDimitry Andric 
37910b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
37920b57cec5SDimitry Andric                                       int proc_bind);
37930b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
37940b57cec5SDimitry Andric                                       kmp_int32 num_teams,
37950b57cec5SDimitry Andric                                       kmp_int32 num_threads);
37960b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
37970b57cec5SDimitry Andric                                   kmpc_micro microtask, ...);
37980b57cec5SDimitry Andric struct kmp_dim { // loop bounds info cast to kmp_int64
37990b57cec5SDimitry Andric   kmp_int64 lo; // lower
38000b57cec5SDimitry Andric   kmp_int64 up; // upper
38010b57cec5SDimitry Andric   kmp_int64 st; // stride
38020b57cec5SDimitry Andric };
38030b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
38040b57cec5SDimitry Andric                                      kmp_int32 num_dims,
38050b57cec5SDimitry Andric                                      const struct kmp_dim *dims);
38060b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
38070b57cec5SDimitry Andric                                      const kmp_int64 *vec);
38080b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
38090b57cec5SDimitry Andric                                      const kmp_int64 *vec);
38100b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
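
/* Illustrative sketch (not part of this header): a one-dimensional doacross
   loop, i.e. "#pragma omp for ordered(1)" with depend(sink: i - 1) and
   depend(source) in the body.  Locals are hypothetical:

     struct kmp_dim dim = {0, n - 1, 1}; // lo, up, st
     __kmpc_doacross_init(&loc, gtid, 1, &dim);
     for (kmp_int64 i = lower; i <= upper; ++i) {
       kmp_int64 sink = i - 1;
       __kmpc_doacross_wait(&loc, gtid, &sink); // ordered depend(sink: i-1)
       // ... loop body ...
       __kmpc_doacross_post(&loc, gtid, &i); // ordered depend(source)
     }
     __kmpc_doacross_fini(&loc, gtid);
*/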
38110b57cec5SDimitry Andric 
38120b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
38130b57cec5SDimitry Andric                                              void *data, size_t size,
38140b57cec5SDimitry Andric                                              void ***cache);
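
/* Illustrative sketch (not part of this header): how a threadprivate
   variable is typically materialized through the cached entry point.
   "tp_var" and "tp_cache" are hypothetical compiler-emitted globals:

     static int tp_var;      // master copy of the threadprivate variable
     static void **tp_cache; // per-variable cache filled in by the runtime

     int *p = (int *)__kmpc_threadprivate_cached(&loc, gtid, &tp_var,
                                                 sizeof(tp_var), &tp_cache);
     *p = 42; // this thread's private copy
*/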
38150b57cec5SDimitry Andric 
38160b57cec5SDimitry Andric // Symbols for mutual detection with the Microsoft OpenMP library at link time.
38170b57cec5SDimitry Andric extern int _You_must_link_with_exactly_one_OpenMP_library;
38180b57cec5SDimitry Andric extern int _You_must_link_with_Intel_OpenMP_library;
38190b57cec5SDimitry Andric #if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4)
38200b57cec5SDimitry Andric extern int _You_must_link_with_Microsoft_OpenMP_library;
38210b57cec5SDimitry Andric #endif
38220b57cec5SDimitry Andric 
38230b57cec5SDimitry Andric // The routines below are not exported.
38240b57cec5SDimitry Andric // Consider making them 'static' in corresponding source files.
38250b57cec5SDimitry Andric void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
38260b57cec5SDimitry Andric                                            void *data_addr, size_t pc_size);
38270b57cec5SDimitry Andric struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
38280b57cec5SDimitry Andric                                                 void *data_addr,
38290b57cec5SDimitry Andric                                                 size_t pc_size);
38300b57cec5SDimitry Andric void __kmp_threadprivate_resize_cache(int newCapacity);
38310b57cec5SDimitry Andric void __kmp_cleanup_threadprivate_caches();
38320b57cec5SDimitry Andric 
38330b57cec5SDimitry Andric // ompc_, kmpc_ entries moved from omp.h.
38340b57cec5SDimitry Andric #if KMP_OS_WINDOWS
38350b57cec5SDimitry Andric #define KMPC_CONVENTION __cdecl
38360b57cec5SDimitry Andric #else
38370b57cec5SDimitry Andric #define KMPC_CONVENTION
38380b57cec5SDimitry Andric #endif
38390b57cec5SDimitry Andric 
38400b57cec5SDimitry Andric #ifndef __OMP_H
38410b57cec5SDimitry Andric typedef enum omp_sched_t {
38420b57cec5SDimitry Andric   omp_sched_static = 1,
38430b57cec5SDimitry Andric   omp_sched_dynamic = 2,
38440b57cec5SDimitry Andric   omp_sched_guided = 3,
38450b57cec5SDimitry Andric   omp_sched_auto = 4
38460b57cec5SDimitry Andric } omp_sched_t;
38470b57cec5SDimitry Andric typedef void *kmp_affinity_mask_t;
38480b57cec5SDimitry Andric #endif
38490b57cec5SDimitry Andric 
38500b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
38510b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
38520b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
38530b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
38540b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
38550b57cec5SDimitry Andric kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
38560b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
38570b57cec5SDimitry Andric kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
38580b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
38590b57cec5SDimitry Andric kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
38600b57cec5SDimitry Andric 
38610b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
38620b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
38630b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
38640b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
38650b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
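
/* Illustrative sketch (not part of this header): these KMPC_CONVENTION
   entries back the corresponding omp_* / kmp_* user-level APIs, e.g.:

     ompc_set_schedule(omp_sched_dynamic, 4); // omp_set_schedule(dynamic, 4)
     kmpc_set_stacksize_s((size_t)4 * 1024 * 1024); // kmp_set_stacksize_s
     kmpc_set_defaults("KMP_BLOCKTIME=0");          // kmp_set_defaults
*/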
38660b57cec5SDimitry Andric 
38670b57cec5SDimitry Andric enum kmp_target_offload_kind {
38680b57cec5SDimitry Andric   tgt_disabled = 0,
38690b57cec5SDimitry Andric   tgt_default = 1,
38700b57cec5SDimitry Andric   tgt_mandatory = 2
38710b57cec5SDimitry Andric };
38720b57cec5SDimitry Andric typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
38730b57cec5SDimitry Andric // Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
38740b57cec5SDimitry Andric extern kmp_target_offload_kind_t __kmp_target_offload;
38750b57cec5SDimitry Andric extern int __kmpc_get_target_offload();
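
/* Illustrative sketch (not part of this header): OMP_TARGET_OFFLOAD=mandatory
   leaves __kmp_target_offload == tgt_mandatory, which libomptarget can query
   to suppress the host fallback:

     if (__kmpc_get_target_offload() == tgt_mandatory) {
       // report an error instead of falling back to host execution
     }
*/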
38760b57cec5SDimitry Andric 
38770b57cec5SDimitry Andric // Constants used in libomptarget
38780b57cec5SDimitry Andric #define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
38790b57cec5SDimitry Andric #define KMP_HOST_DEVICE -10 // Host device ID as defined in libomptarget.
38800b57cec5SDimitry Andric #define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
38810b57cec5SDimitry Andric 
38820b57cec5SDimitry Andric // OMP Pause Resource
38830b57cec5SDimitry Andric 
38840b57cec5SDimitry Andric // The following enum is used both to set the status in __kmp_pause_status, and
38850b57cec5SDimitry Andric // as the internal equivalent of the externally-visible omp_pause_resource_t.
38860b57cec5SDimitry Andric typedef enum kmp_pause_status_t {
38870b57cec5SDimitry Andric   kmp_not_paused = 0, // status is not paused, or requesting resume
38880b57cec5SDimitry Andric   kmp_soft_paused = 1, // status is soft-paused, or requesting soft pause
38890b57cec5SDimitry Andric   kmp_hard_paused = 2 // status is hard-paused, or requesting hard pause
38900b57cec5SDimitry Andric } kmp_pause_status_t;
38910b57cec5SDimitry Andric 
38920b57cec5SDimitry Andric // This stores the pause state of the runtime
38930b57cec5SDimitry Andric extern kmp_pause_status_t __kmp_pause_status;
38940b57cec5SDimitry Andric extern int __kmpc_pause_resource(kmp_pause_status_t level);
38950b57cec5SDimitry Andric extern int __kmp_pause_resource(kmp_pause_status_t level);
38960b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads.
38970b57cec5SDimitry Andric extern void __kmp_resume_if_soft_paused();
38980b57cec5SDimitry Andric // Hard resume simply resets the status to not paused. The library will appear
38990b57cec5SDimitry Andric // to be uninitialized after a hard pause; subsequent OMP constructs trigger
39000b57cec5SDimitry Andric // the required re-initialization.
39010b57cec5SDimitry Andric static inline void __kmp_resume_if_hard_paused() {
39020b57cec5SDimitry Andric   if (__kmp_pause_status == kmp_hard_paused) {
39030b57cec5SDimitry Andric     __kmp_pause_status = kmp_not_paused;
39040b57cec5SDimitry Andric   }
39050b57cec5SDimitry Andric }
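
/* Illustrative sketch (not part of this header): a soft pause releases
   worker-thread resources while keeping the runtime initialized; a later
   resume (or the next parallel construct) brings the workers back.  Return
   value assumptions: 0 on success, nonzero on failure:

     if (__kmp_pause_resource(kmp_soft_paused) == 0) {
       // ... resources released ...
       __kmp_resume_if_soft_paused();
     }
*/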
39060b57cec5SDimitry Andric 
3907*5ffd83dbSDimitry Andric extern void __kmp_omp_display_env(int verbose);
3908*5ffd83dbSDimitry Andric 
39090b57cec5SDimitry Andric #ifdef __cplusplus
39100b57cec5SDimitry Andric }
39110b57cec5SDimitry Andric #endif
39120b57cec5SDimitry Andric 
39130b57cec5SDimitry Andric #endif /* KMP_H */
3914