10b57cec5SDimitry Andric /*! \file */
20b57cec5SDimitry Andric /*
30b57cec5SDimitry Andric * kmp.h -- KPTS runtime header file.
40b57cec5SDimitry Andric */
50b57cec5SDimitry Andric
60b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
70b57cec5SDimitry Andric //
80b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
90b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
100b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #ifndef KMP_H
150b57cec5SDimitry Andric #define KMP_H
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric #include "kmp_config.h"
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric /* #define BUILD_PARALLEL_ORDERED 1 */
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric /* This fix replaces gettimeofday with clock_gettime for better scalability on
220b57cec5SDimitry Andric the Altix. Requires user code to be linked with -lrt. */
230b57cec5SDimitry Andric //#define FIX_SGI_CLOCK
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric /* Defines for OpenMP 3.0 tasking and auto scheduling */
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric #ifndef KMP_STATIC_STEAL_ENABLED
280b57cec5SDimitry Andric #define KMP_STATIC_STEAL_ENABLED 1
290b57cec5SDimitry Andric #endif
305f757f3fSDimitry Andric #define KMP_WEIGHTED_ITERATIONS_SUPPORTED \
315f757f3fSDimitry Andric (KMP_AFFINITY_SUPPORTED && KMP_STATIC_STEAL_ENABLED && \
325f757f3fSDimitry Andric (KMP_ARCH_X86 || KMP_ARCH_X86_64))
330b57cec5SDimitry Andric
340b57cec5SDimitry Andric #define TASK_CURRENT_NOT_QUEUED 0
350b57cec5SDimitry Andric #define TASK_CURRENT_QUEUED 1
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
380b57cec5SDimitry Andric #define TASK_STACK_EMPTY 0 // entries when the stack is empty
390b57cec5SDimitry Andric #define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
400b57cec5SDimitry Andric // Number of entries in each task stack array
410b57cec5SDimitry Andric #define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
420b57cec5SDimitry Andric // Mask for determining index into stack block
430b57cec5SDimitry Andric #define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
440b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric #define TASK_NOT_PUSHED 1
470b57cec5SDimitry Andric #define TASK_SUCCESSFULLY_PUSHED 0
480b57cec5SDimitry Andric #define TASK_TIED 1
490b57cec5SDimitry Andric #define TASK_UNTIED 0
500b57cec5SDimitry Andric #define TASK_EXPLICIT 1
510b57cec5SDimitry Andric #define TASK_IMPLICIT 0
520b57cec5SDimitry Andric #define TASK_PROXY 1
530b57cec5SDimitry Andric #define TASK_FULL 0
540b57cec5SDimitry Andric #define TASK_DETACHABLE 1
550b57cec5SDimitry Andric #define TASK_UNDETACHABLE 0
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric #define KMP_CANCEL_THREADS
580b57cec5SDimitry Andric #define KMP_THREAD_ATTR
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric // Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being
610b57cec5SDimitry Andric // built on Android
620b57cec5SDimitry Andric #if defined(__ANDROID__)
630b57cec5SDimitry Andric #undef KMP_CANCEL_THREADS
640b57cec5SDimitry Andric #endif
650b57cec5SDimitry Andric
665f757f3fSDimitry Andric // Some WASI targets (e.g., wasm32-wasi-threads) do not support thread
675f757f3fSDimitry Andric // cancellation.
685f757f3fSDimitry Andric #if KMP_OS_WASI
695f757f3fSDimitry Andric #undef KMP_CANCEL_THREADS
705f757f3fSDimitry Andric #endif
715f757f3fSDimitry Andric
725f757f3fSDimitry Andric #if !KMP_OS_WASI
730b57cec5SDimitry Andric #include <signal.h>
745f757f3fSDimitry Andric #endif
750b57cec5SDimitry Andric #include <stdarg.h>
760b57cec5SDimitry Andric #include <stddef.h>
770b57cec5SDimitry Andric #include <stdio.h>
780b57cec5SDimitry Andric #include <stdlib.h>
790b57cec5SDimitry Andric #include <string.h>
80e8d8bef9SDimitry Andric #include <limits>
81e8d8bef9SDimitry Andric #include <type_traits>
820b57cec5SDimitry Andric /* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad
830b57cec5SDimitry Andric Microsoft library. Some macros provided below to replace these functions */
840b57cec5SDimitry Andric #ifndef __ABSOFT_WIN
850b57cec5SDimitry Andric #include <sys/types.h>
860b57cec5SDimitry Andric #endif
870b57cec5SDimitry Andric #include <limits.h>
880b57cec5SDimitry Andric #include <time.h>
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric #include <errno.h>
910b57cec5SDimitry Andric
920b57cec5SDimitry Andric #include "kmp_os.h"
930b57cec5SDimitry Andric
940b57cec5SDimitry Andric #include "kmp_safe_c_api.h"
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric #if KMP_STATS_ENABLED
970b57cec5SDimitry Andric class kmp_stats_list;
980b57cec5SDimitry Andric #endif
990b57cec5SDimitry Andric
1000b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
1010b57cec5SDimitry Andric // Only include hierarchical scheduling if affinity is supported
1020b57cec5SDimitry Andric #undef KMP_USE_HIER_SCHED
1030b57cec5SDimitry Andric #define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED
1040b57cec5SDimitry Andric #endif
1050b57cec5SDimitry Andric
106*0fca6ea1SDimitry Andric // OMPD_SKIP_HWLOC used in libompd/omp-icv.cpp to avoid OMPD depending on hwloc
107*0fca6ea1SDimitry Andric #if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED && !defined(OMPD_SKIP_HWLOC)
1080b57cec5SDimitry Andric #include "hwloc.h"
1090b57cec5SDimitry Andric #ifndef HWLOC_OBJ_NUMANODE
1100b57cec5SDimitry Andric #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
1110b57cec5SDimitry Andric #endif
1120b57cec5SDimitry Andric #ifndef HWLOC_OBJ_PACKAGE
1130b57cec5SDimitry Andric #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
1140b57cec5SDimitry Andric #endif
1150b57cec5SDimitry Andric #endif
1160b57cec5SDimitry Andric
1170b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1180b57cec5SDimitry Andric #include <xmmintrin.h>
1190b57cec5SDimitry Andric #endif
1200b57cec5SDimitry Andric
// Thin wrappers over the C allocation routines. These have to be defined
// before "kmp_barrier.h" is included.
#define KMP_INTERNAL_MALLOC(sz) malloc((sz))
#define KMP_INTERNAL_FREE(p) free((p))
#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
12681ad6265SDimitry Andric
1270b57cec5SDimitry Andric #include "kmp_debug.h"
1280b57cec5SDimitry Andric #include "kmp_lock.h"
1290b57cec5SDimitry Andric #include "kmp_version.h"
130349cc55cSDimitry Andric #include "kmp_barrier.h"
1310b57cec5SDimitry Andric #if USE_DEBUGGER
1320b57cec5SDimitry Andric #include "kmp_debugger.h"
1330b57cec5SDimitry Andric #endif
1340b57cec5SDimitry Andric #include "kmp_i18n.h"
1350b57cec5SDimitry Andric
1365f757f3fSDimitry Andric #define KMP_HANDLE_SIGNALS ((KMP_OS_UNIX && !KMP_OS_WASI) || KMP_OS_WINDOWS)
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric #include "kmp_wrapper_malloc.h"
1390b57cec5SDimitry Andric #if KMP_OS_UNIX
1400b57cec5SDimitry Andric #include <unistd.h>
1410b57cec5SDimitry Andric #if !defined NSIG && defined _NSIG
1420b57cec5SDimitry Andric #define NSIG _NSIG
1430b57cec5SDimitry Andric #endif
1440b57cec5SDimitry Andric #endif
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric #if KMP_OS_LINUX
1470b57cec5SDimitry Andric #pragma weak clock_gettime
1480b57cec5SDimitry Andric #endif
1490b57cec5SDimitry Andric
1500b57cec5SDimitry Andric #if OMPT_SUPPORT
1510b57cec5SDimitry Andric #include "ompt-internal.h"
1520b57cec5SDimitry Andric #endif
1530b57cec5SDimitry Andric
154fe6060f1SDimitry Andric #if OMPD_SUPPORT
155fe6060f1SDimitry Andric #include "ompd-specific.h"
156fe6060f1SDimitry Andric #endif
157fe6060f1SDimitry Andric
158e8d8bef9SDimitry Andric #ifndef UNLIKELY
159e8d8bef9SDimitry Andric #define UNLIKELY(x) (x)
160e8d8bef9SDimitry Andric #endif
161e8d8bef9SDimitry Andric
1620b57cec5SDimitry Andric // Affinity format function
1630b57cec5SDimitry Andric #include "kmp_str.h"
1640b57cec5SDimitry Andric
1650b57cec5SDimitry Andric // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
1660b57cec5SDimitry Andric // 3 - fast allocation using sync, non-sync free lists of any size, non-self
1670b57cec5SDimitry Andric // free lists of limited size.
1680b57cec5SDimitry Andric #ifndef USE_FAST_MEMORY
1690b57cec5SDimitry Andric #define USE_FAST_MEMORY 3
1700b57cec5SDimitry Andric #endif
1710b57cec5SDimitry Andric
// KMP_NESTED_HOT_TEAMS defaults to 0 when the build does not set it.
#ifndef KMP_NESTED_HOT_TEAMS
#define KMP_NESTED_HOT_TEAMS 0
#endif

// USE_NESTED_HOT_ARG(x) expands to ", x" when nested hot teams are enabled and
// to nothing otherwise, so an extra trailing argument can be appended to a
// parameter or argument list conditionally.
#if KMP_NESTED_HOT_TEAMS
#define USE_NESTED_HOT_ARG(x) , x
#else
#define USE_NESTED_HOT_ARG(x)
#endif
1820b57cec5SDimitry Andric
1830b57cec5SDimitry Andric // Assume using BGET compare_exchange instruction instead of lock by default.
1840b57cec5SDimitry Andric #ifndef USE_CMP_XCHG_FOR_BGET
1850b57cec5SDimitry Andric #define USE_CMP_XCHG_FOR_BGET 1
1860b57cec5SDimitry Andric #endif
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric // Test to see if queuing lock is better than bootstrap lock for bget
1890b57cec5SDimitry Andric // #ifndef USE_QUEUING_LOCK_FOR_BGET
1900b57cec5SDimitry Andric // #define USE_QUEUING_LOCK_FOR_BGET
1910b57cec5SDimitry Andric // #endif
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric #define KMP_NSEC_PER_SEC 1000000000L
1940b57cec5SDimitry Andric #define KMP_USEC_PER_SEC 1000000L
1955f757f3fSDimitry Andric #define KMP_NSEC_PER_USEC 1000L
1960b57cec5SDimitry Andric
1970b57cec5SDimitry Andric /*!
1980b57cec5SDimitry Andric @ingroup BASIC_TYPES
1990b57cec5SDimitry Andric @{
2000b57cec5SDimitry Andric */
2010b57cec5SDimitry Andric
/*!
Values for bit flags used in the ident_t to describe the fields.
*/
enum {
  /*! Use trampoline for internal microtasks */
  KMP_IDENT_IMB = 0x0001,
  /*! Use c-style ident structure */
  KMP_IDENT_KMPC = 0x0002,
  /* 0x0004 is no longer used */
  /*! Entry point generated by auto-parallelization */
  KMP_IDENT_AUTOPAR = 0x0008,
  /*! Compiler generates atomic reduction option for kmpc_reduce* */
  KMP_IDENT_ATOMIC_REDUCE = 0x0010,
  /*! To mark a 'barrier' directive in user code */
  KMP_IDENT_BARRIER_EXPL = 0x0020,
  /*! To mark implicit barriers. Every KMP_IDENT_BARRIER_IMPL_* value below
      has this bit set; the mask covers it plus the two bits above it. */
  KMP_IDENT_BARRIER_IMPL = 0x0040,
  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,

  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,

  /*! To mark a static loop in OMPT callbacks */
  KMP_IDENT_WORK_LOOP = 0x0200,
  /*! To mark a sections directive in OMPT callbacks */
  KMP_IDENT_WORK_SECTIONS = 0x0400,
  /*! To mark a distribute construct in OMPT callbacks */
  KMP_IDENT_WORK_DISTRIBUTE = 0x0800,
  /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
      not currently used. If one day we need more bits, then we can use
      an invalid combination of hints to mean that another, larger field
      should be used in a different flag. */
  KMP_IDENT_ATOMIC_HINT_MASK = 0x00FF0000,
  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x00010000,
  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x00020000,
  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x00040000,
  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x00080000,
  /*! Top byte of the flags word; holds the OpenMP spec version. */
  KMP_IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000
};
2430b57cec5SDimitry Andric
2440b57cec5SDimitry Andric /*!
2450b57cec5SDimitry Andric * The ident structure that describes a source location.
2460b57cec5SDimitry Andric */
2470b57cec5SDimitry Andric typedef struct ident {
2480b57cec5SDimitry Andric kmp_int32 reserved_1; /**< might be used in Fortran; see above */
2490b57cec5SDimitry Andric kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
2500b57cec5SDimitry Andric identifies this union member */
2510b57cec5SDimitry Andric kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
2520b57cec5SDimitry Andric #if USE_ITT_BUILD
2530b57cec5SDimitry Andric /* but currently used for storing region-specific ITT */
2540b57cec5SDimitry Andric /* contextual information. */
2550b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
2560b57cec5SDimitry Andric kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
2570b57cec5SDimitry Andric char const *psource; /**< String describing the source location.
2580b57cec5SDimitry Andric The string is composed of semi-colon separated fields
2590b57cec5SDimitry Andric which describe the source file, the function and a pair
2600b57cec5SDimitry Andric of line numbers that delimit the construct. */
261e8d8bef9SDimitry Andric // Returns the OpenMP version in form major*10+minor (e.g., 50 for 5.0)
get_openmp_versionident262e8d8bef9SDimitry Andric kmp_int32 get_openmp_version() {
263e8d8bef9SDimitry Andric return (((flags & KMP_IDENT_OPENMP_SPEC_VERSION_MASK) >> 24) & 0xFF);
264e8d8bef9SDimitry Andric }
2650b57cec5SDimitry Andric } ident_t;
2660b57cec5SDimitry Andric /*!
2670b57cec5SDimitry Andric @}
2680b57cec5SDimitry Andric */
2690b57cec5SDimitry Andric
2700b57cec5SDimitry Andric // Some forward declarations.
2710b57cec5SDimitry Andric typedef union kmp_team kmp_team_t;
2720b57cec5SDimitry Andric typedef struct kmp_taskdata kmp_taskdata_t;
2730b57cec5SDimitry Andric typedef union kmp_task_team kmp_task_team_t;
2740b57cec5SDimitry Andric typedef union kmp_team kmp_team_p;
2750b57cec5SDimitry Andric typedef union kmp_info kmp_info_p;
2760b57cec5SDimitry Andric typedef union kmp_root kmp_root_p;
2770b57cec5SDimitry Andric
278e8d8bef9SDimitry Andric template <bool C = false, bool S = true> class kmp_flag_32;
279e8d8bef9SDimitry Andric template <bool C = false, bool S = true> class kmp_flag_64;
280349cc55cSDimitry Andric template <bool C = false, bool S = true> class kmp_atomic_flag_64;
281e8d8bef9SDimitry Andric class kmp_flag_oncore;
282e8d8bef9SDimitry Andric
2830b57cec5SDimitry Andric #ifdef __cplusplus
2840b57cec5SDimitry Andric extern "C" {
2850b57cec5SDimitry Andric #endif
2860b57cec5SDimitry Andric
2870b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
2880b57cec5SDimitry Andric
/* Pack two 32-bit signed integers into a 64-bit signed integer: HIGH_32 ends
   up in bits 63..32 and LOW_32 in bits 31..0. LOW_32 is masked to 32 bits
   after widening; without the mask a negative LOW_32 would be sign-extended
   by the cast to kmp_uint64 and its upper bits would overwrite HIGH_32. */
/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32, LOW_32)                                           \
  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) |                               \
               (((kmp_uint64)(LOW_32)) & 0xFFFFFFFFULL)))
2930b57cec5SDimitry Andric
// Generic string manipulation macros. Assume that _x is of type char *.
// Each macro advances _x in place and mentions _x several times, so pass a
// plain pointer variable (no side effects). Each one expands to a braced
// block, not an expression.

// Advance _x past any run of spaces and tabs.
#define SKIP_WS(_x)                                                            \
  {                                                                            \
    for (; *(_x) == ' ' || *(_x) == '\t'; (_x)++) {                            \
    }                                                                          \
  }
// Advance _x past any run of decimal digits.
#define SKIP_DIGITS(_x)                                                        \
  {                                                                            \
    for (; *(_x) >= '0' && *(_x) <= '9'; (_x)++) {                             \
    }                                                                          \
  }
// Advance _x past any run of characters from [0-9a-zA-Z_].
#define SKIP_TOKEN(_x)                                                         \
  {                                                                            \
    for (; (*(_x) >= '0' && *(_x) <= '9') ||                                   \
           (*(_x) >= 'a' && *(_x) <= 'z') ||                                   \
           (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_';                     \
         (_x)++) {                                                             \
    }                                                                          \
  }
// Advance _x until it points at character _c or at the terminating '\0'.
#define SKIP_TO(_x, _c)                                                        \
  {                                                                            \
    for (; *(_x) != '\0' && *(_x) != (_c); (_x)++) {                           \
    }                                                                          \
  }
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3180b57cec5SDimitry Andric
// Larger / smaller of two values. One of the arguments is evaluated twice, so
// do not pass expressions with side effects. When the comparison is false
// (e.g. the values are equal) the second argument is the result.
#define KMP_MAX(x, y) (((y) < (x)) ? (x) : (y))
#define KMP_MIN(x, y) (((y) > (x)) ? (x) : (y))
3210b57cec5SDimitry Andric
3220b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
3230b57cec5SDimitry Andric /* Enumeration types */
3240b57cec5SDimitry Andric
// Timer states. The enumerators are numbered consecutively from zero.
enum kmp_state_timer {
  ts_stop = 0,
  ts_start = 1,
  ts_pause = 2,

  ts_last_state = 3 // last entry; equals the number of states listed above
};
3320b57cec5SDimitry Andric
// Dynamic adjustment modes. Enumerators are numbered implicitly from zero, so
// every value after dynamic_default is one higher when USE_LOAD_BALANCE is
// defined.
enum dynamic_mode {
  dynamic_default,
#if defined(USE_LOAD_BALANCE)
  dynamic_load_balance,
#endif // USE_LOAD_BALANCE
  dynamic_random,
  dynamic_thread_limit,
  dynamic_max // last entry
};
3420b57cec5SDimitry Andric
/* External schedule constants. They duplicate enum omp_sched from omp.h so
 * that omp.h does not have to be included here. */
#if !defined(KMP_SCHED_TYPE_DEFINED)
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
  // lower and upper bounds are for routine parameter check
  kmp_sched_lower = 0,
  // Note: need to adjust __kmp_sch_map global array in case enum is changed
  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
  kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
  kmp_sched_upper_std = 5, // upper bound for standard schedules
  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
#if KMP_STATIC_STEAL_ENABLED
  kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
  // One past the last extension schedule: 103 when kmp_sched_static_steal is
  // compiled in, 102 otherwise.
  kmp_sched_upper,
  kmp_sched_default = kmp_sched_static, // default scheduling
  // Modifier bit (bit 31) that is or-ed onto one of the kinds above.
  kmp_sched_monotonic = 0x80000000
} kmp_sched_t;
#endif // KMP_SCHED_TYPE_DEFINED
3650b57cec5SDimitry Andric
3660b57cec5SDimitry Andric /*!
3670b57cec5SDimitry Andric @ingroup WORK_SHARING
3680b57cec5SDimitry Andric * Describes the loop schedule to be used for a parallel for loop.
3690b57cec5SDimitry Andric */
3700b57cec5SDimitry Andric enum sched_type : kmp_int32 {
3710b57cec5SDimitry Andric kmp_sch_lower = 32, /**< lower bound for unordered values */
3720b57cec5SDimitry Andric kmp_sch_static_chunked = 33,
3730b57cec5SDimitry Andric kmp_sch_static = 34, /**< static unspecialized */
3740b57cec5SDimitry Andric kmp_sch_dynamic_chunked = 35,
3750b57cec5SDimitry Andric kmp_sch_guided_chunked = 36, /**< guided unspecialized */
3760b57cec5SDimitry Andric kmp_sch_runtime = 37,
3770b57cec5SDimitry Andric kmp_sch_auto = 38, /**< auto */
3780b57cec5SDimitry Andric kmp_sch_trapezoidal = 39,
3790b57cec5SDimitry Andric
3800b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */
3810b57cec5SDimitry Andric kmp_sch_static_greedy = 40,
3820b57cec5SDimitry Andric kmp_sch_static_balanced = 41,
3830b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */
3840b57cec5SDimitry Andric kmp_sch_guided_iterative_chunked = 42,
3850b57cec5SDimitry Andric kmp_sch_guided_analytical_chunked = 43,
3860b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */
3870b57cec5SDimitry Andric kmp_sch_static_steal = 44,
3880b57cec5SDimitry Andric
3890b57cec5SDimitry Andric /* static with chunk adjustment (e.g., simd) */
3900b57cec5SDimitry Andric kmp_sch_static_balanced_chunked = 45,
3910b57cec5SDimitry Andric kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
3920b57cec5SDimitry Andric kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */
3930b57cec5SDimitry Andric
3940b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */
3950b57cec5SDimitry Andric kmp_sch_upper, /**< upper bound for unordered values */
3960b57cec5SDimitry Andric
3970b57cec5SDimitry Andric kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
3980b57cec5SDimitry Andric kmp_ord_static_chunked = 65,
3990b57cec5SDimitry Andric kmp_ord_static = 66, /**< ordered static unspecialized */
4000b57cec5SDimitry Andric kmp_ord_dynamic_chunked = 67,
4010b57cec5SDimitry Andric kmp_ord_guided_chunked = 68,
4020b57cec5SDimitry Andric kmp_ord_runtime = 69,
4030b57cec5SDimitry Andric kmp_ord_auto = 70, /**< ordered auto */
4040b57cec5SDimitry Andric kmp_ord_trapezoidal = 71,
4050b57cec5SDimitry Andric kmp_ord_upper, /**< upper bound for ordered values */
4060b57cec5SDimitry Andric
4070b57cec5SDimitry Andric /* Schedules for Distribute construct */
4080b57cec5SDimitry Andric kmp_distribute_static_chunked = 91, /**< distribute static chunked */
4090b57cec5SDimitry Andric kmp_distribute_static = 92, /**< distribute static unspecialized */
4100b57cec5SDimitry Andric
4110b57cec5SDimitry Andric /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
4120b57cec5SDimitry Andric single iteration/chunk, even if the loop is serialized. For the schedule
4130b57cec5SDimitry Andric types listed above, the entire iteration vector is returned if the loop is
4140b57cec5SDimitry Andric serialized. This doesn't work for gcc/gcomp sections. */
4150b57cec5SDimitry Andric kmp_nm_lower = 160, /**< lower bound for nomerge values */
4160b57cec5SDimitry Andric
4170b57cec5SDimitry Andric kmp_nm_static_chunked =
4180b57cec5SDimitry Andric (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
4190b57cec5SDimitry Andric kmp_nm_static = 162, /**< static unspecialized */
4200b57cec5SDimitry Andric kmp_nm_dynamic_chunked = 163,
4210b57cec5SDimitry Andric kmp_nm_guided_chunked = 164, /**< guided unspecialized */
4220b57cec5SDimitry Andric kmp_nm_runtime = 165,
4230b57cec5SDimitry Andric kmp_nm_auto = 166, /**< auto */
4240b57cec5SDimitry Andric kmp_nm_trapezoidal = 167,
4250b57cec5SDimitry Andric
4260b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */
4270b57cec5SDimitry Andric kmp_nm_static_greedy = 168,
4280b57cec5SDimitry Andric kmp_nm_static_balanced = 169,
4290b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */
4300b57cec5SDimitry Andric kmp_nm_guided_iterative_chunked = 170,
4310b57cec5SDimitry Andric kmp_nm_guided_analytical_chunked = 171,
4320b57cec5SDimitry Andric kmp_nm_static_steal =
4330b57cec5SDimitry Andric 172, /* accessible only through OMP_SCHEDULE environment variable */
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric kmp_nm_ord_static_chunked = 193,
4360b57cec5SDimitry Andric kmp_nm_ord_static = 194, /**< ordered static unspecialized */
4370b57cec5SDimitry Andric kmp_nm_ord_dynamic_chunked = 195,
4380b57cec5SDimitry Andric kmp_nm_ord_guided_chunked = 196,
4390b57cec5SDimitry Andric kmp_nm_ord_runtime = 197,
4400b57cec5SDimitry Andric kmp_nm_ord_auto = 198, /**< auto */
4410b57cec5SDimitry Andric kmp_nm_ord_trapezoidal = 199,
4420b57cec5SDimitry Andric kmp_nm_upper, /**< upper bound for nomerge values */
4430b57cec5SDimitry Andric
4440b57cec5SDimitry Andric /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
4450b57cec5SDimitry Andric we need to distinguish the three possible cases (no modifier, monotonic
4460b57cec5SDimitry Andric modifier, nonmonotonic modifier), we need separate bits for each modifier.
4470b57cec5SDimitry Andric The absence of monotonic does not imply nonmonotonic, especially since 4.5
4480b57cec5SDimitry Andric says that the behaviour of the "no modifier" case is implementation defined
4490b57cec5SDimitry Andric in 4.5, but will become "nonmonotonic" in 5.0.
4500b57cec5SDimitry Andric
4510b57cec5SDimitry Andric Since we're passing a full 32 bit value, we can use a couple of high bits
4520b57cec5SDimitry Andric for these flags; out of paranoia we avoid the sign bit.
4530b57cec5SDimitry Andric
4540b57cec5SDimitry Andric These modifiers can be or-ed into non-static schedules by the compiler to
4550b57cec5SDimitry Andric pass the additional information. They will be stripped early in the
4560b57cec5SDimitry Andric processing in __kmp_dispatch_init when setting up schedules, so most of the
4570b57cec5SDimitry Andric code won't ever see schedules with these bits set. */
4580b57cec5SDimitry Andric kmp_sch_modifier_monotonic =
4590b57cec5SDimitry Andric (1 << 29), /**< Set if the monotonic schedule modifier was present */
4600b57cec5SDimitry Andric kmp_sch_modifier_nonmonotonic =
4610b57cec5SDimitry Andric (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */
4620b57cec5SDimitry Andric
4630b57cec5SDimitry Andric #define SCHEDULE_WITHOUT_MODIFIERS(s) \
4640b57cec5SDimitry Andric (enum sched_type)( \
4650b57cec5SDimitry Andric (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
4660b57cec5SDimitry Andric #define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
4670b57cec5SDimitry Andric #define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
4680b57cec5SDimitry Andric #define SCHEDULE_HAS_NO_MODIFIERS(s) \
4690b57cec5SDimitry Andric (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
4700b57cec5SDimitry Andric #define SCHEDULE_GET_MODIFIERS(s) \
4710b57cec5SDimitry Andric ((enum sched_type)( \
4720b57cec5SDimitry Andric (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
4730b57cec5SDimitry Andric #define SCHEDULE_SET_MODIFIERS(s, m) \
4740b57cec5SDimitry Andric (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
4750b57cec5SDimitry Andric #define SCHEDULE_NONMONOTONIC 0
4760b57cec5SDimitry Andric #define SCHEDULE_MONOTONIC 1
4770b57cec5SDimitry Andric
4780b57cec5SDimitry Andric kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
4790b57cec5SDimitry Andric };
4800b57cec5SDimitry Andric
4810b57cec5SDimitry Andric // Apply modifiers on internal kind to standard kind
4820b57cec5SDimitry Andric static inline void
__kmp_sched_apply_mods_stdkind(kmp_sched_t * kind,enum sched_type internal_kind)4830b57cec5SDimitry Andric __kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
4840b57cec5SDimitry Andric enum sched_type internal_kind) {
4850b57cec5SDimitry Andric if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
4860b57cec5SDimitry Andric *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
4870b57cec5SDimitry Andric }
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric
4900b57cec5SDimitry Andric // Apply modifiers on standard kind to internal kind
4910b57cec5SDimitry Andric static inline void
__kmp_sched_apply_mods_intkind(kmp_sched_t kind,enum sched_type * internal_kind)4920b57cec5SDimitry Andric __kmp_sched_apply_mods_intkind(kmp_sched_t kind,
4930b57cec5SDimitry Andric enum sched_type *internal_kind) {
4940b57cec5SDimitry Andric if ((int)kind & (int)kmp_sched_monotonic) {
4950b57cec5SDimitry Andric *internal_kind = (enum sched_type)((int)*internal_kind |
4960b57cec5SDimitry Andric (int)kmp_sch_modifier_monotonic);
4970b57cec5SDimitry Andric }
4980b57cec5SDimitry Andric }
4990b57cec5SDimitry Andric
5000b57cec5SDimitry Andric // Get standard schedule without modifiers
__kmp_sched_without_mods(kmp_sched_t kind)5010b57cec5SDimitry Andric static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
5020b57cec5SDimitry Andric return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
5030b57cec5SDimitry Andric }
5040b57cec5SDimitry Andric
/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef union kmp_r_sched {
  // View 1: schedule kind and chunk as two separate fields.
  struct {
    enum sched_type r_sched_type;
    int chunk;
  };
  // View 2: the same storage as a single 64-bit integer.
  // NOTE(review): presumably lets both fields be copied or compared in one
  // 64-bit operation -- confirm against the users of `sched`.
  kmp_int64 sched;
} kmp_r_sched_t;
5130b57cec5SDimitry Andric
5140b57cec5SDimitry Andric extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
5150b57cec5SDimitry Andric // internal schedule types
5160b57cec5SDimitry Andric
// Library mode selector; enumerators are numbered consecutively from zero.
enum library_type {
  library_none = 0,
  library_serial = 1,
  library_turnaround = 2,
  library_throughput = 3
};
5230b57cec5SDimitry Andric
5240b57cec5SDimitry Andric #if KMP_OS_LINUX
// Selects which clock routine is used; only declared for Linux builds.
enum clock_function_type {
  clock_function_gettimeofday = 0,
  clock_function_clock_gettime = 1
};
5290b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */
5300b57cec5SDimitry Andric
5310b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED
// MIC architecture identifiers; numbered consecutively from zero.
enum mic_type {
  non_mic = 0,
  mic1 = 1,
  mic2 = 2,
  mic3 = 3,
  dummy = 4
};
5330b57cec5SDimitry Andric #endif
5340b57cec5SDimitry Andric
// OpenMP 3.1 - Nested num threads array
typedef struct kmp_nested_nthreads_t {
  int *nth; // NOTE(review): presumably one thread count per nesting level --
            // confirm
  int size; // NOTE(review): presumably the allocated capacity of nth -- confirm
  int used; // NOTE(review): presumably the number of valid entries in nth --
            // confirm
} kmp_nested_nthreads_t;
541*0fca6ea1SDimitry Andric
542*0fca6ea1SDimitry Andric extern kmp_nested_nthreads_t __kmp_nested_nth;
543*0fca6ea1SDimitry Andric
5440b57cec5SDimitry Andric /* -- fast reduction stuff ------------------------------------------------ */
5450b57cec5SDimitry Andric
5460b57cec5SDimitry Andric #undef KMP_FAST_REDUCTION_BARRIER
5470b57cec5SDimitry Andric #define KMP_FAST_REDUCTION_BARRIER 1
5480b57cec5SDimitry Andric
5490b57cec5SDimitry Andric #undef KMP_FAST_REDUCTION_CORE_DUO
5500b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
5510b57cec5SDimitry Andric #define KMP_FAST_REDUCTION_CORE_DUO 1
5520b57cec5SDimitry Andric #endif
5530b57cec5SDimitry Andric
// Reduction methods. Every non-zero value lives entirely in byte 1 (bits
// 8..15) and leaves byte 0 clear.
enum _reduction_method {
  reduction_method_not_defined = 0x0000,
  critical_reduce_block = 0x0100,
  atomic_reduce_block = 0x0200,
  tree_reduce_block = 0x0300,
  empty_reduce_block = 0x0400
};
5610b57cec5SDimitry Andric
// Description of the packed_reduction_method variable:
// The packed_reduction_method variable holds two enum values that are packed
// together into its 0-th byte and 1-st byte:
// 0: (packed_reduction_method & 0x000000FF) is the 'enum barrier_type' value
//    of the barrier that will be used in fast reduction: bs_plain_barrier or
//    bs_reduction_barrier
// 1: (packed_reduction_method & 0x0000FF00) is the reduction method that will
//    be used in fast reduction
// The reduction method is of type 'enum _reduction_method', whose values are
// defined so that the bits of the 0-th byte are empty; packing and unpacking
// therefore need no shift instruction.
5730b57cec5SDimitry Andric
// Pack / unpack helpers for a reduction method combined with a barrier type.
#if KMP_FAST_REDUCTION_BARRIER
// Combine the two values with a bitwise OR.
#define PACK_REDUCTION_METHOD_AND_BARRIER(method, barrier)                     \
  ((method) | (barrier))

// The reduction method is read from byte 1.
#define UNPACK_REDUCTION_METHOD(packed)                                        \
  ((enum _reduction_method)((packed) & (0x0000FF00)))

// The barrier type is read from byte 0.
#define UNPACK_REDUCTION_BARRIER(packed)                                       \
  ((enum barrier_type)((packed) & (0x000000FF)))
#else
// Without the fast-reduction barrier the packed value is just the method,
// and the barrier is always bs_plain_barrier.
#define PACK_REDUCTION_METHOD_AND_BARRIER(method, barrier) (method)

#define UNPACK_REDUCTION_METHOD(packed) (packed)

#define UNPACK_REDUCTION_BARRIER(packed) (bs_plain_barrier)
#endif
5920b57cec5SDimitry Andric
// True when the reduction method stored in `packed` equals `expected_method`.
#define TEST_REDUCTION_METHOD(packed, expected_method)                         \
  ((UNPACK_REDUCTION_METHOD(packed)) == (expected_method))
5960b57cec5SDimitry Andric
#if KMP_FAST_REDUCTION_BARRIER
// Pre-packed combinations of tree_reduce_block with each of the two barrier
// kinds.
#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER                               \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))

#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER                                   \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
#endif
6040b57cec5SDimitry Andric
// Integer type used to hold a packed reduction method / barrier value.
typedef int PACKED_REDUCTION_METHOD_T;
6060b57cec5SDimitry Andric
6070b57cec5SDimitry Andric /* -- end of fast reduction stuff ----------------------------------------- */
6080b57cec5SDimitry Andric
6090b57cec5SDimitry Andric #if KMP_OS_WINDOWS
6100b57cec5SDimitry Andric #define USE_CBLKDATA
6110b57cec5SDimitry Andric #if KMP_MSVC_COMPAT
6120b57cec5SDimitry Andric #pragma warning(push)
6130b57cec5SDimitry Andric #pragma warning(disable : 271 310)
6140b57cec5SDimitry Andric #endif
6150b57cec5SDimitry Andric #include <windows.h>
6160b57cec5SDimitry Andric #if KMP_MSVC_COMPAT
6170b57cec5SDimitry Andric #pragma warning(pop)
6180b57cec5SDimitry Andric #endif
6190b57cec5SDimitry Andric #endif
6200b57cec5SDimitry Andric
6210b57cec5SDimitry Andric #if KMP_OS_UNIX
6225f757f3fSDimitry Andric #if !KMP_OS_WASI
6230b57cec5SDimitry Andric #include <dlfcn.h>
6245f757f3fSDimitry Andric #endif
6250b57cec5SDimitry Andric #include <pthread.h>
6260b57cec5SDimitry Andric #endif
6270b57cec5SDimitry Andric
// Hardware topology layer types. Valid types are numbered consecutively from
// 0 (KMP_HW_SOCKET); KMP_HW_UNKNOWN is the only negative value.
enum kmp_hw_t : int {
  KMP_HW_UNKNOWN = -1,
  KMP_HW_SOCKET = 0,
  KMP_HW_PROC_GROUP,
  KMP_HW_NUMA,
  KMP_HW_DIE,
  KMP_HW_LLC,
  KMP_HW_L3,
  KMP_HW_TILE,
  KMP_HW_MODULE,
  KMP_HW_L2,
  KMP_HW_L1,
  KMP_HW_CORE,
  KMP_HW_THREAD,
  // One past the last valid type: serves as the exclusive upper bound for
  // range checks and loops over the types, and as an array extent.
  KMP_HW_LAST
};
644fe6060f1SDimitry Andric
// Core types. The ATOM and CORE values exist on x86 / x86-64 only, and
// KMP_HW_MAX_NUM_CORE_TYPES is 3 there and 1 on every other architecture.
typedef enum kmp_hw_core_type_t {
  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  KMP_HW_CORE_TYPE_ATOM = 0x20,
  KMP_HW_CORE_TYPE_CORE = 0x40,
  KMP_HW_MAX_NUM_CORE_TYPES = 3,
#else
  KMP_HW_MAX_NUM_CORE_TYPES = 1,
#endif
} kmp_hw_core_type_t;
6550eae32dcSDimitry Andric
// NOTE(review): presumably the maximum number of core-efficiency classes
// that are tracked -- confirm at the use sites.
#define KMP_HW_MAX_NUM_CORE_EFFS 8

// Assert that `type` is a valid kmp_hw_t, i.e. lies in [0, KMP_HW_LAST).
// The argument is parenthesized at each use so that an expression argument
// containing low-precedence operators (e.g. `c ? a : b`) is range-checked as
// a whole instead of being split up by the surrounding comparisons.
#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type)                                   \
  KMP_DEBUG_ASSERT((type) >= (kmp_hw_t)0 && (type) < KMP_HW_LAST)
#define KMP_ASSERT_VALID_HW_TYPE(type)                                         \
  KMP_ASSERT((type) >= (kmp_hw_t)0 && (type) < KMP_HW_LAST)

// Loop header that visits every valid kmp_hw_t value in increasing order.
// `type` names the loop variable this macro declares, so it must be a plain
// identifier (which is why it is not parenthesized here).
#define KMP_FOREACH_HW_TYPE(type)                                              \
  for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST;                        \
       type = (kmp_hw_t)((int)type + 1))
666fe6060f1SDimitry Andric
667fe6060f1SDimitry Andric const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
668fe6060f1SDimitry Andric const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
6690eae32dcSDimitry Andric const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);
670fe6060f1SDimitry Andric
6710b57cec5SDimitry Andric /* Only Linux* OS and Windows* OS support thread affinity. */
6720b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
6730b57cec5SDimitry Andric
// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
#if KMP_OS_WINDOWS
#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
// Local definition of GROUP_AFFINITY for the older toolchains.
typedef struct GROUP_AFFINITY {
  KAFFINITY Mask;
  WORD Group;
  WORD Reserved[3];
} GROUP_AFFINITY;
#endif /* _MSC_VER < 1600 */

// Number of processor groups: a variable when KMP_GROUP_AFFINITY is enabled,
// otherwise the compile-time constant 1.
#if KMP_GROUP_AFFINITY
extern int __kmp_num_proc_groups;
#else
static const int __kmp_num_proc_groups = 1;
#endif /* KMP_GROUP_AFFINITY */

// Function-pointer types for the processor-group calls, each followed by the
// global pointer of that type (declared here, defined elsewhere).
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;

typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;

typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;

typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
                                             GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
#endif /* KMP_OS_WINDOWS */
7010b57cec5SDimitry Andric
// hwloc state; declared only when hwloc is in use and OMPD_SKIP_HWLOC is not
// defined.
#if KMP_USE_HWLOC && !defined(OMPD_SKIP_HWLOC)
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
#endif
7060b57cec5SDimitry Andric
// Affinity is treated as capable exactly when this value is greater than 0.
extern size_t __kmp_affin_mask_size;

// --- Capability switches -----------------------------------------------------
#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)

// --- Operations on a single mask (thin wrappers over Mask members) ----------
// Loop header that walks `mask` from begin() to end() via next(); `i` must be
// an already-declared variable.
#define KMP_CPU_SET_ITERATE(i, mask)                                           \
  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
#define KMP_CPU_SET(i, mask) (mask)->set(i)
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
#define KMP_CPU_ISEMPTY(mask) (mask)->empty()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
// Note: max_bit_number is accepted but not used.
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src)

// --- Allocation of a single mask (through __kmp_affinity_dispatch) ----------
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
// The ON_STACK and INTERNAL variants are plain aliases of the two above.
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)

// --- Arrays of masks (through __kmp_affinity_dispatch) ----------------------
#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
#define KMP_CPU_ALLOC_ARRAY(arr, n)                                            \
  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
// Note: n is accepted but not used.
#define KMP_CPU_FREE_ARRAY(arr, n)                                             \
  __kmp_affinity_dispatch->deallocate_mask_array(arr)
#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)

// --- System affinity (thin wrappers over Mask members) ----------------------
#define __kmp_get_system_affinity(mask, abort_bool)                            \
  (mask)->get_system_affinity(abort_bool)
#define __kmp_set_system_affinity(mask, abort_bool)                            \
  (mask)->set_system_affinity(abort_bool)
#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
7410b57cec5SDimitry Andric
7420b57cec5SDimitry Andric class KMPAffinity {
7430b57cec5SDimitry Andric public:
7440b57cec5SDimitry Andric class Mask {
7450b57cec5SDimitry Andric public:
7460b57cec5SDimitry Andric void *operator new(size_t n);
7470b57cec5SDimitry Andric void operator delete(void *p);
7480b57cec5SDimitry Andric void *operator new[](size_t n);
7490b57cec5SDimitry Andric void operator delete[](void *p);
~Mask()7500b57cec5SDimitry Andric virtual ~Mask() {}
7510b57cec5SDimitry Andric // Set bit i to 1
set(int i)7520b57cec5SDimitry Andric virtual void set(int i) {}
7530b57cec5SDimitry Andric // Return bit i
is_set(int i)7540b57cec5SDimitry Andric virtual bool is_set(int i) const { return false; }
7550b57cec5SDimitry Andric // Set bit i to 0
clear(int i)7560b57cec5SDimitry Andric virtual void clear(int i) {}
7570b57cec5SDimitry Andric // Zero out entire mask
zero()7580b57cec5SDimitry Andric virtual void zero() {}
7595f757f3fSDimitry Andric // Check whether mask is empty
empty()7605f757f3fSDimitry Andric virtual bool empty() const { return true; }
7610b57cec5SDimitry Andric // Copy src into this mask
copy(const Mask * src)7620b57cec5SDimitry Andric virtual void copy(const Mask *src) {}
7630b57cec5SDimitry Andric // this &= rhs
bitwise_and(const Mask * rhs)7640b57cec5SDimitry Andric virtual void bitwise_and(const Mask *rhs) {}
7650b57cec5SDimitry Andric // this |= rhs
bitwise_or(const Mask * rhs)7660b57cec5SDimitry Andric virtual void bitwise_or(const Mask *rhs) {}
7670b57cec5SDimitry Andric // this = ~this
bitwise_not()7680b57cec5SDimitry Andric virtual void bitwise_not() {}
7695f757f3fSDimitry Andric // this == rhs
is_equal(const Mask * rhs)7705f757f3fSDimitry Andric virtual bool is_equal(const Mask *rhs) const { return false; }
7710b57cec5SDimitry Andric // API for iterating over an affinity mask
7720b57cec5SDimitry Andric // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
begin()7730b57cec5SDimitry Andric virtual int begin() const { return 0; }
end()7740b57cec5SDimitry Andric virtual int end() const { return 0; }
next(int previous)7750b57cec5SDimitry Andric virtual int next(int previous) const { return 0; }
776e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS
set_process_affinity(bool abort_on_error)777e8d8bef9SDimitry Andric virtual int set_process_affinity(bool abort_on_error) const { return -1; }
778e8d8bef9SDimitry Andric #endif
7790b57cec5SDimitry Andric // Set the system's affinity to this affinity mask's value
set_system_affinity(bool abort_on_error)7800b57cec5SDimitry Andric virtual int set_system_affinity(bool abort_on_error) const { return -1; }
7810b57cec5SDimitry Andric // Set this affinity mask to the current system affinity
get_system_affinity(bool abort_on_error)7820b57cec5SDimitry Andric virtual int get_system_affinity(bool abort_on_error) { return -1; }
7830b57cec5SDimitry Andric // Only 1 DWORD in the mask should have any procs set.
7840b57cec5SDimitry Andric // Return the appropriate index, or -1 for an invalid mask.
get_proc_group()7850b57cec5SDimitry Andric virtual int get_proc_group() const { return -1; }
get_max_cpu()786bdd1243dSDimitry Andric int get_max_cpu() const {
787bdd1243dSDimitry Andric int cpu;
788bdd1243dSDimitry Andric int max_cpu = -1;
789bdd1243dSDimitry Andric KMP_CPU_SET_ITERATE(cpu, this) {
790bdd1243dSDimitry Andric if (cpu > max_cpu)
791bdd1243dSDimitry Andric max_cpu = cpu;
792bdd1243dSDimitry Andric }
793bdd1243dSDimitry Andric return max_cpu;
794bdd1243dSDimitry Andric }
7950b57cec5SDimitry Andric };
7960b57cec5SDimitry Andric void *operator new(size_t n);
7970b57cec5SDimitry Andric void operator delete(void *p);
7980b57cec5SDimitry Andric // Need virtual destructor
7990b57cec5SDimitry Andric virtual ~KMPAffinity() = default;
8000b57cec5SDimitry Andric // Determine if affinity is capable
determine_capable(const char * env_var)8010b57cec5SDimitry Andric virtual void determine_capable(const char *env_var) {}
8020b57cec5SDimitry Andric // Bind the current thread to os proc
bind_thread(int proc)8030b57cec5SDimitry Andric virtual void bind_thread(int proc) {}
8040b57cec5SDimitry Andric // Factory functions to allocate/deallocate a mask
allocate_mask()8050b57cec5SDimitry Andric virtual Mask *allocate_mask() { return nullptr; }
deallocate_mask(Mask * m)8060b57cec5SDimitry Andric virtual void deallocate_mask(Mask *m) {}
allocate_mask_array(int num)8070b57cec5SDimitry Andric virtual Mask *allocate_mask_array(int num) { return nullptr; }
deallocate_mask_array(Mask * m)8080b57cec5SDimitry Andric virtual void deallocate_mask_array(Mask *m) {}
index_mask_array(Mask * m,int index)8090b57cec5SDimitry Andric virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
8100b57cec5SDimitry Andric static void pick_api();
8110b57cec5SDimitry Andric static void destroy_api();
8120b57cec5SDimitry Andric enum api_type {
8130b57cec5SDimitry Andric NATIVE_OS
8140b57cec5SDimitry Andric #if KMP_USE_HWLOC
8150b57cec5SDimitry Andric ,
8160b57cec5SDimitry Andric HWLOC
8170b57cec5SDimitry Andric #endif
8180b57cec5SDimitry Andric };
get_api_type()8190b57cec5SDimitry Andric virtual api_type get_api_type() const {
8200b57cec5SDimitry Andric KMP_ASSERT(0);
8210b57cec5SDimitry Andric return NATIVE_OS;
8220b57cec5SDimitry Andric }
8230b57cec5SDimitry Andric
8240b57cec5SDimitry Andric private:
8250b57cec5SDimitry Andric static bool picked_api;
8260b57cec5SDimitry Andric };
8270b57cec5SDimitry Andric
8280b57cec5SDimitry Andric typedef KMPAffinity::Mask kmp_affin_mask_t;
8290b57cec5SDimitry Andric extern KMPAffinity *__kmp_affinity_dispatch;
8300b57cec5SDimitry Andric
831439352acSDimitry Andric #ifndef KMP_OS_AIX
83206c3fb27SDimitry Andric class kmp_affinity_raii_t {
83306c3fb27SDimitry Andric kmp_affin_mask_t *mask;
83406c3fb27SDimitry Andric bool restored;
83506c3fb27SDimitry Andric
83606c3fb27SDimitry Andric public:
83706c3fb27SDimitry Andric kmp_affinity_raii_t(const kmp_affin_mask_t *new_mask = nullptr)
mask(nullptr)838*0fca6ea1SDimitry Andric : mask(nullptr), restored(false) {
83906c3fb27SDimitry Andric if (KMP_AFFINITY_CAPABLE()) {
84006c3fb27SDimitry Andric KMP_CPU_ALLOC(mask);
84106c3fb27SDimitry Andric KMP_ASSERT(mask != NULL);
84206c3fb27SDimitry Andric __kmp_get_system_affinity(mask, /*abort_on_error=*/true);
84306c3fb27SDimitry Andric if (new_mask)
84406c3fb27SDimitry Andric __kmp_set_system_affinity(new_mask, /*abort_on_error=*/true);
84506c3fb27SDimitry Andric }
84606c3fb27SDimitry Andric }
restore()84706c3fb27SDimitry Andric void restore() {
848*0fca6ea1SDimitry Andric if (mask && KMP_AFFINITY_CAPABLE() && !restored) {
84906c3fb27SDimitry Andric __kmp_set_system_affinity(mask, /*abort_on_error=*/true);
85006c3fb27SDimitry Andric KMP_CPU_FREE(mask);
85106c3fb27SDimitry Andric }
85206c3fb27SDimitry Andric restored = true;
85306c3fb27SDimitry Andric }
~kmp_affinity_raii_t()85406c3fb27SDimitry Andric ~kmp_affinity_raii_t() { restore(); }
85506c3fb27SDimitry Andric };
856439352acSDimitry Andric #endif // !KMP_OS_AIX
85706c3fb27SDimitry Andric
// Size to use when declaring local char buffers for printing debug and info
// messages with __kmp_affinity_print_mask().
#define KMP_AFFIN_MASK_PRINT_LEN 1024
8610b57cec5SDimitry Andric
// Affinity types; values are consecutive, starting at 0 for affinity_none.
enum affinity_type {
  affinity_none = 0,
  affinity_physical = 1,
  affinity_logical = 2,
  affinity_compact = 3,
  affinity_scatter = 4,
  affinity_explicit = 5,
  affinity_balanced = 6,
  affinity_disabled = 7, // not used outside the env var parser
  affinity_default = 8
};
8730b57cec5SDimitry Andric
// Topology methods. Several enumerators exist only in certain build
// configurations, so the numeric values after affinity_top_method_all depend
// on the build.
enum affinity_top_method {
  affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // x86 / x86-64 only
  affinity_top_method_apicid,
  affinity_top_method_x2apicid,
  affinity_top_method_x2apicid_1f,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
  // only with KMP_GROUP_AFFINITY
  affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_top_method_flat,
#if KMP_USE_HWLOC
  // only with KMP_USE_HWLOC
  affinity_top_method_hwloc,
#endif
  affinity_top_method_default
};
8910b57cec5SDimitry Andric
// Value used for the `respect` flag in KMP_AFFINITY_INIT.
#define affinity_respect_mask_default (2)
8930b57cec5SDimitry Andric
894bdd1243dSDimitry Andric typedef struct kmp_affinity_flags_t {
895bdd1243dSDimitry Andric unsigned dups : 1;
896bdd1243dSDimitry Andric unsigned verbose : 1;
897bdd1243dSDimitry Andric unsigned warnings : 1;
898bdd1243dSDimitry Andric unsigned respect : 2;
899bdd1243dSDimitry Andric unsigned reset : 1;
900bdd1243dSDimitry Andric unsigned initialized : 1;
9015f757f3fSDimitry Andric unsigned core_types_gran : 1;
9025f757f3fSDimitry Andric unsigned core_effs_gran : 1;
9035f757f3fSDimitry Andric unsigned omp_places : 1;
9045f757f3fSDimitry Andric unsigned reserved : 22;
905bdd1243dSDimitry Andric } kmp_affinity_flags_t;
906bdd1243dSDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
907bdd1243dSDimitry Andric
908bdd1243dSDimitry Andric typedef struct kmp_affinity_ids_t {
9095f757f3fSDimitry Andric int os_id;
910bdd1243dSDimitry Andric int ids[KMP_HW_LAST];
911bdd1243dSDimitry Andric } kmp_affinity_ids_t;
912bdd1243dSDimitry Andric
// Core attributes packed into bit-fields (8 + 8 + 1 + 15 = 32 bits declared).
typedef struct kmp_affinity_attrs_t {
  int core_type : 8; // signed 8-bit field
  int core_eff : 8; // signed 8-bit field
  unsigned valid : 1;
  unsigned reserved : 15;
} kmp_affinity_attrs_t;

// Brace initializer for a kmp_affinity_attrs_t: unknown core type, unknown
// core efficiency, valid = 0, reserved = 0.
#define KMP_AFFINITY_ATTRS_UNKNOWN                                             \
  { KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }
921bdd1243dSDimitry Andric
922bdd1243dSDimitry Andric typedef struct kmp_affinity_t {
923bdd1243dSDimitry Andric char *proclist;
924bdd1243dSDimitry Andric enum affinity_type type;
925bdd1243dSDimitry Andric kmp_hw_t gran;
926bdd1243dSDimitry Andric int gran_levels;
9275f757f3fSDimitry Andric kmp_affinity_attrs_t core_attr_gran;
928bdd1243dSDimitry Andric int compact;
929bdd1243dSDimitry Andric int offset;
930bdd1243dSDimitry Andric kmp_affinity_flags_t flags;
931bdd1243dSDimitry Andric unsigned num_masks;
932bdd1243dSDimitry Andric kmp_affin_mask_t *masks;
933bdd1243dSDimitry Andric kmp_affinity_ids_t *ids;
934bdd1243dSDimitry Andric kmp_affinity_attrs_t *attrs;
935bdd1243dSDimitry Andric unsigned num_os_id_masks;
936bdd1243dSDimitry Andric kmp_affin_mask_t *os_id_masks;
937bdd1243dSDimitry Andric const char *env_var;
938bdd1243dSDimitry Andric } kmp_affinity_t;
939bdd1243dSDimitry Andric
// Brace initializer for a kmp_affinity_t. Values are positional and follow
// the member order of kmp_affinity_t (and, for `flags`, the order of
// kmp_affinity_flags_t); `env` becomes the env_var member.
#define KMP_AFFINITY_INIT(env)                                                 \
  {                                                                            \
    /* proclist        */ nullptr,                                             \
    /* type            */ affinity_default,                                    \
    /* gran            */ KMP_HW_UNKNOWN,                                      \
    /* gran_levels     */ -1,                                                  \
    /* core_attr_gran  */ KMP_AFFINITY_ATTRS_UNKNOWN,                          \
    /* compact         */ 0,                                                   \
    /* offset          */ 0,                                                   \
    /* flags           */                                                      \
    {/* dups */ TRUE, /* verbose */ FALSE, /* warnings */ TRUE,                \
     /* respect */ affinity_respect_mask_default, /* reset */ FALSE,           \
     /* initialized */ FALSE, /* core_types_gran */ FALSE,                     \
     /* core_effs_gran */ FALSE, /* omp_places */ FALSE},                      \
    /* num_masks       */ 0,                                                   \
    /* masks           */ nullptr,                                             \
    /* ids             */ nullptr,                                             \
    /* attrs           */ nullptr,                                             \
    /* num_os_id_masks */ 0,                                                   \
    /* os_id_masks     */ nullptr,                                             \
    /* env_var         */ env                                                  \
  }
948bdd1243dSDimitry Andric
9490b57cec5SDimitry Andric extern enum affinity_top_method __kmp_affinity_top_method;
950bdd1243dSDimitry Andric extern kmp_affinity_t __kmp_affinity;
951bdd1243dSDimitry Andric extern kmp_affinity_t __kmp_hh_affinity;
952bdd1243dSDimitry Andric extern kmp_affinity_t *__kmp_affinities[2];
953bdd1243dSDimitry Andric
9540b57cec5SDimitry Andric extern void __kmp_affinity_bind_thread(int which);
9550b57cec5SDimitry Andric
9560b57cec5SDimitry Andric extern kmp_affin_mask_t *__kmp_affin_fullMask;
957fcaf7f86SDimitry Andric extern kmp_affin_mask_t *__kmp_affin_origMask;
9580b57cec5SDimitry Andric extern char *__kmp_cpuinfo_file;
9590b57cec5SDimitry Andric
9605f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
9615f757f3fSDimitry Andric extern int __kmp_first_osid_with_ecore;
9625f757f3fSDimitry Andric #endif
9635f757f3fSDimitry Andric
9640b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
9650b57cec5SDimitry Andric
// This needs to be kept in sync with the values in omp.h !!!
// The numeric values are spelled out to make that correspondence explicit.
typedef enum kmp_proc_bind_t {
  proc_bind_false = 0,
  proc_bind_true = 1,
  proc_bind_primary = 2,
  proc_bind_close = 3,
  proc_bind_spread = 4,
  proc_bind_intel = 5, // use KMP_AFFINITY interface
  proc_bind_default = 6
} kmp_proc_bind_t;
9760b57cec5SDimitry Andric
9770b57cec5SDimitry Andric typedef struct kmp_nested_proc_bind_t {
9780b57cec5SDimitry Andric kmp_proc_bind_t *bind_types;
9790b57cec5SDimitry Andric int size;
9800b57cec5SDimitry Andric int used;
9810b57cec5SDimitry Andric } kmp_nested_proc_bind_t;
9820b57cec5SDimitry Andric
9830b57cec5SDimitry Andric extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
984349cc55cSDimitry Andric extern kmp_proc_bind_t __kmp_teams_proc_bind;
9850b57cec5SDimitry Andric
// Display-affinity settings; declared here, defined elsewhere.
extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
// NOTE(review): presumably the buffer size for __kmp_affinity_format --
// confirm at the allocation site.
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;

#if OMPT_SUPPORT
// Tool settings; present only in OMPT-enabled builds.
extern int __kmp_tool;
extern char *__kmp_tool_libraries;
#endif // OMPT_SUPPORT
9930b57cec5SDimitry Andric
9940b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
// Special place values.
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)

// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
// True when the first proc-bind entry is proc_bind_false or proc_bind_intel
// and __kmp_affinity either has masks or is of type affinity_balanced.
#define KMP_AFFINITY_NON_PROC_BIND                                             \
  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false ||                 \
    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) &&                \
   (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))
10020b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
10030b57cec5SDimitry Andric
// Declared outside the KMP_AFFINITY_SUPPORTED guard, so it exists in every
// build configuration; defined elsewhere.
extern int __kmp_affinity_num_places;
10050b57cec5SDimitry Andric
// Cancellation kinds; cancel_noreq (0) is the "no request" value.
typedef enum kmp_cancel_kind_t {
  cancel_noreq = 0,
  cancel_parallel,  // = 1
  cancel_loop,      // = 2
  cancel_sections,  // = 3
  cancel_taskgroup  // = 4
} kmp_cancel_kind_t;
10130b57cec5SDimitry Andric
// KMP_HW_SUBSET support:
// one (num, offset) pair per hardware level.
typedef struct kmp_hws_item {
  int num;
  int offset;
} kmp_hws_item_t;

// One item per level; declared here, defined elsewhere.
extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_die;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;

extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested
10280b57cec5SDimitry Andric
10290b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
10300b57cec5SDimitry Andric
// sizeof(type) rounded up to the next multiple of sz (unchanged when it is
// already a multiple). Every use of `sz` is parenthesized so that an
// expression argument such as `1 << 6` is evaluated as a unit; left bare, the
// subtraction that follows would bind more tightly than the shift.
#define KMP_PAD(type, sz)                                                      \
  (sizeof(type) + ((sz) - ((sizeof(type) - 1) % (sz)) - 1))
10330b57cec5SDimitry Andric
// Special (negative) gtid values.
// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
#define KMP_GTID_DNE (-2)      /* Does not exist */
#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
#define KMP_GTID_MONITOR (-4)  /* Monitor thread ID */
#define KMP_GTID_UNKNOWN (-5)  /* Is not known */
#define KMP_GTID_MIN (-6)      /* Minimal gtid for low bound check in DEBUG */
10410b57cec5SDimitry Andric
10420b57cec5SDimitry Andric /* OpenMP 5.0 Memory Management support */
10430b57cec5SDimitry Andric
10440b57cec5SDimitry Andric #ifndef __OMP_H
1045480093f4SDimitry Andric // Duplicate type definitions from omp.h
// Integer type used for allocator trait values.
typedef uintptr_t omp_uintptr_t;
10470b57cec5SDimitry Andric
// Allocator trait keys; numbered consecutively from 1.
typedef enum {
  omp_atk_sync_hint = 1,
  omp_atk_alignment = 2,
  omp_atk_access = 3,
  omp_atk_pool_size = 4,
  omp_atk_fallback = 5,
  omp_atk_fb_data = 6,
  omp_atk_pinned = 7,
  omp_atk_partition = 8
} omp_alloctrait_key_t;
10580b57cec5SDimitry Andric
// Allocator trait values. Note that the value 2 is not assigned to any
// enumerator here.
typedef enum {
  omp_atv_false = 0,
  omp_atv_true = 1,
  omp_atv_contended = 3,
  omp_atv_uncontended = 4,
  omp_atv_serialized = 5,
  omp_atv_sequential = omp_atv_serialized, // (deprecated)
  omp_atv_private = 6,
  omp_atv_all = 7,
  omp_atv_thread = 8,
  omp_atv_pteam = 9,
  omp_atv_cgroup = 10,
  omp_atv_default_mem_fb = 11,
  omp_atv_null_fb = 12,
  omp_atv_abort_fb = 13,
  omp_atv_allocator_fb = 14,
  omp_atv_environment = 15,
  omp_atv_nearest = 16,
  omp_atv_blocked = 17,
  omp_atv_interleaved = 18
} omp_alloctrait_value_t;
// The "default" value is not an enumerator: it is all-ones in omp_uintptr_t.
#define omp_atv_default ((omp_uintptr_t)-1)
10810b57cec5SDimitry Andric
// Opaque memory-space handle and the predefined memory spaces (declared
// here, defined elsewhere).
typedef void *omp_memspace_handle_t;

// omp_* memory spaces
extern omp_memspace_handle_t const omp_default_mem_space;
extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;

// llvm_omp_target_* memory spaces
extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
10910b57cec5SDimitry Andric
10920b57cec5SDimitry Andric typedef struct {
10930b57cec5SDimitry Andric omp_alloctrait_key_t key;
10940b57cec5SDimitry Andric omp_uintptr_t value;
10950b57cec5SDimitry Andric } omp_alloctrait_t;
10960b57cec5SDimitry Andric
// Opaque allocator handle and the predefined allocators (declared here,
// defined elsewhere).
typedef void *omp_allocator_handle_t;

// omp_* allocators
extern omp_allocator_handle_t const omp_null_allocator;
extern omp_allocator_handle_t const omp_default_mem_alloc;
extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
extern omp_allocator_handle_t const omp_const_mem_alloc;
extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;

// llvm_omp_target_* allocators
extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;

extern omp_allocator_handle_t const kmp_max_mem_alloc;
// Unlike the handles above, this one is not const.
extern omp_allocator_handle_t __kmp_def_allocator;
11120b57cec5SDimitry Andric
1113480093f4SDimitry Andric // end of duplicate type definitions from omp.h
11140b57cec5SDimitry Andric #endif
11150b57cec5SDimitry Andric
// Declared here, defined elsewhere. Presumably nonzero once the memkind
// library has been located -- confirm at the definition.
11160b57cec5SDimitry Andric extern int __kmp_memkind_available;
11170b57cec5SDimitry Andric
// Runtime-internal alias for the public memory-space handle type.
11180b57cec5SDimitry Andric typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
11190b57cec5SDimitry Andric
// Runtime-side description of an allocator. Field meanings marked
// "presumably" are inferred from names and types only; confirm against the
// code that fills this structure in.
11200b57cec5SDimitry Andric typedef struct kmp_allocator_t {
11210b57cec5SDimitry Andric omp_memspace_handle_t memspace;
11220b57cec5SDimitry Andric void **memkind; // pointer to memkind
11230b57cec5SDimitry Andric size_t alignment;
11240b57cec5SDimitry Andric omp_alloctrait_value_t fb; // presumably the fallback trait (omp_atv_*_fb)
11250b57cec5SDimitry Andric kmp_allocator_t *fb_data; // presumably the allocator used on fallback
11260b57cec5SDimitry Andric kmp_uint64 pool_size;
11270b57cec5SDimitry Andric kmp_uint64 pool_used;
1128bdd1243dSDimitry Andric bool pinned;
11290b57cec5SDimitry Andric } kmp_allocator_t;
11300b57cec5SDimitry Andric
// Allocator management and allocation entry points. All take the caller's
// global thread id (gtid) as the first argument. Only declarations appear
// here; the implementations are in another translation unit.
11310b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
11320b57cec5SDimitry Andric omp_memspace_handle_t,
11330b57cec5SDimitry Andric int ntraits,
11340b57cec5SDimitry Andric omp_alloctrait_t traits[]);
11350b57cec5SDimitry Andric extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
11360b57cec5SDimitry Andric extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
11370b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
1138349cc55cSDimitry Andric // external interfaces, may be used by compiler
11390b57cec5SDimitry Andric extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
1140349cc55cSDimitry Andric extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,
1141349cc55cSDimitry Andric omp_allocator_handle_t al);
1142e8d8bef9SDimitry Andric extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,
1143e8d8bef9SDimitry Andric omp_allocator_handle_t al);
1144e8d8bef9SDimitry Andric extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,
1145e8d8bef9SDimitry Andric omp_allocator_handle_t al,
1146e8d8bef9SDimitry Andric omp_allocator_handle_t free_al);
11470b57cec5SDimitry Andric extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1148349cc55cSDimitry Andric // internal interfaces, contain real implementation
// Unlike the __kmpc_ variants, __kmp_alloc and __kmp_calloc take an explicit
// alignment argument.
1149349cc55cSDimitry Andric extern void *__kmp_alloc(int gtid, size_t align, size_t sz,
1150349cc55cSDimitry Andric omp_allocator_handle_t al);
1151349cc55cSDimitry Andric extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,
1152349cc55cSDimitry Andric omp_allocator_handle_t al);
1153349cc55cSDimitry Andric extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,
1154349cc55cSDimitry Andric omp_allocator_handle_t al,
1155349cc55cSDimitry Andric omp_allocator_handle_t free_al);
// Note the three leading underscores: this is a distinct symbol from
// __kmpc_free declared above.
1156349cc55cSDimitry Andric extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
11570b57cec5SDimitry Andric
// Set-up / tear-down hooks for the memkind and target-memory back ends
// (declarations only).
11580b57cec5SDimitry Andric extern void __kmp_init_memkind();
11590b57cec5SDimitry Andric extern void __kmp_fini_memkind();
1160fe6060f1SDimitry Andric extern void __kmp_init_target_mem();
11610b57cec5SDimitry Andric
11620b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
11630b57cec5SDimitry Andric
// Only declared when the runtime is built with ENABLE_LIBOMPTARGET.
116406c3fb27SDimitry Andric #if ENABLE_LIBOMPTARGET
116506c3fb27SDimitry Andric extern void __kmp_init_target_task();
116606c3fb27SDimitry Andric #endif
116706c3fb27SDimitry Andric
116806c3fb27SDimitry Andric /* ------------------------------------------------------------------------ */
116906c3fb27SDimitry Andric
// NOTE: despite the name, this is the complement of a 1 shifted into the top
// bit, i.e. every bit set except the most significant one (2^63 - 1 when
// kmp_uint64 is 64 bits wide), not the all-ones value.
11700b57cec5SDimitry Andric #define KMP_UINT64_MAX \
11710b57cec5SDimitry Andric (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
11720b57cec5SDimitry Andric
// Lower bound on the thread count.
11730b57cec5SDimitry Andric #define KMP_MIN_NTH 1
11740b57cec5SDimitry Andric
// Upper bound on the thread count, unless the build already defined it:
// PTHREAD_THREADS_MAX when that is defined and below INT_MAX, otherwise 64 on
// VE, otherwise INT_MAX.
11750b57cec5SDimitry Andric #ifndef KMP_MAX_NTH
11760b57cec5SDimitry Andric #if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
11770b57cec5SDimitry Andric #define KMP_MAX_NTH PTHREAD_THREADS_MAX
11780b57cec5SDimitry Andric #else
11795f757f3fSDimitry Andric #ifdef __ve__
11805f757f3fSDimitry Andric // VE's pthread supports only up to 64 threads per VE process.
11815f757f3fSDimitry Andric // Please check p. 14 of the following documentation for more details.
11825f757f3fSDimitry Andric // https://sxauroratsubasa.sakura.ne.jp/documents/veos/en/VEOS_high_level_design.pdf
11835f757f3fSDimitry Andric #define KMP_MAX_NTH 64
11845f757f3fSDimitry Andric #else
11850b57cec5SDimitry Andric #define KMP_MAX_NTH INT_MAX
11860b57cec5SDimitry Andric #endif
11875f757f3fSDimitry Andric #endif
11880b57cec5SDimitry Andric #endif /* KMP_MAX_NTH */
11890b57cec5SDimitry Andric
// Smallest accepted stack size: the platform's PTHREAD_STACK_MIN when
// available, otherwise 32 KiB.
11900b57cec5SDimitry Andric #ifdef PTHREAD_STACK_MIN
119106c3fb27SDimitry Andric #define KMP_MIN_STKSIZE ((size_t)PTHREAD_STACK_MIN)
11920b57cec5SDimitry Andric #else
11930b57cec5SDimitry Andric #define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
11940b57cec5SDimitry Andric #endif
11950b57cec5SDimitry Andric
// Largest accepted stack size. Outside 32-bit AIX this is every bit of size_t
// set except the most significant one (i.e. half the size_t range).
119674626c16SDimitry Andric #if KMP_OS_AIX && KMP_ARCH_PPC
119774626c16SDimitry Andric #define KMP_MAX_STKSIZE 0x10000000 /* 256Mb max size on 32-bit AIX */
119874626c16SDimitry Andric #else
11990b57cec5SDimitry Andric #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
120074626c16SDimitry Andric #endif
12010b57cec5SDimitry Andric
// Default stack size per architecture/OS: 2 MiB on x86, 4 MiB on x86_64, VE
// and AIX, 1 MiB elsewhere. KMP_BACKUP_STKSIZE is defined only on x86_64.
12020b57cec5SDimitry Andric #if KMP_ARCH_X86
12030b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
12040b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
12050b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
12060b57cec5SDimitry Andric #define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
12075f757f3fSDimitry Andric #elif KMP_ARCH_VE
12085f757f3fSDimitry Andric // Minimum stack size for pthread for VE is 4MB.
12095f757f3fSDimitry Andric // https://www.hpc.nec/documents/veos/en/glibc/Difference_Points_glibc.htm
12105f757f3fSDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
12111db9f3b2SDimitry Andric #elif KMP_OS_AIX
12121db9f3b2SDimitry Andric // The default stack size for worker threads on AIX is 4MB.
12131db9f3b2SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
12140b57cec5SDimitry Andric #else
12150b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
12160b57cec5SDimitry Andric #endif
12170b57cec5SDimitry Andric
// Malloc pool growth increment: default 1 MiB, minimum 4 KiB, maximum every
// bit of size_t set except the most significant one.
12180b57cec5SDimitry Andric #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
12190b57cec5SDimitry Andric #define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
12200b57cec5SDimitry Andric #define KMP_MAX_MALLOC_POOL_INCR \
12210b57cec5SDimitry Andric (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
12220b57cec5SDimitry Andric
// Stack offset bounds and default: 0 on Darwin, CACHE_LINE elsewhere.
12230b57cec5SDimitry Andric #define KMP_MIN_STKOFFSET (0)
12240b57cec5SDimitry Andric #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
12250b57cec5SDimitry Andric #if KMP_OS_DARWIN
12260b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
12270b57cec5SDimitry Andric #else
12280b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET CACHE_LINE
12290b57cec5SDimitry Andric #endif
12300b57cec5SDimitry Andric
// Stack padding bounds: 0 .. 2 MiB.
12310b57cec5SDimitry Andric #define KMP_MIN_STKPADDING (0)
12320b57cec5SDimitry Andric #define KMP_MAX_STKPADDING (2 * 1024 * 1024)
12330b57cec5SDimitry Andric
12340b57cec5SDimitry Andric #define KMP_BLOCKTIME_MULTIPLIER \
12355f757f3fSDimitry Andric (1000000) /* number of blocktime units per second */
12360b57cec5SDimitry Andric #define KMP_MIN_BLOCKTIME (0)
12370b57cec5SDimitry Andric #define KMP_MAX_BLOCKTIME \
12380b57cec5SDimitry Andric (INT_MAX) /* Must be this for the "infinite" setting to work */
1239349cc55cSDimitry Andric
12405f757f3fSDimitry Andric /* __kmp_blocktime is in microseconds */
/* Default is 0 on hybrid CPUs and 200000 (i.e. 200 ms) otherwise. This expands
   to a call to __kmp_is_hybrid_cpu(), so it is not a compile-time constant. */
12415f757f3fSDimitry Andric #define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200000))
12420b57cec5SDimitry Andric
// Blocktime bookkeeping. With KMP_USE_MONITOR the blocktime is expressed in
// monitor-thread wakeups/intervals; without it, each thread compares a goal
// timestamp against KMP_NOW().
12430b57cec5SDimitry Andric #if KMP_USE_MONITOR
12440b57cec5SDimitry Andric #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
12450b57cec5SDimitry Andric #define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
12460b57cec5SDimitry Andric #define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
12470b57cec5SDimitry Andric
12480b57cec5SDimitry Andric /* Calculate new number of monitor wakeups for a specific block time based on
12490b57cec5SDimitry Andric previous monitor_wakeups. Only allow increasing number of wakeups */
/* The division by blocktime is reached only after the KMP_MIN_BLOCKTIME (0)
   case has been handled, so it never divides by zero. */
12500b57cec5SDimitry Andric #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1251fe6060f1SDimitry Andric (((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) \
1252fe6060f1SDimitry Andric : ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS \
12530b57cec5SDimitry Andric : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
12540b57cec5SDimitry Andric ? (monitor_wakeups) \
12550b57cec5SDimitry Andric : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
12560b57cec5SDimitry Andric
12570b57cec5SDimitry Andric /* Calculate number of intervals for a specific block time based on
12580b57cec5SDimitry Andric monitor_wakeups */
/* This is blocktime divided by the interval length
   (KMP_BLOCKTIME_MULTIPLIER / monitor_wakeups), rounded up. */
12590b57cec5SDimitry Andric #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
12600b57cec5SDimitry Andric (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \
12610b57cec5SDimitry Andric (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
12620b57cec5SDimitry Andric #else
// Per-thread blocktime if one was set for (team, tid), else the global default.
12630b57cec5SDimitry Andric #define KMP_BLOCKTIME(team, tid) \
12640b57cec5SDimitry Andric (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
12650b57cec5SDimitry Andric #if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
12660b57cec5SDimitry Andric // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
12670b57cec5SDimitry Andric extern kmp_uint64 __kmp_ticks_per_msec;
12685f757f3fSDimitry Andric extern kmp_uint64 __kmp_ticks_per_usec;
126981ad6265SDimitry Andric #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
12700b57cec5SDimitry Andric #define KMP_NOW() ((kmp_uint64)_rdtsc())
12710b57cec5SDimitry Andric #else
12720b57cec5SDimitry Andric #define KMP_NOW() __kmp_hardware_timestamp()
12730b57cec5SDimitry Andric #endif
// Blocktime converted to TSC ticks. `count` is unused in this branch: the
// timestamp is read on every KMP_BLOCKING evaluation.
12740b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid) \
12755f757f3fSDimitry Andric ((kmp_uint64)KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_usec)
12760b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
12770b57cec5SDimitry Andric #else
12780b57cec5SDimitry Andric // System time is retrieved sporadically while blocking.
12790b57cec5SDimitry Andric extern kmp_uint64 __kmp_now_nsec();
12800b57cec5SDimitry Andric #define KMP_NOW() __kmp_now_nsec()
// Blocktime converted to nanoseconds. KMP_BLOCKING short-circuits, so the
// clock is consulted only when `count` is a multiple of 1000.
12810b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid) \
12825f757f3fSDimitry Andric ((kmp_uint64)KMP_BLOCKTIME(team, tid) * (kmp_uint64)KMP_NSEC_PER_USEC)
12830b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
12840b57cec5SDimitry Andric #endif
12850b57cec5SDimitry Andric #endif // KMP_USE_MONITOR
12860b57cec5SDimitry Andric
// Bounds and defaults for assorted runtime settings. Each group gives the
// minimum, maximum and/or default of one setting.
12870b57cec5SDimitry Andric #define KMP_MIN_STATSCOLS 40
12880b57cec5SDimitry Andric #define KMP_MAX_STATSCOLS 4096
12890b57cec5SDimitry Andric #define KMP_DEFAULT_STATSCOLS 80
12900b57cec5SDimitry Andric
12910b57cec5SDimitry Andric #define KMP_MIN_INTERVAL 0
12920b57cec5SDimitry Andric #define KMP_MAX_INTERVAL (INT_MAX - 1)
12930b57cec5SDimitry Andric #define KMP_DEFAULT_INTERVAL 0
12940b57cec5SDimitry Andric
12950b57cec5SDimitry Andric #define KMP_MIN_CHUNK 1
12960b57cec5SDimitry Andric #define KMP_MAX_CHUNK (INT_MAX - 1)
12970b57cec5SDimitry Andric #define KMP_DEFAULT_CHUNK 1
12980b57cec5SDimitry Andric
// NOTE(review): "DISP_NUM_BUFF" presumably counts dispatch buffers -- confirm
// at the point of use.
1299fe6060f1SDimitry Andric #define KMP_MIN_DISP_NUM_BUFF 1
13000b57cec5SDimitry Andric #define KMP_DFLT_DISP_NUM_BUFF 7
1301fe6060f1SDimitry Andric #define KMP_MAX_DISP_NUM_BUFF 4096
1302fe6060f1SDimitry Andric
13030b57cec5SDimitry Andric #define KMP_MAX_ORDERED 8
13040b57cec5SDimitry Andric
13050b57cec5SDimitry Andric #define KMP_MAX_FIELDS 32
13060b57cec5SDimitry Andric
13070b57cec5SDimitry Andric #define KMP_MAX_BRANCH_BITS 31
13080b57cec5SDimitry Andric
13090b57cec5SDimitry Andric #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
13100b57cec5SDimitry Andric
13110b57cec5SDimitry Andric #define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX
13120b57cec5SDimitry Andric
13130b57cec5SDimitry Andric #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
13140b57cec5SDimitry Andric
13150b57cec5SDimitry Andric /* Minimum number of threads before switch to TLS gtid (experimentally
13160b57cec5SDimitry Andric determined) */
13170b57cec5SDimitry Andric /* josh TODO: what about OS X* tuning? */
/* On non-x86 targets the threshold is INT_MAX. */
13180b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
13190b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN 5
13200b57cec5SDimitry Andric #else
13210b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN INT_MAX
13220b57cec5SDimitry Andric #endif
13230b57cec5SDimitry Andric
/* Team-local thread id predicates: tid 0 is the master, any other a worker. */
1324e8d8bef9SDimitry Andric #define KMP_MASTER_TID(tid) (0 == (tid))
1325e8d8bef9SDimitry Andric #define KMP_WORKER_TID(tid) (0 != (tid))
13260b57cec5SDimitry Andric
/* Same predicates starting from a global thread id, which is first mapped to
   a tid via __kmp_tid_from_gtid(). KMP_INITIAL_GTID tests for gtid 0 itself. */
1327e8d8bef9SDimitry Andric #define KMP_MASTER_GTID(gtid) (0 == __kmp_tid_from_gtid((gtid)))
1328e8d8bef9SDimitry Andric #define KMP_WORKER_GTID(gtid) (0 != __kmp_tid_from_gtid((gtid)))
1329e8d8bef9SDimitry Andric #define KMP_INITIAL_GTID(gtid) (0 == (gtid))
13300b57cec5SDimitry Andric
/* Boolean constants, supplied only when TRUE is not already defined. FALSE is
   (re)defined inside the same guard. */
13310b57cec5SDimitry Andric #ifndef TRUE
13320b57cec5SDimitry Andric #define FALSE 0
13330b57cec5SDimitry Andric #define TRUE (!FALSE)
13340b57cec5SDimitry Andric #endif
13350b57cec5SDimitry Andric
13360b57cec5SDimitry Andric /* NOTE: all of the following constants must be even */
/* Per-OS spin-wait tuning. There is no fallback branch: if none of the
   KMP_OS_* tests below matches, neither macro is defined. */
13370b57cec5SDimitry Andric
13380b57cec5SDimitry Andric #if KMP_OS_WINDOWS
13390b57cec5SDimitry Andric #define KMP_INIT_WAIT 64U /* initial number of spin-tests */
13400b57cec5SDimitry Andric #define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
13410b57cec5SDimitry Andric #elif KMP_OS_LINUX
13420b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13430b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13440b57cec5SDimitry Andric #elif KMP_OS_DARWIN
13450b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DARWIN */
13460b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13470b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13480b57cec5SDimitry Andric #elif KMP_OS_DRAGONFLY
13490b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DRAGONFLY */
13500b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13510b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13520b57cec5SDimitry Andric #elif KMP_OS_FREEBSD
13530b57cec5SDimitry Andric /* TODO: tune for KMP_OS_FREEBSD */
13540b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13550b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13560b57cec5SDimitry Andric #elif KMP_OS_NETBSD
13570b57cec5SDimitry Andric /* TODO: tune for KMP_OS_NETBSD */
13580b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13590b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13605f757f3fSDimitry Andric #elif KMP_OS_OPENBSD
13615f757f3fSDimitry Andric /* TODO: tune for KMP_OS_OPENBSD */
13625f757f3fSDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13635f757f3fSDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13640b57cec5SDimitry Andric #elif KMP_OS_HURD
13650b57cec5SDimitry Andric /* TODO: tune for KMP_OS_HURD */
13660b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13670b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13685f757f3fSDimitry Andric #elif KMP_OS_SOLARIS
13695f757f3fSDimitry Andric /* TODO: tune for KMP_OS_SOLARIS */
13705f757f3fSDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13715f757f3fSDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13725f757f3fSDimitry Andric #elif KMP_OS_WASI
13735f757f3fSDimitry Andric /* TODO: tune for KMP_OS_WASI */
13740b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13750b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13761db9f3b2SDimitry Andric #elif KMP_OS_AIX
13771db9f3b2SDimitry Andric /* TODO: tune for KMP_OS_AIX */
13781db9f3b2SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
13791db9f3b2SDimitry Andric #define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
13800b57cec5SDimitry Andric #endif
13810b57cec5SDimitry Andric
13820b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
// The four 32-bit registers produced by one CPUID invocation.
13830b57cec5SDimitry Andric typedef struct kmp_cpuid {
13840b57cec5SDimitry Andric kmp_uint32 eax;
13850b57cec5SDimitry Andric kmp_uint32 ebx;
13860b57cec5SDimitry Andric kmp_uint32 ecx;
13870b57cec5SDimitry Andric kmp_uint32 edx;
13880b57cec5SDimitry Andric } kmp_cpuid_t;
13890b57cec5SDimitry Andric
// CPU feature bits packed into bit-fields totalling 32 bits (1+1+1+29).
1390349cc55cSDimitry Andric typedef struct kmp_cpuinfo_flags_t {
1391349cc55cSDimitry Andric unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
1392349cc55cSDimitry Andric unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
1393349cc55cSDimitry Andric unsigned hybrid : 1; // presumably 1 on hybrid CPUs -- confirm where it is set
1394349cc55cSDimitry Andric unsigned reserved : 29; // Ensure size of 32 bits
1395349cc55cSDimitry Andric } kmp_cpuinfo_flags_t;
1396349cc55cSDimitry Andric
// Decoded processor identification. `name` is sized to hold three
// kmp_cpuid_t register sets (3 * 16 = 48 bytes).
13970b57cec5SDimitry Andric typedef struct kmp_cpuinfo {
13980b57cec5SDimitry Andric int initialized; // If 0, other fields are not initialized.
13990b57cec5SDimitry Andric int signature; // CPUID(1).EAX
14000b57cec5SDimitry Andric int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
14010b57cec5SDimitry Andric int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
14020b57cec5SDimitry Andric // Model << 4 ) + Model)
14030b57cec5SDimitry Andric int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1404349cc55cSDimitry Andric kmp_cpuinfo_flags_t flags;
14050b57cec5SDimitry Andric int apic_id;
14060b57cec5SDimitry Andric kmp_uint64 frequency; // Nominal CPU frequency in Hz.
14070b57cec5SDimitry Andric char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
14080b57cec5SDimitry Andric } kmp_cpuinfo_t;
14090b57cec5SDimitry Andric
// Declaration only; presumably fills *p with the decoded CPUID data --
// confirm at the definition.
14100b57cec5SDimitry Andric extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
14110b57cec5SDimitry Andric
14120b57cec5SDimitry Andric #if KMP_OS_UNIX
// Executes CPUID with EAX = leaf and ECX = subleaf and stores the resulting
// EAX/EBX/ECX/EDX into *p.
14130b57cec5SDimitry Andric // subleaf is only needed for cache and topology discovery and can be set to
14140b57cec5SDimitry Andric // zero in most cases
__kmp_x86_cpuid(int leaf,int subleaf,struct kmp_cpuid * p)14150b57cec5SDimitry Andric static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
14160b57cec5SDimitry Andric __asm__ __volatile__("cpuid"
14170b57cec5SDimitry Andric : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
14180b57cec5SDimitry Andric : "a"(leaf), "c"(subleaf));
14190b57cec5SDimitry Andric }
14200b57cec5SDimitry Andric // Load p into FPU control word
// (FLDCW reads the 16-bit value at *p as a memory operand.)
__kmp_load_x87_fpu_control_word(const kmp_int16 * p)14210b57cec5SDimitry Andric static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
14220b57cec5SDimitry Andric __asm__ __volatile__("fldcw %0" : : "m"(*p));
14230b57cec5SDimitry Andric }
14240b57cec5SDimitry Andric // Store FPU control word into p
// (FSTCW writes the 16-bit control word to *p as a memory operand.)
__kmp_store_x87_fpu_control_word(kmp_int16 * p)14250b57cec5SDimitry Andric static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
14260b57cec5SDimitry Andric __asm__ __volatile__("fstcw %0" : "=m"(*p));
14270b57cec5SDimitry Andric }
// Clears x87 status-word exception state. On KMP_MIC this stores the FP
// environment, masks the saved status word with 0x7f00 and reloads the
// environment; on every other target it issues a single FNCLEX.
__kmp_clear_x87_fpu_status_word()14280b57cec5SDimitry Andric static inline void __kmp_clear_x87_fpu_status_word() {
14290b57cec5SDimitry Andric #if KMP_MIC
14300b57cec5SDimitry Andric // 32-bit protected mode x87 FPU state
14310b57cec5SDimitry Andric struct x87_fpu_state {
14320b57cec5SDimitry Andric unsigned cw;
14330b57cec5SDimitry Andric unsigned sw;
14340b57cec5SDimitry Andric unsigned tw;
14350b57cec5SDimitry Andric unsigned fip;
14360b57cec5SDimitry Andric unsigned fips;
14370b57cec5SDimitry Andric unsigned fdp;
14380b57cec5SDimitry Andric unsigned fds;
14390b57cec5SDimitry Andric };
14400b57cec5SDimitry Andric struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
14410b57cec5SDimitry Andric __asm__ __volatile__("fstenv %0\n\t" // store FP env
14420b57cec5SDimitry Andric "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
14430b57cec5SDimitry Andric "fldenv %0\n\t" // load FP env back
14440b57cec5SDimitry Andric : "+m"(fpu_state), "+m"(fpu_state.sw));
14450b57cec5SDimitry Andric #else
14460b57cec5SDimitry Andric __asm__ __volatile__("fnclex");
14470b57cec5SDimitry Andric #endif // KMP_MIC
14480b57cec5SDimitry Andric }
// MXCSR accessors. With __SSE__ they forward to _mm_setcsr/_mm_getcsr; without
// it the load is a no-op and the store writes 0 to *p.
14490b57cec5SDimitry Andric #if __SSE__
__kmp_load_mxcsr(const kmp_uint32 * p)14500b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
__kmp_store_mxcsr(kmp_uint32 * p)14510b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
14520b57cec5SDimitry Andric #else
__kmp_load_mxcsr(const kmp_uint32 * p)14530b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
__kmp_store_mxcsr(kmp_uint32 * p)14540b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
14550b57cec5SDimitry Andric #endif
14560b57cec5SDimitry Andric #else
14570b57cec5SDimitry Andric // Windows still has these as external functions in assembly file
// The parameters named mode/mode2 here occupy the same positions as
// leaf/subleaf in the inline Unix definition of __kmp_x86_cpuid.
14580b57cec5SDimitry Andric extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
14590b57cec5SDimitry Andric extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
14600b57cec5SDimitry Andric extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
14610b57cec5SDimitry Andric extern void __kmp_clear_x87_fpu_status_word();
// MXCSR accessors stay inline and always use the SSE intrinsics on this path.
__kmp_load_mxcsr(const kmp_uint32 * p)14620b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
__kmp_store_mxcsr(kmp_uint32 * p)14630b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
14640b57cec5SDimitry Andric #endif // KMP_OS_UNIX
14650b57cec5SDimitry Andric
/* Mask that keeps every MXCSR bit except the low six. */
14660b57cec5SDimitry Andric #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
14670b57cec5SDimitry Andric
1468e8d8bef9SDimitry Andric // User-level Monitor/Mwait
1469e8d8bef9SDimitry Andric #if KMP_HAVE_UMWAIT
1470e8d8bef9SDimitry Andric // We always try for UMWAIT first
1471e8d8bef9SDimitry Andric #if KMP_HAVE_WAITPKG_INTRINSICS
1472e8d8bef9SDimitry Andric #if KMP_HAVE_IMMINTRIN_H
1473e8d8bef9SDimitry Andric #include <immintrin.h>
1474e8d8bef9SDimitry Andric #elif KMP_HAVE_INTRIN_H
1475e8d8bef9SDimitry Andric #include <intrin.h>
1476e8d8bef9SDimitry Andric #endif
1477e8d8bef9SDimitry Andric #endif // KMP_HAVE_WAITPKG_INTRINSICS
147804eeddc0SDimitry Andric
// Issues TPAUSE. Without waitpkg intrinsics the instruction is emitted as raw
// opcode bytes with `hint` in ECX and `counter` split across EDX:EAX, and the
// carry flag captured by SETB is returned; otherwise the result of _tpause()
// is returned.
1479e8d8bef9SDimitry Andric KMP_ATTRIBUTE_TARGET_WAITPKG
__kmp_tpause(uint32_t hint,uint64_t counter)1480fe6060f1SDimitry Andric static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
1481e8d8bef9SDimitry Andric #if !KMP_HAVE_WAITPKG_INTRINSICS
1482e8d8bef9SDimitry Andric uint32_t timeHi = uint32_t(counter >> 32);
1483e8d8bef9SDimitry Andric uint32_t timeLo = uint32_t(counter & 0xffffffff);
1484e8d8bef9SDimitry Andric char flag;
1485e8d8bef9SDimitry Andric __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
1486e8d8bef9SDimitry Andric "setb %0"
148781ad6265SDimitry Andric // The "=q" constraint means any register accessible as rl
148881ad6265SDimitry Andric // in 32-bit mode: a, b, c, and d;
148981ad6265SDimitry Andric // in 64-bit mode: any integer register
149081ad6265SDimitry Andric : "=q"(flag)
1491e8d8bef9SDimitry Andric : "a"(timeLo), "d"(timeHi), "c"(hint)
1492e8d8bef9SDimitry Andric :);
1493e8d8bef9SDimitry Andric return flag;
1494e8d8bef9SDimitry Andric #else
1495e8d8bef9SDimitry Andric return _tpause(hint, counter);
1496e8d8bef9SDimitry Andric #endif
1497e8d8bef9SDimitry Andric }
1498e8d8bef9SDimitry Andric KMP_ATTRIBUTE_TARGET_WAITPKG
__kmp_umonitor(void * cacheline)1499fe6060f1SDimitry Andric static inline void __kmp_umonitor(void *cacheline) {
1500e8d8bef9SDimitry Andric #if !KMP_HAVE_WAITPKG_INTRINSICS
1501e8d8bef9SDimitry Andric __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
1502e8d8bef9SDimitry Andric :
1503e8d8bef9SDimitry Andric : "a"(cacheline)
1504e8d8bef9SDimitry Andric :);
1505e8d8bef9SDimitry Andric #else
1506e8d8bef9SDimitry Andric _umonitor(cacheline);
1507e8d8bef9SDimitry Andric #endif
1508e8d8bef9SDimitry Andric }
// Issues UMWAIT. Without waitpkg intrinsics the instruction is emitted as raw
// opcode bytes with `hint` in ECX and `counter` split across EDX:EAX, and the
// carry flag captured by SETB is returned; otherwise the result of _umwait()
// is returned.
1509e8d8bef9SDimitry Andric KMP_ATTRIBUTE_TARGET_WAITPKG
__kmp_umwait(uint32_t hint,uint64_t counter)1510fe6060f1SDimitry Andric static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
1511e8d8bef9SDimitry Andric #if !KMP_HAVE_WAITPKG_INTRINSICS
1512e8d8bef9SDimitry Andric uint32_t timeHi = uint32_t(counter >> 32);
1513e8d8bef9SDimitry Andric uint32_t timeLo = uint32_t(counter & 0xffffffff);
1514e8d8bef9SDimitry Andric char flag;
1515e8d8bef9SDimitry Andric __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
1516e8d8bef9SDimitry Andric "setb %0"
151781ad6265SDimitry Andric // The "=q" constraint means any register accessible as rl
151881ad6265SDimitry Andric // in 32-bit mode: a, b, c, and d;
151981ad6265SDimitry Andric // in 64-bit mode: any integer register
152081ad6265SDimitry Andric : "=q"(flag)
1521e8d8bef9SDimitry Andric : "a"(timeLo), "d"(timeHi), "c"(hint)
1522e8d8bef9SDimitry Andric :);
1523e8d8bef9SDimitry Andric return flag;
1524e8d8bef9SDimitry Andric #else
1525e8d8bef9SDimitry Andric return _umwait(hint, counter);
1526e8d8bef9SDimitry Andric #endif
1527e8d8bef9SDimitry Andric }
1528e8d8bef9SDimitry Andric #elif KMP_HAVE_MWAIT
1529e8d8bef9SDimitry Andric #if KMP_OS_UNIX
1530e8d8bef9SDimitry Andric #include <pmmintrin.h>
1531e8d8bef9SDimitry Andric #else
1532e8d8bef9SDimitry Andric #include <intrin.h>
1533e8d8bef9SDimitry Andric #endif
// Thin wrapper over _mm_monitor(). On Unix the function is compiled with the
// "sse3" target attribute.
1534e8d8bef9SDimitry Andric #if KMP_OS_UNIX
1535e8d8bef9SDimitry Andric __attribute__((target("sse3")))
1536e8d8bef9SDimitry Andric #endif
1537e8d8bef9SDimitry Andric static inline void
__kmp_mm_monitor(void * cacheline,unsigned extensions,unsigned hints)1538e8d8bef9SDimitry Andric __kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
1539e8d8bef9SDimitry Andric _mm_monitor(cacheline, extensions, hints);
1540e8d8bef9SDimitry Andric }
// Thin wrapper over _mm_mwait(). On Unix the function is compiled with the
// "sse3" target attribute.
1541e8d8bef9SDimitry Andric #if KMP_OS_UNIX
1542e8d8bef9SDimitry Andric __attribute__((target("sse3")))
1543e8d8bef9SDimitry Andric #endif
1544e8d8bef9SDimitry Andric static inline void
__kmp_mm_mwait(unsigned extensions,unsigned hints)1545e8d8bef9SDimitry Andric __kmp_mm_mwait(unsigned extensions, unsigned hints) {
1546e8d8bef9SDimitry Andric _mm_mwait(extensions, hints);
1547e8d8bef9SDimitry Andric }
1548e8d8bef9SDimitry Andric #endif // KMP_HAVE_UMWAIT
1549e8d8bef9SDimitry Andric
// Spin-loop pause primitive for x86: an external function on 32-bit x86, a
// 300-unit _mm_delay_32() on KMP_MIC, and _mm_pause() otherwise.
155004eeddc0SDimitry Andric #if KMP_ARCH_X86
155104eeddc0SDimitry Andric extern void __kmp_x86_pause(void);
155204eeddc0SDimitry Andric #elif KMP_MIC
155304eeddc0SDimitry Andric // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
155404eeddc0SDimitry Andric // regression after removal of extra PAUSE from spin loops. Changing
155504eeddc0SDimitry Andric // the delay from 100 to 300 showed even better performance than double PAUSE
155604eeddc0SDimitry Andric // on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
__kmp_x86_pause(void)155704eeddc0SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
155804eeddc0SDimitry Andric #else
__kmp_x86_pause(void)155904eeddc0SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_pause(); }
156004eeddc0SDimitry Andric #endif
156104eeddc0SDimitry Andric #define KMP_CPU_PAUSE() __kmp_x86_pause()
156204eeddc0SDimitry Andric #elif KMP_ARCH_PPC64
/* PPC64 pause: issue the "or 1,1,1" then "or 2,2,2" priority hints, followed
   by a compiler-only memory barrier (empty asm with a "memory" clobber). */
156304eeddc0SDimitry Andric #define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
156404eeddc0SDimitry Andric #define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
156504eeddc0SDimitry Andric #define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
156604eeddc0SDimitry Andric #define KMP_CPU_PAUSE() \
156704eeddc0SDimitry Andric do { \
156804eeddc0SDimitry Andric KMP_PPC64_PRI_LOW(); \
156904eeddc0SDimitry Andric KMP_PPC64_PRI_MED(); \
157004eeddc0SDimitry Andric KMP_PPC64_PRI_LOC_MB(); \
157104eeddc0SDimitry Andric } while (0)
157204eeddc0SDimitry Andric #else
/* Every other architecture: the pause expands to nothing. */
157304eeddc0SDimitry Andric #define KMP_CPU_PAUSE() /* nothing to do */
157404eeddc0SDimitry Andric #endif
157504eeddc0SDimitry Andric
/* Seed a spin counter / backoff value from the runtime globals. Both expand
   to a brace block rather than do { } while (0), so a trailing semicolon at
   the call site forms an extra empty statement. */
157604eeddc0SDimitry Andric #define KMP_INIT_YIELD(count) \
157704eeddc0SDimitry Andric { (count) = __kmp_yield_init; }
157804eeddc0SDimitry Andric
157904eeddc0SDimitry Andric #define KMP_INIT_BACKOFF(time) \
158004eeddc0SDimitry Andric { (time) = __kmp_pause_init; }
158104eeddc0SDimitry Andric
/* True when there are more threads (__kmp_nth) than processors; the processor
   count is __kmp_avail_proc when nonzero, otherwise __kmp_xproc. */
158204eeddc0SDimitry Andric #define KMP_OVERSUBSCRIBED \
158304eeddc0SDimitry Andric (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
158404eeddc0SDimitry Andric
/* True when __kmp_use_yield is 1, or is 2 and the system is oversubscribed. */
158504eeddc0SDimitry Andric #define KMP_TRY_YIELD \
158604eeddc0SDimitry Andric ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
158704eeddc0SDimitry Andric
/* True only when oversubscribed and __kmp_use_yield is 1 or 2. */
158804eeddc0SDimitry Andric #define KMP_TRY_YIELD_OVERSUB \
158904eeddc0SDimitry Andric ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
159004eeddc0SDimitry Andric
/* Pause the CPU, then yield if `cond` holds and KMP_TRY_YIELD allows it.
   Expands to a brace block, not do { } while (0). */
159104eeddc0SDimitry Andric #define KMP_YIELD(cond) \
159204eeddc0SDimitry Andric { \
159304eeddc0SDimitry Andric KMP_CPU_PAUSE(); \
159404eeddc0SDimitry Andric if ((cond) && (KMP_TRY_YIELD)) \
159504eeddc0SDimitry Andric __kmp_yield(); \
159604eeddc0SDimitry Andric }
159704eeddc0SDimitry Andric
/* Pause the CPU, then yield only when KMP_TRY_YIELD_OVERSUB holds. */
159804eeddc0SDimitry Andric #define KMP_YIELD_OVERSUB() \
159904eeddc0SDimitry Andric { \
160004eeddc0SDimitry Andric KMP_CPU_PAUSE(); \
160104eeddc0SDimitry Andric if ((KMP_TRY_YIELD_OVERSUB)) \
160204eeddc0SDimitry Andric __kmp_yield(); \
160304eeddc0SDimitry Andric }
160404eeddc0SDimitry Andric
160504eeddc0SDimitry Andric // Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
160604eeddc0SDimitry Andric // there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
// Pause the CPU; when KMP_TRY_YIELD holds, count down by 2 and, on reaching
// zero, yield and reload `count` from __kmp_yield_next.
160704eeddc0SDimitry Andric #define KMP_YIELD_SPIN(count) \
160804eeddc0SDimitry Andric { \
160904eeddc0SDimitry Andric KMP_CPU_PAUSE(); \
161004eeddc0SDimitry Andric if (KMP_TRY_YIELD) { \
161104eeddc0SDimitry Andric (count) -= 2; \
161204eeddc0SDimitry Andric if (!(count)) { \
161304eeddc0SDimitry Andric __kmp_yield(); \
161404eeddc0SDimitry Andric (count) = __kmp_yield_next; \
161504eeddc0SDimitry Andric } \
161604eeddc0SDimitry Andric } \
161704eeddc0SDimitry Andric }
161804eeddc0SDimitry Andric
161904eeddc0SDimitry Andric // If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
162004eeddc0SDimitry Andric // (C0.2) state, which improves performance of other SMT threads on the same
162104eeddc0SDimitry Andric // core, otherwise, use the fast (C0.1) default state, or whatever the user has
162204eeddc0SDimitry Andric // requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
162304eeddc0SDimitry Andric // available, fall back to the regular CPU pause and yield combination.
162404eeddc0SDimitry Andric #if KMP_HAVE_UMWAIT
// Mask applied to the TPAUSE backoff value after each doubling; keeps the
// delay within 16 bits.
#define KMP_TPAUSE_MAX_MASK ((kmp_uint64)0xFFFF)
// Spin-wait step, TPAUSE-capable build.
//   count - yield countdown (lvalue); only touched on the non-TPAUSE path.
//   time  - TPAUSE delay (lvalue); grows as time = (time * 2 + 1), masked by
//           KMP_TPAUSE_MAX_MASK, after every TPAUSE.
// With TPAUSE enabled: pause with hint 0 when oversubscribed, otherwise with
// __kmp_tpause_hint. With TPAUSE disabled: same behaviour as the non-UMWAIT
// definition (CPU pause, then yield when oversubscribed, else count down and
// yield when __kmp_use_yield == 1).
// Every use of a macro argument is parenthesized so that an lvalue expression
// argument (e.g. `c ? a : b`) is shifted as a whole.
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
  { \
    if (__kmp_tpause_enabled) { \
      if (KMP_OVERSUBSCRIBED) { \
        __kmp_tpause(0, (time)); \
      } else { \
        __kmp_tpause(__kmp_tpause_hint, (time)); \
      } \
      (time) = (((time) << 1) | 1) & KMP_TPAUSE_MAX_MASK; \
    } else { \
      KMP_CPU_PAUSE(); \
      if ((KMP_TRY_YIELD_OVERSUB)) { \
        __kmp_yield(); \
      } else if (__kmp_use_yield == 1) { \
        (count) -= 2; \
        if (!(count)) { \
          __kmp_yield(); \
          (count) = __kmp_yield_next; \
        } \
      } \
    } \
  }
164804eeddc0SDimitry Andric #else
// Spin-wait step for builds without TPAUSE support; `time` is accepted only
// for signature parity and is not used.
// After one KMP_CPU_PAUSE(): yield immediately when KMP_TRY_YIELD_OVERSUB
// holds; otherwise, if __kmp_use_yield == 1, lower `count` by 2 and, on
// reaching zero, yield and reload `count` from __kmp_yield_next.
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
  { \
    KMP_CPU_PAUSE(); \
    if ((KMP_TRY_YIELD_OVERSUB)) { \
      __kmp_yield(); \
    } else if (__kmp_use_yield == 1) { \
      (count) -= 2; \
      if ((count) == 0) { \
        __kmp_yield(); \
        (count) = __kmp_yield_next; \
      } \
    } \
  }
166204eeddc0SDimitry Andric #endif // KMP_HAVE_UMWAIT
166304eeddc0SDimitry Andric
16640b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
16650b57cec5SDimitry Andric /* Support datatypes for the orphaned construct nesting checks. */
16660b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
16670b57cec5SDimitry Andric
1668349cc55cSDimitry Andric /* When adding to this enum, add its corresponding string in cons_text_c[]
1669349cc55cSDimitry Andric * array in kmp_error.cpp */
// Construct kinds used by the nesting checks. Enumerators are implicitly
// numbered from ct_none == 0 upward in declaration order.
// NOTE(review): the string table named above is presumably indexed by these
// values, so new enumerators should be appended in matching order -- confirm.
16700b57cec5SDimitry Andric enum cons_type {
16710b57cec5SDimitry Andric ct_none,
16720b57cec5SDimitry Andric ct_parallel,
16730b57cec5SDimitry Andric ct_pdo,
16740b57cec5SDimitry Andric ct_pdo_ordered,
16750b57cec5SDimitry Andric ct_psections,
16760b57cec5SDimitry Andric ct_psingle,
16770b57cec5SDimitry Andric ct_critical,
16780b57cec5SDimitry Andric ct_ordered_in_parallel,
16790b57cec5SDimitry Andric ct_ordered_in_pdo,
16800b57cec5SDimitry Andric ct_master,
16810b57cec5SDimitry Andric ct_reduce,
1682fe6060f1SDimitry Andric ct_barrier,
1683fe6060f1SDimitry Andric ct_masked
16840b57cec5SDimitry Andric };
16850b57cec5SDimitry Andric
// True only when ct equals ct_pdo_ordered.
16860b57cec5SDimitry Andric #define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
16870b57cec5SDimitry Andric
// One construct record: source location (ident), construct kind (type), a
// `prev` link and, for critical sections, the lock address used as the name.
// NOTE(review): `prev` is presumably the stack index of an earlier record --
// confirm against the code that pushes and pops these records.
16880b57cec5SDimitry Andric struct cons_data {
16890b57cec5SDimitry Andric ident_t const *ident;
16900b57cec5SDimitry Andric enum cons_type type;
16910b57cec5SDimitry Andric int prev;
16920b57cec5SDimitry Andric kmp_user_lock_p
16930b57cec5SDimitry Andric name; /* address exclusively for critical section name comparison */
16940b57cec5SDimitry Andric };
16950b57cec5SDimitry Andric
// Header of a construct stack whose records live in stack_data.
// NOTE(review): stack_size / stack_top look like capacity and current top;
// p_top / w_top / s_top presumably track the latest parallel, worksharing and
// synchronization records -- TODO confirm against the users of this struct.
16960b57cec5SDimitry Andric struct cons_header {
16970b57cec5SDimitry Andric int p_top, w_top, s_top;
16980b57cec5SDimitry Andric int stack_size, stack_top;
16990b57cec5SDimitry Andric struct cons_data *stack_data;
17000b57cec5SDimitry Andric };
17010b57cec5SDimitry Andric
// A text buffer plus KMP_MAX_FIELDS (offset, length) pairs.
// NOTE(review): offset[i] / length[i] presumably locate field i inside
// `text` -- confirm at the point of use.
17020b57cec5SDimitry Andric struct kmp_region_info {
17030b57cec5SDimitry Andric char *text;
17040b57cec5SDimitry Andric int offset[KMP_MAX_FIELDS];
17050b57cec5SDimitry Andric int length[KMP_MAX_FIELDS];
17060b57cec5SDimitry Andric };
17070b57cec5SDimitry Andric
17080b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */
17090b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */
17100b57cec5SDimitry Andric
// Platform-specific aliases for the native thread handle (kmp_thread_t) and
// the key type (kmp_key_t): HANDLE / DWORD on Windows, pthread_t /
// pthread_key_t on Unix.
17110b57cec5SDimitry Andric #if KMP_OS_WINDOWS
17120b57cec5SDimitry Andric typedef HANDLE kmp_thread_t;
17130b57cec5SDimitry Andric typedef DWORD kmp_key_t;
17140b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
17150b57cec5SDimitry Andric
17160b57cec5SDimitry Andric #if KMP_OS_UNIX
17170b57cec5SDimitry Andric typedef pthread_t kmp_thread_t;
17180b57cec5SDimitry Andric typedef pthread_key_t kmp_key_t;
17190b57cec5SDimitry Andric #endif
17200b57cec5SDimitry Andric
// Declared here, defined elsewhere.
// NOTE(review): going by the name, this is the key under which the gtid is
// stored per thread -- confirm.
17210b57cec5SDimitry Andric extern kmp_key_t __kmp_gtid_threadprivate_key;
17220b57cec5SDimitry Andric
// Process resource-usage counters.
// NOTE(review): the field names mirror members of POSIX `struct rusage`
// (ru_maxrss, ru_minflt, ...); presumably filled from getrusage() -- confirm.
17230b57cec5SDimitry Andric typedef struct kmp_sys_info {
17240b57cec5SDimitry Andric long maxrss; /* the maximum resident set size utilized (in kilobytes) */
17250b57cec5SDimitry Andric long minflt; /* the number of page faults serviced without any I/O */
17260b57cec5SDimitry Andric long majflt; /* the number of page faults serviced that required I/O */
17270b57cec5SDimitry Andric long nswap; /* the number of times a process was "swapped" out of memory */
17280b57cec5SDimitry Andric long inblock; /* the number of times the file system had to perform input */
17290b57cec5SDimitry Andric long oublock; /* the number of times the file system had to perform output */
17300b57cec5SDimitry Andric long nvcsw; /* the number of voluntary context switches */
17310b57cec5SDimitry Andric long nivcsw; /* the number of involuntary (forced) context switches */
17320b57cec5SDimitry Andric } kmp_sys_info_t;
17330b57cec5SDimitry Andric
17340b57cec5SDimitry Andric #if USE_ITT_BUILD
17350b57cec5SDimitry Andric // We cannot include "kmp_itt.h" due to circular dependency. Declare the only
17360b57cec5SDimitry Andric // required type here. Later we will check the type meets requirements.
17370b57cec5SDimitry Andric typedef int kmp_itt_mark_t;
// ITT debug switch, defined as 0 here.
17380b57cec5SDimitry Andric #define KMP_ITT_DEBUG 0
17390b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
17400b57cec5SDimitry Andric
// Storage for a critical-section name: an array of 8 kmp_int32 values.
17410b57cec5SDimitry Andric typedef kmp_int32 kmp_critical_name[8];
17420b57cec5SDimitry Andric
17430b57cec5SDimitry Andric /*!
17440b57cec5SDimitry Andric @ingroup PARALLEL
17450b57cec5SDimitry Andric The type for a microtask which gets passed to @ref __kmpc_fork_call().
17460b57cec5SDimitry Andric The arguments to the outlined function are
17470b57cec5SDimitry Andric @param global_tid the global thread identity of the thread executing the
17480b57cec5SDimitry Andric function.
1749480093f4SDimitry Andric @param bound_tid the local identity of the thread executing the function
17500b57cec5SDimitry Andric @param ... pointers to shared variables accessed by the function.
17510b57cec5SDimitry Andric */
17520b57cec5SDimitry Andric typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
// Variant of the microtask type whose two leading parameters are pointers
// named bound_tid and bound_nth, followed by the same variadic tail.
17530b57cec5SDimitry Andric typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
17540b57cec5SDimitry Andric ...);
17550b57cec5SDimitry Andric
17560b57cec5SDimitry Andric /*!
17570b57cec5SDimitry Andric @ingroup THREADPRIVATE
17580b57cec5SDimitry Andric @{
17590b57cec5SDimitry Andric */
17600b57cec5SDimitry Andric /* ---------------------------------------------------------------------------
17610b57cec5SDimitry Andric */
17620b57cec5SDimitry Andric /* Threadprivate initialization/finalization function declarations */
17630b57cec5SDimitry Andric
17640b57cec5SDimitry Andric /* for non-array objects: __kmpc_threadprivate_register() */
17650b57cec5SDimitry Andric
17660b57cec5SDimitry Andric /*!
17670b57cec5SDimitry Andric Pointer to the constructor function.
17680b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer.
17690b57cec5SDimitry Andric */
17700b57cec5SDimitry Andric typedef void *(*kmpc_ctor)(void *);
17710b57cec5SDimitry Andric
17720b57cec5SDimitry Andric /*!
17730b57cec5SDimitry Andric Pointer to the destructor function.
17740b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer.
17750b57cec5SDimitry Andric */
17760b57cec5SDimitry Andric typedef void (*kmpc_dtor)(
17770b57cec5SDimitry Andric void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
17780b57cec5SDimitry Andric compiler */
17790b57cec5SDimitry Andric /*!
17800b57cec5SDimitry Andric Pointer to an alternate constructor.
17810b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer.
The second argument is another <tt>void *</tt> (presumably the object to
construct from -- TODO confirm).
17820b57cec5SDimitry Andric */
17830b57cec5SDimitry Andric typedef void *(*kmpc_cctor)(void *, void *);
17840b57cec5SDimitry Andric
17850b57cec5SDimitry Andric /* for array objects: __kmpc_threadprivate_register_vec() */
17860b57cec5SDimitry Andric /* First arg: "this" pointer */
17870b57cec5SDimitry Andric /* Last arg: number of array elements */
17880b57cec5SDimitry Andric /*!
17890b57cec5SDimitry Andric Array constructor.
17900b57cec5SDimitry Andric First argument is the <tt>this</tt> pointer.
17910b57cec5SDimitry Andric Second argument is the number of array elements.
17920b57cec5SDimitry Andric */
17930b57cec5SDimitry Andric typedef void *(*kmpc_ctor_vec)(void *, size_t);
17940b57cec5SDimitry Andric /*!
17950b57cec5SDimitry Andric Pointer to the array destructor function.
17960b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer.
17970b57cec5SDimitry Andric Second argument is the number of array elements.
17980b57cec5SDimitry Andric */
17990b57cec5SDimitry Andric typedef void (*kmpc_dtor_vec)(void *, size_t);
18000b57cec5SDimitry Andric /*!
18010b57cec5SDimitry Andric Array form of the alternate constructor (vector counterpart of kmpc_cctor).
18020b57cec5SDimitry Andric First argument is the <tt>this</tt> pointer.
18030b57cec5SDimitry Andric Third argument is the number of array elements.
18040b57cec5SDimitry Andric */
18050b57cec5SDimitry Andric typedef void *(*kmpc_cctor_vec)(void *, void *,
18060b57cec5SDimitry Andric size_t); /* function unused by compiler */
18070b57cec5SDimitry Andric
18080b57cec5SDimitry Andric /*!
18090b57cec5SDimitry Andric @}
18100b57cec5SDimitry Andric */
18110b57cec5SDimitry Andric
18120b57cec5SDimitry Andric /* keeps track of threadprivate cache allocations for cleanup later;
   the records form a singly linked list through `next` */
18130b57cec5SDimitry Andric typedef struct kmp_cached_addr {
18140b57cec5SDimitry Andric void **addr; /* address of allocated cache */
18150b57cec5SDimitry Andric void ***compiler_cache; /* pointer to compiler's cache */
18160b57cec5SDimitry Andric void *data; /* pointer to global data */
18170b57cec5SDimitry Andric struct kmp_cached_addr *next; /* pointer to next cached address */
18180b57cec5SDimitry Andric } kmp_cached_addr_t;
18190b57cec5SDimitry Andric
/* Descriptor in a singly linked list (linked through `next`); each one names
   a data buffer, its size and a repeat count. */
18200b57cec5SDimitry Andric struct private_data {
18210b57cec5SDimitry Andric struct private_data *next; /* The next descriptor in the list */
18220b57cec5SDimitry Andric void *data; /* The data buffer for this descriptor */
18230b57cec5SDimitry Andric int more; /* The repeat count for this descriptor */
18240b57cec5SDimitry Andric size_t size; /* The data size for this descriptor */
18250b57cec5SDimitry Andric };
18260b57cec5SDimitry Andric
/* Record pairing a global address (gbl_addr) with a second address
   (par_addr) and a size (cmn_size).
   NOTE(review): `next` and `link` chain records into two different lists --
   presumably a hash-bucket chain and a per-thread list; confirm. */
18270b57cec5SDimitry Andric struct private_common {
18280b57cec5SDimitry Andric struct private_common *next;
18290b57cec5SDimitry Andric struct private_common *link;
18300b57cec5SDimitry Andric void *gbl_addr;
1831fe6060f1SDimitry Andric void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
18320b57cec5SDimitry Andric size_t cmn_size;
18330b57cec5SDimitry Andric };
18340b57cec5SDimitry Andric
/* Descriptor for one global address (gbl_addr): initialization data
   (pod_init / obj_init) plus constructor, alternate-constructor and
   destructor pointers. Each of the three unions holds either the scalar or
   the array (…v) form of the function pointer.
   NOTE(review): is_vec / vec_len presumably select the array form and give
   its element count -- confirm at the point of use. */
18350b57cec5SDimitry Andric struct shared_common {
18360b57cec5SDimitry Andric struct shared_common *next;
18370b57cec5SDimitry Andric struct private_data *pod_init;
18380b57cec5SDimitry Andric void *obj_init;
18390b57cec5SDimitry Andric void *gbl_addr;
18400b57cec5SDimitry Andric union {
18410b57cec5SDimitry Andric kmpc_ctor ctor;
18420b57cec5SDimitry Andric kmpc_ctor_vec ctorv;
18430b57cec5SDimitry Andric } ct;
18440b57cec5SDimitry Andric union {
18450b57cec5SDimitry Andric kmpc_cctor cctor;
18460b57cec5SDimitry Andric kmpc_cctor_vec cctorv;
18470b57cec5SDimitry Andric } cct;
18480b57cec5SDimitry Andric union {
18490b57cec5SDimitry Andric kmpc_dtor dtor;
18500b57cec5SDimitry Andric kmpc_dtor_vec dtorv;
18510b57cec5SDimitry Andric } dt;
18520b57cec5SDimitry Andric size_t vec_len;
18530b57cec5SDimitry Andric int is_vec;
18540b57cec5SDimitry Andric size_t cmn_size;
18550b57cec5SDimitry Andric };
18560b57cec5SDimitry Andric
#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
#define KMP_HASH_TABLE_SIZE \
  (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
/* Map an address to a bucket index in [0, KMP_HASH_TABLE_SIZE): drop the low
   KMP_HASH_SHIFT bits, then keep the low KMP_HASH_TABLE_LOG2 bits.
   The argument is parenthesized before the cast so that an expression such
   as `p + 1` is converted as a whole rather than only its first operand. */
#define KMP_HASH(x) \
  ((((kmp_uintptr_t)(x)) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
18630b57cec5SDimitry Andric
/* Fixed-size tables with KMP_HASH_TABLE_SIZE slots of list-head pointers.
   NOTE(review): slots are presumably indexed by KMP_HASH(address) with
   collisions chained through the records' `next` pointers -- confirm. */
18640b57cec5SDimitry Andric struct common_table {
18650b57cec5SDimitry Andric struct private_common *data[KMP_HASH_TABLE_SIZE];
18660b57cec5SDimitry Andric };
18670b57cec5SDimitry Andric
18680b57cec5SDimitry Andric struct shared_table {
18690b57cec5SDimitry Andric struct shared_common *data[KMP_HASH_TABLE_SIZE];
18700b57cec5SDimitry Andric };
18710b57cec5SDimitry Andric
18720b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
18730b57cec5SDimitry Andric
18740b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
18750b57cec5SDimitry Andric // Shared barrier data that exists inside a single unit of the scheduling
18760b57cec5SDimitry Andric // hierarchy
// NOTE(review): the comment above says "Shared" while the type is named
// "private" -- confirm which is intended. Only compiled when
// KMP_USE_HIER_SCHED is nonzero.
18770b57cec5SDimitry Andric typedef struct kmp_hier_private_bdata_t {
18780b57cec5SDimitry Andric kmp_int32 num_active;
18790b57cec5SDimitry Andric kmp_uint64 index;
18800b57cec5SDimitry Andric kmp_uint64 wait_val[2];
18810b57cec5SDimitry Andric } kmp_hier_private_bdata_t;
18820b57cec5SDimitry Andric #endif
18830b57cec5SDimitry Andric
// Scheduling flags packed into bit-fields: five 1-bit flags plus 27 unused
// bits, 32 bits in total. The build assert below pins the struct to 4 bytes,
// so a new flag must be carved out of `unused`.
18840b57cec5SDimitry Andric typedef struct kmp_sched_flags {
18850b57cec5SDimitry Andric unsigned ordered : 1;
18860b57cec5SDimitry Andric unsigned nomerge : 1;
18870b57cec5SDimitry Andric unsigned contains_last : 1;
18885f757f3fSDimitry Andric unsigned use_hier : 1; // Used in KMP_USE_HIER_SCHED code
18895f757f3fSDimitry Andric unsigned use_hybrid : 1; // Used in KMP_WEIGHTED_ITERATIONS_SUPPORTED code
18905f757f3fSDimitry Andric unsigned unused : 27;
18910b57cec5SDimitry Andric } kmp_sched_flags_t;
18920b57cec5SDimitry Andric
18930b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
18940b57cec5SDimitry Andric
18950b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED
// 32-bit dispatch record, layout used when KMP_STATIC_STEAL_ENABLED is
// nonzero. Field roles follow the comments on the 64-bit sibling
// (dispatch_private_info64) declared in the same section of this file.
18960b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
18970b57cec5SDimitry Andric kmp_int32 count; // current chunk number (see 64-bit sibling)
18980b57cec5SDimitry Andric kmp_int32 ub; // upper-bound
18990b57cec5SDimitry Andric /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
19000b57cec5SDimitry Andric kmp_int32 lb; // lower-bound
19010b57cec5SDimitry Andric kmp_int32 st; // stride
19020b57cec5SDimitry Andric kmp_int32 tc; // trip count (number of iterations)
1903fe6060f1SDimitry Andric kmp_lock_t *steal_lock; // lock used for chunk stealing
19045f757f3fSDimitry Andric
19055f757f3fSDimitry Andric kmp_uint32 ordered_lower;
19065f757f3fSDimitry Andric kmp_uint32 ordered_upper;
19075f757f3fSDimitry Andric
1908fe6060f1SDimitry Andric // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
19090b57cec5SDimitry Andric // a) parm3 is properly aligned and
1910fe6060f1SDimitry Andric // b) all parm1-4 are on the same cache line.
19110b57cec5SDimitry Andric // Because parm1-4 are used together, performance seems to be better
1912fe6060f1SDimitry Andric // if they are on the same cache line (not measured though).
19130b57cec5SDimitry Andric
19145f757f3fSDimitry Andric struct KMP_ALIGN(32) {
19155f757f3fSDimitry Andric kmp_int32 parm1;
19165f757f3fSDimitry Andric kmp_int32 parm2;
19170b57cec5SDimitry Andric kmp_int32 parm3;
19180b57cec5SDimitry Andric kmp_int32 parm4;
19190b57cec5SDimitry Andric };
19200b57cec5SDimitry Andric
19215f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
19225f757f3fSDimitry Andric kmp_uint32 pchunks;
19235f757f3fSDimitry Andric kmp_uint32 num_procs_with_pcore;
19245f757f3fSDimitry Andric kmp_int32 first_thread_with_ecore;
19255f757f3fSDimitry Andric #endif
19260b57cec5SDimitry Andric #if KMP_OS_WINDOWS
19270b57cec5SDimitry Andric kmp_int32 last_upper;
19280b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
19290b57cec5SDimitry Andric } dispatch_private_info32_t;
19300b57cec5SDimitry Andric
// Size guard: when CACHE_LINE is at most 128 the record must not exceed
// 128 bytes.
19315f757f3fSDimitry Andric #if CACHE_LINE <= 128
19325f757f3fSDimitry Andric KMP_BUILD_ASSERT(sizeof(dispatch_private_info32_t) <= 128);
19335f757f3fSDimitry Andric #endif
19345f757f3fSDimitry Andric
// 64-bit dispatch record, layout used when KMP_STATIC_STEAL_ENABLED is
// nonzero. Mirrors dispatch_private_info32 field for field with 64-bit types.
19350b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
19360b57cec5SDimitry Andric kmp_int64 count; // current chunk number for static & static-steal scheduling
19370b57cec5SDimitry Andric kmp_int64 ub; /* upper-bound */
19380b57cec5SDimitry Andric /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
19390b57cec5SDimitry Andric kmp_int64 lb; /* lower-bound */
19400b57cec5SDimitry Andric kmp_int64 st; /* stride */
19410b57cec5SDimitry Andric kmp_int64 tc; /* trip count (number of iterations) */
1942fe6060f1SDimitry Andric kmp_lock_t *steal_lock; // lock used for chunk stealing
19435f757f3fSDimitry Andric
19445f757f3fSDimitry Andric kmp_uint64 ordered_lower;
19455f757f3fSDimitry Andric kmp_uint64 ordered_upper;
19460b57cec5SDimitry Andric /* parm[1-4] are used in different ways by different scheduling algorithms */
19470b57cec5SDimitry Andric
19480b57cec5SDimitry Andric // KMP_ALIGN(32) ensures ( if the KMP_ALIGN macro is turned on )
19490b57cec5SDimitry Andric // a) parm3 is properly aligned and
19500b57cec5SDimitry Andric // b) all parm1-4 are in the same cache line.
19510b57cec5SDimitry Andric // Because parm1-4 are used together, performance seems to be better
19520b57cec5SDimitry Andric // if they are in the same line (not measured though).
19530b57cec5SDimitry Andric struct KMP_ALIGN(32) {
19540b57cec5SDimitry Andric kmp_int64 parm1;
19550b57cec5SDimitry Andric kmp_int64 parm2;
19560b57cec5SDimitry Andric kmp_int64 parm3;
19570b57cec5SDimitry Andric kmp_int64 parm4;
19580b57cec5SDimitry Andric };
19590b57cec5SDimitry Andric
19605f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
19615f757f3fSDimitry Andric kmp_uint64 pchunks;
19625f757f3fSDimitry Andric kmp_uint64 num_procs_with_pcore;
19635f757f3fSDimitry Andric kmp_int64 first_thread_with_ecore;
19645f757f3fSDimitry Andric #endif
19655f757f3fSDimitry Andric
19660b57cec5SDimitry Andric #if KMP_OS_WINDOWS
19670b57cec5SDimitry Andric kmp_int64 last_upper;
19680b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
19690b57cec5SDimitry Andric } dispatch_private_info64_t;
19705f757f3fSDimitry Andric
// Size guard: when CACHE_LINE is at most 128 the record must not exceed
// 128 bytes.
19715f757f3fSDimitry Andric #if CACHE_LINE <= 128
19725f757f3fSDimitry Andric KMP_BUILD_ASSERT(sizeof(dispatch_private_info64_t) <= 128);
19735f757f3fSDimitry Andric #endif
19745f757f3fSDimitry Andric
19750b57cec5SDimitry Andric #else /* KMP_STATIC_STEAL_ENABLED */
// 32-bit dispatch record, layout used when KMP_STATIC_STEAL_ENABLED is zero:
// no steal lock and no aligned parm sub-struct. Field roles follow the
// comments on the 64-bit sibling declared right after this one.
19760b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
19770b57cec5SDimitry Andric kmp_int32 lb; // lower-bound
19780b57cec5SDimitry Andric kmp_int32 ub; // upper-bound
19790b57cec5SDimitry Andric kmp_int32 st; // stride
19800b57cec5SDimitry Andric kmp_int32 tc; // trip count (number of iterations)
19810b57cec5SDimitry Andric
19820b57cec5SDimitry Andric kmp_int32 parm1;
19830b57cec5SDimitry Andric kmp_int32 parm2;
19840b57cec5SDimitry Andric kmp_int32 parm3;
19850b57cec5SDimitry Andric kmp_int32 parm4;
19860b57cec5SDimitry Andric
19870b57cec5SDimitry Andric kmp_int32 count; // current chunk number (see 64-bit sibling)
19880b57cec5SDimitry Andric
19890b57cec5SDimitry Andric kmp_uint32 ordered_lower;
19900b57cec5SDimitry Andric kmp_uint32 ordered_upper;
19910b57cec5SDimitry Andric #if KMP_OS_WINDOWS
19920b57cec5SDimitry Andric kmp_int32 last_upper;
19930b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
19940b57cec5SDimitry Andric } dispatch_private_info32_t;
19950b57cec5SDimitry Andric
// 64-bit dispatch record, layout used when KMP_STATIC_STEAL_ENABLED is zero.
19960b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
19970b57cec5SDimitry Andric kmp_int64 lb; /* lower-bound */
19980b57cec5SDimitry Andric kmp_int64 ub; /* upper-bound */
19990b57cec5SDimitry Andric kmp_int64 st; /* stride */
20000b57cec5SDimitry Andric kmp_int64 tc; /* trip count (number of iterations) */
20010b57cec5SDimitry Andric
20020b57cec5SDimitry Andric /* parm[1-4] are used in different ways by different scheduling algorithms */
20030b57cec5SDimitry Andric kmp_int64 parm1;
20040b57cec5SDimitry Andric kmp_int64 parm2;
20050b57cec5SDimitry Andric kmp_int64 parm3;
20060b57cec5SDimitry Andric kmp_int64 parm4;
20070b57cec5SDimitry Andric
20080b57cec5SDimitry Andric kmp_int64 count; /* current chunk number for static scheduling */
20090b57cec5SDimitry Andric
20100b57cec5SDimitry Andric kmp_uint64 ordered_lower;
20110b57cec5SDimitry Andric kmp_uint64 ordered_upper;
20120b57cec5SDimitry Andric #if KMP_OS_WINDOWS
20130b57cec5SDimitry Andric kmp_int64 last_upper;
20140b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
20150b57cec5SDimitry Andric } dispatch_private_info64_t;
20160b57cec5SDimitry Andric #endif /* KMP_STATIC_STEAL_ENABLED */
20170b57cec5SDimitry Andric
// Dispatch buffer: the union `u` overlays the 32-bit and 64-bit records,
// followed by bookkeeping that is independent of the iteration type.
// NOTE(review): type_size presumably tells which union member is active --
// confirm at the point of use.
20180b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info {
20190b57cec5SDimitry Andric union private_info {
20200b57cec5SDimitry Andric dispatch_private_info32_t p32;
20210b57cec5SDimitry Andric dispatch_private_info64_t p64;
20220b57cec5SDimitry Andric } u;
20230b57cec5SDimitry Andric enum sched_type schedule; /* scheduling algorithm */
20240b57cec5SDimitry Andric kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
2025fe6060f1SDimitry Andric std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
20260b57cec5SDimitry Andric kmp_int32 ordered_bumped;
20270b57cec5SDimitry Andric // Stack of buffers for nest of serial regions
20280b57cec5SDimitry Andric struct dispatch_private_info *next;
20290b57cec5SDimitry Andric kmp_int32 type_size; /* the size of types in private_info */
20300b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
20310b57cec5SDimitry Andric kmp_int32 hier_id;
20320b57cec5SDimitry Andric void *parent; /* hierarchical scheduling parent pointer */
20330b57cec5SDimitry Andric #endif
20340b57cec5SDimitry Andric enum cons_type pushed_ws;
20350b57cec5SDimitry Andric } dispatch_private_info_t;
20360b57cec5SDimitry Andric
// Shared dispatch counters, 32-bit variant. The three counters are volatile;
// ordered_dummy only pads the struct to its historical size.
20370b57cec5SDimitry Andric typedef struct dispatch_shared_info32 {
20380b57cec5SDimitry Andric /* chunk index under dynamic, number of idle threads under static-steal;
20390b57cec5SDimitry Andric iteration index otherwise */
20400b57cec5SDimitry Andric volatile kmp_uint32 iteration;
2041fe6060f1SDimitry Andric volatile kmp_int32 num_done;
20420b57cec5SDimitry Andric volatile kmp_uint32 ordered_iteration;
20430b57cec5SDimitry Andric // Dummy to retain the structure size after making ordered_iteration scalar
20440b57cec5SDimitry Andric kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
20450b57cec5SDimitry Andric } dispatch_shared_info32_t;
20460b57cec5SDimitry Andric
// Shared dispatch counters, 64-bit variant.
// NOTE(review): the padding here is KMP_MAX_ORDERED - 3 elements, versus
// KMP_MAX_ORDERED - 1 in the 32-bit variant -- confirm this is intended.
20470b57cec5SDimitry Andric typedef struct dispatch_shared_info64 {
20480b57cec5SDimitry Andric /* chunk index under dynamic, number of idle threads under static-steal;
20490b57cec5SDimitry Andric iteration index otherwise */
20500b57cec5SDimitry Andric volatile kmp_uint64 iteration;
2051fe6060f1SDimitry Andric volatile kmp_int64 num_done;
20520b57cec5SDimitry Andric volatile kmp_uint64 ordered_iteration;
20530b57cec5SDimitry Andric // Dummy to retain the structure size after making ordered_iteration scalar
20540b57cec5SDimitry Andric kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
20550b57cec5SDimitry Andric } dispatch_shared_info64_t;
20560b57cec5SDimitry Andric
// Shared dispatch buffer: the union `u` overlays the 32-bit and 64-bit
// counter blocks, followed by the buffer index and the doacross bookkeeping.
20570b57cec5SDimitry Andric typedef struct dispatch_shared_info {
20580b57cec5SDimitry Andric union shared_info {
20590b57cec5SDimitry Andric dispatch_shared_info32_t s32;
20600b57cec5SDimitry Andric dispatch_shared_info64_t s64;
20610b57cec5SDimitry Andric } u;
20620b57cec5SDimitry Andric volatile kmp_uint32 buffer_index;
20630b57cec5SDimitry Andric volatile kmp_int32 doacross_buf_idx; // teamwise index
20640b57cec5SDimitry Andric volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
20650b57cec5SDimitry Andric kmp_int32 doacross_num_done; // count finished threads
20660b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
20670b57cec5SDimitry Andric void *hier;
20680b57cec5SDimitry Andric #endif
20690b57cec5SDimitry Andric #if KMP_USE_HWLOC
20700b57cec5SDimitry Andric // When linking with libhwloc, the ORDERED EPCC test slows down on big
20710b57cec5SDimitry Andric // machines (> 48 cores). Performance analysis showed that a cache thrash
20720b57cec5SDimitry Andric // was occurring and this padding helps alleviate the problem.
20730b57cec5SDimitry Andric char padding[64];
20740b57cec5SDimitry Andric #endif
20750b57cec5SDimitry Andric } dispatch_shared_info_t;
20760b57cec5SDimitry Andric
// Dispatch descriptor: the two ORDERED entry/exit function pointers,
// pointers to the current shared and private dispatch buffers, the private
// buffer array with its index, and the thread-side doacross state.
20770b57cec5SDimitry Andric typedef struct kmp_disp {
20780b57cec5SDimitry Andric /* Vector for ORDERED SECTION */
20790b57cec5SDimitry Andric void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
20800b57cec5SDimitry Andric /* Vector for END ORDERED SECTION */
20810b57cec5SDimitry Andric void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
20820b57cec5SDimitry Andric
20830b57cec5SDimitry Andric dispatch_shared_info_t *th_dispatch_sh_current;
20840b57cec5SDimitry Andric dispatch_private_info_t *th_dispatch_pr_current;
20850b57cec5SDimitry Andric
20860b57cec5SDimitry Andric dispatch_private_info_t *th_disp_buffer;
2087fe6060f1SDimitry Andric kmp_uint32 th_disp_index;
20880b57cec5SDimitry Andric kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
20890b57cec5SDimitry Andric volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
20900b57cec5SDimitry Andric kmp_int64 *th_doacross_info; // info on loop bounds
20910b57cec5SDimitry Andric #if KMP_USE_INTERNODE_ALIGNMENT
20920b57cec5SDimitry Andric char more_padding[INTERNODE_CACHE_LINE];
20930b57cec5SDimitry Andric #endif
20940b57cec5SDimitry Andric } kmp_disp_t;
20950b57cec5SDimitry Andric
20960b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
20970b57cec5SDimitry Andric /* Barrier stuff */
20980b57cec5SDimitry Andric
20990b57cec5SDimitry Andric /* constants for barrier state update */
/* Bit layout of a barrier state word: bit 0 = sleep, bit 1 = unused (must
   stay clear), bit 2 = least significant bit of the counter. The derived
   masks are therefore 1, 2 and 4, and one "bump" adds 4 to the state. */
21000b57cec5SDimitry Andric #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
21010b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
21020b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
21030b57cec5SDimitry Andric #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
21040b57cec5SDimitry Andric
21050b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
21060b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
21070b57cec5SDimitry Andric #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
21080b57cec5SDimitry Andric
/* Compile-time checks: both flag bits must sit below the bump bit so that
   bumping the counter never disturbs them. */
21090b57cec5SDimitry Andric #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
21100b57cec5SDimitry Andric #error "Barrier sleep bit must be smaller than barrier bump bit"
21110b57cec5SDimitry Andric #endif
21120b57cec5SDimitry Andric #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
21130b57cec5SDimitry Andric #error "Barrier unused bit must be smaller than barrier bump bit"
21140b57cec5SDimitry Andric #endif
21150b57cec5SDimitry Andric
21160b57cec5SDimitry Andric // Constants for release barrier wait state: currently, hierarchical only
// The five wait states take the consecutive values 0..4.
21170b57cec5SDimitry Andric #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
21180b57cec5SDimitry Andric #define KMP_BARRIER_OWN_FLAG \
21190b57cec5SDimitry Andric 1 // Normal state; worker waiting on own b_go flag in release
21200b57cec5SDimitry Andric #define KMP_BARRIER_PARENT_FLAG \
21210b57cec5SDimitry Andric 2 // Special state; worker waiting on parent's b_go flag in release
21220b57cec5SDimitry Andric #define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
21230b57cec5SDimitry Andric 3 // Special state; tells worker to shift from parent to own b_go
21240b57cec5SDimitry Andric #define KMP_BARRIER_SWITCHING \
21250b57cec5SDimitry Andric 4 // Special state; worker resets appropriate flag on wake-up
21260b57cec5SDimitry Andric
// Values for a thread's th_reap_state.
21270b57cec5SDimitry Andric #define KMP_NOT_SAFE_TO_REAP \
21280b57cec5SDimitry Andric 0 // Thread th_reap_state: not safe to reap (tasking)
21290b57cec5SDimitry Andric #define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
21300b57cec5SDimitry Andric
2131349cc55cSDimitry Andric // The flag_type describes the storage used for the flag.
// Enumerators are implicitly numbered from flag32 == 0; flag_unset is last.
2132349cc55cSDimitry Andric enum flag_type {
2133349cc55cSDimitry Andric flag32, /**< atomic 32 bit flags */
2134349cc55cSDimitry Andric flag64, /**< 64 bit flags */
2135349cc55cSDimitry Andric atomic_flag64, /**< atomic 64 bit flags */
2136349cc55cSDimitry Andric flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
2137349cc55cSDimitry Andric flag_unset
2138349cc55cSDimitry Andric };
2139349cc55cSDimitry Andric
// Barrier kinds. bs_last_barrier is one past the last real kind, so its value
// is 3 when KMP_FAST_REDUCTION_BARRIER is enabled and 2 otherwise.
21400b57cec5SDimitry Andric enum barrier_type {
21410b57cec5SDimitry Andric bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
21420b57cec5SDimitry Andric barriers if enabled) */
21430b57cec5SDimitry Andric bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
21440b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER
21450b57cec5SDimitry Andric bs_reduction_barrier, /* 2, All barriers that are used in reduction */
21460b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER
21470b57cec5SDimitry Andric bs_last_barrier /* Just a placeholder to mark the end */
21480b57cec5SDimitry Andric };
21490b57cec5SDimitry Andric
21500b57cec5SDimitry Andric // to work with reduction barriers just like with plain barriers
// (without the dedicated enumerator, the name aliases bs_plain_barrier)
21510b57cec5SDimitry Andric #if !KMP_FAST_REDUCTION_BARRIER
21520b57cec5SDimitry Andric #define bs_reduction_barrier bs_plain_barrier
21530b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER
21540b57cec5SDimitry Andric
// Explicitly numbered 0..4; bp_last_bar follows bp_dist_bar and so equals 5.
21550b57cec5SDimitry Andric typedef enum kmp_bar_pat { /* Barrier communication patterns */
21560b57cec5SDimitry Andric bp_linear_bar =
21570b57cec5SDimitry Andric 0, /* Single level (degenerate) tree */
21580b57cec5SDimitry Andric bp_tree_bar =
21590b57cec5SDimitry Andric 1, /* Balanced tree with branching factor 2^n */
2160fe6060f1SDimitry Andric bp_hyper_bar = 2, /* Hypercube-embedded tree with min
2161fe6060f1SDimitry Andric branching factor 2^n */
21620b57cec5SDimitry Andric bp_hierarchical_bar = 3, /* Machine hierarchy tree */
2163349cc55cSDimitry Andric bp_dist_bar = 4, /* Distributed barrier */
21640b57cec5SDimitry Andric bp_last_bar /* Placeholder to mark the end */
21650b57cec5SDimitry Andric } kmp_bar_pat_e;
21660b57cec5SDimitry Andric
// NOTE(review): presumably enables pushing ICVs to workers during the
// barrier -- confirm where this macro is tested.
21670b57cec5SDimitry Andric #define KMP_BARRIER_ICV_PUSH 1
21680b57cec5SDimitry Andric
21690b57cec5SDimitry Andric /* Record for holding the values of the internal controls stack records */
/* Records are chained through `next`. The struct is copied with plain
   assignment (see copy_icvs), so the `next` pointer is copied along with the
   control values. */
21700b57cec5SDimitry Andric typedef struct kmp_internal_control {
21710b57cec5SDimitry Andric int serial_nesting_level; /* corresponds to the value of the
21720b57cec5SDimitry Andric th_team_serialized field */
21730b57cec5SDimitry Andric kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
21740b57cec5SDimitry Andric thread) */
21750b57cec5SDimitry Andric kmp_int8
21760b57cec5SDimitry Andric bt_set; /* internal control for whether blocktime is explicitly set */
21770b57cec5SDimitry Andric int blocktime; /* internal control for blocktime */
21780b57cec5SDimitry Andric #if KMP_USE_MONITOR
21790b57cec5SDimitry Andric int bt_intervals; /* internal control for blocktime intervals */
21800b57cec5SDimitry Andric #endif
21810b57cec5SDimitry Andric int nproc; /* internal control for #threads for next parallel region (per
21820b57cec5SDimitry Andric thread) */
21830b57cec5SDimitry Andric int thread_limit; /* internal control for thread-limit-var */
21845f757f3fSDimitry Andric int task_thread_limit; /* internal control for thread-limit-var of a task */
21850b57cec5SDimitry Andric int max_active_levels; /* internal control for max_active_levels */
21860b57cec5SDimitry Andric kmp_r_sched_t
21870b57cec5SDimitry Andric sched; /* internal control for runtime schedule {sched,chunk} pair */
21880b57cec5SDimitry Andric kmp_proc_bind_t proc_bind; /* internal control for affinity */
21890b57cec5SDimitry Andric kmp_int32 default_device; /* internal control for default device */
21900b57cec5SDimitry Andric struct kmp_internal_control *next;
21910b57cec5SDimitry Andric } kmp_internal_control_t;
21920b57cec5SDimitry Andric
copy_icvs(kmp_internal_control_t * dst,kmp_internal_control_t * src)21930b57cec5SDimitry Andric static inline void copy_icvs(kmp_internal_control_t *dst,
21940b57cec5SDimitry Andric kmp_internal_control_t *src) {
21950b57cec5SDimitry Andric *dst = *src;
21960b57cec5SDimitry Andric }
21970b57cec5SDimitry Andric
21980b57cec5SDimitry Andric /* Thread barrier needs volatile barrier fields */
21990b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_bstate {
22000b57cec5SDimitry Andric // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
22010b57cec5SDimitry Andric // uses of it). It is not explicitly aligned below, because we *don't* want
22020b57cec5SDimitry Andric // it to be padded -- instead, we fit b_go into the same cache line with
22030b57cec5SDimitry Andric // th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier.
22040b57cec5SDimitry Andric kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
22050b57cec5SDimitry Andric // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
22060b57cec5SDimitry Andric // same NGO store
22070b57cec5SDimitry Andric volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
22080b57cec5SDimitry Andric KMP_ALIGN_CACHE volatile kmp_uint64
22090b57cec5SDimitry Andric b_arrived; // STATE => task reached synch point.
22100b57cec5SDimitry Andric kmp_uint32 *skip_per_level;
22110b57cec5SDimitry Andric kmp_uint32 my_level;
22120b57cec5SDimitry Andric kmp_int32 parent_tid;
22130b57cec5SDimitry Andric kmp_int32 old_tid;
22140b57cec5SDimitry Andric kmp_uint32 depth;
22150b57cec5SDimitry Andric struct kmp_bstate *parent_bar;
22160b57cec5SDimitry Andric kmp_team_t *team;
22170b57cec5SDimitry Andric kmp_uint64 leaf_state;
22180b57cec5SDimitry Andric kmp_uint32 nproc;
22190b57cec5SDimitry Andric kmp_uint8 base_leaf_kids;
22200b57cec5SDimitry Andric kmp_uint8 leaf_kids;
22210b57cec5SDimitry Andric kmp_uint8 offset;
22220b57cec5SDimitry Andric kmp_uint8 wait_flag;
22230b57cec5SDimitry Andric kmp_uint8 use_oncore_barrier;
22240b57cec5SDimitry Andric #if USE_DEBUGGER
22250b57cec5SDimitry Andric // The following field is intended for the debugger solely. Only the worker
22260b57cec5SDimitry Andric // thread itself accesses this field: the worker increases it by 1 when it
22270b57cec5SDimitry Andric // arrives to a barrier.
22280b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
22290b57cec5SDimitry Andric #endif /* USE_DEBUGGER */
22300b57cec5SDimitry Andric } kmp_bstate_t;
22310b57cec5SDimitry Andric
22320b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_union {
22330b57cec5SDimitry Andric double b_align; /* use worst case alignment */
22340b57cec5SDimitry Andric char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
22350b57cec5SDimitry Andric kmp_bstate_t bb;
22360b57cec5SDimitry Andric };
22370b57cec5SDimitry Andric
22380b57cec5SDimitry Andric typedef union kmp_barrier_union kmp_balign_t;
22390b57cec5SDimitry Andric
22400b57cec5SDimitry Andric /* Team barrier needs only non-volatile arrived counter */
22410b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_team_union {
22420b57cec5SDimitry Andric double b_align; /* use worst case alignment */
22430b57cec5SDimitry Andric char b_pad[CACHE_LINE];
22440b57cec5SDimitry Andric struct {
22450b57cec5SDimitry Andric kmp_uint64 b_arrived; /* STATE => task reached synch point. */
22460b57cec5SDimitry Andric #if USE_DEBUGGER
22470b57cec5SDimitry Andric // The following two fields are indended for the debugger solely. Only
2248fe6060f1SDimitry Andric // primary thread of the team accesses these fields: the first one is
2249fe6060f1SDimitry Andric // increased by 1 when the primary thread arrives to a barrier, the second
2250fe6060f1SDimitry Andric // one is increased by one when all the threads arrived.
22510b57cec5SDimitry Andric kmp_uint b_master_arrived;
22520b57cec5SDimitry Andric kmp_uint b_team_arrived;
22530b57cec5SDimitry Andric #endif
22540b57cec5SDimitry Andric };
22550b57cec5SDimitry Andric };
22560b57cec5SDimitry Andric
22570b57cec5SDimitry Andric typedef union kmp_barrier_team_union kmp_balign_team_t;
22580b57cec5SDimitry Andric
/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
   threads when a condition changes. This works around an NPTL bug where
   padding was added to pthread_cond_t, which caused the initialization routine
   to write outside of the structure if compiled on pre-NPTL threads.
   (The padded types are kmp_cond_union and kmp_mutex_union in the KMP_OS_UNIX
   section; the Windows* OS types immediately below are unrelated to it.) */
#if KMP_OS_WINDOWS
/* Mutex type for Windows* OS: a thin wrapper around a CRITICAL_SECTION. */
typedef struct kmp_win32_mutex {
  /* The Lock */
  CRITICAL_SECTION cs;
} kmp_win32_mutex_t;

/* Condition-variable state for Windows* OS, built from counters plus a
   manual-reset event. */
typedef struct kmp_win32_cond {
  /* Count of the number of waiters. */
  int waiters_count_;

  /* Serialize access to <waiters_count_> */
  kmp_win32_mutex_t waiters_count_lock_;

  /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
  int release_count_;

  /* Keeps track of the current "generation" so that we don't allow */
  /* one thread to steal all the "releases" from the broadcast. */
  int wait_generation_count_;

  /* A manual-reset event that's used to block and release waiting threads. */
  HANDLE event_;
} kmp_win32_cond_t;
#endif /* KMP_OS_WINDOWS */
22870b57cec5SDimitry Andric
#if KMP_OS_UNIX

/* pthread condition variable overlaid with a CACHE_LINE-byte pad, so the
   union occupies at least CACHE_LINE bytes. */
union KMP_ALIGN_CACHE kmp_cond_union {
  double c_align;
  char c_pad[CACHE_LINE];
  pthread_cond_t c_cond;
};

typedef union kmp_cond_union kmp_cond_align_t;

/* pthread mutex overlaid with a CACHE_LINE-byte pad, so the union occupies at
   least CACHE_LINE bytes. */
union KMP_ALIGN_CACHE kmp_mutex_union {
  double m_align;
  char m_pad[CACHE_LINE];
  pthread_mutex_t m_mutex;
};

typedef union kmp_mutex_union kmp_mutex_align_t;

#endif /* KMP_OS_UNIX */
23070b57cec5SDimitry Andric
23080b57cec5SDimitry Andric typedef struct kmp_desc_base {
23090b57cec5SDimitry Andric void *ds_stackbase;
23100b57cec5SDimitry Andric size_t ds_stacksize;
23110b57cec5SDimitry Andric int ds_stackgrow;
23120b57cec5SDimitry Andric kmp_thread_t ds_thread;
23130b57cec5SDimitry Andric volatile int ds_tid;
23140b57cec5SDimitry Andric int ds_gtid;
23150b57cec5SDimitry Andric #if KMP_OS_WINDOWS
23160b57cec5SDimitry Andric volatile int ds_alive;
23170b57cec5SDimitry Andric DWORD ds_thread_id;
23180b57cec5SDimitry Andric /* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes.
23190b57cec5SDimitry Andric However, debugger support (libomp_db) cannot work with handles, because they
23200b57cec5SDimitry Andric uncomparable. For example, debugger requests info about thread with handle h.
23210b57cec5SDimitry Andric h is valid within debugger process, and meaningless within debugee process.
23220b57cec5SDimitry Andric Even if h is duped by call to DuplicateHandle(), so the result h' is valid
23230b57cec5SDimitry Andric within debugee process, but it is a *new* handle which does *not* equal to
23240b57cec5SDimitry Andric any other handle in debugee... The only way to compare handles is convert
23250b57cec5SDimitry Andric them to system-wide ids. GetThreadId() function is available only in
23260b57cec5SDimitry Andric Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available
23270b57cec5SDimitry Andric on all Windows* OS flavours (including Windows* 95). Thus, we have to get
23280b57cec5SDimitry Andric thread id by call to GetCurrentThreadId() from within the thread and save it
23290b57cec5SDimitry Andric to let libomp_db identify threads. */
23300b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
23310b57cec5SDimitry Andric } kmp_desc_base_t;
23320b57cec5SDimitry Andric
23330b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_desc {
23340b57cec5SDimitry Andric double ds_align; /* use worst case alignment */
23350b57cec5SDimitry Andric char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
23360b57cec5SDimitry Andric kmp_desc_base_t ds;
23370b57cec5SDimitry Andric } kmp_desc_t;
23380b57cec5SDimitry Andric
23390b57cec5SDimitry Andric typedef struct kmp_local {
23400b57cec5SDimitry Andric volatile int this_construct; /* count of single's encountered by thread */
23410b57cec5SDimitry Andric void *reduce_data;
23420b57cec5SDimitry Andric #if KMP_USE_BGET
23430b57cec5SDimitry Andric void *bget_data;
23440b57cec5SDimitry Andric void *bget_list;
23450b57cec5SDimitry Andric #if !USE_CMP_XCHG_FOR_BGET
23460b57cec5SDimitry Andric #ifdef USE_QUEUING_LOCK_FOR_BGET
23470b57cec5SDimitry Andric kmp_lock_t bget_lock; /* Lock for accessing bget free list */
23480b57cec5SDimitry Andric #else
23490b57cec5SDimitry Andric kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
23500b57cec5SDimitry Andric // bootstrap lock so we can use it at library
23510b57cec5SDimitry Andric // shutdown.
23520b57cec5SDimitry Andric #endif /* USE_LOCK_FOR_BGET */
23530b57cec5SDimitry Andric #endif /* ! USE_CMP_XCHG_FOR_BGET */
23540b57cec5SDimitry Andric #endif /* KMP_USE_BGET */
23550b57cec5SDimitry Andric
23560b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T
23570b57cec5SDimitry Andric packed_reduction_method; /* stored by __kmpc_reduce*(), used by
23580b57cec5SDimitry Andric __kmpc_end_reduce*() */
23590b57cec5SDimitry Andric
23600b57cec5SDimitry Andric } kmp_local_t;
23610b57cec5SDimitry Andric
// Assign b to a only when the two compare unequal, so an unchanged value is
// never rewritten. Both arguments may be evaluated twice; pass expressions
// without side effects.
//
// Each macro is wrapped in do { } while (0) so it expands to exactly one
// statement: a bare `if` here would capture the `else` of an enclosing
// `if (...) KMP_CHECK_UPDATE(...); else ...`.
#define KMP_CHECK_UPDATE(a, b)                                                 \
  do {                                                                         \
    if ((a) != (b))                                                            \
      (a) = (b);                                                               \
  } while (0)
// Same check, but the store goes through TCW_SYNC_PTR.
#define KMP_CHECK_UPDATE_SYNC(a, b)                                            \
  do {                                                                         \
    if ((a) != (b))                                                            \
      TCW_SYNC_PTR((a), (b));                                                  \
  } while (0)
23680b57cec5SDimitry Andric
// Accessors for internal control variables (ICVs). Every macro reaches the
// td_icvs record of a thread's current task (th.th_current_task->td_icvs).
// The (xteam, xtid) forms find the thread via (xteam)->t.t_threads[(xtid)];
// the (xthread) forms take the thread pointer directly.

// --- read access by (team, thread index) ---
#define get__blocktime(xteam, xtid)                                            \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
#define get__bt_set(xteam, xtid)                                               \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
#if KMP_USE_MONITOR
#define get__bt_intervals(xteam, xtid)                                         \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
#endif

#define get__dynamic_2(xteam, xtid)                                            \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
#define get__nproc_2(xteam, xtid)                                              \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
#define get__sched_2(xteam, xtid)                                              \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)

// --- write access by (team, thread index) ---
#define set__blocktime_team(xteam, xtid, xval)                                 \
  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) =     \
       (xval))

#if KMP_USE_MONITOR
#define set__bt_intervals_team(xteam, xtid, xval)                              \
  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) =  \
       (xval))
#endif

#define set__bt_set_team(xteam, xtid, xval)                                    \
  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))

// --- access by thread pointer ---
#define set__dynamic(xthread, xval)                                            \
  (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
// Normalizes the stored value to FTN_TRUE / FTN_FALSE.
#define get__dynamic(xthread)                                                  \
  (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))

#define set__nproc(xthread, xval)                                              \
  (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))

#define set__thread_limit(xthread, xval)                                       \
  (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))

#define set__max_active_levels(xthread, xval)                                  \
  (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))

#define get__max_active_levels(xthread)                                        \
  ((xthread)->th.th_current_task->td_icvs.max_active_levels)

#define set__sched(xthread, xval)                                              \
  (((xthread)->th.th_current_task->td_icvs.sched) = (xval))

#define set__proc_bind(xthread, xval)                                          \
  (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
#define get__proc_bind(xthread)                                                \
  ((xthread)->th.th_current_task->td_icvs.proc_bind)
24220b57cec5SDimitry Andric
24230b57cec5SDimitry Andric // OpenMP tasking data structures
24240b57cec5SDimitry Andric
/* Tasking modes. tskm_max duplicates the value of the highest mode
   (tskm_task_teams). */
typedef enum kmp_tasking_mode {
  tskm_immediate_exec = 0,
  tskm_extra_barrier = 1,
  tskm_task_teams = 2,
  tskm_max = 2
} kmp_tasking_mode_t;
24310b57cec5SDimitry Andric
24320b57cec5SDimitry Andric extern kmp_tasking_mode_t
24330b57cec5SDimitry Andric __kmp_tasking_mode; /* determines how/when to execute tasks */
24340b57cec5SDimitry Andric extern int __kmp_task_stealing_constraint;
24350b57cec5SDimitry Andric extern int __kmp_enable_task_throttling;
24360b57cec5SDimitry Andric extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
24370b57cec5SDimitry Andric // specified, defaults to 0 otherwise
24380b57cec5SDimitry Andric // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
24390b57cec5SDimitry Andric extern kmp_int32 __kmp_max_task_priority;
24400b57cec5SDimitry Andric // Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
24410b57cec5SDimitry Andric extern kmp_uint64 __kmp_taskloop_min_tasks;
24420b57cec5SDimitry Andric
/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
   taskdata first */
// Convert between the two halves of that block: the kmp_taskdata_t sits
// immediately before its kmp_task_t. Arguments and results are fully
// parenthesized so compound arguments (e.g. `c ? p : q`) and uses such as
// KMP_TASKDATA_TO_TASK(td)->field group as intended.
#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)(task)) - 1)
#define KMP_TASKDATA_TO_TASK(taskdata) ((kmp_task_t *)((taskdata) + 1))

// The tt_found_tasks flag is a signal to all threads in the team that tasks
// were spawned and queued since the previous barrier release.
#define KMP_TASKING_ENABLED(task_team)                                         \
  (TRUE == TCR_SYNC_4((task_team)->tt.tt_found_tasks))
24520b57cec5SDimitry Andric /*!
24530b57cec5SDimitry Andric @ingroup BASIC_TYPES
24540b57cec5SDimitry Andric @{
24550b57cec5SDimitry Andric */
24560b57cec5SDimitry Andric
24570b57cec5SDimitry Andric /*!
24580b57cec5SDimitry Andric */
24590b57cec5SDimitry Andric typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
24600b57cec5SDimitry Andric
24610b57cec5SDimitry Andric typedef union kmp_cmplrdata {
24620b57cec5SDimitry Andric kmp_int32 priority; /**< priority specified by user for the task */
24630b57cec5SDimitry Andric kmp_routine_entry_t
24640b57cec5SDimitry Andric destructors; /* pointer to function to invoke deconstructors of
24650b57cec5SDimitry Andric firstprivate C++ objects */
24660b57cec5SDimitry Andric /* future data */
24670b57cec5SDimitry Andric } kmp_cmplrdata_t;
24680b57cec5SDimitry Andric
24690b57cec5SDimitry Andric /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
24700b57cec5SDimitry Andric /*!
24710b57cec5SDimitry Andric */
24720b57cec5SDimitry Andric typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
24730b57cec5SDimitry Andric void *shareds; /**< pointer to block of pointers to shared vars */
24740b57cec5SDimitry Andric kmp_routine_entry_t
24750b57cec5SDimitry Andric routine; /**< pointer to routine to call for executing task */
24760b57cec5SDimitry Andric kmp_int32 part_id; /**< part id for the task */
24770b57cec5SDimitry Andric kmp_cmplrdata_t
24780b57cec5SDimitry Andric data1; /* Two known optional additions: destructors and priority */
24790b57cec5SDimitry Andric kmp_cmplrdata_t data2; /* Process destructors first, priority second */
24800b57cec5SDimitry Andric /* future data */
24810b57cec5SDimitry Andric /* private vars */
24820b57cec5SDimitry Andric } kmp_task_t;
24830b57cec5SDimitry Andric
24840b57cec5SDimitry Andric /*!
24850b57cec5SDimitry Andric @}
24860b57cec5SDimitry Andric */
24870b57cec5SDimitry Andric
24880b57cec5SDimitry Andric typedef struct kmp_taskgroup {
24890b57cec5SDimitry Andric std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
24900b57cec5SDimitry Andric std::atomic<kmp_int32>
24910b57cec5SDimitry Andric cancel_request; // request for cancellation of this taskgroup
24920b57cec5SDimitry Andric struct kmp_taskgroup *parent; // parent taskgroup
24930b57cec5SDimitry Andric // Block of data to perform task reduction
24940b57cec5SDimitry Andric void *reduce_data; // reduction related info
24950b57cec5SDimitry Andric kmp_int32 reduce_num_data; // number of data items to reduce
2496fe6060f1SDimitry Andric uintptr_t *gomp_data; // gomp reduction data
24970b57cec5SDimitry Andric } kmp_taskgroup_t;
24980b57cec5SDimitry Andric
// Forward declarations of the task-dependence types, so they can refer to one
// another by pointer before their full definitions appear.
typedef union kmp_depnode kmp_depnode_t;
typedef struct kmp_depnode_list kmp_depnode_list_t;
typedef struct kmp_dephash_entry kmp_dephash_entry_t;
25030b57cec5SDimitry Andric
// macros for checking dep flag as an integer
// (KMP_DEP_INOUT has the same value as KMP_DEP_IN | KMP_DEP_OUT)
#define KMP_DEP_IN 0x1
#define KMP_DEP_OUT 0x2
#define KMP_DEP_INOUT 0x3
#define KMP_DEP_MTX 0x4
#define KMP_DEP_SET 0x8
#define KMP_DEP_ALL 0x80
251174626c16SDimitry Andric // Compiler sends us this info. Note: some test cases contain an explicit copy
251274626c16SDimitry Andric // of this struct and should be in sync with any changes here.
25130b57cec5SDimitry Andric typedef struct kmp_depend_info {
25140b57cec5SDimitry Andric kmp_intptr_t base_addr;
25150b57cec5SDimitry Andric size_t len;
2516fe6060f1SDimitry Andric union {
2517349cc55cSDimitry Andric kmp_uint8 flag; // flag as an unsigned char
2518349cc55cSDimitry Andric struct { // flag as a set of 8 bits
2519439352acSDimitry Andric #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
25205f757f3fSDimitry Andric /* Same fields as in the #else branch, but in reverse order */
25215f757f3fSDimitry Andric unsigned all : 1;
25225f757f3fSDimitry Andric unsigned unused : 3;
25235f757f3fSDimitry Andric unsigned set : 1;
25245f757f3fSDimitry Andric unsigned mtx : 1;
25255f757f3fSDimitry Andric unsigned out : 1;
25265f757f3fSDimitry Andric unsigned in : 1;
25275f757f3fSDimitry Andric #else
2528fe6060f1SDimitry Andric unsigned in : 1;
2529fe6060f1SDimitry Andric unsigned out : 1;
2530fe6060f1SDimitry Andric unsigned mtx : 1;
2531fe6060f1SDimitry Andric unsigned set : 1;
2532349cc55cSDimitry Andric unsigned unused : 3;
2533349cc55cSDimitry Andric unsigned all : 1;
25345f757f3fSDimitry Andric #endif
25350b57cec5SDimitry Andric } flags;
2536fe6060f1SDimitry Andric };
25370b57cec5SDimitry Andric } kmp_depend_info_t;
25380b57cec5SDimitry Andric
25390b57cec5SDimitry Andric // Internal structures to work with task dependencies:
25400b57cec5SDimitry Andric struct kmp_depnode_list {
25410b57cec5SDimitry Andric kmp_depnode_t *node;
25420b57cec5SDimitry Andric kmp_depnode_list_t *next;
25430b57cec5SDimitry Andric };
25440b57cec5SDimitry Andric
25450b57cec5SDimitry Andric // Max number of mutexinoutset dependencies per node
25460b57cec5SDimitry Andric #define MAX_MTX_DEPS 4
25470b57cec5SDimitry Andric
25480b57cec5SDimitry Andric typedef struct kmp_base_depnode {
25490b57cec5SDimitry Andric kmp_depnode_list_t *successors; /* used under lock */
25500b57cec5SDimitry Andric kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
25510b57cec5SDimitry Andric kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
25520b57cec5SDimitry Andric kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
25530b57cec5SDimitry Andric kmp_lock_t lock; /* guards shared fields: task, successors */
25540b57cec5SDimitry Andric #if KMP_SUPPORT_GRAPH_OUTPUT
25550b57cec5SDimitry Andric kmp_uint32 id;
25560b57cec5SDimitry Andric #endif
25570b57cec5SDimitry Andric std::atomic<kmp_int32> npredecessors;
25580b57cec5SDimitry Andric std::atomic<kmp_int32> nrefs;
25590b57cec5SDimitry Andric } kmp_base_depnode_t;
25600b57cec5SDimitry Andric
25610b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_depnode {
25620b57cec5SDimitry Andric double dn_align; /* use worst case alignment */
25630b57cec5SDimitry Andric char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
25640b57cec5SDimitry Andric kmp_base_depnode_t dn;
25650b57cec5SDimitry Andric };
25660b57cec5SDimitry Andric
25670b57cec5SDimitry Andric struct kmp_dephash_entry {
25680b57cec5SDimitry Andric kmp_intptr_t addr;
25690b57cec5SDimitry Andric kmp_depnode_t *last_out;
2570fe6060f1SDimitry Andric kmp_depnode_list_t *last_set;
2571fe6060f1SDimitry Andric kmp_depnode_list_t *prev_set;
2572fe6060f1SDimitry Andric kmp_uint8 last_flag;
25730b57cec5SDimitry Andric kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
25740b57cec5SDimitry Andric kmp_dephash_entry_t *next_in_bucket;
25750b57cec5SDimitry Andric };
25760b57cec5SDimitry Andric
25770b57cec5SDimitry Andric typedef struct kmp_dephash {
25780b57cec5SDimitry Andric kmp_dephash_entry_t **buckets;
25790b57cec5SDimitry Andric size_t size;
2580349cc55cSDimitry Andric kmp_depnode_t *last_all;
2581489b1cf2SDimitry Andric size_t generation;
25820b57cec5SDimitry Andric kmp_uint32 nelements;
25830b57cec5SDimitry Andric kmp_uint32 nconflicts;
25840b57cec5SDimitry Andric } kmp_dephash_t;
25850b57cec5SDimitry Andric
25860b57cec5SDimitry Andric typedef struct kmp_task_affinity_info {
25870b57cec5SDimitry Andric kmp_intptr_t base_addr;
25880b57cec5SDimitry Andric size_t len;
25890b57cec5SDimitry Andric struct {
25900b57cec5SDimitry Andric bool flag1 : 1;
25910b57cec5SDimitry Andric bool flag2 : 1;
25920b57cec5SDimitry Andric kmp_int32 reserved : 30;
25930b57cec5SDimitry Andric } flags;
25940b57cec5SDimitry Andric } kmp_task_affinity_info_t;
25950b57cec5SDimitry Andric
/* Discriminator stored in kmp_event_t::type. */
typedef enum kmp_event_type_t {
  KMP_EVENT_UNINITIALIZED = 0,
  KMP_EVENT_ALLOW_COMPLETION = 1
} kmp_event_type_t;
26000b57cec5SDimitry Andric
26010b57cec5SDimitry Andric typedef struct {
26020b57cec5SDimitry Andric kmp_event_type_t type;
26030b57cec5SDimitry Andric kmp_tas_lock_t lock;
26040b57cec5SDimitry Andric union {
26050b57cec5SDimitry Andric kmp_task_t *task;
26060b57cec5SDimitry Andric } ed;
26070b57cec5SDimitry Andric } kmp_event_t;
26080b57cec5SDimitry Andric
260906c3fb27SDimitry Andric #if OMPX_TASKGRAPH
// Initial number of allocated nodes while recording
#define INIT_MAPSIZE 50
261206c3fb27SDimitry Andric
typedef struct kmp_taskgraph_flags { /* This needs to be exactly 32 bits */
  unsigned nowait : 1;
  unsigned re_record : 1;
  unsigned reserved : 30;
} kmp_taskgraph_flags_t;
// Enforce the size requirement stated above at compile time.
static_assert(sizeof(kmp_taskgraph_flags_t) == 4,
              "kmp_taskgraph_flags_t must be exactly 32 bits");
261806c3fb27SDimitry Andric
261906c3fb27SDimitry Andric /// Represents a TDG node
262006c3fb27SDimitry Andric typedef struct kmp_node_info {
262106c3fb27SDimitry Andric kmp_task_t *task; // Pointer to the actual task
262206c3fb27SDimitry Andric kmp_int32 *successors; // Array of the succesors ids
262306c3fb27SDimitry Andric kmp_int32 nsuccessors; // Number of succesors of the node
262406c3fb27SDimitry Andric std::atomic<kmp_int32>
262506c3fb27SDimitry Andric npredecessors_counter; // Number of predessors on the fly
262606c3fb27SDimitry Andric kmp_int32 npredecessors; // Total number of predecessors
262706c3fb27SDimitry Andric kmp_int32 successors_size; // Number of allocated succesors ids
262806c3fb27SDimitry Andric kmp_taskdata_t *parent_task; // Parent implicit task
262906c3fb27SDimitry Andric } kmp_node_info_t;
263006c3fb27SDimitry Andric
/// Represents a TDG's current status
typedef enum kmp_tdg_status {
  KMP_TDG_NONE = 0,
  KMP_TDG_RECORDING = 1,
  KMP_TDG_READY = 2
} kmp_tdg_status_t;
263706c3fb27SDimitry Andric
263806c3fb27SDimitry Andric /// Structure that contains a TDG
263906c3fb27SDimitry Andric typedef struct kmp_tdg_info {
264006c3fb27SDimitry Andric kmp_int32 tdg_id; // Unique idenfifier of the TDG
264106c3fb27SDimitry Andric kmp_taskgraph_flags_t tdg_flags; // Flags related to a TDG
264206c3fb27SDimitry Andric kmp_int32 map_size; // Number of allocated TDG nodes
264306c3fb27SDimitry Andric kmp_int32 num_roots; // Number of roots tasks int the TDG
264406c3fb27SDimitry Andric kmp_int32 *root_tasks; // Array of tasks identifiers that are roots
264506c3fb27SDimitry Andric kmp_node_info_t *record_map; // Array of TDG nodes
264606c3fb27SDimitry Andric kmp_tdg_status_t tdg_status =
264706c3fb27SDimitry Andric KMP_TDG_NONE; // Status of the TDG (recording, ready...)
264806c3fb27SDimitry Andric std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
264906c3fb27SDimitry Andric kmp_bootstrap_lock_t
265006c3fb27SDimitry Andric graph_lock; // Protect graph attributes when updated via taskloop_recur
265106c3fb27SDimitry Andric // Taskloop reduction related
265206c3fb27SDimitry Andric void *rec_taskred_data; // Data to pass to __kmpc_task_reduction_init or
265306c3fb27SDimitry Andric // __kmpc_taskred_init
265406c3fb27SDimitry Andric kmp_int32 rec_num_taskred;
265506c3fb27SDimitry Andric } kmp_tdg_info_t;
265606c3fb27SDimitry Andric
265706c3fb27SDimitry Andric extern int __kmp_tdg_dot;
265806c3fb27SDimitry Andric extern kmp_int32 __kmp_max_tdgs;
265906c3fb27SDimitry Andric extern kmp_tdg_info_t **__kmp_global_tdgs;
266006c3fb27SDimitry Andric extern kmp_int32 __kmp_curr_tdg_idx;
266106c3fb27SDimitry Andric extern kmp_int32 __kmp_successors_size;
266206c3fb27SDimitry Andric extern std::atomic<kmp_int32> __kmp_tdg_task_id;
266306c3fb27SDimitry Andric extern kmp_int32 __kmp_num_tdg;
266406c3fb27SDimitry Andric #endif
266506c3fb27SDimitry Andric
#ifdef BUILD_TIED_TASK_STACK

/* Tied Task stack definitions */
/* One block of stack entries; blocks form a doubly linked list. */
typedef struct kmp_stack_block {
  kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
  struct kmp_stack_block *sb_next;
  struct kmp_stack_block *sb_prev;
} kmp_stack_block_t;

/* The stack itself; the first block is embedded in the structure. */
typedef struct kmp_task_stack {
  kmp_stack_block_t ts_first_block; // first block of stack entries
  kmp_taskdata_t **ts_top; // pointer to the top of stack
  kmp_int32 ts_entries; // number of entries on the stack
} kmp_task_stack_t;

#endif // BUILD_TIED_TASK_STACK
26820b57cec5SDimitry Andric
// Per-task flag word packed into bit-fields. Two mirrored layouts are given:
// the first branch is selected when __BYTE_ORDER__ is defined and equals
// __ORDER_BIG_ENDIAN__ and lists the fields in reverse order; the #else branch
// is the commented layout. In both branches the widths add up to 32 bits:
//   compiler half: 8 one-bit flags + 8 reserved bits               = 16
//   library half : 10 one-bit flags + 6 reserved bits              = 16
//                  (with OMPX_TASKGRAPH: + `onced`, 5 reserved)    = 16
// Any field added to one branch must be mirrored in the other and the
// matching reserved width reduced so the total stays at 32.
26830b57cec5SDimitry Andric typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2684439352acSDimitry Andric #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
26855f757f3fSDimitry Andric /* Same fields as in the #else branch, but in reverse order */
26865f757f3fSDimitry Andric #if OMPX_TASKGRAPH
2687*0fca6ea1SDimitry Andric unsigned reserved31 : 5;
26885f757f3fSDimitry Andric unsigned onced : 1;
26895f757f3fSDimitry Andric #else
2690*0fca6ea1SDimitry Andric unsigned reserved31 : 6;
26915f757f3fSDimitry Andric #endif
2692*0fca6ea1SDimitry Andric unsigned target : 1;
26935f757f3fSDimitry Andric unsigned native : 1;
26945f757f3fSDimitry Andric unsigned freed : 1;
26955f757f3fSDimitry Andric unsigned complete : 1;
26965f757f3fSDimitry Andric unsigned executing : 1;
26975f757f3fSDimitry Andric unsigned started : 1;
26985f757f3fSDimitry Andric unsigned team_serial : 1;
26995f757f3fSDimitry Andric unsigned tasking_ser : 1;
27005f757f3fSDimitry Andric unsigned task_serial : 1;
27015f757f3fSDimitry Andric unsigned tasktype : 1;
27025f757f3fSDimitry Andric unsigned reserved : 8;
27035f757f3fSDimitry Andric unsigned hidden_helper : 1;
27045f757f3fSDimitry Andric unsigned detachable : 1;
27055f757f3fSDimitry Andric unsigned priority_specified : 1;
27065f757f3fSDimitry Andric unsigned proxy : 1;
27075f757f3fSDimitry Andric unsigned destructors_thunk : 1;
27085f757f3fSDimitry Andric unsigned merged_if0 : 1;
27095f757f3fSDimitry Andric unsigned final : 1;
27105f757f3fSDimitry Andric unsigned tiedness : 1;
27115f757f3fSDimitry Andric #else
27120b57cec5SDimitry Andric /* Compiler flags */ /* Total compiler flags must be 16 bits */
27130b57cec5SDimitry Andric unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
27140b57cec5SDimitry Andric unsigned final : 1; /* task is final(1) so execute immediately */
27150b57cec5SDimitry Andric unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
27160b57cec5SDimitry Andric code path */
27170b57cec5SDimitry Andric unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
27180b57cec5SDimitry Andric invoke destructors from the runtime */
27190b57cec5SDimitry Andric unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
27200b57cec5SDimitry Andric context of the RTL) */
27210b57cec5SDimitry Andric unsigned priority_specified : 1; /* set if the compiler provides priority
27220b57cec5SDimitry Andric setting for the task */
27230b57cec5SDimitry Andric unsigned detachable : 1; /* 1 == can detach */
2724e8d8bef9SDimitry Andric unsigned hidden_helper : 1; /* 1 == hidden helper task */
2725e8d8bef9SDimitry Andric unsigned reserved : 8; /* reserved for compiler use */
27260b57cec5SDimitry Andric
27270b57cec5SDimitry Andric /* Library flags */ /* Total library flags must be 16 bits */
27280b57cec5SDimitry Andric unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
27290b57cec5SDimitry Andric unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
27300b57cec5SDimitry Andric unsigned tasking_ser : 1; // all tasks in team are either executed immediately
27310b57cec5SDimitry Andric // (1) or may be deferred (0)
27320b57cec5SDimitry Andric unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
27330b57cec5SDimitry Andric // (0) [>= 2 threads]
27340b57cec5SDimitry Andric /* If either team_serial or tasking_ser is set, task team may be NULL */
27350b57cec5SDimitry Andric /* Task State Flags: */
27360b57cec5SDimitry Andric unsigned started : 1; /* 1==started, 0==not started */
27370b57cec5SDimitry Andric unsigned executing : 1; /* 1==executing, 0==not executing */
27380b57cec5SDimitry Andric unsigned complete : 1; /* 1==complete, 0==not complete */
2739480093f4SDimitry Andric unsigned freed : 1; /* 1==freed, 0==allocated */
27400b57cec5SDimitry Andric unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
/* NOTE(review): `target` is undocumented in this header; presumably
   1==task is a target task -- confirm against the code that sets it */
2741*0fca6ea1SDimitry Andric unsigned target : 1;
274206c3fb27SDimitry Andric #if OMPX_TASKGRAPH
274306c3fb27SDimitry Andric unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay purposes */
2744*0fca6ea1SDimitry Andric unsigned reserved31 : 5; /* reserved for library use */
274506c3fb27SDimitry Andric #else
2746*0fca6ea1SDimitry Andric unsigned reserved31 : 6; /* reserved for library use */
274706c3fb27SDimitry Andric #endif
27485f757f3fSDimitry Andric #endif
27490b57cec5SDimitry Andric } kmp_tasking_flags_t;
27500b57cec5SDimitry Andric
// Target-offload data attached to a task (embedded in struct kmp_taskdata as
// td_target_data). Currently holds a single opaque handle.
2751bdd1243dSDimitry Andric typedef struct kmp_target_data {
2752bdd1243dSDimitry Andric void *async_handle; // libomptarget async handle for task completion query
2753bdd1243dSDimitry Andric } kmp_target_data_t;
2754bdd1243dSDimitry Andric
// Runtime bookkeeping record for a task: identity and flags, links to the
// team / parent task / taskgroup, child-task counters, dependence tracking and
// optional GOMP, OMPT, task-graph and target-offload extensions.
// The size of this struct is checked to be a multiple of sizeof(void *) by the
// KMP_BUILD_ASSERT that follows the definition, so members must be added with
// that constraint in mind.
27550b57cec5SDimitry Andric struct kmp_taskdata { /* aligned during dynamic allocation */
27560b57cec5SDimitry Andric kmp_int32 td_task_id; /* id, assigned by debugger */
27570b57cec5SDimitry Andric kmp_tasking_flags_t td_flags; /* task flags */
27580b57cec5SDimitry Andric kmp_team_t *td_team; /* team for this task */
27590b57cec5SDimitry Andric kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
27600b57cec5SDimitry Andric /* Currently not used except for perhaps IDB */
27610b57cec5SDimitry Andric kmp_taskdata_t *td_parent; /* parent task */
27620b57cec5SDimitry Andric kmp_int32 td_level; /* task nesting level */
27630b57cec5SDimitry Andric std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
27640b57cec5SDimitry Andric ident_t *td_ident; /* task identifier */
27650b57cec5SDimitry Andric // Taskwait data.
27660b57cec5SDimitry Andric ident_t *td_taskwait_ident;
27670b57cec5SDimitry Andric kmp_uint32 td_taskwait_counter;
27680b57cec5SDimitry Andric kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
27690b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_internal_control_t
27700b57cec5SDimitry Andric td_icvs; /* Internal control variables for the task */
27710b57cec5SDimitry Andric KMP_ALIGN_CACHE std::atomic<kmp_int32>
27720b57cec5SDimitry Andric td_allocated_child_tasks; /* Child tasks (+ current task) not yet
27730b57cec5SDimitry Andric deallocated */
27740b57cec5SDimitry Andric std::atomic<kmp_int32>
27750b57cec5SDimitry Andric td_incomplete_child_tasks; /* Child tasks not yet complete */
27760b57cec5SDimitry Andric kmp_taskgroup_t
27770b57cec5SDimitry Andric *td_taskgroup; // Each task keeps pointer to its current taskgroup
27780b57cec5SDimitry Andric kmp_dephash_t
27790b57cec5SDimitry Andric *td_dephash; // Dependencies for children tasks are tracked from here
27800b57cec5SDimitry Andric kmp_depnode_t
27810b57cec5SDimitry Andric *td_depnode; // Pointer to graph node if this task has dependencies
// NOTE(review): undocumented; presumably the task team this task belongs to
// -- confirm.
27820b57cec5SDimitry Andric kmp_task_team_t *td_task_team;
2783e8d8bef9SDimitry Andric size_t td_size_alloc; // Size of task structure, including shareds etc.
27840b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
27850b57cec5SDimitry Andric // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
27860b57cec5SDimitry Andric kmp_int32 td_size_loop_bounds;
27870b57cec5SDimitry Andric #endif
27880b57cec5SDimitry Andric kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
27890b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT)
27900b57cec5SDimitry Andric // GOMP sends in a copy function for copy constructors
27910b57cec5SDimitry Andric void (*td_copy_func)(void *, void *);
27920b57cec5SDimitry Andric #endif
// NOTE(review): undocumented; presumably the event used to signal completion
// of a detachable task (see the `detachable` flag) -- confirm.
27930b57cec5SDimitry Andric kmp_event_t td_allow_completion_event;
27940b57cec5SDimitry Andric #if OMPT_SUPPORT
// Per-task data for the OMPT tool interface; only present with OMPT_SUPPORT.
27950b57cec5SDimitry Andric ompt_task_info_t ompt_task_info;
27960b57cec5SDimitry Andric #endif
279706c3fb27SDimitry Andric #if OMPX_TASKGRAPH
279806c3fb27SDimitry Andric bool is_taskgraph = 0; // whether the task is within a TDG
279906c3fb27SDimitry Andric kmp_tdg_info_t *tdg; // used to associate task with a TDG
280006c3fb27SDimitry Andric #endif
// Target-offload data (libomptarget async handle, see kmp_target_data_t).
2801bdd1243dSDimitry Andric kmp_target_data_t td_target_data;
28020b57cec5SDimitry Andric }; // struct kmp_taskdata
28030b57cec5SDimitry Andric
28040b57cec5SDimitry Andric // Make sure padding above worked: build-time check that
// sizeof(kmp_taskdata_t) is a whole multiple of the pointer size.
28050b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
28060b57cec5SDimitry Andric
28070b57cec5SDimitry Andric // Data for task team but per thread
// Per-thread part of a task team: the owning thread, its task deque (storage,
// size, head/tail indices and element count) and the lock protecting it.
// Wrapped by union kmp_thread_data for cache-line padding.
28080b57cec5SDimitry Andric typedef struct kmp_base_thread_data {
28090b57cec5SDimitry Andric kmp_info_p *td_thr; // Pointer back to thread info
28100b57cec5SDimitry Andric // Used only in __kmp_execute_tasks_template, maybe not avail until task is
28110b57cec5SDimitry Andric // queued?
28120b57cec5SDimitry Andric kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
28130b57cec5SDimitry Andric kmp_taskdata_t *
28140b57cec5SDimitry Andric *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
28150b57cec5SDimitry Andric kmp_int32 td_deque_size; // Size of deque
28160b57cec5SDimitry Andric kmp_uint32 td_deque_head; // Head of deque (will wrap)
28170b57cec5SDimitry Andric kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
28180b57cec5SDimitry Andric kmp_int32 td_deque_ntasks; // Number of tasks in deque
28190b57cec5SDimitry Andric // GEH: shouldn't this be volatile since used in while-spin?
28200b57cec5SDimitry Andric kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
28210b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK
28220b57cec5SDimitry Andric kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
28230b57cec5SDimitry Andric // scheduling constraint
28240b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK
28250b57cec5SDimitry Andric } kmp_base_thread_data_t;
28260b57cec5SDimitry Andric
// Initial deque capacity is 1 << TASK_DEQUE_BITS entries (256 with the value
// below).
28270b57cec5SDimitry Andric #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
28280b57cec5SDimitry Andric #define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
28290b57cec5SDimitry Andric
// Accessors taking a kmp_base_thread_data_t lvalue `td`.
// TASK_DEQUE_MASK yields td_deque_size - 1.
// NOTE(review): that value is only usable as a wrap-around index mask while
// td_deque_size is a power of two -- confirm every resize preserves this.
28300b57cec5SDimitry Andric #define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
28310b57cec5SDimitry Andric #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
28320b57cec5SDimitry Andric
// Cache-aligned wrapper around kmp_base_thread_data_t. The real payload is
// `td`; the other two members exist only to influence alignment and size.
// NOTE(review): KMP_PAD is defined elsewhere; presumably it rounds the payload
// size up to a multiple of CACHE_LINE -- confirm.
28330b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_thread_data {
28340b57cec5SDimitry Andric kmp_base_thread_data_t td;
28350b57cec5SDimitry Andric double td_align; /* use worst case alignment */
28360b57cec5SDimitry Andric char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
28370b57cec5SDimitry Andric } kmp_thread_data_t;
28380b57cec5SDimitry Andric
// List node pairing a kmp_thread_data_t (which carries a task deque) with a
// priority value; nodes are chained through `next`. The list head lives in
// kmp_base_task_team::tt_task_pri_list.
// NOTE(review): presumably one node per distinct task priority -- confirm.
283981ad6265SDimitry Andric typedef struct kmp_task_pri {
// Deque storage for the tasks held by this node.
284081ad6265SDimitry Andric kmp_thread_data_t td;
// Priority value associated with this node.
284181ad6265SDimitry Andric kmp_int32 priority;
// Next node in the list.
284281ad6265SDimitry Andric kmp_task_pri *next;
284381ad6265SDimitry Andric } kmp_task_pri_t;
284481ad6265SDimitry Andric
28450b57cec5SDimitry Andric // Data for task teams which are used when tasking is enabled for the team
// Shared tasking state for a team: locks, the per-thread data array, the
// priority-task list, assorted "encountered" flags and two counters that are
// each placed on their own cache line (tt_unfinished_threads, tt_active).
// Wrapped by union kmp_task_team for cache-line padding.
28460b57cec5SDimitry Andric typedef struct kmp_base_task_team {
28470b57cec5SDimitry Andric kmp_bootstrap_lock_t
28480b57cec5SDimitry Andric tt_threads_lock; /* Lock used to allocate per-thread part of task team */
28490b57cec5SDimitry Andric /* must be bootstrap lock since used at library shutdown*/
285081ad6265SDimitry Andric
285181ad6265SDimitry Andric // TODO: check performance vs kmp_tas_lock_t
285281ad6265SDimitry Andric kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */
// Head of the list of kmp_task_pri_t nodes (guarded by tt_task_pri_lock per
// the comment on that lock).
285381ad6265SDimitry Andric kmp_task_pri_t *tt_task_pri_list;
285481ad6265SDimitry Andric
28550b57cec5SDimitry Andric kmp_task_team_t *tt_next; /* For linking the task team free list */
28560b57cec5SDimitry Andric kmp_thread_data_t
28570b57cec5SDimitry Andric *tt_threads_data; /* Array of per-thread structures for task team */
28580b57cec5SDimitry Andric /* Data survives task team deallocation */
28590b57cec5SDimitry Andric kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
28600b57cec5SDimitry Andric executing this team? */
28610b57cec5SDimitry Andric /* TRUE means tt_threads_data is set up and initialized */
28620b57cec5SDimitry Andric kmp_int32 tt_nproc; /* #threads in team */
28630b57cec5SDimitry Andric kmp_int32 tt_max_threads; // # entries allocated for threads_data array
28640b57cec5SDimitry Andric kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
// NOTE(review): undocumented; presumably non-zero once an untied task has been
// seen in this task team -- confirm.
28650b57cec5SDimitry Andric kmp_int32 tt_untied_task_encountered;
286681ad6265SDimitry Andric std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued
2867e8d8bef9SDimitry Andric // There is hidden helper thread encountered in this task team so that we must
2868e8d8bef9SDimitry Andric // wait when waiting on task team
2869e8d8bef9SDimitry Andric kmp_int32 tt_hidden_helper_task_encountered;
28700b57cec5SDimitry Andric
28710b57cec5SDimitry Andric KMP_ALIGN_CACHE
28720b57cec5SDimitry Andric std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
28730b57cec5SDimitry Andric
28740b57cec5SDimitry Andric KMP_ALIGN_CACHE
28750b57cec5SDimitry Andric volatile kmp_uint32
28760b57cec5SDimitry Andric tt_active; /* is the team still actively executing tasks */
28770b57cec5SDimitry Andric } kmp_base_task_team_t;
28780b57cec5SDimitry Andric
// Cache-aligned wrapper around kmp_base_task_team_t. The real payload is `tt`;
// tt_align and tt_pad exist only to influence alignment and size.
// NOTE(review): KMP_PAD is defined elsewhere; presumably it rounds the payload
// size up to a multiple of CACHE_LINE -- confirm.
28790b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_task_team {
28800b57cec5SDimitry Andric kmp_base_task_team_t tt;
28810b57cec5SDimitry Andric double tt_align; /* use worst case alignment */
28820b57cec5SDimitry Andric char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
28830b57cec5SDimitry Andric };
28840b57cec5SDimitry Andric
// Node of a singly linked list of task-team pointers.
// NOTE(review): the users of this list are outside this section; its purpose
// (e.g. saving task teams across nesting levels) should be confirmed there.
2885*0fca6ea1SDimitry Andric typedef struct kmp_task_team_list_t {
// Task team referenced by this node.
2886*0fca6ea1SDimitry Andric kmp_task_team_t *task_team;
// Next node in the list.
2887*0fca6ea1SDimitry Andric kmp_task_team_list_t *next;
2888*0fca6ea1SDimitry Andric } kmp_task_team_list_t;
2889*0fca6ea1SDimitry Andric
28900b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
28910b57cec5SDimitry Andric // Free lists keep same-size free memory slots for fast memory allocation
28920b57cec5SDimitry Andric // routines
// One set of free-list heads for the fast memory allocator. Only compiled when
// USE_FAST_MEMORY is 3 or 5; kmp_base_info holds an array of NUM_LISTS of
// these (th_free_lists). All three members are untyped list heads.
28930b57cec5SDimitry Andric typedef struct kmp_free_list {
28940b57cec5SDimitry Andric void *th_free_list_self; // Self-allocated tasks free list
28950b57cec5SDimitry Andric void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
28960b57cec5SDimitry Andric // threads
28970b57cec5SDimitry Andric void *th_free_list_other; // Non-self free list (to be returned to owner's
28980b57cec5SDimitry Andric // sync list)
28990b57cec5SDimitry Andric } kmp_free_list_t;
29000b57cec5SDimitry Andric #endif
29010b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
29020b57cec5SDimitry Andric // Hot teams array keeps hot teams and their sizes for given thread. Hot teams
29030b57cec5SDimitry Andric // are not put in teams pool, and they don't put threads in threads pool.
// Entry of a thread's hot-teams array (kmp_base_info::th_hot_teams): the hot
// team kept for one nesting level and the thread count allocated for it.
// Only compiled when KMP_NESTED_HOT_TEAMS is enabled.
29040b57cec5SDimitry Andric typedef struct kmp_hot_team_ptr {
29050b57cec5SDimitry Andric kmp_team_p *hot_team; // pointer to hot_team of given nesting level
29060b57cec5SDimitry Andric kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
29070b57cec5SDimitry Andric } kmp_hot_team_ptr_t;
29080b57cec5SDimitry Andric #endif
// Dimensions of a teams construct: league size and per-team thread count.
// Stored per thread in kmp_base_info::th_teams_size.
29090b57cec5SDimitry Andric typedef struct kmp_teams_size {
29100b57cec5SDimitry Andric kmp_int32 nteams; // number of teams in a league
29110b57cec5SDimitry Andric kmp_int32 nth; // number of threads in each team of the league
29120b57cec5SDimitry Andric } kmp_teams_size_t;
29130b57cec5SDimitry Andric
29140b57cec5SDimitry Andric // This struct stores a thread that acts as a "root" for a contention
29150b57cec5SDimitry Andric // group. Contention groups are rooted at kmp_root threads, but also at
2916fe6060f1SDimitry Andric // each primary thread of each team created in the teams construct.
29170b57cec5SDimitry Andric // This struct therefore also stores a thread_limit associated with
29180b57cec5SDimitry Andric // that contention group, and a counter to track the number of threads
29190b57cec5SDimitry Andric // active in that contention group. Each thread has a list of these: CG
29200b57cec5SDimitry Andric // root threads have an entry in their list in which cg_root refers to
29210b57cec5SDimitry Andric // the thread itself, whereas other workers in the CG will have a
29220b57cec5SDimitry Andric // single entry where cg_root is same as the entry containing their CG
29230b57cec5SDimitry Andric // root. When a thread encounters a teams construct, it will add a new
29240b57cec5SDimitry Andric // entry to the front of its list, because it now roots a new CG.
// One entry in a thread's list of contention-group (CG) roots: the root
// thread, that group's thread limit, its active-thread count, and a link to
// the next-higher entry. The list head is kmp_base_info::th_cg_roots.
29250b57cec5SDimitry Andric typedef struct kmp_cg_root {
29260b57cec5SDimitry Andric kmp_info_p *cg_root; // "root" thread for a contention group
29270b57cec5SDimitry Andric // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2928fe6060f1SDimitry Andric // thread_limit clause for teams primary threads
29290b57cec5SDimitry Andric kmp_int32 cg_thread_limit;
29300b57cec5SDimitry Andric kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
29310b57cec5SDimitry Andric struct kmp_cg_root *up; // pointer to higher level CG root in list
29320b57cec5SDimitry Andric } kmp_cg_root_t;
29330b57cec5SDimitry Andric
29340b57cec5SDimitry Andric // OpenMP thread data structures
29350b57cec5SDimitry Andric
// Per-thread descriptor of the runtime. Members are grouped by who writes
// them, and several groups start on a fresh cache line via KMP_ALIGN_CACHE:
//   - read-mostly data set up before the thread starts working,
//   - data set by the primary thread at reinit and then read/written by the
//     worker (starting at th_set_nproc),
//   - data used only by the worker (starting at th_serial_team),
//   - barrier data (th_bar) and the lock wait-queue link (th_next_waiting).
// Many members exist only under feature macros (KMP_USE_MONITOR,
// KMP_AFFINITY_SUPPORTED, KMP_NESTED_HOT_TEAMS, USE_ITT_BUILD, OMPT_SUPPORT,
// KMP_USE_HIER_SCHED, USE_FAST_MEMORY, KMP_OS_WINDOWS / KMP_OS_UNIX,
// KMP_STATS_ENABLED), so the layout depends on the build configuration.
// Wrapped by union kmp_info for cache-line padding.
29360b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_info {
29370b57cec5SDimitry Andric /* Start with the readonly data which is cache aligned and padded. This is
2938fe6060f1SDimitry Andric written before the thread starts working by the primary thread. Uber
2939fe6060f1SDimitry Andric masters may update themselves later. Usage does not consider serialized
2940fe6060f1SDimitry Andric regions. */
29410b57cec5SDimitry Andric kmp_desc_t th_info;
29420b57cec5SDimitry Andric kmp_team_p *th_team; /* team we belong to */
29430b57cec5SDimitry Andric kmp_root_p *th_root; /* pointer to root of task hierarchy */
29440b57cec5SDimitry Andric kmp_info_p *th_next_pool; /* next available thread in the pool */
29450b57cec5SDimitry Andric kmp_disp_t *th_dispatch; /* thread's dispatch data */
29460b57cec5SDimitry Andric int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
29470b57cec5SDimitry Andric
29480b57cec5SDimitry Andric /* The following are cached from the team info structure */
29490b57cec5SDimitry Andric /* TODO use these in more places as determined to be needed via profiling */
29500b57cec5SDimitry Andric int th_team_nproc; /* number of threads in a team */
2951fe6060f1SDimitry Andric kmp_info_p *th_team_master; /* the team's primary thread */
29520b57cec5SDimitry Andric int th_team_serialized; /* team is serialized */
29530b57cec5SDimitry Andric microtask_t th_teams_microtask; /* save entry address for teams construct */
29540b57cec5SDimitry Andric int th_teams_level; /* save initial level of teams construct */
29550b57cec5SDimitry Andric /* it is 0 on device but may be any on host */
29560b57cec5SDimitry Andric
29575ffd83dbSDimitry Andric /* The blocktime info is copied from the team struct to the thread struct */
29580b57cec5SDimitry Andric /* at the start of a barrier, and the values stored in the team are used */
29590b57cec5SDimitry Andric /* at points in the code where the team struct is no longer guaranteed */
29600b57cec5SDimitry Andric /* to exist (from the POV of worker threads). */
29610b57cec5SDimitry Andric #if KMP_USE_MONITOR
29620b57cec5SDimitry Andric int th_team_bt_intervals;
29630b57cec5SDimitry Andric int th_team_bt_set;
29640b57cec5SDimitry Andric #else
29650b57cec5SDimitry Andric kmp_uint64 th_team_bt_intervals;
29660b57cec5SDimitry Andric #endif
29670b57cec5SDimitry Andric
29680b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
29690b57cec5SDimitry Andric kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2970bdd1243dSDimitry Andric kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */
2971bdd1243dSDimitry Andric kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */
29720b57cec5SDimitry Andric #endif
29730b57cec5SDimitry Andric omp_allocator_handle_t th_def_allocator; /* default allocator */
2974fe6060f1SDimitry Andric /* The data set by the primary thread at reinit, then R/W by the worker */
29750b57cec5SDimitry Andric KMP_ALIGN_CACHE int
29760b57cec5SDimitry Andric th_set_nproc; /* if > 0, then only use this request for the next fork */
// NOTE(review): th_set_nested_nth / th_set_nested_nth_sz are undocumented;
// presumably a pending list of nested num_threads values and its length --
// confirm against the code that sets them.
2977*0fca6ea1SDimitry Andric int *th_set_nested_nth;
2978*0fca6ea1SDimitry Andric bool th_nt_strict; // num_threads clause has strict modifier
2979*0fca6ea1SDimitry Andric ident_t *th_nt_loc; // loc for strict modifier
2980*0fca6ea1SDimitry Andric int th_nt_sev; // error severity for strict modifier
2981*0fca6ea1SDimitry Andric const char *th_nt_msg; // error message for strict modifier
2982*0fca6ea1SDimitry Andric int th_set_nested_nth_sz;
29830b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
29840b57cec5SDimitry Andric kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
29850b57cec5SDimitry Andric #endif
29860b57cec5SDimitry Andric kmp_proc_bind_t
29870b57cec5SDimitry Andric th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
29880b57cec5SDimitry Andric kmp_teams_size_t
29890b57cec5SDimitry Andric th_teams_size; /* number of teams/threads in teams construct */
29900b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
29910b57cec5SDimitry Andric int th_current_place; /* place currently bound to */
29920b57cec5SDimitry Andric int th_new_place; /* place to bind to in par reg */
29930b57cec5SDimitry Andric int th_first_place; /* first place in partition */
29940b57cec5SDimitry Andric int th_last_place; /* last place in partition */
29950b57cec5SDimitry Andric #endif
29960b57cec5SDimitry Andric int th_prev_level; /* previous level for affinity format */
29970b57cec5SDimitry Andric int th_prev_num_threads; /* previous num_threads for affinity format */
29980b57cec5SDimitry Andric #if USE_ITT_BUILD
29990b57cec5SDimitry Andric kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
30000b57cec5SDimitry Andric kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
30010b57cec5SDimitry Andric kmp_uint64 th_frame_time; /* frame timestamp */
30020b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
30030b57cec5SDimitry Andric kmp_local_t th_local;
30040b57cec5SDimitry Andric struct private_common *th_pri_head;
30050b57cec5SDimitry Andric
30060b57cec5SDimitry Andric /* Now the data only used by the worker (after initial allocation) */
30070b57cec5SDimitry Andric /* TODO the first serial team should actually be stored in the info_t
30080b57cec5SDimitry Andric structure. this will help reduce initial allocation overhead */
30090b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_team_p
30100b57cec5SDimitry Andric *th_serial_team; /*serialized team held in reserve*/
30110b57cec5SDimitry Andric
30120b57cec5SDimitry Andric #if OMPT_SUPPORT
30130b57cec5SDimitry Andric ompt_thread_info_t ompt_thread_info;
30140b57cec5SDimitry Andric #endif
30150b57cec5SDimitry Andric
3016fe6060f1SDimitry Andric /* The following are also read by the primary thread during reinit */
30170b57cec5SDimitry Andric struct common_table *th_pri_common;
30180b57cec5SDimitry Andric
30190b57cec5SDimitry Andric volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
30200b57cec5SDimitry Andric /* while awaiting queuing lock acquire */
30210b57cec5SDimitry Andric
30220b57cec5SDimitry Andric volatile void *th_sleep_loc; // this points at a kmp_flag<T>
3023349cc55cSDimitry Andric flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
30240b57cec5SDimitry Andric
30250b57cec5SDimitry Andric ident_t *th_ident;
30260b57cec5SDimitry Andric unsigned th_x; // Random number generator data
30270b57cec5SDimitry Andric unsigned th_a; // Random number generator data
30280b57cec5SDimitry Andric
30290b57cec5SDimitry Andric /* Tasking-related data for the thread */
30300b57cec5SDimitry Andric kmp_task_team_t *th_task_team; // Task team struct
30310b57cec5SDimitry Andric kmp_taskdata_t *th_current_task; // Innermost Task being executed
30320b57cec5SDimitry Andric kmp_uint8 th_task_state; // alternating 0/1 for task team identification
30330b57cec5SDimitry Andric kmp_uint32 th_reap_state; // Non-zero indicates thread is not
30340b57cec5SDimitry Andric // tasking, thus safe to reap
30350b57cec5SDimitry Andric
30360b57cec5SDimitry Andric /* More stuff for keeping track of active/sleeping threads (this part is
30370b57cec5SDimitry Andric written by the worker thread) */
30380b57cec5SDimitry Andric kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
30390b57cec5SDimitry Andric int th_active; // ! sleeping; 32 bits for TCR/TCW
3040349cc55cSDimitry Andric std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
3041349cc55cSDimitry Andric // 0 = not used in team; 1 = used in team;
3042349cc55cSDimitry Andric // 2 = transitioning to not used in team; 3 = transitioning to used in team
30430b57cec5SDimitry Andric struct cons_header *th_cons; // used for consistency check
30440b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
30450b57cec5SDimitry Andric // used for hierarchical scheduling
30460b57cec5SDimitry Andric kmp_hier_private_bdata_t *th_hier_bar_data;
30470b57cec5SDimitry Andric #endif
30480b57cec5SDimitry Andric
30490b57cec5SDimitry Andric /* Add the synchronizing data which is cache aligned and padded. */
30500b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
30510b57cec5SDimitry Andric
30520b57cec5SDimitry Andric KMP_ALIGN_CACHE volatile kmp_int32
30530b57cec5SDimitry Andric th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
30540b57cec5SDimitry Andric
30550b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
30560b57cec5SDimitry Andric #define NUM_LISTS 4
30570b57cec5SDimitry Andric kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
30580b57cec5SDimitry Andric // allocation routines
30590b57cec5SDimitry Andric #endif
30600b57cec5SDimitry Andric
// OS-specific suspend/resume primitives. Note the init-tracking member is
// named differently per OS (th_suspend_init vs th_suspend_init_count).
30610b57cec5SDimitry Andric #if KMP_OS_WINDOWS
30620b57cec5SDimitry Andric kmp_win32_cond_t th_suspend_cv;
30630b57cec5SDimitry Andric kmp_win32_mutex_t th_suspend_mx;
30640b57cec5SDimitry Andric std::atomic<int> th_suspend_init;
30650b57cec5SDimitry Andric #endif
30660b57cec5SDimitry Andric #if KMP_OS_UNIX
30670b57cec5SDimitry Andric kmp_cond_align_t th_suspend_cv;
30680b57cec5SDimitry Andric kmp_mutex_align_t th_suspend_mx;
30690b57cec5SDimitry Andric std::atomic<int> th_suspend_init_count;
30700b57cec5SDimitry Andric #endif
30710b57cec5SDimitry Andric
30720b57cec5SDimitry Andric #if USE_ITT_BUILD
30730b57cec5SDimitry Andric kmp_itt_mark_t th_itt_mark_single;
30740b57cec5SDimitry Andric // alignment ???
30750b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
30760b57cec5SDimitry Andric #if KMP_STATS_ENABLED
30770b57cec5SDimitry Andric kmp_stats_list *th_stats;
30780b57cec5SDimitry Andric #endif
30790b57cec5SDimitry Andric #if KMP_OS_UNIX
// NOTE(review): undocumented; presumably set while the thread is in a blocking
// wait -- confirm against its users.
30800b57cec5SDimitry Andric std::atomic<bool> th_blocking;
30810b57cec5SDimitry Andric #endif
30820b57cec5SDimitry Andric kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
30830b57cec5SDimitry Andric } kmp_base_info_t;
30840b57cec5SDimitry Andric
// Cache-aligned wrapper around kmp_base_info_t. The real payload is `th`;
// th_align and th_pad exist only to influence alignment and size.
// NOTE(review): KMP_PAD is defined elsewhere; presumably it rounds the payload
// size up to a multiple of CACHE_LINE -- confirm.
30850b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_info {
30860b57cec5SDimitry Andric double th_align; /* use worst case alignment */
30870b57cec5SDimitry Andric char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
30880b57cec5SDimitry Andric kmp_base_info_t th;
30890b57cec5SDimitry Andric } kmp_info_t;
30900b57cec5SDimitry Andric
30910b57cec5SDimitry Andric // OpenMP thread team data structures
30920b57cec5SDimitry Andric
// Single volatile 32-bit value; used as the payload of the cache-line padded
// unions kmp_sleep_team_t and kmp_ordered_team_t.
3093fe6060f1SDimitry Andric typedef struct kmp_base_data {
3094fe6060f1SDimitry Andric volatile kmp_uint32 t_value;
3095fe6060f1SDimitry Andric } kmp_base_data_t;
30960b57cec5SDimitry Andric
// Cache-aligned, padded holder for one kmp_base_data_t (`dt`). Same member
// layout as kmp_ordered_team_t; dt_align and dt_pad only affect alignment and
// size.
30970b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_sleep_team {
30980b57cec5SDimitry Andric double dt_align; /* use worst case alignment */
30990b57cec5SDimitry Andric char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
31000b57cec5SDimitry Andric kmp_base_data_t dt;
31010b57cec5SDimitry Andric } kmp_sleep_team_t;
31020b57cec5SDimitry Andric
// Cache-aligned, padded holder for one kmp_base_data_t (`dt`); used as the
// t_ordered member of kmp_base_team. Same member layout as kmp_sleep_team_t;
// dt_align and dt_pad only affect alignment and size.
31030b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_ordered_team {
31040b57cec5SDimitry Andric double dt_align; /* use worst case alignment */
31050b57cec5SDimitry Andric char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
31060b57cec5SDimitry Andric kmp_base_data_t dt;
31070b57cec5SDimitry Andric } kmp_ordered_team_t;
31080b57cec5SDimitry Andric
// Pointer to a function that takes a global thread id and returns int; used as
// the type of kmp_base_team::t_invoke ("procedure to launch the microtask").
31090b57cec5SDimitry Andric typedef int (*launch_t)(int gtid);
31100b57cec5SDimitry Andric
31110b57cec5SDimitry Andric /* Minimum number of argv entries to allocate when the argv array has to be
   malloc'ed */
31120b57cec5SDimitry Andric #define KMP_MIN_MALLOC_ARGV_ENTRIES 100
31130b57cec5SDimitry Andric
31140b57cec5SDimitry Andric // Set up how many argv pointers will fit in cache lines containing
31150b57cec5SDimitry Andric // t_inline_argv. Historically, we have supported at least 96 bytes. Using a
3116fe6060f1SDimitry Andric // larger value for more space between the primary write/worker read section and
31170b57cec5SDimitry Andric // read/write by all section seems to buy more performance on EPCC PARALLEL.
// KMP_INLINE_ARGV_BYTES is a whole number of cache lines (4 on x86/x86_64,
// 2 elsewhere) minus the combined size, modulo CACHE_LINE, of three
// pointer-sized slots and two ints (plus, on x86/x86_64, two kmp_int8, one
// kmp_int16 and one kmp_uint32).
// NOTE(review): the subtracted terms appear to mirror the kmp_base_team
// members that precede t_inline_argv (t_argv, t_pkfn, t_invoke, t_argc,
// t_nproc and the x86 FP-control fields) -- confirm they are kept in sync.
// The OMPT_SUPPORT members in that region are not accounted for here.
31180b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
31190b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES \
31200b57cec5SDimitry Andric (4 * CACHE_LINE - \
31210b57cec5SDimitry Andric ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
31220b57cec5SDimitry Andric sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
31230b57cec5SDimitry Andric CACHE_LINE))
31240b57cec5SDimitry Andric #else
31250b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES \
31260b57cec5SDimitry Andric (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
31270b57cec5SDimitry Andric #endif
// Number of pointer-sized argv slots that fit in KMP_INLINE_ARGV_BYTES; used
// as the array bound of kmp_base_team::t_inline_argv.
// NOTE(review): the expansion is a bare cast expression without outer
// parentheses; it is safe in the array-bound use visible here, but wrapping it
// in parentheses would make it robust in arbitrary expressions.
31280b57cec5SDimitry Andric #define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
31290b57cec5SDimitry Andric
31300b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_team {
31310b57cec5SDimitry Andric // Synchronization Data
31320b57cec5SDimitry Andric // ---------------------------------------------------------------------------
31330b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
31340b57cec5SDimitry Andric kmp_balign_team_t t_bar[bs_last_barrier];
31350b57cec5SDimitry Andric std::atomic<int> t_construct; // count of single directive encountered by team
31360b57cec5SDimitry Andric char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
31370b57cec5SDimitry Andric
31380b57cec5SDimitry Andric // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
31390b57cec5SDimitry Andric std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
31400b57cec5SDimitry Andric std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
31410b57cec5SDimitry Andric
3142fe6060f1SDimitry Andric // Primary thread only
31430b57cec5SDimitry Andric // ---------------------------------------------------------------------------
3144fe6060f1SDimitry Andric KMP_ALIGN_CACHE int t_master_tid; // tid of primary thread in parent team
3145fe6060f1SDimitry Andric int t_master_this_cons; // "this_construct" single counter of primary thread
3146fe6060f1SDimitry Andric // in parent team
31470b57cec5SDimitry Andric ident_t *t_ident; // if volatile, have to change too much other crud to
31480b57cec5SDimitry Andric // volatile too
31490b57cec5SDimitry Andric kmp_team_p *t_parent; // parent team
31500b57cec5SDimitry Andric kmp_team_p *t_next_pool; // next free team in the team pool
31510b57cec5SDimitry Andric kmp_disp_t *t_dispatch; // thread's dispatch data
31520b57cec5SDimitry Andric kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
31530b57cec5SDimitry Andric kmp_proc_bind_t t_proc_bind; // bind type for par region
3154*0fca6ea1SDimitry Andric int t_primary_task_state; // primary thread's task state saved
31550b57cec5SDimitry Andric #if USE_ITT_BUILD
31560b57cec5SDimitry Andric kmp_uint64 t_region_time; // region begin timestamp
31570b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
31580b57cec5SDimitry Andric
3159fe6060f1SDimitry Andric // Primary thread write, workers read
31600b57cec5SDimitry Andric // --------------------------------------------------------------------------
31610b57cec5SDimitry Andric KMP_ALIGN_CACHE void **t_argv;
31620b57cec5SDimitry Andric int t_argc;
31630b57cec5SDimitry Andric int t_nproc; // number of threads in team
31640b57cec5SDimitry Andric microtask_t t_pkfn;
31650b57cec5SDimitry Andric launch_t t_invoke; // procedure to launch the microtask
31660b57cec5SDimitry Andric
31670b57cec5SDimitry Andric #if OMPT_SUPPORT
31680b57cec5SDimitry Andric ompt_team_info_t ompt_team_info;
31690b57cec5SDimitry Andric ompt_lw_taskteam_t *ompt_serialized_team_info;
31700b57cec5SDimitry Andric #endif
31710b57cec5SDimitry Andric
31720b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
31730b57cec5SDimitry Andric kmp_int8 t_fp_control_saved;
31740b57cec5SDimitry Andric kmp_int8 t_pad2b;
31750b57cec5SDimitry Andric kmp_int16 t_x87_fpu_control_word; // FP control regs
31760b57cec5SDimitry Andric kmp_uint32 t_mxcsr;
31770b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
31780b57cec5SDimitry Andric
31790b57cec5SDimitry Andric void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
31800b57cec5SDimitry Andric
31810b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_info_t **t_threads;
31820b57cec5SDimitry Andric kmp_taskdata_t
31830b57cec5SDimitry Andric *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
31840b57cec5SDimitry Andric int t_level; // nested parallel level
31850b57cec5SDimitry Andric
31860b57cec5SDimitry Andric KMP_ALIGN_CACHE int t_max_argc;
3187480093f4SDimitry Andric int t_max_nproc; // max threads this team can handle (dynamically expandable)
31880b57cec5SDimitry Andric int t_serialized; // levels deep of serialized teams
31890b57cec5SDimitry Andric dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
31900b57cec5SDimitry Andric int t_id; // team's id, assigned by debugger.
31910b57cec5SDimitry Andric int t_active_level; // nested active parallel level
31920b57cec5SDimitry Andric kmp_r_sched_t t_sched; // run-time schedule for the team
31930b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
31940b57cec5SDimitry Andric int t_first_place; // first & last place in parent thread's partition.
3195fe6060f1SDimitry Andric int t_last_place; // Restore these values to primary thread after par region.
31960b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
31970b57cec5SDimitry Andric int t_display_affinity;
31980b57cec5SDimitry Andric int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
31990b57cec5SDimitry Andric // omp_set_num_threads() call
32000b57cec5SDimitry Andric omp_allocator_handle_t t_def_allocator; /* default allocator */
32010b57cec5SDimitry Andric
32020b57cec5SDimitry Andric // Read/write by workers as well
32030b57cec5SDimitry Andric #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
32040b57cec5SDimitry Andric // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
32050b57cec5SDimitry Andric // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
32060b57cec5SDimitry Andric // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
  // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
32080b57cec5SDimitry Andric char dummy_padding[1024];
32090b57cec5SDimitry Andric #endif
32100b57cec5SDimitry Andric // Internal control stack for additional nested teams.
32110b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
32120b57cec5SDimitry Andric // for SERIALIZED teams nested 2 or more levels deep
32130b57cec5SDimitry Andric // typed flag to store request state of cancellation
32140b57cec5SDimitry Andric std::atomic<kmp_int32> t_cancel_request;
32150b57cec5SDimitry Andric int t_master_active; // save on fork, restore on join
32160b57cec5SDimitry Andric void *t_copypriv_data; // team specific pointer to copyprivate data array
32170b57cec5SDimitry Andric #if KMP_OS_WINDOWS
32180b57cec5SDimitry Andric std::atomic<kmp_uint32> t_copyin_counter;
32190b57cec5SDimitry Andric #endif
32200b57cec5SDimitry Andric #if USE_ITT_BUILD
32210b57cec5SDimitry Andric void *t_stack_id; // team specific stack stitching id (for ittnotify)
32220b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
3223349cc55cSDimitry Andric distributedBarrier *b; // Distributed barrier data associated with team
3224*0fca6ea1SDimitry Andric kmp_nested_nthreads_t *t_nested_nth;
32250b57cec5SDimitry Andric } kmp_base_team_t;
32260b57cec5SDimitry Andric
3227*0fca6ea1SDimitry Andric // Assert that the list structure fits and aligns within
3228*0fca6ea1SDimitry Andric // the double task team pointer
3229*0fca6ea1SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_task_team_t *[2]) == sizeof(kmp_task_team_list_t));
3230*0fca6ea1SDimitry Andric KMP_BUILD_ASSERT(alignof(kmp_task_team_t *[2]) ==
3231*0fca6ea1SDimitry Andric alignof(kmp_task_team_list_t));
3232*0fca6ea1SDimitry Andric
32330b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_team {
32340b57cec5SDimitry Andric kmp_base_team_t t;
32350b57cec5SDimitry Andric double t_align; /* use worst case alignment */
32360b57cec5SDimitry Andric char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
32370b57cec5SDimitry Andric };
32380b57cec5SDimitry Andric
32390b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_time_global {
32400b57cec5SDimitry Andric double dt_align; /* use worst case alignment */
32410b57cec5SDimitry Andric char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
32420b57cec5SDimitry Andric kmp_base_data_t dt;
32430b57cec5SDimitry Andric } kmp_time_global_t;
32440b57cec5SDimitry Andric
32450b57cec5SDimitry Andric typedef struct kmp_base_global {
32460b57cec5SDimitry Andric /* cache-aligned */
32470b57cec5SDimitry Andric kmp_time_global_t g_time;
32480b57cec5SDimitry Andric
32490b57cec5SDimitry Andric /* non cache-aligned */
32500b57cec5SDimitry Andric volatile int g_abort;
32510b57cec5SDimitry Andric volatile int g_done;
32520b57cec5SDimitry Andric
32530b57cec5SDimitry Andric int g_dynamic;
32540b57cec5SDimitry Andric enum dynamic_mode g_dynamic_mode;
32550b57cec5SDimitry Andric } kmp_base_global_t;
32560b57cec5SDimitry Andric
32570b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_global {
32580b57cec5SDimitry Andric kmp_base_global_t g;
32590b57cec5SDimitry Andric double g_align; /* use worst case alignment */
32600b57cec5SDimitry Andric char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
32610b57cec5SDimitry Andric } kmp_global_t;
32620b57cec5SDimitry Andric
32630b57cec5SDimitry Andric typedef struct kmp_base_root {
32640b57cec5SDimitry Andric // TODO: GEH - combine r_active with r_in_parallel then r_active ==
32650b57cec5SDimitry Andric // (r_in_parallel>= 0)
32660b57cec5SDimitry Andric // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
32670b57cec5SDimitry Andric // the synch overhead or keeping r_active
32680b57cec5SDimitry Andric volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
32690b57cec5SDimitry Andric // keeps a count of active parallel regions per root
32700b57cec5SDimitry Andric std::atomic<int> r_in_parallel;
32710b57cec5SDimitry Andric // GEH: This is misnamed, should be r_active_levels
32720b57cec5SDimitry Andric kmp_team_t *r_root_team;
32730b57cec5SDimitry Andric kmp_team_t *r_hot_team;
32740b57cec5SDimitry Andric kmp_info_t *r_uber_thread;
32750b57cec5SDimitry Andric kmp_lock_t r_begin_lock;
32760b57cec5SDimitry Andric volatile int r_begin;
32770b57cec5SDimitry Andric int r_blocktime; /* blocktime for this root and descendants */
3278fe6060f1SDimitry Andric #if KMP_AFFINITY_SUPPORTED
3279fe6060f1SDimitry Andric int r_affinity_assigned;
3280fe6060f1SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
32810b57cec5SDimitry Andric } kmp_base_root_t;
32820b57cec5SDimitry Andric
32830b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_root {
32840b57cec5SDimitry Andric kmp_base_root_t r;
32850b57cec5SDimitry Andric double r_align; /* use worst case alignment */
32860b57cec5SDimitry Andric char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
32870b57cec5SDimitry Andric } kmp_root_t;
32880b57cec5SDimitry Andric
32890b57cec5SDimitry Andric struct fortran_inx_info {
32900b57cec5SDimitry Andric kmp_int32 data;
32910b57cec5SDimitry Andric };
32920b57cec5SDimitry Andric
329381ad6265SDimitry Andric // This list type exists to hold old __kmp_threads arrays so that
329481ad6265SDimitry Andric // old references to them may complete while reallocation takes place when
329581ad6265SDimitry Andric // expanding the array. The items in this list are kept alive until library
329681ad6265SDimitry Andric // shutdown.
329781ad6265SDimitry Andric typedef struct kmp_old_threads_list_t {
329881ad6265SDimitry Andric kmp_info_t **threads;
329981ad6265SDimitry Andric struct kmp_old_threads_list_t *next;
330081ad6265SDimitry Andric } kmp_old_threads_list_t;
330181ad6265SDimitry Andric
33020b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
33030b57cec5SDimitry Andric
33040b57cec5SDimitry Andric extern int __kmp_settings;
33050b57cec5SDimitry Andric extern int __kmp_duplicate_library_ok;
33060b57cec5SDimitry Andric #if USE_ITT_BUILD
33070b57cec5SDimitry Andric extern int __kmp_forkjoin_frames;
33080b57cec5SDimitry Andric extern int __kmp_forkjoin_frames_mode;
33090b57cec5SDimitry Andric #endif
33100b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
33110b57cec5SDimitry Andric extern int __kmp_determ_red;
33120b57cec5SDimitry Andric
33130b57cec5SDimitry Andric #ifdef KMP_DEBUG
33140b57cec5SDimitry Andric extern int kmp_a_debug;
33150b57cec5SDimitry Andric extern int kmp_b_debug;
33160b57cec5SDimitry Andric extern int kmp_c_debug;
33170b57cec5SDimitry Andric extern int kmp_d_debug;
33180b57cec5SDimitry Andric extern int kmp_e_debug;
33190b57cec5SDimitry Andric extern int kmp_f_debug;
33200b57cec5SDimitry Andric #endif /* KMP_DEBUG */
33210b57cec5SDimitry Andric
33220b57cec5SDimitry Andric /* For debug information logging using rotating buffer */
33230b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_INIT 512
33240b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_MIN 1
33250b57cec5SDimitry Andric
33260b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_INIT 128
33270b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_MIN 2
33280b57cec5SDimitry Andric
33290b57cec5SDimitry Andric extern int
33300b57cec5SDimitry Andric __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
33310b57cec5SDimitry Andric extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
33320b57cec5SDimitry Andric extern int
33330b57cec5SDimitry Andric __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
33340b57cec5SDimitry Andric extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
33350b57cec5SDimitry Andric entry pointer */
33360b57cec5SDimitry Andric
33370b57cec5SDimitry Andric extern char *__kmp_debug_buffer; /* Debug buffer itself */
33380b57cec5SDimitry Andric extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
33390b57cec5SDimitry Andric printed in buffer so far */
33400b57cec5SDimitry Andric extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
33410b57cec5SDimitry Andric recommended in warnings */
33420b57cec5SDimitry Andric /* end rotating debug buffer */
33430b57cec5SDimitry Andric
33440b57cec5SDimitry Andric #ifdef KMP_DEBUG
33450b57cec5SDimitry Andric extern int __kmp_par_range; /* +1 => only go par for constructs in range */
33460b57cec5SDimitry Andric
33470b57cec5SDimitry Andric #define KMP_PAR_RANGE_ROUTINE_LEN 1024
33480b57cec5SDimitry Andric extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
33490b57cec5SDimitry Andric #define KMP_PAR_RANGE_FILENAME_LEN 1024
33500b57cec5SDimitry Andric extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
33510b57cec5SDimitry Andric extern int __kmp_par_range_lb;
33520b57cec5SDimitry Andric extern int __kmp_par_range_ub;
33530b57cec5SDimitry Andric #endif
33540b57cec5SDimitry Andric
33550b57cec5SDimitry Andric /* For printing out dynamic storage map for threads and teams */
33560b57cec5SDimitry Andric extern int
33570b57cec5SDimitry Andric __kmp_storage_map; /* True means print storage map for threads and teams */
33580b57cec5SDimitry Andric extern int __kmp_storage_map_verbose; /* True means storage map includes
33590b57cec5SDimitry Andric placement info */
33600b57cec5SDimitry Andric extern int __kmp_storage_map_verbose_specified;
33610b57cec5SDimitry Andric
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern kmp_cpuinfo_t __kmp_cpuinfo;
#endif

// Reports whether the CPU is considered hybrid. The answer is chosen at
// compile time per target.
static inline bool __kmp_is_hybrid_cpu() {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // x86 / x86_64: use the hybrid flag stored in __kmp_cpuinfo.
  return __kmp_cpuinfo.flags.hybrid;
#elif KMP_OS_DARWIN && KMP_ARCH_AARCH64
  // Darwin on AArch64: always reported as hybrid.
  return true;
#else
  // Every other target: never reported as hybrid.
  return false;
#endif
}
33700b57cec5SDimitry Andric
33710b57cec5SDimitry Andric extern volatile int __kmp_init_serial;
33720b57cec5SDimitry Andric extern volatile int __kmp_init_gtid;
33730b57cec5SDimitry Andric extern volatile int __kmp_init_common;
3374fcaf7f86SDimitry Andric extern volatile int __kmp_need_register_serial;
33750b57cec5SDimitry Andric extern volatile int __kmp_init_middle;
33760b57cec5SDimitry Andric extern volatile int __kmp_init_parallel;
33770b57cec5SDimitry Andric #if KMP_USE_MONITOR
33780b57cec5SDimitry Andric extern volatile int __kmp_init_monitor;
33790b57cec5SDimitry Andric #endif
33800b57cec5SDimitry Andric extern volatile int __kmp_init_user_locks;
3381e8d8bef9SDimitry Andric extern volatile int __kmp_init_hidden_helper_threads;
33820b57cec5SDimitry Andric extern int __kmp_init_counter;
33830b57cec5SDimitry Andric extern int __kmp_root_counter;
33840b57cec5SDimitry Andric extern int __kmp_version;
33850b57cec5SDimitry Andric
33860b57cec5SDimitry Andric /* list of address of allocated caches for commons */
33870b57cec5SDimitry Andric extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
33880b57cec5SDimitry Andric
33890b57cec5SDimitry Andric /* Barrier algorithm types and options */
33900b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
33910b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_bb_dflt;
33920b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
33930b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
33940b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
33950b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
33960b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
33970b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
33980b57cec5SDimitry Andric extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
33990b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
34000b57cec5SDimitry Andric extern char const *__kmp_barrier_type_name[bs_last_barrier];
34010b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_name[bp_last_bar];
34020b57cec5SDimitry Andric
34030b57cec5SDimitry Andric /* Global Locks */
34040b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
34050b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
34060b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_task_team_lock;
34070b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
34080b57cec5SDimitry Andric __kmp_exit_lock; /* exit() is not always thread-safe */
34090b57cec5SDimitry Andric #if KMP_USE_MONITOR
34100b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
34110b57cec5SDimitry Andric __kmp_monitor_lock; /* control monitor thread creation */
34120b57cec5SDimitry Andric #endif
34130b57cec5SDimitry Andric extern kmp_bootstrap_lock_t
34140b57cec5SDimitry Andric __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
34150b57cec5SDimitry Andric __kmp_threads expansion to co-exist */
34160b57cec5SDimitry Andric
34170b57cec5SDimitry Andric extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
34180b57cec5SDimitry Andric extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
34190b57cec5SDimitry Andric extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
34200b57cec5SDimitry Andric
34210b57cec5SDimitry Andric extern enum library_type __kmp_library;
34220b57cec5SDimitry Andric
34230b57cec5SDimitry Andric extern enum sched_type __kmp_sched; /* default runtime scheduling */
34240b57cec5SDimitry Andric extern enum sched_type __kmp_static; /* default static scheduling method */
34250b57cec5SDimitry Andric extern enum sched_type __kmp_guided; /* default guided scheduling method */
34260b57cec5SDimitry Andric extern enum sched_type __kmp_auto; /* default auto scheduling method */
34270b57cec5SDimitry Andric extern int __kmp_chunk; /* default runtime chunk size */
3428fe6060f1SDimitry Andric extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
34290b57cec5SDimitry Andric
34300b57cec5SDimitry Andric extern size_t __kmp_stksize; /* stack size per thread */
34310b57cec5SDimitry Andric #if KMP_USE_MONITOR
34320b57cec5SDimitry Andric extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
34330b57cec5SDimitry Andric #endif
34340b57cec5SDimitry Andric extern size_t __kmp_stkoffset; /* stack offset per thread */
34350b57cec5SDimitry Andric extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
34360b57cec5SDimitry Andric
34370b57cec5SDimitry Andric extern size_t
34380b57cec5SDimitry Andric __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
34390b57cec5SDimitry Andric extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
34400b57cec5SDimitry Andric extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
34410b57cec5SDimitry Andric extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
34420b57cec5SDimitry Andric extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
34430b57cec5SDimitry Andric extern int __kmp_generate_warnings; /* should we issue warnings? */
34440b57cec5SDimitry Andric extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
34450b57cec5SDimitry Andric
34460b57cec5SDimitry Andric #ifdef DEBUG_SUSPEND
34470b57cec5SDimitry Andric extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
34480b57cec5SDimitry Andric #endif
34490b57cec5SDimitry Andric
34500b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield;
34510b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield_exp_set;
34520b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_init;
34530b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_next;
345404eeddc0SDimitry Andric extern kmp_uint64 __kmp_pause_init;
34550b57cec5SDimitry Andric
34560b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
34570b57cec5SDimitry Andric extern int __kmp_allThreadsSpecified;
34580b57cec5SDimitry Andric
34590b57cec5SDimitry Andric extern size_t __kmp_align_alloc;
34600b57cec5SDimitry Andric /* following data protected by initialization routines */
34610b57cec5SDimitry Andric extern int __kmp_xproc; /* number of processors in the system */
34620b57cec5SDimitry Andric extern int __kmp_avail_proc; /* number of processors available to the process */
34630b57cec5SDimitry Andric extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
34640b57cec5SDimitry Andric extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
34650b57cec5SDimitry Andric // maximum total number of concurrently-existing threads on device
34660b57cec5SDimitry Andric extern int __kmp_max_nth;
34670b57cec5SDimitry Andric // maximum total number of concurrently-existing threads in a contention group
34680b57cec5SDimitry Andric extern int __kmp_cg_max_nth;
34695f757f3fSDimitry Andric extern int __kmp_task_max_nth; // max threads used in a task
34700b57cec5SDimitry Andric extern int __kmp_teams_max_nth; // max threads used in a teams construct
34710b57cec5SDimitry Andric extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
34720b57cec5SDimitry Andric __kmp_root */
34730b57cec5SDimitry Andric extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
34740b57cec5SDimitry Andric region a la OMP_NUM_THREADS */
34750b57cec5SDimitry Andric extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
34760b57cec5SDimitry Andric initialization */
34770b57cec5SDimitry Andric extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
34780b57cec5SDimitry Andric used (fixed) */
34790b57cec5SDimitry Andric extern int __kmp_tp_cached; /* whether threadprivate cache has been created
34800b57cec5SDimitry Andric (__kmpc_threadprivate_cached()) */
34815f757f3fSDimitry Andric extern int __kmp_dflt_blocktime; /* number of microseconds to wait before
34820b57cec5SDimitry Andric blocking (env setting) */
34835f757f3fSDimitry Andric extern char __kmp_blocktime_units; /* 'm' or 'u' to note units specified */
348481ad6265SDimitry Andric extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
34855f757f3fSDimitry Andric
34865f757f3fSDimitry Andric // Convert raw blocktime from ms to us if needed.
__kmp_aux_convert_blocktime(int * bt)34875f757f3fSDimitry Andric static inline void __kmp_aux_convert_blocktime(int *bt) {
34885f757f3fSDimitry Andric if (__kmp_blocktime_units == 'm') {
34895f757f3fSDimitry Andric if (*bt > INT_MAX / 1000) {
34905f757f3fSDimitry Andric *bt = INT_MAX / 1000;
34915f757f3fSDimitry Andric KMP_INFORM(MaxValueUsing, "kmp_set_blocktime(ms)", bt);
34925f757f3fSDimitry Andric }
34935f757f3fSDimitry Andric *bt = *bt * 1000;
34945f757f3fSDimitry Andric }
34955f757f3fSDimitry Andric }
34965f757f3fSDimitry Andric
34970b57cec5SDimitry Andric #if KMP_USE_MONITOR
34980b57cec5SDimitry Andric extern int
34990b57cec5SDimitry Andric __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
35000b57cec5SDimitry Andric extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
35010b57cec5SDimitry Andric blocking */
35020b57cec5SDimitry Andric #endif
35030b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME
35040b57cec5SDimitry Andric extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
35050b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */
35060b57cec5SDimitry Andric #ifdef KMP_DFLT_NTH_CORES
35070b57cec5SDimitry Andric extern int __kmp_ncores; /* Total number of cores for threads placement */
35080b57cec5SDimitry Andric #endif
35090b57cec5SDimitry Andric /* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
35100b57cec5SDimitry Andric extern int __kmp_abort_delay;
35110b57cec5SDimitry Andric
35120b57cec5SDimitry Andric extern int __kmp_need_register_atfork_specified;
3513fe6060f1SDimitry Andric extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
3514fe6060f1SDimitry Andric to install fork handler */
35150b57cec5SDimitry Andric extern int __kmp_gtid_mode; /* Method of getting gtid, values:
35160b57cec5SDimitry Andric 0 - not set, will be set at runtime
35170b57cec5SDimitry Andric 1 - using stack search
35180b57cec5SDimitry Andric 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
35190b57cec5SDimitry Andric X*) or TlsGetValue(Windows* OS))
35200b57cec5SDimitry Andric 3 - static TLS (__declspec(thread) __kmp_gtid),
35210b57cec5SDimitry Andric Linux* OS .so only. */
35220b57cec5SDimitry Andric extern int
35230b57cec5SDimitry Andric __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
35240b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID
35250b57cec5SDimitry Andric extern KMP_THREAD_LOCAL int __kmp_gtid;
35260b57cec5SDimitry Andric #endif
35270b57cec5SDimitry Andric extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
35280b57cec5SDimitry Andric extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
35290b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
35300b57cec5SDimitry Andric extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
35310b57cec5SDimitry Andric extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
35320b57cec5SDimitry Andric extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
35330b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
35340b57cec5SDimitry Andric
35350b57cec5SDimitry Andric // max_active_levels for nested parallelism enabled by default via
35360b57cec5SDimitry Andric // OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
35370b57cec5SDimitry Andric extern int __kmp_dflt_max_active_levels;
35380b57cec5SDimitry Andric // Indicates whether value of __kmp_dflt_max_active_levels was already
35390b57cec5SDimitry Andric // explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
35400b57cec5SDimitry Andric extern bool __kmp_dflt_max_active_levels_set;
35410b57cec5SDimitry Andric extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
35420b57cec5SDimitry Andric concurrent execution per team */
35430b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS
35440b57cec5SDimitry Andric extern int __kmp_hot_teams_mode;
35450b57cec5SDimitry Andric extern int __kmp_hot_teams_max_level;
35460b57cec5SDimitry Andric #endif
35470b57cec5SDimitry Andric
35480b57cec5SDimitry Andric #if KMP_OS_LINUX
35490b57cec5SDimitry Andric extern enum clock_function_type __kmp_clock_function;
35500b57cec5SDimitry Andric extern int __kmp_clock_function_param;
35510b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */
35520b57cec5SDimitry Andric
35530b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED
35540b57cec5SDimitry Andric extern enum mic_type __kmp_mic_type;
35550b57cec5SDimitry Andric #endif
35560b57cec5SDimitry Andric
35570b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE
35580b57cec5SDimitry Andric extern double __kmp_load_balance_interval; // load balance algorithm interval
35590b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */
35600b57cec5SDimitry Andric
35610b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS
35620b57cec5SDimitry Andric
35630b57cec5SDimitry Andric // Parameters for the speculative lock backoff system.
35640b57cec5SDimitry Andric struct kmp_adaptive_backoff_params_t {
35650b57cec5SDimitry Andric // Number of soft retries before it counts as a hard retry.
35660b57cec5SDimitry Andric kmp_uint32 max_soft_retries;
35670b57cec5SDimitry Andric // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
35680b57cec5SDimitry Andric // the right
35690b57cec5SDimitry Andric kmp_uint32 max_badness;
35700b57cec5SDimitry Andric };
35710b57cec5SDimitry Andric
35720b57cec5SDimitry Andric extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
35730b57cec5SDimitry Andric
35740b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS
35750b57cec5SDimitry Andric extern const char *__kmp_speculative_statsfile;
35760b57cec5SDimitry Andric #endif
35770b57cec5SDimitry Andric
35780b57cec5SDimitry Andric #endif // KMP_USE_ADAPTIVE_LOCKS
35790b57cec5SDimitry Andric
35800b57cec5SDimitry Andric extern int __kmp_display_env; /* TRUE or FALSE */
35810b57cec5SDimitry Andric extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
35820b57cec5SDimitry Andric extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3583fe6060f1SDimitry Andric extern int __kmp_nteams;
3584fe6060f1SDimitry Andric extern int __kmp_teams_thread_limit;
35850b57cec5SDimitry Andric
35860b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
35870b57cec5SDimitry Andric
35880b57cec5SDimitry Andric /* the following are protected by the fork/join lock */
35890b57cec5SDimitry Andric /* write: lock read: anytime */
35900b57cec5SDimitry Andric extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
359181ad6265SDimitry Andric /* Holds old arrays of __kmp_threads until library shutdown */
359281ad6265SDimitry Andric extern kmp_old_threads_list_t *__kmp_old_threads_list;
35930b57cec5SDimitry Andric /* read/write: lock */
35940b57cec5SDimitry Andric extern volatile kmp_team_t *__kmp_team_pool;
35950b57cec5SDimitry Andric extern volatile kmp_info_t *__kmp_thread_pool;
35960b57cec5SDimitry Andric extern kmp_info_t *__kmp_thread_pool_insert_pt;
35970b57cec5SDimitry Andric
35980b57cec5SDimitry Andric // total num threads reachable from some root thread including all root threads
35990b57cec5SDimitry Andric extern volatile int __kmp_nth;
36000b57cec5SDimitry Andric /* total number of threads reachable from some root thread including all root
36010b57cec5SDimitry Andric threads, and those in the thread pool */
36020b57cec5SDimitry Andric extern volatile int __kmp_all_nth;
36030b57cec5SDimitry Andric extern std::atomic<int> __kmp_thread_pool_active_nth;
36040b57cec5SDimitry Andric
36050b57cec5SDimitry Andric extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
36060b57cec5SDimitry Andric /* end data protected by fork/join lock */
36070b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
36080b57cec5SDimitry Andric
// Shorthand accessors. __kmp_get_gtid() and __kmp_entry_gtid() forward to
// __kmp_get_global_thread_id() and __kmp_get_global_thread_id_reg()
// respectively; the remaining macros look up data for the gtid returned by
// __kmp_get_gtid().
#define __kmp_get_gtid() __kmp_get_global_thread_id()
#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))

// Thread count (t_nproc) of the team that thread `gtid` points at.
// AT: Which way is correct?
// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
#define __kmp_get_team_num_threads(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
36200b57cec5SDimitry Andric
KMP_UBER_GTID(int gtid)36210b57cec5SDimitry Andric static inline bool KMP_UBER_GTID(int gtid) {
36220b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
36230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
36240b57cec5SDimitry Andric return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
36250b57cec5SDimitry Andric __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
36260b57cec5SDimitry Andric }
36270b57cec5SDimitry Andric
__kmp_tid_from_gtid(int gtid)36280b57cec5SDimitry Andric static inline int __kmp_tid_from_gtid(int gtid) {
36290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0);
36300b57cec5SDimitry Andric return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
36310b57cec5SDimitry Andric }
36320b57cec5SDimitry Andric
__kmp_gtid_from_tid(int tid,const kmp_team_t * team)36330b57cec5SDimitry Andric static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
36340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(tid >= 0 && team);
36350b57cec5SDimitry Andric return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
36360b57cec5SDimitry Andric }
36370b57cec5SDimitry Andric
__kmp_gtid_from_thread(const kmp_info_t * thr)36380b57cec5SDimitry Andric static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
36390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr);
36400b57cec5SDimitry Andric return thr->th.th_info.ds.ds_gtid;
36410b57cec5SDimitry Andric }
36420b57cec5SDimitry Andric
__kmp_thread_from_gtid(int gtid)36430b57cec5SDimitry Andric static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
36440b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0);
36450b57cec5SDimitry Andric return __kmp_threads[gtid];
36460b57cec5SDimitry Andric }
36470b57cec5SDimitry Andric
__kmp_team_from_gtid(int gtid)36480b57cec5SDimitry Andric static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
36490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0);
36500b57cec5SDimitry Andric return __kmp_threads[gtid]->th.th_team;
36510b57cec5SDimitry Andric }
36520b57cec5SDimitry Andric
__kmp_assert_valid_gtid(kmp_int32 gtid)3653e8d8bef9SDimitry Andric static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
3654e8d8bef9SDimitry Andric if (UNLIKELY(gtid < 0 || gtid >= __kmp_threads_capacity))
3655e8d8bef9SDimitry Andric KMP_FATAL(ThreadIdentInvalid);
3656e8d8bef9SDimitry Andric }
3657e8d8bef9SDimitry Andric
3658e8d8bef9SDimitry Andric #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
3659e8d8bef9SDimitry Andric extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
3660e8d8bef9SDimitry Andric extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
3661e8d8bef9SDimitry Andric extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
3662e8d8bef9SDimitry Andric extern int __kmp_mwait_hints; // Hints to pass in to mwait
3663e8d8bef9SDimitry Andric #endif
3664e8d8bef9SDimitry Andric
366504eeddc0SDimitry Andric #if KMP_HAVE_UMWAIT
366604eeddc0SDimitry Andric extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
366704eeddc0SDimitry Andric extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
366804eeddc0SDimitry Andric extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
366904eeddc0SDimitry Andric extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
367004eeddc0SDimitry Andric #endif
367104eeddc0SDimitry Andric
36720b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */
36730b57cec5SDimitry Andric
36740b57cec5SDimitry Andric extern kmp_global_t __kmp_global; /* global status */
36750b57cec5SDimitry Andric
36760b57cec5SDimitry Andric extern kmp_info_t __kmp_monitor;
36770b57cec5SDimitry Andric // For Debugging Support Library
36780b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_team_counter;
36790b57cec5SDimitry Andric // For Debugging Support Library
36800b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_task_counter;
36810b57cec5SDimitry Andric
// _KMP_GEN_ID(counter) yields an id derived from the given counter.
//  - With USE_DEBUGGER: KMP_ATOMIC_INC(&(counter)) + 1 while __kmp_debugging
//    is set, otherwise ~0.
//  - Without USE_DEBUGGER: always ~0 (the counter argument is not evaluated).
// The parameter is parenthesized so that an expression argument cannot change
// what the address-of operator binds to.
#if USE_DEBUGGER
#define _KMP_GEN_ID(counter)                                                   \
  (__kmp_debugging ? KMP_ATOMIC_INC(&(counter)) + 1 : ~0)
#else
#define _KMP_GEN_ID(counter) (~0)
#endif /* USE_DEBUGGER */

// Convenience wrappers bound to the task and team counters.
#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
36910b57cec5SDimitry Andric
36920b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
36930b57cec5SDimitry Andric
36940b57cec5SDimitry Andric extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
36950b57cec5SDimitry Andric size_t size, char const *format, ...);
36960b57cec5SDimitry Andric
36970b57cec5SDimitry Andric extern void __kmp_serial_initialize(void);
36980b57cec5SDimitry Andric extern void __kmp_middle_initialize(void);
36990b57cec5SDimitry Andric extern void __kmp_parallel_initialize(void);
37000b57cec5SDimitry Andric
37010b57cec5SDimitry Andric extern void __kmp_internal_begin(void);
37020b57cec5SDimitry Andric extern void __kmp_internal_end_library(int gtid);
37030b57cec5SDimitry Andric extern void __kmp_internal_end_thread(int gtid);
37040b57cec5SDimitry Andric extern void __kmp_internal_end_atexit(void);
37050b57cec5SDimitry Andric extern void __kmp_internal_end_dtor(void);
37060b57cec5SDimitry Andric extern void __kmp_internal_end_dest(void *);
37070b57cec5SDimitry Andric
37080b57cec5SDimitry Andric extern int __kmp_register_root(int initial_thread);
37090b57cec5SDimitry Andric extern void __kmp_unregister_root(int gtid);
3710e8d8bef9SDimitry Andric extern void __kmp_unregister_library(void); // called by __kmp_internal_end()
37110b57cec5SDimitry Andric
37120b57cec5SDimitry Andric extern int __kmp_ignore_mppbeg(void);
37130b57cec5SDimitry Andric extern int __kmp_ignore_mppend(void);
37140b57cec5SDimitry Andric
37150b57cec5SDimitry Andric extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
37160b57cec5SDimitry Andric extern void __kmp_exit_single(int gtid);
37170b57cec5SDimitry Andric
37180b57cec5SDimitry Andric extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
37190b57cec5SDimitry Andric extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
37200b57cec5SDimitry Andric
37210b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE
37220b57cec5SDimitry Andric extern int __kmp_get_load_balance(int);
37230b57cec5SDimitry Andric #endif
37240b57cec5SDimitry Andric
37250b57cec5SDimitry Andric extern int __kmp_get_global_thread_id(void);
37260b57cec5SDimitry Andric extern int __kmp_get_global_thread_id_reg(void);
37270b57cec5SDimitry Andric extern void __kmp_exit_thread(int exit_status);
37280b57cec5SDimitry Andric extern void __kmp_abort(char const *format, ...);
37290b57cec5SDimitry Andric extern void __kmp_abort_thread(void);
37300b57cec5SDimitry Andric KMP_NORETURN extern void __kmp_abort_process(void);
37310b57cec5SDimitry Andric extern void __kmp_warn(char const *format, ...);
37320b57cec5SDimitry Andric
37330b57cec5SDimitry Andric extern void __kmp_set_num_threads(int new_nth, int gtid);
37340b57cec5SDimitry Andric
37355f757f3fSDimitry Andric extern bool __kmp_detect_shm();
37365f757f3fSDimitry Andric extern bool __kmp_detect_tmp();
37375f757f3fSDimitry Andric
37380b57cec5SDimitry Andric // Returns current thread (pointer to kmp_info_t). Current thread *must* be
37390b57cec5SDimitry Andric // registered.
__kmp_entry_thread()37400b57cec5SDimitry Andric static inline kmp_info_t *__kmp_entry_thread() {
37410b57cec5SDimitry Andric int gtid = __kmp_entry_gtid();
37420b57cec5SDimitry Andric
37430b57cec5SDimitry Andric return __kmp_threads[gtid];
37440b57cec5SDimitry Andric }
37450b57cec5SDimitry Andric
37460b57cec5SDimitry Andric extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
37470b57cec5SDimitry Andric extern int __kmp_get_max_active_levels(int gtid);
37480b57cec5SDimitry Andric extern int __kmp_get_ancestor_thread_num(int gtid, int level);
37490b57cec5SDimitry Andric extern int __kmp_get_team_size(int gtid, int level);
37500b57cec5SDimitry Andric extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
37510b57cec5SDimitry Andric extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
37520b57cec5SDimitry Andric
37530b57cec5SDimitry Andric extern unsigned short __kmp_get_random(kmp_info_t *thread);
37540b57cec5SDimitry Andric extern void __kmp_init_random(kmp_info_t *thread);
37550b57cec5SDimitry Andric
37560b57cec5SDimitry Andric extern kmp_r_sched_t __kmp_get_schedule_global(void);
37570b57cec5SDimitry Andric extern void __kmp_adjust_num_threads(int new_nproc);
37580b57cec5SDimitry Andric extern void __kmp_check_stksize(size_t *val);
37590b57cec5SDimitry Andric
37600b57cec5SDimitry Andric extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
37610b57cec5SDimitry Andric extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
37620b57cec5SDimitry Andric extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
37630b57cec5SDimitry Andric #define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
37640b57cec5SDimitry Andric #define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
37650b57cec5SDimitry Andric #define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
37660b57cec5SDimitry Andric
37670b57cec5SDimitry Andric #if USE_FAST_MEMORY
37680b57cec5SDimitry Andric extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
37690b57cec5SDimitry Andric size_t size KMP_SRC_LOC_DECL);
37700b57cec5SDimitry Andric extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
37710b57cec5SDimitry Andric extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
37720b57cec5SDimitry Andric extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
37730b57cec5SDimitry Andric #define __kmp_fast_allocate(this_thr, size) \
37740b57cec5SDimitry Andric ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
37750b57cec5SDimitry Andric #define __kmp_fast_free(this_thr, ptr) \
37760b57cec5SDimitry Andric ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
37770b57cec5SDimitry Andric #endif
37780b57cec5SDimitry Andric
37790b57cec5SDimitry Andric extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
37800b57cec5SDimitry Andric extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
37810b57cec5SDimitry Andric size_t elsize KMP_SRC_LOC_DECL);
37820b57cec5SDimitry Andric extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
37830b57cec5SDimitry Andric size_t size KMP_SRC_LOC_DECL);
37840b57cec5SDimitry Andric extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
37850b57cec5SDimitry Andric #define __kmp_thread_malloc(th, size) \
37860b57cec5SDimitry Andric ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
37870b57cec5SDimitry Andric #define __kmp_thread_calloc(th, nelem, elsize) \
37880b57cec5SDimitry Andric ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
37890b57cec5SDimitry Andric #define __kmp_thread_realloc(th, ptr, size) \
37900b57cec5SDimitry Andric ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
37910b57cec5SDimitry Andric #define __kmp_thread_free(th, ptr) \
37920b57cec5SDimitry Andric ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
37930b57cec5SDimitry Andric
37940b57cec5SDimitry Andric extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3795*0fca6ea1SDimitry Andric extern void __kmp_push_num_threads_list(ident_t *loc, int gtid,
3796*0fca6ea1SDimitry Andric kmp_uint32 list_length,
3797*0fca6ea1SDimitry Andric int *num_threads_list);
3798*0fca6ea1SDimitry Andric extern void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
3799*0fca6ea1SDimitry Andric const char *msg);
38000b57cec5SDimitry Andric
38010b57cec5SDimitry Andric extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
38020b57cec5SDimitry Andric kmp_proc_bind_t proc_bind);
38030b57cec5SDimitry Andric extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
38040b57cec5SDimitry Andric int num_threads);
3805fe6060f1SDimitry Andric extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
3806fe6060f1SDimitry Andric int num_teams_ub, int num_threads);
38070b57cec5SDimitry Andric
38080b57cec5SDimitry Andric extern void __kmp_yield();
38090b57cec5SDimitry Andric
38100b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
38110b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 lb,
38120b57cec5SDimitry Andric kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
38130b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
38140b57cec5SDimitry Andric enum sched_type schedule, kmp_uint32 lb,
38150b57cec5SDimitry Andric kmp_uint32 ub, kmp_int32 st,
38160b57cec5SDimitry Andric kmp_int32 chunk);
38170b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
38180b57cec5SDimitry Andric enum sched_type schedule, kmp_int64 lb,
38190b57cec5SDimitry Andric kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
38200b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
38210b57cec5SDimitry Andric enum sched_type schedule, kmp_uint64 lb,
38220b57cec5SDimitry Andric kmp_uint64 ub, kmp_int64 st,
38230b57cec5SDimitry Andric kmp_int64 chunk);
38240b57cec5SDimitry Andric
38250b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
38260b57cec5SDimitry Andric kmp_int32 *p_last, kmp_int32 *p_lb,
38270b57cec5SDimitry Andric kmp_int32 *p_ub, kmp_int32 *p_st);
38280b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
38290b57cec5SDimitry Andric kmp_int32 *p_last, kmp_uint32 *p_lb,
38300b57cec5SDimitry Andric kmp_uint32 *p_ub, kmp_int32 *p_st);
38310b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
38320b57cec5SDimitry Andric kmp_int32 *p_last, kmp_int64 *p_lb,
38330b57cec5SDimitry Andric kmp_int64 *p_ub, kmp_int64 *p_st);
38340b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
38350b57cec5SDimitry Andric kmp_int32 *p_last, kmp_uint64 *p_lb,
38360b57cec5SDimitry Andric kmp_uint64 *p_ub, kmp_int64 *p_st);
38370b57cec5SDimitry Andric
38380b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
38390b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
38400b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
38410b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
38420b57cec5SDimitry Andric
3843*0fca6ea1SDimitry Andric extern void __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 gtid);
3844*0fca6ea1SDimitry Andric
38450b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT
38460b57cec5SDimitry Andric
38470b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
38480b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 lb,
38490b57cec5SDimitry Andric kmp_int32 ub, kmp_int32 st,
38500b57cec5SDimitry Andric kmp_int32 chunk, int push_ws);
38510b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
38520b57cec5SDimitry Andric enum sched_type schedule, kmp_uint32 lb,
38530b57cec5SDimitry Andric kmp_uint32 ub, kmp_int32 st,
38540b57cec5SDimitry Andric kmp_int32 chunk, int push_ws);
38550b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
38560b57cec5SDimitry Andric enum sched_type schedule, kmp_int64 lb,
38570b57cec5SDimitry Andric kmp_int64 ub, kmp_int64 st,
38580b57cec5SDimitry Andric kmp_int64 chunk, int push_ws);
38590b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
38600b57cec5SDimitry Andric enum sched_type schedule, kmp_uint64 lb,
38610b57cec5SDimitry Andric kmp_uint64 ub, kmp_int64 st,
38620b57cec5SDimitry Andric kmp_int64 chunk, int push_ws);
38630b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
38640b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
38650b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
38660b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
38670b57cec5SDimitry Andric
38680b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */
38690b57cec5SDimitry Andric
38700b57cec5SDimitry Andric extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
38710b57cec5SDimitry Andric extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
38720b57cec5SDimitry Andric extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
38730b57cec5SDimitry Andric extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
38740b57cec5SDimitry Andric extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
38750b57cec5SDimitry Andric extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
38760b57cec5SDimitry Andric kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
38770b57cec5SDimitry Andric void *obj);
38780b57cec5SDimitry Andric extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
38790b57cec5SDimitry Andric kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
38800b57cec5SDimitry Andric
3881e8d8bef9SDimitry Andric extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
38820b57cec5SDimitry Andric int final_spin
38830b57cec5SDimitry Andric #if USE_ITT_BUILD
38840b57cec5SDimitry Andric ,
38850b57cec5SDimitry Andric void *itt_sync_obj
38860b57cec5SDimitry Andric #endif
38870b57cec5SDimitry Andric );
3888e8d8bef9SDimitry Andric extern void __kmp_release_64(kmp_flag_64<> *flag);
38890b57cec5SDimitry Andric
38900b57cec5SDimitry Andric extern void __kmp_infinite_loop(void);
38910b57cec5SDimitry Andric
38920b57cec5SDimitry Andric extern void __kmp_cleanup(void);
38930b57cec5SDimitry Andric
38940b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS
38950b57cec5SDimitry Andric extern int __kmp_handle_signals;
38960b57cec5SDimitry Andric extern void __kmp_install_signals(int parallel_init);
38970b57cec5SDimitry Andric extern void __kmp_remove_signals(void);
38980b57cec5SDimitry Andric #endif
38990b57cec5SDimitry Andric
39000b57cec5SDimitry Andric extern void __kmp_clear_system_time(void);
39010b57cec5SDimitry Andric extern void __kmp_read_system_time(double *delta);
39020b57cec5SDimitry Andric
39030b57cec5SDimitry Andric extern void __kmp_check_stack_overlap(kmp_info_t *thr);
39040b57cec5SDimitry Andric
39050b57cec5SDimitry Andric extern void __kmp_expand_host_name(char *buffer, size_t size);
39060b57cec5SDimitry Andric extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
39070b57cec5SDimitry Andric
3908bdd1243dSDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM))
39090b57cec5SDimitry Andric extern void
39100b57cec5SDimitry Andric __kmp_initialize_system_tick(void); /* Initialize timer tick value */
39110b57cec5SDimitry Andric #endif
39120b57cec5SDimitry Andric
39130b57cec5SDimitry Andric extern void
39140b57cec5SDimitry Andric __kmp_runtime_initialize(void); /* machine specific initialization */
39150b57cec5SDimitry Andric extern void __kmp_runtime_destroy(void);
39160b57cec5SDimitry Andric
39170b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
39180b57cec5SDimitry Andric extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
39190b57cec5SDimitry Andric kmp_affin_mask_t *mask);
39200b57cec5SDimitry Andric extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
39210b57cec5SDimitry Andric kmp_affin_mask_t *mask);
3922bdd1243dSDimitry Andric extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
39230b57cec5SDimitry Andric extern void __kmp_affinity_uninitialize(void);
39240b57cec5SDimitry Andric extern void __kmp_affinity_set_init_mask(
39250b57cec5SDimitry Andric int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
39265f757f3fSDimitry Andric void __kmp_affinity_bind_init_mask(int gtid);
39275f757f3fSDimitry Andric extern void __kmp_affinity_bind_place(int gtid);
39280b57cec5SDimitry Andric extern void __kmp_affinity_determine_capable(const char *env_var);
39290b57cec5SDimitry Andric extern int __kmp_aux_set_affinity(void **mask);
39300b57cec5SDimitry Andric extern int __kmp_aux_get_affinity(void **mask);
39310b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_max_proc();
39320b57cec5SDimitry Andric extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
39330b57cec5SDimitry Andric extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
39340b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
39350b57cec5SDimitry Andric extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
39365f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
39375f757f3fSDimitry Andric extern int __kmp_get_first_osid_with_ecore(void);
39385f757f3fSDimitry Andric #endif
3939*0fca6ea1SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \
3940*0fca6ea1SDimitry Andric KMP_OS_AIX
39410b57cec5SDimitry Andric extern int kmp_set_thread_affinity_mask_initial(void);
39420b57cec5SDimitry Andric #endif
__kmp_assign_root_init_mask()3943fe6060f1SDimitry Andric static inline void __kmp_assign_root_init_mask() {
3944fe6060f1SDimitry Andric int gtid = __kmp_entry_gtid();
3945fe6060f1SDimitry Andric kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
3946fe6060f1SDimitry Andric if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
39475f757f3fSDimitry Andric __kmp_affinity_set_init_mask(gtid, /*isa_root=*/TRUE);
39485f757f3fSDimitry Andric __kmp_affinity_bind_init_mask(gtid);
3949fe6060f1SDimitry Andric r->r.r_affinity_assigned = TRUE;
3950fe6060f1SDimitry Andric }
3951fe6060f1SDimitry Andric }
__kmp_reset_root_init_mask(int gtid)3952fcaf7f86SDimitry Andric static inline void __kmp_reset_root_init_mask(int gtid) {
3953bdd1243dSDimitry Andric if (!KMP_AFFINITY_CAPABLE())
3954bdd1243dSDimitry Andric return;
3955fcaf7f86SDimitry Andric kmp_info_t *th = __kmp_threads[gtid];
3956fcaf7f86SDimitry Andric kmp_root_t *r = th->th.th_root;
3957fcaf7f86SDimitry Andric if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {
3958fcaf7f86SDimitry Andric __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
3959fcaf7f86SDimitry Andric KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);
3960fcaf7f86SDimitry Andric r->r.r_affinity_assigned = FALSE;
3961fcaf7f86SDimitry Andric }
3962fcaf7f86SDimitry Andric }
3963fe6060f1SDimitry Andric #else /* KMP_AFFINITY_SUPPORTED */
3964fe6060f1SDimitry Andric #define __kmp_assign_root_init_mask() /* Nothing */
// Variant for builds without KMP_AFFINITY_SUPPORTED: a no-op that accepts and
// ignores `gtid`.
static inline void __kmp_reset_root_init_mask(int gtid) { (void)gtid; }
39660b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
39670b57cec5SDimitry Andric // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
39680b57cec5SDimitry Andric // format string is for affinity, so platforms that do not support
39690b57cec5SDimitry Andric // affinity can still use the other fields, e.g., %n for num_threads
39700b57cec5SDimitry Andric extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
39710b57cec5SDimitry Andric kmp_str_buf_t *buffer);
39720b57cec5SDimitry Andric extern void __kmp_aux_display_affinity(int gtid, const char *format);
39730b57cec5SDimitry Andric
39740b57cec5SDimitry Andric extern void __kmp_cleanup_hierarchy();
39750b57cec5SDimitry Andric extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
39760b57cec5SDimitry Andric
39770b57cec5SDimitry Andric #if KMP_USE_FUTEX
39780b57cec5SDimitry Andric
39790b57cec5SDimitry Andric extern int __kmp_futex_determine_capable(void);
39800b57cec5SDimitry Andric
39810b57cec5SDimitry Andric #endif // KMP_USE_FUTEX
39820b57cec5SDimitry Andric
39830b57cec5SDimitry Andric extern void __kmp_gtid_set_specific(int gtid);
39840b57cec5SDimitry Andric extern int __kmp_gtid_get_specific(void);
39850b57cec5SDimitry Andric
39860b57cec5SDimitry Andric extern double __kmp_read_cpu_time(void);
39870b57cec5SDimitry Andric
39880b57cec5SDimitry Andric extern int __kmp_read_system_info(struct kmp_sys_info *info);
39890b57cec5SDimitry Andric
39900b57cec5SDimitry Andric #if KMP_USE_MONITOR
39910b57cec5SDimitry Andric extern void __kmp_create_monitor(kmp_info_t *th);
39920b57cec5SDimitry Andric #endif
39930b57cec5SDimitry Andric
39940b57cec5SDimitry Andric extern void *__kmp_launch_thread(kmp_info_t *thr);
39950b57cec5SDimitry Andric
39960b57cec5SDimitry Andric extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
39970b57cec5SDimitry Andric
39980b57cec5SDimitry Andric #if KMP_OS_WINDOWS
39990b57cec5SDimitry Andric extern int __kmp_still_running(kmp_info_t *th);
40000b57cec5SDimitry Andric extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
40010b57cec5SDimitry Andric extern void __kmp_free_handle(kmp_thread_t tHandle);
40020b57cec5SDimitry Andric #endif
40030b57cec5SDimitry Andric
40040b57cec5SDimitry Andric #if KMP_USE_MONITOR
40050b57cec5SDimitry Andric extern void __kmp_reap_monitor(kmp_info_t *th);
40060b57cec5SDimitry Andric #endif
40070b57cec5SDimitry Andric extern void __kmp_reap_worker(kmp_info_t *th);
40080b57cec5SDimitry Andric extern void __kmp_terminate_thread(int gtid);
40090b57cec5SDimitry Andric
40100b57cec5SDimitry Andric extern int __kmp_try_suspend_mx(kmp_info_t *th);
40110b57cec5SDimitry Andric extern void __kmp_lock_suspend_mx(kmp_info_t *th);
40120b57cec5SDimitry Andric extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
40130b57cec5SDimitry Andric
40140b57cec5SDimitry Andric extern void __kmp_elapsed(double *);
40150b57cec5SDimitry Andric extern void __kmp_elapsed_tick(double *);
40160b57cec5SDimitry Andric
40170b57cec5SDimitry Andric extern void __kmp_enable(int old_state);
40180b57cec5SDimitry Andric extern void __kmp_disable(int *old_state);
40190b57cec5SDimitry Andric
40200b57cec5SDimitry Andric extern void __kmp_thread_sleep(int millis);
40210b57cec5SDimitry Andric
40220b57cec5SDimitry Andric extern void __kmp_common_initialize(void);
40230b57cec5SDimitry Andric extern void __kmp_common_destroy(void);
40240b57cec5SDimitry Andric extern void __kmp_common_destroy_gtid(int gtid);
40250b57cec5SDimitry Andric
40260b57cec5SDimitry Andric #if KMP_OS_UNIX
40270b57cec5SDimitry Andric extern void __kmp_register_atfork(void);
40280b57cec5SDimitry Andric #endif
40290b57cec5SDimitry Andric extern void __kmp_suspend_initialize(void);
40300b57cec5SDimitry Andric extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
40310b57cec5SDimitry Andric extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
40320b57cec5SDimitry Andric
40330b57cec5SDimitry Andric extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
40340b57cec5SDimitry Andric int tid);
40350b57cec5SDimitry Andric extern kmp_team_t *
40360b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
40370b57cec5SDimitry Andric #if OMPT_SUPPORT
40380b57cec5SDimitry Andric ompt_data_t ompt_parallel_data,
40390b57cec5SDimitry Andric #endif
40400b57cec5SDimitry Andric kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
40410b57cec5SDimitry Andric int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
40420b57cec5SDimitry Andric extern void __kmp_free_thread(kmp_info_t *);
40430b57cec5SDimitry Andric extern void __kmp_free_team(kmp_root_t *,
40440b57cec5SDimitry Andric kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
40450b57cec5SDimitry Andric extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
40460b57cec5SDimitry Andric
40470b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
40480b57cec5SDimitry Andric
40490b57cec5SDimitry Andric extern void __kmp_initialize_bget(kmp_info_t *th);
40500b57cec5SDimitry Andric extern void __kmp_finalize_bget(kmp_info_t *th);
40510b57cec5SDimitry Andric
40520b57cec5SDimitry Andric KMP_EXPORT void *kmpc_malloc(size_t size);
40530b57cec5SDimitry Andric KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
40540b57cec5SDimitry Andric KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
40550b57cec5SDimitry Andric KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
40560b57cec5SDimitry Andric KMP_EXPORT void kmpc_free(void *ptr);
40570b57cec5SDimitry Andric
40580b57cec5SDimitry Andric /* declarations for internal use */
40590b57cec5SDimitry Andric
40600b57cec5SDimitry Andric extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
40610b57cec5SDimitry Andric size_t reduce_size, void *reduce_data,
40620b57cec5SDimitry Andric void (*reduce)(void *, void *));
40630b57cec5SDimitry Andric extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
40640b57cec5SDimitry Andric extern int __kmp_barrier_gomp_cancel(int gtid);
40650b57cec5SDimitry Andric
/*!
 * Identifies which compiler generated a fork call, which in turn tells the
 * fork call how it has to deal with that call.
 */
enum fork_context_e {
  /** Called from GNU generated code, so must not invoke the microtask
      internally. */
  fork_context_gnu,
  /** Called from Intel generated code. */
  fork_context_intel,
  /* Trailing enumerator; undocumented in the original header (presumably an
     end marker / count -- confirm against its users). */
  fork_context_last
};
40760b57cec5SDimitry Andric extern int __kmp_fork_call(ident_t *loc, int gtid,
40770b57cec5SDimitry Andric enum fork_context_e fork_context, kmp_int32 argc,
40780b57cec5SDimitry Andric microtask_t microtask, launch_t invoker,
407916794618SDimitry Andric kmp_va_list ap);
40800b57cec5SDimitry Andric
40810b57cec5SDimitry Andric extern void __kmp_join_call(ident_t *loc, int gtid
40820b57cec5SDimitry Andric #if OMPT_SUPPORT
40830b57cec5SDimitry Andric ,
40840b57cec5SDimitry Andric enum fork_context_e fork_context
40850b57cec5SDimitry Andric #endif
40860b57cec5SDimitry Andric ,
40870b57cec5SDimitry Andric int exit_teams = 0);
40880b57cec5SDimitry Andric
40890b57cec5SDimitry Andric extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
40900b57cec5SDimitry Andric extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
40910b57cec5SDimitry Andric extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
40920b57cec5SDimitry Andric extern int __kmp_invoke_task_func(int gtid);
40930b57cec5SDimitry Andric extern void __kmp_run_before_invoked_task(int gtid, int tid,
40940b57cec5SDimitry Andric kmp_info_t *this_thr,
40950b57cec5SDimitry Andric kmp_team_t *team);
40960b57cec5SDimitry Andric extern void __kmp_run_after_invoked_task(int gtid, int tid,
40970b57cec5SDimitry Andric kmp_info_t *this_thr,
40980b57cec5SDimitry Andric kmp_team_t *team);
40990b57cec5SDimitry Andric
41000b57cec5SDimitry Andric // should never have been exported
41010b57cec5SDimitry Andric KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
41020b57cec5SDimitry Andric extern int __kmp_invoke_teams_master(int gtid);
41030b57cec5SDimitry Andric extern void __kmp_teams_master(int gtid);
41040b57cec5SDimitry Andric extern int __kmp_aux_get_team_num();
41050b57cec5SDimitry Andric extern int __kmp_aux_get_num_teams();
41060b57cec5SDimitry Andric extern void __kmp_save_internal_controls(kmp_info_t *thread);
41070b57cec5SDimitry Andric extern void __kmp_user_set_library(enum library_type arg);
41080b57cec5SDimitry Andric extern void __kmp_aux_set_library(enum library_type arg);
41090b57cec5SDimitry Andric extern void __kmp_aux_set_stacksize(size_t arg);
41100b57cec5SDimitry Andric extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
4111e8d8bef9SDimitry Andric extern void __kmp_aux_set_defaults(char const *str, size_t len);
41120b57cec5SDimitry Andric
41130b57cec5SDimitry Andric /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
41140b57cec5SDimitry Andric void kmpc_set_blocktime(int arg);
41150b57cec5SDimitry Andric void ompc_set_nested(int flag);
41160b57cec5SDimitry Andric void ompc_set_dynamic(int flag);
41170b57cec5SDimitry Andric void ompc_set_num_threads(int arg);
41180b57cec5SDimitry Andric
41190b57cec5SDimitry Andric extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
41200b57cec5SDimitry Andric kmp_team_t *team, int tid);
41210b57cec5SDimitry Andric extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
41220b57cec5SDimitry Andric extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
41230b57cec5SDimitry Andric kmp_tasking_flags_t *flags,
41240b57cec5SDimitry Andric size_t sizeof_kmp_task_t,
41250b57cec5SDimitry Andric size_t sizeof_shareds,
41260b57cec5SDimitry Andric kmp_routine_entry_t task_entry);
41270b57cec5SDimitry Andric extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
41280b57cec5SDimitry Andric kmp_team_t *team, int tid,
41290b57cec5SDimitry Andric int set_curr_task);
41300b57cec5SDimitry Andric extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
41310b57cec5SDimitry Andric extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
41320b57cec5SDimitry Andric
41330b57cec5SDimitry Andric extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
41340b57cec5SDimitry Andric int gtid,
41350b57cec5SDimitry Andric kmp_task_t *task);
41360b57cec5SDimitry Andric extern void __kmp_fulfill_event(kmp_event_t *event);
41370b57cec5SDimitry Andric
41380b57cec5SDimitry Andric extern void __kmp_free_task_team(kmp_info_t *thread,
41390b57cec5SDimitry Andric kmp_task_team_t *task_team);
41400b57cec5SDimitry Andric extern void __kmp_reap_task_teams(void);
4141*0fca6ea1SDimitry Andric extern void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team);
4142*0fca6ea1SDimitry Andric extern void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team);
41430b57cec5SDimitry Andric extern void __kmp_wait_to_unref_task_teams(void);
4144*0fca6ea1SDimitry Andric extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team);
41450b57cec5SDimitry Andric extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
41460b57cec5SDimitry Andric extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
41470b57cec5SDimitry Andric #if USE_ITT_BUILD
41480b57cec5SDimitry Andric ,
41490b57cec5SDimitry Andric void *itt_sync_obj
41500b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */
41510b57cec5SDimitry Andric ,
41520b57cec5SDimitry Andric int wait = 1);
41530b57cec5SDimitry Andric extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
41540b57cec5SDimitry Andric int gtid);
#if KMP_DEBUG
// Debug-only check of the task-team invariant. The assertion holds when the
// runtime is not in tskm_task_teams mode, when the team has exactly one
// thread, or when the thread's th_task_team equals the team's t_task_team
// entry selected by the thread's th_task_state.
// team, thr: pointer-valued expressions; both are parenthesized in the
// expansion so that arguments other than plain identifiers expand correctly.
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr)                         \
  KMP_DEBUG_ASSERT(                                                            \
      __kmp_tasking_mode != tskm_task_teams || (team)->t.t_nproc == 1 ||       \
      (thr)->th.th_task_team ==                                                \
          (team)->t.t_task_team[(thr)->th.th_task_state])
#else
// KMP_DEBUG is off: the check expands to nothing.
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr) /* Nothing */
#endif
41630b57cec5SDimitry Andric
41640b57cec5SDimitry Andric extern int __kmp_is_address_mapped(void *addr);
41650b57cec5SDimitry Andric extern kmp_uint64 __kmp_hardware_timestamp(void);
41660b57cec5SDimitry Andric
41670b57cec5SDimitry Andric #if KMP_OS_UNIX
41680b57cec5SDimitry Andric extern int __kmp_read_from_file(char const *path, char const *format, ...);
41690b57cec5SDimitry Andric #endif
41700b57cec5SDimitry Andric
41710b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
41720b57cec5SDimitry Andric //
41730b57cec5SDimitry Andric // Assembly routines that have no compiler intrinsic replacement
41740b57cec5SDimitry Andric //
41750b57cec5SDimitry Andric
41760b57cec5SDimitry Andric extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
41770b57cec5SDimitry Andric void *argv[]
41780b57cec5SDimitry Andric #if OMPT_SUPPORT
41790b57cec5SDimitry Andric ,
41800b57cec5SDimitry Andric void **exit_frame_ptr
41810b57cec5SDimitry Andric #endif
41820b57cec5SDimitry Andric );
41830b57cec5SDimitry Andric
41840b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
41850b57cec5SDimitry Andric
41860b57cec5SDimitry Andric KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
41870b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end(ident_t *);
41880b57cec5SDimitry Andric
41890b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
41900b57cec5SDimitry Andric kmpc_ctor_vec ctor,
41910b57cec5SDimitry Andric kmpc_cctor_vec cctor,
41920b57cec5SDimitry Andric kmpc_dtor_vec dtor,
41930b57cec5SDimitry Andric size_t vector_length);
41940b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
41950b57cec5SDimitry Andric kmpc_ctor ctor, kmpc_cctor cctor,
41960b57cec5SDimitry Andric kmpc_dtor dtor);
41970b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
41980b57cec5SDimitry Andric void *data, size_t size);
41990b57cec5SDimitry Andric
42000b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
42010b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
42020b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
42030b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
42040b57cec5SDimitry Andric
42050b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
42060b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
42070b57cec5SDimitry Andric kmpc_micro microtask, ...);
4208bdd1243dSDimitry Andric KMP_EXPORT void __kmpc_fork_call_if(ident_t *loc, kmp_int32 nargs,
4209bdd1243dSDimitry Andric kmpc_micro microtask, kmp_int32 cond,
4210bdd1243dSDimitry Andric void *args);
42110b57cec5SDimitry Andric
42120b57cec5SDimitry Andric KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
42130b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
42140b57cec5SDimitry Andric
42150b57cec5SDimitry Andric KMP_EXPORT void __kmpc_flush(ident_t *);
42160b57cec5SDimitry Andric KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
42170b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
42180b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
4219fe6060f1SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
4220fe6060f1SDimitry Andric kmp_int32 filter);
4221fe6060f1SDimitry Andric KMP_EXPORT void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
42220b57cec5SDimitry Andric KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
42230b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
42240b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
42250b57cec5SDimitry Andric kmp_critical_name *);
42260b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
42270b57cec5SDimitry Andric kmp_critical_name *);
42280b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
42290b57cec5SDimitry Andric kmp_critical_name *, uint32_t hint);
42300b57cec5SDimitry Andric
42310b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
42320b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
42330b57cec5SDimitry Andric
42340b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
42350b57cec5SDimitry Andric kmp_int32 global_tid);
42360b57cec5SDimitry Andric
42370b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
42380b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
42390b57cec5SDimitry Andric
4240753f127fSDimitry Andric KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
4241753f127fSDimitry Andric KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
4242753f127fSDimitry Andric kmp_int32 numberOfSections);
4243753f127fSDimitry Andric KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
4244753f127fSDimitry Andric
42450b57cec5SDimitry Andric KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
42460b57cec5SDimitry Andric kmp_int32 schedtype, kmp_int32 *plastiter,
42470b57cec5SDimitry Andric kmp_int *plower, kmp_int *pupper,
42480b57cec5SDimitry Andric kmp_int *pstride, kmp_int incr,
42490b57cec5SDimitry Andric kmp_int chunk);
42500b57cec5SDimitry Andric
42510b57cec5SDimitry Andric KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
42520b57cec5SDimitry Andric
42530b57cec5SDimitry Andric KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
42540b57cec5SDimitry Andric size_t cpy_size, void *cpy_data,
42550b57cec5SDimitry Andric void (*cpy_func)(void *, void *),
42560b57cec5SDimitry Andric kmp_int32 didit);
42570b57cec5SDimitry Andric
4258753f127fSDimitry Andric KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
4259753f127fSDimitry Andric void *cpy_data);
4260753f127fSDimitry Andric
42610b57cec5SDimitry Andric extern void KMPC_SET_NUM_THREADS(int arg);
42620b57cec5SDimitry Andric extern void KMPC_SET_DYNAMIC(int flag);
42630b57cec5SDimitry Andric extern void KMPC_SET_NESTED(int flag);
42640b57cec5SDimitry Andric
42650b57cec5SDimitry Andric /* OMP 3.0 tasking interface routines */
42660b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
42670b57cec5SDimitry Andric kmp_task_t *new_task);
42680b57cec5SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
42690b57cec5SDimitry Andric kmp_int32 flags,
42700b57cec5SDimitry Andric size_t sizeof_kmp_task_t,
42710b57cec5SDimitry Andric size_t sizeof_shareds,
42720b57cec5SDimitry Andric kmp_routine_entry_t task_entry);
4273fe6060f1SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(
4274fe6060f1SDimitry Andric ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
4275fe6060f1SDimitry Andric size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
42760b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
42770b57cec5SDimitry Andric kmp_task_t *task);
42780b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
42790b57cec5SDimitry Andric kmp_task_t *task);
42800b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
42810b57cec5SDimitry Andric kmp_task_t *new_task);
42820b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
42830b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
42840b57cec5SDimitry Andric int end_part);
42850b57cec5SDimitry Andric
42860b57cec5SDimitry Andric #if TASK_UNUSED
42870b57cec5SDimitry Andric void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
42880b57cec5SDimitry Andric void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
42890b57cec5SDimitry Andric kmp_task_t *task);
42900b57cec5SDimitry Andric #endif // TASK_UNUSED
42910b57cec5SDimitry Andric
42920b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */
42930b57cec5SDimitry Andric
42940b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
42950b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);
42960b57cec5SDimitry Andric
42970b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
42980b57cec5SDimitry Andric ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
42990b57cec5SDimitry Andric kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
43000b57cec5SDimitry Andric kmp_depend_info_t *noalias_dep_list);
43015f757f3fSDimitry Andric
43025f757f3fSDimitry Andric KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
43035f757f3fSDimitry Andric
43045f757f3fSDimitry Andric KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
43055f757f3fSDimitry Andric
43060b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
43070b57cec5SDimitry Andric kmp_int32 ndeps,
43080b57cec5SDimitry Andric kmp_depend_info_t *dep_list,
43090b57cec5SDimitry Andric kmp_int32 ndeps_noalias,
43100b57cec5SDimitry Andric kmp_depend_info_t *noalias_dep_list);
4311bdd1243dSDimitry Andric /* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
4312bdd1243dSDimitry Andric * Placeholder for taskwait with nowait clause.*/
4313bdd1243dSDimitry Andric KMP_EXPORT void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
4314bdd1243dSDimitry Andric kmp_int32 ndeps,
4315bdd1243dSDimitry Andric kmp_depend_info_t *dep_list,
4316bdd1243dSDimitry Andric kmp_int32 ndeps_noalias,
4317bdd1243dSDimitry Andric kmp_depend_info_t *noalias_dep_list,
4318bdd1243dSDimitry Andric kmp_int32 has_no_wait);
4319bdd1243dSDimitry Andric
43200b57cec5SDimitry Andric extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
43210b57cec5SDimitry Andric bool serialize_immediate);
43220b57cec5SDimitry Andric
43230b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
43240b57cec5SDimitry Andric kmp_int32 cncl_kind);
43250b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
43260b57cec5SDimitry Andric kmp_int32 cncl_kind);
43270b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
43280b57cec5SDimitry Andric KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
43290b57cec5SDimitry Andric
43300b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
43310b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
43320b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
43330b57cec5SDimitry Andric kmp_int32 if_val, kmp_uint64 *lb,
43340b57cec5SDimitry Andric kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
43350b57cec5SDimitry Andric kmp_int32 sched, kmp_uint64 grainsize,
43360b57cec5SDimitry Andric void *task_dup);
4337e8d8bef9SDimitry Andric KMP_EXPORT void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
4338e8d8bef9SDimitry Andric kmp_task_t *task, kmp_int32 if_val,
4339e8d8bef9SDimitry Andric kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4340e8d8bef9SDimitry Andric kmp_int32 nogroup, kmp_int32 sched,
4341e8d8bef9SDimitry Andric kmp_uint64 grainsize, kmp_int32 modifier,
4342e8d8bef9SDimitry Andric void *task_dup);
43430b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
43440b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
43450b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
43460b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
43470b57cec5SDimitry Andric int is_ws, int num,
43480b57cec5SDimitry Andric void *data);
43490b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
43500b57cec5SDimitry Andric int num, void *data);
43510b57cec5SDimitry Andric KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
43520b57cec5SDimitry Andric int is_ws);
43530b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
43540b57cec5SDimitry Andric ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
43550b57cec5SDimitry Andric kmp_task_affinity_info_t *affin_list);
4356fe6060f1SDimitry Andric KMP_EXPORT void __kmp_set_num_teams(int num_teams);
4357fe6060f1SDimitry Andric KMP_EXPORT int __kmp_get_max_teams(void);
4358fe6060f1SDimitry Andric KMP_EXPORT void __kmp_set_teams_thread_limit(int limit);
4359fe6060f1SDimitry Andric KMP_EXPORT int __kmp_get_teams_thread_limit(void);
43600b57cec5SDimitry Andric
4361bdd1243dSDimitry Andric /* Interface target task integration */
4362bdd1243dSDimitry Andric KMP_EXPORT void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid);
4363bdd1243dSDimitry Andric KMP_EXPORT bool __kmpc_omp_has_task_team(kmp_int32 gtid);
4364bdd1243dSDimitry Andric
43650b57cec5SDimitry Andric /* Lock interface routines (fast versions with gtid passed in) */
43660b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
43670b57cec5SDimitry Andric void **user_lock);
43680b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
43690b57cec5SDimitry Andric void **user_lock);
43700b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
43710b57cec5SDimitry Andric void **user_lock);
43720b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
43730b57cec5SDimitry Andric void **user_lock);
43740b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
43750b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
43760b57cec5SDimitry Andric void **user_lock);
43770b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
43780b57cec5SDimitry Andric void **user_lock);
43790b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
43800b57cec5SDimitry Andric void **user_lock);
43810b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
43820b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
43830b57cec5SDimitry Andric void **user_lock);
43840b57cec5SDimitry Andric
43850b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
43860b57cec5SDimitry Andric void **user_lock, uintptr_t hint);
43870b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
43880b57cec5SDimitry Andric void **user_lock,
43890b57cec5SDimitry Andric uintptr_t hint);
43900b57cec5SDimitry Andric
439106c3fb27SDimitry Andric #if OMPX_TASKGRAPH
439206c3fb27SDimitry Andric // Taskgraph's Record & Replay mechanism
439306c3fb27SDimitry Andric // __kmp_tdg_is_recording: check whether a given TDG is recording
439406c3fb27SDimitry Andric // status: the tdg's current status
__kmp_tdg_is_recording(kmp_tdg_status_t status)439506c3fb27SDimitry Andric static inline bool __kmp_tdg_is_recording(kmp_tdg_status_t status) {
439606c3fb27SDimitry Andric return status == KMP_TDG_RECORDING;
439706c3fb27SDimitry Andric }
439806c3fb27SDimitry Andric
439906c3fb27SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_start_record_task(ident_t *loc, kmp_int32 gtid,
440006c3fb27SDimitry Andric kmp_int32 input_flags,
440106c3fb27SDimitry Andric kmp_int32 tdg_id);
440206c3fb27SDimitry Andric KMP_EXPORT void __kmpc_end_record_task(ident_t *loc, kmp_int32 gtid,
440306c3fb27SDimitry Andric kmp_int32 input_flags, kmp_int32 tdg_id);
440406c3fb27SDimitry Andric #endif
44050b57cec5SDimitry Andric /* Interface to fast scalable reduce methods routines */
44060b57cec5SDimitry Andric
44070b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
44080b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
44090b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
44100b57cec5SDimitry Andric kmp_critical_name *lck);
44110b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
44120b57cec5SDimitry Andric kmp_critical_name *lck);
44130b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce(
44140b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
44150b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
44160b57cec5SDimitry Andric kmp_critical_name *lck);
44170b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
44180b57cec5SDimitry Andric kmp_critical_name *lck);
44190b57cec5SDimitry Andric
44200b57cec5SDimitry Andric /* Internal fast reduction routines */
44210b57cec5SDimitry Andric
44220b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
44230b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
44240b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
44250b57cec5SDimitry Andric kmp_critical_name *lck);
44260b57cec5SDimitry Andric
44270b57cec5SDimitry Andric // this function is for testing set/get/determine reduce method
44280b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);
44290b57cec5SDimitry Andric
44300b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
44310b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
44320b57cec5SDimitry Andric
44330b57cec5SDimitry Andric // C++ port
44340b57cec5SDimitry Andric // missing 'extern "C"' declarations
44350b57cec5SDimitry Andric
44360b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
44370b57cec5SDimitry Andric KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
44380b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
44390b57cec5SDimitry Andric kmp_int32 num_threads);
4440*0fca6ea1SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads_strict(ident_t *loc,
4441*0fca6ea1SDimitry Andric kmp_int32 global_tid,
4442*0fca6ea1SDimitry Andric kmp_int32 num_threads,
4443*0fca6ea1SDimitry Andric int severity,
4444*0fca6ea1SDimitry Andric const char *message);
4445*0fca6ea1SDimitry Andric
4446*0fca6ea1SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
4447*0fca6ea1SDimitry Andric kmp_uint32 list_length,
4448*0fca6ea1SDimitry Andric kmp_int32 *num_threads_list);
4449*0fca6ea1SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads_list_strict(
4450*0fca6ea1SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length,
4451*0fca6ea1SDimitry Andric kmp_int32 *num_threads_list, int severity, const char *message);
44520b57cec5SDimitry Andric
44530b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
44540b57cec5SDimitry Andric int proc_bind);
44550b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
44560b57cec5SDimitry Andric kmp_int32 num_teams,
44570b57cec5SDimitry Andric kmp_int32 num_threads);
44585f757f3fSDimitry Andric KMP_EXPORT void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
44595f757f3fSDimitry Andric kmp_int32 thread_limit);
4460fe6060f1SDimitry Andric /* Function for OpenMP 5.1 num_teams clause */
4461fe6060f1SDimitry Andric KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
4462fe6060f1SDimitry Andric kmp_int32 num_teams_lb,
4463fe6060f1SDimitry Andric kmp_int32 num_teams_ub,
4464fe6060f1SDimitry Andric kmp_int32 num_threads);
44650b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
44660b57cec5SDimitry Andric kmpc_micro microtask, ...);
44670b57cec5SDimitry Andric struct kmp_dim { // loop bounds info casted to kmp_int64
44680b57cec5SDimitry Andric kmp_int64 lo; // lower
44690b57cec5SDimitry Andric kmp_int64 up; // upper
44700b57cec5SDimitry Andric kmp_int64 st; // stride
44710b57cec5SDimitry Andric };
44720b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
44730b57cec5SDimitry Andric kmp_int32 num_dims,
44740b57cec5SDimitry Andric const struct kmp_dim *dims);
44750b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
44760b57cec5SDimitry Andric const kmp_int64 *vec);
44770b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
44780b57cec5SDimitry Andric const kmp_int64 *vec);
44790b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
44800b57cec5SDimitry Andric
44810b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
44820b57cec5SDimitry Andric void *data, size_t size,
44830b57cec5SDimitry Andric void ***cache);
44840b57cec5SDimitry Andric
44850b57cec5SDimitry Andric // The routines below are not exported.
44860b57cec5SDimitry Andric // Consider making them 'static' in corresponding source files.
44870b57cec5SDimitry Andric void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
44880b57cec5SDimitry Andric void *data_addr, size_t pc_size);
44890b57cec5SDimitry Andric struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
44900b57cec5SDimitry Andric void *data_addr,
44910b57cec5SDimitry Andric size_t pc_size);
44920b57cec5SDimitry Andric void __kmp_threadprivate_resize_cache(int newCapacity);
44930b57cec5SDimitry Andric void __kmp_cleanup_threadprivate_caches();
44940b57cec5SDimitry Andric
44950b57cec5SDimitry Andric // ompc_, kmpc_ entries moved from omp.h.
// Calling-convention marker used in the ompc_/kmpc_ declarations that follow:
// __cdecl when KMP_OS_WINDOWS is set, empty otherwise.
#if !KMP_OS_WINDOWS
#define KMPC_CONVENTION
#else
#define KMPC_CONVENTION __cdecl
#endif
45010b57cec5SDimitry Andric
#ifndef __OMP_H
// Definitions supplied here only when __OMP_H is not defined
// (presumably omp.h's include guard -- confirm against omp.h).

// Opaque handle type for an affinity mask.
typedef void *kmp_affinity_mask_t;

// Schedule kinds; the numeric values are spelled out explicitly.
typedef enum omp_sched_t {
  omp_sched_static = 1,
  omp_sched_dynamic = 2,
  omp_sched_guided = 3,
  omp_sched_auto = 4
} omp_sched_t;
#endif
45110b57cec5SDimitry Andric
45120b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
45130b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
45140b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
45150b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
45160b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
45170b57cec5SDimitry Andric kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
45180b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
45190b57cec5SDimitry Andric kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
45200b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION
45210b57cec5SDimitry Andric kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
45220b57cec5SDimitry Andric
45230b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
45240b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
45250b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
45260b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
45270b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
4528fe6060f1SDimitry Andric void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);
4529fe6060f1SDimitry Andric size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);
4530fe6060f1SDimitry Andric void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);
4531fe6060f1SDimitry Andric size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
4532fe6060f1SDimitry Andric char const *format);
45330b57cec5SDimitry Andric
// Target offload kinds. Both the tag name (enum kmp_target_offload_kind) and
// the typedef name (kmp_target_offload_kind_t) refer to this enumeration.
typedef enum kmp_target_offload_kind {
  tgt_disabled = 0,
  tgt_default = 1,
  tgt_mandatory = 2
} kmp_target_offload_kind_t;
45400b57cec5SDimitry Andric // Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
45410b57cec5SDimitry Andric extern kmp_target_offload_kind_t __kmp_target_offload;
45420b57cec5SDimitry Andric extern int __kmpc_get_target_offload();
45430b57cec5SDimitry Andric
45440b57cec5SDimitry Andric // Constants used in libomptarget
// The negative values are parenthesized so each macro always expands to a
// single operand; an accidental juxtaposition such as `2 KMP_DEVICE_ALL` is
// then a compile error instead of silently becoming a subtraction.
#define KMP_DEVICE_DEFAULT (-1) // This is libomptarget's default device.
#define KMP_DEVICE_ALL (-11) // This is libomptarget's "all devices".
45470b57cec5SDimitry Andric
45480b57cec5SDimitry Andric // OMP Pause Resource
45490b57cec5SDimitry Andric
// Pause status values. The enum serves two purposes: it is the value stored in
// __kmp_pause_status, and it is the internal equivalent of the
// externally-visible omp_pause_resource_t. Each enumerator therefore reads
// either as a current state or as a request.
typedef enum kmp_pause_status_t {
  // state: not paused / request: resume
  kmp_not_paused = 0,
  // state: soft-paused / request: soft pause
  kmp_soft_paused = 1,
  // state: hard-paused / request: hard pause
  kmp_hard_paused = 2
} kmp_pause_status_t;
45570b57cec5SDimitry Andric
45580b57cec5SDimitry Andric // This stores the pause state of the runtime
45590b57cec5SDimitry Andric extern kmp_pause_status_t __kmp_pause_status;
45600b57cec5SDimitry Andric extern int __kmpc_pause_resource(kmp_pause_status_t level);
45610b57cec5SDimitry Andric extern int __kmp_pause_resource(kmp_pause_status_t level);
45620b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads.
45630b57cec5SDimitry Andric extern void __kmp_resume_if_soft_paused();
45640b57cec5SDimitry Andric // Hard resume simply resets the status to not paused. Library will appear to
45650b57cec5SDimitry Andric // be uninitialized after hard pause. Let OMP constructs trigger required
45660b57cec5SDimitry Andric // initializations.
__kmp_resume_if_hard_paused()45670b57cec5SDimitry Andric static inline void __kmp_resume_if_hard_paused() {
45680b57cec5SDimitry Andric if (__kmp_pause_status == kmp_hard_paused) {
45690b57cec5SDimitry Andric __kmp_pause_status = kmp_not_paused;
45700b57cec5SDimitry Andric }
45710b57cec5SDimitry Andric }
45720b57cec5SDimitry Andric
45735ffd83dbSDimitry Andric extern void __kmp_omp_display_env(int verbose);
45745ffd83dbSDimitry Andric
4575e8d8bef9SDimitry Andric // 1: it is initializing hidden helper team
4576e8d8bef9SDimitry Andric extern volatile int __kmp_init_hidden_helper;
4577e8d8bef9SDimitry Andric // 1: the hidden helper team is done
4578e8d8bef9SDimitry Andric extern volatile int __kmp_hidden_helper_team_done;
4579e8d8bef9SDimitry Andric // 1: enable hidden helper task
4580e8d8bef9SDimitry Andric extern kmp_int32 __kmp_enable_hidden_helper;
4581e8d8bef9SDimitry Andric // Main thread of hidden helper team
4582e8d8bef9SDimitry Andric extern kmp_info_t *__kmp_hidden_helper_main_thread;
4583e8d8bef9SDimitry Andric // Descriptors for the hidden helper threads
4584e8d8bef9SDimitry Andric extern kmp_info_t **__kmp_hidden_helper_threads;
4585e8d8bef9SDimitry Andric // Number of hidden helper threads
4586e8d8bef9SDimitry Andric extern kmp_int32 __kmp_hidden_helper_threads_num;
4587e8d8bef9SDimitry Andric // Number of hidden helper tasks that have not been executed yet
4588e8d8bef9SDimitry Andric extern std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
4589e8d8bef9SDimitry Andric
4590e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_initialize();
4591e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_threads_initz_routine();
4592e8d8bef9SDimitry Andric extern void __kmp_do_initialize_hidden_helper_threads();
4593e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_threads_initz_wait();
4594e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_initz_release();
4595e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_threads_deinitz_wait();
4596e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_threads_deinitz_release();
4597e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_main_thread_wait();
4598e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_worker_thread_wait();
4599e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_worker_thread_signal();
4600e8d8bef9SDimitry Andric extern void __kmp_hidden_helper_main_thread_release();
4601e8d8bef9SDimitry Andric
// True when gtid lies in [1, __kmp_hidden_helper_threads_num], i.e. the gtid
// belongs to a hidden helper thread. The argument may be evaluated twice.
#define KMP_HIDDEN_HELPER_THREAD(gtid)                                         \
  (1 <= (gtid) && (gtid) <= __kmp_hidden_helper_threads_num)
4605e8d8bef9SDimitry Andric
4606e8d8bef9SDimitry Andric #define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
4607e8d8bef9SDimitry Andric ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4608e8d8bef9SDimitry Andric
4609bdd1243dSDimitry Andric #define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
4610bdd1243dSDimitry Andric ((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4611bdd1243dSDimitry Andric
// True when `team` is the hidden helper team, i.e. the first entry of its
// thread array is the hidden helper main thread. The argument is wrapped in
// parentheses so that compound pointer expressions such as `base + i` bind
// before the `->` member access.
#define KMP_HIDDEN_HELPER_TEAM(team)                                           \
  ((team)->t.t_threads[0] == __kmp_hidden_helper_main_thread)
4614fe6060f1SDimitry Andric
// Select the hidden helper thread that shadows a given gtid. The result is
// offset by 2 so that the first hidden helper thread (the main thread,
// gtid 1) is never chosen. The divisor is
// (__kmp_hidden_helper_threads_num - 1), so the helper count must not be 1.
#define KMP_GTID_TO_SHADOW_GTID(gtid)                                          \
  (2 + (gtid) % (__kmp_hidden_helper_threads_num - 1))
4619e8d8bef9SDimitry Andric
4620fe6060f1SDimitry Andric // Return the adjusted gtid value by subtracting from gtid the number
4621fe6060f1SDimitry Andric // of hidden helper threads. This adjusted value is the gtid the thread would
4622fe6060f1SDimitry Andric // have received if there were no hidden helper threads.
__kmp_adjust_gtid_for_hidden_helpers(int gtid)4623fe6060f1SDimitry Andric static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
4624fe6060f1SDimitry Andric int adjusted_gtid = gtid;
4625fe6060f1SDimitry Andric if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
4626fe6060f1SDimitry Andric gtid - __kmp_hidden_helper_threads_num >= 0) {
4627fe6060f1SDimitry Andric adjusted_gtid -= __kmp_hidden_helper_threads_num;
4628fe6060f1SDimitry Andric }
4629fe6060f1SDimitry Andric return adjusted_gtid;
4630fe6060f1SDimitry Andric }
4631fe6060f1SDimitry Andric
4632fe6060f1SDimitry Andric // Support for error directive
// Severity levels with fixed numeric values: 1 for a warning, 2 for fatal.
4633fe6060f1SDimitry Andric typedef enum kmp_severity_t {
4634fe6060f1SDimitry Andric severity_warning = 1,
4635fe6060f1SDimitry Andric severity_fatal = 2
4636fe6060f1SDimitry Andric } kmp_severity_t;
// Entry point taking a source location, a severity and a message string.
// NOTE(review): `severity` is a plain int; presumably it carries one of the
// kmp_severity_t values above -- confirm against the definition.
4637fe6060f1SDimitry Andric extern void __kmpc_error(ident_t *loc, int severity, const char *message);
4638fe6060f1SDimitry Andric
4639349cc55cSDimitry Andric // Support for scope directive
// Begin/end pair with identical parameters: source location, the caller's
// gtid and a `reserved` pointer.
// NOTE(review): `reserved` looks like a placeholder for future use -- confirm
// in the definitions.
4640349cc55cSDimitry Andric KMP_EXPORT void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4641349cc55cSDimitry Andric KMP_EXPORT void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4642349cc55cSDimitry Andric
46430b57cec5SDimitry Andric #ifdef __cplusplus
46440b57cec5SDimitry Andric }
46450b57cec5SDimitry Andric #endif
46460b57cec5SDimitry Andric
// Suspend / mwait / resume prototypes, one per flag type: kmp_flag_32,
// kmp_flag_64, kmp_atomic_flag_64 and kmp_flag_oncore. The first three flag
// types are class templates, so those functions are templated on the same
// two bool arguments <C, S>; the oncore variants are plain functions.
// Only declarations are visible here.
// NOTE(review): `th_gtid` is presumably the gtid of the thread that suspends
// itself on `flag` -- confirm against the definitions.
4647e8d8bef9SDimitry Andric template <bool C, bool S>
4648e8d8bef9SDimitry Andric extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
4649e8d8bef9SDimitry Andric template <bool C, bool S>
4650e8d8bef9SDimitry Andric extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
4651349cc55cSDimitry Andric template <bool C, bool S>
4652349cc55cSDimitry Andric extern void __kmp_atomic_suspend_64(int th_gtid,
4653349cc55cSDimitry Andric kmp_atomic_flag_64<C, S> *flag);
4654e8d8bef9SDimitry Andric extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
// The mwait variants are declared only when the build reports MWAIT or
// UMWAIT support.
4655e8d8bef9SDimitry Andric #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
4656e8d8bef9SDimitry Andric template <bool C, bool S>
4657e8d8bef9SDimitry Andric extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
4658e8d8bef9SDimitry Andric template <bool C, bool S>
4659e8d8bef9SDimitry Andric extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
4660349cc55cSDimitry Andric template <bool C, bool S>
4661349cc55cSDimitry Andric extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
4662e8d8bef9SDimitry Andric extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
4663e8d8bef9SDimitry Andric #endif
// NOTE(review): `target_gtid` is presumably the gtid of the thread to wake,
// as opposed to the caller's own gtid -- confirm against the definitions.
4664e8d8bef9SDimitry Andric template <bool C, bool S>
4665e8d8bef9SDimitry Andric extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
4666e8d8bef9SDimitry Andric template <bool C, bool S>
4667e8d8bef9SDimitry Andric extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
4668349cc55cSDimitry Andric template <bool C, bool S>
4669349cc55cSDimitry Andric extern void __kmp_atomic_resume_64(int target_gtid,
4670349cc55cSDimitry Andric kmp_atomic_flag_64<C, S> *flag);
4671e8d8bef9SDimitry Andric extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
4672e8d8bef9SDimitry Andric
// Task-execution prototypes, one per flag type (kmp_flag_32, kmp_flag_64,
// kmp_atomic_flag_64, kmp_flag_oncore). All four share the same parameter
// list apart from the flag type. The `itt_sync_obj` parameter is part of the
// signature only when USE_ITT_BUILD is enabled, so callers must guard that
// argument with the same condition.
// NOTE(review): the meaning of the int return value and of `final_spin`,
// `thread_finished` and `is_constrained` is not visible in this header --
// consult the definitions.
4673e8d8bef9SDimitry Andric template <bool C, bool S>
4674e8d8bef9SDimitry Andric int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
4675e8d8bef9SDimitry Andric kmp_flag_32<C, S> *flag, int final_spin,
4676e8d8bef9SDimitry Andric int *thread_finished,
4677e8d8bef9SDimitry Andric #if USE_ITT_BUILD
4678e8d8bef9SDimitry Andric void *itt_sync_obj,
4679e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD */
4680e8d8bef9SDimitry Andric kmp_int32 is_constrained);
4681e8d8bef9SDimitry Andric template <bool C, bool S>
4682e8d8bef9SDimitry Andric int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4683e8d8bef9SDimitry Andric kmp_flag_64<C, S> *flag, int final_spin,
4684e8d8bef9SDimitry Andric int *thread_finished,
4685e8d8bef9SDimitry Andric #if USE_ITT_BUILD
4686e8d8bef9SDimitry Andric void *itt_sync_obj,
4687e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD */
4688e8d8bef9SDimitry Andric kmp_int32 is_constrained);
4689349cc55cSDimitry Andric template <bool C, bool S>
4690349cc55cSDimitry Andric int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4691349cc55cSDimitry Andric kmp_atomic_flag_64<C, S> *flag,
4692349cc55cSDimitry Andric int final_spin, int *thread_finished,
4693349cc55cSDimitry Andric #if USE_ITT_BUILD
4694349cc55cSDimitry Andric void *itt_sync_obj,
4695349cc55cSDimitry Andric #endif /* USE_ITT_BUILD */
4696349cc55cSDimitry Andric kmp_int32 is_constrained);
4697e8d8bef9SDimitry Andric int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
4698e8d8bef9SDimitry Andric kmp_flag_oncore *flag, int final_spin,
4699e8d8bef9SDimitry Andric int *thread_finished,
4700e8d8bef9SDimitry Andric #if USE_ITT_BUILD
4701e8d8bef9SDimitry Andric void *itt_sync_obj,
4702e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD */
4703e8d8bef9SDimitry Andric kmp_int32 is_constrained);
4704e8d8bef9SDimitry Andric
// Nesting mode state and its two setup routines (declarations only; the
// definitions are outside this header).
// NOTE(review): __kmp_nesting_nth_level is presumably an array with one
// thread count per nesting level, sized by __kmp_nesting_mode_nlevels --
// confirm against the code that allocates it.
4705fe6060f1SDimitry Andric extern int __kmp_nesting_mode;
4706fe6060f1SDimitry Andric extern int __kmp_nesting_mode_nlevels;
4707fe6060f1SDimitry Andric extern int *__kmp_nesting_nth_level;
4708fe6060f1SDimitry Andric extern void __kmp_init_nesting_mode();
4709fe6060f1SDimitry Andric extern void __kmp_set_nesting_mode_threads();
4710fe6060f1SDimitry Andric
4711e8d8bef9SDimitry Andric /// This class safely opens and closes a C-style FILE* object using RAII
4712e8d8bef9SDimitry Andric /// semantics. There are also methods which allow using stdout or stderr as
4713e8d8bef9SDimitry Andric /// the underlying FILE* object. With the implicit conversion operator to
4714e8d8bef9SDimitry Andric /// FILE*, an object with this type can be used in any function which takes
4715e8d8bef9SDimitry Andric /// a FILE* object e.g., fprintf().
4716e8d8bef9SDimitry Andric /// No close method is needed at use sites.
4717e8d8bef9SDimitry Andric class kmp_safe_raii_file_t {
4718e8d8bef9SDimitry Andric FILE *f;
4719e8d8bef9SDimitry Andric
close()4720e8d8bef9SDimitry Andric void close() {
4721e8d8bef9SDimitry Andric if (f && f != stdout && f != stderr) {
4722e8d8bef9SDimitry Andric fclose(f);
4723e8d8bef9SDimitry Andric f = nullptr;
4724e8d8bef9SDimitry Andric }
4725e8d8bef9SDimitry Andric }
4726e8d8bef9SDimitry Andric
4727e8d8bef9SDimitry Andric public:
kmp_safe_raii_file_t()4728e8d8bef9SDimitry Andric kmp_safe_raii_file_t() : f(nullptr) {}
4729e8d8bef9SDimitry Andric kmp_safe_raii_file_t(const char *filename, const char *mode,
4730e8d8bef9SDimitry Andric const char *env_var = nullptr)
f(nullptr)4731e8d8bef9SDimitry Andric : f(nullptr) {
4732e8d8bef9SDimitry Andric open(filename, mode, env_var);
4733e8d8bef9SDimitry Andric }
~kmp_safe_raii_file_t()4734e8d8bef9SDimitry Andric ~kmp_safe_raii_file_t() { close(); }
4735e8d8bef9SDimitry Andric
4736e8d8bef9SDimitry Andric /// Open filename using mode. This is automatically closed in the destructor.
4737e8d8bef9SDimitry Andric /// The env_var parameter indicates the environment variable the filename
4738e8d8bef9SDimitry Andric /// came from if != nullptr.
4739e8d8bef9SDimitry Andric void open(const char *filename, const char *mode,
4740e8d8bef9SDimitry Andric const char *env_var = nullptr) {
4741e8d8bef9SDimitry Andric KMP_ASSERT(!f);
4742e8d8bef9SDimitry Andric f = fopen(filename, mode);
4743e8d8bef9SDimitry Andric if (!f) {
4744e8d8bef9SDimitry Andric int code = errno;
4745e8d8bef9SDimitry Andric if (env_var) {
4746e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4747e8d8bef9SDimitry Andric KMP_HNT(CheckEnvVar, env_var, filename), __kmp_msg_null);
4748e8d8bef9SDimitry Andric } else {
4749e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4750e8d8bef9SDimitry Andric __kmp_msg_null);
4751e8d8bef9SDimitry Andric }
4752e8d8bef9SDimitry Andric }
4753e8d8bef9SDimitry Andric }
4754349cc55cSDimitry Andric /// Instead of erroring out, return non-zero when
4755349cc55cSDimitry Andric /// unsuccessful fopen() for any reason
try_open(const char * filename,const char * mode)4756349cc55cSDimitry Andric int try_open(const char *filename, const char *mode) {
4757349cc55cSDimitry Andric KMP_ASSERT(!f);
4758349cc55cSDimitry Andric f = fopen(filename, mode);
4759349cc55cSDimitry Andric if (!f)
4760349cc55cSDimitry Andric return errno;
4761349cc55cSDimitry Andric return 0;
4762349cc55cSDimitry Andric }
4763e8d8bef9SDimitry Andric /// Set the FILE* object to stdout and output there
4764e8d8bef9SDimitry Andric /// No open call should happen before this call.
set_stdout()4765e8d8bef9SDimitry Andric void set_stdout() {
4766e8d8bef9SDimitry Andric KMP_ASSERT(!f);
4767e8d8bef9SDimitry Andric f = stdout;
4768e8d8bef9SDimitry Andric }
4769e8d8bef9SDimitry Andric /// Set the FILE* object to stderr and output there
4770e8d8bef9SDimitry Andric /// No open call should happen before this call.
set_stderr()4771e8d8bef9SDimitry Andric void set_stderr() {
4772e8d8bef9SDimitry Andric KMP_ASSERT(!f);
4773e8d8bef9SDimitry Andric f = stderr;
4774e8d8bef9SDimitry Andric }
4775e8d8bef9SDimitry Andric operator bool() { return bool(f); }
4776e8d8bef9SDimitry Andric operator FILE *() { return f; }
4777e8d8bef9SDimitry Andric };
4778e8d8bef9SDimitry Andric
/// Checked conversion from SourceType to TargetType.
///
/// kmp_convert<SourceType, TargetType>::to(src) returns src converted to
/// TargetType. The four defaulted bool parameters classify the type pair by
/// relative size (smaller / equal / bigger) and by signedness of each side,
/// and select one of the partial specializations below. Each specialization
/// uses KMP_ASSERT to check the bounds relevant to its combination before
/// converting. The primary template is empty, so to() exists only for the
/// specialized combinations. The implementation relies on std::is_signed and
/// std::numeric_limits, so it is meant for arithmetic types.
template <typename SourceType, typename TargetType,
          bool isSourceSmaller = (sizeof(SourceType) < sizeof(TargetType)),
          bool isSourceEqual = (sizeof(SourceType) == sizeof(TargetType)),
          bool isSourceSigned = std::is_signed<SourceType>::value,
          bool isTargetSigned = std::is_signed<TargetType>::value>
struct kmp_convert {};

// ---- Source signed, Target signed ----
// Source smaller: every source value fits, no check needed.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, true, false, true, true> {
  static TargetType to(SourceType src) { return static_cast<TargetType>(src); }
};
// Source equal: same width and signedness, no check needed.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, true, true, true> {
  static TargetType to(SourceType src) { return static_cast<TargetType>(src); }
};
// Source bigger: both ends of the target range are checked.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, false, true, true> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src <= static_cast<SourceType>(
                          (std::numeric_limits<TargetType>::max)()));
    KMP_ASSERT(src >= static_cast<SourceType>(
                          (std::numeric_limits<TargetType>::min)()));
    return static_cast<TargetType>(src);
  }
};

// ---- Source signed, Target unsigned ----
// Source smaller: only negative values are rejected.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, true, false, true, false> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src >= 0);
    return static_cast<TargetType>(src);
  }
};
// Source equal: only negative values are rejected.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, true, true, false> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src >= 0);
    return static_cast<TargetType>(src);
  }
};
// Source bigger: must be non-negative and no larger than the target maximum.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, false, true, false> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src >= 0);
    KMP_ASSERT(src <= static_cast<SourceType>(
                          (std::numeric_limits<TargetType>::max)()));
    return static_cast<TargetType>(src);
  }
};

// ---- Source unsigned, Target signed ----
// Source smaller: every source value fits, no check needed.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, true, false, false, true> {
  static TargetType to(SourceType src) { return static_cast<TargetType>(src); }
};
// Source equal: the value must not exceed the target maximum.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, true, false, true> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src <= static_cast<SourceType>(
                          (std::numeric_limits<TargetType>::max)()));
    return static_cast<TargetType>(src);
  }
};
// Source bigger: the value must not exceed the target maximum.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, false, false, true> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src <= static_cast<SourceType>(
                          (std::numeric_limits<TargetType>::max)()));
    return static_cast<TargetType>(src);
  }
};

// ---- Source unsigned, Target unsigned ----
// Source smaller: every source value fits, no check needed.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, true, false, false, false> {
  static TargetType to(SourceType src) { return static_cast<TargetType>(src); }
};
// Source equal: same width and signedness, no check needed.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, true, false, false> {
  static TargetType to(SourceType src) { return static_cast<TargetType>(src); }
};
// Source bigger: the value must not exceed the target maximum.
template <typename SourceType, typename TargetType>
struct kmp_convert<SourceType, TargetType, false, false, false, false> {
  static TargetType to(SourceType src) {
    KMP_ASSERT(src <= static_cast<SourceType>(
                          (std::numeric_limits<TargetType>::max)()));
    return static_cast<TargetType>(src);
  }
};
4881e8d8bef9SDimitry Andric
4882e8d8bef9SDimitry Andric template <typename T1, typename T2>
4883e8d8bef9SDimitry Andric static inline void __kmp_type_convert(T1 src, T2 *dest) {
4884e8d8bef9SDimitry Andric *dest = kmp_convert<T1, T2>::to(src);
4885e8d8bef9SDimitry Andric }
4886e8d8bef9SDimitry Andric
48870b57cec5SDimitry Andric #endif /* KMP_H */
4888