1*0b57cec5SDimitry Andric /*! \file */ 2*0b57cec5SDimitry Andric /* 3*0b57cec5SDimitry Andric * kmp.h -- KPTS runtime header file. 4*0b57cec5SDimitry Andric */ 5*0b57cec5SDimitry Andric 6*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 7*0b57cec5SDimitry Andric // 8*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 10*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11*0b57cec5SDimitry Andric // 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #ifndef KMP_H 15*0b57cec5SDimitry Andric #define KMP_H 16*0b57cec5SDimitry Andric 17*0b57cec5SDimitry Andric #include "kmp_config.h" 18*0b57cec5SDimitry Andric 19*0b57cec5SDimitry Andric /* #define BUILD_PARALLEL_ORDERED 1 */ 20*0b57cec5SDimitry Andric 21*0b57cec5SDimitry Andric /* This fix replaces gettimeofday with clock_gettime for better scalability on 22*0b57cec5SDimitry Andric the Altix. Requires user code to be linked with -lrt. */ 23*0b57cec5SDimitry Andric //#define FIX_SGI_CLOCK 24*0b57cec5SDimitry Andric 25*0b57cec5SDimitry Andric /* Defines for OpenMP 3.0 tasking and auto scheduling */ 26*0b57cec5SDimitry Andric 27*0b57cec5SDimitry Andric #ifndef KMP_STATIC_STEAL_ENABLED 28*0b57cec5SDimitry Andric #define KMP_STATIC_STEAL_ENABLED 1 29*0b57cec5SDimitry Andric #endif 30*0b57cec5SDimitry Andric 31*0b57cec5SDimitry Andric #define TASK_CURRENT_NOT_QUEUED 0 32*0b57cec5SDimitry Andric #define TASK_CURRENT_QUEUED 1 33*0b57cec5SDimitry Andric 34*0b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK 35*0b57cec5SDimitry Andric #define TASK_STACK_EMPTY 0 // entries when the stack is empty 36*0b57cec5SDimitry Andric #define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK 37*0b57cec5SDimitry Andric // Number of entries in each task stack array 38*0b57cec5SDimitry Andric #define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS) 39*0b57cec5SDimitry Andric // Mask for determining index into stack block 40*0b57cec5SDimitry Andric #define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1) 41*0b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK 42*0b57cec5SDimitry Andric 43*0b57cec5SDimitry Andric #define TASK_NOT_PUSHED 1 44*0b57cec5SDimitry Andric #define TASK_SUCCESSFULLY_PUSHED 0 45*0b57cec5SDimitry Andric #define TASK_TIED 1 46*0b57cec5SDimitry Andric #define TASK_UNTIED 0 47*0b57cec5SDimitry Andric #define TASK_EXPLICIT 1 48*0b57cec5SDimitry Andric #define TASK_IMPLICIT 0 49*0b57cec5SDimitry Andric #define TASK_PROXY 1 50*0b57cec5SDimitry Andric #define TASK_FULL 0 51*0b57cec5SDimitry Andric #define TASK_DETACHABLE 1 52*0b57cec5SDimitry Andric #define TASK_UNDETACHABLE 0 53*0b57cec5SDimitry Andric 54*0b57cec5SDimitry Andric #define KMP_CANCEL_THREADS 55*0b57cec5SDimitry Andric #define KMP_THREAD_ATTR 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric // Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being 58*0b57cec5SDimitry Andric // built on Android 59*0b57cec5SDimitry Andric #if defined(__ANDROID__) 60*0b57cec5SDimitry Andric #undef KMP_CANCEL_THREADS 61*0b57cec5SDimitry Andric #endif 62*0b57cec5SDimitry Andric 63*0b57cec5SDimitry Andric #include <signal.h> 64*0b57cec5SDimitry Andric #include <stdarg.h> 65*0b57cec5SDimitry Andric #include <stddef.h> 66*0b57cec5SDimitry Andric #include <stdio.h> 67*0b57cec5SDimitry Andric #include <stdlib.h> 68*0b57cec5SDimitry Andric #include <string.h> 69*0b57cec5SDimitry Andric /* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad 70*0b57cec5SDimitry Andric Microsoft library. Some macros provided below to replace these functions */ 71*0b57cec5SDimitry Andric #ifndef __ABSOFT_WIN 72*0b57cec5SDimitry Andric #include <sys/types.h> 73*0b57cec5SDimitry Andric #endif 74*0b57cec5SDimitry Andric #include <limits.h> 75*0b57cec5SDimitry Andric #include <time.h> 76*0b57cec5SDimitry Andric 77*0b57cec5SDimitry Andric #include <errno.h> 78*0b57cec5SDimitry Andric 79*0b57cec5SDimitry Andric #include "kmp_os.h" 80*0b57cec5SDimitry Andric 81*0b57cec5SDimitry Andric #include "kmp_safe_c_api.h" 82*0b57cec5SDimitry Andric 83*0b57cec5SDimitry Andric #if KMP_STATS_ENABLED 84*0b57cec5SDimitry Andric class kmp_stats_list; 85*0b57cec5SDimitry Andric #endif 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 88*0b57cec5SDimitry Andric // Only include hierarchical scheduling if affinity is supported 89*0b57cec5SDimitry Andric #undef KMP_USE_HIER_SCHED 90*0b57cec5SDimitry Andric #define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED 91*0b57cec5SDimitry Andric #endif 92*0b57cec5SDimitry Andric 93*0b57cec5SDimitry Andric #if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED 94*0b57cec5SDimitry Andric #include "hwloc.h" 95*0b57cec5SDimitry Andric #ifndef HWLOC_OBJ_NUMANODE 96*0b57cec5SDimitry Andric #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE 97*0b57cec5SDimitry Andric #endif 98*0b57cec5SDimitry Andric #ifndef HWLOC_OBJ_PACKAGE 99*0b57cec5SDimitry Andric #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET 100*0b57cec5SDimitry Andric #endif 101*0b57cec5SDimitry Andric #if HWLOC_API_VERSION >= 0x00020000 102*0b57cec5SDimitry Andric // hwloc 2.0 changed type of depth of object from unsigned to int 103*0b57cec5SDimitry Andric typedef int kmp_hwloc_depth_t; 104*0b57cec5SDimitry Andric #else 105*0b57cec5SDimitry Andric typedef unsigned int kmp_hwloc_depth_t; 106*0b57cec5SDimitry Andric #endif 107*0b57cec5SDimitry Andric #endif 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 110*0b57cec5SDimitry Andric #include <xmmintrin.h> 111*0b57cec5SDimitry Andric #endif 112*0b57cec5SDimitry Andric 113*0b57cec5SDimitry Andric #include "kmp_debug.h" 114*0b57cec5SDimitry Andric #include "kmp_lock.h" 115*0b57cec5SDimitry Andric #include "kmp_version.h" 116*0b57cec5SDimitry Andric #if USE_DEBUGGER 117*0b57cec5SDimitry Andric #include "kmp_debugger.h" 118*0b57cec5SDimitry Andric #endif 119*0b57cec5SDimitry Andric #include "kmp_i18n.h" 120*0b57cec5SDimitry Andric 121*0b57cec5SDimitry Andric #define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS) 122*0b57cec5SDimitry Andric 123*0b57cec5SDimitry Andric #include "kmp_wrapper_malloc.h" 124*0b57cec5SDimitry Andric #if KMP_OS_UNIX 125*0b57cec5SDimitry Andric #include <unistd.h> 126*0b57cec5SDimitry Andric #if !defined NSIG && defined _NSIG 127*0b57cec5SDimitry Andric #define NSIG _NSIG 128*0b57cec5SDimitry Andric #endif 129*0b57cec5SDimitry Andric #endif 130*0b57cec5SDimitry Andric 131*0b57cec5SDimitry Andric #if KMP_OS_LINUX 132*0b57cec5SDimitry Andric #pragma weak clock_gettime 133*0b57cec5SDimitry Andric #endif 134*0b57cec5SDimitry Andric 135*0b57cec5SDimitry Andric #if OMPT_SUPPORT 136*0b57cec5SDimitry Andric #include "ompt-internal.h" 137*0b57cec5SDimitry Andric #endif 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric // Affinity format function 140*0b57cec5SDimitry Andric #include "kmp_str.h" 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. 143*0b57cec5SDimitry Andric // 3 - fast allocation using sync, non-sync free lists of any size, non-self 144*0b57cec5SDimitry Andric // free lists of limited size. 145*0b57cec5SDimitry Andric #ifndef USE_FAST_MEMORY 146*0b57cec5SDimitry Andric #define USE_FAST_MEMORY 3 147*0b57cec5SDimitry Andric #endif 148*0b57cec5SDimitry Andric 149*0b57cec5SDimitry Andric #ifndef KMP_NESTED_HOT_TEAMS 150*0b57cec5SDimitry Andric #define KMP_NESTED_HOT_TEAMS 0 151*0b57cec5SDimitry Andric #define USE_NESTED_HOT_ARG(x) 152*0b57cec5SDimitry Andric #else 153*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 154*0b57cec5SDimitry Andric #define USE_NESTED_HOT_ARG(x) , x 155*0b57cec5SDimitry Andric #else 156*0b57cec5SDimitry Andric #define USE_NESTED_HOT_ARG(x) 157*0b57cec5SDimitry Andric #endif 158*0b57cec5SDimitry Andric #endif 159*0b57cec5SDimitry Andric 160*0b57cec5SDimitry Andric // Assume using BGET compare_exchange instruction instead of lock by default. 161*0b57cec5SDimitry Andric #ifndef USE_CMP_XCHG_FOR_BGET 162*0b57cec5SDimitry Andric #define USE_CMP_XCHG_FOR_BGET 1 163*0b57cec5SDimitry Andric #endif 164*0b57cec5SDimitry Andric 165*0b57cec5SDimitry Andric // Test to see if queuing lock is better than bootstrap lock for bget 166*0b57cec5SDimitry Andric // #ifndef USE_QUEUING_LOCK_FOR_BGET 167*0b57cec5SDimitry Andric // #define USE_QUEUING_LOCK_FOR_BGET 168*0b57cec5SDimitry Andric // #endif 169*0b57cec5SDimitry Andric 170*0b57cec5SDimitry Andric #define KMP_NSEC_PER_SEC 1000000000L 171*0b57cec5SDimitry Andric #define KMP_USEC_PER_SEC 1000000L 172*0b57cec5SDimitry Andric 173*0b57cec5SDimitry Andric /*! 174*0b57cec5SDimitry Andric @ingroup BASIC_TYPES 175*0b57cec5SDimitry Andric @{ 176*0b57cec5SDimitry Andric */ 177*0b57cec5SDimitry Andric 178*0b57cec5SDimitry Andric /*! 179*0b57cec5SDimitry Andric Values for bit flags used in the ident_t to describe the fields. 180*0b57cec5SDimitry Andric */ 181*0b57cec5SDimitry Andric enum { 182*0b57cec5SDimitry Andric /*! Use trampoline for internal microtasks */ 183*0b57cec5SDimitry Andric KMP_IDENT_IMB = 0x01, 184*0b57cec5SDimitry Andric /*! Use c-style ident structure */ 185*0b57cec5SDimitry Andric KMP_IDENT_KMPC = 0x02, 186*0b57cec5SDimitry Andric /* 0x04 is no longer used */ 187*0b57cec5SDimitry Andric /*! Entry point generated by auto-parallelization */ 188*0b57cec5SDimitry Andric KMP_IDENT_AUTOPAR = 0x08, 189*0b57cec5SDimitry Andric /*! Compiler generates atomic reduction option for kmpc_reduce* */ 190*0b57cec5SDimitry Andric KMP_IDENT_ATOMIC_REDUCE = 0x10, 191*0b57cec5SDimitry Andric /*! To mark a 'barrier' directive in user code */ 192*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_EXPL = 0x20, 193*0b57cec5SDimitry Andric /*! To Mark implicit barriers. */ 194*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_IMPL = 0x0040, 195*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0, 196*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_IMPL_FOR = 0x0040, 197*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0, 198*0b57cec5SDimitry Andric 199*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140, 200*0b57cec5SDimitry Andric KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0, 201*0b57cec5SDimitry Andric 202*0b57cec5SDimitry Andric /*! To mark a static loop in OMPT callbacks */ 203*0b57cec5SDimitry Andric KMP_IDENT_WORK_LOOP = 0x200, 204*0b57cec5SDimitry Andric /*! To mark a sections directive in OMPT callbacks */ 205*0b57cec5SDimitry Andric KMP_IDENT_WORK_SECTIONS = 0x400, 206*0b57cec5SDimitry Andric /*! To mark a distirbute construct in OMPT callbacks */ 207*0b57cec5SDimitry Andric KMP_IDENT_WORK_DISTRIBUTE = 0x800, 208*0b57cec5SDimitry Andric /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and 209*0b57cec5SDimitry Andric not currently used. If one day we need more bits, then we can use 210*0b57cec5SDimitry Andric an invalid combination of hints to mean that another, larger field 211*0b57cec5SDimitry Andric should be used in a different flag. */ 212*0b57cec5SDimitry Andric KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000, 213*0b57cec5SDimitry Andric KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000, 214*0b57cec5SDimitry Andric KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000, 215*0b57cec5SDimitry Andric KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000, 216*0b57cec5SDimitry Andric KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000, 217*0b57cec5SDimitry Andric }; 218*0b57cec5SDimitry Andric 219*0b57cec5SDimitry Andric /*! 220*0b57cec5SDimitry Andric * The ident structure that describes a source location. 221*0b57cec5SDimitry Andric */ 222*0b57cec5SDimitry Andric typedef struct ident { 223*0b57cec5SDimitry Andric kmp_int32 reserved_1; /**< might be used in Fortran; see above */ 224*0b57cec5SDimitry Andric kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC 225*0b57cec5SDimitry Andric identifies this union member */ 226*0b57cec5SDimitry Andric kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ 227*0b57cec5SDimitry Andric #if USE_ITT_BUILD 228*0b57cec5SDimitry Andric /* but currently used for storing region-specific ITT */ 229*0b57cec5SDimitry Andric /* contextual information. */ 230*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 231*0b57cec5SDimitry Andric kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ 232*0b57cec5SDimitry Andric char const *psource; /**< String describing the source location. 233*0b57cec5SDimitry Andric The string is composed of semi-colon separated fields 234*0b57cec5SDimitry Andric which describe the source file, the function and a pair 235*0b57cec5SDimitry Andric of line numbers that delimit the construct. */ 236*0b57cec5SDimitry Andric } ident_t; 237*0b57cec5SDimitry Andric /*! 238*0b57cec5SDimitry Andric @} 239*0b57cec5SDimitry Andric */ 240*0b57cec5SDimitry Andric 241*0b57cec5SDimitry Andric // Some forward declarations. 242*0b57cec5SDimitry Andric typedef union kmp_team kmp_team_t; 243*0b57cec5SDimitry Andric typedef struct kmp_taskdata kmp_taskdata_t; 244*0b57cec5SDimitry Andric typedef union kmp_task_team kmp_task_team_t; 245*0b57cec5SDimitry Andric typedef union kmp_team kmp_team_p; 246*0b57cec5SDimitry Andric typedef union kmp_info kmp_info_p; 247*0b57cec5SDimitry Andric typedef union kmp_root kmp_root_p; 248*0b57cec5SDimitry Andric 249*0b57cec5SDimitry Andric #ifdef __cplusplus 250*0b57cec5SDimitry Andric extern "C" { 251*0b57cec5SDimitry Andric #endif 252*0b57cec5SDimitry Andric 253*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 254*0b57cec5SDimitry Andric 255*0b57cec5SDimitry Andric /* Pack two 32-bit signed integers into a 64-bit signed integer */ 256*0b57cec5SDimitry Andric /* ToDo: Fix word ordering for big-endian machines. */ 257*0b57cec5SDimitry Andric #define KMP_PACK_64(HIGH_32, LOW_32) \ 258*0b57cec5SDimitry Andric ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32))) 259*0b57cec5SDimitry Andric 260*0b57cec5SDimitry Andric // Generic string manipulation macros. Assume that _x is of type char * 261*0b57cec5SDimitry Andric #define SKIP_WS(_x) \ 262*0b57cec5SDimitry Andric { \ 263*0b57cec5SDimitry Andric while (*(_x) == ' ' || *(_x) == '\t') \ 264*0b57cec5SDimitry Andric (_x)++; \ 265*0b57cec5SDimitry Andric } 266*0b57cec5SDimitry Andric #define SKIP_DIGITS(_x) \ 267*0b57cec5SDimitry Andric { \ 268*0b57cec5SDimitry Andric while (*(_x) >= '0' && *(_x) <= '9') \ 269*0b57cec5SDimitry Andric (_x)++; \ 270*0b57cec5SDimitry Andric } 271*0b57cec5SDimitry Andric #define SKIP_TOKEN(_x) \ 272*0b57cec5SDimitry Andric { \ 273*0b57cec5SDimitry Andric while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \ 274*0b57cec5SDimitry Andric (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \ 275*0b57cec5SDimitry Andric (_x)++; \ 276*0b57cec5SDimitry Andric } 277*0b57cec5SDimitry Andric #define SKIP_TO(_x, _c) \ 278*0b57cec5SDimitry Andric { \ 279*0b57cec5SDimitry Andric while (*(_x) != '\0' && *(_x) != (_c)) \ 280*0b57cec5SDimitry Andric (_x)++; \ 281*0b57cec5SDimitry Andric } 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 284*0b57cec5SDimitry Andric 285*0b57cec5SDimitry Andric #define KMP_MAX(x, y) ((x) > (y) ? (x) : (y)) 286*0b57cec5SDimitry Andric #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) 287*0b57cec5SDimitry Andric 288*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 289*0b57cec5SDimitry Andric /* Enumeration types */ 290*0b57cec5SDimitry Andric 291*0b57cec5SDimitry Andric enum kmp_state_timer { 292*0b57cec5SDimitry Andric ts_stop, 293*0b57cec5SDimitry Andric ts_start, 294*0b57cec5SDimitry Andric ts_pause, 295*0b57cec5SDimitry Andric 296*0b57cec5SDimitry Andric ts_last_state 297*0b57cec5SDimitry Andric }; 298*0b57cec5SDimitry Andric 299*0b57cec5SDimitry Andric enum dynamic_mode { 300*0b57cec5SDimitry Andric dynamic_default, 301*0b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 302*0b57cec5SDimitry Andric dynamic_load_balance, 303*0b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 304*0b57cec5SDimitry Andric dynamic_random, 305*0b57cec5SDimitry Andric dynamic_thread_limit, 306*0b57cec5SDimitry Andric dynamic_max 307*0b57cec5SDimitry Andric }; 308*0b57cec5SDimitry Andric 309*0b57cec5SDimitry Andric /* external schedule constants, duplicate enum omp_sched in omp.h in order to 310*0b57cec5SDimitry Andric * not include it here */ 311*0b57cec5SDimitry Andric #ifndef KMP_SCHED_TYPE_DEFINED 312*0b57cec5SDimitry Andric #define KMP_SCHED_TYPE_DEFINED 313*0b57cec5SDimitry Andric typedef enum kmp_sched { 314*0b57cec5SDimitry Andric kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check 315*0b57cec5SDimitry Andric // Note: need to adjust __kmp_sch_map global array in case enum is changed 316*0b57cec5SDimitry Andric kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) 317*0b57cec5SDimitry Andric kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) 318*0b57cec5SDimitry Andric kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) 319*0b57cec5SDimitry Andric kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) 320*0b57cec5SDimitry Andric kmp_sched_upper_std = 5, // upper bound for standard schedules 321*0b57cec5SDimitry Andric kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules 322*0b57cec5SDimitry Andric kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) 323*0b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 324*0b57cec5SDimitry Andric kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44) 325*0b57cec5SDimitry Andric #endif 326*0b57cec5SDimitry Andric kmp_sched_upper, 327*0b57cec5SDimitry Andric kmp_sched_default = kmp_sched_static, // default scheduling 328*0b57cec5SDimitry Andric kmp_sched_monotonic = 0x80000000 329*0b57cec5SDimitry Andric } kmp_sched_t; 330*0b57cec5SDimitry Andric #endif 331*0b57cec5SDimitry Andric 332*0b57cec5SDimitry Andric /*! 333*0b57cec5SDimitry Andric @ingroup WORK_SHARING 334*0b57cec5SDimitry Andric * Describes the loop schedule to be used for a parallel for loop. 335*0b57cec5SDimitry Andric */ 336*0b57cec5SDimitry Andric enum sched_type : kmp_int32 { 337*0b57cec5SDimitry Andric kmp_sch_lower = 32, /**< lower bound for unordered values */ 338*0b57cec5SDimitry Andric kmp_sch_static_chunked = 33, 339*0b57cec5SDimitry Andric kmp_sch_static = 34, /**< static unspecialized */ 340*0b57cec5SDimitry Andric kmp_sch_dynamic_chunked = 35, 341*0b57cec5SDimitry Andric kmp_sch_guided_chunked = 36, /**< guided unspecialized */ 342*0b57cec5SDimitry Andric kmp_sch_runtime = 37, 343*0b57cec5SDimitry Andric kmp_sch_auto = 38, /**< auto */ 344*0b57cec5SDimitry Andric kmp_sch_trapezoidal = 39, 345*0b57cec5SDimitry Andric 346*0b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */ 347*0b57cec5SDimitry Andric kmp_sch_static_greedy = 40, 348*0b57cec5SDimitry Andric kmp_sch_static_balanced = 41, 349*0b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */ 350*0b57cec5SDimitry Andric kmp_sch_guided_iterative_chunked = 42, 351*0b57cec5SDimitry Andric kmp_sch_guided_analytical_chunked = 43, 352*0b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */ 353*0b57cec5SDimitry Andric kmp_sch_static_steal = 44, 354*0b57cec5SDimitry Andric 355*0b57cec5SDimitry Andric /* static with chunk adjustment (e.g., simd) */ 356*0b57cec5SDimitry Andric kmp_sch_static_balanced_chunked = 45, 357*0b57cec5SDimitry Andric kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */ 358*0b57cec5SDimitry Andric kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */ 359*0b57cec5SDimitry Andric 360*0b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */ 361*0b57cec5SDimitry Andric kmp_sch_upper, /**< upper bound for unordered values */ 362*0b57cec5SDimitry Andric 363*0b57cec5SDimitry Andric kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ 364*0b57cec5SDimitry Andric kmp_ord_static_chunked = 65, 365*0b57cec5SDimitry Andric kmp_ord_static = 66, /**< ordered static unspecialized */ 366*0b57cec5SDimitry Andric kmp_ord_dynamic_chunked = 67, 367*0b57cec5SDimitry Andric kmp_ord_guided_chunked = 68, 368*0b57cec5SDimitry Andric kmp_ord_runtime = 69, 369*0b57cec5SDimitry Andric kmp_ord_auto = 70, /**< ordered auto */ 370*0b57cec5SDimitry Andric kmp_ord_trapezoidal = 71, 371*0b57cec5SDimitry Andric kmp_ord_upper, /**< upper bound for ordered values */ 372*0b57cec5SDimitry Andric 373*0b57cec5SDimitry Andric /* Schedules for Distribute construct */ 374*0b57cec5SDimitry Andric kmp_distribute_static_chunked = 91, /**< distribute static chunked */ 375*0b57cec5SDimitry Andric kmp_distribute_static = 92, /**< distribute static unspecialized */ 376*0b57cec5SDimitry Andric 377*0b57cec5SDimitry Andric /* For the "nomerge" versions, kmp_dispatch_next*() will always return a 378*0b57cec5SDimitry Andric single iteration/chunk, even if the loop is serialized. For the schedule 379*0b57cec5SDimitry Andric types listed above, the entire iteration vector is returned if the loop is 380*0b57cec5SDimitry Andric serialized. This doesn't work for gcc/gcomp sections. */ 381*0b57cec5SDimitry Andric kmp_nm_lower = 160, /**< lower bound for nomerge values */ 382*0b57cec5SDimitry Andric 383*0b57cec5SDimitry Andric kmp_nm_static_chunked = 384*0b57cec5SDimitry Andric (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), 385*0b57cec5SDimitry Andric kmp_nm_static = 162, /**< static unspecialized */ 386*0b57cec5SDimitry Andric kmp_nm_dynamic_chunked = 163, 387*0b57cec5SDimitry Andric kmp_nm_guided_chunked = 164, /**< guided unspecialized */ 388*0b57cec5SDimitry Andric kmp_nm_runtime = 165, 389*0b57cec5SDimitry Andric kmp_nm_auto = 166, /**< auto */ 390*0b57cec5SDimitry Andric kmp_nm_trapezoidal = 167, 391*0b57cec5SDimitry Andric 392*0b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */ 393*0b57cec5SDimitry Andric kmp_nm_static_greedy = 168, 394*0b57cec5SDimitry Andric kmp_nm_static_balanced = 169, 395*0b57cec5SDimitry Andric /* accessible only through KMP_SCHEDULE environment variable */ 396*0b57cec5SDimitry Andric kmp_nm_guided_iterative_chunked = 170, 397*0b57cec5SDimitry Andric kmp_nm_guided_analytical_chunked = 171, 398*0b57cec5SDimitry Andric kmp_nm_static_steal = 399*0b57cec5SDimitry Andric 172, /* accessible only through OMP_SCHEDULE environment variable */ 400*0b57cec5SDimitry Andric 401*0b57cec5SDimitry Andric kmp_nm_ord_static_chunked = 193, 402*0b57cec5SDimitry Andric kmp_nm_ord_static = 194, /**< ordered static unspecialized */ 403*0b57cec5SDimitry Andric kmp_nm_ord_dynamic_chunked = 195, 404*0b57cec5SDimitry Andric kmp_nm_ord_guided_chunked = 196, 405*0b57cec5SDimitry Andric kmp_nm_ord_runtime = 197, 406*0b57cec5SDimitry Andric kmp_nm_ord_auto = 198, /**< auto */ 407*0b57cec5SDimitry Andric kmp_nm_ord_trapezoidal = 199, 408*0b57cec5SDimitry Andric kmp_nm_upper, /**< upper bound for nomerge values */ 409*0b57cec5SDimitry Andric 410*0b57cec5SDimitry Andric /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since 411*0b57cec5SDimitry Andric we need to distinguish the three possible cases (no modifier, monotonic 412*0b57cec5SDimitry Andric modifier, nonmonotonic modifier), we need separate bits for each modifier. 413*0b57cec5SDimitry Andric The absence of monotonic does not imply nonmonotonic, especially since 4.5 414*0b57cec5SDimitry Andric says that the behaviour of the "no modifier" case is implementation defined 415*0b57cec5SDimitry Andric in 4.5, but will become "nonmonotonic" in 5.0. 416*0b57cec5SDimitry Andric 417*0b57cec5SDimitry Andric Since we're passing a full 32 bit value, we can use a couple of high bits 418*0b57cec5SDimitry Andric for these flags; out of paranoia we avoid the sign bit. 419*0b57cec5SDimitry Andric 420*0b57cec5SDimitry Andric These modifiers can be or-ed into non-static schedules by the compiler to 421*0b57cec5SDimitry Andric pass the additional information. They will be stripped early in the 422*0b57cec5SDimitry Andric processing in __kmp_dispatch_init when setting up schedules, so most of the 423*0b57cec5SDimitry Andric code won't ever see schedules with these bits set. */ 424*0b57cec5SDimitry Andric kmp_sch_modifier_monotonic = 425*0b57cec5SDimitry Andric (1 << 29), /**< Set if the monotonic schedule modifier was present */ 426*0b57cec5SDimitry Andric kmp_sch_modifier_nonmonotonic = 427*0b57cec5SDimitry Andric (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ 428*0b57cec5SDimitry Andric 429*0b57cec5SDimitry Andric #define SCHEDULE_WITHOUT_MODIFIERS(s) \ 430*0b57cec5SDimitry Andric (enum sched_type)( \ 431*0b57cec5SDimitry Andric (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) 432*0b57cec5SDimitry Andric #define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0) 433*0b57cec5SDimitry Andric #define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0) 434*0b57cec5SDimitry Andric #define SCHEDULE_HAS_NO_MODIFIERS(s) \ 435*0b57cec5SDimitry Andric (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0) 436*0b57cec5SDimitry Andric #define SCHEDULE_GET_MODIFIERS(s) \ 437*0b57cec5SDimitry Andric ((enum sched_type)( \ 438*0b57cec5SDimitry Andric (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))) 439*0b57cec5SDimitry Andric #define SCHEDULE_SET_MODIFIERS(s, m) \ 440*0b57cec5SDimitry Andric (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m)) 441*0b57cec5SDimitry Andric #define SCHEDULE_NONMONOTONIC 0 442*0b57cec5SDimitry Andric #define SCHEDULE_MONOTONIC 1 443*0b57cec5SDimitry Andric 444*0b57cec5SDimitry Andric kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ 445*0b57cec5SDimitry Andric }; 446*0b57cec5SDimitry Andric 447*0b57cec5SDimitry Andric // Apply modifiers on internal kind to standard kind 448*0b57cec5SDimitry Andric static inline void 449*0b57cec5SDimitry Andric __kmp_sched_apply_mods_stdkind(kmp_sched_t *kind, 450*0b57cec5SDimitry Andric enum sched_type internal_kind) { 451*0b57cec5SDimitry Andric if (SCHEDULE_HAS_MONOTONIC(internal_kind)) { 452*0b57cec5SDimitry Andric *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic); 453*0b57cec5SDimitry Andric } 454*0b57cec5SDimitry Andric } 455*0b57cec5SDimitry Andric 456*0b57cec5SDimitry Andric // Apply modifiers on standard kind to internal kind 457*0b57cec5SDimitry Andric static inline void 458*0b57cec5SDimitry Andric __kmp_sched_apply_mods_intkind(kmp_sched_t kind, 459*0b57cec5SDimitry Andric enum sched_type *internal_kind) { 460*0b57cec5SDimitry Andric if ((int)kind & (int)kmp_sched_monotonic) { 461*0b57cec5SDimitry Andric *internal_kind = (enum sched_type)((int)*internal_kind | 462*0b57cec5SDimitry Andric (int)kmp_sch_modifier_monotonic); 463*0b57cec5SDimitry Andric } 464*0b57cec5SDimitry Andric } 465*0b57cec5SDimitry Andric 466*0b57cec5SDimitry Andric // Get standard schedule without modifiers 467*0b57cec5SDimitry Andric static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) { 468*0b57cec5SDimitry Andric return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic)); 469*0b57cec5SDimitry Andric } 470*0b57cec5SDimitry Andric 471*0b57cec5SDimitry Andric /* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ 472*0b57cec5SDimitry Andric typedef union kmp_r_sched { 473*0b57cec5SDimitry Andric struct { 474*0b57cec5SDimitry Andric enum sched_type r_sched_type; 475*0b57cec5SDimitry Andric int chunk; 476*0b57cec5SDimitry Andric }; 477*0b57cec5SDimitry Andric kmp_int64 sched; 478*0b57cec5SDimitry Andric } kmp_r_sched_t; 479*0b57cec5SDimitry Andric 480*0b57cec5SDimitry Andric extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our 481*0b57cec5SDimitry Andric // internal schedule types 482*0b57cec5SDimitry Andric 483*0b57cec5SDimitry Andric enum library_type { 484*0b57cec5SDimitry Andric library_none, 485*0b57cec5SDimitry Andric library_serial, 486*0b57cec5SDimitry Andric library_turnaround, 487*0b57cec5SDimitry Andric library_throughput 488*0b57cec5SDimitry Andric }; 489*0b57cec5SDimitry Andric 490*0b57cec5SDimitry Andric #if KMP_OS_LINUX 491*0b57cec5SDimitry Andric enum clock_function_type { 492*0b57cec5SDimitry Andric clock_function_gettimeofday, 493*0b57cec5SDimitry Andric clock_function_clock_gettime 494*0b57cec5SDimitry Andric }; 495*0b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */ 496*0b57cec5SDimitry Andric 497*0b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 498*0b57cec5SDimitry Andric enum mic_type { non_mic, mic1, mic2, mic3, dummy }; 499*0b57cec5SDimitry Andric #endif 500*0b57cec5SDimitry Andric 501*0b57cec5SDimitry Andric /* -- fast reduction stuff ------------------------------------------------ */ 502*0b57cec5SDimitry Andric 503*0b57cec5SDimitry Andric #undef KMP_FAST_REDUCTION_BARRIER 504*0b57cec5SDimitry Andric #define KMP_FAST_REDUCTION_BARRIER 1 505*0b57cec5SDimitry Andric 506*0b57cec5SDimitry Andric #undef KMP_FAST_REDUCTION_CORE_DUO 507*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 508*0b57cec5SDimitry Andric #define KMP_FAST_REDUCTION_CORE_DUO 1 509*0b57cec5SDimitry Andric #endif 510*0b57cec5SDimitry Andric 511*0b57cec5SDimitry Andric enum _reduction_method { 512*0b57cec5SDimitry Andric reduction_method_not_defined = 0, 513*0b57cec5SDimitry Andric critical_reduce_block = (1 << 8), 514*0b57cec5SDimitry Andric atomic_reduce_block = (2 << 8), 515*0b57cec5SDimitry Andric tree_reduce_block = (3 << 8), 516*0b57cec5SDimitry Andric empty_reduce_block = (4 << 8) 517*0b57cec5SDimitry Andric }; 518*0b57cec5SDimitry Andric 519*0b57cec5SDimitry Andric // Description of the packed_reduction_method variable: 520*0b57cec5SDimitry Andric // The packed_reduction_method variable consists of two enum types variables 521*0b57cec5SDimitry Andric // that are packed together into 0-th byte and 1-st byte: 522*0b57cec5SDimitry Andric // 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of 523*0b57cec5SDimitry Andric // barrier that will be used in fast reduction: bs_plain_barrier or 524*0b57cec5SDimitry Andric // bs_reduction_barrier 525*0b57cec5SDimitry Andric // 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will 526*0b57cec5SDimitry Andric // be used in fast reduction; 527*0b57cec5SDimitry Andric // Reduction method is of 'enum _reduction_method' type and it's defined the way 528*0b57cec5SDimitry Andric // so that the bits of 0-th byte are empty, so no need to execute a shift 529*0b57cec5SDimitry Andric // instruction while packing/unpacking 530*0b57cec5SDimitry Andric 531*0b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 532*0b57cec5SDimitry Andric #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ 533*0b57cec5SDimitry Andric ((reduction_method) | (barrier_type)) 534*0b57cec5SDimitry Andric 535*0b57cec5SDimitry Andric #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ 536*0b57cec5SDimitry Andric ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00))) 537*0b57cec5SDimitry Andric 538*0b57cec5SDimitry Andric #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ 539*0b57cec5SDimitry Andric ((enum barrier_type)((packed_reduction_method) & (0x000000FF))) 540*0b57cec5SDimitry Andric #else 541*0b57cec5SDimitry Andric #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ 542*0b57cec5SDimitry Andric (reduction_method) 543*0b57cec5SDimitry Andric 544*0b57cec5SDimitry Andric #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ 545*0b57cec5SDimitry Andric (packed_reduction_method) 546*0b57cec5SDimitry Andric 547*0b57cec5SDimitry Andric #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier) 548*0b57cec5SDimitry Andric #endif 549*0b57cec5SDimitry Andric 550*0b57cec5SDimitry Andric #define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \ 551*0b57cec5SDimitry Andric ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \ 552*0b57cec5SDimitry Andric (which_reduction_block)) 553*0b57cec5SDimitry Andric 554*0b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 555*0b57cec5SDimitry Andric #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ 556*0b57cec5SDimitry Andric (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier)) 557*0b57cec5SDimitry Andric 558*0b57cec5SDimitry Andric #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ 559*0b57cec5SDimitry Andric (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier)) 560*0b57cec5SDimitry Andric #endif 561*0b57cec5SDimitry Andric 562*0b57cec5SDimitry Andric typedef int PACKED_REDUCTION_METHOD_T; 563*0b57cec5SDimitry Andric 564*0b57cec5SDimitry Andric /* -- end of fast reduction stuff ----------------------------------------- */ 565*0b57cec5SDimitry Andric 566*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 567*0b57cec5SDimitry Andric #define USE_CBLKDATA 568*0b57cec5SDimitry Andric #if KMP_MSVC_COMPAT 569*0b57cec5SDimitry Andric #pragma warning(push) 570*0b57cec5SDimitry Andric #pragma warning(disable : 271 310) 571*0b57cec5SDimitry Andric #endif 572*0b57cec5SDimitry Andric #include <windows.h> 573*0b57cec5SDimitry Andric #if KMP_MSVC_COMPAT 574*0b57cec5SDimitry Andric #pragma warning(pop) 575*0b57cec5SDimitry Andric #endif 576*0b57cec5SDimitry Andric #endif 577*0b57cec5SDimitry Andric 578*0b57cec5SDimitry Andric #if KMP_OS_UNIX 579*0b57cec5SDimitry Andric #include <dlfcn.h> 580*0b57cec5SDimitry Andric #include <pthread.h> 581*0b57cec5SDimitry Andric #endif 582*0b57cec5SDimitry Andric 583*0b57cec5SDimitry Andric /* Only Linux* OS and Windows* OS support thread affinity. */ 584*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 585*0b57cec5SDimitry Andric 586*0b57cec5SDimitry Andric // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). 587*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 588*0b57cec5SDimitry Andric #if _MSC_VER < 1600 && KMP_MSVC_COMPAT 589*0b57cec5SDimitry Andric typedef struct GROUP_AFFINITY { 590*0b57cec5SDimitry Andric KAFFINITY Mask; 591*0b57cec5SDimitry Andric WORD Group; 592*0b57cec5SDimitry Andric WORD Reserved[3]; 593*0b57cec5SDimitry Andric } GROUP_AFFINITY; 594*0b57cec5SDimitry Andric #endif /* _MSC_VER < 1600 */ 595*0b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY 596*0b57cec5SDimitry Andric extern int __kmp_num_proc_groups; 597*0b57cec5SDimitry Andric #else 598*0b57cec5SDimitry Andric static const int __kmp_num_proc_groups = 1; 599*0b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */ 600*0b57cec5SDimitry Andric typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); 601*0b57cec5SDimitry Andric extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; 602*0b57cec5SDimitry Andric 603*0b57cec5SDimitry Andric typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); 604*0b57cec5SDimitry Andric extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; 605*0b57cec5SDimitry Andric 606*0b57cec5SDimitry Andric typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); 607*0b57cec5SDimitry Andric extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; 608*0b57cec5SDimitry Andric 609*0b57cec5SDimitry Andric typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, 610*0b57cec5SDimitry Andric GROUP_AFFINITY *); 611*0b57cec5SDimitry Andric extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; 612*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 613*0b57cec5SDimitry Andric 614*0b57cec5SDimitry Andric #if KMP_USE_HWLOC 615*0b57cec5SDimitry Andric extern hwloc_topology_t __kmp_hwloc_topology; 616*0b57cec5SDimitry Andric extern int __kmp_hwloc_error; 617*0b57cec5SDimitry Andric extern int __kmp_numa_detected; 618*0b57cec5SDimitry Andric extern int __kmp_tile_depth; 619*0b57cec5SDimitry Andric #endif 620*0b57cec5SDimitry Andric 621*0b57cec5SDimitry Andric extern size_t __kmp_affin_mask_size; 622*0b57cec5SDimitry Andric #define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) 623*0b57cec5SDimitry Andric #define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) 624*0b57cec5SDimitry Andric #define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) 625*0b57cec5SDimitry Andric #define KMP_CPU_SET_ITERATE(i, mask) \ 626*0b57cec5SDimitry Andric for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i)) 627*0b57cec5SDimitry Andric #define KMP_CPU_SET(i, mask) (mask)->set(i) 628*0b57cec5SDimitry Andric #define KMP_CPU_ISSET(i, mask) (mask)->is_set(i) 629*0b57cec5SDimitry Andric #define KMP_CPU_CLR(i, mask) (mask)->clear(i) 630*0b57cec5SDimitry Andric #define KMP_CPU_ZERO(mask) (mask)->zero() 631*0b57cec5SDimitry Andric #define KMP_CPU_COPY(dest, src) (dest)->copy(src) 632*0b57cec5SDimitry Andric #define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) 633*0b57cec5SDimitry Andric #define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() 634*0b57cec5SDimitry Andric #define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) 635*0b57cec5SDimitry Andric #define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) 636*0b57cec5SDimitry Andric #define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) 637*0b57cec5SDimitry Andric #define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) 638*0b57cec5SDimitry Andric #define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) 639*0b57cec5SDimitry Andric #define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) 640*0b57cec5SDimitry Andric #define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) 641*0b57cec5SDimitry Andric #define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i) 642*0b57cec5SDimitry Andric #define KMP_CPU_ALLOC_ARRAY(arr, n) \ 643*0b57cec5SDimitry Andric (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) 644*0b57cec5SDimitry Andric #define KMP_CPU_FREE_ARRAY(arr, n) \ 645*0b57cec5SDimitry Andric __kmp_affinity_dispatch->deallocate_mask_array(arr) 646*0b57cec5SDimitry Andric #define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) 647*0b57cec5SDimitry Andric #define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) 648*0b57cec5SDimitry Andric #define __kmp_get_system_affinity(mask, abort_bool) \ 649*0b57cec5SDimitry Andric (mask)->get_system_affinity(abort_bool) 650*0b57cec5SDimitry Andric #define __kmp_set_system_affinity(mask, abort_bool) \ 651*0b57cec5SDimitry Andric (mask)->set_system_affinity(abort_bool) 652*0b57cec5SDimitry Andric #define __kmp_get_proc_group(mask) (mask)->get_proc_group() 653*0b57cec5SDimitry Andric 654*0b57cec5SDimitry Andric class KMPAffinity { 655*0b57cec5SDimitry Andric public: 656*0b57cec5SDimitry Andric class Mask { 657*0b57cec5SDimitry Andric public: 658*0b57cec5SDimitry Andric void *operator new(size_t n); 659*0b57cec5SDimitry Andric void operator delete(void *p); 660*0b57cec5SDimitry Andric void *operator new[](size_t n); 661*0b57cec5SDimitry Andric void operator delete[](void *p); 662*0b57cec5SDimitry Andric virtual ~Mask() {} 663*0b57cec5SDimitry Andric // Set bit i to 1 664*0b57cec5SDimitry Andric virtual void set(int i) {} 665*0b57cec5SDimitry Andric // Return bit i 666*0b57cec5SDimitry Andric virtual bool is_set(int i) const { return false; } 667*0b57cec5SDimitry Andric // Set bit i to 0 668*0b57cec5SDimitry Andric virtual void clear(int i) {} 669*0b57cec5SDimitry Andric // Zero out entire mask 670*0b57cec5SDimitry Andric virtual void zero() {} 671*0b57cec5SDimitry Andric // Copy src into this mask 672*0b57cec5SDimitry Andric virtual void copy(const Mask *src) {} 673*0b57cec5SDimitry Andric // this &= rhs 674*0b57cec5SDimitry Andric virtual void bitwise_and(const Mask *rhs) {} 675*0b57cec5SDimitry Andric // this |= rhs 676*0b57cec5SDimitry Andric virtual void bitwise_or(const Mask *rhs) {} 677*0b57cec5SDimitry Andric // this = ~this 678*0b57cec5SDimitry Andric virtual void bitwise_not() {} 679*0b57cec5SDimitry Andric // API for iterating over an affinity mask 680*0b57cec5SDimitry Andric // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) 681*0b57cec5SDimitry Andric virtual int begin() const { return 0; } 682*0b57cec5SDimitry Andric virtual int end() const { return 0; } 683*0b57cec5SDimitry Andric virtual int next(int previous) const { return 0; } 684*0b57cec5SDimitry Andric // Set the system's affinity to this affinity mask's value 685*0b57cec5SDimitry Andric virtual int set_system_affinity(bool abort_on_error) const { return -1; } 686*0b57cec5SDimitry Andric // Set this affinity mask to the current system affinity 687*0b57cec5SDimitry Andric virtual int get_system_affinity(bool abort_on_error) { return -1; } 688*0b57cec5SDimitry Andric // Only 1 DWORD in the mask should have any procs set. 689*0b57cec5SDimitry Andric // Return the appropriate index, or -1 for an invalid mask. 690*0b57cec5SDimitry Andric virtual int get_proc_group() const { return -1; } 691*0b57cec5SDimitry Andric }; 692*0b57cec5SDimitry Andric void *operator new(size_t n); 693*0b57cec5SDimitry Andric void operator delete(void *p); 694*0b57cec5SDimitry Andric // Need virtual destructor 695*0b57cec5SDimitry Andric virtual ~KMPAffinity() = default; 696*0b57cec5SDimitry Andric // Determine if affinity is capable 697*0b57cec5SDimitry Andric virtual void determine_capable(const char *env_var) {} 698*0b57cec5SDimitry Andric // Bind the current thread to os proc 699*0b57cec5SDimitry Andric virtual void bind_thread(int proc) {} 700*0b57cec5SDimitry Andric // Factory functions to allocate/deallocate a mask 701*0b57cec5SDimitry Andric virtual Mask *allocate_mask() { return nullptr; } 702*0b57cec5SDimitry Andric virtual void deallocate_mask(Mask *m) {} 703*0b57cec5SDimitry Andric virtual Mask *allocate_mask_array(int num) { return nullptr; } 704*0b57cec5SDimitry Andric virtual void deallocate_mask_array(Mask *m) {} 705*0b57cec5SDimitry Andric virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; } 706*0b57cec5SDimitry Andric static void pick_api(); 707*0b57cec5SDimitry Andric static void destroy_api(); 708*0b57cec5SDimitry Andric enum api_type { 709*0b57cec5SDimitry Andric NATIVE_OS 710*0b57cec5SDimitry Andric #if KMP_USE_HWLOC 711*0b57cec5SDimitry Andric , 712*0b57cec5SDimitry Andric HWLOC 713*0b57cec5SDimitry Andric #endif 714*0b57cec5SDimitry Andric }; 715*0b57cec5SDimitry Andric virtual api_type get_api_type() const { 716*0b57cec5SDimitry Andric KMP_ASSERT(0); 717*0b57cec5SDimitry Andric return NATIVE_OS; 718*0b57cec5SDimitry Andric } 719*0b57cec5SDimitry Andric 720*0b57cec5SDimitry Andric private: 721*0b57cec5SDimitry Andric static bool picked_api; 722*0b57cec5SDimitry Andric }; 723*0b57cec5SDimitry Andric 724*0b57cec5SDimitry Andric typedef KMPAffinity::Mask kmp_affin_mask_t; 725*0b57cec5SDimitry Andric extern KMPAffinity *__kmp_affinity_dispatch; 726*0b57cec5SDimitry Andric 727*0b57cec5SDimitry Andric // Declare local char buffers with this size for printing debug and info 728*0b57cec5SDimitry Andric // messages, using __kmp_affinity_print_mask(). 729*0b57cec5SDimitry Andric #define KMP_AFFIN_MASK_PRINT_LEN 1024 730*0b57cec5SDimitry Andric 731*0b57cec5SDimitry Andric enum affinity_type { 732*0b57cec5SDimitry Andric affinity_none = 0, 733*0b57cec5SDimitry Andric affinity_physical, 734*0b57cec5SDimitry Andric affinity_logical, 735*0b57cec5SDimitry Andric affinity_compact, 736*0b57cec5SDimitry Andric affinity_scatter, 737*0b57cec5SDimitry Andric affinity_explicit, 738*0b57cec5SDimitry Andric affinity_balanced, 739*0b57cec5SDimitry Andric affinity_disabled, // not used outsize the env var parser 740*0b57cec5SDimitry Andric affinity_default 741*0b57cec5SDimitry Andric }; 742*0b57cec5SDimitry Andric 743*0b57cec5SDimitry Andric enum affinity_gran { 744*0b57cec5SDimitry Andric affinity_gran_fine = 0, 745*0b57cec5SDimitry Andric affinity_gran_thread, 746*0b57cec5SDimitry Andric affinity_gran_core, 747*0b57cec5SDimitry Andric affinity_gran_tile, 748*0b57cec5SDimitry Andric affinity_gran_numa, 749*0b57cec5SDimitry Andric affinity_gran_package, 750*0b57cec5SDimitry Andric affinity_gran_node, 751*0b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY 752*0b57cec5SDimitry Andric // The "group" granularity isn't necesssarily coarser than all of the 753*0b57cec5SDimitry Andric // other levels, but we put it last in the enum. 754*0b57cec5SDimitry Andric affinity_gran_group, 755*0b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */ 756*0b57cec5SDimitry Andric affinity_gran_default 757*0b57cec5SDimitry Andric }; 758*0b57cec5SDimitry Andric 759*0b57cec5SDimitry Andric enum affinity_top_method { 760*0b57cec5SDimitry Andric affinity_top_method_all = 0, // try all (supported) methods, in order 761*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 762*0b57cec5SDimitry Andric affinity_top_method_apicid, 763*0b57cec5SDimitry Andric affinity_top_method_x2apicid, 764*0b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 765*0b57cec5SDimitry Andric affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too 766*0b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY 767*0b57cec5SDimitry Andric affinity_top_method_group, 768*0b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */ 769*0b57cec5SDimitry Andric affinity_top_method_flat, 770*0b57cec5SDimitry Andric #if KMP_USE_HWLOC 771*0b57cec5SDimitry Andric affinity_top_method_hwloc, 772*0b57cec5SDimitry Andric #endif 773*0b57cec5SDimitry Andric affinity_top_method_default 774*0b57cec5SDimitry Andric }; 775*0b57cec5SDimitry Andric 776*0b57cec5SDimitry Andric #define affinity_respect_mask_default (-1) 777*0b57cec5SDimitry Andric 778*0b57cec5SDimitry Andric extern enum affinity_type __kmp_affinity_type; /* Affinity type */ 779*0b57cec5SDimitry Andric extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */ 780*0b57cec5SDimitry Andric extern int __kmp_affinity_gran_levels; /* corresponding int value */ 781*0b57cec5SDimitry Andric extern int __kmp_affinity_dups; /* Affinity duplicate masks */ 782*0b57cec5SDimitry Andric extern enum affinity_top_method __kmp_affinity_top_method; 783*0b57cec5SDimitry Andric extern int __kmp_affinity_compact; /* Affinity 'compact' value */ 784*0b57cec5SDimitry Andric extern int __kmp_affinity_offset; /* Affinity offset value */ 785*0b57cec5SDimitry Andric extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ 786*0b57cec5SDimitry Andric extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ 787*0b57cec5SDimitry Andric extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask? 788*0b57cec5SDimitry Andric extern char *__kmp_affinity_proclist; /* proc ID list */ 789*0b57cec5SDimitry Andric extern kmp_affin_mask_t *__kmp_affinity_masks; 790*0b57cec5SDimitry Andric extern unsigned __kmp_affinity_num_masks; 791*0b57cec5SDimitry Andric extern void __kmp_affinity_bind_thread(int which); 792*0b57cec5SDimitry Andric 793*0b57cec5SDimitry Andric extern kmp_affin_mask_t *__kmp_affin_fullMask; 794*0b57cec5SDimitry Andric extern char *__kmp_cpuinfo_file; 795*0b57cec5SDimitry Andric 796*0b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 797*0b57cec5SDimitry Andric 798*0b57cec5SDimitry Andric // This needs to be kept in sync with the values in omp.h !!! 799*0b57cec5SDimitry Andric typedef enum kmp_proc_bind_t { 800*0b57cec5SDimitry Andric proc_bind_false = 0, 801*0b57cec5SDimitry Andric proc_bind_true, 802*0b57cec5SDimitry Andric proc_bind_master, 803*0b57cec5SDimitry Andric proc_bind_close, 804*0b57cec5SDimitry Andric proc_bind_spread, 805*0b57cec5SDimitry Andric proc_bind_intel, // use KMP_AFFINITY interface 806*0b57cec5SDimitry Andric proc_bind_default 807*0b57cec5SDimitry Andric } kmp_proc_bind_t; 808*0b57cec5SDimitry Andric 809*0b57cec5SDimitry Andric typedef struct kmp_nested_proc_bind_t { 810*0b57cec5SDimitry Andric kmp_proc_bind_t *bind_types; 811*0b57cec5SDimitry Andric int size; 812*0b57cec5SDimitry Andric int used; 813*0b57cec5SDimitry Andric } kmp_nested_proc_bind_t; 814*0b57cec5SDimitry Andric 815*0b57cec5SDimitry Andric extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; 816*0b57cec5SDimitry Andric 817*0b57cec5SDimitry Andric extern int __kmp_display_affinity; 818*0b57cec5SDimitry Andric extern char *__kmp_affinity_format; 819*0b57cec5SDimitry Andric static const size_t KMP_AFFINITY_FORMAT_SIZE = 512; 820*0b57cec5SDimitry Andric 821*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 822*0b57cec5SDimitry Andric #define KMP_PLACE_ALL (-1) 823*0b57cec5SDimitry Andric #define KMP_PLACE_UNDEFINED (-2) 824*0b57cec5SDimitry Andric // Is KMP_AFFINITY is being used instead of OMP_PROC_BIND/OMP_PLACES? 825*0b57cec5SDimitry Andric #define KMP_AFFINITY_NON_PROC_BIND \ 826*0b57cec5SDimitry Andric ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \ 827*0b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \ 828*0b57cec5SDimitry Andric (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced)) 829*0b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 830*0b57cec5SDimitry Andric 831*0b57cec5SDimitry Andric extern int __kmp_affinity_num_places; 832*0b57cec5SDimitry Andric 833*0b57cec5SDimitry Andric typedef enum kmp_cancel_kind_t { 834*0b57cec5SDimitry Andric cancel_noreq = 0, 835*0b57cec5SDimitry Andric cancel_parallel = 1, 836*0b57cec5SDimitry Andric cancel_loop = 2, 837*0b57cec5SDimitry Andric cancel_sections = 3, 838*0b57cec5SDimitry Andric cancel_taskgroup = 4 839*0b57cec5SDimitry Andric } kmp_cancel_kind_t; 840*0b57cec5SDimitry Andric 841*0b57cec5SDimitry Andric // KMP_HW_SUBSET support: 842*0b57cec5SDimitry Andric typedef struct kmp_hws_item { 843*0b57cec5SDimitry Andric int num; 844*0b57cec5SDimitry Andric int offset; 845*0b57cec5SDimitry Andric } kmp_hws_item_t; 846*0b57cec5SDimitry Andric 847*0b57cec5SDimitry Andric extern kmp_hws_item_t __kmp_hws_socket; 848*0b57cec5SDimitry Andric extern kmp_hws_item_t __kmp_hws_node; 849*0b57cec5SDimitry Andric extern kmp_hws_item_t __kmp_hws_tile; 850*0b57cec5SDimitry Andric extern kmp_hws_item_t __kmp_hws_core; 851*0b57cec5SDimitry Andric extern kmp_hws_item_t __kmp_hws_proc; 852*0b57cec5SDimitry Andric extern int __kmp_hws_requested; 853*0b57cec5SDimitry Andric extern int __kmp_hws_abs_flag; // absolute or per-item number requested 854*0b57cec5SDimitry Andric 855*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 856*0b57cec5SDimitry Andric 857*0b57cec5SDimitry Andric #define KMP_PAD(type, sz) \ 858*0b57cec5SDimitry Andric (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) 859*0b57cec5SDimitry Andric 860*0b57cec5SDimitry Andric // We need to avoid using -1 as a GTID as +1 is added to the gtid 861*0b57cec5SDimitry Andric // when storing it in a lock, and the value 0 is reserved. 862*0b57cec5SDimitry Andric #define KMP_GTID_DNE (-2) /* Does not exist */ 863*0b57cec5SDimitry Andric #define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ 864*0b57cec5SDimitry Andric #define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ 865*0b57cec5SDimitry Andric #define KMP_GTID_UNKNOWN (-5) /* Is not known */ 866*0b57cec5SDimitry Andric #define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ 867*0b57cec5SDimitry Andric 868*0b57cec5SDimitry Andric /* OpenMP 5.0 Memory Management support */ 869*0b57cec5SDimitry Andric 870*0b57cec5SDimitry Andric #ifndef __OMP_H 871*0b57cec5SDimitry Andric // Duplicate type definitios from omp.h 872*0b57cec5SDimitry Andric typedef uintptr_t omp_uintptr_t; 873*0b57cec5SDimitry Andric 874*0b57cec5SDimitry Andric typedef enum { 875*0b57cec5SDimitry Andric OMP_ATK_THREADMODEL = 1, 876*0b57cec5SDimitry Andric OMP_ATK_ALIGNMENT = 2, 877*0b57cec5SDimitry Andric OMP_ATK_ACCESS = 3, 878*0b57cec5SDimitry Andric OMP_ATK_POOL_SIZE = 4, 879*0b57cec5SDimitry Andric OMP_ATK_FALLBACK = 5, 880*0b57cec5SDimitry Andric OMP_ATK_FB_DATA = 6, 881*0b57cec5SDimitry Andric OMP_ATK_PINNED = 7, 882*0b57cec5SDimitry Andric OMP_ATK_PARTITION = 8 883*0b57cec5SDimitry Andric } omp_alloctrait_key_t; 884*0b57cec5SDimitry Andric 885*0b57cec5SDimitry Andric typedef enum { 886*0b57cec5SDimitry Andric OMP_ATV_FALSE = 0, 887*0b57cec5SDimitry Andric OMP_ATV_TRUE = 1, 888*0b57cec5SDimitry Andric OMP_ATV_DEFAULT = 2, 889*0b57cec5SDimitry Andric OMP_ATV_CONTENDED = 3, 890*0b57cec5SDimitry Andric OMP_ATV_UNCONTENDED = 4, 891*0b57cec5SDimitry Andric OMP_ATV_SEQUENTIAL = 5, 892*0b57cec5SDimitry Andric OMP_ATV_PRIVATE = 6, 893*0b57cec5SDimitry Andric OMP_ATV_ALL = 7, 894*0b57cec5SDimitry Andric OMP_ATV_THREAD = 8, 895*0b57cec5SDimitry Andric OMP_ATV_PTEAM = 9, 896*0b57cec5SDimitry Andric OMP_ATV_CGROUP = 10, 897*0b57cec5SDimitry Andric OMP_ATV_DEFAULT_MEM_FB = 11, 898*0b57cec5SDimitry Andric OMP_ATV_NULL_FB = 12, 899*0b57cec5SDimitry Andric OMP_ATV_ABORT_FB = 13, 900*0b57cec5SDimitry Andric OMP_ATV_ALLOCATOR_FB = 14, 901*0b57cec5SDimitry Andric OMP_ATV_ENVIRONMENT = 15, 902*0b57cec5SDimitry Andric OMP_ATV_NEAREST = 16, 903*0b57cec5SDimitry Andric OMP_ATV_BLOCKED = 17, 904*0b57cec5SDimitry Andric OMP_ATV_INTERLEAVED = 18 905*0b57cec5SDimitry Andric } omp_alloctrait_value_t; 906*0b57cec5SDimitry Andric 907*0b57cec5SDimitry Andric typedef void *omp_memspace_handle_t; 908*0b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_default_mem_space; 909*0b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_large_cap_mem_space; 910*0b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_const_mem_space; 911*0b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_high_bw_mem_space; 912*0b57cec5SDimitry Andric extern omp_memspace_handle_t const omp_low_lat_mem_space; 913*0b57cec5SDimitry Andric 914*0b57cec5SDimitry Andric typedef struct { 915*0b57cec5SDimitry Andric omp_alloctrait_key_t key; 916*0b57cec5SDimitry Andric omp_uintptr_t value; 917*0b57cec5SDimitry Andric } omp_alloctrait_t; 918*0b57cec5SDimitry Andric 919*0b57cec5SDimitry Andric typedef void *omp_allocator_handle_t; 920*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_null_allocator; 921*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_default_mem_alloc; 922*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_large_cap_mem_alloc; 923*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_const_mem_alloc; 924*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_high_bw_mem_alloc; 925*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_low_lat_mem_alloc; 926*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_cgroup_mem_alloc; 927*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_pteam_mem_alloc; 928*0b57cec5SDimitry Andric extern omp_allocator_handle_t const omp_thread_mem_alloc; 929*0b57cec5SDimitry Andric extern omp_allocator_handle_t const kmp_max_mem_alloc; 930*0b57cec5SDimitry Andric extern omp_allocator_handle_t __kmp_def_allocator; 931*0b57cec5SDimitry Andric 932*0b57cec5SDimitry Andric // end of duplicate type definitios from omp.h 933*0b57cec5SDimitry Andric #endif 934*0b57cec5SDimitry Andric 935*0b57cec5SDimitry Andric extern int __kmp_memkind_available; 936*0b57cec5SDimitry Andric 937*0b57cec5SDimitry Andric typedef omp_memspace_handle_t kmp_memspace_t; // placeholder 938*0b57cec5SDimitry Andric 939*0b57cec5SDimitry Andric typedef struct kmp_allocator_t { 940*0b57cec5SDimitry Andric omp_memspace_handle_t memspace; 941*0b57cec5SDimitry Andric void **memkind; // pointer to memkind 942*0b57cec5SDimitry Andric int alignment; 943*0b57cec5SDimitry Andric omp_alloctrait_value_t fb; 944*0b57cec5SDimitry Andric kmp_allocator_t *fb_data; 945*0b57cec5SDimitry Andric kmp_uint64 pool_size; 946*0b57cec5SDimitry Andric kmp_uint64 pool_used; 947*0b57cec5SDimitry Andric } kmp_allocator_t; 948*0b57cec5SDimitry Andric 949*0b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_init_allocator(int gtid, 950*0b57cec5SDimitry Andric omp_memspace_handle_t, 951*0b57cec5SDimitry Andric int ntraits, 952*0b57cec5SDimitry Andric omp_alloctrait_t traits[]); 953*0b57cec5SDimitry Andric extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al); 954*0b57cec5SDimitry Andric extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al); 955*0b57cec5SDimitry Andric extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid); 956*0b57cec5SDimitry Andric extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 957*0b57cec5SDimitry Andric extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 958*0b57cec5SDimitry Andric 959*0b57cec5SDimitry Andric extern void __kmp_init_memkind(); 960*0b57cec5SDimitry Andric extern void __kmp_fini_memkind(); 961*0b57cec5SDimitry Andric 962*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 963*0b57cec5SDimitry Andric 964*0b57cec5SDimitry Andric #define KMP_UINT64_MAX \ 965*0b57cec5SDimitry Andric (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1))) 966*0b57cec5SDimitry Andric 967*0b57cec5SDimitry Andric #define KMP_MIN_NTH 1 968*0b57cec5SDimitry Andric 969*0b57cec5SDimitry Andric #ifndef KMP_MAX_NTH 970*0b57cec5SDimitry Andric #if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX 971*0b57cec5SDimitry Andric #define KMP_MAX_NTH PTHREAD_THREADS_MAX 972*0b57cec5SDimitry Andric #else 973*0b57cec5SDimitry Andric #define KMP_MAX_NTH INT_MAX 974*0b57cec5SDimitry Andric #endif 975*0b57cec5SDimitry Andric #endif /* KMP_MAX_NTH */ 976*0b57cec5SDimitry Andric 977*0b57cec5SDimitry Andric #ifdef PTHREAD_STACK_MIN 978*0b57cec5SDimitry Andric #define KMP_MIN_STKSIZE PTHREAD_STACK_MIN 979*0b57cec5SDimitry Andric #else 980*0b57cec5SDimitry Andric #define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) 981*0b57cec5SDimitry Andric #endif 982*0b57cec5SDimitry Andric 983*0b57cec5SDimitry Andric #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) 984*0b57cec5SDimitry Andric 985*0b57cec5SDimitry Andric #if KMP_ARCH_X86 986*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) 987*0b57cec5SDimitry Andric #elif KMP_ARCH_X86_64 988*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) 989*0b57cec5SDimitry Andric #define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) 990*0b57cec5SDimitry Andric #else 991*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) 992*0b57cec5SDimitry Andric #endif 993*0b57cec5SDimitry Andric 994*0b57cec5SDimitry Andric #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024)) 995*0b57cec5SDimitry Andric #define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024)) 996*0b57cec5SDimitry Andric #define KMP_MAX_MALLOC_POOL_INCR \ 997*0b57cec5SDimitry Andric (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) 998*0b57cec5SDimitry Andric 999*0b57cec5SDimitry Andric #define KMP_MIN_STKOFFSET (0) 1000*0b57cec5SDimitry Andric #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE 1001*0b57cec5SDimitry Andric #if KMP_OS_DARWIN 1002*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET 1003*0b57cec5SDimitry Andric #else 1004*0b57cec5SDimitry Andric #define KMP_DEFAULT_STKOFFSET CACHE_LINE 1005*0b57cec5SDimitry Andric #endif 1006*0b57cec5SDimitry Andric 1007*0b57cec5SDimitry Andric #define KMP_MIN_STKPADDING (0) 1008*0b57cec5SDimitry Andric #define KMP_MAX_STKPADDING (2 * 1024 * 1024) 1009*0b57cec5SDimitry Andric 1010*0b57cec5SDimitry Andric #define KMP_BLOCKTIME_MULTIPLIER \ 1011*0b57cec5SDimitry Andric (1000) /* number of blocktime units per second */ 1012*0b57cec5SDimitry Andric #define KMP_MIN_BLOCKTIME (0) 1013*0b57cec5SDimitry Andric #define KMP_MAX_BLOCKTIME \ 1014*0b57cec5SDimitry Andric (INT_MAX) /* Must be this for "infinite" setting the work */ 1015*0b57cec5SDimitry Andric #define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ 1016*0b57cec5SDimitry Andric 1017*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 1018*0b57cec5SDimitry Andric #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) 1019*0b57cec5SDimitry Andric #define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second 1020*0b57cec5SDimitry Andric #define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec 1021*0b57cec5SDimitry Andric 1022*0b57cec5SDimitry Andric /* Calculate new number of monitor wakeups for a specific block time based on 1023*0b57cec5SDimitry Andric previous monitor_wakeups. Only allow increasing number of wakeups */ 1024*0b57cec5SDimitry Andric #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ 1025*0b57cec5SDimitry Andric (((blocktime) == KMP_MAX_BLOCKTIME) \ 1026*0b57cec5SDimitry Andric ? (monitor_wakeups) \ 1027*0b57cec5SDimitry Andric : ((blocktime) == KMP_MIN_BLOCKTIME) \ 1028*0b57cec5SDimitry Andric ? KMP_MAX_MONITOR_WAKEUPS \ 1029*0b57cec5SDimitry Andric : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \ 1030*0b57cec5SDimitry Andric ? (monitor_wakeups) \ 1031*0b57cec5SDimitry Andric : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime)) 1032*0b57cec5SDimitry Andric 1033*0b57cec5SDimitry Andric /* Calculate number of intervals for a specific block time based on 1034*0b57cec5SDimitry Andric monitor_wakeups */ 1035*0b57cec5SDimitry Andric #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ 1036*0b57cec5SDimitry Andric (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \ 1037*0b57cec5SDimitry Andric (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups))) 1038*0b57cec5SDimitry Andric #else 1039*0b57cec5SDimitry Andric #define KMP_BLOCKTIME(team, tid) \ 1040*0b57cec5SDimitry Andric (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime) 1041*0b57cec5SDimitry Andric #if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) 1042*0b57cec5SDimitry Andric // HW TSC is used to reduce overhead (clock tick instead of nanosecond). 1043*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_ticks_per_msec; 1044*0b57cec5SDimitry Andric #if KMP_COMPILER_ICC 1045*0b57cec5SDimitry Andric #define KMP_NOW() ((kmp_uint64)_rdtsc()) 1046*0b57cec5SDimitry Andric #else 1047*0b57cec5SDimitry Andric #define KMP_NOW() __kmp_hardware_timestamp() 1048*0b57cec5SDimitry Andric #endif 1049*0b57cec5SDimitry Andric #define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec) 1050*0b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid) \ 1051*0b57cec5SDimitry Andric (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec) 1052*0b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW()) 1053*0b57cec5SDimitry Andric #else 1054*0b57cec5SDimitry Andric // System time is retrieved sporadically while blocking. 1055*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_now_nsec(); 1056*0b57cec5SDimitry Andric #define KMP_NOW() __kmp_now_nsec() 1057*0b57cec5SDimitry Andric #define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC) 1058*0b57cec5SDimitry Andric #define KMP_BLOCKTIME_INTERVAL(team, tid) \ 1059*0b57cec5SDimitry Andric (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC) 1060*0b57cec5SDimitry Andric #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW()) 1061*0b57cec5SDimitry Andric #endif 1062*0b57cec5SDimitry Andric #endif // KMP_USE_MONITOR 1063*0b57cec5SDimitry Andric 1064*0b57cec5SDimitry Andric #define KMP_MIN_STATSCOLS 40 1065*0b57cec5SDimitry Andric #define KMP_MAX_STATSCOLS 4096 1066*0b57cec5SDimitry Andric #define KMP_DEFAULT_STATSCOLS 80 1067*0b57cec5SDimitry Andric 1068*0b57cec5SDimitry Andric #define KMP_MIN_INTERVAL 0 1069*0b57cec5SDimitry Andric #define KMP_MAX_INTERVAL (INT_MAX - 1) 1070*0b57cec5SDimitry Andric #define KMP_DEFAULT_INTERVAL 0 1071*0b57cec5SDimitry Andric 1072*0b57cec5SDimitry Andric #define KMP_MIN_CHUNK 1 1073*0b57cec5SDimitry Andric #define KMP_MAX_CHUNK (INT_MAX - 1) 1074*0b57cec5SDimitry Andric #define KMP_DEFAULT_CHUNK 1 1075*0b57cec5SDimitry Andric 1076*0b57cec5SDimitry Andric #define KMP_DFLT_DISP_NUM_BUFF 7 1077*0b57cec5SDimitry Andric #define KMP_MAX_ORDERED 8 1078*0b57cec5SDimitry Andric 1079*0b57cec5SDimitry Andric #define KMP_MAX_FIELDS 32 1080*0b57cec5SDimitry Andric 1081*0b57cec5SDimitry Andric #define KMP_MAX_BRANCH_BITS 31 1082*0b57cec5SDimitry Andric 1083*0b57cec5SDimitry Andric #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX 1084*0b57cec5SDimitry Andric 1085*0b57cec5SDimitry Andric #define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX 1086*0b57cec5SDimitry Andric 1087*0b57cec5SDimitry Andric #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX 1088*0b57cec5SDimitry Andric 1089*0b57cec5SDimitry Andric /* Minimum number of threads before switch to TLS gtid (experimentally 1090*0b57cec5SDimitry Andric determined) */ 1091*0b57cec5SDimitry Andric /* josh TODO: what about OS X* tuning? */ 1092*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1093*0b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN 5 1094*0b57cec5SDimitry Andric #else 1095*0b57cec5SDimitry Andric #define KMP_TLS_GTID_MIN INT_MAX 1096*0b57cec5SDimitry Andric #endif 1097*0b57cec5SDimitry Andric 1098*0b57cec5SDimitry Andric #define KMP_MASTER_TID(tid) ((tid) == 0) 1099*0b57cec5SDimitry Andric #define KMP_WORKER_TID(tid) ((tid) != 0) 1100*0b57cec5SDimitry Andric 1101*0b57cec5SDimitry Andric #define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0) 1102*0b57cec5SDimitry Andric #define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0) 1103*0b57cec5SDimitry Andric #define KMP_INITIAL_GTID(gtid) ((gtid) == 0) 1104*0b57cec5SDimitry Andric 1105*0b57cec5SDimitry Andric #ifndef TRUE 1106*0b57cec5SDimitry Andric #define FALSE 0 1107*0b57cec5SDimitry Andric #define TRUE (!FALSE) 1108*0b57cec5SDimitry Andric #endif 1109*0b57cec5SDimitry Andric 1110*0b57cec5SDimitry Andric /* NOTE: all of the following constants must be even */ 1111*0b57cec5SDimitry Andric 1112*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1113*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 64U /* initial number of spin-tests */ 1114*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ 1115*0b57cec5SDimitry Andric #elif KMP_OS_CNK 1116*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 16U /* initial number of spin-tests */ 1117*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ 1118*0b57cec5SDimitry Andric #elif KMP_OS_LINUX 1119*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1120*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1121*0b57cec5SDimitry Andric #elif KMP_OS_DARWIN 1122*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DARWIN */ 1123*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1124*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1125*0b57cec5SDimitry Andric #elif KMP_OS_DRAGONFLY 1126*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_DRAGONFLY */ 1127*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1128*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1129*0b57cec5SDimitry Andric #elif KMP_OS_FREEBSD 1130*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_FREEBSD */ 1131*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1132*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1133*0b57cec5SDimitry Andric #elif KMP_OS_NETBSD 1134*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_NETBSD */ 1135*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1136*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1137*0b57cec5SDimitry Andric #elif KMP_OS_HURD 1138*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_HURD */ 1139*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1140*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1141*0b57cec5SDimitry Andric #elif KMP_OS_OPENBSD 1142*0b57cec5SDimitry Andric /* TODO: tune for KMP_OS_OPENBSD */ 1143*0b57cec5SDimitry Andric #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1144*0b57cec5SDimitry Andric #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1145*0b57cec5SDimitry Andric #endif 1146*0b57cec5SDimitry Andric 1147*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1148*0b57cec5SDimitry Andric typedef struct kmp_cpuid { 1149*0b57cec5SDimitry Andric kmp_uint32 eax; 1150*0b57cec5SDimitry Andric kmp_uint32 ebx; 1151*0b57cec5SDimitry Andric kmp_uint32 ecx; 1152*0b57cec5SDimitry Andric kmp_uint32 edx; 1153*0b57cec5SDimitry Andric } kmp_cpuid_t; 1154*0b57cec5SDimitry Andric 1155*0b57cec5SDimitry Andric typedef struct kmp_cpuinfo { 1156*0b57cec5SDimitry Andric int initialized; // If 0, other fields are not initialized. 1157*0b57cec5SDimitry Andric int signature; // CPUID(1).EAX 1158*0b57cec5SDimitry Andric int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family) 1159*0b57cec5SDimitry Andric int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended 1160*0b57cec5SDimitry Andric // Model << 4 ) + Model) 1161*0b57cec5SDimitry Andric int stepping; // CPUID(1).EAX[3:0] ( Stepping ) 1162*0b57cec5SDimitry Andric int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. 1163*0b57cec5SDimitry Andric int rtm; // 0 if RTM instructions are not supported, 1 otherwise. 1164*0b57cec5SDimitry Andric int cpu_stackoffset; 1165*0b57cec5SDimitry Andric int apic_id; 1166*0b57cec5SDimitry Andric int physical_id; 1167*0b57cec5SDimitry Andric int logical_id; 1168*0b57cec5SDimitry Andric kmp_uint64 frequency; // Nominal CPU frequency in Hz. 1169*0b57cec5SDimitry Andric char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004) 1170*0b57cec5SDimitry Andric } kmp_cpuinfo_t; 1171*0b57cec5SDimitry Andric 1172*0b57cec5SDimitry Andric extern void __kmp_query_cpuid(kmp_cpuinfo_t *p); 1173*0b57cec5SDimitry Andric 1174*0b57cec5SDimitry Andric #if KMP_OS_UNIX 1175*0b57cec5SDimitry Andric // subleaf is only needed for cache and topology discovery and can be set to 1176*0b57cec5SDimitry Andric // zero in most cases 1177*0b57cec5SDimitry Andric static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) { 1178*0b57cec5SDimitry Andric __asm__ __volatile__("cpuid" 1179*0b57cec5SDimitry Andric : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx) 1180*0b57cec5SDimitry Andric : "a"(leaf), "c"(subleaf)); 1181*0b57cec5SDimitry Andric } 1182*0b57cec5SDimitry Andric // Load p into FPU control word 1183*0b57cec5SDimitry Andric static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) { 1184*0b57cec5SDimitry Andric __asm__ __volatile__("fldcw %0" : : "m"(*p)); 1185*0b57cec5SDimitry Andric } 1186*0b57cec5SDimitry Andric // Store FPU control word into p 1187*0b57cec5SDimitry Andric static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) { 1188*0b57cec5SDimitry Andric __asm__ __volatile__("fstcw %0" : "=m"(*p)); 1189*0b57cec5SDimitry Andric } 1190*0b57cec5SDimitry Andric static inline void __kmp_clear_x87_fpu_status_word() { 1191*0b57cec5SDimitry Andric #if KMP_MIC 1192*0b57cec5SDimitry Andric // 32-bit protected mode x87 FPU state 1193*0b57cec5SDimitry Andric struct x87_fpu_state { 1194*0b57cec5SDimitry Andric unsigned cw; 1195*0b57cec5SDimitry Andric unsigned sw; 1196*0b57cec5SDimitry Andric unsigned tw; 1197*0b57cec5SDimitry Andric unsigned fip; 1198*0b57cec5SDimitry Andric unsigned fips; 1199*0b57cec5SDimitry Andric unsigned fdp; 1200*0b57cec5SDimitry Andric unsigned fds; 1201*0b57cec5SDimitry Andric }; 1202*0b57cec5SDimitry Andric struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0}; 1203*0b57cec5SDimitry Andric __asm__ __volatile__("fstenv %0\n\t" // store FP env 1204*0b57cec5SDimitry Andric "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW 1205*0b57cec5SDimitry Andric "fldenv %0\n\t" // load FP env back 1206*0b57cec5SDimitry Andric : "+m"(fpu_state), "+m"(fpu_state.sw)); 1207*0b57cec5SDimitry Andric #else 1208*0b57cec5SDimitry Andric __asm__ __volatile__("fnclex"); 1209*0b57cec5SDimitry Andric #endif // KMP_MIC 1210*0b57cec5SDimitry Andric } 1211*0b57cec5SDimitry Andric #if __SSE__ 1212*0b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); } 1213*0b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } 1214*0b57cec5SDimitry Andric #else 1215*0b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {} 1216*0b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; } 1217*0b57cec5SDimitry Andric #endif 1218*0b57cec5SDimitry Andric #else 1219*0b57cec5SDimitry Andric // Windows still has these as external functions in assembly file 1220*0b57cec5SDimitry Andric extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p); 1221*0b57cec5SDimitry Andric extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p); 1222*0b57cec5SDimitry Andric extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p); 1223*0b57cec5SDimitry Andric extern void __kmp_clear_x87_fpu_status_word(); 1224*0b57cec5SDimitry Andric static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); } 1225*0b57cec5SDimitry Andric static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } 1226*0b57cec5SDimitry Andric #endif // KMP_OS_UNIX 1227*0b57cec5SDimitry Andric 1228*0b57cec5SDimitry Andric #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ 1229*0b57cec5SDimitry Andric 1230*0b57cec5SDimitry Andric #if KMP_ARCH_X86 1231*0b57cec5SDimitry Andric extern void __kmp_x86_pause(void); 1232*0b57cec5SDimitry Andric #elif KMP_MIC 1233*0b57cec5SDimitry Andric // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed 1234*0b57cec5SDimitry Andric // regression after removal of extra PAUSE from spin loops. Changing 1235*0b57cec5SDimitry Andric // the delay from 100 to 300 showed even better performance than double PAUSE 1236*0b57cec5SDimitry Andric // on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC. 1237*0b57cec5SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_delay_32(300); } 1238*0b57cec5SDimitry Andric #else 1239*0b57cec5SDimitry Andric static inline void __kmp_x86_pause(void) { _mm_pause(); } 1240*0b57cec5SDimitry Andric #endif 1241*0b57cec5SDimitry Andric #define KMP_CPU_PAUSE() __kmp_x86_pause() 1242*0b57cec5SDimitry Andric #elif KMP_ARCH_PPC64 1243*0b57cec5SDimitry Andric #define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1") 1244*0b57cec5SDimitry Andric #define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2") 1245*0b57cec5SDimitry Andric #define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory") 1246*0b57cec5SDimitry Andric #define KMP_CPU_PAUSE() \ 1247*0b57cec5SDimitry Andric do { \ 1248*0b57cec5SDimitry Andric KMP_PPC64_PRI_LOW(); \ 1249*0b57cec5SDimitry Andric KMP_PPC64_PRI_MED(); \ 1250*0b57cec5SDimitry Andric KMP_PPC64_PRI_LOC_MB(); \ 1251*0b57cec5SDimitry Andric } while (0) 1252*0b57cec5SDimitry Andric #else 1253*0b57cec5SDimitry Andric #define KMP_CPU_PAUSE() /* nothing to do */ 1254*0b57cec5SDimitry Andric #endif 1255*0b57cec5SDimitry Andric 1256*0b57cec5SDimitry Andric #define KMP_INIT_YIELD(count) \ 1257*0b57cec5SDimitry Andric { (count) = __kmp_yield_init; } 1258*0b57cec5SDimitry Andric 1259*0b57cec5SDimitry Andric #define KMP_OVERSUBSCRIBED \ 1260*0b57cec5SDimitry Andric (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) 1261*0b57cec5SDimitry Andric 1262*0b57cec5SDimitry Andric #define KMP_TRY_YIELD \ 1263*0b57cec5SDimitry Andric ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED))) 1264*0b57cec5SDimitry Andric 1265*0b57cec5SDimitry Andric #define KMP_TRY_YIELD_OVERSUB \ 1266*0b57cec5SDimitry Andric ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED)) 1267*0b57cec5SDimitry Andric 1268*0b57cec5SDimitry Andric #define KMP_YIELD(cond) \ 1269*0b57cec5SDimitry Andric { \ 1270*0b57cec5SDimitry Andric KMP_CPU_PAUSE(); \ 1271*0b57cec5SDimitry Andric if ((cond) && (KMP_TRY_YIELD)) \ 1272*0b57cec5SDimitry Andric __kmp_yield(); \ 1273*0b57cec5SDimitry Andric } 1274*0b57cec5SDimitry Andric 1275*0b57cec5SDimitry Andric #define KMP_YIELD_OVERSUB() \ 1276*0b57cec5SDimitry Andric { \ 1277*0b57cec5SDimitry Andric KMP_CPU_PAUSE(); \ 1278*0b57cec5SDimitry Andric if ((KMP_TRY_YIELD_OVERSUB)) \ 1279*0b57cec5SDimitry Andric __kmp_yield(); \ 1280*0b57cec5SDimitry Andric } 1281*0b57cec5SDimitry Andric 1282*0b57cec5SDimitry Andric // Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, 1283*0b57cec5SDimitry Andric // there should be no yielding since initial value from KMP_INIT_YIELD() is odd. 1284*0b57cec5SDimitry Andric #define KMP_YIELD_SPIN(count) \ 1285*0b57cec5SDimitry Andric { \ 1286*0b57cec5SDimitry Andric KMP_CPU_PAUSE(); \ 1287*0b57cec5SDimitry Andric if (KMP_TRY_YIELD) { \ 1288*0b57cec5SDimitry Andric (count) -= 2; \ 1289*0b57cec5SDimitry Andric if (!(count)) { \ 1290*0b57cec5SDimitry Andric __kmp_yield(); \ 1291*0b57cec5SDimitry Andric (count) = __kmp_yield_next; \ 1292*0b57cec5SDimitry Andric } \ 1293*0b57cec5SDimitry Andric } \ 1294*0b57cec5SDimitry Andric } 1295*0b57cec5SDimitry Andric 1296*0b57cec5SDimitry Andric #define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \ 1297*0b57cec5SDimitry Andric { \ 1298*0b57cec5SDimitry Andric KMP_CPU_PAUSE(); \ 1299*0b57cec5SDimitry Andric if ((KMP_TRY_YIELD_OVERSUB)) \ 1300*0b57cec5SDimitry Andric __kmp_yield(); \ 1301*0b57cec5SDimitry Andric else if (__kmp_use_yield == 1) { \ 1302*0b57cec5SDimitry Andric (count) -= 2; \ 1303*0b57cec5SDimitry Andric if (!(count)) { \ 1304*0b57cec5SDimitry Andric __kmp_yield(); \ 1305*0b57cec5SDimitry Andric (count) = __kmp_yield_next; \ 1306*0b57cec5SDimitry Andric } \ 1307*0b57cec5SDimitry Andric } \ 1308*0b57cec5SDimitry Andric } 1309*0b57cec5SDimitry Andric 1310*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 1311*0b57cec5SDimitry Andric /* Support datatypes for the orphaned construct nesting checks. */ 1312*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 1313*0b57cec5SDimitry Andric 1314*0b57cec5SDimitry Andric enum cons_type { 1315*0b57cec5SDimitry Andric ct_none, 1316*0b57cec5SDimitry Andric ct_parallel, 1317*0b57cec5SDimitry Andric ct_pdo, 1318*0b57cec5SDimitry Andric ct_pdo_ordered, 1319*0b57cec5SDimitry Andric ct_psections, 1320*0b57cec5SDimitry Andric ct_psingle, 1321*0b57cec5SDimitry Andric ct_critical, 1322*0b57cec5SDimitry Andric ct_ordered_in_parallel, 1323*0b57cec5SDimitry Andric ct_ordered_in_pdo, 1324*0b57cec5SDimitry Andric ct_master, 1325*0b57cec5SDimitry Andric ct_reduce, 1326*0b57cec5SDimitry Andric ct_barrier 1327*0b57cec5SDimitry Andric }; 1328*0b57cec5SDimitry Andric 1329*0b57cec5SDimitry Andric #define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered) 1330*0b57cec5SDimitry Andric 1331*0b57cec5SDimitry Andric struct cons_data { 1332*0b57cec5SDimitry Andric ident_t const *ident; 1333*0b57cec5SDimitry Andric enum cons_type type; 1334*0b57cec5SDimitry Andric int prev; 1335*0b57cec5SDimitry Andric kmp_user_lock_p 1336*0b57cec5SDimitry Andric name; /* address exclusively for critical section name comparison */ 1337*0b57cec5SDimitry Andric }; 1338*0b57cec5SDimitry Andric 1339*0b57cec5SDimitry Andric struct cons_header { 1340*0b57cec5SDimitry Andric int p_top, w_top, s_top; 1341*0b57cec5SDimitry Andric int stack_size, stack_top; 1342*0b57cec5SDimitry Andric struct cons_data *stack_data; 1343*0b57cec5SDimitry Andric }; 1344*0b57cec5SDimitry Andric 1345*0b57cec5SDimitry Andric struct kmp_region_info { 1346*0b57cec5SDimitry Andric char *text; 1347*0b57cec5SDimitry Andric int offset[KMP_MAX_FIELDS]; 1348*0b57cec5SDimitry Andric int length[KMP_MAX_FIELDS]; 1349*0b57cec5SDimitry Andric }; 1350*0b57cec5SDimitry Andric 1351*0b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */ 1352*0b57cec5SDimitry Andric /* ---------------------------------------------------------------------- */ 1353*0b57cec5SDimitry Andric 1354*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1355*0b57cec5SDimitry Andric typedef HANDLE kmp_thread_t; 1356*0b57cec5SDimitry Andric typedef DWORD kmp_key_t; 1357*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1358*0b57cec5SDimitry Andric 1359*0b57cec5SDimitry Andric #if KMP_OS_UNIX 1360*0b57cec5SDimitry Andric typedef pthread_t kmp_thread_t; 1361*0b57cec5SDimitry Andric typedef pthread_key_t kmp_key_t; 1362*0b57cec5SDimitry Andric #endif 1363*0b57cec5SDimitry Andric 1364*0b57cec5SDimitry Andric extern kmp_key_t __kmp_gtid_threadprivate_key; 1365*0b57cec5SDimitry Andric 1366*0b57cec5SDimitry Andric typedef struct kmp_sys_info { 1367*0b57cec5SDimitry Andric long maxrss; /* the maximum resident set size utilized (in kilobytes) */ 1368*0b57cec5SDimitry Andric long minflt; /* the number of page faults serviced without any I/O */ 1369*0b57cec5SDimitry Andric long majflt; /* the number of page faults serviced that required I/O */ 1370*0b57cec5SDimitry Andric long nswap; /* the number of times a process was "swapped" out of memory */ 1371*0b57cec5SDimitry Andric long inblock; /* the number of times the file system had to perform input */ 1372*0b57cec5SDimitry Andric long oublock; /* the number of times the file system had to perform output */ 1373*0b57cec5SDimitry Andric long nvcsw; /* the number of times a context switch was voluntarily */ 1374*0b57cec5SDimitry Andric long nivcsw; /* the number of times a context switch was forced */ 1375*0b57cec5SDimitry Andric } kmp_sys_info_t; 1376*0b57cec5SDimitry Andric 1377*0b57cec5SDimitry Andric #if USE_ITT_BUILD 1378*0b57cec5SDimitry Andric // We cannot include "kmp_itt.h" due to circular dependency. Declare the only 1379*0b57cec5SDimitry Andric // required type here. Later we will check the type meets requirements. 1380*0b57cec5SDimitry Andric typedef int kmp_itt_mark_t; 1381*0b57cec5SDimitry Andric #define KMP_ITT_DEBUG 0 1382*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 1383*0b57cec5SDimitry Andric 1384*0b57cec5SDimitry Andric typedef kmp_int32 kmp_critical_name[8]; 1385*0b57cec5SDimitry Andric 1386*0b57cec5SDimitry Andric /*! 1387*0b57cec5SDimitry Andric @ingroup PARALLEL 1388*0b57cec5SDimitry Andric The type for a microtask which gets passed to @ref __kmpc_fork_call(). 1389*0b57cec5SDimitry Andric The arguments to the outlined function are 1390*0b57cec5SDimitry Andric @param global_tid the global thread identity of the thread executing the 1391*0b57cec5SDimitry Andric function. 1392*0b57cec5SDimitry Andric @param bound_tid the local identitiy of the thread executing the function 1393*0b57cec5SDimitry Andric @param ... pointers to shared variables accessed by the function. 1394*0b57cec5SDimitry Andric */ 1395*0b57cec5SDimitry Andric typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...); 1396*0b57cec5SDimitry Andric typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth, 1397*0b57cec5SDimitry Andric ...); 1398*0b57cec5SDimitry Andric 1399*0b57cec5SDimitry Andric /*! 1400*0b57cec5SDimitry Andric @ingroup THREADPRIVATE 1401*0b57cec5SDimitry Andric @{ 1402*0b57cec5SDimitry Andric */ 1403*0b57cec5SDimitry Andric /* --------------------------------------------------------------------------- 1404*0b57cec5SDimitry Andric */ 1405*0b57cec5SDimitry Andric /* Threadprivate initialization/finalization function declarations */ 1406*0b57cec5SDimitry Andric 1407*0b57cec5SDimitry Andric /* for non-array objects: __kmpc_threadprivate_register() */ 1408*0b57cec5SDimitry Andric 1409*0b57cec5SDimitry Andric /*! 1410*0b57cec5SDimitry Andric Pointer to the constructor function. 1411*0b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer 1412*0b57cec5SDimitry Andric */ 1413*0b57cec5SDimitry Andric typedef void *(*kmpc_ctor)(void *); 1414*0b57cec5SDimitry Andric 1415*0b57cec5SDimitry Andric /*! 1416*0b57cec5SDimitry Andric Pointer to the destructor function. 1417*0b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer 1418*0b57cec5SDimitry Andric */ 1419*0b57cec5SDimitry Andric typedef void (*kmpc_dtor)( 1420*0b57cec5SDimitry Andric void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel 1421*0b57cec5SDimitry Andric compiler */ 1422*0b57cec5SDimitry Andric /*! 1423*0b57cec5SDimitry Andric Pointer to an alternate constructor. 1424*0b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer. 1425*0b57cec5SDimitry Andric */ 1426*0b57cec5SDimitry Andric typedef void *(*kmpc_cctor)(void *, void *); 1427*0b57cec5SDimitry Andric 1428*0b57cec5SDimitry Andric /* for array objects: __kmpc_threadprivate_register_vec() */ 1429*0b57cec5SDimitry Andric /* First arg: "this" pointer */ 1430*0b57cec5SDimitry Andric /* Last arg: number of array elements */ 1431*0b57cec5SDimitry Andric /*! 1432*0b57cec5SDimitry Andric Array constructor. 1433*0b57cec5SDimitry Andric First argument is the <tt>this</tt> pointer 1434*0b57cec5SDimitry Andric Second argument the number of array elements. 1435*0b57cec5SDimitry Andric */ 1436*0b57cec5SDimitry Andric typedef void *(*kmpc_ctor_vec)(void *, size_t); 1437*0b57cec5SDimitry Andric /*! 1438*0b57cec5SDimitry Andric Pointer to the array destructor function. 1439*0b57cec5SDimitry Andric The first argument is the <tt>this</tt> pointer 1440*0b57cec5SDimitry Andric Second argument the number of array elements. 1441*0b57cec5SDimitry Andric */ 1442*0b57cec5SDimitry Andric typedef void (*kmpc_dtor_vec)(void *, size_t); 1443*0b57cec5SDimitry Andric /*! 1444*0b57cec5SDimitry Andric Array constructor. 1445*0b57cec5SDimitry Andric First argument is the <tt>this</tt> pointer 1446*0b57cec5SDimitry Andric Third argument the number of array elements. 1447*0b57cec5SDimitry Andric */ 1448*0b57cec5SDimitry Andric typedef void *(*kmpc_cctor_vec)(void *, void *, 1449*0b57cec5SDimitry Andric size_t); /* function unused by compiler */ 1450*0b57cec5SDimitry Andric 1451*0b57cec5SDimitry Andric /*! 1452*0b57cec5SDimitry Andric @} 1453*0b57cec5SDimitry Andric */ 1454*0b57cec5SDimitry Andric 1455*0b57cec5SDimitry Andric /* keeps tracked of threadprivate cache allocations for cleanup later */ 1456*0b57cec5SDimitry Andric typedef struct kmp_cached_addr { 1457*0b57cec5SDimitry Andric void **addr; /* address of allocated cache */ 1458*0b57cec5SDimitry Andric void ***compiler_cache; /* pointer to compiler's cache */ 1459*0b57cec5SDimitry Andric void *data; /* pointer to global data */ 1460*0b57cec5SDimitry Andric struct kmp_cached_addr *next; /* pointer to next cached address */ 1461*0b57cec5SDimitry Andric } kmp_cached_addr_t; 1462*0b57cec5SDimitry Andric 1463*0b57cec5SDimitry Andric struct private_data { 1464*0b57cec5SDimitry Andric struct private_data *next; /* The next descriptor in the list */ 1465*0b57cec5SDimitry Andric void *data; /* The data buffer for this descriptor */ 1466*0b57cec5SDimitry Andric int more; /* The repeat count for this descriptor */ 1467*0b57cec5SDimitry Andric size_t size; /* The data size for this descriptor */ 1468*0b57cec5SDimitry Andric }; 1469*0b57cec5SDimitry Andric 1470*0b57cec5SDimitry Andric struct private_common { 1471*0b57cec5SDimitry Andric struct private_common *next; 1472*0b57cec5SDimitry Andric struct private_common *link; 1473*0b57cec5SDimitry Andric void *gbl_addr; 1474*0b57cec5SDimitry Andric void *par_addr; /* par_addr == gbl_addr for MASTER thread */ 1475*0b57cec5SDimitry Andric size_t cmn_size; 1476*0b57cec5SDimitry Andric }; 1477*0b57cec5SDimitry Andric 1478*0b57cec5SDimitry Andric struct shared_common { 1479*0b57cec5SDimitry Andric struct shared_common *next; 1480*0b57cec5SDimitry Andric struct private_data *pod_init; 1481*0b57cec5SDimitry Andric void *obj_init; 1482*0b57cec5SDimitry Andric void *gbl_addr; 1483*0b57cec5SDimitry Andric union { 1484*0b57cec5SDimitry Andric kmpc_ctor ctor; 1485*0b57cec5SDimitry Andric kmpc_ctor_vec ctorv; 1486*0b57cec5SDimitry Andric } ct; 1487*0b57cec5SDimitry Andric union { 1488*0b57cec5SDimitry Andric kmpc_cctor cctor; 1489*0b57cec5SDimitry Andric kmpc_cctor_vec cctorv; 1490*0b57cec5SDimitry Andric } cct; 1491*0b57cec5SDimitry Andric union { 1492*0b57cec5SDimitry Andric kmpc_dtor dtor; 1493*0b57cec5SDimitry Andric kmpc_dtor_vec dtorv; 1494*0b57cec5SDimitry Andric } dt; 1495*0b57cec5SDimitry Andric size_t vec_len; 1496*0b57cec5SDimitry Andric int is_vec; 1497*0b57cec5SDimitry Andric size_t cmn_size; 1498*0b57cec5SDimitry Andric }; 1499*0b57cec5SDimitry Andric 1500*0b57cec5SDimitry Andric #define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */ 1501*0b57cec5SDimitry Andric #define KMP_HASH_TABLE_SIZE \ 1502*0b57cec5SDimitry Andric (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ 1503*0b57cec5SDimitry Andric #define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ 1504*0b57cec5SDimitry Andric #define KMP_HASH(x) \ 1505*0b57cec5SDimitry Andric ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1)) 1506*0b57cec5SDimitry Andric 1507*0b57cec5SDimitry Andric struct common_table { 1508*0b57cec5SDimitry Andric struct private_common *data[KMP_HASH_TABLE_SIZE]; 1509*0b57cec5SDimitry Andric }; 1510*0b57cec5SDimitry Andric 1511*0b57cec5SDimitry Andric struct shared_table { 1512*0b57cec5SDimitry Andric struct shared_common *data[KMP_HASH_TABLE_SIZE]; 1513*0b57cec5SDimitry Andric }; 1514*0b57cec5SDimitry Andric 1515*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 1516*0b57cec5SDimitry Andric 1517*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 1518*0b57cec5SDimitry Andric // Shared barrier data that exists inside a single unit of the scheduling 1519*0b57cec5SDimitry Andric // hierarchy 1520*0b57cec5SDimitry Andric typedef struct kmp_hier_private_bdata_t { 1521*0b57cec5SDimitry Andric kmp_int32 num_active; 1522*0b57cec5SDimitry Andric kmp_uint64 index; 1523*0b57cec5SDimitry Andric kmp_uint64 wait_val[2]; 1524*0b57cec5SDimitry Andric } kmp_hier_private_bdata_t; 1525*0b57cec5SDimitry Andric #endif 1526*0b57cec5SDimitry Andric 1527*0b57cec5SDimitry Andric typedef struct kmp_sched_flags { 1528*0b57cec5SDimitry Andric unsigned ordered : 1; 1529*0b57cec5SDimitry Andric unsigned nomerge : 1; 1530*0b57cec5SDimitry Andric unsigned contains_last : 1; 1531*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 1532*0b57cec5SDimitry Andric unsigned use_hier : 1; 1533*0b57cec5SDimitry Andric unsigned unused : 28; 1534*0b57cec5SDimitry Andric #else 1535*0b57cec5SDimitry Andric unsigned unused : 29; 1536*0b57cec5SDimitry Andric #endif 1537*0b57cec5SDimitry Andric } kmp_sched_flags_t; 1538*0b57cec5SDimitry Andric 1539*0b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4); 1540*0b57cec5SDimitry Andric 1541*0b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 1542*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { 1543*0b57cec5SDimitry Andric kmp_int32 count; 1544*0b57cec5SDimitry Andric kmp_int32 ub; 1545*0b57cec5SDimitry Andric /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ 1546*0b57cec5SDimitry Andric kmp_int32 lb; 1547*0b57cec5SDimitry Andric kmp_int32 st; 1548*0b57cec5SDimitry Andric kmp_int32 tc; 1549*0b57cec5SDimitry Andric kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put 1550*0b57cec5SDimitry Andric after ub */ 1551*0b57cec5SDimitry Andric 1552*0b57cec5SDimitry Andric // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on ) 1553*0b57cec5SDimitry Andric // a) parm3 is properly aligned and 1554*0b57cec5SDimitry Andric // b) all parm1-4 are in the same cache line. 1555*0b57cec5SDimitry Andric // Because of parm1-4 are used together, performance seems to be better 1556*0b57cec5SDimitry Andric // if they are in the same line (not measured though). 1557*0b57cec5SDimitry Andric 1558*0b57cec5SDimitry Andric struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template 1559*0b57cec5SDimitry Andric kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should 1560*0b57cec5SDimitry Andric kmp_int32 parm2; // make no real change at least while padding is off. 1561*0b57cec5SDimitry Andric kmp_int32 parm3; 1562*0b57cec5SDimitry Andric kmp_int32 parm4; 1563*0b57cec5SDimitry Andric }; 1564*0b57cec5SDimitry Andric 1565*0b57cec5SDimitry Andric kmp_uint32 ordered_lower; 1566*0b57cec5SDimitry Andric kmp_uint32 ordered_upper; 1567*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1568*0b57cec5SDimitry Andric // This var can be placed in the hole between 'tc' and 'parm1', instead of 1569*0b57cec5SDimitry Andric // 'static_steal_counter'. It would be nice to measure execution times. 1570*0b57cec5SDimitry Andric // Conditional if/endif can be removed at all. 1571*0b57cec5SDimitry Andric kmp_int32 last_upper; 1572*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1573*0b57cec5SDimitry Andric } dispatch_private_info32_t; 1574*0b57cec5SDimitry Andric 1575*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { 1576*0b57cec5SDimitry Andric kmp_int64 count; // current chunk number for static & static-steal scheduling 1577*0b57cec5SDimitry Andric kmp_int64 ub; /* upper-bound */ 1578*0b57cec5SDimitry Andric /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ 1579*0b57cec5SDimitry Andric kmp_int64 lb; /* lower-bound */ 1580*0b57cec5SDimitry Andric kmp_int64 st; /* stride */ 1581*0b57cec5SDimitry Andric kmp_int64 tc; /* trip count (number of iterations) */ 1582*0b57cec5SDimitry Andric kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put 1583*0b57cec5SDimitry Andric after ub */ 1584*0b57cec5SDimitry Andric 1585*0b57cec5SDimitry Andric /* parm[1-4] are used in different ways by different scheduling algorithms */ 1586*0b57cec5SDimitry Andric 1587*0b57cec5SDimitry Andric // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) 1588*0b57cec5SDimitry Andric // a) parm3 is properly aligned and 1589*0b57cec5SDimitry Andric // b) all parm1-4 are in the same cache line. 1590*0b57cec5SDimitry Andric // Because of parm1-4 are used together, performance seems to be better 1591*0b57cec5SDimitry Andric // if they are in the same line (not measured though). 1592*0b57cec5SDimitry Andric 1593*0b57cec5SDimitry Andric struct KMP_ALIGN(32) { 1594*0b57cec5SDimitry Andric kmp_int64 parm1; 1595*0b57cec5SDimitry Andric kmp_int64 parm2; 1596*0b57cec5SDimitry Andric kmp_int64 parm3; 1597*0b57cec5SDimitry Andric kmp_int64 parm4; 1598*0b57cec5SDimitry Andric }; 1599*0b57cec5SDimitry Andric 1600*0b57cec5SDimitry Andric kmp_uint64 ordered_lower; 1601*0b57cec5SDimitry Andric kmp_uint64 ordered_upper; 1602*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1603*0b57cec5SDimitry Andric // This var can be placed in the hole between 'tc' and 'parm1', instead of 1604*0b57cec5SDimitry Andric // 'static_steal_counter'. It would be nice to measure execution times. 1605*0b57cec5SDimitry Andric // Conditional if/endif can be removed at all. 1606*0b57cec5SDimitry Andric kmp_int64 last_upper; 1607*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1608*0b57cec5SDimitry Andric } dispatch_private_info64_t; 1609*0b57cec5SDimitry Andric #else /* KMP_STATIC_STEAL_ENABLED */ 1610*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { 1611*0b57cec5SDimitry Andric kmp_int32 lb; 1612*0b57cec5SDimitry Andric kmp_int32 ub; 1613*0b57cec5SDimitry Andric kmp_int32 st; 1614*0b57cec5SDimitry Andric kmp_int32 tc; 1615*0b57cec5SDimitry Andric 1616*0b57cec5SDimitry Andric kmp_int32 parm1; 1617*0b57cec5SDimitry Andric kmp_int32 parm2; 1618*0b57cec5SDimitry Andric kmp_int32 parm3; 1619*0b57cec5SDimitry Andric kmp_int32 parm4; 1620*0b57cec5SDimitry Andric 1621*0b57cec5SDimitry Andric kmp_int32 count; 1622*0b57cec5SDimitry Andric 1623*0b57cec5SDimitry Andric kmp_uint32 ordered_lower; 1624*0b57cec5SDimitry Andric kmp_uint32 ordered_upper; 1625*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1626*0b57cec5SDimitry Andric kmp_int32 last_upper; 1627*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1628*0b57cec5SDimitry Andric } dispatch_private_info32_t; 1629*0b57cec5SDimitry Andric 1630*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { 1631*0b57cec5SDimitry Andric kmp_int64 lb; /* lower-bound */ 1632*0b57cec5SDimitry Andric kmp_int64 ub; /* upper-bound */ 1633*0b57cec5SDimitry Andric kmp_int64 st; /* stride */ 1634*0b57cec5SDimitry Andric kmp_int64 tc; /* trip count (number of iterations) */ 1635*0b57cec5SDimitry Andric 1636*0b57cec5SDimitry Andric /* parm[1-4] are used in different ways by different scheduling algorithms */ 1637*0b57cec5SDimitry Andric kmp_int64 parm1; 1638*0b57cec5SDimitry Andric kmp_int64 parm2; 1639*0b57cec5SDimitry Andric kmp_int64 parm3; 1640*0b57cec5SDimitry Andric kmp_int64 parm4; 1641*0b57cec5SDimitry Andric 1642*0b57cec5SDimitry Andric kmp_int64 count; /* current chunk number for static scheduling */ 1643*0b57cec5SDimitry Andric 1644*0b57cec5SDimitry Andric kmp_uint64 ordered_lower; 1645*0b57cec5SDimitry Andric kmp_uint64 ordered_upper; 1646*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1647*0b57cec5SDimitry Andric kmp_int64 last_upper; 1648*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1649*0b57cec5SDimitry Andric } dispatch_private_info64_t; 1650*0b57cec5SDimitry Andric #endif /* KMP_STATIC_STEAL_ENABLED */ 1651*0b57cec5SDimitry Andric 1652*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE dispatch_private_info { 1653*0b57cec5SDimitry Andric union private_info { 1654*0b57cec5SDimitry Andric dispatch_private_info32_t p32; 1655*0b57cec5SDimitry Andric dispatch_private_info64_t p64; 1656*0b57cec5SDimitry Andric } u; 1657*0b57cec5SDimitry Andric enum sched_type schedule; /* scheduling algorithm */ 1658*0b57cec5SDimitry Andric kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */ 1659*0b57cec5SDimitry Andric kmp_int32 ordered_bumped; 1660*0b57cec5SDimitry Andric // To retain the structure size after making ordered_iteration scalar 1661*0b57cec5SDimitry Andric kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; 1662*0b57cec5SDimitry Andric // Stack of buffers for nest of serial regions 1663*0b57cec5SDimitry Andric struct dispatch_private_info *next; 1664*0b57cec5SDimitry Andric kmp_int32 type_size; /* the size of types in private_info */ 1665*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 1666*0b57cec5SDimitry Andric kmp_int32 hier_id; 1667*0b57cec5SDimitry Andric void *parent; /* hierarchical scheduling parent pointer */ 1668*0b57cec5SDimitry Andric #endif 1669*0b57cec5SDimitry Andric enum cons_type pushed_ws; 1670*0b57cec5SDimitry Andric } dispatch_private_info_t; 1671*0b57cec5SDimitry Andric 1672*0b57cec5SDimitry Andric typedef struct dispatch_shared_info32 { 1673*0b57cec5SDimitry Andric /* chunk index under dynamic, number of idle threads under static-steal; 1674*0b57cec5SDimitry Andric iteration index otherwise */ 1675*0b57cec5SDimitry Andric volatile kmp_uint32 iteration; 1676*0b57cec5SDimitry Andric volatile kmp_uint32 num_done; 1677*0b57cec5SDimitry Andric volatile kmp_uint32 ordered_iteration; 1678*0b57cec5SDimitry Andric // Dummy to retain the structure size after making ordered_iteration scalar 1679*0b57cec5SDimitry Andric kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1]; 1680*0b57cec5SDimitry Andric } dispatch_shared_info32_t; 1681*0b57cec5SDimitry Andric 1682*0b57cec5SDimitry Andric typedef struct dispatch_shared_info64 { 1683*0b57cec5SDimitry Andric /* chunk index under dynamic, number of idle threads under static-steal; 1684*0b57cec5SDimitry Andric iteration index otherwise */ 1685*0b57cec5SDimitry Andric volatile kmp_uint64 iteration; 1686*0b57cec5SDimitry Andric volatile kmp_uint64 num_done; 1687*0b57cec5SDimitry Andric volatile kmp_uint64 ordered_iteration; 1688*0b57cec5SDimitry Andric // Dummy to retain the structure size after making ordered_iteration scalar 1689*0b57cec5SDimitry Andric kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3]; 1690*0b57cec5SDimitry Andric } dispatch_shared_info64_t; 1691*0b57cec5SDimitry Andric 1692*0b57cec5SDimitry Andric typedef struct dispatch_shared_info { 1693*0b57cec5SDimitry Andric union shared_info { 1694*0b57cec5SDimitry Andric dispatch_shared_info32_t s32; 1695*0b57cec5SDimitry Andric dispatch_shared_info64_t s64; 1696*0b57cec5SDimitry Andric } u; 1697*0b57cec5SDimitry Andric volatile kmp_uint32 buffer_index; 1698*0b57cec5SDimitry Andric volatile kmp_int32 doacross_buf_idx; // teamwise index 1699*0b57cec5SDimitry Andric volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) 1700*0b57cec5SDimitry Andric kmp_int32 doacross_num_done; // count finished threads 1701*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 1702*0b57cec5SDimitry Andric void *hier; 1703*0b57cec5SDimitry Andric #endif 1704*0b57cec5SDimitry Andric #if KMP_USE_HWLOC 1705*0b57cec5SDimitry Andric // When linking with libhwloc, the ORDERED EPCC test slows down on big 1706*0b57cec5SDimitry Andric // machines (> 48 cores). Performance analysis showed that a cache thrash 1707*0b57cec5SDimitry Andric // was occurring and this padding helps alleviate the problem. 1708*0b57cec5SDimitry Andric char padding[64]; 1709*0b57cec5SDimitry Andric #endif 1710*0b57cec5SDimitry Andric } dispatch_shared_info_t; 1711*0b57cec5SDimitry Andric 1712*0b57cec5SDimitry Andric typedef struct kmp_disp { 1713*0b57cec5SDimitry Andric /* Vector for ORDERED SECTION */ 1714*0b57cec5SDimitry Andric void (*th_deo_fcn)(int *gtid, int *cid, ident_t *); 1715*0b57cec5SDimitry Andric /* Vector for END ORDERED SECTION */ 1716*0b57cec5SDimitry Andric void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *); 1717*0b57cec5SDimitry Andric 1718*0b57cec5SDimitry Andric dispatch_shared_info_t *th_dispatch_sh_current; 1719*0b57cec5SDimitry Andric dispatch_private_info_t *th_dispatch_pr_current; 1720*0b57cec5SDimitry Andric 1721*0b57cec5SDimitry Andric dispatch_private_info_t *th_disp_buffer; 1722*0b57cec5SDimitry Andric kmp_int32 th_disp_index; 1723*0b57cec5SDimitry Andric kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index 1724*0b57cec5SDimitry Andric volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags 1725*0b57cec5SDimitry Andric union { // we can use union here because doacross cannot be used in 1726*0b57cec5SDimitry Andric // nonmonotonic loops 1727*0b57cec5SDimitry Andric kmp_int64 *th_doacross_info; // info on loop bounds 1728*0b57cec5SDimitry Andric kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) 1729*0b57cec5SDimitry Andric }; 1730*0b57cec5SDimitry Andric #if KMP_USE_INTERNODE_ALIGNMENT 1731*0b57cec5SDimitry Andric char more_padding[INTERNODE_CACHE_LINE]; 1732*0b57cec5SDimitry Andric #endif 1733*0b57cec5SDimitry Andric } kmp_disp_t; 1734*0b57cec5SDimitry Andric 1735*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 1736*0b57cec5SDimitry Andric /* Barrier stuff */ 1737*0b57cec5SDimitry Andric 1738*0b57cec5SDimitry Andric /* constants for barrier state update */ 1739*0b57cec5SDimitry Andric #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ 1740*0b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ 1741*0b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state 1742*0b57cec5SDimitry Andric #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ 1743*0b57cec5SDimitry Andric 1744*0b57cec5SDimitry Andric #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT) 1745*0b57cec5SDimitry Andric #define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT) 1746*0b57cec5SDimitry Andric #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT) 1747*0b57cec5SDimitry Andric 1748*0b57cec5SDimitry Andric #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) 1749*0b57cec5SDimitry Andric #error "Barrier sleep bit must be smaller than barrier bump bit" 1750*0b57cec5SDimitry Andric #endif 1751*0b57cec5SDimitry Andric #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) 1752*0b57cec5SDimitry Andric #error "Barrier unused bit must be smaller than barrier bump bit" 1753*0b57cec5SDimitry Andric #endif 1754*0b57cec5SDimitry Andric 1755*0b57cec5SDimitry Andric // Constants for release barrier wait state: currently, hierarchical only 1756*0b57cec5SDimitry Andric #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep 1757*0b57cec5SDimitry Andric #define KMP_BARRIER_OWN_FLAG \ 1758*0b57cec5SDimitry Andric 1 // Normal state; worker waiting on own b_go flag in release 1759*0b57cec5SDimitry Andric #define KMP_BARRIER_PARENT_FLAG \ 1760*0b57cec5SDimitry Andric 2 // Special state; worker waiting on parent's b_go flag in release 1761*0b57cec5SDimitry Andric #define KMP_BARRIER_SWITCH_TO_OWN_FLAG \ 1762*0b57cec5SDimitry Andric 3 // Special state; tells worker to shift from parent to own b_go 1763*0b57cec5SDimitry Andric #define KMP_BARRIER_SWITCHING \ 1764*0b57cec5SDimitry Andric 4 // Special state; worker resets appropriate flag on wake-up 1765*0b57cec5SDimitry Andric 1766*0b57cec5SDimitry Andric #define KMP_NOT_SAFE_TO_REAP \ 1767*0b57cec5SDimitry Andric 0 // Thread th_reap_state: not safe to reap (tasking) 1768*0b57cec5SDimitry Andric #define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking) 1769*0b57cec5SDimitry Andric 1770*0b57cec5SDimitry Andric enum barrier_type { 1771*0b57cec5SDimitry Andric bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction 1772*0b57cec5SDimitry Andric barriers if enabled) */ 1773*0b57cec5SDimitry Andric bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ 1774*0b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 1775*0b57cec5SDimitry Andric bs_reduction_barrier, /* 2, All barriers that are used in reduction */ 1776*0b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 1777*0b57cec5SDimitry Andric bs_last_barrier /* Just a placeholder to mark the end */ 1778*0b57cec5SDimitry Andric }; 1779*0b57cec5SDimitry Andric 1780*0b57cec5SDimitry Andric // to work with reduction barriers just like with plain barriers 1781*0b57cec5SDimitry Andric #if !KMP_FAST_REDUCTION_BARRIER 1782*0b57cec5SDimitry Andric #define bs_reduction_barrier bs_plain_barrier 1783*0b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 1784*0b57cec5SDimitry Andric 1785*0b57cec5SDimitry Andric typedef enum kmp_bar_pat { /* Barrier communication patterns */ 1786*0b57cec5SDimitry Andric bp_linear_bar = 1787*0b57cec5SDimitry Andric 0, /* Single level (degenerate) tree */ 1788*0b57cec5SDimitry Andric bp_tree_bar = 1789*0b57cec5SDimitry Andric 1, /* Balanced tree with branching factor 2^n */ 1790*0b57cec5SDimitry Andric bp_hyper_bar = 1791*0b57cec5SDimitry Andric 2, /* Hypercube-embedded tree with min branching 1792*0b57cec5SDimitry Andric factor 2^n */ 1793*0b57cec5SDimitry Andric bp_hierarchical_bar = 3, /* Machine hierarchy tree */ 1794*0b57cec5SDimitry Andric bp_last_bar /* Placeholder to mark the end */ 1795*0b57cec5SDimitry Andric } kmp_bar_pat_e; 1796*0b57cec5SDimitry Andric 1797*0b57cec5SDimitry Andric #define KMP_BARRIER_ICV_PUSH 1 1798*0b57cec5SDimitry Andric 1799*0b57cec5SDimitry Andric /* Record for holding the values of the internal controls stack records */ 1800*0b57cec5SDimitry Andric typedef struct kmp_internal_control { 1801*0b57cec5SDimitry Andric int serial_nesting_level; /* corresponds to the value of the 1802*0b57cec5SDimitry Andric th_team_serialized field */ 1803*0b57cec5SDimitry Andric kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per 1804*0b57cec5SDimitry Andric thread) */ 1805*0b57cec5SDimitry Andric kmp_int8 1806*0b57cec5SDimitry Andric bt_set; /* internal control for whether blocktime is explicitly set */ 1807*0b57cec5SDimitry Andric int blocktime; /* internal control for blocktime */ 1808*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 1809*0b57cec5SDimitry Andric int bt_intervals; /* internal control for blocktime intervals */ 1810*0b57cec5SDimitry Andric #endif 1811*0b57cec5SDimitry Andric int nproc; /* internal control for #threads for next parallel region (per 1812*0b57cec5SDimitry Andric thread) */ 1813*0b57cec5SDimitry Andric int thread_limit; /* internal control for thread-limit-var */ 1814*0b57cec5SDimitry Andric int max_active_levels; /* internal control for max_active_levels */ 1815*0b57cec5SDimitry Andric kmp_r_sched_t 1816*0b57cec5SDimitry Andric sched; /* internal control for runtime schedule {sched,chunk} pair */ 1817*0b57cec5SDimitry Andric kmp_proc_bind_t proc_bind; /* internal control for affinity */ 1818*0b57cec5SDimitry Andric kmp_int32 default_device; /* internal control for default device */ 1819*0b57cec5SDimitry Andric struct kmp_internal_control *next; 1820*0b57cec5SDimitry Andric } kmp_internal_control_t; 1821*0b57cec5SDimitry Andric 1822*0b57cec5SDimitry Andric static inline void copy_icvs(kmp_internal_control_t *dst, 1823*0b57cec5SDimitry Andric kmp_internal_control_t *src) { 1824*0b57cec5SDimitry Andric *dst = *src; 1825*0b57cec5SDimitry Andric } 1826*0b57cec5SDimitry Andric 1827*0b57cec5SDimitry Andric /* Thread barrier needs volatile barrier fields */ 1828*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_bstate { 1829*0b57cec5SDimitry Andric // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all 1830*0b57cec5SDimitry Andric // uses of it). It is not explicitly aligned below, because we *don't* want 1831*0b57cec5SDimitry Andric // it to be padded -- instead, we fit b_go into the same cache line with 1832*0b57cec5SDimitry Andric // th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier. 1833*0b57cec5SDimitry Andric kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread 1834*0b57cec5SDimitry Andric // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with 1835*0b57cec5SDimitry Andric // same NGO store 1836*0b57cec5SDimitry Andric volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical) 1837*0b57cec5SDimitry Andric KMP_ALIGN_CACHE volatile kmp_uint64 1838*0b57cec5SDimitry Andric b_arrived; // STATE => task reached synch point. 1839*0b57cec5SDimitry Andric kmp_uint32 *skip_per_level; 1840*0b57cec5SDimitry Andric kmp_uint32 my_level; 1841*0b57cec5SDimitry Andric kmp_int32 parent_tid; 1842*0b57cec5SDimitry Andric kmp_int32 old_tid; 1843*0b57cec5SDimitry Andric kmp_uint32 depth; 1844*0b57cec5SDimitry Andric struct kmp_bstate *parent_bar; 1845*0b57cec5SDimitry Andric kmp_team_t *team; 1846*0b57cec5SDimitry Andric kmp_uint64 leaf_state; 1847*0b57cec5SDimitry Andric kmp_uint32 nproc; 1848*0b57cec5SDimitry Andric kmp_uint8 base_leaf_kids; 1849*0b57cec5SDimitry Andric kmp_uint8 leaf_kids; 1850*0b57cec5SDimitry Andric kmp_uint8 offset; 1851*0b57cec5SDimitry Andric kmp_uint8 wait_flag; 1852*0b57cec5SDimitry Andric kmp_uint8 use_oncore_barrier; 1853*0b57cec5SDimitry Andric #if USE_DEBUGGER 1854*0b57cec5SDimitry Andric // The following field is intended for the debugger solely. Only the worker 1855*0b57cec5SDimitry Andric // thread itself accesses this field: the worker increases it by 1 when it 1856*0b57cec5SDimitry Andric // arrives to a barrier. 1857*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_uint b_worker_arrived; 1858*0b57cec5SDimitry Andric #endif /* USE_DEBUGGER */ 1859*0b57cec5SDimitry Andric } kmp_bstate_t; 1860*0b57cec5SDimitry Andric 1861*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_union { 1862*0b57cec5SDimitry Andric double b_align; /* use worst case alignment */ 1863*0b57cec5SDimitry Andric char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)]; 1864*0b57cec5SDimitry Andric kmp_bstate_t bb; 1865*0b57cec5SDimitry Andric }; 1866*0b57cec5SDimitry Andric 1867*0b57cec5SDimitry Andric typedef union kmp_barrier_union kmp_balign_t; 1868*0b57cec5SDimitry Andric 1869*0b57cec5SDimitry Andric /* Team barrier needs only non-volatile arrived counter */ 1870*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_barrier_team_union { 1871*0b57cec5SDimitry Andric double b_align; /* use worst case alignment */ 1872*0b57cec5SDimitry Andric char b_pad[CACHE_LINE]; 1873*0b57cec5SDimitry Andric struct { 1874*0b57cec5SDimitry Andric kmp_uint64 b_arrived; /* STATE => task reached synch point. */ 1875*0b57cec5SDimitry Andric #if USE_DEBUGGER 1876*0b57cec5SDimitry Andric // The following two fields are indended for the debugger solely. Only 1877*0b57cec5SDimitry Andric // master of the team accesses these fields: the first one is increased by 1878*0b57cec5SDimitry Andric // 1 when master arrives to a barrier, the second one is increased by one 1879*0b57cec5SDimitry Andric // when all the threads arrived. 1880*0b57cec5SDimitry Andric kmp_uint b_master_arrived; 1881*0b57cec5SDimitry Andric kmp_uint b_team_arrived; 1882*0b57cec5SDimitry Andric #endif 1883*0b57cec5SDimitry Andric }; 1884*0b57cec5SDimitry Andric }; 1885*0b57cec5SDimitry Andric 1886*0b57cec5SDimitry Andric typedef union kmp_barrier_team_union kmp_balign_team_t; 1887*0b57cec5SDimitry Andric 1888*0b57cec5SDimitry Andric /* Padding for Linux* OS pthreads condition variables and mutexes used to signal 1889*0b57cec5SDimitry Andric threads when a condition changes. This is to workaround an NPTL bug where 1890*0b57cec5SDimitry Andric padding was added to pthread_cond_t which caused the initialization routine 1891*0b57cec5SDimitry Andric to write outside of the structure if compiled on pre-NPTL threads. */ 1892*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1893*0b57cec5SDimitry Andric typedef struct kmp_win32_mutex { 1894*0b57cec5SDimitry Andric /* The Lock */ 1895*0b57cec5SDimitry Andric CRITICAL_SECTION cs; 1896*0b57cec5SDimitry Andric } kmp_win32_mutex_t; 1897*0b57cec5SDimitry Andric 1898*0b57cec5SDimitry Andric typedef struct kmp_win32_cond { 1899*0b57cec5SDimitry Andric /* Count of the number of waiters. */ 1900*0b57cec5SDimitry Andric int waiters_count_; 1901*0b57cec5SDimitry Andric 1902*0b57cec5SDimitry Andric /* Serialize access to <waiters_count_> */ 1903*0b57cec5SDimitry Andric kmp_win32_mutex_t waiters_count_lock_; 1904*0b57cec5SDimitry Andric 1905*0b57cec5SDimitry Andric /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */ 1906*0b57cec5SDimitry Andric int release_count_; 1907*0b57cec5SDimitry Andric 1908*0b57cec5SDimitry Andric /* Keeps track of the current "generation" so that we don't allow */ 1909*0b57cec5SDimitry Andric /* one thread to steal all the "releases" from the broadcast. */ 1910*0b57cec5SDimitry Andric int wait_generation_count_; 1911*0b57cec5SDimitry Andric 1912*0b57cec5SDimitry Andric /* A manual-reset event that's used to block and release waiting threads. */ 1913*0b57cec5SDimitry Andric HANDLE event_; 1914*0b57cec5SDimitry Andric } kmp_win32_cond_t; 1915*0b57cec5SDimitry Andric #endif 1916*0b57cec5SDimitry Andric 1917*0b57cec5SDimitry Andric #if KMP_OS_UNIX 1918*0b57cec5SDimitry Andric 1919*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_cond_union { 1920*0b57cec5SDimitry Andric double c_align; 1921*0b57cec5SDimitry Andric char c_pad[CACHE_LINE]; 1922*0b57cec5SDimitry Andric pthread_cond_t c_cond; 1923*0b57cec5SDimitry Andric }; 1924*0b57cec5SDimitry Andric 1925*0b57cec5SDimitry Andric typedef union kmp_cond_union kmp_cond_align_t; 1926*0b57cec5SDimitry Andric 1927*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_mutex_union { 1928*0b57cec5SDimitry Andric double m_align; 1929*0b57cec5SDimitry Andric char m_pad[CACHE_LINE]; 1930*0b57cec5SDimitry Andric pthread_mutex_t m_mutex; 1931*0b57cec5SDimitry Andric }; 1932*0b57cec5SDimitry Andric 1933*0b57cec5SDimitry Andric typedef union kmp_mutex_union kmp_mutex_align_t; 1934*0b57cec5SDimitry Andric 1935*0b57cec5SDimitry Andric #endif /* KMP_OS_UNIX */ 1936*0b57cec5SDimitry Andric 1937*0b57cec5SDimitry Andric typedef struct kmp_desc_base { 1938*0b57cec5SDimitry Andric void *ds_stackbase; 1939*0b57cec5SDimitry Andric size_t ds_stacksize; 1940*0b57cec5SDimitry Andric int ds_stackgrow; 1941*0b57cec5SDimitry Andric kmp_thread_t ds_thread; 1942*0b57cec5SDimitry Andric volatile int ds_tid; 1943*0b57cec5SDimitry Andric int ds_gtid; 1944*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1945*0b57cec5SDimitry Andric volatile int ds_alive; 1946*0b57cec5SDimitry Andric DWORD ds_thread_id; 1947*0b57cec5SDimitry Andric /* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. 1948*0b57cec5SDimitry Andric However, debugger support (libomp_db) cannot work with handles, because they 1949*0b57cec5SDimitry Andric uncomparable. For example, debugger requests info about thread with handle h. 1950*0b57cec5SDimitry Andric h is valid within debugger process, and meaningless within debugee process. 1951*0b57cec5SDimitry Andric Even if h is duped by call to DuplicateHandle(), so the result h' is valid 1952*0b57cec5SDimitry Andric within debugee process, but it is a *new* handle which does *not* equal to 1953*0b57cec5SDimitry Andric any other handle in debugee... The only way to compare handles is convert 1954*0b57cec5SDimitry Andric them to system-wide ids. GetThreadId() function is available only in 1955*0b57cec5SDimitry Andric Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available 1956*0b57cec5SDimitry Andric on all Windows* OS flavours (including Windows* 95). Thus, we have to get 1957*0b57cec5SDimitry Andric thread id by call to GetCurrentThreadId() from within the thread and save it 1958*0b57cec5SDimitry Andric to let libomp_db identify threads. */ 1959*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1960*0b57cec5SDimitry Andric } kmp_desc_base_t; 1961*0b57cec5SDimitry Andric 1962*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_desc { 1963*0b57cec5SDimitry Andric double ds_align; /* use worst case alignment */ 1964*0b57cec5SDimitry Andric char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)]; 1965*0b57cec5SDimitry Andric kmp_desc_base_t ds; 1966*0b57cec5SDimitry Andric } kmp_desc_t; 1967*0b57cec5SDimitry Andric 1968*0b57cec5SDimitry Andric typedef struct kmp_local { 1969*0b57cec5SDimitry Andric volatile int this_construct; /* count of single's encountered by thread */ 1970*0b57cec5SDimitry Andric void *reduce_data; 1971*0b57cec5SDimitry Andric #if KMP_USE_BGET 1972*0b57cec5SDimitry Andric void *bget_data; 1973*0b57cec5SDimitry Andric void *bget_list; 1974*0b57cec5SDimitry Andric #if !USE_CMP_XCHG_FOR_BGET 1975*0b57cec5SDimitry Andric #ifdef USE_QUEUING_LOCK_FOR_BGET 1976*0b57cec5SDimitry Andric kmp_lock_t bget_lock; /* Lock for accessing bget free list */ 1977*0b57cec5SDimitry Andric #else 1978*0b57cec5SDimitry Andric kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be 1979*0b57cec5SDimitry Andric // bootstrap lock so we can use it at library 1980*0b57cec5SDimitry Andric // shutdown. 1981*0b57cec5SDimitry Andric #endif /* USE_LOCK_FOR_BGET */ 1982*0b57cec5SDimitry Andric #endif /* ! USE_CMP_XCHG_FOR_BGET */ 1983*0b57cec5SDimitry Andric #endif /* KMP_USE_BGET */ 1984*0b57cec5SDimitry Andric 1985*0b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T 1986*0b57cec5SDimitry Andric packed_reduction_method; /* stored by __kmpc_reduce*(), used by 1987*0b57cec5SDimitry Andric __kmpc_end_reduce*() */ 1988*0b57cec5SDimitry Andric 1989*0b57cec5SDimitry Andric } kmp_local_t; 1990*0b57cec5SDimitry Andric 1991*0b57cec5SDimitry Andric #define KMP_CHECK_UPDATE(a, b) \ 1992*0b57cec5SDimitry Andric if ((a) != (b)) \ 1993*0b57cec5SDimitry Andric (a) = (b) 1994*0b57cec5SDimitry Andric #define KMP_CHECK_UPDATE_SYNC(a, b) \ 1995*0b57cec5SDimitry Andric if ((a) != (b)) \ 1996*0b57cec5SDimitry Andric TCW_SYNC_PTR((a), (b)) 1997*0b57cec5SDimitry Andric 1998*0b57cec5SDimitry Andric #define get__blocktime(xteam, xtid) \ 1999*0b57cec5SDimitry Andric ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) 2000*0b57cec5SDimitry Andric #define get__bt_set(xteam, xtid) \ 2001*0b57cec5SDimitry Andric ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) 2002*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2003*0b57cec5SDimitry Andric #define get__bt_intervals(xteam, xtid) \ 2004*0b57cec5SDimitry Andric ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) 2005*0b57cec5SDimitry Andric #endif 2006*0b57cec5SDimitry Andric 2007*0b57cec5SDimitry Andric #define get__dynamic_2(xteam, xtid) \ 2008*0b57cec5SDimitry Andric ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) 2009*0b57cec5SDimitry Andric #define get__nproc_2(xteam, xtid) \ 2010*0b57cec5SDimitry Andric ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) 2011*0b57cec5SDimitry Andric #define get__sched_2(xteam, xtid) \ 2012*0b57cec5SDimitry Andric ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) 2013*0b57cec5SDimitry Andric 2014*0b57cec5SDimitry Andric #define set__blocktime_team(xteam, xtid, xval) \ 2015*0b57cec5SDimitry Andric (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \ 2016*0b57cec5SDimitry Andric (xval)) 2017*0b57cec5SDimitry Andric 2018*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2019*0b57cec5SDimitry Andric #define set__bt_intervals_team(xteam, xtid, xval) \ 2020*0b57cec5SDimitry Andric (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \ 2021*0b57cec5SDimitry Andric (xval)) 2022*0b57cec5SDimitry Andric #endif 2023*0b57cec5SDimitry Andric 2024*0b57cec5SDimitry Andric #define set__bt_set_team(xteam, xtid, xval) \ 2025*0b57cec5SDimitry Andric (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) 2026*0b57cec5SDimitry Andric 2027*0b57cec5SDimitry Andric #define set__dynamic(xthread, xval) \ 2028*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) 2029*0b57cec5SDimitry Andric #define get__dynamic(xthread) \ 2030*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE)) 2031*0b57cec5SDimitry Andric 2032*0b57cec5SDimitry Andric #define set__nproc(xthread, xval) \ 2033*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.nproc) = (xval)) 2034*0b57cec5SDimitry Andric 2035*0b57cec5SDimitry Andric #define set__thread_limit(xthread, xval) \ 2036*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval)) 2037*0b57cec5SDimitry Andric 2038*0b57cec5SDimitry Andric #define set__max_active_levels(xthread, xval) \ 2039*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval)) 2040*0b57cec5SDimitry Andric 2041*0b57cec5SDimitry Andric #define get__max_active_levels(xthread) \ 2042*0b57cec5SDimitry Andric ((xthread)->th.th_current_task->td_icvs.max_active_levels) 2043*0b57cec5SDimitry Andric 2044*0b57cec5SDimitry Andric #define set__sched(xthread, xval) \ 2045*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.sched) = (xval)) 2046*0b57cec5SDimitry Andric 2047*0b57cec5SDimitry Andric #define set__proc_bind(xthread, xval) \ 2048*0b57cec5SDimitry Andric (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval)) 2049*0b57cec5SDimitry Andric #define get__proc_bind(xthread) \ 2050*0b57cec5SDimitry Andric ((xthread)->th.th_current_task->td_icvs.proc_bind) 2051*0b57cec5SDimitry Andric 2052*0b57cec5SDimitry Andric // OpenMP tasking data structures 2053*0b57cec5SDimitry Andric 2054*0b57cec5SDimitry Andric typedef enum kmp_tasking_mode { 2055*0b57cec5SDimitry Andric tskm_immediate_exec = 0, 2056*0b57cec5SDimitry Andric tskm_extra_barrier = 1, 2057*0b57cec5SDimitry Andric tskm_task_teams = 2, 2058*0b57cec5SDimitry Andric tskm_max = 2 2059*0b57cec5SDimitry Andric } kmp_tasking_mode_t; 2060*0b57cec5SDimitry Andric 2061*0b57cec5SDimitry Andric extern kmp_tasking_mode_t 2062*0b57cec5SDimitry Andric __kmp_tasking_mode; /* determines how/when to execute tasks */ 2063*0b57cec5SDimitry Andric extern int __kmp_task_stealing_constraint; 2064*0b57cec5SDimitry Andric extern int __kmp_enable_task_throttling; 2065*0b57cec5SDimitry Andric extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if 2066*0b57cec5SDimitry Andric // specified, defaults to 0 otherwise 2067*0b57cec5SDimitry Andric // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise 2068*0b57cec5SDimitry Andric extern kmp_int32 __kmp_max_task_priority; 2069*0b57cec5SDimitry Andric // Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise 2070*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_taskloop_min_tasks; 2071*0b57cec5SDimitry Andric 2072*0b57cec5SDimitry Andric /* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with 2073*0b57cec5SDimitry Andric taskdata first */ 2074*0b57cec5SDimitry Andric #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1) 2075*0b57cec5SDimitry Andric #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1) 2076*0b57cec5SDimitry Andric 2077*0b57cec5SDimitry Andric // The tt_found_tasks flag is a signal to all threads in the team that tasks 2078*0b57cec5SDimitry Andric // were spawned and queued since the previous barrier release. 2079*0b57cec5SDimitry Andric #define KMP_TASKING_ENABLED(task_team) \ 2080*0b57cec5SDimitry Andric (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) 2081*0b57cec5SDimitry Andric /*! 2082*0b57cec5SDimitry Andric @ingroup BASIC_TYPES 2083*0b57cec5SDimitry Andric @{ 2084*0b57cec5SDimitry Andric */ 2085*0b57cec5SDimitry Andric 2086*0b57cec5SDimitry Andric /*! 2087*0b57cec5SDimitry Andric */ 2088*0b57cec5SDimitry Andric typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *); 2089*0b57cec5SDimitry Andric 2090*0b57cec5SDimitry Andric typedef union kmp_cmplrdata { 2091*0b57cec5SDimitry Andric kmp_int32 priority; /**< priority specified by user for the task */ 2092*0b57cec5SDimitry Andric kmp_routine_entry_t 2093*0b57cec5SDimitry Andric destructors; /* pointer to function to invoke deconstructors of 2094*0b57cec5SDimitry Andric firstprivate C++ objects */ 2095*0b57cec5SDimitry Andric /* future data */ 2096*0b57cec5SDimitry Andric } kmp_cmplrdata_t; 2097*0b57cec5SDimitry Andric 2098*0b57cec5SDimitry Andric /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */ 2099*0b57cec5SDimitry Andric /*! 2100*0b57cec5SDimitry Andric */ 2101*0b57cec5SDimitry Andric typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */ 2102*0b57cec5SDimitry Andric void *shareds; /**< pointer to block of pointers to shared vars */ 2103*0b57cec5SDimitry Andric kmp_routine_entry_t 2104*0b57cec5SDimitry Andric routine; /**< pointer to routine to call for executing task */ 2105*0b57cec5SDimitry Andric kmp_int32 part_id; /**< part id for the task */ 2106*0b57cec5SDimitry Andric kmp_cmplrdata_t 2107*0b57cec5SDimitry Andric data1; /* Two known optional additions: destructors and priority */ 2108*0b57cec5SDimitry Andric kmp_cmplrdata_t data2; /* Process destructors first, priority second */ 2109*0b57cec5SDimitry Andric /* future data */ 2110*0b57cec5SDimitry Andric /* private vars */ 2111*0b57cec5SDimitry Andric } kmp_task_t; 2112*0b57cec5SDimitry Andric 2113*0b57cec5SDimitry Andric /*! 2114*0b57cec5SDimitry Andric @} 2115*0b57cec5SDimitry Andric */ 2116*0b57cec5SDimitry Andric 2117*0b57cec5SDimitry Andric typedef struct kmp_taskgroup { 2118*0b57cec5SDimitry Andric std::atomic<kmp_int32> count; // number of allocated and incomplete tasks 2119*0b57cec5SDimitry Andric std::atomic<kmp_int32> 2120*0b57cec5SDimitry Andric cancel_request; // request for cancellation of this taskgroup 2121*0b57cec5SDimitry Andric struct kmp_taskgroup *parent; // parent taskgroup 2122*0b57cec5SDimitry Andric // Block of data to perform task reduction 2123*0b57cec5SDimitry Andric void *reduce_data; // reduction related info 2124*0b57cec5SDimitry Andric kmp_int32 reduce_num_data; // number of data items to reduce 2125*0b57cec5SDimitry Andric } kmp_taskgroup_t; 2126*0b57cec5SDimitry Andric 2127*0b57cec5SDimitry Andric // forward declarations 2128*0b57cec5SDimitry Andric typedef union kmp_depnode kmp_depnode_t; 2129*0b57cec5SDimitry Andric typedef struct kmp_depnode_list kmp_depnode_list_t; 2130*0b57cec5SDimitry Andric typedef struct kmp_dephash_entry kmp_dephash_entry_t; 2131*0b57cec5SDimitry Andric 2132*0b57cec5SDimitry Andric // Compiler sends us this info: 2133*0b57cec5SDimitry Andric typedef struct kmp_depend_info { 2134*0b57cec5SDimitry Andric kmp_intptr_t base_addr; 2135*0b57cec5SDimitry Andric size_t len; 2136*0b57cec5SDimitry Andric struct { 2137*0b57cec5SDimitry Andric bool in : 1; 2138*0b57cec5SDimitry Andric bool out : 1; 2139*0b57cec5SDimitry Andric bool mtx : 1; 2140*0b57cec5SDimitry Andric } flags; 2141*0b57cec5SDimitry Andric } kmp_depend_info_t; 2142*0b57cec5SDimitry Andric 2143*0b57cec5SDimitry Andric // Internal structures to work with task dependencies: 2144*0b57cec5SDimitry Andric struct kmp_depnode_list { 2145*0b57cec5SDimitry Andric kmp_depnode_t *node; 2146*0b57cec5SDimitry Andric kmp_depnode_list_t *next; 2147*0b57cec5SDimitry Andric }; 2148*0b57cec5SDimitry Andric 2149*0b57cec5SDimitry Andric // Max number of mutexinoutset dependencies per node 2150*0b57cec5SDimitry Andric #define MAX_MTX_DEPS 4 2151*0b57cec5SDimitry Andric 2152*0b57cec5SDimitry Andric typedef struct kmp_base_depnode { 2153*0b57cec5SDimitry Andric kmp_depnode_list_t *successors; /* used under lock */ 2154*0b57cec5SDimitry Andric kmp_task_t *task; /* non-NULL if depnode is active, used under lock */ 2155*0b57cec5SDimitry Andric kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */ 2156*0b57cec5SDimitry Andric kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */ 2157*0b57cec5SDimitry Andric kmp_lock_t lock; /* guards shared fields: task, successors */ 2158*0b57cec5SDimitry Andric #if KMP_SUPPORT_GRAPH_OUTPUT 2159*0b57cec5SDimitry Andric kmp_uint32 id; 2160*0b57cec5SDimitry Andric #endif 2161*0b57cec5SDimitry Andric std::atomic<kmp_int32> npredecessors; 2162*0b57cec5SDimitry Andric std::atomic<kmp_int32> nrefs; 2163*0b57cec5SDimitry Andric } kmp_base_depnode_t; 2164*0b57cec5SDimitry Andric 2165*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_depnode { 2166*0b57cec5SDimitry Andric double dn_align; /* use worst case alignment */ 2167*0b57cec5SDimitry Andric char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)]; 2168*0b57cec5SDimitry Andric kmp_base_depnode_t dn; 2169*0b57cec5SDimitry Andric }; 2170*0b57cec5SDimitry Andric 2171*0b57cec5SDimitry Andric struct kmp_dephash_entry { 2172*0b57cec5SDimitry Andric kmp_intptr_t addr; 2173*0b57cec5SDimitry Andric kmp_depnode_t *last_out; 2174*0b57cec5SDimitry Andric kmp_depnode_list_t *last_ins; 2175*0b57cec5SDimitry Andric kmp_depnode_list_t *last_mtxs; 2176*0b57cec5SDimitry Andric kmp_int32 last_flag; 2177*0b57cec5SDimitry Andric kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */ 2178*0b57cec5SDimitry Andric kmp_dephash_entry_t *next_in_bucket; 2179*0b57cec5SDimitry Andric }; 2180*0b57cec5SDimitry Andric 2181*0b57cec5SDimitry Andric typedef struct kmp_dephash { 2182*0b57cec5SDimitry Andric kmp_dephash_entry_t **buckets; 2183*0b57cec5SDimitry Andric size_t size; 2184*0b57cec5SDimitry Andric #ifdef KMP_DEBUG 2185*0b57cec5SDimitry Andric kmp_uint32 nelements; 2186*0b57cec5SDimitry Andric kmp_uint32 nconflicts; 2187*0b57cec5SDimitry Andric #endif 2188*0b57cec5SDimitry Andric } kmp_dephash_t; 2189*0b57cec5SDimitry Andric 2190*0b57cec5SDimitry Andric typedef struct kmp_task_affinity_info { 2191*0b57cec5SDimitry Andric kmp_intptr_t base_addr; 2192*0b57cec5SDimitry Andric size_t len; 2193*0b57cec5SDimitry Andric struct { 2194*0b57cec5SDimitry Andric bool flag1 : 1; 2195*0b57cec5SDimitry Andric bool flag2 : 1; 2196*0b57cec5SDimitry Andric kmp_int32 reserved : 30; 2197*0b57cec5SDimitry Andric } flags; 2198*0b57cec5SDimitry Andric } kmp_task_affinity_info_t; 2199*0b57cec5SDimitry Andric 2200*0b57cec5SDimitry Andric typedef enum kmp_event_type_t { 2201*0b57cec5SDimitry Andric KMP_EVENT_UNINITIALIZED = 0, 2202*0b57cec5SDimitry Andric KMP_EVENT_ALLOW_COMPLETION = 1 2203*0b57cec5SDimitry Andric } kmp_event_type_t; 2204*0b57cec5SDimitry Andric 2205*0b57cec5SDimitry Andric typedef struct { 2206*0b57cec5SDimitry Andric kmp_event_type_t type; 2207*0b57cec5SDimitry Andric kmp_tas_lock_t lock; 2208*0b57cec5SDimitry Andric union { 2209*0b57cec5SDimitry Andric kmp_task_t *task; 2210*0b57cec5SDimitry Andric } ed; 2211*0b57cec5SDimitry Andric } kmp_event_t; 2212*0b57cec5SDimitry Andric 2213*0b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK 2214*0b57cec5SDimitry Andric 2215*0b57cec5SDimitry Andric /* Tied Task stack definitions */ 2216*0b57cec5SDimitry Andric typedef struct kmp_stack_block { 2217*0b57cec5SDimitry Andric kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE]; 2218*0b57cec5SDimitry Andric struct kmp_stack_block *sb_next; 2219*0b57cec5SDimitry Andric struct kmp_stack_block *sb_prev; 2220*0b57cec5SDimitry Andric } kmp_stack_block_t; 2221*0b57cec5SDimitry Andric 2222*0b57cec5SDimitry Andric typedef struct kmp_task_stack { 2223*0b57cec5SDimitry Andric kmp_stack_block_t ts_first_block; // first block of stack entries 2224*0b57cec5SDimitry Andric kmp_taskdata_t **ts_top; // pointer to the top of stack 2225*0b57cec5SDimitry Andric kmp_int32 ts_entries; // number of entries on the stack 2226*0b57cec5SDimitry Andric } kmp_task_stack_t; 2227*0b57cec5SDimitry Andric 2228*0b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK 2229*0b57cec5SDimitry Andric 2230*0b57cec5SDimitry Andric typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ 2231*0b57cec5SDimitry Andric /* Compiler flags */ /* Total compiler flags must be 16 bits */ 2232*0b57cec5SDimitry Andric unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ 2233*0b57cec5SDimitry Andric unsigned final : 1; /* task is final(1) so execute immediately */ 2234*0b57cec5SDimitry Andric unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 2235*0b57cec5SDimitry Andric code path */ 2236*0b57cec5SDimitry Andric unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to 2237*0b57cec5SDimitry Andric invoke destructors from the runtime */ 2238*0b57cec5SDimitry Andric unsigned proxy : 1; /* task is a proxy task (it will be executed outside the 2239*0b57cec5SDimitry Andric context of the RTL) */ 2240*0b57cec5SDimitry Andric unsigned priority_specified : 1; /* set if the compiler provides priority 2241*0b57cec5SDimitry Andric setting for the task */ 2242*0b57cec5SDimitry Andric unsigned detachable : 1; /* 1 == can detach */ 2243*0b57cec5SDimitry Andric unsigned reserved : 9; /* reserved for compiler use */ 2244*0b57cec5SDimitry Andric 2245*0b57cec5SDimitry Andric /* Library flags */ /* Total library flags must be 16 bits */ 2246*0b57cec5SDimitry Andric unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ 2247*0b57cec5SDimitry Andric unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) 2248*0b57cec5SDimitry Andric unsigned tasking_ser : 1; // all tasks in team are either executed immediately 2249*0b57cec5SDimitry Andric // (1) or may be deferred (0) 2250*0b57cec5SDimitry Andric unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel 2251*0b57cec5SDimitry Andric // (0) [>= 2 threads] 2252*0b57cec5SDimitry Andric /* If either team_serial or tasking_ser is set, task team may be NULL */ 2253*0b57cec5SDimitry Andric /* Task State Flags: */ 2254*0b57cec5SDimitry Andric unsigned started : 1; /* 1==started, 0==not started */ 2255*0b57cec5SDimitry Andric unsigned executing : 1; /* 1==executing, 0==not executing */ 2256*0b57cec5SDimitry Andric unsigned complete : 1; /* 1==complete, 0==not complete */ 2257*0b57cec5SDimitry Andric unsigned freed : 1; /* 1==freed, 0==allocateed */ 2258*0b57cec5SDimitry Andric unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ 2259*0b57cec5SDimitry Andric unsigned reserved31 : 7; /* reserved for library use */ 2260*0b57cec5SDimitry Andric 2261*0b57cec5SDimitry Andric } kmp_tasking_flags_t; 2262*0b57cec5SDimitry Andric 2263*0b57cec5SDimitry Andric struct kmp_taskdata { /* aligned during dynamic allocation */ 2264*0b57cec5SDimitry Andric kmp_int32 td_task_id; /* id, assigned by debugger */ 2265*0b57cec5SDimitry Andric kmp_tasking_flags_t td_flags; /* task flags */ 2266*0b57cec5SDimitry Andric kmp_team_t *td_team; /* team for this task */ 2267*0b57cec5SDimitry Andric kmp_info_p *td_alloc_thread; /* thread that allocated data structures */ 2268*0b57cec5SDimitry Andric /* Currently not used except for perhaps IDB */ 2269*0b57cec5SDimitry Andric kmp_taskdata_t *td_parent; /* parent task */ 2270*0b57cec5SDimitry Andric kmp_int32 td_level; /* task nesting level */ 2271*0b57cec5SDimitry Andric std::atomic<kmp_int32> td_untied_count; // untied task active parts counter 2272*0b57cec5SDimitry Andric ident_t *td_ident; /* task identifier */ 2273*0b57cec5SDimitry Andric // Taskwait data. 2274*0b57cec5SDimitry Andric ident_t *td_taskwait_ident; 2275*0b57cec5SDimitry Andric kmp_uint32 td_taskwait_counter; 2276*0b57cec5SDimitry Andric kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ 2277*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_internal_control_t 2278*0b57cec5SDimitry Andric td_icvs; /* Internal control variables for the task */ 2279*0b57cec5SDimitry Andric KMP_ALIGN_CACHE std::atomic<kmp_int32> 2280*0b57cec5SDimitry Andric td_allocated_child_tasks; /* Child tasks (+ current task) not yet 2281*0b57cec5SDimitry Andric deallocated */ 2282*0b57cec5SDimitry Andric std::atomic<kmp_int32> 2283*0b57cec5SDimitry Andric td_incomplete_child_tasks; /* Child tasks not yet complete */ 2284*0b57cec5SDimitry Andric kmp_taskgroup_t 2285*0b57cec5SDimitry Andric *td_taskgroup; // Each task keeps pointer to its current taskgroup 2286*0b57cec5SDimitry Andric kmp_dephash_t 2287*0b57cec5SDimitry Andric *td_dephash; // Dependencies for children tasks are tracked from here 2288*0b57cec5SDimitry Andric kmp_depnode_t 2289*0b57cec5SDimitry Andric *td_depnode; // Pointer to graph node if this task has dependencies 2290*0b57cec5SDimitry Andric kmp_task_team_t *td_task_team; 2291*0b57cec5SDimitry Andric kmp_int32 td_size_alloc; // The size of task structure, including shareds etc. 2292*0b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 2293*0b57cec5SDimitry Andric // 4 or 8 byte integers for the loop bounds in GOMP_taskloop 2294*0b57cec5SDimitry Andric kmp_int32 td_size_loop_bounds; 2295*0b57cec5SDimitry Andric #endif 2296*0b57cec5SDimitry Andric kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint 2297*0b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 2298*0b57cec5SDimitry Andric // GOMP sends in a copy function for copy constructors 2299*0b57cec5SDimitry Andric void (*td_copy_func)(void *, void *); 2300*0b57cec5SDimitry Andric #endif 2301*0b57cec5SDimitry Andric kmp_event_t td_allow_completion_event; 2302*0b57cec5SDimitry Andric #if OMPT_SUPPORT 2303*0b57cec5SDimitry Andric ompt_task_info_t ompt_task_info; 2304*0b57cec5SDimitry Andric #endif 2305*0b57cec5SDimitry Andric }; // struct kmp_taskdata 2306*0b57cec5SDimitry Andric 2307*0b57cec5SDimitry Andric // Make sure padding above worked 2308*0b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0); 2309*0b57cec5SDimitry Andric 2310*0b57cec5SDimitry Andric // Data for task team but per thread 2311*0b57cec5SDimitry Andric typedef struct kmp_base_thread_data { 2312*0b57cec5SDimitry Andric kmp_info_p *td_thr; // Pointer back to thread info 2313*0b57cec5SDimitry Andric // Used only in __kmp_execute_tasks_template, maybe not avail until task is 2314*0b57cec5SDimitry Andric // queued? 2315*0b57cec5SDimitry Andric kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque 2316*0b57cec5SDimitry Andric kmp_taskdata_t * 2317*0b57cec5SDimitry Andric *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated 2318*0b57cec5SDimitry Andric kmp_int32 td_deque_size; // Size of deck 2319*0b57cec5SDimitry Andric kmp_uint32 td_deque_head; // Head of deque (will wrap) 2320*0b57cec5SDimitry Andric kmp_uint32 td_deque_tail; // Tail of deque (will wrap) 2321*0b57cec5SDimitry Andric kmp_int32 td_deque_ntasks; // Number of tasks in deque 2322*0b57cec5SDimitry Andric // GEH: shouldn't this be volatile since used in while-spin? 2323*0b57cec5SDimitry Andric kmp_int32 td_deque_last_stolen; // Thread number of last successful steal 2324*0b57cec5SDimitry Andric #ifdef BUILD_TIED_TASK_STACK 2325*0b57cec5SDimitry Andric kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task 2326*0b57cec5SDimitry Andric // scheduling constraint 2327*0b57cec5SDimitry Andric #endif // BUILD_TIED_TASK_STACK 2328*0b57cec5SDimitry Andric } kmp_base_thread_data_t; 2329*0b57cec5SDimitry Andric 2330*0b57cec5SDimitry Andric #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE 2331*0b57cec5SDimitry Andric #define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS) 2332*0b57cec5SDimitry Andric 2333*0b57cec5SDimitry Andric #define TASK_DEQUE_SIZE(td) ((td).td_deque_size) 2334*0b57cec5SDimitry Andric #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1) 2335*0b57cec5SDimitry Andric 2336*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_thread_data { 2337*0b57cec5SDimitry Andric kmp_base_thread_data_t td; 2338*0b57cec5SDimitry Andric double td_align; /* use worst case alignment */ 2339*0b57cec5SDimitry Andric char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)]; 2340*0b57cec5SDimitry Andric } kmp_thread_data_t; 2341*0b57cec5SDimitry Andric 2342*0b57cec5SDimitry Andric // Data for task teams which are used when tasking is enabled for the team 2343*0b57cec5SDimitry Andric typedef struct kmp_base_task_team { 2344*0b57cec5SDimitry Andric kmp_bootstrap_lock_t 2345*0b57cec5SDimitry Andric tt_threads_lock; /* Lock used to allocate per-thread part of task team */ 2346*0b57cec5SDimitry Andric /* must be bootstrap lock since used at library shutdown*/ 2347*0b57cec5SDimitry Andric kmp_task_team_t *tt_next; /* For linking the task team free list */ 2348*0b57cec5SDimitry Andric kmp_thread_data_t 2349*0b57cec5SDimitry Andric *tt_threads_data; /* Array of per-thread structures for task team */ 2350*0b57cec5SDimitry Andric /* Data survives task team deallocation */ 2351*0b57cec5SDimitry Andric kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while 2352*0b57cec5SDimitry Andric executing this team? */ 2353*0b57cec5SDimitry Andric /* TRUE means tt_threads_data is set up and initialized */ 2354*0b57cec5SDimitry Andric kmp_int32 tt_nproc; /* #threads in team */ 2355*0b57cec5SDimitry Andric kmp_int32 tt_max_threads; // # entries allocated for threads_data array 2356*0b57cec5SDimitry Andric kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier 2357*0b57cec5SDimitry Andric kmp_int32 tt_untied_task_encountered; 2358*0b57cec5SDimitry Andric 2359*0b57cec5SDimitry Andric KMP_ALIGN_CACHE 2360*0b57cec5SDimitry Andric std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */ 2361*0b57cec5SDimitry Andric 2362*0b57cec5SDimitry Andric KMP_ALIGN_CACHE 2363*0b57cec5SDimitry Andric volatile kmp_uint32 2364*0b57cec5SDimitry Andric tt_active; /* is the team still actively executing tasks */ 2365*0b57cec5SDimitry Andric } kmp_base_task_team_t; 2366*0b57cec5SDimitry Andric 2367*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_task_team { 2368*0b57cec5SDimitry Andric kmp_base_task_team_t tt; 2369*0b57cec5SDimitry Andric double tt_align; /* use worst case alignment */ 2370*0b57cec5SDimitry Andric char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)]; 2371*0b57cec5SDimitry Andric }; 2372*0b57cec5SDimitry Andric 2373*0b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5) 2374*0b57cec5SDimitry Andric // Free lists keep same-size free memory slots for fast memory allocation 2375*0b57cec5SDimitry Andric // routines 2376*0b57cec5SDimitry Andric typedef struct kmp_free_list { 2377*0b57cec5SDimitry Andric void *th_free_list_self; // Self-allocated tasks free list 2378*0b57cec5SDimitry Andric void *th_free_list_sync; // Self-allocated tasks stolen/returned by other 2379*0b57cec5SDimitry Andric // threads 2380*0b57cec5SDimitry Andric void *th_free_list_other; // Non-self free list (to be returned to owner's 2381*0b57cec5SDimitry Andric // sync list) 2382*0b57cec5SDimitry Andric } kmp_free_list_t; 2383*0b57cec5SDimitry Andric #endif 2384*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 2385*0b57cec5SDimitry Andric // Hot teams array keeps hot teams and their sizes for given thread. Hot teams 2386*0b57cec5SDimitry Andric // are not put in teams pool, and they don't put threads in threads pool. 2387*0b57cec5SDimitry Andric typedef struct kmp_hot_team_ptr { 2388*0b57cec5SDimitry Andric kmp_team_p *hot_team; // pointer to hot_team of given nesting level 2389*0b57cec5SDimitry Andric kmp_int32 hot_team_nth; // number of threads allocated for the hot_team 2390*0b57cec5SDimitry Andric } kmp_hot_team_ptr_t; 2391*0b57cec5SDimitry Andric #endif 2392*0b57cec5SDimitry Andric typedef struct kmp_teams_size { 2393*0b57cec5SDimitry Andric kmp_int32 nteams; // number of teams in a league 2394*0b57cec5SDimitry Andric kmp_int32 nth; // number of threads in each team of the league 2395*0b57cec5SDimitry Andric } kmp_teams_size_t; 2396*0b57cec5SDimitry Andric 2397*0b57cec5SDimitry Andric // This struct stores a thread that acts as a "root" for a contention 2398*0b57cec5SDimitry Andric // group. Contention groups are rooted at kmp_root threads, but also at 2399*0b57cec5SDimitry Andric // each master thread of each team created in the teams construct. 2400*0b57cec5SDimitry Andric // This struct therefore also stores a thread_limit associated with 2401*0b57cec5SDimitry Andric // that contention group, and a counter to track the number of threads 2402*0b57cec5SDimitry Andric // active in that contention group. Each thread has a list of these: CG 2403*0b57cec5SDimitry Andric // root threads have an entry in their list in which cg_root refers to 2404*0b57cec5SDimitry Andric // the thread itself, whereas other workers in the CG will have a 2405*0b57cec5SDimitry Andric // single entry where cg_root is same as the entry containing their CG 2406*0b57cec5SDimitry Andric // root. When a thread encounters a teams construct, it will add a new 2407*0b57cec5SDimitry Andric // entry to the front of its list, because it now roots a new CG. 2408*0b57cec5SDimitry Andric typedef struct kmp_cg_root { 2409*0b57cec5SDimitry Andric kmp_info_p *cg_root; // "root" thread for a contention group 2410*0b57cec5SDimitry Andric // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or 2411*0b57cec5SDimitry Andric // thread_limit clause for teams masters 2412*0b57cec5SDimitry Andric kmp_int32 cg_thread_limit; 2413*0b57cec5SDimitry Andric kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root 2414*0b57cec5SDimitry Andric struct kmp_cg_root *up; // pointer to higher level CG root in list 2415*0b57cec5SDimitry Andric } kmp_cg_root_t; 2416*0b57cec5SDimitry Andric 2417*0b57cec5SDimitry Andric // OpenMP thread data structures 2418*0b57cec5SDimitry Andric 2419*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_info { 2420*0b57cec5SDimitry Andric /* Start with the readonly data which is cache aligned and padded. This is 2421*0b57cec5SDimitry Andric written before the thread starts working by the master. Uber masters may 2422*0b57cec5SDimitry Andric update themselves later. Usage does not consider serialized regions. */ 2423*0b57cec5SDimitry Andric kmp_desc_t th_info; 2424*0b57cec5SDimitry Andric kmp_team_p *th_team; /* team we belong to */ 2425*0b57cec5SDimitry Andric kmp_root_p *th_root; /* pointer to root of task hierarchy */ 2426*0b57cec5SDimitry Andric kmp_info_p *th_next_pool; /* next available thread in the pool */ 2427*0b57cec5SDimitry Andric kmp_disp_t *th_dispatch; /* thread's dispatch data */ 2428*0b57cec5SDimitry Andric int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */ 2429*0b57cec5SDimitry Andric 2430*0b57cec5SDimitry Andric /* The following are cached from the team info structure */ 2431*0b57cec5SDimitry Andric /* TODO use these in more places as determined to be needed via profiling */ 2432*0b57cec5SDimitry Andric int th_team_nproc; /* number of threads in a team */ 2433*0b57cec5SDimitry Andric kmp_info_p *th_team_master; /* the team's master thread */ 2434*0b57cec5SDimitry Andric int th_team_serialized; /* team is serialized */ 2435*0b57cec5SDimitry Andric microtask_t th_teams_microtask; /* save entry address for teams construct */ 2436*0b57cec5SDimitry Andric int th_teams_level; /* save initial level of teams construct */ 2437*0b57cec5SDimitry Andric /* it is 0 on device but may be any on host */ 2438*0b57cec5SDimitry Andric 2439*0b57cec5SDimitry Andric /* The blocktime info is copied from the team struct to the thread sruct */ 2440*0b57cec5SDimitry Andric /* at the start of a barrier, and the values stored in the team are used */ 2441*0b57cec5SDimitry Andric /* at points in the code where the team struct is no longer guaranteed */ 2442*0b57cec5SDimitry Andric /* to exist (from the POV of worker threads). */ 2443*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2444*0b57cec5SDimitry Andric int th_team_bt_intervals; 2445*0b57cec5SDimitry Andric int th_team_bt_set; 2446*0b57cec5SDimitry Andric #else 2447*0b57cec5SDimitry Andric kmp_uint64 th_team_bt_intervals; 2448*0b57cec5SDimitry Andric #endif 2449*0b57cec5SDimitry Andric 2450*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 2451*0b57cec5SDimitry Andric kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ 2452*0b57cec5SDimitry Andric #endif 2453*0b57cec5SDimitry Andric omp_allocator_handle_t th_def_allocator; /* default allocator */ 2454*0b57cec5SDimitry Andric /* The data set by the master at reinit, then R/W by the worker */ 2455*0b57cec5SDimitry Andric KMP_ALIGN_CACHE int 2456*0b57cec5SDimitry Andric th_set_nproc; /* if > 0, then only use this request for the next fork */ 2457*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 2458*0b57cec5SDimitry Andric kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ 2459*0b57cec5SDimitry Andric #endif 2460*0b57cec5SDimitry Andric kmp_proc_bind_t 2461*0b57cec5SDimitry Andric th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ 2462*0b57cec5SDimitry Andric kmp_teams_size_t 2463*0b57cec5SDimitry Andric th_teams_size; /* number of teams/threads in teams construct */ 2464*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 2465*0b57cec5SDimitry Andric int th_current_place; /* place currently bound to */ 2466*0b57cec5SDimitry Andric int th_new_place; /* place to bind to in par reg */ 2467*0b57cec5SDimitry Andric int th_first_place; /* first place in partition */ 2468*0b57cec5SDimitry Andric int th_last_place; /* last place in partition */ 2469*0b57cec5SDimitry Andric #endif 2470*0b57cec5SDimitry Andric int th_prev_level; /* previous level for affinity format */ 2471*0b57cec5SDimitry Andric int th_prev_num_threads; /* previous num_threads for affinity format */ 2472*0b57cec5SDimitry Andric #if USE_ITT_BUILD 2473*0b57cec5SDimitry Andric kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ 2474*0b57cec5SDimitry Andric kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ 2475*0b57cec5SDimitry Andric kmp_uint64 th_frame_time; /* frame timestamp */ 2476*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 2477*0b57cec5SDimitry Andric kmp_local_t th_local; 2478*0b57cec5SDimitry Andric struct private_common *th_pri_head; 2479*0b57cec5SDimitry Andric 2480*0b57cec5SDimitry Andric /* Now the data only used by the worker (after initial allocation) */ 2481*0b57cec5SDimitry Andric /* TODO the first serial team should actually be stored in the info_t 2482*0b57cec5SDimitry Andric structure. this will help reduce initial allocation overhead */ 2483*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_team_p 2484*0b57cec5SDimitry Andric *th_serial_team; /*serialized team held in reserve*/ 2485*0b57cec5SDimitry Andric 2486*0b57cec5SDimitry Andric #if OMPT_SUPPORT 2487*0b57cec5SDimitry Andric ompt_thread_info_t ompt_thread_info; 2488*0b57cec5SDimitry Andric #endif 2489*0b57cec5SDimitry Andric 2490*0b57cec5SDimitry Andric /* The following are also read by the master during reinit */ 2491*0b57cec5SDimitry Andric struct common_table *th_pri_common; 2492*0b57cec5SDimitry Andric 2493*0b57cec5SDimitry Andric volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */ 2494*0b57cec5SDimitry Andric /* while awaiting queuing lock acquire */ 2495*0b57cec5SDimitry Andric 2496*0b57cec5SDimitry Andric volatile void *th_sleep_loc; // this points at a kmp_flag<T> 2497*0b57cec5SDimitry Andric 2498*0b57cec5SDimitry Andric ident_t *th_ident; 2499*0b57cec5SDimitry Andric unsigned th_x; // Random number generator data 2500*0b57cec5SDimitry Andric unsigned th_a; // Random number generator data 2501*0b57cec5SDimitry Andric 2502*0b57cec5SDimitry Andric /* Tasking-related data for the thread */ 2503*0b57cec5SDimitry Andric kmp_task_team_t *th_task_team; // Task team struct 2504*0b57cec5SDimitry Andric kmp_taskdata_t *th_current_task; // Innermost Task being executed 2505*0b57cec5SDimitry Andric kmp_uint8 th_task_state; // alternating 0/1 for task team identification 2506*0b57cec5SDimitry Andric kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state 2507*0b57cec5SDimitry Andric // at nested levels 2508*0b57cec5SDimitry Andric kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack 2509*0b57cec5SDimitry Andric kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack 2510*0b57cec5SDimitry Andric kmp_uint32 th_reap_state; // Non-zero indicates thread is not 2511*0b57cec5SDimitry Andric // tasking, thus safe to reap 2512*0b57cec5SDimitry Andric 2513*0b57cec5SDimitry Andric /* More stuff for keeping track of active/sleeping threads (this part is 2514*0b57cec5SDimitry Andric written by the worker thread) */ 2515*0b57cec5SDimitry Andric kmp_uint8 th_active_in_pool; // included in count of #active threads in pool 2516*0b57cec5SDimitry Andric int th_active; // ! sleeping; 32 bits for TCR/TCW 2517*0b57cec5SDimitry Andric struct cons_header *th_cons; // used for consistency check 2518*0b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 2519*0b57cec5SDimitry Andric // used for hierarchical scheduling 2520*0b57cec5SDimitry Andric kmp_hier_private_bdata_t *th_hier_bar_data; 2521*0b57cec5SDimitry Andric #endif 2522*0b57cec5SDimitry Andric 2523*0b57cec5SDimitry Andric /* Add the syncronizing data which is cache aligned and padded. */ 2524*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier]; 2525*0b57cec5SDimitry Andric 2526*0b57cec5SDimitry Andric KMP_ALIGN_CACHE volatile kmp_int32 2527*0b57cec5SDimitry Andric th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */ 2528*0b57cec5SDimitry Andric 2529*0b57cec5SDimitry Andric #if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5) 2530*0b57cec5SDimitry Andric #define NUM_LISTS 4 2531*0b57cec5SDimitry Andric kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory 2532*0b57cec5SDimitry Andric // allocation routines 2533*0b57cec5SDimitry Andric #endif 2534*0b57cec5SDimitry Andric 2535*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 2536*0b57cec5SDimitry Andric kmp_win32_cond_t th_suspend_cv; 2537*0b57cec5SDimitry Andric kmp_win32_mutex_t th_suspend_mx; 2538*0b57cec5SDimitry Andric std::atomic<int> th_suspend_init; 2539*0b57cec5SDimitry Andric #endif 2540*0b57cec5SDimitry Andric #if KMP_OS_UNIX 2541*0b57cec5SDimitry Andric kmp_cond_align_t th_suspend_cv; 2542*0b57cec5SDimitry Andric kmp_mutex_align_t th_suspend_mx; 2543*0b57cec5SDimitry Andric std::atomic<int> th_suspend_init_count; 2544*0b57cec5SDimitry Andric #endif 2545*0b57cec5SDimitry Andric 2546*0b57cec5SDimitry Andric #if USE_ITT_BUILD 2547*0b57cec5SDimitry Andric kmp_itt_mark_t th_itt_mark_single; 2548*0b57cec5SDimitry Andric // alignment ??? 2549*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 2550*0b57cec5SDimitry Andric #if KMP_STATS_ENABLED 2551*0b57cec5SDimitry Andric kmp_stats_list *th_stats; 2552*0b57cec5SDimitry Andric #endif 2553*0b57cec5SDimitry Andric #if KMP_OS_UNIX 2554*0b57cec5SDimitry Andric std::atomic<bool> th_blocking; 2555*0b57cec5SDimitry Andric #endif 2556*0b57cec5SDimitry Andric kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread 2557*0b57cec5SDimitry Andric } kmp_base_info_t; 2558*0b57cec5SDimitry Andric 2559*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_info { 2560*0b57cec5SDimitry Andric double th_align; /* use worst case alignment */ 2561*0b57cec5SDimitry Andric char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)]; 2562*0b57cec5SDimitry Andric kmp_base_info_t th; 2563*0b57cec5SDimitry Andric } kmp_info_t; 2564*0b57cec5SDimitry Andric 2565*0b57cec5SDimitry Andric // OpenMP thread team data structures 2566*0b57cec5SDimitry Andric 2567*0b57cec5SDimitry Andric typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t; 2568*0b57cec5SDimitry Andric 2569*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_sleep_team { 2570*0b57cec5SDimitry Andric double dt_align; /* use worst case alignment */ 2571*0b57cec5SDimitry Andric char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)]; 2572*0b57cec5SDimitry Andric kmp_base_data_t dt; 2573*0b57cec5SDimitry Andric } kmp_sleep_team_t; 2574*0b57cec5SDimitry Andric 2575*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_ordered_team { 2576*0b57cec5SDimitry Andric double dt_align; /* use worst case alignment */ 2577*0b57cec5SDimitry Andric char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)]; 2578*0b57cec5SDimitry Andric kmp_base_data_t dt; 2579*0b57cec5SDimitry Andric } kmp_ordered_team_t; 2580*0b57cec5SDimitry Andric 2581*0b57cec5SDimitry Andric typedef int (*launch_t)(int gtid); 2582*0b57cec5SDimitry Andric 2583*0b57cec5SDimitry Andric /* Minimum number of ARGV entries to malloc if necessary */ 2584*0b57cec5SDimitry Andric #define KMP_MIN_MALLOC_ARGV_ENTRIES 100 2585*0b57cec5SDimitry Andric 2586*0b57cec5SDimitry Andric // Set up how many argv pointers will fit in cache lines containing 2587*0b57cec5SDimitry Andric // t_inline_argv. Historically, we have supported at least 96 bytes. Using a 2588*0b57cec5SDimitry Andric // larger value for more space between the master write/worker read section and 2589*0b57cec5SDimitry Andric // read/write by all section seems to buy more performance on EPCC PARALLEL. 2590*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2591*0b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES \ 2592*0b57cec5SDimitry Andric (4 * CACHE_LINE - \ 2593*0b57cec5SDimitry Andric ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \ 2594*0b57cec5SDimitry Andric sizeof(kmp_int16) + sizeof(kmp_uint32)) % \ 2595*0b57cec5SDimitry Andric CACHE_LINE)) 2596*0b57cec5SDimitry Andric #else 2597*0b57cec5SDimitry Andric #define KMP_INLINE_ARGV_BYTES \ 2598*0b57cec5SDimitry Andric (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE)) 2599*0b57cec5SDimitry Andric #endif 2600*0b57cec5SDimitry Andric #define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP) 2601*0b57cec5SDimitry Andric 2602*0b57cec5SDimitry Andric typedef struct KMP_ALIGN_CACHE kmp_base_team { 2603*0b57cec5SDimitry Andric // Synchronization Data 2604*0b57cec5SDimitry Andric // --------------------------------------------------------------------------- 2605*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; 2606*0b57cec5SDimitry Andric kmp_balign_team_t t_bar[bs_last_barrier]; 2607*0b57cec5SDimitry Andric std::atomic<int> t_construct; // count of single directive encountered by team 2608*0b57cec5SDimitry Andric char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron 2609*0b57cec5SDimitry Andric 2610*0b57cec5SDimitry Andric // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups 2611*0b57cec5SDimitry Andric std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier 2612*0b57cec5SDimitry Andric std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions 2613*0b57cec5SDimitry Andric 2614*0b57cec5SDimitry Andric // Master only 2615*0b57cec5SDimitry Andric // --------------------------------------------------------------------------- 2616*0b57cec5SDimitry Andric KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team 2617*0b57cec5SDimitry Andric int t_master_this_cons; // "this_construct" single counter of master in parent 2618*0b57cec5SDimitry Andric // team 2619*0b57cec5SDimitry Andric ident_t *t_ident; // if volatile, have to change too much other crud to 2620*0b57cec5SDimitry Andric // volatile too 2621*0b57cec5SDimitry Andric kmp_team_p *t_parent; // parent team 2622*0b57cec5SDimitry Andric kmp_team_p *t_next_pool; // next free team in the team pool 2623*0b57cec5SDimitry Andric kmp_disp_t *t_dispatch; // thread's dispatch data 2624*0b57cec5SDimitry Andric kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 2625*0b57cec5SDimitry Andric kmp_proc_bind_t t_proc_bind; // bind type for par region 2626*0b57cec5SDimitry Andric #if USE_ITT_BUILD 2627*0b57cec5SDimitry Andric kmp_uint64 t_region_time; // region begin timestamp 2628*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 2629*0b57cec5SDimitry Andric 2630*0b57cec5SDimitry Andric // Master write, workers read 2631*0b57cec5SDimitry Andric // -------------------------------------------------------------------------- 2632*0b57cec5SDimitry Andric KMP_ALIGN_CACHE void **t_argv; 2633*0b57cec5SDimitry Andric int t_argc; 2634*0b57cec5SDimitry Andric int t_nproc; // number of threads in team 2635*0b57cec5SDimitry Andric microtask_t t_pkfn; 2636*0b57cec5SDimitry Andric launch_t t_invoke; // procedure to launch the microtask 2637*0b57cec5SDimitry Andric 2638*0b57cec5SDimitry Andric #if OMPT_SUPPORT 2639*0b57cec5SDimitry Andric ompt_team_info_t ompt_team_info; 2640*0b57cec5SDimitry Andric ompt_lw_taskteam_t *ompt_serialized_team_info; 2641*0b57cec5SDimitry Andric #endif 2642*0b57cec5SDimitry Andric 2643*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2644*0b57cec5SDimitry Andric kmp_int8 t_fp_control_saved; 2645*0b57cec5SDimitry Andric kmp_int8 t_pad2b; 2646*0b57cec5SDimitry Andric kmp_int16 t_x87_fpu_control_word; // FP control regs 2647*0b57cec5SDimitry Andric kmp_uint32 t_mxcsr; 2648*0b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 2649*0b57cec5SDimitry Andric 2650*0b57cec5SDimitry Andric void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES]; 2651*0b57cec5SDimitry Andric 2652*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_info_t **t_threads; 2653*0b57cec5SDimitry Andric kmp_taskdata_t 2654*0b57cec5SDimitry Andric *t_implicit_task_taskdata; // Taskdata for the thread's implicit task 2655*0b57cec5SDimitry Andric int t_level; // nested parallel level 2656*0b57cec5SDimitry Andric 2657*0b57cec5SDimitry Andric KMP_ALIGN_CACHE int t_max_argc; 2658*0b57cec5SDimitry Andric int t_max_nproc; // max threads this team can handle (dynamicly expandable) 2659*0b57cec5SDimitry Andric int t_serialized; // levels deep of serialized teams 2660*0b57cec5SDimitry Andric dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system 2661*0b57cec5SDimitry Andric int t_id; // team's id, assigned by debugger. 2662*0b57cec5SDimitry Andric int t_active_level; // nested active parallel level 2663*0b57cec5SDimitry Andric kmp_r_sched_t t_sched; // run-time schedule for the team 2664*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 2665*0b57cec5SDimitry Andric int t_first_place; // first & last place in parent thread's partition. 2666*0b57cec5SDimitry Andric int t_last_place; // Restore these values to master after par region. 2667*0b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED 2668*0b57cec5SDimitry Andric int t_display_affinity; 2669*0b57cec5SDimitry Andric int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via 2670*0b57cec5SDimitry Andric // omp_set_num_threads() call 2671*0b57cec5SDimitry Andric omp_allocator_handle_t t_def_allocator; /* default allocator */ 2672*0b57cec5SDimitry Andric 2673*0b57cec5SDimitry Andric // Read/write by workers as well 2674*0b57cec5SDimitry Andric #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 2675*0b57cec5SDimitry Andric // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf 2676*0b57cec5SDimitry Andric // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra 2677*0b57cec5SDimitry Andric // padding serves to fix the performance of epcc 'parallel' and 'barrier' when 2678*0b57cec5SDimitry Andric // CACHE_LINE=64. TODO: investigate more and get rid if this padding. 2679*0b57cec5SDimitry Andric char dummy_padding[1024]; 2680*0b57cec5SDimitry Andric #endif 2681*0b57cec5SDimitry Andric // Internal control stack for additional nested teams. 2682*0b57cec5SDimitry Andric KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; 2683*0b57cec5SDimitry Andric // for SERIALIZED teams nested 2 or more levels deep 2684*0b57cec5SDimitry Andric // typed flag to store request state of cancellation 2685*0b57cec5SDimitry Andric std::atomic<kmp_int32> t_cancel_request; 2686*0b57cec5SDimitry Andric int t_master_active; // save on fork, restore on join 2687*0b57cec5SDimitry Andric void *t_copypriv_data; // team specific pointer to copyprivate data array 2688*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 2689*0b57cec5SDimitry Andric std::atomic<kmp_uint32> t_copyin_counter; 2690*0b57cec5SDimitry Andric #endif 2691*0b57cec5SDimitry Andric #if USE_ITT_BUILD 2692*0b57cec5SDimitry Andric void *t_stack_id; // team specific stack stitching id (for ittnotify) 2693*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 2694*0b57cec5SDimitry Andric } kmp_base_team_t; 2695*0b57cec5SDimitry Andric 2696*0b57cec5SDimitry Andric union KMP_ALIGN_CACHE kmp_team { 2697*0b57cec5SDimitry Andric kmp_base_team_t t; 2698*0b57cec5SDimitry Andric double t_align; /* use worst case alignment */ 2699*0b57cec5SDimitry Andric char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)]; 2700*0b57cec5SDimitry Andric }; 2701*0b57cec5SDimitry Andric 2702*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_time_global { 2703*0b57cec5SDimitry Andric double dt_align; /* use worst case alignment */ 2704*0b57cec5SDimitry Andric char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)]; 2705*0b57cec5SDimitry Andric kmp_base_data_t dt; 2706*0b57cec5SDimitry Andric } kmp_time_global_t; 2707*0b57cec5SDimitry Andric 2708*0b57cec5SDimitry Andric typedef struct kmp_base_global { 2709*0b57cec5SDimitry Andric /* cache-aligned */ 2710*0b57cec5SDimitry Andric kmp_time_global_t g_time; 2711*0b57cec5SDimitry Andric 2712*0b57cec5SDimitry Andric /* non cache-aligned */ 2713*0b57cec5SDimitry Andric volatile int g_abort; 2714*0b57cec5SDimitry Andric volatile int g_done; 2715*0b57cec5SDimitry Andric 2716*0b57cec5SDimitry Andric int g_dynamic; 2717*0b57cec5SDimitry Andric enum dynamic_mode g_dynamic_mode; 2718*0b57cec5SDimitry Andric } kmp_base_global_t; 2719*0b57cec5SDimitry Andric 2720*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_global { 2721*0b57cec5SDimitry Andric kmp_base_global_t g; 2722*0b57cec5SDimitry Andric double g_align; /* use worst case alignment */ 2723*0b57cec5SDimitry Andric char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)]; 2724*0b57cec5SDimitry Andric } kmp_global_t; 2725*0b57cec5SDimitry Andric 2726*0b57cec5SDimitry Andric typedef struct kmp_base_root { 2727*0b57cec5SDimitry Andric // TODO: GEH - combine r_active with r_in_parallel then r_active == 2728*0b57cec5SDimitry Andric // (r_in_parallel>= 0) 2729*0b57cec5SDimitry Andric // TODO: GEH - then replace r_active with t_active_levels if we can to reduce 2730*0b57cec5SDimitry Andric // the synch overhead or keeping r_active 2731*0b57cec5SDimitry Andric volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ 2732*0b57cec5SDimitry Andric // keeps a count of active parallel regions per root 2733*0b57cec5SDimitry Andric std::atomic<int> r_in_parallel; 2734*0b57cec5SDimitry Andric // GEH: This is misnamed, should be r_active_levels 2735*0b57cec5SDimitry Andric kmp_team_t *r_root_team; 2736*0b57cec5SDimitry Andric kmp_team_t *r_hot_team; 2737*0b57cec5SDimitry Andric kmp_info_t *r_uber_thread; 2738*0b57cec5SDimitry Andric kmp_lock_t r_begin_lock; 2739*0b57cec5SDimitry Andric volatile int r_begin; 2740*0b57cec5SDimitry Andric int r_blocktime; /* blocktime for this root and descendants */ 2741*0b57cec5SDimitry Andric } kmp_base_root_t; 2742*0b57cec5SDimitry Andric 2743*0b57cec5SDimitry Andric typedef union KMP_ALIGN_CACHE kmp_root { 2744*0b57cec5SDimitry Andric kmp_base_root_t r; 2745*0b57cec5SDimitry Andric double r_align; /* use worst case alignment */ 2746*0b57cec5SDimitry Andric char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)]; 2747*0b57cec5SDimitry Andric } kmp_root_t; 2748*0b57cec5SDimitry Andric 2749*0b57cec5SDimitry Andric struct fortran_inx_info { 2750*0b57cec5SDimitry Andric kmp_int32 data; 2751*0b57cec5SDimitry Andric }; 2752*0b57cec5SDimitry Andric 2753*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 2754*0b57cec5SDimitry Andric 2755*0b57cec5SDimitry Andric extern int __kmp_settings; 2756*0b57cec5SDimitry Andric extern int __kmp_duplicate_library_ok; 2757*0b57cec5SDimitry Andric #if USE_ITT_BUILD 2758*0b57cec5SDimitry Andric extern int __kmp_forkjoin_frames; 2759*0b57cec5SDimitry Andric extern int __kmp_forkjoin_frames_mode; 2760*0b57cec5SDimitry Andric #endif 2761*0b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method; 2762*0b57cec5SDimitry Andric extern int __kmp_determ_red; 2763*0b57cec5SDimitry Andric 2764*0b57cec5SDimitry Andric #ifdef KMP_DEBUG 2765*0b57cec5SDimitry Andric extern int kmp_a_debug; 2766*0b57cec5SDimitry Andric extern int kmp_b_debug; 2767*0b57cec5SDimitry Andric extern int kmp_c_debug; 2768*0b57cec5SDimitry Andric extern int kmp_d_debug; 2769*0b57cec5SDimitry Andric extern int kmp_e_debug; 2770*0b57cec5SDimitry Andric extern int kmp_f_debug; 2771*0b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 2772*0b57cec5SDimitry Andric 2773*0b57cec5SDimitry Andric /* For debug information logging using rotating buffer */ 2774*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_INIT 512 2775*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_LINES_MIN 1 2776*0b57cec5SDimitry Andric 2777*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_INIT 128 2778*0b57cec5SDimitry Andric #define KMP_DEBUG_BUF_CHARS_MIN 2 2779*0b57cec5SDimitry Andric 2780*0b57cec5SDimitry Andric extern int 2781*0b57cec5SDimitry Andric __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */ 2782*0b57cec5SDimitry Andric extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */ 2783*0b57cec5SDimitry Andric extern int 2784*0b57cec5SDimitry Andric __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */ 2785*0b57cec5SDimitry Andric extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer 2786*0b57cec5SDimitry Andric entry pointer */ 2787*0b57cec5SDimitry Andric 2788*0b57cec5SDimitry Andric extern char *__kmp_debug_buffer; /* Debug buffer itself */ 2789*0b57cec5SDimitry Andric extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines 2790*0b57cec5SDimitry Andric printed in buffer so far */ 2791*0b57cec5SDimitry Andric extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase 2792*0b57cec5SDimitry Andric recommended in warnings */ 2793*0b57cec5SDimitry Andric /* end rotating debug buffer */ 2794*0b57cec5SDimitry Andric 2795*0b57cec5SDimitry Andric #ifdef KMP_DEBUG 2796*0b57cec5SDimitry Andric extern int __kmp_par_range; /* +1 => only go par for constructs in range */ 2797*0b57cec5SDimitry Andric 2798*0b57cec5SDimitry Andric #define KMP_PAR_RANGE_ROUTINE_LEN 1024 2799*0b57cec5SDimitry Andric extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN]; 2800*0b57cec5SDimitry Andric #define KMP_PAR_RANGE_FILENAME_LEN 1024 2801*0b57cec5SDimitry Andric extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN]; 2802*0b57cec5SDimitry Andric extern int __kmp_par_range_lb; 2803*0b57cec5SDimitry Andric extern int __kmp_par_range_ub; 2804*0b57cec5SDimitry Andric #endif 2805*0b57cec5SDimitry Andric 2806*0b57cec5SDimitry Andric /* For printing out dynamic storage map for threads and teams */ 2807*0b57cec5SDimitry Andric extern int 2808*0b57cec5SDimitry Andric __kmp_storage_map; /* True means print storage map for threads and teams */ 2809*0b57cec5SDimitry Andric extern int __kmp_storage_map_verbose; /* True means storage map includes 2810*0b57cec5SDimitry Andric placement info */ 2811*0b57cec5SDimitry Andric extern int __kmp_storage_map_verbose_specified; 2812*0b57cec5SDimitry Andric 2813*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2814*0b57cec5SDimitry Andric extern kmp_cpuinfo_t __kmp_cpuinfo; 2815*0b57cec5SDimitry Andric #endif 2816*0b57cec5SDimitry Andric 2817*0b57cec5SDimitry Andric extern volatile int __kmp_init_serial; 2818*0b57cec5SDimitry Andric extern volatile int __kmp_init_gtid; 2819*0b57cec5SDimitry Andric extern volatile int __kmp_init_common; 2820*0b57cec5SDimitry Andric extern volatile int __kmp_init_middle; 2821*0b57cec5SDimitry Andric extern volatile int __kmp_init_parallel; 2822*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2823*0b57cec5SDimitry Andric extern volatile int __kmp_init_monitor; 2824*0b57cec5SDimitry Andric #endif 2825*0b57cec5SDimitry Andric extern volatile int __kmp_init_user_locks; 2826*0b57cec5SDimitry Andric extern int __kmp_init_counter; 2827*0b57cec5SDimitry Andric extern int __kmp_root_counter; 2828*0b57cec5SDimitry Andric extern int __kmp_version; 2829*0b57cec5SDimitry Andric 2830*0b57cec5SDimitry Andric /* list of address of allocated caches for commons */ 2831*0b57cec5SDimitry Andric extern kmp_cached_addr_t *__kmp_threadpriv_cache_list; 2832*0b57cec5SDimitry Andric 2833*0b57cec5SDimitry Andric /* Barrier algorithm types and options */ 2834*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_bb_dflt; 2835*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_bb_dflt; 2836*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt; 2837*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt; 2838*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier]; 2839*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier]; 2840*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier]; 2841*0b57cec5SDimitry Andric extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier]; 2842*0b57cec5SDimitry Andric extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier]; 2843*0b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier]; 2844*0b57cec5SDimitry Andric extern char const *__kmp_barrier_type_name[bs_last_barrier]; 2845*0b57cec5SDimitry Andric extern char const *__kmp_barrier_pattern_name[bp_last_bar]; 2846*0b57cec5SDimitry Andric 2847*0b57cec5SDimitry Andric /* Global Locks */ 2848*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */ 2849*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ 2850*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t __kmp_task_team_lock; 2851*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t 2852*0b57cec5SDimitry Andric __kmp_exit_lock; /* exit() is not always thread-safe */ 2853*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2854*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t 2855*0b57cec5SDimitry Andric __kmp_monitor_lock; /* control monitor thread creation */ 2856*0b57cec5SDimitry Andric #endif 2857*0b57cec5SDimitry Andric extern kmp_bootstrap_lock_t 2858*0b57cec5SDimitry Andric __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and 2859*0b57cec5SDimitry Andric __kmp_threads expansion to co-exist */ 2860*0b57cec5SDimitry Andric 2861*0b57cec5SDimitry Andric extern kmp_lock_t __kmp_global_lock; /* control OS/global access */ 2862*0b57cec5SDimitry Andric extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */ 2863*0b57cec5SDimitry Andric extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */ 2864*0b57cec5SDimitry Andric 2865*0b57cec5SDimitry Andric extern enum library_type __kmp_library; 2866*0b57cec5SDimitry Andric 2867*0b57cec5SDimitry Andric extern enum sched_type __kmp_sched; /* default runtime scheduling */ 2868*0b57cec5SDimitry Andric extern enum sched_type __kmp_static; /* default static scheduling method */ 2869*0b57cec5SDimitry Andric extern enum sched_type __kmp_guided; /* default guided scheduling method */ 2870*0b57cec5SDimitry Andric extern enum sched_type __kmp_auto; /* default auto scheduling method */ 2871*0b57cec5SDimitry Andric extern int __kmp_chunk; /* default runtime chunk size */ 2872*0b57cec5SDimitry Andric 2873*0b57cec5SDimitry Andric extern size_t __kmp_stksize; /* stack size per thread */ 2874*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2875*0b57cec5SDimitry Andric extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */ 2876*0b57cec5SDimitry Andric #endif 2877*0b57cec5SDimitry Andric extern size_t __kmp_stkoffset; /* stack offset per thread */ 2878*0b57cec5SDimitry Andric extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */ 2879*0b57cec5SDimitry Andric 2880*0b57cec5SDimitry Andric extern size_t 2881*0b57cec5SDimitry Andric __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */ 2882*0b57cec5SDimitry Andric extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */ 2883*0b57cec5SDimitry Andric extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */ 2884*0b57cec5SDimitry Andric extern int __kmp_env_checks; /* was KMP_CHECKS specified? */ 2885*0b57cec5SDimitry Andric extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified? 2886*0b57cec5SDimitry Andric extern int __kmp_generate_warnings; /* should we issue warnings? */ 2887*0b57cec5SDimitry Andric extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */ 2888*0b57cec5SDimitry Andric 2889*0b57cec5SDimitry Andric #ifdef DEBUG_SUSPEND 2890*0b57cec5SDimitry Andric extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ 2891*0b57cec5SDimitry Andric #endif 2892*0b57cec5SDimitry Andric 2893*0b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield; 2894*0b57cec5SDimitry Andric extern kmp_int32 __kmp_use_yield_exp_set; 2895*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_init; 2896*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_yield_next; 2897*0b57cec5SDimitry Andric 2898*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */ 2899*0b57cec5SDimitry Andric extern int __kmp_allThreadsSpecified; 2900*0b57cec5SDimitry Andric 2901*0b57cec5SDimitry Andric extern size_t __kmp_align_alloc; 2902*0b57cec5SDimitry Andric /* following data protected by initialization routines */ 2903*0b57cec5SDimitry Andric extern int __kmp_xproc; /* number of processors in the system */ 2904*0b57cec5SDimitry Andric extern int __kmp_avail_proc; /* number of processors available to the process */ 2905*0b57cec5SDimitry Andric extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ 2906*0b57cec5SDimitry Andric extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ 2907*0b57cec5SDimitry Andric // maximum total number of concurrently-existing threads on device 2908*0b57cec5SDimitry Andric extern int __kmp_max_nth; 2909*0b57cec5SDimitry Andric // maximum total number of concurrently-existing threads in a contention group 2910*0b57cec5SDimitry Andric extern int __kmp_cg_max_nth; 2911*0b57cec5SDimitry Andric extern int __kmp_teams_max_nth; // max threads used in a teams construct 2912*0b57cec5SDimitry Andric extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and 2913*0b57cec5SDimitry Andric __kmp_root */ 2914*0b57cec5SDimitry Andric extern int __kmp_dflt_team_nth; /* default number of threads in a parallel 2915*0b57cec5SDimitry Andric region a la OMP_NUM_THREADS */ 2916*0b57cec5SDimitry Andric extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial 2917*0b57cec5SDimitry Andric initialization */ 2918*0b57cec5SDimitry Andric extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is 2919*0b57cec5SDimitry Andric used (fixed) */ 2920*0b57cec5SDimitry Andric extern int __kmp_tp_cached; /* whether threadprivate cache has been created 2921*0b57cec5SDimitry Andric (__kmpc_threadprivate_cached()) */ 2922*0b57cec5SDimitry Andric extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before 2923*0b57cec5SDimitry Andric blocking (env setting) */ 2924*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 2925*0b57cec5SDimitry Andric extern int 2926*0b57cec5SDimitry Andric __kmp_monitor_wakeups; /* number of times monitor wakes up per second */ 2927*0b57cec5SDimitry Andric extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before 2928*0b57cec5SDimitry Andric blocking */ 2929*0b57cec5SDimitry Andric #endif 2930*0b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 2931*0b57cec5SDimitry Andric extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ 2932*0b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 2933*0b57cec5SDimitry Andric #ifdef KMP_DFLT_NTH_CORES 2934*0b57cec5SDimitry Andric extern int __kmp_ncores; /* Total number of cores for threads placement */ 2935*0b57cec5SDimitry Andric #endif 2936*0b57cec5SDimitry Andric /* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */ 2937*0b57cec5SDimitry Andric extern int __kmp_abort_delay; 2938*0b57cec5SDimitry Andric 2939*0b57cec5SDimitry Andric extern int __kmp_need_register_atfork_specified; 2940*0b57cec5SDimitry Andric extern int 2941*0b57cec5SDimitry Andric __kmp_need_register_atfork; /* At initialization, call pthread_atfork to 2942*0b57cec5SDimitry Andric install fork handler */ 2943*0b57cec5SDimitry Andric extern int __kmp_gtid_mode; /* Method of getting gtid, values: 2944*0b57cec5SDimitry Andric 0 - not set, will be set at runtime 2945*0b57cec5SDimitry Andric 1 - using stack search 2946*0b57cec5SDimitry Andric 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS 2947*0b57cec5SDimitry Andric X*) or TlsGetValue(Windows* OS)) 2948*0b57cec5SDimitry Andric 3 - static TLS (__declspec(thread) __kmp_gtid), 2949*0b57cec5SDimitry Andric Linux* OS .so only. */ 2950*0b57cec5SDimitry Andric extern int 2951*0b57cec5SDimitry Andric __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */ 2952*0b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 2953*0b57cec5SDimitry Andric extern KMP_THREAD_LOCAL int __kmp_gtid; 2954*0b57cec5SDimitry Andric #endif 2955*0b57cec5SDimitry Andric extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ 2956*0b57cec5SDimitry Andric extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread 2957*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2958*0b57cec5SDimitry Andric extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork 2959*0b57cec5SDimitry Andric extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg 2960*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ 2961*0b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 2962*0b57cec5SDimitry Andric 2963*0b57cec5SDimitry Andric // max_active_levels for nested parallelism enabled by default via 2964*0b57cec5SDimitry Andric // OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND 2965*0b57cec5SDimitry Andric extern int __kmp_dflt_max_active_levels; 2966*0b57cec5SDimitry Andric // Indicates whether value of __kmp_dflt_max_active_levels was already 2967*0b57cec5SDimitry Andric // explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false 2968*0b57cec5SDimitry Andric extern bool __kmp_dflt_max_active_levels_set; 2969*0b57cec5SDimitry Andric extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in 2970*0b57cec5SDimitry Andric concurrent execution per team */ 2971*0b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 2972*0b57cec5SDimitry Andric extern int __kmp_hot_teams_mode; 2973*0b57cec5SDimitry Andric extern int __kmp_hot_teams_max_level; 2974*0b57cec5SDimitry Andric #endif 2975*0b57cec5SDimitry Andric 2976*0b57cec5SDimitry Andric #if KMP_OS_LINUX 2977*0b57cec5SDimitry Andric extern enum clock_function_type __kmp_clock_function; 2978*0b57cec5SDimitry Andric extern int __kmp_clock_function_param; 2979*0b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */ 2980*0b57cec5SDimitry Andric 2981*0b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 2982*0b57cec5SDimitry Andric extern enum mic_type __kmp_mic_type; 2983*0b57cec5SDimitry Andric #endif 2984*0b57cec5SDimitry Andric 2985*0b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 2986*0b57cec5SDimitry Andric extern double __kmp_load_balance_interval; // load balance algorithm interval 2987*0b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 2988*0b57cec5SDimitry Andric 2989*0b57cec5SDimitry Andric // OpenMP 3.1 - Nested num threads array 2990*0b57cec5SDimitry Andric typedef struct kmp_nested_nthreads_t { 2991*0b57cec5SDimitry Andric int *nth; 2992*0b57cec5SDimitry Andric int size; 2993*0b57cec5SDimitry Andric int used; 2994*0b57cec5SDimitry Andric } kmp_nested_nthreads_t; 2995*0b57cec5SDimitry Andric 2996*0b57cec5SDimitry Andric extern kmp_nested_nthreads_t __kmp_nested_nth; 2997*0b57cec5SDimitry Andric 2998*0b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 2999*0b57cec5SDimitry Andric 3000*0b57cec5SDimitry Andric // Parameters for the speculative lock backoff system. 3001*0b57cec5SDimitry Andric struct kmp_adaptive_backoff_params_t { 3002*0b57cec5SDimitry Andric // Number of soft retries before it counts as a hard retry. 3003*0b57cec5SDimitry Andric kmp_uint32 max_soft_retries; 3004*0b57cec5SDimitry Andric // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to 3005*0b57cec5SDimitry Andric // the right 3006*0b57cec5SDimitry Andric kmp_uint32 max_badness; 3007*0b57cec5SDimitry Andric }; 3008*0b57cec5SDimitry Andric 3009*0b57cec5SDimitry Andric extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params; 3010*0b57cec5SDimitry Andric 3011*0b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 3012*0b57cec5SDimitry Andric extern const char *__kmp_speculative_statsfile; 3013*0b57cec5SDimitry Andric #endif 3014*0b57cec5SDimitry Andric 3015*0b57cec5SDimitry Andric #endif // KMP_USE_ADAPTIVE_LOCKS 3016*0b57cec5SDimitry Andric 3017*0b57cec5SDimitry Andric extern int __kmp_display_env; /* TRUE or FALSE */ 3018*0b57cec5SDimitry Andric extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ 3019*0b57cec5SDimitry Andric extern int __kmp_omp_cancellation; /* TRUE or FALSE */ 3020*0b57cec5SDimitry Andric 3021*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */ 3022*0b57cec5SDimitry Andric 3023*0b57cec5SDimitry Andric /* the following are protected by the fork/join lock */ 3024*0b57cec5SDimitry Andric /* write: lock read: anytime */ 3025*0b57cec5SDimitry Andric extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */ 3026*0b57cec5SDimitry Andric /* read/write: lock */ 3027*0b57cec5SDimitry Andric extern volatile kmp_team_t *__kmp_team_pool; 3028*0b57cec5SDimitry Andric extern volatile kmp_info_t *__kmp_thread_pool; 3029*0b57cec5SDimitry Andric extern kmp_info_t *__kmp_thread_pool_insert_pt; 3030*0b57cec5SDimitry Andric 3031*0b57cec5SDimitry Andric // total num threads reachable from some root thread including all root threads 3032*0b57cec5SDimitry Andric extern volatile int __kmp_nth; 3033*0b57cec5SDimitry Andric /* total number of threads reachable from some root thread including all root 3034*0b57cec5SDimitry Andric threads, and those in the thread pool */ 3035*0b57cec5SDimitry Andric extern volatile int __kmp_all_nth; 3036*0b57cec5SDimitry Andric extern std::atomic<int> __kmp_thread_pool_active_nth; 3037*0b57cec5SDimitry Andric 3038*0b57cec5SDimitry Andric extern kmp_root_t **__kmp_root; /* root of thread hierarchy */ 3039*0b57cec5SDimitry Andric /* end data protected by fork/join lock */ 3040*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */ 3041*0b57cec5SDimitry Andric 3042*0b57cec5SDimitry Andric #define __kmp_get_gtid() __kmp_get_global_thread_id() 3043*0b57cec5SDimitry Andric #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() 3044*0b57cec5SDimitry Andric #define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid())) 3045*0b57cec5SDimitry Andric #define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team) 3046*0b57cec5SDimitry Andric #define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid())) 3047*0b57cec5SDimitry Andric 3048*0b57cec5SDimitry Andric // AT: Which way is correct? 3049*0b57cec5SDimitry Andric // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc; 3050*0b57cec5SDimitry Andric // AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc; 3051*0b57cec5SDimitry Andric #define __kmp_get_team_num_threads(gtid) \ 3052*0b57cec5SDimitry Andric (__kmp_threads[(gtid)]->th.th_team->t.t_nproc) 3053*0b57cec5SDimitry Andric 3054*0b57cec5SDimitry Andric static inline bool KMP_UBER_GTID(int gtid) { 3055*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN); 3056*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity); 3057*0b57cec5SDimitry Andric return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] && 3058*0b57cec5SDimitry Andric __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread); 3059*0b57cec5SDimitry Andric } 3060*0b57cec5SDimitry Andric 3061*0b57cec5SDimitry Andric static inline int __kmp_tid_from_gtid(int gtid) { 3062*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 3063*0b57cec5SDimitry Andric return __kmp_threads[gtid]->th.th_info.ds.ds_tid; 3064*0b57cec5SDimitry Andric } 3065*0b57cec5SDimitry Andric 3066*0b57cec5SDimitry Andric static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) { 3067*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(tid >= 0 && team); 3068*0b57cec5SDimitry Andric return team->t.t_threads[tid]->th.th_info.ds.ds_gtid; 3069*0b57cec5SDimitry Andric } 3070*0b57cec5SDimitry Andric 3071*0b57cec5SDimitry Andric static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) { 3072*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr); 3073*0b57cec5SDimitry Andric return thr->th.th_info.ds.ds_gtid; 3074*0b57cec5SDimitry Andric } 3075*0b57cec5SDimitry Andric 3076*0b57cec5SDimitry Andric static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) { 3077*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 3078*0b57cec5SDimitry Andric return __kmp_threads[gtid]; 3079*0b57cec5SDimitry Andric } 3080*0b57cec5SDimitry Andric 3081*0b57cec5SDimitry Andric static inline kmp_team_t *__kmp_team_from_gtid(int gtid) { 3082*0b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 3083*0b57cec5SDimitry Andric return __kmp_threads[gtid]->th.th_team; 3084*0b57cec5SDimitry Andric } 3085*0b57cec5SDimitry Andric 3086*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------- */ 3087*0b57cec5SDimitry Andric 3088*0b57cec5SDimitry Andric extern kmp_global_t __kmp_global; /* global status */ 3089*0b57cec5SDimitry Andric 3090*0b57cec5SDimitry Andric extern kmp_info_t __kmp_monitor; 3091*0b57cec5SDimitry Andric // For Debugging Support Library 3092*0b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_team_counter; 3093*0b57cec5SDimitry Andric // For Debugging Support Library 3094*0b57cec5SDimitry Andric extern std::atomic<kmp_int32> __kmp_task_counter; 3095*0b57cec5SDimitry Andric 3096*0b57cec5SDimitry Andric #if USE_DEBUGGER 3097*0b57cec5SDimitry Andric #define _KMP_GEN_ID(counter) \ 3098*0b57cec5SDimitry Andric (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0) 3099*0b57cec5SDimitry Andric #else 3100*0b57cec5SDimitry Andric #define _KMP_GEN_ID(counter) (~0) 3101*0b57cec5SDimitry Andric #endif /* USE_DEBUGGER */ 3102*0b57cec5SDimitry Andric 3103*0b57cec5SDimitry Andric #define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter) 3104*0b57cec5SDimitry Andric #define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter) 3105*0b57cec5SDimitry Andric 3106*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3107*0b57cec5SDimitry Andric 3108*0b57cec5SDimitry Andric extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, 3109*0b57cec5SDimitry Andric size_t size, char const *format, ...); 3110*0b57cec5SDimitry Andric 3111*0b57cec5SDimitry Andric extern void __kmp_serial_initialize(void); 3112*0b57cec5SDimitry Andric extern void __kmp_middle_initialize(void); 3113*0b57cec5SDimitry Andric extern void __kmp_parallel_initialize(void); 3114*0b57cec5SDimitry Andric 3115*0b57cec5SDimitry Andric extern void __kmp_internal_begin(void); 3116*0b57cec5SDimitry Andric extern void __kmp_internal_end_library(int gtid); 3117*0b57cec5SDimitry Andric extern void __kmp_internal_end_thread(int gtid); 3118*0b57cec5SDimitry Andric extern void __kmp_internal_end_atexit(void); 3119*0b57cec5SDimitry Andric extern void __kmp_internal_end_fini(void); 3120*0b57cec5SDimitry Andric extern void __kmp_internal_end_dtor(void); 3121*0b57cec5SDimitry Andric extern void __kmp_internal_end_dest(void *); 3122*0b57cec5SDimitry Andric 3123*0b57cec5SDimitry Andric extern int __kmp_register_root(int initial_thread); 3124*0b57cec5SDimitry Andric extern void __kmp_unregister_root(int gtid); 3125*0b57cec5SDimitry Andric 3126*0b57cec5SDimitry Andric extern int __kmp_ignore_mppbeg(void); 3127*0b57cec5SDimitry Andric extern int __kmp_ignore_mppend(void); 3128*0b57cec5SDimitry Andric 3129*0b57cec5SDimitry Andric extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws); 3130*0b57cec5SDimitry Andric extern void __kmp_exit_single(int gtid); 3131*0b57cec5SDimitry Andric 3132*0b57cec5SDimitry Andric extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref); 3133*0b57cec5SDimitry Andric extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref); 3134*0b57cec5SDimitry Andric 3135*0b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 3136*0b57cec5SDimitry Andric extern int __kmp_get_load_balance(int); 3137*0b57cec5SDimitry Andric #endif 3138*0b57cec5SDimitry Andric 3139*0b57cec5SDimitry Andric extern int __kmp_get_global_thread_id(void); 3140*0b57cec5SDimitry Andric extern int __kmp_get_global_thread_id_reg(void); 3141*0b57cec5SDimitry Andric extern void __kmp_exit_thread(int exit_status); 3142*0b57cec5SDimitry Andric extern void __kmp_abort(char const *format, ...); 3143*0b57cec5SDimitry Andric extern void __kmp_abort_thread(void); 3144*0b57cec5SDimitry Andric KMP_NORETURN extern void __kmp_abort_process(void); 3145*0b57cec5SDimitry Andric extern void __kmp_warn(char const *format, ...); 3146*0b57cec5SDimitry Andric 3147*0b57cec5SDimitry Andric extern void __kmp_set_num_threads(int new_nth, int gtid); 3148*0b57cec5SDimitry Andric 3149*0b57cec5SDimitry Andric // Returns current thread (pointer to kmp_info_t). Current thread *must* be 3150*0b57cec5SDimitry Andric // registered. 3151*0b57cec5SDimitry Andric static inline kmp_info_t *__kmp_entry_thread() { 3152*0b57cec5SDimitry Andric int gtid = __kmp_entry_gtid(); 3153*0b57cec5SDimitry Andric 3154*0b57cec5SDimitry Andric return __kmp_threads[gtid]; 3155*0b57cec5SDimitry Andric } 3156*0b57cec5SDimitry Andric 3157*0b57cec5SDimitry Andric extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels); 3158*0b57cec5SDimitry Andric extern int __kmp_get_max_active_levels(int gtid); 3159*0b57cec5SDimitry Andric extern int __kmp_get_ancestor_thread_num(int gtid, int level); 3160*0b57cec5SDimitry Andric extern int __kmp_get_team_size(int gtid, int level); 3161*0b57cec5SDimitry Andric extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk); 3162*0b57cec5SDimitry Andric extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk); 3163*0b57cec5SDimitry Andric 3164*0b57cec5SDimitry Andric extern unsigned short __kmp_get_random(kmp_info_t *thread); 3165*0b57cec5SDimitry Andric extern void __kmp_init_random(kmp_info_t *thread); 3166*0b57cec5SDimitry Andric 3167*0b57cec5SDimitry Andric extern kmp_r_sched_t __kmp_get_schedule_global(void); 3168*0b57cec5SDimitry Andric extern void __kmp_adjust_num_threads(int new_nproc); 3169*0b57cec5SDimitry Andric extern void __kmp_check_stksize(size_t *val); 3170*0b57cec5SDimitry Andric 3171*0b57cec5SDimitry Andric extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL); 3172*0b57cec5SDimitry Andric extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL); 3173*0b57cec5SDimitry Andric extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL); 3174*0b57cec5SDimitry Andric #define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR) 3175*0b57cec5SDimitry Andric #define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR) 3176*0b57cec5SDimitry Andric #define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR) 3177*0b57cec5SDimitry Andric 3178*0b57cec5SDimitry Andric #if USE_FAST_MEMORY 3179*0b57cec5SDimitry Andric extern void *___kmp_fast_allocate(kmp_info_t *this_thr, 3180*0b57cec5SDimitry Andric size_t size KMP_SRC_LOC_DECL); 3181*0b57cec5SDimitry Andric extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL); 3182*0b57cec5SDimitry Andric extern void __kmp_free_fast_memory(kmp_info_t *this_thr); 3183*0b57cec5SDimitry Andric extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr); 3184*0b57cec5SDimitry Andric #define __kmp_fast_allocate(this_thr, size) \ 3185*0b57cec5SDimitry Andric ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR) 3186*0b57cec5SDimitry Andric #define __kmp_fast_free(this_thr, ptr) \ 3187*0b57cec5SDimitry Andric ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR) 3188*0b57cec5SDimitry Andric #endif 3189*0b57cec5SDimitry Andric 3190*0b57cec5SDimitry Andric extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL); 3191*0b57cec5SDimitry Andric extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, 3192*0b57cec5SDimitry Andric size_t elsize KMP_SRC_LOC_DECL); 3193*0b57cec5SDimitry Andric extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, 3194*0b57cec5SDimitry Andric size_t size KMP_SRC_LOC_DECL); 3195*0b57cec5SDimitry Andric extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL); 3196*0b57cec5SDimitry Andric #define __kmp_thread_malloc(th, size) \ 3197*0b57cec5SDimitry Andric ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR) 3198*0b57cec5SDimitry Andric #define __kmp_thread_calloc(th, nelem, elsize) \ 3199*0b57cec5SDimitry Andric ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR) 3200*0b57cec5SDimitry Andric #define __kmp_thread_realloc(th, ptr, size) \ 3201*0b57cec5SDimitry Andric ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR) 3202*0b57cec5SDimitry Andric #define __kmp_thread_free(th, ptr) \ 3203*0b57cec5SDimitry Andric ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR) 3204*0b57cec5SDimitry Andric 3205*0b57cec5SDimitry Andric #define KMP_INTERNAL_MALLOC(sz) malloc(sz) 3206*0b57cec5SDimitry Andric #define KMP_INTERNAL_FREE(p) free(p) 3207*0b57cec5SDimitry Andric #define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz)) 3208*0b57cec5SDimitry Andric #define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz)) 3209*0b57cec5SDimitry Andric 3210*0b57cec5SDimitry Andric extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads); 3211*0b57cec5SDimitry Andric 3212*0b57cec5SDimitry Andric extern void __kmp_push_proc_bind(ident_t *loc, int gtid, 3213*0b57cec5SDimitry Andric kmp_proc_bind_t proc_bind); 3214*0b57cec5SDimitry Andric extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams, 3215*0b57cec5SDimitry Andric int num_threads); 3216*0b57cec5SDimitry Andric 3217*0b57cec5SDimitry Andric extern void __kmp_yield(); 3218*0b57cec5SDimitry Andric 3219*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, 3220*0b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 lb, 3221*0b57cec5SDimitry Andric kmp_int32 ub, kmp_int32 st, kmp_int32 chunk); 3222*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, 3223*0b57cec5SDimitry Andric enum sched_type schedule, kmp_uint32 lb, 3224*0b57cec5SDimitry Andric kmp_uint32 ub, kmp_int32 st, 3225*0b57cec5SDimitry Andric kmp_int32 chunk); 3226*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, 3227*0b57cec5SDimitry Andric enum sched_type schedule, kmp_int64 lb, 3228*0b57cec5SDimitry Andric kmp_int64 ub, kmp_int64 st, kmp_int64 chunk); 3229*0b57cec5SDimitry Andric extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, 3230*0b57cec5SDimitry Andric enum sched_type schedule, kmp_uint64 lb, 3231*0b57cec5SDimitry Andric kmp_uint64 ub, kmp_int64 st, 3232*0b57cec5SDimitry Andric kmp_int64 chunk); 3233*0b57cec5SDimitry Andric 3234*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, 3235*0b57cec5SDimitry Andric kmp_int32 *p_last, kmp_int32 *p_lb, 3236*0b57cec5SDimitry Andric kmp_int32 *p_ub, kmp_int32 *p_st); 3237*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, 3238*0b57cec5SDimitry Andric kmp_int32 *p_last, kmp_uint32 *p_lb, 3239*0b57cec5SDimitry Andric kmp_uint32 *p_ub, kmp_int32 *p_st); 3240*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, 3241*0b57cec5SDimitry Andric kmp_int32 *p_last, kmp_int64 *p_lb, 3242*0b57cec5SDimitry Andric kmp_int64 *p_ub, kmp_int64 *p_st); 3243*0b57cec5SDimitry Andric extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, 3244*0b57cec5SDimitry Andric kmp_int32 *p_last, kmp_uint64 *p_lb, 3245*0b57cec5SDimitry Andric kmp_uint64 *p_ub, kmp_int64 *p_st); 3246*0b57cec5SDimitry Andric 3247*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid); 3248*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid); 3249*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid); 3250*0b57cec5SDimitry Andric extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid); 3251*0b57cec5SDimitry Andric 3252*0b57cec5SDimitry Andric #ifdef KMP_GOMP_COMPAT 3253*0b57cec5SDimitry Andric 3254*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, 3255*0b57cec5SDimitry Andric enum sched_type schedule, kmp_int32 lb, 3256*0b57cec5SDimitry Andric kmp_int32 ub, kmp_int32 st, 3257*0b57cec5SDimitry Andric kmp_int32 chunk, int push_ws); 3258*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, 3259*0b57cec5SDimitry Andric enum sched_type schedule, kmp_uint32 lb, 3260*0b57cec5SDimitry Andric kmp_uint32 ub, kmp_int32 st, 3261*0b57cec5SDimitry Andric kmp_int32 chunk, int push_ws); 3262*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, 3263*0b57cec5SDimitry Andric enum sched_type schedule, kmp_int64 lb, 3264*0b57cec5SDimitry Andric kmp_int64 ub, kmp_int64 st, 3265*0b57cec5SDimitry Andric kmp_int64 chunk, int push_ws); 3266*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, 3267*0b57cec5SDimitry Andric enum sched_type schedule, kmp_uint64 lb, 3268*0b57cec5SDimitry Andric kmp_uint64 ub, kmp_int64 st, 3269*0b57cec5SDimitry Andric kmp_int64 chunk, int push_ws); 3270*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid); 3271*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid); 3272*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid); 3273*0b57cec5SDimitry Andric extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid); 3274*0b57cec5SDimitry Andric 3275*0b57cec5SDimitry Andric #endif /* KMP_GOMP_COMPAT */ 3276*0b57cec5SDimitry Andric 3277*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker); 3278*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker); 3279*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker); 3280*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker); 3281*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker); 3282*0b57cec5SDimitry Andric extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker, 3283*0b57cec5SDimitry Andric kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), 3284*0b57cec5SDimitry Andric void *obj); 3285*0b57cec5SDimitry Andric extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker, 3286*0b57cec5SDimitry Andric kmp_uint32 (*pred)(void *, kmp_uint32), void *obj); 3287*0b57cec5SDimitry Andric 3288*0b57cec5SDimitry Andric class kmp_flag_32; 3289*0b57cec5SDimitry Andric class kmp_flag_64; 3290*0b57cec5SDimitry Andric class kmp_flag_oncore; 3291*0b57cec5SDimitry Andric extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, 3292*0b57cec5SDimitry Andric int final_spin 3293*0b57cec5SDimitry Andric #if USE_ITT_BUILD 3294*0b57cec5SDimitry Andric , 3295*0b57cec5SDimitry Andric void *itt_sync_obj 3296*0b57cec5SDimitry Andric #endif 3297*0b57cec5SDimitry Andric ); 3298*0b57cec5SDimitry Andric extern void __kmp_release_64(kmp_flag_64 *flag); 3299*0b57cec5SDimitry Andric 3300*0b57cec5SDimitry Andric extern void __kmp_infinite_loop(void); 3301*0b57cec5SDimitry Andric 3302*0b57cec5SDimitry Andric extern void __kmp_cleanup(void); 3303*0b57cec5SDimitry Andric 3304*0b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 3305*0b57cec5SDimitry Andric extern int __kmp_handle_signals; 3306*0b57cec5SDimitry Andric extern void __kmp_install_signals(int parallel_init); 3307*0b57cec5SDimitry Andric extern void __kmp_remove_signals(void); 3308*0b57cec5SDimitry Andric #endif 3309*0b57cec5SDimitry Andric 3310*0b57cec5SDimitry Andric extern void __kmp_clear_system_time(void); 3311*0b57cec5SDimitry Andric extern void __kmp_read_system_time(double *delta); 3312*0b57cec5SDimitry Andric 3313*0b57cec5SDimitry Andric extern void __kmp_check_stack_overlap(kmp_info_t *thr); 3314*0b57cec5SDimitry Andric 3315*0b57cec5SDimitry Andric extern void __kmp_expand_host_name(char *buffer, size_t size); 3316*0b57cec5SDimitry Andric extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern); 3317*0b57cec5SDimitry Andric 3318*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3319*0b57cec5SDimitry Andric extern void 3320*0b57cec5SDimitry Andric __kmp_initialize_system_tick(void); /* Initialize timer tick value */ 3321*0b57cec5SDimitry Andric #endif 3322*0b57cec5SDimitry Andric 3323*0b57cec5SDimitry Andric extern void 3324*0b57cec5SDimitry Andric __kmp_runtime_initialize(void); /* machine specific initialization */ 3325*0b57cec5SDimitry Andric extern void __kmp_runtime_destroy(void); 3326*0b57cec5SDimitry Andric 3327*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 3328*0b57cec5SDimitry Andric extern char *__kmp_affinity_print_mask(char *buf, int buf_len, 3329*0b57cec5SDimitry Andric kmp_affin_mask_t *mask); 3330*0b57cec5SDimitry Andric extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, 3331*0b57cec5SDimitry Andric kmp_affin_mask_t *mask); 3332*0b57cec5SDimitry Andric extern void __kmp_affinity_initialize(void); 3333*0b57cec5SDimitry Andric extern void __kmp_affinity_uninitialize(void); 3334*0b57cec5SDimitry Andric extern void __kmp_affinity_set_init_mask( 3335*0b57cec5SDimitry Andric int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ 3336*0b57cec5SDimitry Andric extern void __kmp_affinity_set_place(int gtid); 3337*0b57cec5SDimitry Andric extern void __kmp_affinity_determine_capable(const char *env_var); 3338*0b57cec5SDimitry Andric extern int __kmp_aux_set_affinity(void **mask); 3339*0b57cec5SDimitry Andric extern int __kmp_aux_get_affinity(void **mask); 3340*0b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_max_proc(); 3341*0b57cec5SDimitry Andric extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); 3342*0b57cec5SDimitry Andric extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); 3343*0b57cec5SDimitry Andric extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); 3344*0b57cec5SDimitry Andric extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size); 3345*0b57cec5SDimitry Andric #if KMP_OS_LINUX 3346*0b57cec5SDimitry Andric extern int kmp_set_thread_affinity_mask_initial(void); 3347*0b57cec5SDimitry Andric #endif 3348*0b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 3349*0b57cec5SDimitry Andric // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the 3350*0b57cec5SDimitry Andric // format string is for affinity, so platforms that do not support 3351*0b57cec5SDimitry Andric // affinity can still use the other fields, e.g., %n for num_threads 3352*0b57cec5SDimitry Andric extern size_t __kmp_aux_capture_affinity(int gtid, const char *format, 3353*0b57cec5SDimitry Andric kmp_str_buf_t *buffer); 3354*0b57cec5SDimitry Andric extern void __kmp_aux_display_affinity(int gtid, const char *format); 3355*0b57cec5SDimitry Andric 3356*0b57cec5SDimitry Andric extern void __kmp_cleanup_hierarchy(); 3357*0b57cec5SDimitry Andric extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); 3358*0b57cec5SDimitry Andric 3359*0b57cec5SDimitry Andric #if KMP_USE_FUTEX 3360*0b57cec5SDimitry Andric 3361*0b57cec5SDimitry Andric extern int __kmp_futex_determine_capable(void); 3362*0b57cec5SDimitry Andric 3363*0b57cec5SDimitry Andric #endif // KMP_USE_FUTEX 3364*0b57cec5SDimitry Andric 3365*0b57cec5SDimitry Andric extern void __kmp_gtid_set_specific(int gtid); 3366*0b57cec5SDimitry Andric extern int __kmp_gtid_get_specific(void); 3367*0b57cec5SDimitry Andric 3368*0b57cec5SDimitry Andric extern double __kmp_read_cpu_time(void); 3369*0b57cec5SDimitry Andric 3370*0b57cec5SDimitry Andric extern int __kmp_read_system_info(struct kmp_sys_info *info); 3371*0b57cec5SDimitry Andric 3372*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 3373*0b57cec5SDimitry Andric extern void __kmp_create_monitor(kmp_info_t *th); 3374*0b57cec5SDimitry Andric #endif 3375*0b57cec5SDimitry Andric 3376*0b57cec5SDimitry Andric extern void *__kmp_launch_thread(kmp_info_t *thr); 3377*0b57cec5SDimitry Andric 3378*0b57cec5SDimitry Andric extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size); 3379*0b57cec5SDimitry Andric 3380*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 3381*0b57cec5SDimitry Andric extern int __kmp_still_running(kmp_info_t *th); 3382*0b57cec5SDimitry Andric extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val); 3383*0b57cec5SDimitry Andric extern void __kmp_free_handle(kmp_thread_t tHandle); 3384*0b57cec5SDimitry Andric #endif 3385*0b57cec5SDimitry Andric 3386*0b57cec5SDimitry Andric #if KMP_USE_MONITOR 3387*0b57cec5SDimitry Andric extern void __kmp_reap_monitor(kmp_info_t *th); 3388*0b57cec5SDimitry Andric #endif 3389*0b57cec5SDimitry Andric extern void __kmp_reap_worker(kmp_info_t *th); 3390*0b57cec5SDimitry Andric extern void __kmp_terminate_thread(int gtid); 3391*0b57cec5SDimitry Andric 3392*0b57cec5SDimitry Andric extern int __kmp_try_suspend_mx(kmp_info_t *th); 3393*0b57cec5SDimitry Andric extern void __kmp_lock_suspend_mx(kmp_info_t *th); 3394*0b57cec5SDimitry Andric extern void __kmp_unlock_suspend_mx(kmp_info_t *th); 3395*0b57cec5SDimitry Andric 3396*0b57cec5SDimitry Andric extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag); 3397*0b57cec5SDimitry Andric extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag); 3398*0b57cec5SDimitry Andric extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag); 3399*0b57cec5SDimitry Andric extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag); 3400*0b57cec5SDimitry Andric extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag); 3401*0b57cec5SDimitry Andric extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag); 3402*0b57cec5SDimitry Andric 3403*0b57cec5SDimitry Andric extern void __kmp_elapsed(double *); 3404*0b57cec5SDimitry Andric extern void __kmp_elapsed_tick(double *); 3405*0b57cec5SDimitry Andric 3406*0b57cec5SDimitry Andric extern void __kmp_enable(int old_state); 3407*0b57cec5SDimitry Andric extern void __kmp_disable(int *old_state); 3408*0b57cec5SDimitry Andric 3409*0b57cec5SDimitry Andric extern void __kmp_thread_sleep(int millis); 3410*0b57cec5SDimitry Andric 3411*0b57cec5SDimitry Andric extern void __kmp_common_initialize(void); 3412*0b57cec5SDimitry Andric extern void __kmp_common_destroy(void); 3413*0b57cec5SDimitry Andric extern void __kmp_common_destroy_gtid(int gtid); 3414*0b57cec5SDimitry Andric 3415*0b57cec5SDimitry Andric #if KMP_OS_UNIX 3416*0b57cec5SDimitry Andric extern void __kmp_register_atfork(void); 3417*0b57cec5SDimitry Andric #endif 3418*0b57cec5SDimitry Andric extern void __kmp_suspend_initialize(void); 3419*0b57cec5SDimitry Andric extern void __kmp_suspend_initialize_thread(kmp_info_t *th); 3420*0b57cec5SDimitry Andric extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th); 3421*0b57cec5SDimitry Andric 3422*0b57cec5SDimitry Andric extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, 3423*0b57cec5SDimitry Andric int tid); 3424*0b57cec5SDimitry Andric extern kmp_team_t * 3425*0b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 3426*0b57cec5SDimitry Andric #if OMPT_SUPPORT 3427*0b57cec5SDimitry Andric ompt_data_t ompt_parallel_data, 3428*0b57cec5SDimitry Andric #endif 3429*0b57cec5SDimitry Andric kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs, 3430*0b57cec5SDimitry Andric int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); 3431*0b57cec5SDimitry Andric extern void __kmp_free_thread(kmp_info_t *); 3432*0b57cec5SDimitry Andric extern void __kmp_free_team(kmp_root_t *, 3433*0b57cec5SDimitry Andric kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *)); 3434*0b57cec5SDimitry Andric extern kmp_team_t *__kmp_reap_team(kmp_team_t *); 3435*0b57cec5SDimitry Andric 3436*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3437*0b57cec5SDimitry Andric 3438*0b57cec5SDimitry Andric extern void __kmp_initialize_bget(kmp_info_t *th); 3439*0b57cec5SDimitry Andric extern void __kmp_finalize_bget(kmp_info_t *th); 3440*0b57cec5SDimitry Andric 3441*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_malloc(size_t size); 3442*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment); 3443*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize); 3444*0b57cec5SDimitry Andric KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size); 3445*0b57cec5SDimitry Andric KMP_EXPORT void kmpc_free(void *ptr); 3446*0b57cec5SDimitry Andric 3447*0b57cec5SDimitry Andric /* declarations for internal use */ 3448*0b57cec5SDimitry Andric 3449*0b57cec5SDimitry Andric extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, 3450*0b57cec5SDimitry Andric size_t reduce_size, void *reduce_data, 3451*0b57cec5SDimitry Andric void (*reduce)(void *, void *)); 3452*0b57cec5SDimitry Andric extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid); 3453*0b57cec5SDimitry Andric extern int __kmp_barrier_gomp_cancel(int gtid); 3454*0b57cec5SDimitry Andric 3455*0b57cec5SDimitry Andric /*! 3456*0b57cec5SDimitry Andric * Tell the fork call which compiler generated the fork call, and therefore how 3457*0b57cec5SDimitry Andric * to deal with the call. 3458*0b57cec5SDimitry Andric */ 3459*0b57cec5SDimitry Andric enum fork_context_e { 3460*0b57cec5SDimitry Andric fork_context_gnu, /**< Called from GNU generated code, so must not invoke the 3461*0b57cec5SDimitry Andric microtask internally. */ 3462*0b57cec5SDimitry Andric fork_context_intel, /**< Called from Intel generated code. */ 3463*0b57cec5SDimitry Andric fork_context_last 3464*0b57cec5SDimitry Andric }; 3465*0b57cec5SDimitry Andric extern int __kmp_fork_call(ident_t *loc, int gtid, 3466*0b57cec5SDimitry Andric enum fork_context_e fork_context, kmp_int32 argc, 3467*0b57cec5SDimitry Andric microtask_t microtask, launch_t invoker, 3468*0b57cec5SDimitry Andric /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 3469*0b57cec5SDimitry Andric #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX 3470*0b57cec5SDimitry Andric va_list *ap 3471*0b57cec5SDimitry Andric #else 3472*0b57cec5SDimitry Andric va_list ap 3473*0b57cec5SDimitry Andric #endif 3474*0b57cec5SDimitry Andric ); 3475*0b57cec5SDimitry Andric 3476*0b57cec5SDimitry Andric extern void __kmp_join_call(ident_t *loc, int gtid 3477*0b57cec5SDimitry Andric #if OMPT_SUPPORT 3478*0b57cec5SDimitry Andric , 3479*0b57cec5SDimitry Andric enum fork_context_e fork_context 3480*0b57cec5SDimitry Andric #endif 3481*0b57cec5SDimitry Andric , 3482*0b57cec5SDimitry Andric int exit_teams = 0); 3483*0b57cec5SDimitry Andric 3484*0b57cec5SDimitry Andric extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); 3485*0b57cec5SDimitry Andric extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team); 3486*0b57cec5SDimitry Andric extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team); 3487*0b57cec5SDimitry Andric extern int __kmp_invoke_task_func(int gtid); 3488*0b57cec5SDimitry Andric extern void __kmp_run_before_invoked_task(int gtid, int tid, 3489*0b57cec5SDimitry Andric kmp_info_t *this_thr, 3490*0b57cec5SDimitry Andric kmp_team_t *team); 3491*0b57cec5SDimitry Andric extern void __kmp_run_after_invoked_task(int gtid, int tid, 3492*0b57cec5SDimitry Andric kmp_info_t *this_thr, 3493*0b57cec5SDimitry Andric kmp_team_t *team); 3494*0b57cec5SDimitry Andric 3495*0b57cec5SDimitry Andric // should never have been exported 3496*0b57cec5SDimitry Andric KMP_EXPORT int __kmpc_invoke_task_func(int gtid); 3497*0b57cec5SDimitry Andric extern int __kmp_invoke_teams_master(int gtid); 3498*0b57cec5SDimitry Andric extern void __kmp_teams_master(int gtid); 3499*0b57cec5SDimitry Andric extern int __kmp_aux_get_team_num(); 3500*0b57cec5SDimitry Andric extern int __kmp_aux_get_num_teams(); 3501*0b57cec5SDimitry Andric extern void __kmp_save_internal_controls(kmp_info_t *thread); 3502*0b57cec5SDimitry Andric extern void __kmp_user_set_library(enum library_type arg); 3503*0b57cec5SDimitry Andric extern void __kmp_aux_set_library(enum library_type arg); 3504*0b57cec5SDimitry Andric extern void __kmp_aux_set_stacksize(size_t arg); 3505*0b57cec5SDimitry Andric extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid); 3506*0b57cec5SDimitry Andric extern void __kmp_aux_set_defaults(char const *str, int len); 3507*0b57cec5SDimitry Andric 3508*0b57cec5SDimitry Andric /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */ 3509*0b57cec5SDimitry Andric void kmpc_set_blocktime(int arg); 3510*0b57cec5SDimitry Andric void ompc_set_nested(int flag); 3511*0b57cec5SDimitry Andric void ompc_set_dynamic(int flag); 3512*0b57cec5SDimitry Andric void ompc_set_num_threads(int arg); 3513*0b57cec5SDimitry Andric 3514*0b57cec5SDimitry Andric extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, 3515*0b57cec5SDimitry Andric kmp_team_t *team, int tid); 3516*0b57cec5SDimitry Andric extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr); 3517*0b57cec5SDimitry Andric extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 3518*0b57cec5SDimitry Andric kmp_tasking_flags_t *flags, 3519*0b57cec5SDimitry Andric size_t sizeof_kmp_task_t, 3520*0b57cec5SDimitry Andric size_t sizeof_shareds, 3521*0b57cec5SDimitry Andric kmp_routine_entry_t task_entry); 3522*0b57cec5SDimitry Andric extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, 3523*0b57cec5SDimitry Andric kmp_team_t *team, int tid, 3524*0b57cec5SDimitry Andric int set_curr_task); 3525*0b57cec5SDimitry Andric extern void __kmp_finish_implicit_task(kmp_info_t *this_thr); 3526*0b57cec5SDimitry Andric extern void __kmp_free_implicit_task(kmp_info_t *this_thr); 3527*0b57cec5SDimitry Andric 3528*0b57cec5SDimitry Andric extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 3529*0b57cec5SDimitry Andric int gtid, 3530*0b57cec5SDimitry Andric kmp_task_t *task); 3531*0b57cec5SDimitry Andric extern void __kmp_fulfill_event(kmp_event_t *event); 3532*0b57cec5SDimitry Andric 3533*0b57cec5SDimitry Andric int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, 3534*0b57cec5SDimitry Andric kmp_flag_32 *flag, int final_spin, 3535*0b57cec5SDimitry Andric int *thread_finished, 3536*0b57cec5SDimitry Andric #if USE_ITT_BUILD 3537*0b57cec5SDimitry Andric void *itt_sync_obj, 3538*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 3539*0b57cec5SDimitry Andric kmp_int32 is_constrained); 3540*0b57cec5SDimitry Andric int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, 3541*0b57cec5SDimitry Andric kmp_flag_64 *flag, int final_spin, 3542*0b57cec5SDimitry Andric int *thread_finished, 3543*0b57cec5SDimitry Andric #if USE_ITT_BUILD 3544*0b57cec5SDimitry Andric void *itt_sync_obj, 3545*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 3546*0b57cec5SDimitry Andric kmp_int32 is_constrained); 3547*0b57cec5SDimitry Andric int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, 3548*0b57cec5SDimitry Andric kmp_flag_oncore *flag, int final_spin, 3549*0b57cec5SDimitry Andric int *thread_finished, 3550*0b57cec5SDimitry Andric #if USE_ITT_BUILD 3551*0b57cec5SDimitry Andric void *itt_sync_obj, 3552*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 3553*0b57cec5SDimitry Andric kmp_int32 is_constrained); 3554*0b57cec5SDimitry Andric 3555*0b57cec5SDimitry Andric extern void __kmp_free_task_team(kmp_info_t *thread, 3556*0b57cec5SDimitry Andric kmp_task_team_t *task_team); 3557*0b57cec5SDimitry Andric extern void __kmp_reap_task_teams(void); 3558*0b57cec5SDimitry Andric extern void __kmp_wait_to_unref_task_teams(void); 3559*0b57cec5SDimitry Andric extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, 3560*0b57cec5SDimitry Andric int always); 3561*0b57cec5SDimitry Andric extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team); 3562*0b57cec5SDimitry Andric extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team 3563*0b57cec5SDimitry Andric #if USE_ITT_BUILD 3564*0b57cec5SDimitry Andric , 3565*0b57cec5SDimitry Andric void *itt_sync_obj 3566*0b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 3567*0b57cec5SDimitry Andric , 3568*0b57cec5SDimitry Andric int wait = 1); 3569*0b57cec5SDimitry Andric extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, 3570*0b57cec5SDimitry Andric int gtid); 3571*0b57cec5SDimitry Andric 3572*0b57cec5SDimitry Andric extern int __kmp_is_address_mapped(void *addr); 3573*0b57cec5SDimitry Andric extern kmp_uint64 __kmp_hardware_timestamp(void); 3574*0b57cec5SDimitry Andric 3575*0b57cec5SDimitry Andric #if KMP_OS_UNIX 3576*0b57cec5SDimitry Andric extern int __kmp_read_from_file(char const *path, char const *format, ...); 3577*0b57cec5SDimitry Andric #endif 3578*0b57cec5SDimitry Andric 3579*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3580*0b57cec5SDimitry Andric // 3581*0b57cec5SDimitry Andric // Assembly routines that have no compiler intrinsic replacement 3582*0b57cec5SDimitry Andric // 3583*0b57cec5SDimitry Andric 3584*0b57cec5SDimitry Andric extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, 3585*0b57cec5SDimitry Andric void *argv[] 3586*0b57cec5SDimitry Andric #if OMPT_SUPPORT 3587*0b57cec5SDimitry Andric , 3588*0b57cec5SDimitry Andric void **exit_frame_ptr 3589*0b57cec5SDimitry Andric #endif 3590*0b57cec5SDimitry Andric ); 3591*0b57cec5SDimitry Andric 3592*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3593*0b57cec5SDimitry Andric 3594*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags); 3595*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end(ident_t *); 3596*0b57cec5SDimitry Andric 3597*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data, 3598*0b57cec5SDimitry Andric kmpc_ctor_vec ctor, 3599*0b57cec5SDimitry Andric kmpc_cctor_vec cctor, 3600*0b57cec5SDimitry Andric kmpc_dtor_vec dtor, 3601*0b57cec5SDimitry Andric size_t vector_length); 3602*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data, 3603*0b57cec5SDimitry Andric kmpc_ctor ctor, kmpc_cctor cctor, 3604*0b57cec5SDimitry Andric kmpc_dtor dtor); 3605*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid, 3606*0b57cec5SDimitry Andric void *data, size_t size); 3607*0b57cec5SDimitry Andric 3608*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *); 3609*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *); 3610*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *); 3611*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *); 3612*0b57cec5SDimitry Andric 3613*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *); 3614*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, 3615*0b57cec5SDimitry Andric kmpc_micro microtask, ...); 3616*0b57cec5SDimitry Andric 3617*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid); 3618*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid); 3619*0b57cec5SDimitry Andric 3620*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_flush(ident_t *); 3621*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid); 3622*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 3623*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 3624*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid); 3625*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid); 3626*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid, 3627*0b57cec5SDimitry Andric kmp_critical_name *); 3628*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid, 3629*0b57cec5SDimitry Andric kmp_critical_name *); 3630*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid, 3631*0b57cec5SDimitry Andric kmp_critical_name *, uint32_t hint); 3632*0b57cec5SDimitry Andric 3633*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid); 3634*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid); 3635*0b57cec5SDimitry Andric 3636*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *, 3637*0b57cec5SDimitry Andric kmp_int32 global_tid); 3638*0b57cec5SDimitry Andric 3639*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 3640*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 3641*0b57cec5SDimitry Andric 3642*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid, 3643*0b57cec5SDimitry Andric kmp_int32 schedtype, kmp_int32 *plastiter, 3644*0b57cec5SDimitry Andric kmp_int *plower, kmp_int *pupper, 3645*0b57cec5SDimitry Andric kmp_int *pstride, kmp_int incr, 3646*0b57cec5SDimitry Andric kmp_int chunk); 3647*0b57cec5SDimitry Andric 3648*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 3649*0b57cec5SDimitry Andric 3650*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 3651*0b57cec5SDimitry Andric size_t cpy_size, void *cpy_data, 3652*0b57cec5SDimitry Andric void (*cpy_func)(void *, void *), 3653*0b57cec5SDimitry Andric kmp_int32 didit); 3654*0b57cec5SDimitry Andric 3655*0b57cec5SDimitry Andric extern void KMPC_SET_NUM_THREADS(int arg); 3656*0b57cec5SDimitry Andric extern void KMPC_SET_DYNAMIC(int flag); 3657*0b57cec5SDimitry Andric extern void KMPC_SET_NESTED(int flag); 3658*0b57cec5SDimitry Andric 3659*0b57cec5SDimitry Andric /* OMP 3.0 tasking interface routines */ 3660*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, 3661*0b57cec5SDimitry Andric kmp_task_t *new_task); 3662*0b57cec5SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 3663*0b57cec5SDimitry Andric kmp_int32 flags, 3664*0b57cec5SDimitry Andric size_t sizeof_kmp_task_t, 3665*0b57cec5SDimitry Andric size_t sizeof_shareds, 3666*0b57cec5SDimitry Andric kmp_routine_entry_t task_entry); 3667*0b57cec5SDimitry Andric KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 3668*0b57cec5SDimitry Andric kmp_int32 flags, 3669*0b57cec5SDimitry Andric size_t sizeof_kmp_task_t, 3670*0b57cec5SDimitry Andric size_t sizeof_shareds, 3671*0b57cec5SDimitry Andric kmp_routine_entry_t task_entry, 3672*0b57cec5SDimitry Andric kmp_int64 device_id); 3673*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, 3674*0b57cec5SDimitry Andric kmp_task_t *task); 3675*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, 3676*0b57cec5SDimitry Andric kmp_task_t *task); 3677*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, 3678*0b57cec5SDimitry Andric kmp_task_t *new_task); 3679*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid); 3680*0b57cec5SDimitry Andric 3681*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, 3682*0b57cec5SDimitry Andric int end_part); 3683*0b57cec5SDimitry Andric 3684*0b57cec5SDimitry Andric #if TASK_UNUSED 3685*0b57cec5SDimitry Andric void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task); 3686*0b57cec5SDimitry Andric void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, 3687*0b57cec5SDimitry Andric kmp_task_t *task); 3688*0b57cec5SDimitry Andric #endif // TASK_UNUSED 3689*0b57cec5SDimitry Andric 3690*0b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3691*0b57cec5SDimitry Andric 3692*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid); 3693*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid); 3694*0b57cec5SDimitry Andric 3695*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps( 3696*0b57cec5SDimitry Andric ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, 3697*0b57cec5SDimitry Andric kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 3698*0b57cec5SDimitry Andric kmp_depend_info_t *noalias_dep_list); 3699*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, 3700*0b57cec5SDimitry Andric kmp_int32 ndeps, 3701*0b57cec5SDimitry Andric kmp_depend_info_t *dep_list, 3702*0b57cec5SDimitry Andric kmp_int32 ndeps_noalias, 3703*0b57cec5SDimitry Andric kmp_depend_info_t *noalias_dep_list); 3704*0b57cec5SDimitry Andric extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, 3705*0b57cec5SDimitry Andric bool serialize_immediate); 3706*0b57cec5SDimitry Andric 3707*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, 3708*0b57cec5SDimitry Andric kmp_int32 cncl_kind); 3709*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, 3710*0b57cec5SDimitry Andric kmp_int32 cncl_kind); 3711*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid); 3712*0b57cec5SDimitry Andric KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); 3713*0b57cec5SDimitry Andric 3714*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask); 3715*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask); 3716*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, 3717*0b57cec5SDimitry Andric kmp_int32 if_val, kmp_uint64 *lb, 3718*0b57cec5SDimitry Andric kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup, 3719*0b57cec5SDimitry Andric kmp_int32 sched, kmp_uint64 grainsize, 3720*0b57cec5SDimitry Andric void *task_dup); 3721*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data); 3722*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data); 3723*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d); 3724*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, 3725*0b57cec5SDimitry Andric int is_ws, int num, 3726*0b57cec5SDimitry Andric void *data); 3727*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, 3728*0b57cec5SDimitry Andric int num, void *data); 3729*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, 3730*0b57cec5SDimitry Andric int is_ws); 3731*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity( 3732*0b57cec5SDimitry Andric ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins, 3733*0b57cec5SDimitry Andric kmp_task_affinity_info_t *affin_list); 3734*0b57cec5SDimitry Andric 3735*0b57cec5SDimitry Andric /* Lock interface routines (fast versions with gtid passed in) */ 3736*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, 3737*0b57cec5SDimitry Andric void **user_lock); 3738*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, 3739*0b57cec5SDimitry Andric void **user_lock); 3740*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, 3741*0b57cec5SDimitry Andric void **user_lock); 3742*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, 3743*0b57cec5SDimitry Andric void **user_lock); 3744*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); 3745*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, 3746*0b57cec5SDimitry Andric void **user_lock); 3747*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, 3748*0b57cec5SDimitry Andric void **user_lock); 3749*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, 3750*0b57cec5SDimitry Andric void **user_lock); 3751*0b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); 3752*0b57cec5SDimitry Andric KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, 3753*0b57cec5SDimitry Andric void **user_lock); 3754*0b57cec5SDimitry Andric 3755*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, 3756*0b57cec5SDimitry Andric void **user_lock, uintptr_t hint); 3757*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 3758*0b57cec5SDimitry Andric void **user_lock, 3759*0b57cec5SDimitry Andric uintptr_t hint); 3760*0b57cec5SDimitry Andric 3761*0b57cec5SDimitry Andric /* Interface to fast scalable reduce methods routines */ 3762*0b57cec5SDimitry Andric 3763*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( 3764*0b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 3765*0b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3766*0b57cec5SDimitry Andric kmp_critical_name *lck); 3767*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3768*0b57cec5SDimitry Andric kmp_critical_name *lck); 3769*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_reduce( 3770*0b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 3771*0b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3772*0b57cec5SDimitry Andric kmp_critical_name *lck); 3773*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3774*0b57cec5SDimitry Andric kmp_critical_name *lck); 3775*0b57cec5SDimitry Andric 3776*0b57cec5SDimitry Andric /* Internal fast reduction routines */ 3777*0b57cec5SDimitry Andric 3778*0b57cec5SDimitry Andric extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method( 3779*0b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 3780*0b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3781*0b57cec5SDimitry Andric kmp_critical_name *lck); 3782*0b57cec5SDimitry Andric 3783*0b57cec5SDimitry Andric // this function is for testing set/get/determine reduce method 3784*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void); 3785*0b57cec5SDimitry Andric 3786*0b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); 3787*0b57cec5SDimitry Andric KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); 3788*0b57cec5SDimitry Andric 3789*0b57cec5SDimitry Andric // C++ port 3790*0b57cec5SDimitry Andric // missing 'extern "C"' declarations 3791*0b57cec5SDimitry Andric 3792*0b57cec5SDimitry Andric KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc); 3793*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid); 3794*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 3795*0b57cec5SDimitry Andric kmp_int32 num_threads); 3796*0b57cec5SDimitry Andric 3797*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 3798*0b57cec5SDimitry Andric int proc_bind); 3799*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 3800*0b57cec5SDimitry Andric kmp_int32 num_teams, 3801*0b57cec5SDimitry Andric kmp_int32 num_threads); 3802*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, 3803*0b57cec5SDimitry Andric kmpc_micro microtask, ...); 3804*0b57cec5SDimitry Andric struct kmp_dim { // loop bounds info casted to kmp_int64 3805*0b57cec5SDimitry Andric kmp_int64 lo; // lower 3806*0b57cec5SDimitry Andric kmp_int64 up; // upper 3807*0b57cec5SDimitry Andric kmp_int64 st; // stride 3808*0b57cec5SDimitry Andric }; 3809*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 3810*0b57cec5SDimitry Andric kmp_int32 num_dims, 3811*0b57cec5SDimitry Andric const struct kmp_dim *dims); 3812*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, 3813*0b57cec5SDimitry Andric const kmp_int64 *vec); 3814*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, 3815*0b57cec5SDimitry Andric const kmp_int64 *vec); 3816*0b57cec5SDimitry Andric KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 3817*0b57cec5SDimitry Andric 3818*0b57cec5SDimitry Andric KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, 3819*0b57cec5SDimitry Andric void *data, size_t size, 3820*0b57cec5SDimitry Andric void ***cache); 3821*0b57cec5SDimitry Andric 3822*0b57cec5SDimitry Andric // Symbols for MS mutual detection. 3823*0b57cec5SDimitry Andric extern int _You_must_link_with_exactly_one_OpenMP_library; 3824*0b57cec5SDimitry Andric extern int _You_must_link_with_Intel_OpenMP_library; 3825*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4) 3826*0b57cec5SDimitry Andric extern int _You_must_link_with_Microsoft_OpenMP_library; 3827*0b57cec5SDimitry Andric #endif 3828*0b57cec5SDimitry Andric 3829*0b57cec5SDimitry Andric // The routines below are not exported. 3830*0b57cec5SDimitry Andric // Consider making them 'static' in corresponding source files. 3831*0b57cec5SDimitry Andric void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, 3832*0b57cec5SDimitry Andric void *data_addr, size_t pc_size); 3833*0b57cec5SDimitry Andric struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, 3834*0b57cec5SDimitry Andric void *data_addr, 3835*0b57cec5SDimitry Andric size_t pc_size); 3836*0b57cec5SDimitry Andric void __kmp_threadprivate_resize_cache(int newCapacity); 3837*0b57cec5SDimitry Andric void __kmp_cleanup_threadprivate_caches(); 3838*0b57cec5SDimitry Andric 3839*0b57cec5SDimitry Andric // ompc_, kmpc_ entries moved from omp.h. 3840*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS 3841*0b57cec5SDimitry Andric #define KMPC_CONVENTION __cdecl 3842*0b57cec5SDimitry Andric #else 3843*0b57cec5SDimitry Andric #define KMPC_CONVENTION 3844*0b57cec5SDimitry Andric #endif 3845*0b57cec5SDimitry Andric 3846*0b57cec5SDimitry Andric #ifndef __OMP_H 3847*0b57cec5SDimitry Andric typedef enum omp_sched_t { 3848*0b57cec5SDimitry Andric omp_sched_static = 1, 3849*0b57cec5SDimitry Andric omp_sched_dynamic = 2, 3850*0b57cec5SDimitry Andric omp_sched_guided = 3, 3851*0b57cec5SDimitry Andric omp_sched_auto = 4 3852*0b57cec5SDimitry Andric } omp_sched_t; 3853*0b57cec5SDimitry Andric typedef void *kmp_affinity_mask_t; 3854*0b57cec5SDimitry Andric #endif 3855*0b57cec5SDimitry Andric 3856*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); 3857*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); 3858*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); 3859*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); 3860*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION 3861*0b57cec5SDimitry Andric kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); 3862*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION 3863*0b57cec5SDimitry Andric kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); 3864*0b57cec5SDimitry Andric KMP_EXPORT int KMPC_CONVENTION 3865*0b57cec5SDimitry Andric kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); 3866*0b57cec5SDimitry Andric 3867*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); 3868*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); 3869*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); 3870*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); 3871*0b57cec5SDimitry Andric KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int); 3872*0b57cec5SDimitry Andric 3873*0b57cec5SDimitry Andric enum kmp_target_offload_kind { 3874*0b57cec5SDimitry Andric tgt_disabled = 0, 3875*0b57cec5SDimitry Andric tgt_default = 1, 3876*0b57cec5SDimitry Andric tgt_mandatory = 2 3877*0b57cec5SDimitry Andric }; 3878*0b57cec5SDimitry Andric typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; 3879*0b57cec5SDimitry Andric // Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise 3880*0b57cec5SDimitry Andric extern kmp_target_offload_kind_t __kmp_target_offload; 3881*0b57cec5SDimitry Andric extern int __kmpc_get_target_offload(); 3882*0b57cec5SDimitry Andric 3883*0b57cec5SDimitry Andric // Constants used in libomptarget 3884*0b57cec5SDimitry Andric #define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device. 3885*0b57cec5SDimitry Andric #define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure. 3886*0b57cec5SDimitry Andric #define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices". 3887*0b57cec5SDimitry Andric 3888*0b57cec5SDimitry Andric // OMP Pause Resource 3889*0b57cec5SDimitry Andric 3890*0b57cec5SDimitry Andric // The following enum is used both to set the status in __kmp_pause_status, and 3891*0b57cec5SDimitry Andric // as the internal equivalent of the externally-visible omp_pause_resource_t. 3892*0b57cec5SDimitry Andric typedef enum kmp_pause_status_t { 3893*0b57cec5SDimitry Andric kmp_not_paused = 0, // status is not paused, or, requesting resume 3894*0b57cec5SDimitry Andric kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause 3895*0b57cec5SDimitry Andric kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause 3896*0b57cec5SDimitry Andric } kmp_pause_status_t; 3897*0b57cec5SDimitry Andric 3898*0b57cec5SDimitry Andric // This stores the pause state of the runtime 3899*0b57cec5SDimitry Andric extern kmp_pause_status_t __kmp_pause_status; 3900*0b57cec5SDimitry Andric extern int __kmpc_pause_resource(kmp_pause_status_t level); 3901*0b57cec5SDimitry Andric extern int __kmp_pause_resource(kmp_pause_status_t level); 3902*0b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads. 3903*0b57cec5SDimitry Andric extern void __kmp_resume_if_soft_paused(); 3904*0b57cec5SDimitry Andric // Hard resume simply resets the status to not paused. Library will appear to 3905*0b57cec5SDimitry Andric // be uninitialized after hard pause. Let OMP constructs trigger required 3906*0b57cec5SDimitry Andric // initializations. 3907*0b57cec5SDimitry Andric static inline void __kmp_resume_if_hard_paused() { 3908*0b57cec5SDimitry Andric if (__kmp_pause_status == kmp_hard_paused) { 3909*0b57cec5SDimitry Andric __kmp_pause_status = kmp_not_paused; 3910*0b57cec5SDimitry Andric } 3911*0b57cec5SDimitry Andric } 3912*0b57cec5SDimitry Andric 3913*0b57cec5SDimitry Andric #ifdef __cplusplus 3914*0b57cec5SDimitry Andric } 3915*0b57cec5SDimitry Andric #endif 3916*0b57cec5SDimitry Andric 3917*0b57cec5SDimitry Andric #endif /* KMP_H */ 3918