/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
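  // Usage sketch (illustrative only; 'm' is a hypothetical
  // KMPAffinity::Mask*). Every Mask implementation in this file exposes the
  // same iteration protocol: begin() yields the first set bit, next() the
  // following one, and end() the exhaustion sentinel (-1 for hwloc, the
  // total bit count for the native backends). A loop over the processors
  // present in a mask therefore looks the same for all backends:
  //   for (int i = m->begin(); i != m->end(); i = m->next(i))
  //     /* i is the OS index of a processor in the mask */;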
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
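// Illustrative sketch of how a backend is driven through the KMPAffinity
// interface ('affinity' is a hypothetical pointer to whichever implementation
// the runtime selected; the selection logic itself lives elsewhere):
//   affinity->determine_capable("KMP_AFFINITY");
//   KMPAffinity::Mask *m = affinity->allocate_mask();
//   m->set(0); // request processor 0 only
//   m->set_system_affinity(/*abort_on_error=*/false); // pin current thread
//   affinity->deallocate_mask(m);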

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
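// The constants above feed direct syscall(2) invocations rather than the
// libc sched_{get,set}affinity wrappers, presumably so the runtime still
// builds on older systems whose libc lacks the wrappers. A minimal sketch of
// the pattern used by the Mask methods below ('buf' and its size stand in
// for the real mask buffer and __kmp_affin_mask_size):
//   unsigned char buf[128];
//   long ret = syscall(__NR_sched_getaffinity, 0 /* this thread */,
//                      sizeof(buf), buf);
//   // ret < 0: consult errno; ret >= 0: buf now holds the affinity bits.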
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
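    // The three accessors above locate bit i with plain index arithmetic:
    // element i / BITS_PER_MASK_T holds it, at position i % BITS_PER_MASK_T.
    // Worked example with mask_t = unsigned char (BITS_PER_MASK_T == 8):
    // processor 13 maps to mask[1], bit 5, i.e. mask[1] & ((mask_t)1 << 5).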
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
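  // Note on the array helpers: allocate_mask_array() hands the derived Mask
  // objects back through a base-class pointer, and indexing that pointer
  // directly would stride by sizeof(KMPAffinity::Mask) rather than
  // sizeof(Mask). index_mask_array() below therefore casts back to the
  // concrete type before applying operator[]. Illustrative caller code:
  //   KMPAffinity::Mask *arr = affinity->allocate_mask_array(8);
  //   KMPAffinity::Mask *third = affinity->index_mask_array(arr, 2);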
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
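    // Background for the two methods below: Windows exposes machines with
    // more than 64 logical processors through processor groups, each holding
    // at most 64 processors, and a thread's affinity may only name
    // processors within its single current group. When
    // __kmp_num_proc_groups > 1 the code therefore takes the GROUP_AFFINITY
    // path and requires the mask to cover exactly one group (see
    // get_proc_group()).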
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
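    // In the single-group branch of get_system_affinity() below there is no
    // GetThreadAffinityMask() to call (the Win32 API never gained one), so
    // the current mask is recovered indirectly: set the thread's affinity to
    // the process mask, capture the previous mask that
    // SetThreadAffinityMask() returns, then restore it.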
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
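// Illustrative reading of an Address (hypothetical machine with 2 packages,
// 2 cores per package, and 2 hardware threads per core, so depth == 3):
// labels run from outermost to innermost level, so labels = {1, 0, 1} names
// package 1, core 0, thread 1. isClose(b, 1) then asks whether two addresses
// agree on everything above the innermost level, i.e. whether they share a
// core.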

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
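// A strcmp-style comparator for qsort(): addresses are ordered
// lexicographically by topology label, outermost level first, and both
// operands must have the same depth. Usage sketch (mirrors
// hierarchy_info::init() below; 'table' and 'n' are placeholders):
//   qsort(table, n, sizeof(AddrUnsPair), __kmp_affinity_cmp_Address_labels);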

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
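  // Worked example (the machine from the comment above, numPerLevel =
  // {2, 4, 4, 1, ...}, so depth == 4): init() below sets skipPerLevel[0] = 1
  // and skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1], yielding
  // skipPerLevel = {1, 2, 8, 32, ...}; skipPerLevel[i] is therefore the
  // number of leaves (threads) spanned by one node at level i. Levels at or
  // above depth are then doubled (64, 128, ...) to leave headroom for
  // oversubscription.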

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Explicitly initialize the data fields here to prevent use of dirty
       values observed when the static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

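  // Worked example for resize() below (hypothetical numbers): a hierarchy
  // built for base_num_threads == 32 with skipPerLevel[depth-1] == 32 that
  // is asked for nproc == 100 doubles its top level twice (32 -> 64 -> 128),
  // growing depth each time and reallocating the arrays only if maxLevels is
  // exhausted.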
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H