10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_affinity.h -- header for affinity management 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H 140b57cec5SDimitry Andric #define KMP_AFFINITY_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "kmp.h" 170b57cec5SDimitry Andric #include "kmp_os.h" 180eae32dcSDimitry Andric #include <limits> 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 210b57cec5SDimitry Andric #if KMP_USE_HWLOC 220b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity { 230b57cec5SDimitry Andric public: 240b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 250b57cec5SDimitry Andric hwloc_cpuset_t mask; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric public: 280b57cec5SDimitry Andric Mask() { 290b57cec5SDimitry Andric mask = hwloc_bitmap_alloc(); 300b57cec5SDimitry Andric this->zero(); 310b57cec5SDimitry Andric } 320b57cec5SDimitry Andric ~Mask() { hwloc_bitmap_free(mask); } 330b57cec5SDimitry Andric void set(int i) override { hwloc_bitmap_set(mask, i); } 340b57cec5SDimitry Andric bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } 350b57cec5SDimitry Andric void clear(int i) override { hwloc_bitmap_clr(mask, i); } 360b57cec5SDimitry Andric void zero() override { hwloc_bitmap_zero(mask); } 375f757f3fSDimitry Andric bool empty() const override { return hwloc_bitmap_iszero(mask); } 380b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 390b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 400b57cec5SDimitry Andric hwloc_bitmap_copy(mask, convert->mask); 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 430b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 440b57cec5SDimitry Andric hwloc_bitmap_and(mask, mask, convert->mask); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 470b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 480b57cec5SDimitry Andric hwloc_bitmap_or(mask, mask, convert->mask); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric void bitwise_not() override { hwloc_bitmap_not(mask, mask); } 515f757f3fSDimitry Andric bool is_equal(const KMPAffinity::Mask *rhs) const override { 525f757f3fSDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 535f757f3fSDimitry Andric return hwloc_bitmap_isequal(mask, convert->mask); 545f757f3fSDimitry Andric } 550b57cec5SDimitry Andric int begin() const override { return hwloc_bitmap_first(mask); } 560b57cec5SDimitry Andric int end() const override { return -1; } 570b57cec5SDimitry Andric int next(int previous) const override { 580b57cec5SDimitry Andric return hwloc_bitmap_next(mask, previous); 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 610b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 620b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 63e8d8bef9SDimitry Andric long retval = 640b57cec5SDimitry Andric hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 650b57cec5SDimitry Andric if (retval >= 0) { 660b57cec5SDimitry Andric return 0; 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric int error = errno; 690b57cec5SDimitry Andric if (abort_on_error) { 7006c3fb27SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"), 7106c3fb27SDimitry Andric KMP_ERR(error), __kmp_msg_null); 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric return error; 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 760b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 77e8d8bef9SDimitry Andric "Illegal set affinity operation when not capable"); 78e8d8bef9SDimitry Andric long retval = 790b57cec5SDimitry Andric hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 800b57cec5SDimitry Andric if (retval >= 0) { 810b57cec5SDimitry Andric return 0; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric int error = errno; 840b57cec5SDimitry Andric if (abort_on_error) { 8506c3fb27SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"), 8606c3fb27SDimitry Andric KMP_ERR(error), __kmp_msg_null); 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric return error; 890b57cec5SDimitry Andric } 90e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 91e8d8bef9SDimitry Andric int set_process_affinity(bool abort_on_error) const override { 92e8d8bef9SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 93e8d8bef9SDimitry Andric "Illegal set process affinity operation when not capable"); 94e8d8bef9SDimitry Andric int error = 0; 95e8d8bef9SDimitry Andric const hwloc_topology_support *support = 96e8d8bef9SDimitry Andric hwloc_topology_get_support(__kmp_hwloc_topology); 97e8d8bef9SDimitry Andric if (support->cpubind->set_proc_cpubind) { 98e8d8bef9SDimitry Andric int retval; 99e8d8bef9SDimitry Andric retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, 100e8d8bef9SDimitry Andric HWLOC_CPUBIND_PROCESS); 101e8d8bef9SDimitry Andric if (retval >= 0) 102e8d8bef9SDimitry Andric return 0; 103e8d8bef9SDimitry Andric error = errno; 104e8d8bef9SDimitry Andric if (abort_on_error) 10506c3fb27SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"), 10606c3fb27SDimitry Andric KMP_ERR(error), __kmp_msg_null); 107e8d8bef9SDimitry Andric } 108e8d8bef9SDimitry Andric return error; 109e8d8bef9SDimitry Andric } 110e8d8bef9SDimitry Andric #endif 1110b57cec5SDimitry Andric int get_proc_group() const override { 1120b57cec5SDimitry Andric int group = -1; 1130b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1140b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 1150b57cec5SDimitry Andric return 1; 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 1180b57cec5SDimitry Andric // On windows, the long type is always 32 bits 1190b57cec5SDimitry Andric unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); 1200b57cec5SDimitry Andric unsigned long second_32_bits = 1210b57cec5SDimitry Andric hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); 1220b57cec5SDimitry Andric if (first_32_bits == 0 && second_32_bits == 0) { 1230b57cec5SDimitry Andric continue; 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric if (group >= 0) { 1260b57cec5SDimitry Andric return -1; 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric group = i; 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1310b57cec5SDimitry Andric return group; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric }; 1340b57cec5SDimitry Andric void determine_capable(const char *var) override { 1350b57cec5SDimitry Andric const hwloc_topology_support *topology_support; 1360b57cec5SDimitry Andric if (__kmp_hwloc_topology == NULL) { 1370b57cec5SDimitry Andric if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { 1380b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 139bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) { 1400b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); 1410b57cec5SDimitry Andric } 142bdd1243dSDimitry Andric } 1430b57cec5SDimitry Andric if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { 1440b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 145bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) { 1460b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric } 149bdd1243dSDimitry Andric } 1500b57cec5SDimitry Andric topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); 1510b57cec5SDimitry Andric // Is the system capable of setting/getting this thread's affinity? 1520b57cec5SDimitry Andric // Also, is topology discovery possible? (pu indicates ability to discover 1530b57cec5SDimitry Andric // processing units). And finally, were there no errors when calling any 1540b57cec5SDimitry Andric // hwloc_* API functions? 1550b57cec5SDimitry Andric if (topology_support && topology_support->cpubind->set_thisthread_cpubind && 1560b57cec5SDimitry Andric topology_support->cpubind->get_thisthread_cpubind && 1570b57cec5SDimitry Andric topology_support->discovery->pu && !__kmp_hwloc_error) { 1580b57cec5SDimitry Andric // enables affinity according to KMP_AFFINITY_CAPABLE() macro 1590b57cec5SDimitry Andric KMP_AFFINITY_ENABLE(TRUE); 1600b57cec5SDimitry Andric } else { 1610b57cec5SDimitry Andric // indicate that hwloc didn't work and disable affinity 1620b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1630b57cec5SDimitry Andric KMP_AFFINITY_DISABLE(); 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric void bind_thread(int which) override { 1670b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 1680b57cec5SDimitry Andric "Illegal set affinity operation when not capable"); 1690b57cec5SDimitry Andric KMPAffinity::Mask *mask; 1700b57cec5SDimitry Andric KMP_CPU_ALLOC_ON_STACK(mask); 1710b57cec5SDimitry Andric KMP_CPU_ZERO(mask); 1720b57cec5SDimitry Andric KMP_CPU_SET(which, mask); 1730b57cec5SDimitry Andric __kmp_set_system_affinity(mask, TRUE); 1740b57cec5SDimitry Andric KMP_CPU_FREE_FROM_STACK(mask); 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 1770b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 1780b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 1790b57cec5SDimitry Andric return new Mask[num]; 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 1820b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1830b57cec5SDimitry Andric delete[] hwloc_array; 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 1860b57cec5SDimitry Andric int index) override { 1870b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1880b57cec5SDimitry Andric return &(hwloc_array[index]); 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric api_type get_api_type() const override { return HWLOC; } 1910b57cec5SDimitry Andric }; 1920b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */ 1930b57cec5SDimitry Andric 194*439352acSDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX 1950b57cec5SDimitry Andric #if KMP_OS_LINUX 1960b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present 1970b57cec5SDimitry Andric in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on 1980b57cec5SDimitry Andric all systems of the same arch where they are defined, and they cannot change. 1990b57cec5SDimitry Andric stone forever. */ 2000b57cec5SDimitry Andric #include <sys/syscall.h> 2010b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM 2020b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2030b57cec5SDimitry Andric #define __NR_sched_setaffinity 241 2040b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241 2050b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2060b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2070b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2080b57cec5SDimitry Andric #define __NR_sched_getaffinity 242 2090b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242 2100b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2110b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2120b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64 2130b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2140b57cec5SDimitry Andric #define __NR_sched_setaffinity 122 2150b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122 2160b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2170b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2180b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2190b57cec5SDimitry Andric #define __NR_sched_getaffinity 123 2200b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123 2210b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2220b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2230b57cec5SDimitry Andric #elif KMP_ARCH_X86_64 2240b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2250b57cec5SDimitry Andric #define __NR_sched_setaffinity 203 2260b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203 2270b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2280b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2290b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2300b57cec5SDimitry Andric #define __NR_sched_getaffinity 204 2310b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204 2320b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2330b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2340b57cec5SDimitry Andric #elif KMP_ARCH_PPC64 2350b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2360b57cec5SDimitry Andric #define __NR_sched_setaffinity 222 2370b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222 2380b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2390b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2400b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2410b57cec5SDimitry Andric #define __NR_sched_getaffinity 223 2420b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223 2430b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2440b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2450b57cec5SDimitry Andric #elif KMP_ARCH_MIPS 2460b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2470b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239 2480b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239 2490b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2500b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2510b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2520b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240 2530b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240 2540b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2550b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2560b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64 2570b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2580b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195 2590b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195 2600b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2610b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2620b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2630b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196 2640b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196 2650b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2660b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 267bdd1243dSDimitry Andric #elif KMP_ARCH_LOONGARCH64 268bdd1243dSDimitry Andric #ifndef __NR_sched_setaffinity 269bdd1243dSDimitry Andric #define __NR_sched_setaffinity 122 270bdd1243dSDimitry Andric #elif __NR_sched_setaffinity != 122 271bdd1243dSDimitry Andric #error Wrong code for setaffinity system call. 272bdd1243dSDimitry Andric #endif /* __NR_sched_setaffinity */ 273bdd1243dSDimitry Andric #ifndef __NR_sched_getaffinity 274bdd1243dSDimitry Andric #define __NR_sched_getaffinity 123 275bdd1243dSDimitry Andric #elif __NR_sched_getaffinity != 123 276bdd1243dSDimitry Andric #error Wrong code for getaffinity system call. 277bdd1243dSDimitry Andric #endif /* __NR_sched_getaffinity */ 278bdd1243dSDimitry Andric #elif KMP_ARCH_RISCV64 279bdd1243dSDimitry Andric #ifndef __NR_sched_setaffinity 280bdd1243dSDimitry Andric #define __NR_sched_setaffinity 122 281bdd1243dSDimitry Andric #elif __NR_sched_setaffinity != 122 282bdd1243dSDimitry Andric #error Wrong code for setaffinity system call. 283bdd1243dSDimitry Andric #endif /* __NR_sched_setaffinity */ 284bdd1243dSDimitry Andric #ifndef __NR_sched_getaffinity 285bdd1243dSDimitry Andric #define __NR_sched_getaffinity 123 286bdd1243dSDimitry Andric #elif __NR_sched_getaffinity != 123 287bdd1243dSDimitry Andric #error Wrong code for getaffinity system call. 288bdd1243dSDimitry Andric #endif /* __NR_sched_getaffinity */ 2895f757f3fSDimitry Andric #elif KMP_ARCH_VE 2905f757f3fSDimitry Andric #ifndef __NR_sched_setaffinity 2915f757f3fSDimitry Andric #define __NR_sched_setaffinity 203 2925f757f3fSDimitry Andric #elif __NR_sched_setaffinity != 203 2935f757f3fSDimitry Andric #error Wrong code for setaffinity system call. 2945f757f3fSDimitry Andric #endif /* __NR_sched_setaffinity */ 2955f757f3fSDimitry Andric #ifndef __NR_sched_getaffinity 2965f757f3fSDimitry Andric #define __NR_sched_getaffinity 204 2975f757f3fSDimitry Andric #elif __NR_sched_getaffinity != 204 2985f757f3fSDimitry Andric #error Wrong code for getaffinity system call. 2995f757f3fSDimitry Andric #endif /* __NR_sched_getaffinity */ 3005f757f3fSDimitry Andric #elif KMP_ARCH_S390X 3015f757f3fSDimitry Andric #ifndef __NR_sched_setaffinity 3025f757f3fSDimitry Andric #define __NR_sched_setaffinity 239 3035f757f3fSDimitry Andric #elif __NR_sched_setaffinity != 239 3045f757f3fSDimitry Andric #error Wrong code for setaffinity system call. 3055f757f3fSDimitry Andric #endif /* __NR_sched_setaffinity */ 3065f757f3fSDimitry Andric #ifndef __NR_sched_getaffinity 3075f757f3fSDimitry Andric #define __NR_sched_getaffinity 240 3085f757f3fSDimitry Andric #elif __NR_sched_getaffinity != 240 3095f757f3fSDimitry Andric #error Wrong code for getaffinity system call. 3105f757f3fSDimitry Andric #endif /* __NR_sched_getaffinity */ 311bdd1243dSDimitry Andric #else 3120b57cec5SDimitry Andric #error Unknown or unsupported architecture 3130b57cec5SDimitry Andric #endif /* KMP_ARCH_* */ 314489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 315489b1cf2SDimitry Andric #include <pthread.h> 316489b1cf2SDimitry Andric #include <pthread_np.h> 317*439352acSDimitry Andric #elif KMP_OS_AIX 318*439352acSDimitry Andric #include <sys/dr.h> 319*439352acSDimitry Andric #include <sys/rset.h> 320*439352acSDimitry Andric #define VMI_MAXRADS 64 // Maximum number of RADs allowed by AIX. 321489b1cf2SDimitry Andric #endif 3220b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 3230b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 324e8d8bef9SDimitry Andric typedef unsigned long mask_t; 325e8d8bef9SDimitry Andric typedef decltype(__kmp_affin_mask_size) mask_size_type; 326e8d8bef9SDimitry Andric static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 327e8d8bef9SDimitry Andric static const mask_t ONE = 1; 328e8d8bef9SDimitry Andric mask_size_type get_num_mask_types() const { 329e8d8bef9SDimitry Andric return __kmp_affin_mask_size / sizeof(mask_t); 330e8d8bef9SDimitry Andric } 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric public: 3330b57cec5SDimitry Andric mask_t *mask; 3340b57cec5SDimitry Andric Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } 3350b57cec5SDimitry Andric ~Mask() { 3360b57cec5SDimitry Andric if (mask) 3370b57cec5SDimitry Andric __kmp_free(mask); 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric void set(int i) override { 340e8d8bef9SDimitry Andric mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T)); 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric bool is_set(int i) const override { 343e8d8bef9SDimitry Andric return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T))); 3440b57cec5SDimitry Andric } 3450b57cec5SDimitry Andric void clear(int i) override { 346e8d8bef9SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T)); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric void zero() override { 349e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 350e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 351e8d8bef9SDimitry Andric mask[i] = (mask_t)0; 3520b57cec5SDimitry Andric } 3535f757f3fSDimitry Andric bool empty() const override { 3545f757f3fSDimitry Andric mask_size_type e = get_num_mask_types(); 3555f757f3fSDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3565f757f3fSDimitry Andric if (mask[i] != (mask_t)0) 3575f757f3fSDimitry Andric return false; 3585f757f3fSDimitry Andric return true; 3595f757f3fSDimitry Andric } 3600b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 3610b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 362e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 363e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3640b57cec5SDimitry Andric mask[i] = convert->mask[i]; 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 3670b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 368e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 369e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3700b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 3730b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 374e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 375e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3760b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric void bitwise_not() override { 379e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 380e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3810b57cec5SDimitry Andric mask[i] = ~(mask[i]); 3820b57cec5SDimitry Andric } 3835f757f3fSDimitry Andric bool is_equal(const KMPAffinity::Mask *rhs) const override { 3845f757f3fSDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 3855f757f3fSDimitry Andric mask_size_type e = get_num_mask_types(); 3865f757f3fSDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3875f757f3fSDimitry Andric if (mask[i] != convert->mask[i]) 3885f757f3fSDimitry Andric return false; 3895f757f3fSDimitry Andric return true; 3905f757f3fSDimitry Andric } 3910b57cec5SDimitry Andric int begin() const override { 3920b57cec5SDimitry Andric int retval = 0; 3930b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 3940b57cec5SDimitry Andric ++retval; 3950b57cec5SDimitry Andric return retval; 3960b57cec5SDimitry Andric } 397e8d8bef9SDimitry Andric int end() const override { 398e8d8bef9SDimitry Andric int e; 399e8d8bef9SDimitry Andric __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e); 400e8d8bef9SDimitry Andric return e; 401e8d8bef9SDimitry Andric } 4020b57cec5SDimitry Andric int next(int previous) const override { 4030b57cec5SDimitry Andric int retval = previous + 1; 4040b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4050b57cec5SDimitry Andric ++retval; 4060b57cec5SDimitry Andric return retval; 4070b57cec5SDimitry Andric } 408*439352acSDimitry Andric #if KMP_OS_AIX 409*439352acSDimitry Andric // On AIX, we don't have a way to get CPU(s) a thread is bound to. 410*439352acSDimitry Andric // This routine is only used to get the full mask. 411*439352acSDimitry Andric int get_system_affinity(bool abort_on_error) override { 412*439352acSDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 413*439352acSDimitry Andric "Illegal get affinity operation when not capable"); 414*439352acSDimitry Andric 415*439352acSDimitry Andric (void)abort_on_error; 416*439352acSDimitry Andric 417*439352acSDimitry Andric // Set the mask with all CPUs that are available. 418*439352acSDimitry Andric for (int i = 0; i < __kmp_xproc; ++i) 419*439352acSDimitry Andric KMP_CPU_SET(i, this); 420*439352acSDimitry Andric return 0; 421*439352acSDimitry Andric } 422*439352acSDimitry Andric int set_system_affinity(bool abort_on_error) const override { 423*439352acSDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 424*439352acSDimitry Andric 425*439352acSDimitry Andric "Illegal set affinity operation when not capable"); 426*439352acSDimitry Andric 427*439352acSDimitry Andric int location; 428*439352acSDimitry Andric int gtid = __kmp_entry_gtid(); 429*439352acSDimitry Andric int tid = thread_self(); 430*439352acSDimitry Andric 431*439352acSDimitry Andric // Unbind the thread if it was bound to any processors before so that 432*439352acSDimitry Andric // we can bind the thread to CPUs specified by the mask not others. 433*439352acSDimitry Andric int retval = bindprocessor(BINDTHREAD, tid, PROCESSOR_CLASS_ANY); 434*439352acSDimitry Andric 435*439352acSDimitry Andric // On AIX, we can only bind to one instead of a set of CPUs with the 436*439352acSDimitry Andric // bindprocessor() system call. 437*439352acSDimitry Andric KMP_CPU_SET_ITERATE(location, this) { 438*439352acSDimitry Andric if (KMP_CPU_ISSET(location, this)) { 439*439352acSDimitry Andric retval = bindprocessor(BINDTHREAD, tid, location); 440*439352acSDimitry Andric if (retval == -1 && errno == 1) { 441*439352acSDimitry Andric rsid_t rsid; 442*439352acSDimitry Andric rsethandle_t rsh; 443*439352acSDimitry Andric // Put something in rsh to prevent compiler warning 444*439352acSDimitry Andric // about uninitalized use 445*439352acSDimitry Andric rsh = rs_alloc(RS_EMPTY); 446*439352acSDimitry Andric rsid.at_pid = getpid(); 447*439352acSDimitry Andric if (RS_DEFAULT_RSET != ra_getrset(R_PROCESS, rsid, 0, rsh)) { 448*439352acSDimitry Andric retval = ra_detachrset(R_PROCESS, rsid, 0); 449*439352acSDimitry Andric retval = bindprocessor(BINDTHREAD, tid, location); 450*439352acSDimitry Andric } 451*439352acSDimitry Andric } 452*439352acSDimitry Andric if (retval == 0) { 453*439352acSDimitry Andric KA_TRACE(10, ("__kmp_set_system_affinity: Done binding " 454*439352acSDimitry Andric "T#%d to cpu=%d.\n", 455*439352acSDimitry Andric gtid, location)); 456*439352acSDimitry Andric continue; 457*439352acSDimitry Andric } 458*439352acSDimitry Andric int error = errno; 459*439352acSDimitry Andric if (abort_on_error) { 460*439352acSDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "bindprocessor()"), 461*439352acSDimitry Andric KMP_ERR(error), __kmp_msg_null); 462*439352acSDimitry Andric KA_TRACE(10, ("__kmp_set_system_affinity: Error binding " 463*439352acSDimitry Andric "T#%d to cpu=%d, errno=%d.\n", 464*439352acSDimitry Andric gtid, location, error)); 465*439352acSDimitry Andric return error; 466*439352acSDimitry Andric } 467*439352acSDimitry Andric } 468*439352acSDimitry Andric } 469*439352acSDimitry Andric return 0; 470*439352acSDimitry Andric } 471*439352acSDimitry Andric #else // !KMP_OS_AIX 4720b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 4730b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 4740b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 475489b1cf2SDimitry Andric #if KMP_OS_LINUX 476e8d8bef9SDimitry Andric long retval = 4770b57cec5SDimitry Andric syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); 478489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 479fe6060f1SDimitry Andric int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, 480fe6060f1SDimitry Andric reinterpret_cast<cpuset_t *>(mask)); 4815ffd83dbSDimitry Andric int retval = (r == 0 ? 0 : -1); 482489b1cf2SDimitry Andric #endif 4830b57cec5SDimitry Andric if (retval >= 0) { 4840b57cec5SDimitry Andric return 0; 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric int error = errno; 4870b57cec5SDimitry Andric if (abort_on_error) { 48806c3fb27SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"), 48906c3fb27SDimitry Andric KMP_ERR(error), __kmp_msg_null); 4900b57cec5SDimitry Andric } 4910b57cec5SDimitry Andric return error; 4920b57cec5SDimitry Andric } 4930b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 4940b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 495e8d8bef9SDimitry Andric "Illegal set affinity operation when not capable"); 496489b1cf2SDimitry Andric #if KMP_OS_LINUX 497e8d8bef9SDimitry Andric long retval = 4980b57cec5SDimitry Andric syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); 499489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 500fe6060f1SDimitry Andric int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, 501fe6060f1SDimitry Andric reinterpret_cast<cpuset_t *>(mask)); 5025ffd83dbSDimitry Andric int retval = (r == 0 ? 0 : -1); 503489b1cf2SDimitry Andric #endif 5040b57cec5SDimitry Andric if (retval >= 0) { 5050b57cec5SDimitry Andric return 0; 5060b57cec5SDimitry Andric } 5070b57cec5SDimitry Andric int error = errno; 5080b57cec5SDimitry Andric if (abort_on_error) { 50906c3fb27SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"), 51006c3fb27SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric return error; 5130b57cec5SDimitry Andric } 514*439352acSDimitry Andric #endif // KMP_OS_AIX 5150b57cec5SDimitry Andric }; 5160b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 5170b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 5200b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { 5210b57cec5SDimitry Andric KMPNativeAffinity::Mask *retval = new Mask(); 5220b57cec5SDimitry Andric return retval; 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { 5250b57cec5SDimitry Andric KMPNativeAffinity::Mask *native_mask = 5260b57cec5SDimitry Andric static_cast<KMPNativeAffinity::Mask *>(m); 5270b57cec5SDimitry Andric delete native_mask; 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 5300b57cec5SDimitry Andric return new Mask[num]; 5310b57cec5SDimitry Andric } 5320b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 5330b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 5340b57cec5SDimitry Andric delete[] linux_array; 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 5370b57cec5SDimitry Andric int index) override { 5380b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 5390b57cec5SDimitry Andric return &(linux_array[index]); 5400b57cec5SDimitry Andric } 5410b57cec5SDimitry Andric api_type get_api_type() const override { return NATIVE_OS; } 5420b57cec5SDimitry Andric }; 543*439352acSDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX */ 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric #if KMP_OS_WINDOWS 5460b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 5470b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 5480b57cec5SDimitry Andric typedef ULONG_PTR mask_t; 5490b57cec5SDimitry Andric static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 5500b57cec5SDimitry Andric mask_t *mask; 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric public: 5530b57cec5SDimitry Andric Mask() { 5540b57cec5SDimitry Andric mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); 5550b57cec5SDimitry Andric } 5560b57cec5SDimitry Andric ~Mask() { 5570b57cec5SDimitry Andric if (mask) 5580b57cec5SDimitry Andric __kmp_free(mask); 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric void set(int i) override { 5610b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric bool is_set(int i) const override { 5640b57cec5SDimitry Andric return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); 5650b57cec5SDimitry Andric } 5660b57cec5SDimitry Andric void clear(int i) override { 5670b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric void zero() override { 5700b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 5710b57cec5SDimitry Andric mask[i] = 0; 5720b57cec5SDimitry Andric } 5735f757f3fSDimitry Andric bool empty() const override { 5745f757f3fSDimitry Andric for (size_t i = 0; i < __kmp_num_proc_groups; ++i) 5755f757f3fSDimitry Andric if (mask[i]) 5765f757f3fSDimitry Andric return false; 5775f757f3fSDimitry Andric return true; 5785f757f3fSDimitry Andric } 5790b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 5800b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 5810b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 5820b57cec5SDimitry Andric mask[i] = convert->mask[i]; 5830b57cec5SDimitry Andric } 5840b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 5850b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 5860b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 5870b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 5900b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 5910b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 5920b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 5930b57cec5SDimitry Andric } 5940b57cec5SDimitry Andric void bitwise_not() override { 5950b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 5960b57cec5SDimitry Andric mask[i] = ~(mask[i]); 5970b57cec5SDimitry Andric } 5985f757f3fSDimitry Andric bool is_equal(const KMPAffinity::Mask *rhs) const override { 5995f757f3fSDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 6005f757f3fSDimitry Andric for (size_t i = 0; i < __kmp_num_proc_groups; ++i) 6015f757f3fSDimitry Andric if (mask[i] != convert->mask[i]) 6025f757f3fSDimitry Andric return false; 6035f757f3fSDimitry Andric return true; 6045f757f3fSDimitry Andric } 6050b57cec5SDimitry Andric int begin() const override { 6060b57cec5SDimitry Andric int retval = 0; 6070b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 6080b57cec5SDimitry Andric ++retval; 6090b57cec5SDimitry Andric return retval; 6100b57cec5SDimitry Andric } 6110b57cec5SDimitry Andric int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } 6120b57cec5SDimitry Andric int next(int previous) const override { 6130b57cec5SDimitry Andric int retval = previous + 1; 6140b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 6150b57cec5SDimitry Andric ++retval; 6160b57cec5SDimitry Andric return retval; 6170b57cec5SDimitry Andric } 618e8d8bef9SDimitry Andric int set_process_affinity(bool abort_on_error) const override { 619e8d8bef9SDimitry Andric if (__kmp_num_proc_groups <= 1) { 620e8d8bef9SDimitry Andric if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) { 621e8d8bef9SDimitry Andric DWORD error = GetLastError(); 622e8d8bef9SDimitry Andric if (abort_on_error) { 623e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 624e8d8bef9SDimitry Andric __kmp_msg_null); 625e8d8bef9SDimitry Andric } 626e8d8bef9SDimitry Andric return error; 627e8d8bef9SDimitry Andric } 628e8d8bef9SDimitry Andric } 629e8d8bef9SDimitry Andric return 0; 630e8d8bef9SDimitry Andric } 6310b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 6320b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 6330b57cec5SDimitry Andric // Check for a valid mask. 6340b57cec5SDimitry Andric GROUP_AFFINITY ga; 6350b57cec5SDimitry Andric int group = get_proc_group(); 6360b57cec5SDimitry Andric if (group < 0) { 6370b57cec5SDimitry Andric if (abort_on_error) { 6380b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); 6390b57cec5SDimitry Andric } 6400b57cec5SDimitry Andric return -1; 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric // Transform the bit vector into a GROUP_AFFINITY struct 6430b57cec5SDimitry Andric // and make the system call to set affinity. 6440b57cec5SDimitry Andric ga.Group = group; 6450b57cec5SDimitry Andric ga.Mask = mask[group]; 6460b57cec5SDimitry Andric ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; 6470b57cec5SDimitry Andric 6480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); 6490b57cec5SDimitry Andric if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { 6500b57cec5SDimitry Andric DWORD error = GetLastError(); 6510b57cec5SDimitry Andric if (abort_on_error) { 6520b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 6530b57cec5SDimitry Andric __kmp_msg_null); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric return error; 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric } else { 6580b57cec5SDimitry Andric if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { 6590b57cec5SDimitry Andric DWORD error = GetLastError(); 6600b57cec5SDimitry Andric if (abort_on_error) { 6610b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 6620b57cec5SDimitry Andric __kmp_msg_null); 6630b57cec5SDimitry Andric } 6640b57cec5SDimitry Andric return error; 6650b57cec5SDimitry Andric } 6660b57cec5SDimitry Andric } 6670b57cec5SDimitry Andric return 0; 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 6700b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 6710b57cec5SDimitry Andric this->zero(); 6720b57cec5SDimitry Andric GROUP_AFFINITY ga; 6730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); 6740b57cec5SDimitry Andric if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { 6750b57cec5SDimitry Andric DWORD error = GetLastError(); 6760b57cec5SDimitry Andric if (abort_on_error) { 6770b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), 6780b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 6790b57cec5SDimitry Andric } 6800b57cec5SDimitry Andric return error; 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || 6830b57cec5SDimitry Andric (ga.Mask == 0)) { 6840b57cec5SDimitry Andric return -1; 6850b57cec5SDimitry Andric } 6860b57cec5SDimitry Andric mask[ga.Group] = ga.Mask; 6870b57cec5SDimitry Andric } else { 6880b57cec5SDimitry Andric mask_t newMask, sysMask, retval; 6890b57cec5SDimitry Andric if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { 6900b57cec5SDimitry Andric DWORD error = GetLastError(); 6910b57cec5SDimitry Andric if (abort_on_error) { 6920b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"), 6930b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric return error; 6960b57cec5SDimitry Andric } 6970b57cec5SDimitry Andric retval = SetThreadAffinityMask(GetCurrentThread(), newMask); 6980b57cec5SDimitry Andric if (!retval) { 6990b57cec5SDimitry Andric DWORD error = GetLastError(); 7000b57cec5SDimitry Andric if (abort_on_error) { 7010b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 7020b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric return error; 7050b57cec5SDimitry Andric } 7060b57cec5SDimitry Andric newMask = SetThreadAffinityMask(GetCurrentThread(), retval); 7070b57cec5SDimitry Andric if (!newMask) { 7080b57cec5SDimitry Andric DWORD error = GetLastError(); 7090b57cec5SDimitry Andric if (abort_on_error) { 7100b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 7110b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric *mask = retval; 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric return 0; 7170b57cec5SDimitry Andric } 7180b57cec5SDimitry Andric int get_proc_group() const override { 7190b57cec5SDimitry Andric int group = -1; 7200b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 7210b57cec5SDimitry Andric return 1; 7220b57cec5SDimitry Andric } 7230b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 7240b57cec5SDimitry Andric if (mask[i] == 0) 7250b57cec5SDimitry Andric continue; 7260b57cec5SDimitry Andric if (group >= 0) 7270b57cec5SDimitry Andric return -1; 7280b57cec5SDimitry Andric group = i; 7290b57cec5SDimitry Andric } 7300b57cec5SDimitry Andric return group; 7310b57cec5SDimitry Andric } 7320b57cec5SDimitry Andric }; 7330b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 7340b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 7350b57cec5SDimitry Andric } 7360b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 7370b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 7380b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 7390b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 7400b57cec5SDimitry Andric return new Mask[num]; 7410b57cec5SDimitry Andric } 7420b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 7430b57cec5SDimitry Andric Mask *windows_array = static_cast<Mask *>(array); 7440b57cec5SDimitry Andric delete[] windows_array; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 7470b57cec5SDimitry Andric int index) override { 7480b57cec5SDimitry Andric Mask *windows_array = static_cast<Mask *>(array); 7490b57cec5SDimitry Andric return &(windows_array[index]); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric api_type get_api_type() const override { return NATIVE_OS; } 7520b57cec5SDimitry Andric }; 7530b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 7540b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 7550b57cec5SDimitry Andric 7560eae32dcSDimitry Andric // Describe an attribute for a level in the machine topology 7570eae32dcSDimitry Andric struct kmp_hw_attr_t { 7580eae32dcSDimitry Andric int core_type : 8; 7590eae32dcSDimitry Andric int core_eff : 8; 7600eae32dcSDimitry Andric unsigned valid : 1; 7610eae32dcSDimitry Andric unsigned reserved : 15; 7620eae32dcSDimitry Andric 7630eae32dcSDimitry Andric static const int UNKNOWN_CORE_EFF = -1; 7640eae32dcSDimitry Andric 7650eae32dcSDimitry Andric kmp_hw_attr_t() 7660eae32dcSDimitry Andric : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF), 7670eae32dcSDimitry Andric valid(0), reserved(0) {} 7680eae32dcSDimitry Andric void set_core_type(kmp_hw_core_type_t type) { 7690eae32dcSDimitry Andric valid = 1; 7700eae32dcSDimitry Andric core_type = type; 7710eae32dcSDimitry Andric } 7720eae32dcSDimitry Andric void set_core_eff(int eff) { 7730eae32dcSDimitry Andric valid = 1; 7740eae32dcSDimitry Andric core_eff = eff; 7750eae32dcSDimitry Andric } 7760eae32dcSDimitry Andric kmp_hw_core_type_t get_core_type() const { 7770eae32dcSDimitry Andric return (kmp_hw_core_type_t)core_type; 7780eae32dcSDimitry Andric } 7790eae32dcSDimitry Andric int get_core_eff() const { return core_eff; } 7800eae32dcSDimitry Andric bool is_core_type_valid() const { 7810eae32dcSDimitry Andric return core_type != KMP_HW_CORE_TYPE_UNKNOWN; 7820eae32dcSDimitry Andric } 7830eae32dcSDimitry Andric bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; } 7840eae32dcSDimitry Andric operator bool() const { return valid; } 7850eae32dcSDimitry Andric void clear() { 7860eae32dcSDimitry Andric core_type = KMP_HW_CORE_TYPE_UNKNOWN; 7870eae32dcSDimitry Andric core_eff = UNKNOWN_CORE_EFF; 7880eae32dcSDimitry Andric valid = 0; 7890eae32dcSDimitry Andric } 7900eae32dcSDimitry Andric bool contains(const kmp_hw_attr_t &other) const { 7910eae32dcSDimitry Andric if (!valid && !other.valid) 7920eae32dcSDimitry Andric return true; 7930eae32dcSDimitry Andric if (valid && other.valid) { 7940eae32dcSDimitry Andric if (other.is_core_type_valid()) { 7950eae32dcSDimitry Andric if (!is_core_type_valid() || (get_core_type() != other.get_core_type())) 7960eae32dcSDimitry Andric return false; 7970eae32dcSDimitry Andric } 7980eae32dcSDimitry Andric if (other.is_core_eff_valid()) { 7990eae32dcSDimitry Andric if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff())) 8000eae32dcSDimitry Andric return false; 8010eae32dcSDimitry Andric } 8020eae32dcSDimitry Andric return true; 8030eae32dcSDimitry Andric } 8040eae32dcSDimitry Andric return false; 8050eae32dcSDimitry Andric } 8065f757f3fSDimitry Andric #if KMP_AFFINITY_SUPPORTED 8075f757f3fSDimitry Andric bool contains(const kmp_affinity_attrs_t &attr) const { 8085f757f3fSDimitry Andric if (!valid && !attr.valid) 8095f757f3fSDimitry Andric return true; 8105f757f3fSDimitry Andric if (valid && attr.valid) { 8115f757f3fSDimitry Andric if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN) 8125f757f3fSDimitry Andric return (is_core_type_valid() && 8135f757f3fSDimitry Andric (get_core_type() == (kmp_hw_core_type_t)attr.core_type)); 8145f757f3fSDimitry Andric if (attr.core_eff != UNKNOWN_CORE_EFF) 8155f757f3fSDimitry Andric return (is_core_eff_valid() && (get_core_eff() == attr.core_eff)); 8165f757f3fSDimitry Andric return true; 8175f757f3fSDimitry Andric } 8185f757f3fSDimitry Andric return false; 8195f757f3fSDimitry Andric } 8205f757f3fSDimitry Andric #endif // KMP_AFFINITY_SUPPORTED 8210eae32dcSDimitry Andric bool operator==(const kmp_hw_attr_t &rhs) const { 8220eae32dcSDimitry Andric return (rhs.valid == valid && rhs.core_eff == core_eff && 8230eae32dcSDimitry Andric rhs.core_type == core_type); 8240eae32dcSDimitry Andric } 8250eae32dcSDimitry Andric bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); } 8260eae32dcSDimitry Andric }; 827349cc55cSDimitry Andric 828bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED 829bdd1243dSDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t)); 830bdd1243dSDimitry Andric #endif 831bdd1243dSDimitry Andric 832fe6060f1SDimitry Andric class kmp_hw_thread_t { 8330b57cec5SDimitry Andric public: 834fe6060f1SDimitry Andric static const int UNKNOWN_ID = -1; 835bdd1243dSDimitry Andric static const int MULTIPLE_ID = -2; 836fe6060f1SDimitry Andric static int compare_ids(const void *a, const void *b); 837fe6060f1SDimitry Andric static int compare_compact(const void *a, const void *b); 838fe6060f1SDimitry Andric int ids[KMP_HW_LAST]; 839fe6060f1SDimitry Andric int sub_ids[KMP_HW_LAST]; 840fe6060f1SDimitry Andric bool leader; 841fe6060f1SDimitry Andric int os_id; 8420eae32dcSDimitry Andric kmp_hw_attr_t attrs; 843349cc55cSDimitry Andric 844fe6060f1SDimitry Andric void print() const; 845fe6060f1SDimitry Andric void clear() { 846fe6060f1SDimitry Andric for (int i = 0; i < (int)KMP_HW_LAST; ++i) 847fe6060f1SDimitry Andric ids[i] = UNKNOWN_ID; 848fe6060f1SDimitry Andric leader = false; 8490eae32dcSDimitry Andric attrs.clear(); 8500b57cec5SDimitry Andric } 8510b57cec5SDimitry Andric }; 8520b57cec5SDimitry Andric 853fe6060f1SDimitry Andric class kmp_topology_t { 854fe6060f1SDimitry Andric 855fe6060f1SDimitry Andric struct flags_t { 856fe6060f1SDimitry Andric int uniform : 1; 857fe6060f1SDimitry Andric int reserved : 31; 8580b57cec5SDimitry Andric }; 8590b57cec5SDimitry Andric 860fe6060f1SDimitry Andric int depth; 861fe6060f1SDimitry Andric 862349cc55cSDimitry Andric // The following arrays are all 'depth' long and have been 863349cc55cSDimitry Andric // allocated to hold up to KMP_HW_LAST number of objects if 864349cc55cSDimitry Andric // needed so layers can be added without reallocation of any array 865fe6060f1SDimitry Andric 866fe6060f1SDimitry Andric // Orderd array of the types in the topology 867fe6060f1SDimitry Andric kmp_hw_t *types; 868fe6060f1SDimitry Andric 869fe6060f1SDimitry Andric // Keep quick topology ratios, for non-uniform topologies, 870fe6060f1SDimitry Andric // this ratio holds the max number of itemAs per itemB 871fe6060f1SDimitry Andric // e.g., [ 4 packages | 6 cores / package | 2 threads / core ] 872fe6060f1SDimitry Andric int *ratio; 873fe6060f1SDimitry Andric 874fe6060f1SDimitry Andric // Storage containing the absolute number of each topology layer 875fe6060f1SDimitry Andric int *count; 876fe6060f1SDimitry Andric 8770eae32dcSDimitry Andric // The number of core efficiencies. This is only useful for hybrid 8780eae32dcSDimitry Andric // topologies. Core efficiencies will range from 0 to num efficiencies - 1 8790eae32dcSDimitry Andric int num_core_efficiencies; 8800eae32dcSDimitry Andric int num_core_types; 881349cc55cSDimitry Andric kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES]; 882349cc55cSDimitry Andric 883fe6060f1SDimitry Andric // The hardware threads array 884fe6060f1SDimitry Andric // hw_threads is num_hw_threads long 885fe6060f1SDimitry Andric // Each hw_thread's ids and sub_ids are depth deep 886fe6060f1SDimitry Andric int num_hw_threads; 887fe6060f1SDimitry Andric kmp_hw_thread_t *hw_threads; 888fe6060f1SDimitry Andric 889fe6060f1SDimitry Andric // Equivalence hash where the key is the hardware topology item 890fe6060f1SDimitry Andric // and the value is the equivalent hardware topology type in the 891fe6060f1SDimitry Andric // types[] array, if the value is KMP_HW_UNKNOWN, then there is no 892fe6060f1SDimitry Andric // known equivalence for the topology type 893fe6060f1SDimitry Andric kmp_hw_t equivalent[KMP_HW_LAST]; 894fe6060f1SDimitry Andric 895fe6060f1SDimitry Andric // Flags describing the topology 896fe6060f1SDimitry Andric flags_t flags; 897fe6060f1SDimitry Andric 898bdd1243dSDimitry Andric // Compact value used during sort_compact() 899bdd1243dSDimitry Andric int compact; 900bdd1243dSDimitry Andric 901349cc55cSDimitry Andric // Insert a new topology layer after allocation 902349cc55cSDimitry Andric void _insert_layer(kmp_hw_t type, const int *ids); 903349cc55cSDimitry Andric 904349cc55cSDimitry Andric #if KMP_GROUP_AFFINITY 905349cc55cSDimitry Andric // Insert topology information about Windows Processor groups 906349cc55cSDimitry Andric void _insert_windows_proc_groups(); 907349cc55cSDimitry Andric #endif 908349cc55cSDimitry Andric 909fe6060f1SDimitry Andric // Count each item & get the num x's per y 910fe6060f1SDimitry Andric // e.g., get the number of cores and the number of threads per core 911fe6060f1SDimitry Andric // for each (x, y) in (KMP_HW_* , KMP_HW_*) 912fe6060f1SDimitry Andric void _gather_enumeration_information(); 913fe6060f1SDimitry Andric 914fe6060f1SDimitry Andric // Remove layers that don't add information to the topology. 915fe6060f1SDimitry Andric // This is done by having the layer take on the id = UNKNOWN_ID (-1) 916fe6060f1SDimitry Andric void _remove_radix1_layers(); 917fe6060f1SDimitry Andric 918fe6060f1SDimitry Andric // Find out if the topology is uniform 919fe6060f1SDimitry Andric void _discover_uniformity(); 920fe6060f1SDimitry Andric 921fe6060f1SDimitry Andric // Set all the sub_ids for each hardware thread 922fe6060f1SDimitry Andric void _set_sub_ids(); 923fe6060f1SDimitry Andric 924fe6060f1SDimitry Andric // Set global affinity variables describing the number of threads per 925fe6060f1SDimitry Andric // core, the number of packages, the number of cores per package, and 926fe6060f1SDimitry Andric // the number of cores. 927fe6060f1SDimitry Andric void _set_globals(); 928fe6060f1SDimitry Andric 929fe6060f1SDimitry Andric // Set the last level cache equivalent type 930fe6060f1SDimitry Andric void _set_last_level_cache(); 931fe6060f1SDimitry Andric 9320eae32dcSDimitry Andric // Return the number of cores with a particular attribute, 'attr'. 9330eae32dcSDimitry Andric // If 'find_all' is true, then find all cores on the machine, otherwise find 9340eae32dcSDimitry Andric // all cores per the layer 'above' 9350eae32dcSDimitry Andric int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above, 9360eae32dcSDimitry Andric bool find_all = false) const; 937349cc55cSDimitry Andric 938fe6060f1SDimitry Andric public: 939fe6060f1SDimitry Andric // Force use of allocate()/deallocate() 940fe6060f1SDimitry Andric kmp_topology_t() = delete; 941fe6060f1SDimitry Andric kmp_topology_t(const kmp_topology_t &t) = delete; 942fe6060f1SDimitry Andric kmp_topology_t(kmp_topology_t &&t) = delete; 943fe6060f1SDimitry Andric kmp_topology_t &operator=(const kmp_topology_t &t) = delete; 944fe6060f1SDimitry Andric kmp_topology_t &operator=(kmp_topology_t &&t) = delete; 945fe6060f1SDimitry Andric 946fe6060f1SDimitry Andric static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types); 947fe6060f1SDimitry Andric static void deallocate(kmp_topology_t *); 948fe6060f1SDimitry Andric 949fe6060f1SDimitry Andric // Functions used in create_map() routines 950fe6060f1SDimitry Andric kmp_hw_thread_t &at(int index) { 951fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads); 952fe6060f1SDimitry Andric return hw_threads[index]; 953fe6060f1SDimitry Andric } 954fe6060f1SDimitry Andric const kmp_hw_thread_t &at(int index) const { 955fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads); 956fe6060f1SDimitry Andric return hw_threads[index]; 957fe6060f1SDimitry Andric } 958fe6060f1SDimitry Andric int get_num_hw_threads() const { return num_hw_threads; } 959fe6060f1SDimitry Andric void sort_ids() { 960fe6060f1SDimitry Andric qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t), 961fe6060f1SDimitry Andric kmp_hw_thread_t::compare_ids); 962fe6060f1SDimitry Andric } 963fe6060f1SDimitry Andric // Check if the hardware ids are unique, if they are 964fe6060f1SDimitry Andric // return true, otherwise return false 965fe6060f1SDimitry Andric bool check_ids() const; 966fe6060f1SDimitry Andric 967fe6060f1SDimitry Andric // Function to call after the create_map() routine 968fe6060f1SDimitry Andric void canonicalize(); 969fe6060f1SDimitry Andric void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores); 970fe6060f1SDimitry Andric 971fe6060f1SDimitry Andric // Functions used after canonicalize() called 972bdd1243dSDimitry Andric 973bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED 974bdd1243dSDimitry Andric // Set the granularity for affinity settings 975bdd1243dSDimitry Andric void set_granularity(kmp_affinity_t &stgs) const; 9765f757f3fSDimitry Andric bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const; 9775f757f3fSDimitry Andric bool restrict_to_mask(const kmp_affin_mask_t *mask); 978fe6060f1SDimitry Andric bool filter_hw_subset(); 9795f757f3fSDimitry Andric #endif 980fe6060f1SDimitry Andric bool is_uniform() const { return flags.uniform; } 981fe6060f1SDimitry Andric // Tell whether a type is a valid type in the topology 982fe6060f1SDimitry Andric // returns KMP_HW_UNKNOWN when there is no equivalent type 9835f757f3fSDimitry Andric kmp_hw_t get_equivalent_type(kmp_hw_t type) const { 9845f757f3fSDimitry Andric if (type == KMP_HW_UNKNOWN) 9855f757f3fSDimitry Andric return KMP_HW_UNKNOWN; 9865f757f3fSDimitry Andric return equivalent[type]; 9875f757f3fSDimitry Andric } 988fe6060f1SDimitry Andric // Set type1 = type2 989fe6060f1SDimitry Andric void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) { 990fe6060f1SDimitry Andric KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1); 991fe6060f1SDimitry Andric KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2); 992fe6060f1SDimitry Andric kmp_hw_t real_type2 = equivalent[type2]; 993fe6060f1SDimitry Andric if (real_type2 == KMP_HW_UNKNOWN) 994fe6060f1SDimitry Andric real_type2 = type2; 995fe6060f1SDimitry Andric equivalent[type1] = real_type2; 996fe6060f1SDimitry Andric // This loop is required since any of the types may have been set to 997fe6060f1SDimitry Andric // be equivalent to type1. They all must be checked and reset to type2. 998fe6060f1SDimitry Andric KMP_FOREACH_HW_TYPE(type) { 999fe6060f1SDimitry Andric if (equivalent[type] == type1) { 1000fe6060f1SDimitry Andric equivalent[type] = real_type2; 1001fe6060f1SDimitry Andric } 1002fe6060f1SDimitry Andric } 1003fe6060f1SDimitry Andric } 1004fe6060f1SDimitry Andric // Calculate number of types corresponding to level1 1005fe6060f1SDimitry Andric // per types corresponding to level2 (e.g., number of threads per core) 1006fe6060f1SDimitry Andric int calculate_ratio(int level1, int level2) const { 1007fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth); 1008fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth); 1009fe6060f1SDimitry Andric int r = 1; 1010fe6060f1SDimitry Andric for (int level = level1; level > level2; --level) 1011fe6060f1SDimitry Andric r *= ratio[level]; 1012fe6060f1SDimitry Andric return r; 1013fe6060f1SDimitry Andric } 1014fe6060f1SDimitry Andric int get_ratio(int level) const { 1015fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(level >= 0 && level < depth); 1016fe6060f1SDimitry Andric return ratio[level]; 1017fe6060f1SDimitry Andric } 1018fe6060f1SDimitry Andric int get_depth() const { return depth; }; 1019fe6060f1SDimitry Andric kmp_hw_t get_type(int level) const { 1020fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(level >= 0 && level < depth); 1021fe6060f1SDimitry Andric return types[level]; 1022fe6060f1SDimitry Andric } 1023fe6060f1SDimitry Andric int get_level(kmp_hw_t type) const { 1024fe6060f1SDimitry Andric KMP_DEBUG_ASSERT_VALID_HW_TYPE(type); 1025fe6060f1SDimitry Andric int eq_type = equivalent[type]; 1026fe6060f1SDimitry Andric if (eq_type == KMP_HW_UNKNOWN) 10270b57cec5SDimitry Andric return -1; 1028fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) 1029fe6060f1SDimitry Andric if (types[i] == eq_type) 1030fe6060f1SDimitry Andric return i; 1031fe6060f1SDimitry Andric return -1; 10320b57cec5SDimitry Andric } 1033fe6060f1SDimitry Andric int get_count(int level) const { 1034fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(level >= 0 && level < depth); 1035fe6060f1SDimitry Andric return count[level]; 10360b57cec5SDimitry Andric } 10370eae32dcSDimitry Andric // Return the total number of cores with attribute 'attr' 10380eae32dcSDimitry Andric int get_ncores_with_attr(const kmp_hw_attr_t &attr) const { 10390eae32dcSDimitry Andric return _get_ncores_with_attr(attr, -1, true); 10400eae32dcSDimitry Andric } 10410eae32dcSDimitry Andric // Return the number of cores with attribute 10420eae32dcSDimitry Andric // 'attr' per topology level 'above' 10430eae32dcSDimitry Andric int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const { 10440eae32dcSDimitry Andric return _get_ncores_with_attr(attr, above, false); 10450eae32dcSDimitry Andric } 10460eae32dcSDimitry Andric 1047fe6060f1SDimitry Andric #if KMP_AFFINITY_SUPPORTED 1048bdd1243dSDimitry Andric friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b); 1049bdd1243dSDimitry Andric void sort_compact(kmp_affinity_t &affinity) { 1050bdd1243dSDimitry Andric compact = affinity.compact; 1051fe6060f1SDimitry Andric qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t), 1052fe6060f1SDimitry Andric kmp_hw_thread_t::compare_compact); 1053fe6060f1SDimitry Andric } 1054fe6060f1SDimitry Andric #endif 1055fe6060f1SDimitry Andric void print(const char *env_var = "KMP_AFFINITY") const; 1056fe6060f1SDimitry Andric void dump() const; 1057fe6060f1SDimitry Andric }; 1058349cc55cSDimitry Andric extern kmp_topology_t *__kmp_topology; 1059fe6060f1SDimitry Andric 1060fe6060f1SDimitry Andric class kmp_hw_subset_t { 10610eae32dcSDimitry Andric const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS; 10620eae32dcSDimitry Andric 1063fe6060f1SDimitry Andric public: 10640eae32dcSDimitry Andric // Describe a machine topology item in KMP_HW_SUBSET 1065fe6060f1SDimitry Andric struct item_t { 1066fe6060f1SDimitry Andric kmp_hw_t type; 10670eae32dcSDimitry Andric int num_attrs; 10680eae32dcSDimitry Andric int num[MAX_ATTRS]; 10690eae32dcSDimitry Andric int offset[MAX_ATTRS]; 10700eae32dcSDimitry Andric kmp_hw_attr_t attr[MAX_ATTRS]; 1071fe6060f1SDimitry Andric }; 10720eae32dcSDimitry Andric // Put parenthesis around max to avoid accidental use of Windows max macro. 10730eae32dcSDimitry Andric const static int USE_ALL = (std::numeric_limits<int>::max)(); 1074fe6060f1SDimitry Andric 1075fe6060f1SDimitry Andric private: 1076fe6060f1SDimitry Andric int depth; 1077fe6060f1SDimitry Andric int capacity; 1078fe6060f1SDimitry Andric item_t *items; 1079fe6060f1SDimitry Andric kmp_uint64 set; 1080fe6060f1SDimitry Andric bool absolute; 1081fe6060f1SDimitry Andric // The set must be able to handle up to KMP_HW_LAST number of layers 1082fe6060f1SDimitry Andric KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST); 1083349cc55cSDimitry Andric // Sorting the KMP_HW_SUBSET items to follow topology order 1084349cc55cSDimitry Andric // All unknown topology types will be at the beginning of the subset 1085349cc55cSDimitry Andric static int hw_subset_compare(const void *i1, const void *i2) { 1086349cc55cSDimitry Andric kmp_hw_t type1 = ((const item_t *)i1)->type; 1087349cc55cSDimitry Andric kmp_hw_t type2 = ((const item_t *)i2)->type; 1088349cc55cSDimitry Andric int level1 = __kmp_topology->get_level(type1); 1089349cc55cSDimitry Andric int level2 = __kmp_topology->get_level(type2); 1090349cc55cSDimitry Andric return level1 - level2; 1091349cc55cSDimitry Andric } 1092fe6060f1SDimitry Andric 1093fe6060f1SDimitry Andric public: 1094fe6060f1SDimitry Andric // Force use of allocate()/deallocate() 1095fe6060f1SDimitry Andric kmp_hw_subset_t() = delete; 1096fe6060f1SDimitry Andric kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete; 1097fe6060f1SDimitry Andric kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete; 1098fe6060f1SDimitry Andric kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete; 1099fe6060f1SDimitry Andric kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete; 1100fe6060f1SDimitry Andric 1101fe6060f1SDimitry Andric static kmp_hw_subset_t *allocate() { 1102fe6060f1SDimitry Andric int initial_capacity = 5; 1103fe6060f1SDimitry Andric kmp_hw_subset_t *retval = 1104fe6060f1SDimitry Andric (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t)); 1105fe6060f1SDimitry Andric retval->depth = 0; 1106fe6060f1SDimitry Andric retval->capacity = initial_capacity; 1107fe6060f1SDimitry Andric retval->set = 0ull; 1108fe6060f1SDimitry Andric retval->absolute = false; 1109fe6060f1SDimitry Andric retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity); 1110fe6060f1SDimitry Andric return retval; 1111fe6060f1SDimitry Andric } 1112fe6060f1SDimitry Andric static void deallocate(kmp_hw_subset_t *subset) { 1113fe6060f1SDimitry Andric __kmp_free(subset->items); 1114fe6060f1SDimitry Andric __kmp_free(subset); 1115fe6060f1SDimitry Andric } 1116fe6060f1SDimitry Andric void set_absolute() { absolute = true; } 1117fe6060f1SDimitry Andric bool is_absolute() const { return absolute; } 11180eae32dcSDimitry Andric void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) { 11190eae32dcSDimitry Andric for (int i = 0; i < depth; ++i) { 11200eae32dcSDimitry Andric // Found an existing item for this layer type 11210eae32dcSDimitry Andric // Add the num, offset, and attr to this item 11220eae32dcSDimitry Andric if (items[i].type == type) { 11230eae32dcSDimitry Andric int idx = items[i].num_attrs++; 11240eae32dcSDimitry Andric if ((size_t)idx >= MAX_ATTRS) 11250eae32dcSDimitry Andric return; 11260eae32dcSDimitry Andric items[i].num[idx] = num; 11270eae32dcSDimitry Andric items[i].offset[idx] = offset; 11280eae32dcSDimitry Andric items[i].attr[idx] = attr; 11290eae32dcSDimitry Andric return; 11300eae32dcSDimitry Andric } 11310eae32dcSDimitry Andric } 1132fe6060f1SDimitry Andric if (depth == capacity - 1) { 1133fe6060f1SDimitry Andric capacity *= 2; 1134fe6060f1SDimitry Andric item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity); 1135fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) 1136fe6060f1SDimitry Andric new_items[i] = items[i]; 1137fe6060f1SDimitry Andric __kmp_free(items); 1138fe6060f1SDimitry Andric items = new_items; 1139fe6060f1SDimitry Andric } 11400eae32dcSDimitry Andric items[depth].num_attrs = 1; 1141fe6060f1SDimitry Andric items[depth].type = type; 11420eae32dcSDimitry Andric items[depth].num[0] = num; 11430eae32dcSDimitry Andric items[depth].offset[0] = offset; 11440eae32dcSDimitry Andric items[depth].attr[0] = attr; 1145fe6060f1SDimitry Andric depth++; 1146fe6060f1SDimitry Andric set |= (1ull << type); 1147fe6060f1SDimitry Andric } 1148fe6060f1SDimitry Andric int get_depth() const { return depth; } 1149fe6060f1SDimitry Andric const item_t &at(int index) const { 1150fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < depth); 1151fe6060f1SDimitry Andric return items[index]; 1152fe6060f1SDimitry Andric } 1153fe6060f1SDimitry Andric item_t &at(int index) { 1154fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < depth); 1155fe6060f1SDimitry Andric return items[index]; 1156fe6060f1SDimitry Andric } 1157fe6060f1SDimitry Andric void remove(int index) { 1158fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < depth); 1159fe6060f1SDimitry Andric set &= ~(1ull << items[index].type); 1160fe6060f1SDimitry Andric for (int j = index + 1; j < depth; ++j) { 1161fe6060f1SDimitry Andric items[j - 1] = items[j]; 1162fe6060f1SDimitry Andric } 1163fe6060f1SDimitry Andric depth--; 1164fe6060f1SDimitry Andric } 1165349cc55cSDimitry Andric void sort() { 1166349cc55cSDimitry Andric KMP_DEBUG_ASSERT(__kmp_topology); 1167349cc55cSDimitry Andric qsort(items, depth, sizeof(item_t), hw_subset_compare); 1168349cc55cSDimitry Andric } 1169fe6060f1SDimitry Andric bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); } 1170fe6060f1SDimitry Andric void dump() const { 1171fe6060f1SDimitry Andric printf("**********************\n"); 1172fe6060f1SDimitry Andric printf("*** kmp_hw_subset: ***\n"); 1173fe6060f1SDimitry Andric printf("* depth: %d\n", depth); 1174fe6060f1SDimitry Andric printf("* items:\n"); 1175fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) { 11760eae32dcSDimitry Andric printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type)); 11770eae32dcSDimitry Andric for (int j = 0; j < items[i].num_attrs; ++j) { 11780eae32dcSDimitry Andric printf(" num: %d, offset: %d, attr: ", items[i].num[j], 11790eae32dcSDimitry Andric items[i].offset[j]); 11800eae32dcSDimitry Andric if (!items[i].attr[j]) { 11810eae32dcSDimitry Andric printf(" (none)\n"); 11820eae32dcSDimitry Andric } else { 11830eae32dcSDimitry Andric printf( 11840eae32dcSDimitry Andric " core_type = %s, core_eff = %d\n", 11850eae32dcSDimitry Andric __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()), 11860eae32dcSDimitry Andric items[i].attr[j].get_core_eff()); 11870eae32dcSDimitry Andric } 11880eae32dcSDimitry Andric } 1189fe6060f1SDimitry Andric } 1190fe6060f1SDimitry Andric printf("* set: 0x%llx\n", set); 1191fe6060f1SDimitry Andric printf("* absolute: %d\n", absolute); 1192fe6060f1SDimitry Andric printf("**********************\n"); 1193fe6060f1SDimitry Andric } 1194fe6060f1SDimitry Andric }; 1195fe6060f1SDimitry Andric extern kmp_hw_subset_t *__kmp_hw_subset; 11960b57cec5SDimitry Andric 11970b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once 11980b57cec5SDimitry Andric at init. This structure represents a mapping of threads to the actual machine 11990b57cec5SDimitry Andric hierarchy, or to our best guess at what the hierarchy might be, for the 12000b57cec5SDimitry Andric purpose of performing an efficient barrier. In the worst case, when there is 12010b57cec5SDimitry Andric no machine hierarchy information, it produces a tree suitable for a barrier, 12020b57cec5SDimitry Andric similar to the tree used in the hyper barrier. */ 12030b57cec5SDimitry Andric class hierarchy_info { 12040b57cec5SDimitry Andric public: 12050b57cec5SDimitry Andric /* Good default values for number of leaves and branching factor, given no 12060b57cec5SDimitry Andric affinity information. Behaves a bit like hyper barrier. */ 12070b57cec5SDimitry Andric static const kmp_uint32 maxLeaves = 4; 12080b57cec5SDimitry Andric static const kmp_uint32 minBranch = 4; 12090b57cec5SDimitry Andric /** Number of levels in the hierarchy. Typical levels are threads/core, 12100b57cec5SDimitry Andric cores/package or socket, packages/node, nodes/machine, etc. We don't want 12110b57cec5SDimitry Andric to get specific with nomenclature. When the machine is oversubscribed we 12120b57cec5SDimitry Andric add levels to duplicate the hierarchy, doubling the thread capacity of the 12130b57cec5SDimitry Andric hierarchy each time we add a level. */ 12140b57cec5SDimitry Andric kmp_uint32 maxLevels; 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric /** This is specifically the depth of the machine configuration hierarchy, in 12170b57cec5SDimitry Andric terms of the number of levels along the longest path from root to any 12180b57cec5SDimitry Andric leaf. It corresponds to the number of entries in numPerLevel if we exclude 12190b57cec5SDimitry Andric all but one trailing 1. */ 12200b57cec5SDimitry Andric kmp_uint32 depth; 12210b57cec5SDimitry Andric kmp_uint32 base_num_threads; 12220b57cec5SDimitry Andric enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; 12230b57cec5SDimitry Andric volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 12240b57cec5SDimitry Andric // 2=initialization in progress 12250b57cec5SDimitry Andric volatile kmp_int8 resizing; // 0=not resizing, 1=resizing 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children 12280b57cec5SDimitry Andric the parent of a node at level i has. For example, if we have a machine 12290b57cec5SDimitry Andric with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = 12300b57cec5SDimitry Andric {2, 4, 4, 1, 1}. All empty levels are set to 1. */ 12310b57cec5SDimitry Andric kmp_uint32 *numPerLevel; 12320b57cec5SDimitry Andric kmp_uint32 *skipPerLevel; 12330b57cec5SDimitry Andric 1234fe6060f1SDimitry Andric void deriveLevels() { 1235fe6060f1SDimitry Andric int hier_depth = __kmp_topology->get_depth(); 1236fe6060f1SDimitry Andric for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) { 1237fe6060f1SDimitry Andric numPerLevel[level] = __kmp_topology->get_ratio(i); 12380b57cec5SDimitry Andric } 12390b57cec5SDimitry Andric } 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric hierarchy_info() 12420b57cec5SDimitry Andric : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} 12430b57cec5SDimitry Andric 12440b57cec5SDimitry Andric void fini() { 12450b57cec5SDimitry Andric if (!uninitialized && numPerLevel) { 12460b57cec5SDimitry Andric __kmp_free(numPerLevel); 12470b57cec5SDimitry Andric numPerLevel = NULL; 12480b57cec5SDimitry Andric uninitialized = not_initialized; 12490b57cec5SDimitry Andric } 12500b57cec5SDimitry Andric } 12510b57cec5SDimitry Andric 1252fe6060f1SDimitry Andric void init(int num_addrs) { 12530b57cec5SDimitry Andric kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( 12540b57cec5SDimitry Andric &uninitialized, not_initialized, initializing); 12550b57cec5SDimitry Andric if (bool_result == 0) { // Wait for initialization 12560b57cec5SDimitry Andric while (TCR_1(uninitialized) != initialized) 12570b57cec5SDimitry Andric KMP_CPU_PAUSE(); 12580b57cec5SDimitry Andric return; 12590b57cec5SDimitry Andric } 12600b57cec5SDimitry Andric KMP_DEBUG_ASSERT(bool_result == 1); 12610b57cec5SDimitry Andric 12620b57cec5SDimitry Andric /* Added explicit initialization of the data fields here to prevent usage of 12630b57cec5SDimitry Andric dirty value observed when static library is re-initialized multiple times 12640b57cec5SDimitry Andric (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses 12650b57cec5SDimitry Andric OpenMP). */ 12660b57cec5SDimitry Andric depth = 1; 12670b57cec5SDimitry Andric resizing = 0; 12680b57cec5SDimitry Andric maxLevels = 7; 12690b57cec5SDimitry Andric numPerLevel = 12700b57cec5SDimitry Andric (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); 12710b57cec5SDimitry Andric skipPerLevel = &(numPerLevel[maxLevels]); 12720b57cec5SDimitry Andric for (kmp_uint32 i = 0; i < maxLevels; 12730b57cec5SDimitry Andric ++i) { // init numPerLevel[*] to 1 item per level 12740b57cec5SDimitry Andric numPerLevel[i] = 1; 12750b57cec5SDimitry Andric skipPerLevel[i] = 1; 12760b57cec5SDimitry Andric } 12770b57cec5SDimitry Andric 12780b57cec5SDimitry Andric // Sort table by physical ID 1279fe6060f1SDimitry Andric if (__kmp_topology && __kmp_topology->get_depth() > 0) { 1280fe6060f1SDimitry Andric deriveLevels(); 12810b57cec5SDimitry Andric } else { 12820b57cec5SDimitry Andric numPerLevel[0] = maxLeaves; 12830b57cec5SDimitry Andric numPerLevel[1] = num_addrs / maxLeaves; 12840b57cec5SDimitry Andric if (num_addrs % maxLeaves) 12850b57cec5SDimitry Andric numPerLevel[1]++; 12860b57cec5SDimitry Andric } 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric base_num_threads = num_addrs; 12890b57cec5SDimitry Andric for (int i = maxLevels - 1; i >= 0; 12900b57cec5SDimitry Andric --i) // count non-empty levels to get depth 12910b57cec5SDimitry Andric if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' 12920b57cec5SDimitry Andric depth++; 12930b57cec5SDimitry Andric 12940b57cec5SDimitry Andric kmp_uint32 branch = minBranch; 12950b57cec5SDimitry Andric if (numPerLevel[0] == 1) 12960b57cec5SDimitry Andric branch = num_addrs / maxLeaves; 12970b57cec5SDimitry Andric if (branch < minBranch) 12980b57cec5SDimitry Andric branch = minBranch; 12990b57cec5SDimitry Andric for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width 13000b57cec5SDimitry Andric while (numPerLevel[d] > branch || 13010b57cec5SDimitry Andric (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! 13020b57cec5SDimitry Andric if (numPerLevel[d] & 1) 13030b57cec5SDimitry Andric numPerLevel[d]++; 13040b57cec5SDimitry Andric numPerLevel[d] = numPerLevel[d] >> 1; 13050b57cec5SDimitry Andric if (numPerLevel[d + 1] == 1) 13060b57cec5SDimitry Andric depth++; 13070b57cec5SDimitry Andric numPerLevel[d + 1] = numPerLevel[d + 1] << 1; 13080b57cec5SDimitry Andric } 13090b57cec5SDimitry Andric if (numPerLevel[0] == 1) { 13100b57cec5SDimitry Andric branch = branch >> 1; 13110b57cec5SDimitry Andric if (branch < 4) 13120b57cec5SDimitry Andric branch = minBranch; 13130b57cec5SDimitry Andric } 13140b57cec5SDimitry Andric } 13150b57cec5SDimitry Andric 13160b57cec5SDimitry Andric for (kmp_uint32 i = 1; i < depth; ++i) 13170b57cec5SDimitry Andric skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]; 13180b57cec5SDimitry Andric // Fill in hierarchy in the case of oversubscription 13190b57cec5SDimitry Andric for (kmp_uint32 i = depth; i < maxLevels; ++i) 13200b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 13210b57cec5SDimitry Andric 13220b57cec5SDimitry Andric uninitialized = initialized; // One writer 13230b57cec5SDimitry Andric } 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric // Resize the hierarchy if nproc changes to something larger than before 13260b57cec5SDimitry Andric void resize(kmp_uint32 nproc) { 13270b57cec5SDimitry Andric kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); 13280b57cec5SDimitry Andric while (bool_result == 0) { // someone else is trying to resize 13290b57cec5SDimitry Andric KMP_CPU_PAUSE(); 13300b57cec5SDimitry Andric if (nproc <= base_num_threads) // happy with other thread's resize 13310b57cec5SDimitry Andric return; 13320b57cec5SDimitry Andric else // try to resize 13330b57cec5SDimitry Andric bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); 13340b57cec5SDimitry Andric } 13350b57cec5SDimitry Andric KMP_DEBUG_ASSERT(bool_result != 0); 13360b57cec5SDimitry Andric if (nproc <= base_num_threads) 13370b57cec5SDimitry Andric return; // happy with other thread's resize 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric // Calculate new maxLevels 13400b57cec5SDimitry Andric kmp_uint32 old_sz = skipPerLevel[depth - 1]; 13410b57cec5SDimitry Andric kmp_uint32 incs = 0, old_maxLevels = maxLevels; 13420b57cec5SDimitry Andric // First see if old maxLevels is enough to contain new size 13430b57cec5SDimitry Andric for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) { 13440b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 13450b57cec5SDimitry Andric numPerLevel[i - 1] *= 2; 13460b57cec5SDimitry Andric old_sz *= 2; 13470b57cec5SDimitry Andric depth++; 13480b57cec5SDimitry Andric } 13490b57cec5SDimitry Andric if (nproc > old_sz) { // Not enough space, need to expand hierarchy 13500b57cec5SDimitry Andric while (nproc > old_sz) { 13510b57cec5SDimitry Andric old_sz *= 2; 13520b57cec5SDimitry Andric incs++; 13530b57cec5SDimitry Andric depth++; 13540b57cec5SDimitry Andric } 13550b57cec5SDimitry Andric maxLevels += incs; 13560b57cec5SDimitry Andric 13570b57cec5SDimitry Andric // Resize arrays 13580b57cec5SDimitry Andric kmp_uint32 *old_numPerLevel = numPerLevel; 13590b57cec5SDimitry Andric kmp_uint32 *old_skipPerLevel = skipPerLevel; 13600b57cec5SDimitry Andric numPerLevel = skipPerLevel = NULL; 13610b57cec5SDimitry Andric numPerLevel = 13620b57cec5SDimitry Andric (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); 13630b57cec5SDimitry Andric skipPerLevel = &(numPerLevel[maxLevels]); 13640b57cec5SDimitry Andric 13650b57cec5SDimitry Andric // Copy old elements from old arrays 1366e8d8bef9SDimitry Andric for (kmp_uint32 i = 0; i < old_maxLevels; ++i) { 1367e8d8bef9SDimitry Andric // init numPerLevel[*] to 1 item per level 13680b57cec5SDimitry Andric numPerLevel[i] = old_numPerLevel[i]; 13690b57cec5SDimitry Andric skipPerLevel[i] = old_skipPerLevel[i]; 13700b57cec5SDimitry Andric } 13710b57cec5SDimitry Andric 13720b57cec5SDimitry Andric // Init new elements in arrays to 1 1373e8d8bef9SDimitry Andric for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) { 1374e8d8bef9SDimitry Andric // init numPerLevel[*] to 1 item per level 13750b57cec5SDimitry Andric numPerLevel[i] = 1; 13760b57cec5SDimitry Andric skipPerLevel[i] = 1; 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric // Free old arrays 13800b57cec5SDimitry Andric __kmp_free(old_numPerLevel); 13810b57cec5SDimitry Andric } 13820b57cec5SDimitry Andric 13830b57cec5SDimitry Andric // Fill in oversubscription levels of hierarchy 13840b57cec5SDimitry Andric for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) 13850b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 13860b57cec5SDimitry Andric 13870b57cec5SDimitry Andric base_num_threads = nproc; 13880b57cec5SDimitry Andric resizing = 0; // One writer 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric }; 13910b57cec5SDimitry Andric #endif // KMP_AFFINITY_H 1392