10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_affinity.h -- header for affinity management 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H 140b57cec5SDimitry Andric #define KMP_AFFINITY_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "kmp.h" 170b57cec5SDimitry Andric #include "kmp_os.h" 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 200b57cec5SDimitry Andric #if KMP_USE_HWLOC 210b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity { 220b57cec5SDimitry Andric public: 230b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 240b57cec5SDimitry Andric hwloc_cpuset_t mask; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric public: 270b57cec5SDimitry Andric Mask() { 280b57cec5SDimitry Andric mask = hwloc_bitmap_alloc(); 290b57cec5SDimitry Andric this->zero(); 300b57cec5SDimitry Andric } 310b57cec5SDimitry Andric ~Mask() { hwloc_bitmap_free(mask); } 320b57cec5SDimitry Andric void set(int i) override { hwloc_bitmap_set(mask, i); } 330b57cec5SDimitry Andric bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } 340b57cec5SDimitry Andric void clear(int i) override { hwloc_bitmap_clr(mask, i); } 350b57cec5SDimitry Andric void zero() override { hwloc_bitmap_zero(mask); } 360b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 
370b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 380b57cec5SDimitry Andric hwloc_bitmap_copy(mask, convert->mask); 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 410b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 420b57cec5SDimitry Andric hwloc_bitmap_and(mask, mask, convert->mask); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 450b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 460b57cec5SDimitry Andric hwloc_bitmap_or(mask, mask, convert->mask); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric void bitwise_not() override { hwloc_bitmap_not(mask, mask); } 490b57cec5SDimitry Andric int begin() const override { return hwloc_bitmap_first(mask); } 500b57cec5SDimitry Andric int end() const override { return -1; } 510b57cec5SDimitry Andric int next(int previous) const override { 520b57cec5SDimitry Andric return hwloc_bitmap_next(mask, previous); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 550b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 560b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 57e8d8bef9SDimitry Andric long retval = 580b57cec5SDimitry Andric hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 590b57cec5SDimitry Andric if (retval >= 0) { 600b57cec5SDimitry Andric return 0; 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric int error = errno; 630b57cec5SDimitry Andric if (abort_on_error) { 640b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric return error; 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 690b57cec5SDimitry Andric 
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 70e8d8bef9SDimitry Andric "Illegal set affinity operation when not capable"); 71e8d8bef9SDimitry Andric long retval = 720b57cec5SDimitry Andric hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 730b57cec5SDimitry Andric if (retval >= 0) { 740b57cec5SDimitry Andric return 0; 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric int error = errno; 770b57cec5SDimitry Andric if (abort_on_error) { 780b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric return error; 810b57cec5SDimitry Andric } 82e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 83e8d8bef9SDimitry Andric int set_process_affinity(bool abort_on_error) const override { 84e8d8bef9SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 85e8d8bef9SDimitry Andric "Illegal set process affinity operation when not capable"); 86e8d8bef9SDimitry Andric int error = 0; 87e8d8bef9SDimitry Andric const hwloc_topology_support *support = 88e8d8bef9SDimitry Andric hwloc_topology_get_support(__kmp_hwloc_topology); 89e8d8bef9SDimitry Andric if (support->cpubind->set_proc_cpubind) { 90e8d8bef9SDimitry Andric int retval; 91e8d8bef9SDimitry Andric retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, 92e8d8bef9SDimitry Andric HWLOC_CPUBIND_PROCESS); 93e8d8bef9SDimitry Andric if (retval >= 0) 94e8d8bef9SDimitry Andric return 0; 95e8d8bef9SDimitry Andric error = errno; 96e8d8bef9SDimitry Andric if (abort_on_error) 97e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 98e8d8bef9SDimitry Andric } 99e8d8bef9SDimitry Andric return error; 100e8d8bef9SDimitry Andric } 101e8d8bef9SDimitry Andric #endif 1020b57cec5SDimitry Andric int get_proc_group() const override { 1030b57cec5SDimitry Andric int group = -1; 1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1050b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 1060b57cec5SDimitry Andric return 1; 
1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 1090b57cec5SDimitry Andric // On windows, the long type is always 32 bits 1100b57cec5SDimitry Andric unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); 1110b57cec5SDimitry Andric unsigned long second_32_bits = 1120b57cec5SDimitry Andric hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); 1130b57cec5SDimitry Andric if (first_32_bits == 0 && second_32_bits == 0) { 1140b57cec5SDimitry Andric continue; 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric if (group >= 0) { 1170b57cec5SDimitry Andric return -1; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric group = i; 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1220b57cec5SDimitry Andric return group; 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric }; 1250b57cec5SDimitry Andric void determine_capable(const char *var) override { 1260b57cec5SDimitry Andric const hwloc_topology_support *topology_support; 1270b57cec5SDimitry Andric if (__kmp_hwloc_topology == NULL) { 1280b57cec5SDimitry Andric if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { 1290b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1300b57cec5SDimitry Andric if (__kmp_affinity_verbose) 1310b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { 1340b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1350b57cec5SDimitry Andric if (__kmp_affinity_verbose) 1360b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); 1400b57cec5SDimitry Andric // Is the system capable of setting/getting this thread's affinity? 
1410b57cec5SDimitry Andric // Also, is topology discovery possible? (pu indicates ability to discover 1420b57cec5SDimitry Andric // processing units). And finally, were there no errors when calling any 1430b57cec5SDimitry Andric // hwloc_* API functions? 1440b57cec5SDimitry Andric if (topology_support && topology_support->cpubind->set_thisthread_cpubind && 1450b57cec5SDimitry Andric topology_support->cpubind->get_thisthread_cpubind && 1460b57cec5SDimitry Andric topology_support->discovery->pu && !__kmp_hwloc_error) { 1470b57cec5SDimitry Andric // enables affinity according to KMP_AFFINITY_CAPABLE() macro 1480b57cec5SDimitry Andric KMP_AFFINITY_ENABLE(TRUE); 1490b57cec5SDimitry Andric } else { 1500b57cec5SDimitry Andric // indicate that hwloc didn't work and disable affinity 1510b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1520b57cec5SDimitry Andric KMP_AFFINITY_DISABLE(); 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric void bind_thread(int which) override { 1560b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 1570b57cec5SDimitry Andric "Illegal set affinity operation when not capable"); 1580b57cec5SDimitry Andric KMPAffinity::Mask *mask; 1590b57cec5SDimitry Andric KMP_CPU_ALLOC_ON_STACK(mask); 1600b57cec5SDimitry Andric KMP_CPU_ZERO(mask); 1610b57cec5SDimitry Andric KMP_CPU_SET(which, mask); 1620b57cec5SDimitry Andric __kmp_set_system_affinity(mask, TRUE); 1630b57cec5SDimitry Andric KMP_CPU_FREE_FROM_STACK(mask); 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 1660b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 1670b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 1680b57cec5SDimitry Andric return new Mask[num]; 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 1710b57cec5SDimitry Andric Mask 
*hwloc_array = static_cast<Mask *>(array); 1720b57cec5SDimitry Andric delete[] hwloc_array; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 1750b57cec5SDimitry Andric int index) override { 1760b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1770b57cec5SDimitry Andric return &(hwloc_array[index]); 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric api_type get_api_type() const override { return HWLOC; } 1800b57cec5SDimitry Andric }; 1810b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */ 1820b57cec5SDimitry Andric 183489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD 1840b57cec5SDimitry Andric #if KMP_OS_LINUX 1850b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present 1860b57cec5SDimitry Andric in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on 1870b57cec5SDimitry Andric all systems of the same arch where they are defined, and they cannot change. 1880b57cec5SDimitry Andric stone forever. */ 1890b57cec5SDimitry Andric #include <sys/syscall.h> 1900b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM 1910b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 1920b57cec5SDimitry Andric #define __NR_sched_setaffinity 241 1930b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241 1940b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 1950b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 1960b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 1970b57cec5SDimitry Andric #define __NR_sched_getaffinity 242 1980b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242 1990b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 
2000b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2010b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64 2020b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2030b57cec5SDimitry Andric #define __NR_sched_setaffinity 122 2040b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122 2050b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2060b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2070b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2080b57cec5SDimitry Andric #define __NR_sched_getaffinity 123 2090b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123 2100b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2110b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2120b57cec5SDimitry Andric #elif KMP_ARCH_X86_64 2130b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2140b57cec5SDimitry Andric #define __NR_sched_setaffinity 203 2150b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203 2160b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2170b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2180b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2190b57cec5SDimitry Andric #define __NR_sched_getaffinity 204 2200b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204 2210b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2220b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2230b57cec5SDimitry Andric #elif KMP_ARCH_PPC64 2240b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2250b57cec5SDimitry Andric #define __NR_sched_setaffinity 222 2260b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222 2270b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 
2280b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2290b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2300b57cec5SDimitry Andric #define __NR_sched_getaffinity 223 2310b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223 2320b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2330b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2340b57cec5SDimitry Andric #elif KMP_ARCH_MIPS 2350b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2360b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239 2370b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239 2380b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2390b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2400b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2410b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240 2420b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240 2430b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2440b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2450b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64 2460b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2470b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195 2480b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195 2490b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2500b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2510b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2520b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196 2530b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196 2540b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 
2550b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2560b57cec5SDimitry Andric #error Unknown or unsupported architecture 2570b57cec5SDimitry Andric #endif /* KMP_ARCH_* */ 258489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 259489b1cf2SDimitry Andric #include <pthread.h> 260489b1cf2SDimitry Andric #include <pthread_np.h> 261489b1cf2SDimitry Andric #endif 2620b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 2630b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 264e8d8bef9SDimitry Andric typedef unsigned long mask_t; 265e8d8bef9SDimitry Andric typedef decltype(__kmp_affin_mask_size) mask_size_type; 266e8d8bef9SDimitry Andric static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 267e8d8bef9SDimitry Andric static const mask_t ONE = 1; 268e8d8bef9SDimitry Andric mask_size_type get_num_mask_types() const { 269e8d8bef9SDimitry Andric return __kmp_affin_mask_size / sizeof(mask_t); 270e8d8bef9SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric public: 2730b57cec5SDimitry Andric mask_t *mask; 2740b57cec5SDimitry Andric Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } 2750b57cec5SDimitry Andric ~Mask() { 2760b57cec5SDimitry Andric if (mask) 2770b57cec5SDimitry Andric __kmp_free(mask); 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric void set(int i) override { 280e8d8bef9SDimitry Andric mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T)); 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric bool is_set(int i) const override { 283e8d8bef9SDimitry Andric return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T))); 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric void clear(int i) override { 286e8d8bef9SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T)); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric void zero() override { 289e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 290e8d8bef9SDimitry Andric 
for (mask_size_type i = 0; i < e; ++i) 291e8d8bef9SDimitry Andric mask[i] = (mask_t)0; 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 2940b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 295e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 296e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 2970b57cec5SDimitry Andric mask[i] = convert->mask[i]; 2980b57cec5SDimitry Andric } 2990b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 3000b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 301e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 302e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3030b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 3040b57cec5SDimitry Andric } 3050b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 3060b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 307e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 308e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3090b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric void bitwise_not() override { 312e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 313e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3140b57cec5SDimitry Andric mask[i] = ~(mask[i]); 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric int begin() const override { 3170b57cec5SDimitry Andric int retval = 0; 3180b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 3190b57cec5SDimitry Andric ++retval; 3200b57cec5SDimitry Andric return retval; 3210b57cec5SDimitry Andric } 322e8d8bef9SDimitry Andric int end() const override { 323e8d8bef9SDimitry Andric int e; 324e8d8bef9SDimitry Andric __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e); 
325e8d8bef9SDimitry Andric return e; 326e8d8bef9SDimitry Andric } 3270b57cec5SDimitry Andric int next(int previous) const override { 3280b57cec5SDimitry Andric int retval = previous + 1; 3290b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 3300b57cec5SDimitry Andric ++retval; 3310b57cec5SDimitry Andric return retval; 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 3340b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 3350b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 336489b1cf2SDimitry Andric #if KMP_OS_LINUX 337e8d8bef9SDimitry Andric long retval = 3380b57cec5SDimitry Andric syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); 339489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 340*fe6060f1SDimitry Andric int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, 341*fe6060f1SDimitry Andric reinterpret_cast<cpuset_t *>(mask)); 3425ffd83dbSDimitry Andric int retval = (r == 0 ? 
0 : -1); 343489b1cf2SDimitry Andric #endif 3440b57cec5SDimitry Andric if (retval >= 0) { 3450b57cec5SDimitry Andric return 0; 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric int error = errno; 3480b57cec5SDimitry Andric if (abort_on_error) { 3490b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric return error; 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 3540b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 355e8d8bef9SDimitry Andric "Illegal set affinity operation when not capable"); 356489b1cf2SDimitry Andric #if KMP_OS_LINUX 357e8d8bef9SDimitry Andric long retval = 3580b57cec5SDimitry Andric syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); 359489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 360*fe6060f1SDimitry Andric int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, 361*fe6060f1SDimitry Andric reinterpret_cast<cpuset_t *>(mask)); 3625ffd83dbSDimitry Andric int retval = (r == 0 ? 
0 : -1); 363489b1cf2SDimitry Andric #endif 3640b57cec5SDimitry Andric if (retval >= 0) { 3650b57cec5SDimitry Andric return 0; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric int error = errno; 3680b57cec5SDimitry Andric if (abort_on_error) { 3690b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric return error; 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric }; 3740b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 3750b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 3780b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { 3790b57cec5SDimitry Andric KMPNativeAffinity::Mask *retval = new Mask(); 3800b57cec5SDimitry Andric return retval; 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { 3830b57cec5SDimitry Andric KMPNativeAffinity::Mask *native_mask = 3840b57cec5SDimitry Andric static_cast<KMPNativeAffinity::Mask *>(m); 3850b57cec5SDimitry Andric delete native_mask; 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 3880b57cec5SDimitry Andric return new Mask[num]; 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 3910b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 3920b57cec5SDimitry Andric delete[] linux_array; 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 3950b57cec5SDimitry Andric int index) override { 3960b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 3970b57cec5SDimitry Andric return &(linux_array[index]); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry 
Andric api_type get_api_type() const override { return NATIVE_OS; } 4000b57cec5SDimitry Andric }; 401489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */ 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric #if KMP_OS_WINDOWS 4040b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 4050b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 4060b57cec5SDimitry Andric typedef ULONG_PTR mask_t; 4070b57cec5SDimitry Andric static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 4080b57cec5SDimitry Andric mask_t *mask; 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric public: 4110b57cec5SDimitry Andric Mask() { 4120b57cec5SDimitry Andric mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric ~Mask() { 4150b57cec5SDimitry Andric if (mask) 4160b57cec5SDimitry Andric __kmp_free(mask); 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric void set(int i) override { 4190b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric bool is_set(int i) const override { 4220b57cec5SDimitry Andric return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric void clear(int i) override { 4250b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric void zero() override { 4280b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4290b57cec5SDimitry Andric mask[i] = 0; 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 4320b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 4330b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4340b57cec5SDimitry Andric mask[i] = convert->mask[i]; 4350b57cec5SDimitry 
Andric } 4360b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 4370b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 4380b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4390b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 4420b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 4430b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4440b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric void bitwise_not() override { 4470b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4480b57cec5SDimitry Andric mask[i] = ~(mask[i]); 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric int begin() const override { 4510b57cec5SDimitry Andric int retval = 0; 4520b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4530b57cec5SDimitry Andric ++retval; 4540b57cec5SDimitry Andric return retval; 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } 4570b57cec5SDimitry Andric int next(int previous) const override { 4580b57cec5SDimitry Andric int retval = previous + 1; 4590b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4600b57cec5SDimitry Andric ++retval; 4610b57cec5SDimitry Andric return retval; 4620b57cec5SDimitry Andric } 463e8d8bef9SDimitry Andric int set_process_affinity(bool abort_on_error) const override { 464e8d8bef9SDimitry Andric if (__kmp_num_proc_groups <= 1) { 465e8d8bef9SDimitry Andric if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) { 466e8d8bef9SDimitry Andric DWORD error = GetLastError(); 467e8d8bef9SDimitry Andric if (abort_on_error) { 468e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 469e8d8bef9SDimitry 
Andric __kmp_msg_null); 470e8d8bef9SDimitry Andric } 471e8d8bef9SDimitry Andric return error; 472e8d8bef9SDimitry Andric } 473e8d8bef9SDimitry Andric } 474e8d8bef9SDimitry Andric return 0; 475e8d8bef9SDimitry Andric } 4760b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 4770b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 4780b57cec5SDimitry Andric // Check for a valid mask. 4790b57cec5SDimitry Andric GROUP_AFFINITY ga; 4800b57cec5SDimitry Andric int group = get_proc_group(); 4810b57cec5SDimitry Andric if (group < 0) { 4820b57cec5SDimitry Andric if (abort_on_error) { 4830b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric return -1; 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric // Transform the bit vector into a GROUP_AFFINITY struct 4880b57cec5SDimitry Andric // and make the system call to set affinity. 4890b57cec5SDimitry Andric ga.Group = group; 4900b57cec5SDimitry Andric ga.Mask = mask[group]; 4910b57cec5SDimitry Andric ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; 4920b57cec5SDimitry Andric 4930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); 4940b57cec5SDimitry Andric if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { 4950b57cec5SDimitry Andric DWORD error = GetLastError(); 4960b57cec5SDimitry Andric if (abort_on_error) { 4970b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 4980b57cec5SDimitry Andric __kmp_msg_null); 4990b57cec5SDimitry Andric } 5000b57cec5SDimitry Andric return error; 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric } else { 5030b57cec5SDimitry Andric if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { 5040b57cec5SDimitry Andric DWORD error = GetLastError(); 5050b57cec5SDimitry Andric if (abort_on_error) { 5060b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 5070b57cec5SDimitry 
Andric __kmp_msg_null); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric return error; 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric return 0; 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 5150b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 5160b57cec5SDimitry Andric this->zero(); 5170b57cec5SDimitry Andric GROUP_AFFINITY ga; 5180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); 5190b57cec5SDimitry Andric if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { 5200b57cec5SDimitry Andric DWORD error = GetLastError(); 5210b57cec5SDimitry Andric if (abort_on_error) { 5220b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), 5230b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric return error; 5260b57cec5SDimitry Andric } 5270b57cec5SDimitry Andric if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || 5280b57cec5SDimitry Andric (ga.Mask == 0)) { 5290b57cec5SDimitry Andric return -1; 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric mask[ga.Group] = ga.Mask; 5320b57cec5SDimitry Andric } else { 5330b57cec5SDimitry Andric mask_t newMask, sysMask, retval; 5340b57cec5SDimitry Andric if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { 5350b57cec5SDimitry Andric DWORD error = GetLastError(); 5360b57cec5SDimitry Andric if (abort_on_error) { 5370b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"), 5380b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric return error; 5410b57cec5SDimitry Andric } 5420b57cec5SDimitry Andric retval = SetThreadAffinityMask(GetCurrentThread(), newMask); 5430b57cec5SDimitry Andric if (!retval) { 5440b57cec5SDimitry Andric DWORD error = GetLastError(); 5450b57cec5SDimitry Andric if 
(abort_on_error) { 5460b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 5470b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric return error; 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric newMask = SetThreadAffinityMask(GetCurrentThread(), retval); 5520b57cec5SDimitry Andric if (!newMask) { 5530b57cec5SDimitry Andric DWORD error = GetLastError(); 5540b57cec5SDimitry Andric if (abort_on_error) { 5550b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 5560b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5570b57cec5SDimitry Andric } 5580b57cec5SDimitry Andric } 5590b57cec5SDimitry Andric *mask = retval; 5600b57cec5SDimitry Andric } 5610b57cec5SDimitry Andric return 0; 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric int get_proc_group() const override { 5640b57cec5SDimitry Andric int group = -1; 5650b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 5660b57cec5SDimitry Andric return 1; 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 5690b57cec5SDimitry Andric if (mask[i] == 0) 5700b57cec5SDimitry Andric continue; 5710b57cec5SDimitry Andric if (group >= 0) 5720b57cec5SDimitry Andric return -1; 5730b57cec5SDimitry Andric group = i; 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric return group; 5760b57cec5SDimitry Andric } 5770b57cec5SDimitry Andric }; 5780b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 5790b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 5800b57cec5SDimitry Andric } 5810b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 5820b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 5830b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 5840b57cec5SDimitry Andric 
  // Mask arrays must be allocated as arrays of the derived Mask type so the
  // element stride is sizeof(Mask), not the abstract base size.
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  // Index into an array created by allocate_mask_array(); the downcast is
  // what makes the pointer arithmetic use the concrete element size.
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

// One hardware thread (OS processor) and its position within each layer of
// the machine topology.
class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  // qsort() comparators: order by topology ids, or by the "compact"
  // affinity ordering (both defined out of line).
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST]; // id at each topology level
  int sub_ids[KMP_HW_LAST]; // relative id at each level (see _set_sub_ids())
  bool leader;
  int os_id; // processor number as seen by the OS
  void print() const;
  // Reset every per-level id to UNKNOWN_ID and clear the leader flag.
  // Note: os_id and sub_ids are deliberately left untouched.
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
  }
};

// Machine topology: an ordered hierarchy of hardware levels (package, core,
// thread, ...) plus the flat array of hardware threads mapped into it.
class kmp_topology_t {

  struct flags_t {
    int uniform : 1; // set by _discover_uniformity()
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long

  // Ordered array of the types in the topology
  kmp_hw_t *types;

  // Keep quick topology ratios, for non-uniform topologies,
  // this ratio holds the max number of itemAs per itemB
  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
  int *ratio;

  // Storage containing the absolute number of each topology layer
  int *count;

  // The hardware threads array
  // hw_threads is num_hw_threads long
  // Each hw_thread's ids and sub_ids are depth deep
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence hash where the key is the hardware topology item
  // and the value is the equivalent hardware topology type in the
  // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
  // known equivalence for the topology type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Count each item & get the num x's per y
  // e.g., get the number of cores and the number of threads per core
  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
  void _remove_radix1_layers();

  // Find out if the topology is uniform
  void _discover_uniformity();

  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();

  // Set global affinity variables describing the number of threads per
  // core, the number of packages, the number of cores per package, and
  // the number of cores.
  void _set_globals();

  // Set the last level cache equivalent type
  void _set_last_level_cache();

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines

  // Bounds-checked (debug builds) access to the hardware thread at 'index'.
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  // Sort hardware threads by their per-level topology ids.
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique, if they are
  // return true, otherwise return false
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

  // Functions used after canonicalize() called
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    // Chase type2 through the equivalence table first so chains collapse to
    // a single canonical type.
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to
    // be equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1
  // per types corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    // Product of the per-level ratios strictly between level2 and level1.
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  // Return the level index of 'type' (or of its equivalent) in types[],
  // or -1 when the type has no equivalent in this topology.
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
#if KMP_AFFINITY_SUPPORTED
  // Sort hardware threads according to the "compact" affinity ordering.
  void sort_compact() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};

// Small growable array of (num, type, offset) selections — presumably the
// parsed KMP_HW_SUBSET specification (see filter_hw_subset()); confirm
// against the parser in kmp_settings.cpp.
class kmp_hw_subset_t {
public:
  struct item_t {
    int num;
    kmp_hw_t type;
    int offset;
  };

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set; // bitmask of which kmp_hw_t types are present in items[]
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  // Create an empty subset with room for 5 items; push_back() grows the
  // array on demand.
  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  // Append an item, doubling the storage when full, and record the item's
  // type in the 'set' bitmask.
  void push_back(int num, kmp_hw_t type, int offset) {
    // NOTE(review): grows one slot early (depth == capacity - 1), so the
    // last slot of each allocation is never used; harmless but wasteful.
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num = num;
    items[depth].type = type;
    items[depth].offset = offset;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  // Remove the item at 'index': clear its type bit from 'set', then shift
  // the remaining items down one slot.
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  // Whether an item of this hardware type has been pushed (and not removed).
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  // Debug aid: print all fields to stdout.
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf("num: %d, type: %s, offset: %d\n", items[i].num,
             __kmp_hw_get_keyword(items[i].type), items[i].offset);
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};

extern kmp_topology_t *__kmp_topology;
extern kmp_hw_subset_t *__kmp_hw_subset;

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual machine
   hierarchy, or to our best guess at what the hierarchy might be, for the
   purpose of performing an efficient barrier. In the worst case, when there is
   no machine hierarchy information, it produces a tree suitable for a barrier,
   similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of the
      hierarchy each time we add a level.
   */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we exclude
      all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads; // thread count the hierarchy was (re)built for
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1.
   */
  kmp_uint32 *numPerLevel;
  // Aliases the second half of the numPerLevel allocation (see init());
  // must not be freed separately.
  kmp_uint32 *skipPerLevel;

  // Seed numPerLevel[] from the detected machine topology, innermost
  // topology layer first (level 0 corresponds to leaves).
  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  // Release the level arrays (single allocation) and mark as uninitialized;
  // safe to call when never initialized.
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  // Build the hierarchy for num_addrs threads. Exactly one thread wins the
  // CAS and performs the build; racing callers spin until 'initialized' is
  // published.
  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage of
       dirty value observed when static library is re-initialized multiple times
       (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
       OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    // Single allocation holds both arrays: numPerLevel then skipPerLevel.
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      // No topology information: guess a two-level tree with maxLeaves
      // children per leaf-level node.
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    // Rebalance: halve over-wide levels, pushing the excess up a level
    // (doubling the parent level's width).
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++; // round up to even before halving
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    // skipPerLevel[i] = cumulative product of numPerLevel[] below level i.
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H