/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#include <limits>

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
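    // Note: begin()/end()/next() form a simple forward-iteration protocol
    // over the set bits; a typical traversal (a sketch derived from these
    // methods) is:
    //   for (int i = m->begin(); i != m->end(); i = m->next(i))
    //     ... // i is an OS processor id present in the mask
    // For the hwloc mask, end() is -1 because hwloc_bitmap_next() returns -1
    // once the last set bit has been passed.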
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
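/* The Linux mask implementation below issues these calls directly, e.g.
   syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask), rather
   than going through the glibc cpu_set_t wrappers, which is why the numbers
   above must match the kernel ABI for each architecture. */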
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
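    // Representation note: OS proc i lives in word i / BITS_PER_MASK_T at bit
    // position i % BITS_PER_MASK_T. With a 64-bit mask_t, for example, proc
    // 130 maps to bit 2 of mask[2].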
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
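    // One mask_t word is kept per Windows processor group; on 64-bit Windows
    // a group holds at most 64 logical processors, matching the width of the
    // ULONG_PTR word used here.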
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
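    // As implemented below: returns 1 when only a single processor group
    // exists, the index of the one group the mask touches when exactly one
    // group is populated, and -1 when the mask spans more than one group
    // (such a mask cannot be expressed as a single GROUP_AFFINITY).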
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

// Describe an attribute for a level in the machine topology
struct kmp_hw_attr_t {
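  // The bitfields below pack into a single 32-bit word: 8 + 8 + 1 + 15 bits.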
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
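  // Containment relation used below: *this contains 'other' when both are
  // unset, or when both are set and every field that 'other' specifies (core
  // type and/or core efficiency) is specified by *this with the same value;
  // in all other cases (one set, one unset) it returns false.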
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};

class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};

class kmp_topology_t {

  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long and have been
  // allocated to hold up to KMP_HW_LAST number of objects if
  // needed, so layers can be added without reallocation of any array

  // Ordered array of the types in the topology
  kmp_hw_t *types;

  // Keep quick topology ratios; for non-uniform topologies,
  // this ratio holds the max number of itemAs per itemB
  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
  int *ratio;

  // Storage containing the absolute number of each topology layer
  int *count;
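  // For example, for the uniform topology sketched above,
  // [ 4 packages | 6 cores / package | 2 threads / core ]:
  //   ratio = {4, 6, 2} and count = {4, 24, 48}, so calculate_ratio(2, 0)
  //   (defined further below) multiplies ratio[2] * ratio[1] = 12, i.e. the
  //   number of hardware threads per package.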

  // The number of core efficiencies. This is only useful for hybrid
  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array
  // hw_threads is num_hw_threads long
  // Each hw_thread's ids and sub_ids are depth deep
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence hash where the key is the hardware topology item
  // and the value is the equivalent hardware topology type in the
  // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
  // known equivalence for the topology type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif

  // Count each item & get the num x's per y
  // e.g., get the number of cores and the number of threads per core
  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
  void _remove_radix1_layers();

  // Find out if the topology is uniform
  void _discover_uniformity();

  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();

  // Set global affinity variables describing the number of threads per
  // core, the number of packages, the number of cores per package, and
  // the number of cores.
  void _set_globals();

  // Set the last level cache equivalent type
  void _set_last_level_cache();

  // Return the number of cores with a particular attribute, 'attr'.
  // If 'find_all' is true, then find all cores on the machine, otherwise find
  // all cores per the layer 'above'
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique, if they are
  // return true, otherwise return false
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

  // Functions used after canonicalize() called
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to
    // be equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1
  // per types corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute
  // 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  void sort_compact() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};
extern kmp_topology_t *__kmp_topology;

class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
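  // For example (a rough sketch; see the KMP_HW_SUBSET parsing code for the
  // exact syntax), KMP_HW_SUBSET=2s,4c,2t would produce three items, one each
  // for the socket, core, and thread layers, with num_attrs == 1,
  // num[0] = 2/4/2 respectively, offset[0] = 0, and an empty attr[0].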
  // Put parentheses around max to avoid accidental use of the Windows max
  // macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order
  // All unknown topology types will be at the beginning of the subset
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
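  // push_back() below merges a new specification into an existing item when
  // one of the same layer type is already present (appending another
  // num/offset/attr slot, up to MAX_ATTRS); otherwise it appends a new item,
  // doubling the backing array when capacity runs out.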
944fe6060f1SDimitry Andric capacity *= 2; 945fe6060f1SDimitry Andric item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity); 946fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) 947fe6060f1SDimitry Andric new_items[i] = items[i]; 948fe6060f1SDimitry Andric __kmp_free(items); 949fe6060f1SDimitry Andric items = new_items; 950fe6060f1SDimitry Andric } 951*0eae32dcSDimitry Andric items[depth].num_attrs = 1; 952fe6060f1SDimitry Andric items[depth].type = type; 953*0eae32dcSDimitry Andric items[depth].num[0] = num; 954*0eae32dcSDimitry Andric items[depth].offset[0] = offset; 955*0eae32dcSDimitry Andric items[depth].attr[0] = attr; 956fe6060f1SDimitry Andric depth++; 957fe6060f1SDimitry Andric set |= (1ull << type); 958fe6060f1SDimitry Andric } 959fe6060f1SDimitry Andric int get_depth() const { return depth; } 960fe6060f1SDimitry Andric const item_t &at(int index) const { 961fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < depth); 962fe6060f1SDimitry Andric return items[index]; 963fe6060f1SDimitry Andric } 964fe6060f1SDimitry Andric item_t &at(int index) { 965fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < depth); 966fe6060f1SDimitry Andric return items[index]; 967fe6060f1SDimitry Andric } 968fe6060f1SDimitry Andric void remove(int index) { 969fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(index >= 0 && index < depth); 970fe6060f1SDimitry Andric set &= ~(1ull << items[index].type); 971fe6060f1SDimitry Andric for (int j = index + 1; j < depth; ++j) { 972fe6060f1SDimitry Andric items[j - 1] = items[j]; 973fe6060f1SDimitry Andric } 974fe6060f1SDimitry Andric depth--; 975fe6060f1SDimitry Andric } 976349cc55cSDimitry Andric void sort() { 977349cc55cSDimitry Andric KMP_DEBUG_ASSERT(__kmp_topology); 978349cc55cSDimitry Andric qsort(items, depth, sizeof(item_t), hw_subset_compare); 979349cc55cSDimitry Andric } 980fe6060f1SDimitry Andric bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); } 981fe6060f1SDimitry Andric void dump() const { 982fe6060f1SDimitry Andric printf("**********************\n"); 983fe6060f1SDimitry Andric printf("*** kmp_hw_subset: ***\n"); 984fe6060f1SDimitry Andric printf("* depth: %d\n", depth); 985fe6060f1SDimitry Andric printf("* items:\n"); 986fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) { 987*0eae32dcSDimitry Andric printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type)); 988*0eae32dcSDimitry Andric for (int j = 0; j < items[i].num_attrs; ++j) { 989*0eae32dcSDimitry Andric printf(" num: %d, offset: %d, attr: ", items[i].num[j], 990*0eae32dcSDimitry Andric items[i].offset[j]); 991*0eae32dcSDimitry Andric if (!items[i].attr[j]) { 992*0eae32dcSDimitry Andric printf(" (none)\n"); 993*0eae32dcSDimitry Andric } else { 994*0eae32dcSDimitry Andric printf( 995*0eae32dcSDimitry Andric " core_type = %s, core_eff = %d\n", 996*0eae32dcSDimitry Andric __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()), 997*0eae32dcSDimitry Andric items[i].attr[j].get_core_eff()); 998*0eae32dcSDimitry Andric } 999*0eae32dcSDimitry Andric } 1000fe6060f1SDimitry Andric } 1001fe6060f1SDimitry Andric printf("* set: 0x%llx\n", set); 1002fe6060f1SDimitry Andric printf("* absolute: %d\n", absolute); 1003fe6060f1SDimitry Andric printf("**********************\n"); 1004fe6060f1SDimitry Andric } 1005fe6060f1SDimitry Andric }; 1006fe6060f1SDimitry Andric extern kmp_hw_subset_t *__kmp_hw_subset; 10070b57cec5SDimitry Andric 10080b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once 
10080b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
10090b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
10100b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
10110b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
10120b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
10130b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
10140b57cec5SDimitry Andric class hierarchy_info {
10150b57cec5SDimitry Andric public:
10160b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
10170b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
10180b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
10190b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
10200b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
10210b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
10220b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
10230b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
10240b57cec5SDimitry Andric       hierarchy each time we add a level. */
10250b57cec5SDimitry Andric   kmp_uint32 maxLevels;
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
10280b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
10290b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
10300b57cec5SDimitry Andric       all but one trailing 1. */
10310b57cec5SDimitry Andric   kmp_uint32 depth;
10320b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
10330b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
10340b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
10350b57cec5SDimitry Andric                                    // 2=initialization in progress
10360b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
10370b57cec5SDimitry Andric 
10380b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
10390b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
10400b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
10410b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
10420b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
10430b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
10440b57cec5SDimitry Andric 
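  // Rough worked example for the machine described in the comment above
  // (4 packages x 4 cores/package x 2 HW threads/core), assuming the default
  // maxLevels of 7 used by init() below. skipPerLevel[0] is 1 and each further
  // entry is numPerLevel[i-1] * skipPerLevel[i-1], i.e. the number of leaf
  // threads spanned by one node at level i; entries past the real depth are
  // doubled so oversubscription can be absorbed without rebuilding the tree.
  //
  //   numPerLevel  = { 2, 4, 4,  1,  1,   1,   1 }
  //   skipPerLevel = { 1, 2, 8, 32, 64, 128, 256 }
  //
  // skipPerLevel[depth - 1] == 32 then matches the 32 hardware threads of the
  // example machine.
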
1045fe6060f1SDimitry Andric   void deriveLevels() {
1046fe6060f1SDimitry Andric     int hier_depth = __kmp_topology->get_depth();
1047fe6060f1SDimitry Andric     for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1048fe6060f1SDimitry Andric       numPerLevel[level] = __kmp_topology->get_ratio(i);
10490b57cec5SDimitry Andric     }
10500b57cec5SDimitry Andric   }
10510b57cec5SDimitry Andric 
10520b57cec5SDimitry Andric   hierarchy_info()
10530b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
10540b57cec5SDimitry Andric 
10550b57cec5SDimitry Andric   void fini() {
10560b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
10570b57cec5SDimitry Andric       __kmp_free(numPerLevel);
10580b57cec5SDimitry Andric       numPerLevel = NULL;
10590b57cec5SDimitry Andric       uninitialized = not_initialized;
10600b57cec5SDimitry Andric     }
10610b57cec5SDimitry Andric   }
10620b57cec5SDimitry Andric 
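  // Rough sketch of what init() below computes when no topology information is
  // available (the __kmp_topology path is skipped) and num_addrs == 64: the
  // default branch seeds numPerLevel = {4, 16, 1, ...}, and the
  // width-optimization loop then halves any level wider than the branching
  // factor (minBranch == 4) while doubling the level above it:
  //
  //   seeded:    numPerLevel  = { 4, 16,  1,  1,   1,   1,   1 }   depth == 3
  //   optimized: numPerLevel  = { 4,  4,  4,  1,   1,   1,   1 }   depth == 4
  //              skipPerLevel = { 1,  4, 16, 64, 128, 256, 512 }
  //
  // giving a tree with branching factor 4 at every level and a capacity of
  // skipPerLevel[depth - 1] == 64 threads, matching num_addrs.
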
1063fe6060f1SDimitry Andric   void init(int num_addrs) {
10640b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
10650b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
10660b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
10670b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
10680b57cec5SDimitry Andric         KMP_CPU_PAUSE();
10690b57cec5SDimitry Andric       return;
10700b57cec5SDimitry Andric     }
10710b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
10720b57cec5SDimitry Andric 
10730b57cec5SDimitry Andric     /* Explicitly initialize the data fields here to prevent use of dirty
10740b57cec5SDimitry Andric        values observed when the static library is re-initialized multiple times
10750b57cec5SDimitry Andric        (e.g. when a non-OpenMP thread repeatedly launches/joins a thread that
10760b57cec5SDimitry Andric        uses OpenMP). */
10770b57cec5SDimitry Andric     depth = 1;
10780b57cec5SDimitry Andric     resizing = 0;
10790b57cec5SDimitry Andric     maxLevels = 7;
10800b57cec5SDimitry Andric     numPerLevel =
10810b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
10820b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
10830b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
10840b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
10850b57cec5SDimitry Andric       numPerLevel[i] = 1;
10860b57cec5SDimitry Andric       skipPerLevel[i] = 1;
10870b57cec5SDimitry Andric     }
10880b57cec5SDimitry Andric 
10890b57cec5SDimitry Andric     // Derive the hierarchy from the machine topology when it is available
1090fe6060f1SDimitry Andric     if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1091fe6060f1SDimitry Andric       deriveLevels();
10920b57cec5SDimitry Andric     } else {
10930b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
10940b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
10950b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
10960b57cec5SDimitry Andric         numPerLevel[1]++;
10970b57cec5SDimitry Andric     }
10980b57cec5SDimitry Andric 
10990b57cec5SDimitry Andric     base_num_threads = num_addrs;
11000b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
11010b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
11020b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
11030b57cec5SDimitry Andric         depth++;
11040b57cec5SDimitry Andric 
11050b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
11060b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
11070b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
11080b57cec5SDimitry Andric     if (branch < minBranch)
11090b57cec5SDimitry Andric       branch = minBranch;
11100b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
11110b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
11120b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
11130b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
11140b57cec5SDimitry Andric           numPerLevel[d]++;
11150b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
11160b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
11170b57cec5SDimitry Andric           depth++;
11180b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
11190b57cec5SDimitry Andric       }
11200b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
11210b57cec5SDimitry Andric         branch = branch >> 1;
11220b57cec5SDimitry Andric         if (branch < 4)
11230b57cec5SDimitry Andric           branch = minBranch;
11240b57cec5SDimitry Andric       }
11250b57cec5SDimitry Andric     }
11260b57cec5SDimitry Andric 
11270b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
11280b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
11290b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
11300b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
11310b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11320b57cec5SDimitry Andric 
11330b57cec5SDimitry Andric     uninitialized = initialized; // One writer
11340b57cec5SDimitry Andric   }
11350b57cec5SDimitry Andric 
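  // Rough continuation of the 4x4x2 example above: resize() below only ever
  // grows the hierarchy. Starting from base_num_threads == 32, depth == 4 and
  // skipPerLevel == {1, 2, 8, 32, 64, 128, 256}, a call to resize(40) consumes
  // one of the pre-doubled oversubscription levels instead of reallocating:
  //
  //   numPerLevel[3] : 1 -> 2,   depth : 4 -> 5,   maxLevels unchanged (7)
  //
  // which leaves a capacity of skipPerLevel[depth - 1] == 64 threads; the
  // arrays are only reallocated once nproc outgrows every pre-allocated level.
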
11360b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
11370b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
11380b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
11390b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
11400b57cec5SDimitry Andric       KMP_CPU_PAUSE();
11410b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
11420b57cec5SDimitry Andric         return;
11430b57cec5SDimitry Andric       else // try to resize
11440b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
11450b57cec5SDimitry Andric     }
11460b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
11470b57cec5SDimitry Andric     if (nproc <= base_num_threads)
11480b57cec5SDimitry Andric       return; // happy with other thread's resize
11490b57cec5SDimitry Andric 
11500b57cec5SDimitry Andric     // Calculate new maxLevels
11510b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
11520b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
11530b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
11540b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
11550b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11560b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
11570b57cec5SDimitry Andric       old_sz *= 2;
11580b57cec5SDimitry Andric       depth++;
11590b57cec5SDimitry Andric     }
11600b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
11610b57cec5SDimitry Andric       while (nproc > old_sz) {
11620b57cec5SDimitry Andric         old_sz *= 2;
11630b57cec5SDimitry Andric         incs++;
11640b57cec5SDimitry Andric         depth++;
11650b57cec5SDimitry Andric       }
11660b57cec5SDimitry Andric       maxLevels += incs;
11670b57cec5SDimitry Andric 
11680b57cec5SDimitry Andric       // Resize arrays
11690b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
11700b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
11710b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
11720b57cec5SDimitry Andric       numPerLevel =
11730b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
11740b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric       // Copy old elements from old arrays
1177e8d8bef9SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1178e8d8bef9SDimitry Andric         // copy the old per-level values into the new arrays
11790b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
11800b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
11810b57cec5SDimitry Andric       }
11820b57cec5SDimitry Andric 
11830b57cec5SDimitry Andric       // Init new elements in arrays to 1
1184e8d8bef9SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1185e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
11860b57cec5SDimitry Andric         numPerLevel[i] = 1;
11870b57cec5SDimitry Andric         skipPerLevel[i] = 1;
11880b57cec5SDimitry Andric       }
11890b57cec5SDimitry Andric 
11900b57cec5SDimitry Andric       // Free old arrays
11910b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
11920b57cec5SDimitry Andric     }
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
11950b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
11960b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11970b57cec5SDimitry Andric 
11980b57cec5SDimitry Andric     base_num_threads = nproc;
11990b57cec5SDimitry Andric     resizing = 0; // One writer
12000b57cec5SDimitry Andric   }
12010b57cec5SDimitry Andric };
12020b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
1203