/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#include <limits>

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
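    // Note on iteration: begin() yields the first set bit, next() the
    // following set bit, and end() is the sentinel they are compared
    // against. For hwloc the sentinel is -1, since hwloc_bitmap_first() and
    // hwloc_bitmap_next() return -1 once no set bit remains, so a loop of
    // the form
    //   for (int i = m.begin(); i != m.end(); i = m.next(i)) { ... }
    // visits every set bit.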
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                      KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
        }
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
        }
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
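    // Example of the indexing above: with a 64-bit mask_t, logical processor
    // 70 lives in mask[70 / 64] == mask[1], bit 70 % 64 == 6. One mask_t word
    // is kept per Windows processor group (each group holds at most 64
    // logical processors).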
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
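        // A thread's affinity can span only one Windows processor group at a
        // time; that is why a mask with bits in more than one group was
        // rejected above (get_proc_group() returns -1 for such masks).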
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

// Describe an attribute for a level in the machine topology
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};

#if KMP_AFFINITY_SUPPORTED
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif

class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static const int MULTIPLE_ID = -2;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};

class kmp_topology_t {

  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long and have been
  // allocated to hold up to KMP_HW_LAST number of objects if
  // needed so layers can be added without reallocation of any array

  // Ordered array of the types in the topology
  kmp_hw_t *types;

  // Keep quick topology ratios; for non-uniform topologies,
  // each ratio holds the max number of itemAs per itemB,
  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
  int *ratio;

  // Storage containing the absolute number of each topology layer
  int *count;
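  // e.g., for the ratio example above, count would hold the machine-wide
  // totals: [ 4 packages | 24 cores | 48 threads ]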

  // The number of core efficiencies. This is only useful for hybrid
  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array
  // hw_threads is num_hw_threads long
  // Each hw_thread's ids and sub_ids are depth deep
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence hash where the key is the hardware topology item
  // and the value is the equivalent hardware topology type in the
  // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
  // known equivalence for the topology type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Compact value used during sort_compact()
  int compact;

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif

  // Count each item & get the num x's per y
  // e.g., get the number of cores and the number of threads per core
  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
  void _remove_radix1_layers();

  // Find out if the topology is uniform
  void _discover_uniformity();

  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();

  // Set global affinity variables describing the number of threads per
  // core, the number of packages, the number of cores per package, and
  // the number of cores.
  void _set_globals();

  // Set the last level cache equivalent type
  void _set_last_level_cache();

  // Return the number of cores with a particular attribute, 'attr'.
  // If 'find_all' is true, then find all cores on the machine, otherwise find
  // all cores per the layer 'above'
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique, if they are
  // return true, otherwise return false
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

  // Functions used after canonicalize() called

#if KMP_AFFINITY_SUPPORTED
  // Set the granularity for affinity settings
  void set_granularity(kmp_affinity_t &stgs) const;
#endif
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to
    // be equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1
  // per types corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute
  // 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
  void sort_compact(kmp_affinity_t &affinity) {
    compact = affinity.compact;
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};
extern kmp_topology_t *__kmp_topology;

class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
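  // For illustration: a subset such as KMP_HW_SUBSET=2s,4c,2t would be kept
  // as three items, one per layer, roughly {type=KMP_HW_SOCKET, num[0]=2},
  // {type=KMP_HW_CORE, num[0]=4}, {type=KMP_HW_THREAD, num[0]=2}, with
  // offsets 0 and no core attributes set.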
  // Put parenthesis around max to avoid accidental use of Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order
  // All unknown topology types will be at the beginning of the subset
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    for (int i = 0; i < depth; ++i) {
      // Found an existing item for this layer type
      // Add the num, offset, and attr to this item
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
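    // No existing item for this layer type: append a new one, growing the
    // items array first if needed. A layer specified more than once (e.g.
    // once per core type) therefore ends up as a single item whose
    // num/offset/attr slots hold one entry per specification.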
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf("  num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;


/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to
      any leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
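  /* Worked example (editorial illustration, derived from init() below): for
     the 4-package, 4-core, 2-HT machine above, init() computes
     skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1], so
     numPerLevel = {2, 4, 4, ...} yields skipPerLevel = {1, 2, 8, 32, ...}:
     skipPerLevel[i] is the number of leaf threads spanned by one node at
     level i, and entries past the machine depth keep doubling to absorb
     oversubscription. */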

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

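  /* Editorial note on init() below: initialization is single-writer. The
     first thread to move 'uninitialized' from not_initialized to initializing
     via compare-and-swap builds the arrays; any other thread that arrives
     concurrently spins in KMP_CPU_PAUSE() until the writer publishes
     'initialized', then returns. */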
  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Derive the levels from the detected topology if one is available
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
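    /* Worked example (editorial, using the no-topology fallback above): with
       num_addrs = 64 the fallback sets numPerLevel = {4, 16, 1, ...}; the
       width-optimization loop below repeatedly halves the over-wide level 1
       and doubles the level above it, ending at numPerLevel = {4, 4, 4, ...},
       i.e. a tree of branching factor 4 over 64 leaf threads. */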
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

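  /* Editorial note on resize() below: it reuses the single-writer pattern of
     init(), but on the 'resizing' flag. A thread that loses the
     compare-and-swap spins, and returns early as soon as some other thread's
     resize already covers its requested nproc. */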
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H