10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_affinity.h -- header for affinity management 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H 140b57cec5SDimitry Andric #define KMP_AFFINITY_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "kmp.h" 170b57cec5SDimitry Andric #include "kmp_os.h" 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 200b57cec5SDimitry Andric #if KMP_USE_HWLOC 210b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity { 220b57cec5SDimitry Andric public: 230b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 240b57cec5SDimitry Andric hwloc_cpuset_t mask; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric public: 270b57cec5SDimitry Andric Mask() { 280b57cec5SDimitry Andric mask = hwloc_bitmap_alloc(); 290b57cec5SDimitry Andric this->zero(); 300b57cec5SDimitry Andric } 310b57cec5SDimitry Andric ~Mask() { hwloc_bitmap_free(mask); } 320b57cec5SDimitry Andric void set(int i) override { hwloc_bitmap_set(mask, i); } 330b57cec5SDimitry Andric bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } 340b57cec5SDimitry Andric void clear(int i) override { hwloc_bitmap_clr(mask, i); } 350b57cec5SDimitry Andric void zero() override { hwloc_bitmap_zero(mask); } 360b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 370b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 380b57cec5SDimitry Andric hwloc_bitmap_copy(mask, convert->mask); 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 410b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 420b57cec5SDimitry Andric hwloc_bitmap_and(mask, mask, convert->mask); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 450b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 460b57cec5SDimitry Andric hwloc_bitmap_or(mask, mask, convert->mask); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric void bitwise_not() override { hwloc_bitmap_not(mask, mask); } 490b57cec5SDimitry Andric int begin() const override { return hwloc_bitmap_first(mask); } 500b57cec5SDimitry Andric int end() const override { return -1; } 510b57cec5SDimitry Andric int next(int previous) const override { 520b57cec5SDimitry Andric return hwloc_bitmap_next(mask, previous); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 550b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 560b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 57*e8d8bef9SDimitry Andric long retval = 580b57cec5SDimitry Andric hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 590b57cec5SDimitry Andric if (retval >= 0) { 600b57cec5SDimitry Andric return 0; 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric int error = errno; 630b57cec5SDimitry Andric if (abort_on_error) { 640b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric return error; 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 690b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 70*e8d8bef9SDimitry Andric "Illegal set affinity operation when not capable"); 71*e8d8bef9SDimitry Andric long retval = 720b57cec5SDimitry Andric hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 730b57cec5SDimitry Andric if (retval >= 0) { 740b57cec5SDimitry Andric return 0; 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric int error = errno; 770b57cec5SDimitry Andric if (abort_on_error) { 780b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric return error; 810b57cec5SDimitry Andric } 82*e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 83*e8d8bef9SDimitry Andric int set_process_affinity(bool abort_on_error) const override { 84*e8d8bef9SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 85*e8d8bef9SDimitry Andric "Illegal set process affinity operation when not capable"); 86*e8d8bef9SDimitry Andric int error = 0; 87*e8d8bef9SDimitry Andric const hwloc_topology_support *support = 88*e8d8bef9SDimitry Andric hwloc_topology_get_support(__kmp_hwloc_topology); 89*e8d8bef9SDimitry Andric if (support->cpubind->set_proc_cpubind) { 90*e8d8bef9SDimitry Andric int retval; 91*e8d8bef9SDimitry Andric retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, 92*e8d8bef9SDimitry Andric HWLOC_CPUBIND_PROCESS); 93*e8d8bef9SDimitry Andric if (retval >= 0) 94*e8d8bef9SDimitry Andric return 0; 95*e8d8bef9SDimitry Andric error = errno; 96*e8d8bef9SDimitry Andric if (abort_on_error) 97*e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 98*e8d8bef9SDimitry Andric } 99*e8d8bef9SDimitry Andric return error; 100*e8d8bef9SDimitry Andric } 101*e8d8bef9SDimitry Andric #endif 1020b57cec5SDimitry Andric int get_proc_group() const override { 1030b57cec5SDimitry Andric int group = -1; 1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1050b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 1060b57cec5SDimitry Andric return 1; 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 1090b57cec5SDimitry Andric // On windows, the long type is always 32 bits 1100b57cec5SDimitry Andric unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); 1110b57cec5SDimitry Andric unsigned long second_32_bits = 1120b57cec5SDimitry Andric hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); 1130b57cec5SDimitry Andric if (first_32_bits == 0 && second_32_bits == 0) { 1140b57cec5SDimitry Andric continue; 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric if (group >= 0) { 1170b57cec5SDimitry Andric return -1; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric group = i; 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1220b57cec5SDimitry Andric return group; 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric }; 1250b57cec5SDimitry Andric void determine_capable(const char *var) override { 1260b57cec5SDimitry Andric const hwloc_topology_support *topology_support; 1270b57cec5SDimitry Andric if (__kmp_hwloc_topology == NULL) { 1280b57cec5SDimitry Andric if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { 1290b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1300b57cec5SDimitry Andric if (__kmp_affinity_verbose) 1310b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { 1340b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1350b57cec5SDimitry Andric if (__kmp_affinity_verbose) 1360b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); 1400b57cec5SDimitry Andric // Is the system capable of setting/getting this thread's affinity? 1410b57cec5SDimitry Andric // Also, is topology discovery possible? (pu indicates ability to discover 1420b57cec5SDimitry Andric // processing units). And finally, were there no errors when calling any 1430b57cec5SDimitry Andric // hwloc_* API functions? 1440b57cec5SDimitry Andric if (topology_support && topology_support->cpubind->set_thisthread_cpubind && 1450b57cec5SDimitry Andric topology_support->cpubind->get_thisthread_cpubind && 1460b57cec5SDimitry Andric topology_support->discovery->pu && !__kmp_hwloc_error) { 1470b57cec5SDimitry Andric // enables affinity according to KMP_AFFINITY_CAPABLE() macro 1480b57cec5SDimitry Andric KMP_AFFINITY_ENABLE(TRUE); 1490b57cec5SDimitry Andric } else { 1500b57cec5SDimitry Andric // indicate that hwloc didn't work and disable affinity 1510b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1520b57cec5SDimitry Andric KMP_AFFINITY_DISABLE(); 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric void bind_thread(int which) override { 1560b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 1570b57cec5SDimitry Andric "Illegal set affinity operation when not capable"); 1580b57cec5SDimitry Andric KMPAffinity::Mask *mask; 1590b57cec5SDimitry Andric KMP_CPU_ALLOC_ON_STACK(mask); 1600b57cec5SDimitry Andric KMP_CPU_ZERO(mask); 1610b57cec5SDimitry Andric KMP_CPU_SET(which, mask); 1620b57cec5SDimitry Andric __kmp_set_system_affinity(mask, TRUE); 1630b57cec5SDimitry Andric KMP_CPU_FREE_FROM_STACK(mask); 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 1660b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 1670b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 1680b57cec5SDimitry Andric return new Mask[num]; 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 1710b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1720b57cec5SDimitry Andric delete[] hwloc_array; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 1750b57cec5SDimitry Andric int index) override { 1760b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1770b57cec5SDimitry Andric return &(hwloc_array[index]); 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric api_type get_api_type() const override { return HWLOC; } 1800b57cec5SDimitry Andric }; 1810b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */ 1820b57cec5SDimitry Andric 183489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD 1840b57cec5SDimitry Andric #if KMP_OS_LINUX 1850b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present 1860b57cec5SDimitry Andric in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on 1870b57cec5SDimitry Andric all systems of the same arch where they are defined, and they cannot change. 1880b57cec5SDimitry Andric stone forever. */ 1890b57cec5SDimitry Andric #include <sys/syscall.h> 1900b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM 1910b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 1920b57cec5SDimitry Andric #define __NR_sched_setaffinity 241 1930b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241 1940b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 1950b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 1960b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 1970b57cec5SDimitry Andric #define __NR_sched_getaffinity 242 1980b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242 1990b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2000b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2010b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64 2020b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2030b57cec5SDimitry Andric #define __NR_sched_setaffinity 122 2040b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122 2050b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2060b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2070b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2080b57cec5SDimitry Andric #define __NR_sched_getaffinity 123 2090b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123 2100b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2110b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2120b57cec5SDimitry Andric #elif KMP_ARCH_X86_64 2130b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2140b57cec5SDimitry Andric #define __NR_sched_setaffinity 203 2150b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203 2160b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2170b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2180b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2190b57cec5SDimitry Andric #define __NR_sched_getaffinity 204 2200b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204 2210b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2220b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2230b57cec5SDimitry Andric #elif KMP_ARCH_PPC64 2240b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2250b57cec5SDimitry Andric #define __NR_sched_setaffinity 222 2260b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222 2270b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2280b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2290b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2300b57cec5SDimitry Andric #define __NR_sched_getaffinity 223 2310b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223 2320b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2330b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2340b57cec5SDimitry Andric #elif KMP_ARCH_MIPS 2350b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2360b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239 2370b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239 2380b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2390b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2400b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2410b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240 2420b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240 2430b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2440b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2450b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64 2460b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2470b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195 2480b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195 2490b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2500b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2510b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2520b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196 2530b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196 2540b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2550b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2560b57cec5SDimitry Andric #error Unknown or unsupported architecture 2570b57cec5SDimitry Andric #endif /* KMP_ARCH_* */ 258489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 259489b1cf2SDimitry Andric #include <pthread.h> 260489b1cf2SDimitry Andric #include <pthread_np.h> 261489b1cf2SDimitry Andric #endif 2620b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 2630b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 264*e8d8bef9SDimitry Andric typedef unsigned long mask_t; 265*e8d8bef9SDimitry Andric typedef decltype(__kmp_affin_mask_size) mask_size_type; 266*e8d8bef9SDimitry Andric static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 267*e8d8bef9SDimitry Andric static const mask_t ONE = 1; 268*e8d8bef9SDimitry Andric mask_size_type get_num_mask_types() const { 269*e8d8bef9SDimitry Andric return __kmp_affin_mask_size / sizeof(mask_t); 270*e8d8bef9SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric public: 2730b57cec5SDimitry Andric mask_t *mask; 2740b57cec5SDimitry Andric Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } 2750b57cec5SDimitry Andric ~Mask() { 2760b57cec5SDimitry Andric if (mask) 2770b57cec5SDimitry Andric __kmp_free(mask); 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric void set(int i) override { 280*e8d8bef9SDimitry Andric mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T)); 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric bool is_set(int i) const override { 283*e8d8bef9SDimitry Andric return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T))); 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric void clear(int i) override { 286*e8d8bef9SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T)); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric void zero() override { 289*e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 290*e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 291*e8d8bef9SDimitry Andric mask[i] = (mask_t)0; 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 2940b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 295*e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 296*e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 2970b57cec5SDimitry Andric mask[i] = convert->mask[i]; 2980b57cec5SDimitry Andric } 2990b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 3000b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 301*e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 302*e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3030b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 3040b57cec5SDimitry Andric } 3050b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 3060b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 307*e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 308*e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3090b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric void bitwise_not() override { 312*e8d8bef9SDimitry Andric mask_size_type e = get_num_mask_types(); 313*e8d8bef9SDimitry Andric for (mask_size_type i = 0; i < e; ++i) 3140b57cec5SDimitry Andric mask[i] = ~(mask[i]); 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric int begin() const override { 3170b57cec5SDimitry Andric int retval = 0; 3180b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 3190b57cec5SDimitry Andric ++retval; 3200b57cec5SDimitry Andric return retval; 3210b57cec5SDimitry Andric } 322*e8d8bef9SDimitry Andric int end() const override { 323*e8d8bef9SDimitry Andric int e; 324*e8d8bef9SDimitry Andric __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e); 325*e8d8bef9SDimitry Andric return e; 326*e8d8bef9SDimitry Andric } 3270b57cec5SDimitry Andric int next(int previous) const override { 3280b57cec5SDimitry Andric int retval = previous + 1; 3290b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 3300b57cec5SDimitry Andric ++retval; 3310b57cec5SDimitry Andric return retval; 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 3340b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 3350b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 336489b1cf2SDimitry Andric #if KMP_OS_LINUX 337*e8d8bef9SDimitry Andric long retval = 3380b57cec5SDimitry Andric syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); 339489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 3405ffd83dbSDimitry Andric int r = 341489b1cf2SDimitry Andric pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask)); 3425ffd83dbSDimitry Andric int retval = (r == 0 ? 0 : -1); 343489b1cf2SDimitry Andric #endif 3440b57cec5SDimitry Andric if (retval >= 0) { 3450b57cec5SDimitry Andric return 0; 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric int error = errno; 3480b57cec5SDimitry Andric if (abort_on_error) { 3490b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric return error; 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 3540b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 355*e8d8bef9SDimitry Andric "Illegal set affinity operation when not capable"); 356489b1cf2SDimitry Andric #if KMP_OS_LINUX 357*e8d8bef9SDimitry Andric long retval = 3580b57cec5SDimitry Andric syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); 359489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 3605ffd83dbSDimitry Andric int r = 361489b1cf2SDimitry Andric pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask)); 3625ffd83dbSDimitry Andric int retval = (r == 0 ? 0 : -1); 363489b1cf2SDimitry Andric #endif 3640b57cec5SDimitry Andric if (retval >= 0) { 3650b57cec5SDimitry Andric return 0; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric int error = errno; 3680b57cec5SDimitry Andric if (abort_on_error) { 3690b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric return error; 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric }; 3740b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 3750b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 3780b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { 3790b57cec5SDimitry Andric KMPNativeAffinity::Mask *retval = new Mask(); 3800b57cec5SDimitry Andric return retval; 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { 3830b57cec5SDimitry Andric KMPNativeAffinity::Mask *native_mask = 3840b57cec5SDimitry Andric static_cast<KMPNativeAffinity::Mask *>(m); 3850b57cec5SDimitry Andric delete native_mask; 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 3880b57cec5SDimitry Andric return new Mask[num]; 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 3910b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 3920b57cec5SDimitry Andric delete[] linux_array; 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 3950b57cec5SDimitry Andric int index) override { 3960b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 3970b57cec5SDimitry Andric return &(linux_array[index]); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric api_type get_api_type() const override { return NATIVE_OS; } 4000b57cec5SDimitry Andric }; 401489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */ 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric #if KMP_OS_WINDOWS 4040b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 4050b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 4060b57cec5SDimitry Andric typedef ULONG_PTR mask_t; 4070b57cec5SDimitry Andric static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 4080b57cec5SDimitry Andric mask_t *mask; 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric public: 4110b57cec5SDimitry Andric Mask() { 4120b57cec5SDimitry Andric mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric ~Mask() { 4150b57cec5SDimitry Andric if (mask) 4160b57cec5SDimitry Andric __kmp_free(mask); 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric void set(int i) override { 4190b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric bool is_set(int i) const override { 4220b57cec5SDimitry Andric return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric void clear(int i) override { 4250b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric void zero() override { 4280b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4290b57cec5SDimitry Andric mask[i] = 0; 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 4320b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 4330b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4340b57cec5SDimitry Andric mask[i] = convert->mask[i]; 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 4370b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 4380b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4390b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 4420b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 4430b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4440b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric void bitwise_not() override { 4470b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4480b57cec5SDimitry Andric mask[i] = ~(mask[i]); 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric int begin() const override { 4510b57cec5SDimitry Andric int retval = 0; 4520b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4530b57cec5SDimitry Andric ++retval; 4540b57cec5SDimitry Andric return retval; 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } 4570b57cec5SDimitry Andric int next(int previous) const override { 4580b57cec5SDimitry Andric int retval = previous + 1; 4590b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4600b57cec5SDimitry Andric ++retval; 4610b57cec5SDimitry Andric return retval; 4620b57cec5SDimitry Andric } 463*e8d8bef9SDimitry Andric int set_process_affinity(bool abort_on_error) const override { 464*e8d8bef9SDimitry Andric if (__kmp_num_proc_groups <= 1) { 465*e8d8bef9SDimitry Andric if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) { 466*e8d8bef9SDimitry Andric DWORD error = GetLastError(); 467*e8d8bef9SDimitry Andric if (abort_on_error) { 468*e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 469*e8d8bef9SDimitry Andric __kmp_msg_null); 470*e8d8bef9SDimitry Andric } 471*e8d8bef9SDimitry Andric return error; 472*e8d8bef9SDimitry Andric } 473*e8d8bef9SDimitry Andric } 474*e8d8bef9SDimitry Andric return 0; 475*e8d8bef9SDimitry Andric } 4760b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 4770b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 4780b57cec5SDimitry Andric // Check for a valid mask. 4790b57cec5SDimitry Andric GROUP_AFFINITY ga; 4800b57cec5SDimitry Andric int group = get_proc_group(); 4810b57cec5SDimitry Andric if (group < 0) { 4820b57cec5SDimitry Andric if (abort_on_error) { 4830b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric return -1; 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric // Transform the bit vector into a GROUP_AFFINITY struct 4880b57cec5SDimitry Andric // and make the system call to set affinity. 4890b57cec5SDimitry Andric ga.Group = group; 4900b57cec5SDimitry Andric ga.Mask = mask[group]; 4910b57cec5SDimitry Andric ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; 4920b57cec5SDimitry Andric 4930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); 4940b57cec5SDimitry Andric if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { 4950b57cec5SDimitry Andric DWORD error = GetLastError(); 4960b57cec5SDimitry Andric if (abort_on_error) { 4970b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 4980b57cec5SDimitry Andric __kmp_msg_null); 4990b57cec5SDimitry Andric } 5000b57cec5SDimitry Andric return error; 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric } else { 5030b57cec5SDimitry Andric if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { 5040b57cec5SDimitry Andric DWORD error = GetLastError(); 5050b57cec5SDimitry Andric if (abort_on_error) { 5060b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 5070b57cec5SDimitry Andric __kmp_msg_null); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric return error; 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric return 0; 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 5150b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 5160b57cec5SDimitry Andric this->zero(); 5170b57cec5SDimitry Andric GROUP_AFFINITY ga; 5180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); 5190b57cec5SDimitry Andric if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { 5200b57cec5SDimitry Andric DWORD error = GetLastError(); 5210b57cec5SDimitry Andric if (abort_on_error) { 5220b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), 5230b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric return error; 5260b57cec5SDimitry Andric } 5270b57cec5SDimitry Andric if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || 5280b57cec5SDimitry Andric (ga.Mask == 0)) { 5290b57cec5SDimitry Andric return -1; 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric mask[ga.Group] = ga.Mask; 5320b57cec5SDimitry Andric } else { 5330b57cec5SDimitry Andric mask_t newMask, sysMask, retval; 5340b57cec5SDimitry Andric if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { 5350b57cec5SDimitry Andric DWORD error = GetLastError(); 5360b57cec5SDimitry Andric if (abort_on_error) { 5370b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"), 5380b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric return error; 5410b57cec5SDimitry Andric } 5420b57cec5SDimitry Andric retval = SetThreadAffinityMask(GetCurrentThread(), newMask); 5430b57cec5SDimitry Andric if (!retval) { 5440b57cec5SDimitry Andric DWORD error = GetLastError(); 5450b57cec5SDimitry Andric if (abort_on_error) { 5460b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 5470b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric return error; 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric newMask = SetThreadAffinityMask(GetCurrentThread(), retval); 5520b57cec5SDimitry Andric if (!newMask) { 5530b57cec5SDimitry Andric DWORD error = GetLastError(); 5540b57cec5SDimitry Andric if (abort_on_error) { 5550b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 5560b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5570b57cec5SDimitry Andric } 5580b57cec5SDimitry Andric } 5590b57cec5SDimitry Andric *mask = retval; 5600b57cec5SDimitry Andric } 5610b57cec5SDimitry Andric return 0; 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric int get_proc_group() const override { 5640b57cec5SDimitry Andric int group = -1; 5650b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 5660b57cec5SDimitry Andric return 1; 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 5690b57cec5SDimitry Andric if (mask[i] == 0) 5700b57cec5SDimitry Andric continue; 5710b57cec5SDimitry Andric if (group >= 0) 5720b57cec5SDimitry Andric return -1; 5730b57cec5SDimitry Andric group = i; 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric return group; 5760b57cec5SDimitry Andric } 5770b57cec5SDimitry Andric }; 5780b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 5790b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 5800b57cec5SDimitry Andric } 5810b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 5820b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 5830b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 5840b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 5850b57cec5SDimitry Andric return new Mask[num]; 5860b57cec5SDimitry Andric } 5870b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 5880b57cec5SDimitry Andric Mask *windows_array = static_cast<Mask *>(array); 5890b57cec5SDimitry Andric delete[] windows_array; 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 5920b57cec5SDimitry Andric int index) override { 5930b57cec5SDimitry Andric Mask *windows_array = static_cast<Mask *>(array); 5940b57cec5SDimitry Andric return &(windows_array[index]); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric api_type get_api_type() const override { return NATIVE_OS; } 5970b57cec5SDimitry Andric }; 5980b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 5990b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric class Address { 6020b57cec5SDimitry Andric public: 6030b57cec5SDimitry Andric static const unsigned maxDepth = 32; 6040b57cec5SDimitry Andric unsigned labels[maxDepth]; 6050b57cec5SDimitry Andric unsigned childNums[maxDepth]; 6060b57cec5SDimitry Andric unsigned depth; 6070b57cec5SDimitry Andric unsigned leader; 6080b57cec5SDimitry Andric Address(unsigned _depth) : depth(_depth), leader(FALSE) {} 6090b57cec5SDimitry Andric Address &operator=(const Address &b) { 6100b57cec5SDimitry Andric depth = b.depth; 6110b57cec5SDimitry Andric for (unsigned i = 0; i < depth; i++) { 6120b57cec5SDimitry Andric labels[i] = b.labels[i]; 6130b57cec5SDimitry Andric childNums[i] = b.childNums[i]; 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric leader = FALSE; 6160b57cec5SDimitry Andric return *this; 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric bool operator==(const Address &b) const { 6190b57cec5SDimitry Andric if (depth != b.depth) 6200b57cec5SDimitry Andric return false; 6210b57cec5SDimitry Andric for (unsigned i = 0; i < depth; i++) 6220b57cec5SDimitry Andric if (labels[i] != b.labels[i]) 6230b57cec5SDimitry Andric return false; 6240b57cec5SDimitry Andric return true; 6250b57cec5SDimitry Andric } 6260b57cec5SDimitry Andric bool isClose(const Address &b, int level) const { 6270b57cec5SDimitry Andric if (depth != b.depth) 6280b57cec5SDimitry Andric return false; 6290b57cec5SDimitry Andric if ((unsigned)level >= depth) 6300b57cec5SDimitry Andric return true; 6310b57cec5SDimitry Andric for (unsigned i = 0; i < (depth - level); i++) 6320b57cec5SDimitry Andric if (labels[i] != b.labels[i]) 6330b57cec5SDimitry Andric return false; 6340b57cec5SDimitry Andric return true; 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric bool operator!=(const Address &b) const { return !operator==(b); } 6370b57cec5SDimitry Andric void print() const { 6380b57cec5SDimitry Andric unsigned i; 6390b57cec5SDimitry Andric printf("Depth: %u --- ", depth); 6400b57cec5SDimitry Andric for (i = 0; i < depth; i++) { 6410b57cec5SDimitry Andric printf("%u ", labels[i]); 6420b57cec5SDimitry Andric } 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric }; 6450b57cec5SDimitry Andric 6460b57cec5SDimitry Andric class AddrUnsPair { 6470b57cec5SDimitry Andric public: 6480b57cec5SDimitry Andric Address first; 6490b57cec5SDimitry Andric unsigned second; 6500b57cec5SDimitry Andric AddrUnsPair(Address _first, unsigned _second) 6510b57cec5SDimitry Andric : first(_first), second(_second) {} 6520b57cec5SDimitry Andric AddrUnsPair &operator=(const AddrUnsPair &b) { 6530b57cec5SDimitry Andric first = b.first; 6540b57cec5SDimitry Andric second = b.second; 6550b57cec5SDimitry Andric return *this; 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric void print() const { 6580b57cec5SDimitry Andric printf("first = "); 6590b57cec5SDimitry Andric first.print(); 6600b57cec5SDimitry Andric printf(" --- second = %u", second); 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric bool operator==(const AddrUnsPair &b) const { 6630b57cec5SDimitry Andric if (first != b.first) 6640b57cec5SDimitry Andric return false; 6650b57cec5SDimitry Andric if (second != b.second) 6660b57cec5SDimitry Andric return false; 6670b57cec5SDimitry Andric return true; 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric bool operator!=(const AddrUnsPair &b) const { return !operator==(b); } 6700b57cec5SDimitry Andric }; 6710b57cec5SDimitry Andric 6720b57cec5SDimitry Andric static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) { 6730b57cec5SDimitry Andric const Address *aa = &(((const AddrUnsPair *)a)->first); 6740b57cec5SDimitry Andric const Address *bb = &(((const AddrUnsPair *)b)->first); 6750b57cec5SDimitry Andric unsigned depth = aa->depth; 6760b57cec5SDimitry Andric unsigned i; 6770b57cec5SDimitry Andric KMP_DEBUG_ASSERT(depth == bb->depth); 6780b57cec5SDimitry Andric for (i = 0; i < depth; i++) { 6790b57cec5SDimitry Andric if (aa->labels[i] < bb->labels[i]) 6800b57cec5SDimitry Andric return -1; 6810b57cec5SDimitry Andric if (aa->labels[i] > bb->labels[i]) 6820b57cec5SDimitry Andric return 1; 6830b57cec5SDimitry Andric } 6840b57cec5SDimitry Andric return 0; 6850b57cec5SDimitry Andric } 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once 6880b57cec5SDimitry Andric at init. This structure represents a mapping of threads to the actual machine 6890b57cec5SDimitry Andric hierarchy, or to our best guess at what the hierarchy might be, for the 6900b57cec5SDimitry Andric purpose of performing an efficient barrier. In the worst case, when there is 6910b57cec5SDimitry Andric no machine hierarchy information, it produces a tree suitable for a barrier, 6920b57cec5SDimitry Andric similar to the tree used in the hyper barrier. */ 6930b57cec5SDimitry Andric class hierarchy_info { 6940b57cec5SDimitry Andric public: 6950b57cec5SDimitry Andric /* Good default values for number of leaves and branching factor, given no 6960b57cec5SDimitry Andric affinity information. Behaves a bit like hyper barrier. */ 6970b57cec5SDimitry Andric static const kmp_uint32 maxLeaves = 4; 6980b57cec5SDimitry Andric static const kmp_uint32 minBranch = 4; 6990b57cec5SDimitry Andric /** Number of levels in the hierarchy. Typical levels are threads/core, 7000b57cec5SDimitry Andric cores/package or socket, packages/node, nodes/machine, etc. We don't want 7010b57cec5SDimitry Andric to get specific with nomenclature. When the machine is oversubscribed we 7020b57cec5SDimitry Andric add levels to duplicate the hierarchy, doubling the thread capacity of the 7030b57cec5SDimitry Andric hierarchy each time we add a level. */ 7040b57cec5SDimitry Andric kmp_uint32 maxLevels; 7050b57cec5SDimitry Andric 7060b57cec5SDimitry Andric /** This is specifically the depth of the machine configuration hierarchy, in 7070b57cec5SDimitry Andric terms of the number of levels along the longest path from root to any 7080b57cec5SDimitry Andric leaf. It corresponds to the number of entries in numPerLevel if we exclude 7090b57cec5SDimitry Andric all but one trailing 1. */ 7100b57cec5SDimitry Andric kmp_uint32 depth; 7110b57cec5SDimitry Andric kmp_uint32 base_num_threads; 7120b57cec5SDimitry Andric enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; 7130b57cec5SDimitry Andric volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 7140b57cec5SDimitry Andric // 2=initialization in progress 7150b57cec5SDimitry Andric volatile kmp_int8 resizing; // 0=not resizing, 1=resizing 7160b57cec5SDimitry Andric 7170b57cec5SDimitry Andric /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children 7180b57cec5SDimitry Andric the parent of a node at level i has. For example, if we have a machine 7190b57cec5SDimitry Andric with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = 7200b57cec5SDimitry Andric {2, 4, 4, 1, 1}. All empty levels are set to 1. */ 7210b57cec5SDimitry Andric kmp_uint32 *numPerLevel; 7220b57cec5SDimitry Andric kmp_uint32 *skipPerLevel; 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { 7250b57cec5SDimitry Andric int hier_depth = adr2os[0].first.depth; 7260b57cec5SDimitry Andric int level = 0; 7270b57cec5SDimitry Andric for (int i = hier_depth - 1; i >= 0; --i) { 7280b57cec5SDimitry Andric int max = -1; 7290b57cec5SDimitry Andric for (int j = 0; j < num_addrs; ++j) { 7300b57cec5SDimitry Andric int next = adr2os[j].first.childNums[i]; 7310b57cec5SDimitry Andric if (next > max) 7320b57cec5SDimitry Andric max = next; 7330b57cec5SDimitry Andric } 7340b57cec5SDimitry Andric numPerLevel[level] = max + 1; 7350b57cec5SDimitry Andric ++level; 7360b57cec5SDimitry Andric } 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric hierarchy_info() 7400b57cec5SDimitry Andric : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} 7410b57cec5SDimitry Andric 7420b57cec5SDimitry Andric void fini() { 7430b57cec5SDimitry Andric if (!uninitialized && numPerLevel) { 7440b57cec5SDimitry Andric __kmp_free(numPerLevel); 7450b57cec5SDimitry Andric numPerLevel = NULL; 7460b57cec5SDimitry Andric uninitialized = not_initialized; 7470b57cec5SDimitry Andric } 7480b57cec5SDimitry Andric } 7490b57cec5SDimitry Andric 7500b57cec5SDimitry Andric void init(AddrUnsPair *adr2os, int num_addrs) { 7510b57cec5SDimitry Andric kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( 7520b57cec5SDimitry Andric &uninitialized, not_initialized, initializing); 7530b57cec5SDimitry Andric if (bool_result == 0) { // Wait for initialization 7540b57cec5SDimitry Andric while (TCR_1(uninitialized) != initialized) 7550b57cec5SDimitry Andric KMP_CPU_PAUSE(); 7560b57cec5SDimitry Andric return; 7570b57cec5SDimitry Andric } 7580b57cec5SDimitry Andric KMP_DEBUG_ASSERT(bool_result == 1); 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric /* Added explicit initialization of the data fields here to prevent usage of 7610b57cec5SDimitry Andric dirty value observed when static library is re-initialized multiple times 7620b57cec5SDimitry Andric (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses 7630b57cec5SDimitry Andric OpenMP). */ 7640b57cec5SDimitry Andric depth = 1; 7650b57cec5SDimitry Andric resizing = 0; 7660b57cec5SDimitry Andric maxLevels = 7; 7670b57cec5SDimitry Andric numPerLevel = 7680b57cec5SDimitry Andric (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); 7690b57cec5SDimitry Andric skipPerLevel = &(numPerLevel[maxLevels]); 7700b57cec5SDimitry Andric for (kmp_uint32 i = 0; i < maxLevels; 7710b57cec5SDimitry Andric ++i) { // init numPerLevel[*] to 1 item per level 7720b57cec5SDimitry Andric numPerLevel[i] = 1; 7730b57cec5SDimitry Andric skipPerLevel[i] = 1; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric 7760b57cec5SDimitry Andric // Sort table by physical ID 7770b57cec5SDimitry Andric if (adr2os) { 7780b57cec5SDimitry Andric qsort(adr2os, num_addrs, sizeof(*adr2os), 7790b57cec5SDimitry Andric __kmp_affinity_cmp_Address_labels); 7800b57cec5SDimitry Andric deriveLevels(adr2os, num_addrs); 7810b57cec5SDimitry Andric } else { 7820b57cec5SDimitry Andric numPerLevel[0] = maxLeaves; 7830b57cec5SDimitry Andric numPerLevel[1] = num_addrs / maxLeaves; 7840b57cec5SDimitry Andric if (num_addrs % maxLeaves) 7850b57cec5SDimitry Andric numPerLevel[1]++; 7860b57cec5SDimitry Andric } 7870b57cec5SDimitry Andric 7880b57cec5SDimitry Andric base_num_threads = num_addrs; 7890b57cec5SDimitry Andric for (int i = maxLevels - 1; i >= 0; 7900b57cec5SDimitry Andric --i) // count non-empty levels to get depth 7910b57cec5SDimitry Andric if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' 7920b57cec5SDimitry Andric depth++; 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric kmp_uint32 branch = minBranch; 7950b57cec5SDimitry Andric if (numPerLevel[0] == 1) 7960b57cec5SDimitry Andric branch = num_addrs / maxLeaves; 7970b57cec5SDimitry Andric if (branch < minBranch) 7980b57cec5SDimitry Andric branch = minBranch; 7990b57cec5SDimitry Andric for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width 8000b57cec5SDimitry Andric while (numPerLevel[d] > branch || 8010b57cec5SDimitry Andric (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! 8020b57cec5SDimitry Andric if (numPerLevel[d] & 1) 8030b57cec5SDimitry Andric numPerLevel[d]++; 8040b57cec5SDimitry Andric numPerLevel[d] = numPerLevel[d] >> 1; 8050b57cec5SDimitry Andric if (numPerLevel[d + 1] == 1) 8060b57cec5SDimitry Andric depth++; 8070b57cec5SDimitry Andric numPerLevel[d + 1] = numPerLevel[d + 1] << 1; 8080b57cec5SDimitry Andric } 8090b57cec5SDimitry Andric if (numPerLevel[0] == 1) { 8100b57cec5SDimitry Andric branch = branch >> 1; 8110b57cec5SDimitry Andric if (branch < 4) 8120b57cec5SDimitry Andric branch = minBranch; 8130b57cec5SDimitry Andric } 8140b57cec5SDimitry Andric } 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric for (kmp_uint32 i = 1; i < depth; ++i) 8170b57cec5SDimitry Andric skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]; 8180b57cec5SDimitry Andric // Fill in hierarchy in the case of oversubscription 8190b57cec5SDimitry Andric for (kmp_uint32 i = depth; i < maxLevels; ++i) 8200b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric uninitialized = initialized; // One writer 8230b57cec5SDimitry Andric } 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric // Resize the hierarchy if nproc changes to something larger than before 8260b57cec5SDimitry Andric void resize(kmp_uint32 nproc) { 8270b57cec5SDimitry Andric kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); 8280b57cec5SDimitry Andric while (bool_result == 0) { // someone else is trying to resize 8290b57cec5SDimitry Andric KMP_CPU_PAUSE(); 8300b57cec5SDimitry Andric if (nproc <= base_num_threads) // happy with other thread's resize 8310b57cec5SDimitry Andric return; 8320b57cec5SDimitry Andric else // try to resize 8330b57cec5SDimitry Andric bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric KMP_DEBUG_ASSERT(bool_result != 0); 8360b57cec5SDimitry Andric if (nproc <= base_num_threads) 8370b57cec5SDimitry Andric return; // happy with other thread's resize 8380b57cec5SDimitry Andric 8390b57cec5SDimitry Andric // Calculate new maxLevels 8400b57cec5SDimitry Andric kmp_uint32 old_sz = skipPerLevel[depth - 1]; 8410b57cec5SDimitry Andric kmp_uint32 incs = 0, old_maxLevels = maxLevels; 8420b57cec5SDimitry Andric // First see if old maxLevels is enough to contain new size 8430b57cec5SDimitry Andric for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) { 8440b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 8450b57cec5SDimitry Andric numPerLevel[i - 1] *= 2; 8460b57cec5SDimitry Andric old_sz *= 2; 8470b57cec5SDimitry Andric depth++; 8480b57cec5SDimitry Andric } 8490b57cec5SDimitry Andric if (nproc > old_sz) { // Not enough space, need to expand hierarchy 8500b57cec5SDimitry Andric while (nproc > old_sz) { 8510b57cec5SDimitry Andric old_sz *= 2; 8520b57cec5SDimitry Andric incs++; 8530b57cec5SDimitry Andric depth++; 8540b57cec5SDimitry Andric } 8550b57cec5SDimitry Andric maxLevels += incs; 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric // Resize arrays 8580b57cec5SDimitry Andric kmp_uint32 *old_numPerLevel = numPerLevel; 8590b57cec5SDimitry Andric kmp_uint32 *old_skipPerLevel = skipPerLevel; 8600b57cec5SDimitry Andric numPerLevel = skipPerLevel = NULL; 8610b57cec5SDimitry Andric numPerLevel = 8620b57cec5SDimitry Andric (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); 8630b57cec5SDimitry Andric skipPerLevel = &(numPerLevel[maxLevels]); 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric // Copy old elements from old arrays 866*e8d8bef9SDimitry Andric for (kmp_uint32 i = 0; i < old_maxLevels; ++i) { 867*e8d8bef9SDimitry Andric // init numPerLevel[*] to 1 item per level 8680b57cec5SDimitry Andric numPerLevel[i] = old_numPerLevel[i]; 8690b57cec5SDimitry Andric skipPerLevel[i] = old_skipPerLevel[i]; 8700b57cec5SDimitry Andric } 8710b57cec5SDimitry Andric 8720b57cec5SDimitry Andric // Init new elements in arrays to 1 873*e8d8bef9SDimitry Andric for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) { 874*e8d8bef9SDimitry Andric // init numPerLevel[*] to 1 item per level 8750b57cec5SDimitry Andric numPerLevel[i] = 1; 8760b57cec5SDimitry Andric skipPerLevel[i] = 1; 8770b57cec5SDimitry Andric } 8780b57cec5SDimitry Andric 8790b57cec5SDimitry Andric // Free old arrays 8800b57cec5SDimitry Andric __kmp_free(old_numPerLevel); 8810b57cec5SDimitry Andric } 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric // Fill in oversubscription levels of hierarchy 8840b57cec5SDimitry Andric for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) 8850b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric base_num_threads = nproc; 8880b57cec5SDimitry Andric resizing = 0; // One writer 8890b57cec5SDimitry Andric } 8900b57cec5SDimitry Andric }; 8910b57cec5SDimitry Andric #endif // KMP_AFFINITY_H 892