10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_affinity.h -- header for affinity management 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H 140b57cec5SDimitry Andric #define KMP_AFFINITY_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "kmp.h" 170b57cec5SDimitry Andric #include "kmp_os.h" 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 200b57cec5SDimitry Andric #if KMP_USE_HWLOC 210b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity { 220b57cec5SDimitry Andric public: 230b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 240b57cec5SDimitry Andric hwloc_cpuset_t mask; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric public: 270b57cec5SDimitry Andric Mask() { 280b57cec5SDimitry Andric mask = hwloc_bitmap_alloc(); 290b57cec5SDimitry Andric this->zero(); 300b57cec5SDimitry Andric } 310b57cec5SDimitry Andric ~Mask() { hwloc_bitmap_free(mask); } 320b57cec5SDimitry Andric void set(int i) override { hwloc_bitmap_set(mask, i); } 330b57cec5SDimitry Andric bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } 340b57cec5SDimitry Andric void clear(int i) override { hwloc_bitmap_clr(mask, i); } 350b57cec5SDimitry Andric void zero() override { hwloc_bitmap_zero(mask); } 360b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 370b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 380b57cec5SDimitry Andric hwloc_bitmap_copy(mask, convert->mask); 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 410b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 420b57cec5SDimitry Andric hwloc_bitmap_and(mask, mask, convert->mask); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 450b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 460b57cec5SDimitry Andric hwloc_bitmap_or(mask, mask, convert->mask); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric void bitwise_not() override { hwloc_bitmap_not(mask, mask); } 490b57cec5SDimitry Andric int begin() const override { return hwloc_bitmap_first(mask); } 500b57cec5SDimitry Andric int end() const override { return -1; } 510b57cec5SDimitry Andric int next(int previous) const override { 520b57cec5SDimitry Andric return hwloc_bitmap_next(mask, previous); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 550b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 560b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 570b57cec5SDimitry Andric int retval = 580b57cec5SDimitry Andric hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 590b57cec5SDimitry Andric if (retval >= 0) { 600b57cec5SDimitry Andric return 0; 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric int error = errno; 630b57cec5SDimitry Andric if (abort_on_error) { 640b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric return error; 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 690b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 700b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 710b57cec5SDimitry Andric int retval = 720b57cec5SDimitry Andric hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 730b57cec5SDimitry Andric if (retval >= 0) { 740b57cec5SDimitry Andric return 0; 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric int error = errno; 770b57cec5SDimitry Andric if (abort_on_error) { 780b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric return error; 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric int get_proc_group() const override { 830b57cec5SDimitry Andric int group = -1; 840b57cec5SDimitry Andric #if KMP_OS_WINDOWS 850b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 860b57cec5SDimitry Andric return 1; 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 890b57cec5SDimitry Andric // On windows, the long type is always 32 bits 900b57cec5SDimitry Andric unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); 910b57cec5SDimitry Andric unsigned long second_32_bits = 920b57cec5SDimitry Andric hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); 930b57cec5SDimitry Andric if (first_32_bits == 0 && second_32_bits == 0) { 940b57cec5SDimitry Andric continue; 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric if (group >= 0) { 970b57cec5SDimitry Andric return -1; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric group = i; 1000b57cec5SDimitry Andric } 1010b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 1020b57cec5SDimitry Andric return group; 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric }; 1050b57cec5SDimitry Andric void determine_capable(const char *var) override { 1060b57cec5SDimitry Andric const hwloc_topology_support *topology_support; 1070b57cec5SDimitry Andric if (__kmp_hwloc_topology == NULL) { 1080b57cec5SDimitry Andric if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { 1090b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1100b57cec5SDimitry Andric if (__kmp_affinity_verbose) 1110b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { 1140b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1150b57cec5SDimitry Andric if (__kmp_affinity_verbose) 1160b57cec5SDimitry Andric KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); 1200b57cec5SDimitry Andric // Is the system capable of setting/getting this thread's affinity? 1210b57cec5SDimitry Andric // Also, is topology discovery possible? (pu indicates ability to discover 1220b57cec5SDimitry Andric // processing units). And finally, were there no errors when calling any 1230b57cec5SDimitry Andric // hwloc_* API functions? 1240b57cec5SDimitry Andric if (topology_support && topology_support->cpubind->set_thisthread_cpubind && 1250b57cec5SDimitry Andric topology_support->cpubind->get_thisthread_cpubind && 1260b57cec5SDimitry Andric topology_support->discovery->pu && !__kmp_hwloc_error) { 1270b57cec5SDimitry Andric // enables affinity according to KMP_AFFINITY_CAPABLE() macro 1280b57cec5SDimitry Andric KMP_AFFINITY_ENABLE(TRUE); 1290b57cec5SDimitry Andric } else { 1300b57cec5SDimitry Andric // indicate that hwloc didn't work and disable affinity 1310b57cec5SDimitry Andric __kmp_hwloc_error = TRUE; 1320b57cec5SDimitry Andric KMP_AFFINITY_DISABLE(); 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric void bind_thread(int which) override { 1360b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 1370b57cec5SDimitry Andric "Illegal set affinity operation when not capable"); 1380b57cec5SDimitry Andric KMPAffinity::Mask *mask; 1390b57cec5SDimitry Andric KMP_CPU_ALLOC_ON_STACK(mask); 1400b57cec5SDimitry Andric KMP_CPU_ZERO(mask); 1410b57cec5SDimitry Andric KMP_CPU_SET(which, mask); 1420b57cec5SDimitry Andric __kmp_set_system_affinity(mask, TRUE); 1430b57cec5SDimitry Andric KMP_CPU_FREE_FROM_STACK(mask); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 1460b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 1470b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 1480b57cec5SDimitry Andric return new Mask[num]; 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 1510b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1520b57cec5SDimitry Andric delete[] hwloc_array; 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 1550b57cec5SDimitry Andric int index) override { 1560b57cec5SDimitry Andric Mask *hwloc_array = static_cast<Mask *>(array); 1570b57cec5SDimitry Andric return &(hwloc_array[index]); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric api_type get_api_type() const override { return HWLOC; } 1600b57cec5SDimitry Andric }; 1610b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */ 1620b57cec5SDimitry Andric 163489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD 1640b57cec5SDimitry Andric #if KMP_OS_LINUX 1650b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present 1660b57cec5SDimitry Andric in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on 1670b57cec5SDimitry Andric all systems of the same arch where they are defined, and they cannot change. 1680b57cec5SDimitry Andric stone forever. */ 1690b57cec5SDimitry Andric #include <sys/syscall.h> 1700b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM 1710b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 1720b57cec5SDimitry Andric #define __NR_sched_setaffinity 241 1730b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241 1740b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 1750b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 1760b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 1770b57cec5SDimitry Andric #define __NR_sched_getaffinity 242 1780b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242 1790b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 1800b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 1810b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64 1820b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 1830b57cec5SDimitry Andric #define __NR_sched_setaffinity 122 1840b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122 1850b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 1860b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 1870b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 1880b57cec5SDimitry Andric #define __NR_sched_getaffinity 123 1890b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123 1900b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 1910b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 1920b57cec5SDimitry Andric #elif KMP_ARCH_X86_64 1930b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 1940b57cec5SDimitry Andric #define __NR_sched_setaffinity 203 1950b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203 1960b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 1970b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 1980b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 1990b57cec5SDimitry Andric #define __NR_sched_getaffinity 204 2000b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204 2010b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2020b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2030b57cec5SDimitry Andric #elif KMP_ARCH_PPC64 2040b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2050b57cec5SDimitry Andric #define __NR_sched_setaffinity 222 2060b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222 2070b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2080b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2090b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2100b57cec5SDimitry Andric #define __NR_sched_getaffinity 223 2110b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223 2120b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2130b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2140b57cec5SDimitry Andric #elif KMP_ARCH_MIPS 2150b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2160b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239 2170b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239 2180b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2190b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2200b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2210b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240 2220b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240 2230b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2240b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2250b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64 2260b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity 2270b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195 2280b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195 2290b57cec5SDimitry Andric #error Wrong code for setaffinity system call. 2300b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */ 2310b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity 2320b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196 2330b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196 2340b57cec5SDimitry Andric #error Wrong code for getaffinity system call. 2350b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */ 2360b57cec5SDimitry Andric #error Unknown or unsupported architecture 2370b57cec5SDimitry Andric #endif /* KMP_ARCH_* */ 238489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 239489b1cf2SDimitry Andric #include <pthread.h> 240489b1cf2SDimitry Andric #include <pthread_np.h> 241489b1cf2SDimitry Andric #endif 2420b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 2430b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 2440b57cec5SDimitry Andric typedef unsigned char mask_t; 2450b57cec5SDimitry Andric static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric public: 2480b57cec5SDimitry Andric mask_t *mask; 2490b57cec5SDimitry Andric Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } 2500b57cec5SDimitry Andric ~Mask() { 2510b57cec5SDimitry Andric if (mask) 2520b57cec5SDimitry Andric __kmp_free(mask); 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric void set(int i) override { 2550b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric bool is_set(int i) const override { 2580b57cec5SDimitry Andric return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric void clear(int i) override { 2610b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric void zero() override { 2640b57cec5SDimitry Andric for (size_t i = 0; i < __kmp_affin_mask_size; ++i) 2650b57cec5SDimitry Andric mask[i] = 0; 2660b57cec5SDimitry Andric } 2670b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 2680b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 2690b57cec5SDimitry Andric for (size_t i = 0; i < __kmp_affin_mask_size; ++i) 2700b57cec5SDimitry Andric mask[i] = convert->mask[i]; 2710b57cec5SDimitry Andric } 2720b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 2730b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 2740b57cec5SDimitry Andric for (size_t i = 0; i < __kmp_affin_mask_size; ++i) 2750b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 2780b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 2790b57cec5SDimitry Andric for (size_t i = 0; i < __kmp_affin_mask_size; ++i) 2800b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric void bitwise_not() override { 2830b57cec5SDimitry Andric for (size_t i = 0; i < __kmp_affin_mask_size; ++i) 2840b57cec5SDimitry Andric mask[i] = ~(mask[i]); 2850b57cec5SDimitry Andric } 2860b57cec5SDimitry Andric int begin() const override { 2870b57cec5SDimitry Andric int retval = 0; 2880b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 2890b57cec5SDimitry Andric ++retval; 2900b57cec5SDimitry Andric return retval; 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; } 2930b57cec5SDimitry Andric int next(int previous) const override { 2940b57cec5SDimitry Andric int retval = previous + 1; 2950b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 2960b57cec5SDimitry Andric ++retval; 2970b57cec5SDimitry Andric return retval; 2980b57cec5SDimitry Andric } 2990b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 3000b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 3010b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 302489b1cf2SDimitry Andric #if KMP_OS_LINUX 3030b57cec5SDimitry Andric int retval = 3040b57cec5SDimitry Andric syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); 305489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 306*5ffd83dbSDimitry Andric int r = 307489b1cf2SDimitry Andric pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask)); 308*5ffd83dbSDimitry Andric int retval = (r == 0 ? 0 : -1); 309489b1cf2SDimitry Andric #endif 3100b57cec5SDimitry Andric if (retval >= 0) { 3110b57cec5SDimitry Andric return 0; 3120b57cec5SDimitry Andric } 3130b57cec5SDimitry Andric int error = errno; 3140b57cec5SDimitry Andric if (abort_on_error) { 3150b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric return error; 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 3200b57cec5SDimitry Andric KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), 3210b57cec5SDimitry Andric "Illegal get affinity operation when not capable"); 322489b1cf2SDimitry Andric #if KMP_OS_LINUX 3230b57cec5SDimitry Andric int retval = 3240b57cec5SDimitry Andric syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); 325489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD 326*5ffd83dbSDimitry Andric int r = 327489b1cf2SDimitry Andric pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask)); 328*5ffd83dbSDimitry Andric int retval = (r == 0 ? 0 : -1); 329489b1cf2SDimitry Andric #endif 3300b57cec5SDimitry Andric if (retval >= 0) { 3310b57cec5SDimitry Andric return 0; 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric int error = errno; 3340b57cec5SDimitry Andric if (abort_on_error) { 3350b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null); 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric return error; 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric }; 3400b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 3410b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 3440b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { 3450b57cec5SDimitry Andric KMPNativeAffinity::Mask *retval = new Mask(); 3460b57cec5SDimitry Andric return retval; 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { 3490b57cec5SDimitry Andric KMPNativeAffinity::Mask *native_mask = 3500b57cec5SDimitry Andric static_cast<KMPNativeAffinity::Mask *>(m); 3510b57cec5SDimitry Andric delete native_mask; 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 3540b57cec5SDimitry Andric return new Mask[num]; 3550b57cec5SDimitry Andric } 3560b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 3570b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 3580b57cec5SDimitry Andric delete[] linux_array; 3590b57cec5SDimitry Andric } 3600b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 3610b57cec5SDimitry Andric int index) override { 3620b57cec5SDimitry Andric Mask *linux_array = static_cast<Mask *>(array); 3630b57cec5SDimitry Andric return &(linux_array[index]); 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric api_type get_api_type() const override { return NATIVE_OS; } 3660b57cec5SDimitry Andric }; 367489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */ 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric #if KMP_OS_WINDOWS 3700b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity { 3710b57cec5SDimitry Andric class Mask : public KMPAffinity::Mask { 3720b57cec5SDimitry Andric typedef ULONG_PTR mask_t; 3730b57cec5SDimitry Andric static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; 3740b57cec5SDimitry Andric mask_t *mask; 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric public: 3770b57cec5SDimitry Andric Mask() { 3780b57cec5SDimitry Andric mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric ~Mask() { 3810b57cec5SDimitry Andric if (mask) 3820b57cec5SDimitry Andric __kmp_free(mask); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric void set(int i) override { 3850b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric bool is_set(int i) const override { 3880b57cec5SDimitry Andric return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric void clear(int i) override { 3910b57cec5SDimitry Andric mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric void zero() override { 3940b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 3950b57cec5SDimitry Andric mask[i] = 0; 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric void copy(const KMPAffinity::Mask *src) override { 3980b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(src); 3990b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4000b57cec5SDimitry Andric mask[i] = convert->mask[i]; 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric void bitwise_and(const KMPAffinity::Mask *rhs) override { 4030b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 4040b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4050b57cec5SDimitry Andric mask[i] &= convert->mask[i]; 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric void bitwise_or(const KMPAffinity::Mask *rhs) override { 4080b57cec5SDimitry Andric const Mask *convert = static_cast<const Mask *>(rhs); 4090b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4100b57cec5SDimitry Andric mask[i] |= convert->mask[i]; 4110b57cec5SDimitry Andric } 4120b57cec5SDimitry Andric void bitwise_not() override { 4130b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; ++i) 4140b57cec5SDimitry Andric mask[i] = ~(mask[i]); 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric int begin() const override { 4170b57cec5SDimitry Andric int retval = 0; 4180b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4190b57cec5SDimitry Andric ++retval; 4200b57cec5SDimitry Andric return retval; 4210b57cec5SDimitry Andric } 4220b57cec5SDimitry Andric int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } 4230b57cec5SDimitry Andric int next(int previous) const override { 4240b57cec5SDimitry Andric int retval = previous + 1; 4250b57cec5SDimitry Andric while (retval < end() && !is_set(retval)) 4260b57cec5SDimitry Andric ++retval; 4270b57cec5SDimitry Andric return retval; 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric int set_system_affinity(bool abort_on_error) const override { 4300b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 4310b57cec5SDimitry Andric // Check for a valid mask. 4320b57cec5SDimitry Andric GROUP_AFFINITY ga; 4330b57cec5SDimitry Andric int group = get_proc_group(); 4340b57cec5SDimitry Andric if (group < 0) { 4350b57cec5SDimitry Andric if (abort_on_error) { 4360b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); 4370b57cec5SDimitry Andric } 4380b57cec5SDimitry Andric return -1; 4390b57cec5SDimitry Andric } 4400b57cec5SDimitry Andric // Transform the bit vector into a GROUP_AFFINITY struct 4410b57cec5SDimitry Andric // and make the system call to set affinity. 4420b57cec5SDimitry Andric ga.Group = group; 4430b57cec5SDimitry Andric ga.Mask = mask[group]; 4440b57cec5SDimitry Andric ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); 4470b57cec5SDimitry Andric if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { 4480b57cec5SDimitry Andric DWORD error = GetLastError(); 4490b57cec5SDimitry Andric if (abort_on_error) { 4500b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 4510b57cec5SDimitry Andric __kmp_msg_null); 4520b57cec5SDimitry Andric } 4530b57cec5SDimitry Andric return error; 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric } else { 4560b57cec5SDimitry Andric if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { 4570b57cec5SDimitry Andric DWORD error = GetLastError(); 4580b57cec5SDimitry Andric if (abort_on_error) { 4590b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), 4600b57cec5SDimitry Andric __kmp_msg_null); 4610b57cec5SDimitry Andric } 4620b57cec5SDimitry Andric return error; 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric } 4650b57cec5SDimitry Andric return 0; 4660b57cec5SDimitry Andric } 4670b57cec5SDimitry Andric int get_system_affinity(bool abort_on_error) override { 4680b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) { 4690b57cec5SDimitry Andric this->zero(); 4700b57cec5SDimitry Andric GROUP_AFFINITY ga; 4710b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); 4720b57cec5SDimitry Andric if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { 4730b57cec5SDimitry Andric DWORD error = GetLastError(); 4740b57cec5SDimitry Andric if (abort_on_error) { 4750b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), 4760b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 4770b57cec5SDimitry Andric } 4780b57cec5SDimitry Andric return error; 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || 4810b57cec5SDimitry Andric (ga.Mask == 0)) { 4820b57cec5SDimitry Andric return -1; 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric mask[ga.Group] = ga.Mask; 4850b57cec5SDimitry Andric } else { 4860b57cec5SDimitry Andric mask_t newMask, sysMask, retval; 4870b57cec5SDimitry Andric if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { 4880b57cec5SDimitry Andric DWORD error = GetLastError(); 4890b57cec5SDimitry Andric if (abort_on_error) { 4900b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"), 4910b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 4920b57cec5SDimitry Andric } 4930b57cec5SDimitry Andric return error; 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric retval = SetThreadAffinityMask(GetCurrentThread(), newMask); 4960b57cec5SDimitry Andric if (!retval) { 4970b57cec5SDimitry Andric DWORD error = GetLastError(); 4980b57cec5SDimitry Andric if (abort_on_error) { 4990b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 5000b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric return error; 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric newMask = SetThreadAffinityMask(GetCurrentThread(), retval); 5050b57cec5SDimitry Andric if (!newMask) { 5060b57cec5SDimitry Andric DWORD error = GetLastError(); 5070b57cec5SDimitry Andric if (abort_on_error) { 5080b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"), 5090b57cec5SDimitry Andric KMP_ERR(error), __kmp_msg_null); 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric *mask = retval; 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric return 0; 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric int get_proc_group() const override { 5170b57cec5SDimitry Andric int group = -1; 5180b57cec5SDimitry Andric if (__kmp_num_proc_groups == 1) { 5190b57cec5SDimitry Andric return 1; 5200b57cec5SDimitry Andric } 5210b57cec5SDimitry Andric for (int i = 0; i < __kmp_num_proc_groups; i++) { 5220b57cec5SDimitry Andric if (mask[i] == 0) 5230b57cec5SDimitry Andric continue; 5240b57cec5SDimitry Andric if (group >= 0) 5250b57cec5SDimitry Andric return -1; 5260b57cec5SDimitry Andric group = i; 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric return group; 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric }; 5310b57cec5SDimitry Andric void determine_capable(const char *env_var) override { 5320b57cec5SDimitry Andric __kmp_affinity_determine_capable(env_var); 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } 5350b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask() override { return new Mask(); } 5360b57cec5SDimitry Andric void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } 5370b57cec5SDimitry Andric KMPAffinity::Mask *allocate_mask_array(int num) override { 5380b57cec5SDimitry Andric return new Mask[num]; 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric void deallocate_mask_array(KMPAffinity::Mask *array) override { 5410b57cec5SDimitry Andric Mask *windows_array = static_cast<Mask *>(array); 5420b57cec5SDimitry Andric delete[] windows_array; 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, 5450b57cec5SDimitry Andric int index) override { 5460b57cec5SDimitry Andric Mask *windows_array = static_cast<Mask *>(array); 5470b57cec5SDimitry Andric return &(windows_array[index]); 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric api_type get_api_type() const override { return NATIVE_OS; } 5500b57cec5SDimitry Andric }; 5510b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 5520b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric class Address { 5550b57cec5SDimitry Andric public: 5560b57cec5SDimitry Andric static const unsigned maxDepth = 32; 5570b57cec5SDimitry Andric unsigned labels[maxDepth]; 5580b57cec5SDimitry Andric unsigned childNums[maxDepth]; 5590b57cec5SDimitry Andric unsigned depth; 5600b57cec5SDimitry Andric unsigned leader; 5610b57cec5SDimitry Andric Address(unsigned _depth) : depth(_depth), leader(FALSE) {} 5620b57cec5SDimitry Andric Address &operator=(const Address &b) { 5630b57cec5SDimitry Andric depth = b.depth; 5640b57cec5SDimitry Andric for (unsigned i = 0; i < depth; i++) { 5650b57cec5SDimitry Andric labels[i] = b.labels[i]; 5660b57cec5SDimitry Andric childNums[i] = b.childNums[i]; 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric leader = FALSE; 5690b57cec5SDimitry Andric return *this; 5700b57cec5SDimitry Andric } 5710b57cec5SDimitry Andric bool operator==(const Address &b) const { 5720b57cec5SDimitry Andric if (depth != b.depth) 5730b57cec5SDimitry Andric return false; 5740b57cec5SDimitry Andric for (unsigned i = 0; i < depth; i++) 5750b57cec5SDimitry Andric if (labels[i] != b.labels[i]) 5760b57cec5SDimitry Andric return false; 5770b57cec5SDimitry Andric return true; 5780b57cec5SDimitry Andric } 5790b57cec5SDimitry Andric bool isClose(const Address &b, int level) const { 5800b57cec5SDimitry Andric if (depth != b.depth) 5810b57cec5SDimitry Andric return false; 5820b57cec5SDimitry Andric if ((unsigned)level >= depth) 5830b57cec5SDimitry Andric return true; 5840b57cec5SDimitry Andric for (unsigned i = 0; i < (depth - level); i++) 5850b57cec5SDimitry Andric if (labels[i] != b.labels[i]) 5860b57cec5SDimitry Andric return false; 5870b57cec5SDimitry Andric return true; 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric bool operator!=(const Address &b) const { return !operator==(b); } 5900b57cec5SDimitry Andric void print() const { 5910b57cec5SDimitry Andric unsigned i; 5920b57cec5SDimitry Andric printf("Depth: %u --- ", depth); 5930b57cec5SDimitry Andric for (i = 0; i < depth; i++) { 5940b57cec5SDimitry Andric printf("%u ", labels[i]); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric } 5970b57cec5SDimitry Andric }; 5980b57cec5SDimitry Andric 5990b57cec5SDimitry Andric class AddrUnsPair { 6000b57cec5SDimitry Andric public: 6010b57cec5SDimitry Andric Address first; 6020b57cec5SDimitry Andric unsigned second; 6030b57cec5SDimitry Andric AddrUnsPair(Address _first, unsigned _second) 6040b57cec5SDimitry Andric : first(_first), second(_second) {} 6050b57cec5SDimitry Andric AddrUnsPair &operator=(const AddrUnsPair &b) { 6060b57cec5SDimitry Andric first = b.first; 6070b57cec5SDimitry Andric second = b.second; 6080b57cec5SDimitry Andric return *this; 6090b57cec5SDimitry Andric } 6100b57cec5SDimitry Andric void print() const { 6110b57cec5SDimitry Andric printf("first = "); 6120b57cec5SDimitry Andric first.print(); 6130b57cec5SDimitry Andric printf(" --- second = %u", second); 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric bool operator==(const AddrUnsPair &b) const { 6160b57cec5SDimitry Andric if (first != b.first) 6170b57cec5SDimitry Andric return false; 6180b57cec5SDimitry Andric if (second != b.second) 6190b57cec5SDimitry Andric return false; 6200b57cec5SDimitry Andric return true; 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric bool operator!=(const AddrUnsPair &b) const { return !operator==(b); } 6230b57cec5SDimitry Andric }; 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) { 6260b57cec5SDimitry Andric const Address *aa = &(((const AddrUnsPair *)a)->first); 6270b57cec5SDimitry Andric const Address *bb = &(((const AddrUnsPair *)b)->first); 6280b57cec5SDimitry Andric unsigned depth = aa->depth; 6290b57cec5SDimitry Andric unsigned i; 6300b57cec5SDimitry Andric KMP_DEBUG_ASSERT(depth == bb->depth); 6310b57cec5SDimitry Andric for (i = 0; i < depth; i++) { 6320b57cec5SDimitry Andric if (aa->labels[i] < bb->labels[i]) 6330b57cec5SDimitry Andric return -1; 6340b57cec5SDimitry Andric if (aa->labels[i] > bb->labels[i]) 6350b57cec5SDimitry Andric return 1; 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric return 0; 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6400b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once 6410b57cec5SDimitry Andric at init. This structure represents a mapping of threads to the actual machine 6420b57cec5SDimitry Andric hierarchy, or to our best guess at what the hierarchy might be, for the 6430b57cec5SDimitry Andric purpose of performing an efficient barrier. In the worst case, when there is 6440b57cec5SDimitry Andric no machine hierarchy information, it produces a tree suitable for a barrier, 6450b57cec5SDimitry Andric similar to the tree used in the hyper barrier. */ 6460b57cec5SDimitry Andric class hierarchy_info { 6470b57cec5SDimitry Andric public: 6480b57cec5SDimitry Andric /* Good default values for number of leaves and branching factor, given no 6490b57cec5SDimitry Andric affinity information. Behaves a bit like hyper barrier. */ 6500b57cec5SDimitry Andric static const kmp_uint32 maxLeaves = 4; 6510b57cec5SDimitry Andric static const kmp_uint32 minBranch = 4; 6520b57cec5SDimitry Andric /** Number of levels in the hierarchy. Typical levels are threads/core, 6530b57cec5SDimitry Andric cores/package or socket, packages/node, nodes/machine, etc. We don't want 6540b57cec5SDimitry Andric to get specific with nomenclature. When the machine is oversubscribed we 6550b57cec5SDimitry Andric add levels to duplicate the hierarchy, doubling the thread capacity of the 6560b57cec5SDimitry Andric hierarchy each time we add a level. */ 6570b57cec5SDimitry Andric kmp_uint32 maxLevels; 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric /** This is specifically the depth of the machine configuration hierarchy, in 6600b57cec5SDimitry Andric terms of the number of levels along the longest path from root to any 6610b57cec5SDimitry Andric leaf. It corresponds to the number of entries in numPerLevel if we exclude 6620b57cec5SDimitry Andric all but one trailing 1. */ 6630b57cec5SDimitry Andric kmp_uint32 depth; 6640b57cec5SDimitry Andric kmp_uint32 base_num_threads; 6650b57cec5SDimitry Andric enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; 6660b57cec5SDimitry Andric volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 6670b57cec5SDimitry Andric // 2=initialization in progress 6680b57cec5SDimitry Andric volatile kmp_int8 resizing; // 0=not resizing, 1=resizing 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children 6710b57cec5SDimitry Andric the parent of a node at level i has. For example, if we have a machine 6720b57cec5SDimitry Andric with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = 6730b57cec5SDimitry Andric {2, 4, 4, 1, 1}. All empty levels are set to 1. */ 6740b57cec5SDimitry Andric kmp_uint32 *numPerLevel; 6750b57cec5SDimitry Andric kmp_uint32 *skipPerLevel; 6760b57cec5SDimitry Andric 6770b57cec5SDimitry Andric void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { 6780b57cec5SDimitry Andric int hier_depth = adr2os[0].first.depth; 6790b57cec5SDimitry Andric int level = 0; 6800b57cec5SDimitry Andric for (int i = hier_depth - 1; i >= 0; --i) { 6810b57cec5SDimitry Andric int max = -1; 6820b57cec5SDimitry Andric for (int j = 0; j < num_addrs; ++j) { 6830b57cec5SDimitry Andric int next = adr2os[j].first.childNums[i]; 6840b57cec5SDimitry Andric if (next > max) 6850b57cec5SDimitry Andric max = next; 6860b57cec5SDimitry Andric } 6870b57cec5SDimitry Andric numPerLevel[level] = max + 1; 6880b57cec5SDimitry Andric ++level; 6890b57cec5SDimitry Andric } 6900b57cec5SDimitry Andric } 6910b57cec5SDimitry Andric 6920b57cec5SDimitry Andric hierarchy_info() 6930b57cec5SDimitry Andric : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric void fini() { 6960b57cec5SDimitry Andric if (!uninitialized && numPerLevel) { 6970b57cec5SDimitry Andric __kmp_free(numPerLevel); 6980b57cec5SDimitry Andric numPerLevel = NULL; 6990b57cec5SDimitry Andric uninitialized = not_initialized; 7000b57cec5SDimitry Andric } 7010b57cec5SDimitry Andric } 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric void init(AddrUnsPair *adr2os, int num_addrs) { 7040b57cec5SDimitry Andric kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( 7050b57cec5SDimitry Andric &uninitialized, not_initialized, initializing); 7060b57cec5SDimitry Andric if (bool_result == 0) { // Wait for initialization 7070b57cec5SDimitry Andric while (TCR_1(uninitialized) != initialized) 7080b57cec5SDimitry Andric KMP_CPU_PAUSE(); 7090b57cec5SDimitry Andric return; 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(bool_result == 1); 7120b57cec5SDimitry Andric 7130b57cec5SDimitry Andric /* Added explicit initialization of the data fields here to prevent usage of 7140b57cec5SDimitry Andric dirty value observed when static library is re-initialized multiple times 7150b57cec5SDimitry Andric (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses 7160b57cec5SDimitry Andric OpenMP). */ 7170b57cec5SDimitry Andric depth = 1; 7180b57cec5SDimitry Andric resizing = 0; 7190b57cec5SDimitry Andric maxLevels = 7; 7200b57cec5SDimitry Andric numPerLevel = 7210b57cec5SDimitry Andric (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); 7220b57cec5SDimitry Andric skipPerLevel = &(numPerLevel[maxLevels]); 7230b57cec5SDimitry Andric for (kmp_uint32 i = 0; i < maxLevels; 7240b57cec5SDimitry Andric ++i) { // init numPerLevel[*] to 1 item per level 7250b57cec5SDimitry Andric numPerLevel[i] = 1; 7260b57cec5SDimitry Andric skipPerLevel[i] = 1; 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric // Sort table by physical ID 7300b57cec5SDimitry Andric if (adr2os) { 7310b57cec5SDimitry Andric qsort(adr2os, num_addrs, sizeof(*adr2os), 7320b57cec5SDimitry Andric __kmp_affinity_cmp_Address_labels); 7330b57cec5SDimitry Andric deriveLevels(adr2os, num_addrs); 7340b57cec5SDimitry Andric } else { 7350b57cec5SDimitry Andric numPerLevel[0] = maxLeaves; 7360b57cec5SDimitry Andric numPerLevel[1] = num_addrs / maxLeaves; 7370b57cec5SDimitry Andric if (num_addrs % maxLeaves) 7380b57cec5SDimitry Andric numPerLevel[1]++; 7390b57cec5SDimitry Andric } 7400b57cec5SDimitry Andric 7410b57cec5SDimitry Andric base_num_threads = num_addrs; 7420b57cec5SDimitry Andric for (int i = maxLevels - 1; i >= 0; 7430b57cec5SDimitry Andric --i) // count non-empty levels to get depth 7440b57cec5SDimitry Andric if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' 7450b57cec5SDimitry Andric depth++; 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric kmp_uint32 branch = minBranch; 7480b57cec5SDimitry Andric if (numPerLevel[0] == 1) 7490b57cec5SDimitry Andric branch = num_addrs / maxLeaves; 7500b57cec5SDimitry Andric if (branch < minBranch) 7510b57cec5SDimitry Andric branch = minBranch; 7520b57cec5SDimitry Andric for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width 7530b57cec5SDimitry Andric while (numPerLevel[d] > branch || 7540b57cec5SDimitry Andric (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! 7550b57cec5SDimitry Andric if (numPerLevel[d] & 1) 7560b57cec5SDimitry Andric numPerLevel[d]++; 7570b57cec5SDimitry Andric numPerLevel[d] = numPerLevel[d] >> 1; 7580b57cec5SDimitry Andric if (numPerLevel[d + 1] == 1) 7590b57cec5SDimitry Andric depth++; 7600b57cec5SDimitry Andric numPerLevel[d + 1] = numPerLevel[d + 1] << 1; 7610b57cec5SDimitry Andric } 7620b57cec5SDimitry Andric if (numPerLevel[0] == 1) { 7630b57cec5SDimitry Andric branch = branch >> 1; 7640b57cec5SDimitry Andric if (branch < 4) 7650b57cec5SDimitry Andric branch = minBranch; 7660b57cec5SDimitry Andric } 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric for (kmp_uint32 i = 1; i < depth; ++i) 7700b57cec5SDimitry Andric skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]; 7710b57cec5SDimitry Andric // Fill in hierarchy in the case of oversubscription 7720b57cec5SDimitry Andric for (kmp_uint32 i = depth; i < maxLevels; ++i) 7730b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 7740b57cec5SDimitry Andric 7750b57cec5SDimitry Andric uninitialized = initialized; // One writer 7760b57cec5SDimitry Andric } 7770b57cec5SDimitry Andric 7780b57cec5SDimitry Andric // Resize the hierarchy if nproc changes to something larger than before 7790b57cec5SDimitry Andric void resize(kmp_uint32 nproc) { 7800b57cec5SDimitry Andric kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); 7810b57cec5SDimitry Andric while (bool_result == 0) { // someone else is trying to resize 7820b57cec5SDimitry Andric KMP_CPU_PAUSE(); 7830b57cec5SDimitry Andric if (nproc <= base_num_threads) // happy with other thread's resize 7840b57cec5SDimitry Andric return; 7850b57cec5SDimitry Andric else // try to resize 7860b57cec5SDimitry Andric bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); 7870b57cec5SDimitry Andric } 7880b57cec5SDimitry Andric KMP_DEBUG_ASSERT(bool_result != 0); 7890b57cec5SDimitry Andric if (nproc <= base_num_threads) 7900b57cec5SDimitry Andric return; // happy with other thread's resize 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric // Calculate new maxLevels 7930b57cec5SDimitry Andric kmp_uint32 old_sz = skipPerLevel[depth - 1]; 7940b57cec5SDimitry Andric kmp_uint32 incs = 0, old_maxLevels = maxLevels; 7950b57cec5SDimitry Andric // First see if old maxLevels is enough to contain new size 7960b57cec5SDimitry Andric for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) { 7970b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 7980b57cec5SDimitry Andric numPerLevel[i - 1] *= 2; 7990b57cec5SDimitry Andric old_sz *= 2; 8000b57cec5SDimitry Andric depth++; 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric if (nproc > old_sz) { // Not enough space, need to expand hierarchy 8030b57cec5SDimitry Andric while (nproc > old_sz) { 8040b57cec5SDimitry Andric old_sz *= 2; 8050b57cec5SDimitry Andric incs++; 8060b57cec5SDimitry Andric depth++; 8070b57cec5SDimitry Andric } 8080b57cec5SDimitry Andric maxLevels += incs; 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric // Resize arrays 8110b57cec5SDimitry Andric kmp_uint32 *old_numPerLevel = numPerLevel; 8120b57cec5SDimitry Andric kmp_uint32 *old_skipPerLevel = skipPerLevel; 8130b57cec5SDimitry Andric numPerLevel = skipPerLevel = NULL; 8140b57cec5SDimitry Andric numPerLevel = 8150b57cec5SDimitry Andric (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); 8160b57cec5SDimitry Andric skipPerLevel = &(numPerLevel[maxLevels]); 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric // Copy old elements from old arrays 8190b57cec5SDimitry Andric for (kmp_uint32 i = 0; i < old_maxLevels; 8200b57cec5SDimitry Andric ++i) { // init numPerLevel[*] to 1 item per level 8210b57cec5SDimitry Andric numPerLevel[i] = old_numPerLevel[i]; 8220b57cec5SDimitry Andric skipPerLevel[i] = old_skipPerLevel[i]; 8230b57cec5SDimitry Andric } 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric // Init new elements in arrays to 1 8260b57cec5SDimitry Andric for (kmp_uint32 i = old_maxLevels; i < maxLevels; 8270b57cec5SDimitry Andric ++i) { // init numPerLevel[*] to 1 item per level 8280b57cec5SDimitry Andric numPerLevel[i] = 1; 8290b57cec5SDimitry Andric skipPerLevel[i] = 1; 8300b57cec5SDimitry Andric } 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric // Free old arrays 8330b57cec5SDimitry Andric __kmp_free(old_numPerLevel); 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric 8360b57cec5SDimitry Andric // Fill in oversubscription levels of hierarchy 8370b57cec5SDimitry Andric for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) 8380b57cec5SDimitry Andric skipPerLevel[i] = 2 * skipPerLevel[i - 1]; 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric base_num_threads = nproc; 8410b57cec5SDimitry Andric resizing = 0; // One writer 8420b57cec5SDimitry Andric } 8430b57cec5SDimitry Andric }; 8440b57cec5SDimitry Andric #endif // KMP_AFFINITY_H 845