xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
140b57cec5SDimitry Andric #define KMP_AFFINITY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp.h"
170b57cec5SDimitry Andric #include "kmp_os.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
200b57cec5SDimitry Andric #if KMP_USE_HWLOC
210b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
220b57cec5SDimitry Andric public:
230b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
240b57cec5SDimitry Andric     hwloc_cpuset_t mask;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric   public:
270b57cec5SDimitry Andric     Mask() {
280b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
290b57cec5SDimitry Andric       this->zero();
300b57cec5SDimitry Andric     }
310b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
320b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
330b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
340b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
350b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
360b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
370b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
380b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
390b57cec5SDimitry Andric     }
400b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
410b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
420b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
430b57cec5SDimitry Andric     }
440b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
450b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
460b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
470b57cec5SDimitry Andric     }
480b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
490b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
500b57cec5SDimitry Andric     int end() const override { return -1; }
510b57cec5SDimitry Andric     int next(int previous) const override {
520b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
530b57cec5SDimitry Andric     }
540b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
550b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
560b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
570b57cec5SDimitry Andric       int retval =
580b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
590b57cec5SDimitry Andric       if (retval >= 0) {
600b57cec5SDimitry Andric         return 0;
610b57cec5SDimitry Andric       }
620b57cec5SDimitry Andric       int error = errno;
630b57cec5SDimitry Andric       if (abort_on_error) {
640b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
650b57cec5SDimitry Andric       }
660b57cec5SDimitry Andric       return error;
670b57cec5SDimitry Andric     }
680b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
690b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
700b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
710b57cec5SDimitry Andric       int retval =
720b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
730b57cec5SDimitry Andric       if (retval >= 0) {
740b57cec5SDimitry Andric         return 0;
750b57cec5SDimitry Andric       }
760b57cec5SDimitry Andric       int error = errno;
770b57cec5SDimitry Andric       if (abort_on_error) {
780b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
790b57cec5SDimitry Andric       }
800b57cec5SDimitry Andric       return error;
810b57cec5SDimitry Andric     }
820b57cec5SDimitry Andric     int get_proc_group() const override {
830b57cec5SDimitry Andric       int group = -1;
840b57cec5SDimitry Andric #if KMP_OS_WINDOWS
850b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
860b57cec5SDimitry Andric         return 1;
870b57cec5SDimitry Andric       }
880b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
890b57cec5SDimitry Andric         // On windows, the long type is always 32 bits
900b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
910b57cec5SDimitry Andric         unsigned long second_32_bits =
920b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
930b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
940b57cec5SDimitry Andric           continue;
950b57cec5SDimitry Andric         }
960b57cec5SDimitry Andric         if (group >= 0) {
970b57cec5SDimitry Andric           return -1;
980b57cec5SDimitry Andric         }
990b57cec5SDimitry Andric         group = i;
1000b57cec5SDimitry Andric       }
1010b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1020b57cec5SDimitry Andric       return group;
1030b57cec5SDimitry Andric     }
1040b57cec5SDimitry Andric   };
1050b57cec5SDimitry Andric   void determine_capable(const char *var) override {
1060b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
1070b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
1080b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
1090b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1100b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1110b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
1120b57cec5SDimitry Andric       }
1130b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
1140b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1150b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1160b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
1170b57cec5SDimitry Andric       }
1180b57cec5SDimitry Andric     }
1190b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
1200b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
1210b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
1220b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
1230b57cec5SDimitry Andric     // hwloc_* API functions?
1240b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
1250b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
1260b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
1270b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
1280b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
1290b57cec5SDimitry Andric     } else {
1300b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
1310b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
1320b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
1330b57cec5SDimitry Andric     }
1340b57cec5SDimitry Andric   }
1350b57cec5SDimitry Andric   void bind_thread(int which) override {
1360b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
1370b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
1380b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
1390b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
1400b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
1410b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
1420b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
1430b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
1440b57cec5SDimitry Andric   }
1450b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
1460b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
1470b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
1480b57cec5SDimitry Andric     return new Mask[num];
1490b57cec5SDimitry Andric   }
1500b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
1510b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1520b57cec5SDimitry Andric     delete[] hwloc_array;
1530b57cec5SDimitry Andric   }
1540b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
1550b57cec5SDimitry Andric                                       int index) override {
1560b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1570b57cec5SDimitry Andric     return &(hwloc_array[index]);
1580b57cec5SDimitry Andric   }
1590b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
1600b57cec5SDimitry Andric };
1610b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
1620b57cec5SDimitry Andric 
163489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever. Each branch below supplies the
   number when the headers lack it and rejects a header value that
   disagrees. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
/* No table entry for this architecture. Previously the #error below sat
   inside the MIPS64 branch (the #else was missing), so MIPS64 always failed
   and unknown architectures were never diagnosed. Accept an unlisted
   architecture only when the system headers provided both numbers, so that
   configurations which compiled before keep compiling. */
#if !defined(__NR_sched_setaffinity) || !defined(__NR_sched_getaffinity)
#error Unknown or unsupported architecture
#endif
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
2420b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
2430b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
2440b57cec5SDimitry Andric     typedef unsigned char mask_t;
2450b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric   public:
2480b57cec5SDimitry Andric     mask_t *mask;
2490b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
2500b57cec5SDimitry Andric     ~Mask() {
2510b57cec5SDimitry Andric       if (mask)
2520b57cec5SDimitry Andric         __kmp_free(mask);
2530b57cec5SDimitry Andric     }
2540b57cec5SDimitry Andric     void set(int i) override {
2550b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
2560b57cec5SDimitry Andric     }
2570b57cec5SDimitry Andric     bool is_set(int i) const override {
2580b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
2590b57cec5SDimitry Andric     }
2600b57cec5SDimitry Andric     void clear(int i) override {
2610b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
2620b57cec5SDimitry Andric     }
2630b57cec5SDimitry Andric     void zero() override {
2640b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
2650b57cec5SDimitry Andric         mask[i] = 0;
2660b57cec5SDimitry Andric     }
2670b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
2680b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
2690b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
2700b57cec5SDimitry Andric         mask[i] = convert->mask[i];
2710b57cec5SDimitry Andric     }
2720b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
2730b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
2740b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
2750b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
2760b57cec5SDimitry Andric     }
2770b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
2780b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
2790b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
2800b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
2810b57cec5SDimitry Andric     }
2820b57cec5SDimitry Andric     void bitwise_not() override {
2830b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
2840b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
2850b57cec5SDimitry Andric     }
2860b57cec5SDimitry Andric     int begin() const override {
2870b57cec5SDimitry Andric       int retval = 0;
2880b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
2890b57cec5SDimitry Andric         ++retval;
2900b57cec5SDimitry Andric       return retval;
2910b57cec5SDimitry Andric     }
2920b57cec5SDimitry Andric     int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
2930b57cec5SDimitry Andric     int next(int previous) const override {
2940b57cec5SDimitry Andric       int retval = previous + 1;
2950b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
2960b57cec5SDimitry Andric         ++retval;
2970b57cec5SDimitry Andric       return retval;
2980b57cec5SDimitry Andric     }
2990b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
3000b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
3010b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
302489b1cf2SDimitry Andric #if KMP_OS_LINUX
3030b57cec5SDimitry Andric       int retval =
3040b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
305489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
306*5ffd83dbSDimitry Andric       int r =
307489b1cf2SDimitry Andric           pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
308*5ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
309489b1cf2SDimitry Andric #endif
3100b57cec5SDimitry Andric       if (retval >= 0) {
3110b57cec5SDimitry Andric         return 0;
3120b57cec5SDimitry Andric       }
3130b57cec5SDimitry Andric       int error = errno;
3140b57cec5SDimitry Andric       if (abort_on_error) {
3150b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3160b57cec5SDimitry Andric       }
3170b57cec5SDimitry Andric       return error;
3180b57cec5SDimitry Andric     }
3190b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
3200b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
3210b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
322489b1cf2SDimitry Andric #if KMP_OS_LINUX
3230b57cec5SDimitry Andric       int retval =
3240b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
325489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
326*5ffd83dbSDimitry Andric       int r =
327489b1cf2SDimitry Andric           pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
328*5ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
329489b1cf2SDimitry Andric #endif
3300b57cec5SDimitry Andric       if (retval >= 0) {
3310b57cec5SDimitry Andric         return 0;
3320b57cec5SDimitry Andric       }
3330b57cec5SDimitry Andric       int error = errno;
3340b57cec5SDimitry Andric       if (abort_on_error) {
3350b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3360b57cec5SDimitry Andric       }
3370b57cec5SDimitry Andric       return error;
3380b57cec5SDimitry Andric     }
3390b57cec5SDimitry Andric   };
3400b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
3410b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
3420b57cec5SDimitry Andric   }
3430b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
3440b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
3450b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
3460b57cec5SDimitry Andric     return retval;
3470b57cec5SDimitry Andric   }
3480b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
3490b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
3500b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
3510b57cec5SDimitry Andric     delete native_mask;
3520b57cec5SDimitry Andric   }
3530b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
3540b57cec5SDimitry Andric     return new Mask[num];
3550b57cec5SDimitry Andric   }
3560b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
3570b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3580b57cec5SDimitry Andric     delete[] linux_array;
3590b57cec5SDimitry Andric   }
3600b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
3610b57cec5SDimitry Andric                                       int index) override {
3620b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3630b57cec5SDimitry Andric     return &(linux_array[index]);
3640b57cec5SDimitry Andric   }
3650b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
3660b57cec5SDimitry Andric };
367489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric #if KMP_OS_WINDOWS
3700b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
3710b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
3720b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
3730b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
3740b57cec5SDimitry Andric     mask_t *mask;
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric   public:
3770b57cec5SDimitry Andric     Mask() {
3780b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
3790b57cec5SDimitry Andric     }
3800b57cec5SDimitry Andric     ~Mask() {
3810b57cec5SDimitry Andric       if (mask)
3820b57cec5SDimitry Andric         __kmp_free(mask);
3830b57cec5SDimitry Andric     }
3840b57cec5SDimitry Andric     void set(int i) override {
3850b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
3860b57cec5SDimitry Andric     }
3870b57cec5SDimitry Andric     bool is_set(int i) const override {
3880b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
3890b57cec5SDimitry Andric     }
3900b57cec5SDimitry Andric     void clear(int i) override {
3910b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
3920b57cec5SDimitry Andric     }
3930b57cec5SDimitry Andric     void zero() override {
3940b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
3950b57cec5SDimitry Andric         mask[i] = 0;
3960b57cec5SDimitry Andric     }
3970b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
3980b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
3990b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4000b57cec5SDimitry Andric         mask[i] = convert->mask[i];
4010b57cec5SDimitry Andric     }
4020b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
4030b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4040b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4050b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
4060b57cec5SDimitry Andric     }
4070b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
4080b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4090b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4100b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
4110b57cec5SDimitry Andric     }
4120b57cec5SDimitry Andric     void bitwise_not() override {
4130b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4140b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
4150b57cec5SDimitry Andric     }
4160b57cec5SDimitry Andric     int begin() const override {
4170b57cec5SDimitry Andric       int retval = 0;
4180b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4190b57cec5SDimitry Andric         ++retval;
4200b57cec5SDimitry Andric       return retval;
4210b57cec5SDimitry Andric     }
4220b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
4230b57cec5SDimitry Andric     int next(int previous) const override {
4240b57cec5SDimitry Andric       int retval = previous + 1;
4250b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4260b57cec5SDimitry Andric         ++retval;
4270b57cec5SDimitry Andric       return retval;
4280b57cec5SDimitry Andric     }
4290b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
4300b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
4310b57cec5SDimitry Andric         // Check for a valid mask.
4320b57cec5SDimitry Andric         GROUP_AFFINITY ga;
4330b57cec5SDimitry Andric         int group = get_proc_group();
4340b57cec5SDimitry Andric         if (group < 0) {
4350b57cec5SDimitry Andric           if (abort_on_error) {
4360b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4370b57cec5SDimitry Andric           }
4380b57cec5SDimitry Andric           return -1;
4390b57cec5SDimitry Andric         }
4400b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
4410b57cec5SDimitry Andric         // and make the system call to set affinity.
4420b57cec5SDimitry Andric         ga.Group = group;
4430b57cec5SDimitry Andric         ga.Mask = mask[group];
4440b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
4470b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
4480b57cec5SDimitry Andric           DWORD error = GetLastError();
4490b57cec5SDimitry Andric           if (abort_on_error) {
4500b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
4510b57cec5SDimitry Andric                         __kmp_msg_null);
4520b57cec5SDimitry Andric           }
4530b57cec5SDimitry Andric           return error;
4540b57cec5SDimitry Andric         }
4550b57cec5SDimitry Andric       } else {
4560b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
4570b57cec5SDimitry Andric           DWORD error = GetLastError();
4580b57cec5SDimitry Andric           if (abort_on_error) {
4590b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
4600b57cec5SDimitry Andric                         __kmp_msg_null);
4610b57cec5SDimitry Andric           }
4620b57cec5SDimitry Andric           return error;
4630b57cec5SDimitry Andric         }
4640b57cec5SDimitry Andric       }
4650b57cec5SDimitry Andric       return 0;
4660b57cec5SDimitry Andric     }
4670b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
4680b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
4690b57cec5SDimitry Andric         this->zero();
4700b57cec5SDimitry Andric         GROUP_AFFINITY ga;
4710b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
4720b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
4730b57cec5SDimitry Andric           DWORD error = GetLastError();
4740b57cec5SDimitry Andric           if (abort_on_error) {
4750b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
4760b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
4770b57cec5SDimitry Andric           }
4780b57cec5SDimitry Andric           return error;
4790b57cec5SDimitry Andric         }
4800b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
4810b57cec5SDimitry Andric             (ga.Mask == 0)) {
4820b57cec5SDimitry Andric           return -1;
4830b57cec5SDimitry Andric         }
4840b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
4850b57cec5SDimitry Andric       } else {
4860b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
4870b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
4880b57cec5SDimitry Andric           DWORD error = GetLastError();
4890b57cec5SDimitry Andric           if (abort_on_error) {
4900b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
4910b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
4920b57cec5SDimitry Andric           }
4930b57cec5SDimitry Andric           return error;
4940b57cec5SDimitry Andric         }
4950b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
4960b57cec5SDimitry Andric         if (!retval) {
4970b57cec5SDimitry Andric           DWORD error = GetLastError();
4980b57cec5SDimitry Andric           if (abort_on_error) {
4990b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5000b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5010b57cec5SDimitry Andric           }
5020b57cec5SDimitry Andric           return error;
5030b57cec5SDimitry Andric         }
5040b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
5050b57cec5SDimitry Andric         if (!newMask) {
5060b57cec5SDimitry Andric           DWORD error = GetLastError();
5070b57cec5SDimitry Andric           if (abort_on_error) {
5080b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5090b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5100b57cec5SDimitry Andric           }
5110b57cec5SDimitry Andric         }
5120b57cec5SDimitry Andric         *mask = retval;
5130b57cec5SDimitry Andric       }
5140b57cec5SDimitry Andric       return 0;
5150b57cec5SDimitry Andric     }
5160b57cec5SDimitry Andric     int get_proc_group() const override {
5170b57cec5SDimitry Andric       int group = -1;
5180b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
5190b57cec5SDimitry Andric         return 1;
5200b57cec5SDimitry Andric       }
5210b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
5220b57cec5SDimitry Andric         if (mask[i] == 0)
5230b57cec5SDimitry Andric           continue;
5240b57cec5SDimitry Andric         if (group >= 0)
5250b57cec5SDimitry Andric           return -1;
5260b57cec5SDimitry Andric         group = i;
5270b57cec5SDimitry Andric       }
5280b57cec5SDimitry Andric       return group;
5290b57cec5SDimitry Andric     }
5300b57cec5SDimitry Andric   };
5310b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
5320b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
5330b57cec5SDimitry Andric   }
5340b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
5350b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
5360b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
5370b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
5380b57cec5SDimitry Andric     return new Mask[num];
5390b57cec5SDimitry Andric   }
5400b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
5410b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5420b57cec5SDimitry Andric     delete[] windows_array;
5430b57cec5SDimitry Andric   }
5440b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
5450b57cec5SDimitry Andric                                       int index) override {
5460b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5470b57cec5SDimitry Andric     return &(windows_array[index]);
5480b57cec5SDimitry Andric   }
5490b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
5500b57cec5SDimitry Andric };
5510b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
5520b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric class Address {
5550b57cec5SDimitry Andric public:
5560b57cec5SDimitry Andric   static const unsigned maxDepth = 32;
5570b57cec5SDimitry Andric   unsigned labels[maxDepth];
5580b57cec5SDimitry Andric   unsigned childNums[maxDepth];
5590b57cec5SDimitry Andric   unsigned depth;
5600b57cec5SDimitry Andric   unsigned leader;
5610b57cec5SDimitry Andric   Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
5620b57cec5SDimitry Andric   Address &operator=(const Address &b) {
5630b57cec5SDimitry Andric     depth = b.depth;
5640b57cec5SDimitry Andric     for (unsigned i = 0; i < depth; i++) {
5650b57cec5SDimitry Andric       labels[i] = b.labels[i];
5660b57cec5SDimitry Andric       childNums[i] = b.childNums[i];
5670b57cec5SDimitry Andric     }
5680b57cec5SDimitry Andric     leader = FALSE;
5690b57cec5SDimitry Andric     return *this;
5700b57cec5SDimitry Andric   }
5710b57cec5SDimitry Andric   bool operator==(const Address &b) const {
5720b57cec5SDimitry Andric     if (depth != b.depth)
5730b57cec5SDimitry Andric       return false;
5740b57cec5SDimitry Andric     for (unsigned i = 0; i < depth; i++)
5750b57cec5SDimitry Andric       if (labels[i] != b.labels[i])
5760b57cec5SDimitry Andric         return false;
5770b57cec5SDimitry Andric     return true;
5780b57cec5SDimitry Andric   }
5790b57cec5SDimitry Andric   bool isClose(const Address &b, int level) const {
5800b57cec5SDimitry Andric     if (depth != b.depth)
5810b57cec5SDimitry Andric       return false;
5820b57cec5SDimitry Andric     if ((unsigned)level >= depth)
5830b57cec5SDimitry Andric       return true;
5840b57cec5SDimitry Andric     for (unsigned i = 0; i < (depth - level); i++)
5850b57cec5SDimitry Andric       if (labels[i] != b.labels[i])
5860b57cec5SDimitry Andric         return false;
5870b57cec5SDimitry Andric     return true;
5880b57cec5SDimitry Andric   }
5890b57cec5SDimitry Andric   bool operator!=(const Address &b) const { return !operator==(b); }
5900b57cec5SDimitry Andric   void print() const {
5910b57cec5SDimitry Andric     unsigned i;
5920b57cec5SDimitry Andric     printf("Depth: %u --- ", depth);
5930b57cec5SDimitry Andric     for (i = 0; i < depth; i++) {
5940b57cec5SDimitry Andric       printf("%u ", labels[i]);
5950b57cec5SDimitry Andric     }
5960b57cec5SDimitry Andric   }
5970b57cec5SDimitry Andric };
5980b57cec5SDimitry Andric 
5990b57cec5SDimitry Andric class AddrUnsPair {
6000b57cec5SDimitry Andric public:
6010b57cec5SDimitry Andric   Address first;
6020b57cec5SDimitry Andric   unsigned second;
6030b57cec5SDimitry Andric   AddrUnsPair(Address _first, unsigned _second)
6040b57cec5SDimitry Andric       : first(_first), second(_second) {}
6050b57cec5SDimitry Andric   AddrUnsPair &operator=(const AddrUnsPair &b) {
6060b57cec5SDimitry Andric     first = b.first;
6070b57cec5SDimitry Andric     second = b.second;
6080b57cec5SDimitry Andric     return *this;
6090b57cec5SDimitry Andric   }
6100b57cec5SDimitry Andric   void print() const {
6110b57cec5SDimitry Andric     printf("first = ");
6120b57cec5SDimitry Andric     first.print();
6130b57cec5SDimitry Andric     printf(" --- second = %u", second);
6140b57cec5SDimitry Andric   }
6150b57cec5SDimitry Andric   bool operator==(const AddrUnsPair &b) const {
6160b57cec5SDimitry Andric     if (first != b.first)
6170b57cec5SDimitry Andric       return false;
6180b57cec5SDimitry Andric     if (second != b.second)
6190b57cec5SDimitry Andric       return false;
6200b57cec5SDimitry Andric     return true;
6210b57cec5SDimitry Andric   }
6220b57cec5SDimitry Andric   bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
6230b57cec5SDimitry Andric };
6240b57cec5SDimitry Andric 
6250b57cec5SDimitry Andric static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
6260b57cec5SDimitry Andric   const Address *aa = &(((const AddrUnsPair *)a)->first);
6270b57cec5SDimitry Andric   const Address *bb = &(((const AddrUnsPair *)b)->first);
6280b57cec5SDimitry Andric   unsigned depth = aa->depth;
6290b57cec5SDimitry Andric   unsigned i;
6300b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(depth == bb->depth);
6310b57cec5SDimitry Andric   for (i = 0; i < depth; i++) {
6320b57cec5SDimitry Andric     if (aa->labels[i] < bb->labels[i])
6330b57cec5SDimitry Andric       return -1;
6340b57cec5SDimitry Andric     if (aa->labels[i] > bb->labels[i])
6350b57cec5SDimitry Andric       return 1;
6360b57cec5SDimitry Andric   }
6370b57cec5SDimitry Andric   return 0;
6380b57cec5SDimitry Andric }
6390b57cec5SDimitry Andric 
6400b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
6410b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
6420b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
6430b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
6440b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
6450b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
6460b57cec5SDimitry Andric class hierarchy_info {
6470b57cec5SDimitry Andric public:
6480b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
6490b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
6500b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
6510b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
6520b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
6530b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
6540b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
6550b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
6560b57cec5SDimitry Andric       hierarchy each time we add a level. */
6570b57cec5SDimitry Andric   kmp_uint32 maxLevels;
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
6600b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
6610b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
6620b57cec5SDimitry Andric       all but one trailing 1. */
6630b57cec5SDimitry Andric   kmp_uint32 depth;
6640b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
6650b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
6660b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
6670b57cec5SDimitry Andric   // 2=initialization in progress
6680b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
6710b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
6720b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
6730b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
6740b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
6750b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
6760b57cec5SDimitry Andric 
6770b57cec5SDimitry Andric   void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
6780b57cec5SDimitry Andric     int hier_depth = adr2os[0].first.depth;
6790b57cec5SDimitry Andric     int level = 0;
6800b57cec5SDimitry Andric     for (int i = hier_depth - 1; i >= 0; --i) {
6810b57cec5SDimitry Andric       int max = -1;
6820b57cec5SDimitry Andric       for (int j = 0; j < num_addrs; ++j) {
6830b57cec5SDimitry Andric         int next = adr2os[j].first.childNums[i];
6840b57cec5SDimitry Andric         if (next > max)
6850b57cec5SDimitry Andric           max = next;
6860b57cec5SDimitry Andric       }
6870b57cec5SDimitry Andric       numPerLevel[level] = max + 1;
6880b57cec5SDimitry Andric       ++level;
6890b57cec5SDimitry Andric     }
6900b57cec5SDimitry Andric   }
6910b57cec5SDimitry Andric 
6920b57cec5SDimitry Andric   hierarchy_info()
6930b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
6940b57cec5SDimitry Andric 
6950b57cec5SDimitry Andric   void fini() {
6960b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
6970b57cec5SDimitry Andric       __kmp_free(numPerLevel);
6980b57cec5SDimitry Andric       numPerLevel = NULL;
6990b57cec5SDimitry Andric       uninitialized = not_initialized;
7000b57cec5SDimitry Andric     }
7010b57cec5SDimitry Andric   }
7020b57cec5SDimitry Andric 
7030b57cec5SDimitry Andric   void init(AddrUnsPair *adr2os, int num_addrs) {
7040b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
7050b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
7060b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
7070b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
7080b57cec5SDimitry Andric         KMP_CPU_PAUSE();
7090b57cec5SDimitry Andric       return;
7100b57cec5SDimitry Andric     }
7110b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
7120b57cec5SDimitry Andric 
7130b57cec5SDimitry Andric     /* Added explicit initialization of the data fields here to prevent usage of
7140b57cec5SDimitry Andric        dirty value observed when static library is re-initialized multiple times
7150b57cec5SDimitry Andric        (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
7160b57cec5SDimitry Andric        OpenMP). */
7170b57cec5SDimitry Andric     depth = 1;
7180b57cec5SDimitry Andric     resizing = 0;
7190b57cec5SDimitry Andric     maxLevels = 7;
7200b57cec5SDimitry Andric     numPerLevel =
7210b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
7220b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
7230b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
7240b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
7250b57cec5SDimitry Andric       numPerLevel[i] = 1;
7260b57cec5SDimitry Andric       skipPerLevel[i] = 1;
7270b57cec5SDimitry Andric     }
7280b57cec5SDimitry Andric 
7290b57cec5SDimitry Andric     // Sort table by physical ID
7300b57cec5SDimitry Andric     if (adr2os) {
7310b57cec5SDimitry Andric       qsort(adr2os, num_addrs, sizeof(*adr2os),
7320b57cec5SDimitry Andric             __kmp_affinity_cmp_Address_labels);
7330b57cec5SDimitry Andric       deriveLevels(adr2os, num_addrs);
7340b57cec5SDimitry Andric     } else {
7350b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
7360b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
7370b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
7380b57cec5SDimitry Andric         numPerLevel[1]++;
7390b57cec5SDimitry Andric     }
7400b57cec5SDimitry Andric 
7410b57cec5SDimitry Andric     base_num_threads = num_addrs;
7420b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
7430b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
7440b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
7450b57cec5SDimitry Andric         depth++;
7460b57cec5SDimitry Andric 
7470b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
7480b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
7490b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
7500b57cec5SDimitry Andric     if (branch < minBranch)
7510b57cec5SDimitry Andric       branch = minBranch;
7520b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
7530b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
7540b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
7550b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
7560b57cec5SDimitry Andric           numPerLevel[d]++;
7570b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
7580b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
7590b57cec5SDimitry Andric           depth++;
7600b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
7610b57cec5SDimitry Andric       }
7620b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
7630b57cec5SDimitry Andric         branch = branch >> 1;
7640b57cec5SDimitry Andric         if (branch < 4)
7650b57cec5SDimitry Andric           branch = minBranch;
7660b57cec5SDimitry Andric       }
7670b57cec5SDimitry Andric     }
7680b57cec5SDimitry Andric 
7690b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
7700b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
7710b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
7720b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
7730b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
7740b57cec5SDimitry Andric 
7750b57cec5SDimitry Andric     uninitialized = initialized; // One writer
7760b57cec5SDimitry Andric   }
7770b57cec5SDimitry Andric 
7780b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
7790b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
7800b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
7810b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
7820b57cec5SDimitry Andric       KMP_CPU_PAUSE();
7830b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
7840b57cec5SDimitry Andric         return;
7850b57cec5SDimitry Andric       else // try to resize
7860b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
7870b57cec5SDimitry Andric     }
7880b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
7890b57cec5SDimitry Andric     if (nproc <= base_num_threads)
7900b57cec5SDimitry Andric       return; // happy with other thread's resize
7910b57cec5SDimitry Andric 
7920b57cec5SDimitry Andric     // Calculate new maxLevels
7930b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
7940b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
7950b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
7960b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
7970b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
7980b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
7990b57cec5SDimitry Andric       old_sz *= 2;
8000b57cec5SDimitry Andric       depth++;
8010b57cec5SDimitry Andric     }
8020b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
8030b57cec5SDimitry Andric       while (nproc > old_sz) {
8040b57cec5SDimitry Andric         old_sz *= 2;
8050b57cec5SDimitry Andric         incs++;
8060b57cec5SDimitry Andric         depth++;
8070b57cec5SDimitry Andric       }
8080b57cec5SDimitry Andric       maxLevels += incs;
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric       // Resize arrays
8110b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
8120b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
8130b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
8140b57cec5SDimitry Andric       numPerLevel =
8150b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
8160b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric       // Copy old elements from old arrays
8190b57cec5SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels;
8200b57cec5SDimitry Andric            ++i) { // init numPerLevel[*] to 1 item per level
8210b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
8220b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
8230b57cec5SDimitry Andric       }
8240b57cec5SDimitry Andric 
8250b57cec5SDimitry Andric       // Init new elements in arrays to 1
8260b57cec5SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels;
8270b57cec5SDimitry Andric            ++i) { // init numPerLevel[*] to 1 item per level
8280b57cec5SDimitry Andric         numPerLevel[i] = 1;
8290b57cec5SDimitry Andric         skipPerLevel[i] = 1;
8300b57cec5SDimitry Andric       }
8310b57cec5SDimitry Andric 
8320b57cec5SDimitry Andric       // Free old arrays
8330b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
8340b57cec5SDimitry Andric     }
8350b57cec5SDimitry Andric 
8360b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
8370b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
8380b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric     base_num_threads = nproc;
8410b57cec5SDimitry Andric     resizing = 0; // One writer
8420b57cec5SDimitry Andric   }
8430b57cec5SDimitry Andric };
8440b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
845