xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision fe6060f10f634930ff71b7c50291ddc610da2475)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
140b57cec5SDimitry Andric #define KMP_AFFINITY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp.h"
170b57cec5SDimitry Andric #include "kmp_os.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
200b57cec5SDimitry Andric #if KMP_USE_HWLOC
210b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
220b57cec5SDimitry Andric public:
230b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
240b57cec5SDimitry Andric     hwloc_cpuset_t mask;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric   public:
270b57cec5SDimitry Andric     Mask() {
280b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
290b57cec5SDimitry Andric       this->zero();
300b57cec5SDimitry Andric     }
310b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
320b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
330b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
340b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
350b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
360b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
370b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
380b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
390b57cec5SDimitry Andric     }
400b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
410b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
420b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
430b57cec5SDimitry Andric     }
440b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
450b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
460b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
470b57cec5SDimitry Andric     }
480b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
490b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
500b57cec5SDimitry Andric     int end() const override { return -1; }
510b57cec5SDimitry Andric     int next(int previous) const override {
520b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
530b57cec5SDimitry Andric     }
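    // The three methods above follow the hwloc iteration convention:
    // begin() yields the first set bit, next() the following one, and -1
    // (which end() returns) signals exhaustion. Illustrative loop:
    //   for (int i = m.begin(); i != m.end(); i = m.next(i)) { /* use i */ }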
540b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
550b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
560b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
57e8d8bef9SDimitry Andric       long retval =
580b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
590b57cec5SDimitry Andric       if (retval >= 0) {
600b57cec5SDimitry Andric         return 0;
610b57cec5SDimitry Andric       }
620b57cec5SDimitry Andric       int error = errno;
630b57cec5SDimitry Andric       if (abort_on_error) {
640b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
650b57cec5SDimitry Andric       }
660b57cec5SDimitry Andric       return error;
670b57cec5SDimitry Andric     }
680b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
690b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
71e8d8bef9SDimitry Andric       long retval =
720b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
730b57cec5SDimitry Andric       if (retval >= 0) {
740b57cec5SDimitry Andric         return 0;
750b57cec5SDimitry Andric       }
760b57cec5SDimitry Andric       int error = errno;
770b57cec5SDimitry Andric       if (abort_on_error) {
780b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
790b57cec5SDimitry Andric       }
800b57cec5SDimitry Andric       return error;
810b57cec5SDimitry Andric     }
82e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS
83e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
84e8d8bef9SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
85e8d8bef9SDimitry Andric                   "Illegal set process affinity operation when not capable");
86e8d8bef9SDimitry Andric       int error = 0;
87e8d8bef9SDimitry Andric       const hwloc_topology_support *support =
88e8d8bef9SDimitry Andric           hwloc_topology_get_support(__kmp_hwloc_topology);
89e8d8bef9SDimitry Andric       if (support->cpubind->set_proc_cpubind) {
90e8d8bef9SDimitry Andric         int retval;
91e8d8bef9SDimitry Andric         retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
92e8d8bef9SDimitry Andric                                    HWLOC_CPUBIND_PROCESS);
93e8d8bef9SDimitry Andric         if (retval >= 0)
94e8d8bef9SDimitry Andric           return 0;
95e8d8bef9SDimitry Andric         error = errno;
96e8d8bef9SDimitry Andric         if (abort_on_error)
97e8d8bef9SDimitry Andric           __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
98e8d8bef9SDimitry Andric       }
99e8d8bef9SDimitry Andric       return error;
100e8d8bef9SDimitry Andric     }
101e8d8bef9SDimitry Andric #endif
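    // On Windows, return the single processor group covered by this mask
    // (each group maps to two 32-bit ulongs of the hwloc bitmap); returns 1
    // when only one group is configured, and -1 when the mask is empty or
    // spans more than one group. On other systems this always returns -1.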
1020b57cec5SDimitry Andric     int get_proc_group() const override {
1030b57cec5SDimitry Andric       int group = -1;
1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1050b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
1060b57cec5SDimitry Andric         return 1;
1070b57cec5SDimitry Andric       }
1080b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
1090b57cec5SDimitry Andric         // On Windows, the long type is always 32 bits
1100b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
1110b57cec5SDimitry Andric         unsigned long second_32_bits =
1120b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
1130b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
1140b57cec5SDimitry Andric           continue;
1150b57cec5SDimitry Andric         }
1160b57cec5SDimitry Andric         if (group >= 0) {
1170b57cec5SDimitry Andric           return -1;
1180b57cec5SDimitry Andric         }
1190b57cec5SDimitry Andric         group = i;
1200b57cec5SDimitry Andric       }
1210b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1220b57cec5SDimitry Andric       return group;
1230b57cec5SDimitry Andric     }
1240b57cec5SDimitry Andric   };
1250b57cec5SDimitry Andric   void determine_capable(const char *var) override {
1260b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
1270b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
1280b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
1290b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1300b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1310b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
1320b57cec5SDimitry Andric       }
1330b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
1340b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1350b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1360b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
1370b57cec5SDimitry Andric       }
1380b57cec5SDimitry Andric     }
1390b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
1400b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
1410b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
1420b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
1430b57cec5SDimitry Andric     // hwloc_* API functions?
1440b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
1450b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
1460b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
1470b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
1480b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
1490b57cec5SDimitry Andric     } else {
1500b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
1510b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
1520b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
1530b57cec5SDimitry Andric     }
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric   void bind_thread(int which) override {
1560b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
1570b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
1580b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
1590b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
1600b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
1610b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
1620b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
1630b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
1640b57cec5SDimitry Andric   }
1650b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
1660b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
1670b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
1680b57cec5SDimitry Andric     return new Mask[num];
1690b57cec5SDimitry Andric   }
1700b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
1710b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1720b57cec5SDimitry Andric     delete[] hwloc_array;
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
1750b57cec5SDimitry Andric                                       int index) override {
1760b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1770b57cec5SDimitry Andric     return &(hwloc_array[index]);
1780b57cec5SDimitry Andric   }
1790b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
1800b57cec5SDimitry Andric };
1810b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
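// A minimal usage sketch of the affinity interface defined above; inside the
// runtime this is normally driven through the KMP_CPU_* helper macros rather
// than called directly (illustrative only):
//
//   KMPAffinity *affinity = new KMPHwlocAffinity();
//   affinity->determine_capable("KMP_AFFINITY");
//   if (KMP_AFFINITY_CAPABLE()) {
//     KMPAffinity::Mask *mask = affinity->allocate_mask();
//     mask->set(0);                      // request logical CPU 0
//     mask->set_system_affinity(FALSE);  // bind the calling thread
//     affinity->deallocate_mask(mask);
//   }
//   delete affinity;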
1820b57cec5SDimitry Andric 
183489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD
1840b57cec5SDimitry Andric #if KMP_OS_LINUX
1850b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present
1860b57cec5SDimitry Andric    in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
1870b57cec5SDimitry Andric    all systems of the same arch where they are defined, and they cannot
1880b57cec5SDimitry Andric    change; they are set in stone forever. */
1890b57cec5SDimitry Andric #include <sys/syscall.h>
1900b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM
1910b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
1920b57cec5SDimitry Andric #define __NR_sched_setaffinity 241
1930b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241
1940b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
1950b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
1960b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
1970b57cec5SDimitry Andric #define __NR_sched_getaffinity 242
1980b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242
1990b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2000b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2010b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64
2020b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2030b57cec5SDimitry Andric #define __NR_sched_setaffinity 122
2040b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122
2050b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2060b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2070b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2080b57cec5SDimitry Andric #define __NR_sched_getaffinity 123
2090b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123
2100b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2110b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2120b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
2130b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2140b57cec5SDimitry Andric #define __NR_sched_setaffinity 203
2150b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203
2160b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2170b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2180b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2190b57cec5SDimitry Andric #define __NR_sched_getaffinity 204
2200b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204
2210b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2220b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2230b57cec5SDimitry Andric #elif KMP_ARCH_PPC64
2240b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2250b57cec5SDimitry Andric #define __NR_sched_setaffinity 222
2260b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222
2270b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2280b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2290b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2300b57cec5SDimitry Andric #define __NR_sched_getaffinity 223
2310b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223
2320b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2330b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2340b57cec5SDimitry Andric #elif KMP_ARCH_MIPS
2350b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2360b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239
2370b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239
2380b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2390b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2400b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2410b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240
2420b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240
2430b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2440b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2450b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64
2460b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2470b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195
2480b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195
2490b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2500b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2510b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2520b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196
2530b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196
2540b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2550b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
#else
2560b57cec5SDimitry Andric #error Unknown or unsupported architecture
2570b57cec5SDimitry Andric #endif /* KMP_ARCH_* */
258489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
259489b1cf2SDimitry Andric #include <pthread.h>
260489b1cf2SDimitry Andric #include <pthread_np.h>
261489b1cf2SDimitry Andric #endif
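// On Linux the mask operations below invoke sched_{get,set}affinity directly
// through syscall(2) using the __NR_* numbers defined above; on FreeBSD they
// use the pthread_{get,set}affinity_np() wrappers from <pthread_np.h>.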
2620b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
2630b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
264e8d8bef9SDimitry Andric     typedef unsigned long mask_t;
265e8d8bef9SDimitry Andric     typedef decltype(__kmp_affin_mask_size) mask_size_type;
266e8d8bef9SDimitry Andric     static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
267e8d8bef9SDimitry Andric     static const mask_t ONE = 1;
268e8d8bef9SDimitry Andric     mask_size_type get_num_mask_types() const {
269e8d8bef9SDimitry Andric       return __kmp_affin_mask_size / sizeof(mask_t);
270e8d8bef9SDimitry Andric     }
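    // The mask is a flat array of mask_t words, __kmp_affin_mask_size bytes in
    // total; logical CPU i lives at bit (i % BITS_PER_MASK_T) of word
    // (i / BITS_PER_MASK_T), which is what set()/clear()/is_set() below encode.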
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric   public:
2730b57cec5SDimitry Andric     mask_t *mask;
2740b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
2750b57cec5SDimitry Andric     ~Mask() {
2760b57cec5SDimitry Andric       if (mask)
2770b57cec5SDimitry Andric         __kmp_free(mask);
2780b57cec5SDimitry Andric     }
2790b57cec5SDimitry Andric     void set(int i) override {
280e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
2810b57cec5SDimitry Andric     }
2820b57cec5SDimitry Andric     bool is_set(int i) const override {
283e8d8bef9SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
2840b57cec5SDimitry Andric     }
2850b57cec5SDimitry Andric     void clear(int i) override {
286e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
2870b57cec5SDimitry Andric     }
2880b57cec5SDimitry Andric     void zero() override {
289e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
290e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
291e8d8bef9SDimitry Andric         mask[i] = (mask_t)0;
2920b57cec5SDimitry Andric     }
2930b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
2940b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
295e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
296e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
2970b57cec5SDimitry Andric         mask[i] = convert->mask[i];
2980b57cec5SDimitry Andric     }
2990b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
3000b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
301e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
302e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3030b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
3040b57cec5SDimitry Andric     }
3050b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
3060b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
307e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
308e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3090b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
3100b57cec5SDimitry Andric     }
3110b57cec5SDimitry Andric     void bitwise_not() override {
312e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
313e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3140b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
3150b57cec5SDimitry Andric     }
3160b57cec5SDimitry Andric     int begin() const override {
3170b57cec5SDimitry Andric       int retval = 0;
3180b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3190b57cec5SDimitry Andric         ++retval;
3200b57cec5SDimitry Andric       return retval;
3210b57cec5SDimitry Andric     }
322e8d8bef9SDimitry Andric     int end() const override {
323e8d8bef9SDimitry Andric       int e;
324e8d8bef9SDimitry Andric       __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
325e8d8bef9SDimitry Andric       return e;
326e8d8bef9SDimitry Andric     }
3270b57cec5SDimitry Andric     int next(int previous) const override {
3280b57cec5SDimitry Andric       int retval = previous + 1;
3290b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3300b57cec5SDimitry Andric         ++retval;
3310b57cec5SDimitry Andric       return retval;
3320b57cec5SDimitry Andric     }
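    // Unlike the hwloc mask, end() here is one past the highest representable
    // bit rather than -1, and begin()/next() simply scan forward for the next
    // set bit, so the same begin()/next()/end() iteration pattern applies.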
3330b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
3340b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
3350b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
336489b1cf2SDimitry Andric #if KMP_OS_LINUX
337e8d8bef9SDimitry Andric       long retval =
3380b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
339489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
340*fe6060f1SDimitry Andric       int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
341*fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
3425ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
343489b1cf2SDimitry Andric #endif
3440b57cec5SDimitry Andric       if (retval >= 0) {
3450b57cec5SDimitry Andric         return 0;
3460b57cec5SDimitry Andric       }
3470b57cec5SDimitry Andric       int error = errno;
3480b57cec5SDimitry Andric       if (abort_on_error) {
3490b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3500b57cec5SDimitry Andric       }
3510b57cec5SDimitry Andric       return error;
3520b57cec5SDimitry Andric     }
3530b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
3540b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
355e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
356489b1cf2SDimitry Andric #if KMP_OS_LINUX
357e8d8bef9SDimitry Andric       long retval =
3580b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
359489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
360*fe6060f1SDimitry Andric       int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
361*fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
3625ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
363489b1cf2SDimitry Andric #endif
3640b57cec5SDimitry Andric       if (retval >= 0) {
3650b57cec5SDimitry Andric         return 0;
3660b57cec5SDimitry Andric       }
3670b57cec5SDimitry Andric       int error = errno;
3680b57cec5SDimitry Andric       if (abort_on_error) {
3690b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3700b57cec5SDimitry Andric       }
3710b57cec5SDimitry Andric       return error;
3720b57cec5SDimitry Andric     }
3730b57cec5SDimitry Andric   };
3740b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
3750b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
3760b57cec5SDimitry Andric   }
3770b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
3780b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
3790b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
3800b57cec5SDimitry Andric     return retval;
3810b57cec5SDimitry Andric   }
3820b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
3830b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
3840b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
3850b57cec5SDimitry Andric     delete native_mask;
3860b57cec5SDimitry Andric   }
3870b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
3880b57cec5SDimitry Andric     return new Mask[num];
3890b57cec5SDimitry Andric   }
3900b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
3910b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3920b57cec5SDimitry Andric     delete[] linux_array;
3930b57cec5SDimitry Andric   }
3940b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
3950b57cec5SDimitry Andric                                       int index) override {
3960b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3970b57cec5SDimitry Andric     return &(linux_array[index]);
3980b57cec5SDimitry Andric   }
3990b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
4000b57cec5SDimitry Andric };
401489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric #if KMP_OS_WINDOWS
4040b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
4050b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
4060b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
4070b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
4080b57cec5SDimitry Andric     mask_t *mask;
4090b57cec5SDimitry Andric 
4100b57cec5SDimitry Andric   public:
4110b57cec5SDimitry Andric     Mask() {
4120b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
4130b57cec5SDimitry Andric     }
4140b57cec5SDimitry Andric     ~Mask() {
4150b57cec5SDimitry Andric       if (mask)
4160b57cec5SDimitry Andric         __kmp_free(mask);
4170b57cec5SDimitry Andric     }
4180b57cec5SDimitry Andric     void set(int i) override {
4190b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
4200b57cec5SDimitry Andric     }
4210b57cec5SDimitry Andric     bool is_set(int i) const override {
4220b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
4230b57cec5SDimitry Andric     }
4240b57cec5SDimitry Andric     void clear(int i) override {
4250b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
4260b57cec5SDimitry Andric     }
4270b57cec5SDimitry Andric     void zero() override {
4280b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4290b57cec5SDimitry Andric         mask[i] = 0;
4300b57cec5SDimitry Andric     }
4310b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
4320b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
4330b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4340b57cec5SDimitry Andric         mask[i] = convert->mask[i];
4350b57cec5SDimitry Andric     }
4360b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
4370b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4380b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4390b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
4400b57cec5SDimitry Andric     }
4410b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
4420b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4430b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4440b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
4450b57cec5SDimitry Andric     }
4460b57cec5SDimitry Andric     void bitwise_not() override {
4470b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4480b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
4490b57cec5SDimitry Andric     }
4500b57cec5SDimitry Andric     int begin() const override {
4510b57cec5SDimitry Andric       int retval = 0;
4520b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4530b57cec5SDimitry Andric         ++retval;
4540b57cec5SDimitry Andric       return retval;
4550b57cec5SDimitry Andric     }
4560b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
4570b57cec5SDimitry Andric     int next(int previous) const override {
4580b57cec5SDimitry Andric       int retval = previous + 1;
4590b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4600b57cec5SDimitry Andric         ++retval;
4610b57cec5SDimitry Andric       return retval;
4620b57cec5SDimitry Andric     }
463e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
464e8d8bef9SDimitry Andric       if (__kmp_num_proc_groups <= 1) {
465e8d8bef9SDimitry Andric         if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
466e8d8bef9SDimitry Andric           DWORD error = GetLastError();
467e8d8bef9SDimitry Andric           if (abort_on_error) {
468e8d8bef9SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
469e8d8bef9SDimitry Andric                         __kmp_msg_null);
470e8d8bef9SDimitry Andric           }
471e8d8bef9SDimitry Andric           return error;
472e8d8bef9SDimitry Andric         }
473e8d8bef9SDimitry Andric       }
474e8d8bef9SDimitry Andric       return 0;
475e8d8bef9SDimitry Andric     }
4760b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
4770b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
4780b57cec5SDimitry Andric         // Check for a valid mask.
4790b57cec5SDimitry Andric         GROUP_AFFINITY ga;
4800b57cec5SDimitry Andric         int group = get_proc_group();
4810b57cec5SDimitry Andric         if (group < 0) {
4820b57cec5SDimitry Andric           if (abort_on_error) {
4830b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4840b57cec5SDimitry Andric           }
4850b57cec5SDimitry Andric           return -1;
4860b57cec5SDimitry Andric         }
4870b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
4880b57cec5SDimitry Andric         // and make the system call to set affinity.
4890b57cec5SDimitry Andric         ga.Group = group;
4900b57cec5SDimitry Andric         ga.Mask = mask[group];
4910b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
4940b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
4950b57cec5SDimitry Andric           DWORD error = GetLastError();
4960b57cec5SDimitry Andric           if (abort_on_error) {
4970b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
4980b57cec5SDimitry Andric                         __kmp_msg_null);
4990b57cec5SDimitry Andric           }
5000b57cec5SDimitry Andric           return error;
5010b57cec5SDimitry Andric         }
5020b57cec5SDimitry Andric       } else {
5030b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
5040b57cec5SDimitry Andric           DWORD error = GetLastError();
5050b57cec5SDimitry Andric           if (abort_on_error) {
5060b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
5070b57cec5SDimitry Andric                         __kmp_msg_null);
5080b57cec5SDimitry Andric           }
5090b57cec5SDimitry Andric           return error;
5100b57cec5SDimitry Andric         }
5110b57cec5SDimitry Andric       }
5120b57cec5SDimitry Andric       return 0;
5130b57cec5SDimitry Andric     }
5140b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
5150b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
5160b57cec5SDimitry Andric         this->zero();
5170b57cec5SDimitry Andric         GROUP_AFFINITY ga;
5180b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
5190b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
5200b57cec5SDimitry Andric           DWORD error = GetLastError();
5210b57cec5SDimitry Andric           if (abort_on_error) {
5220b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
5230b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5240b57cec5SDimitry Andric           }
5250b57cec5SDimitry Andric           return error;
5260b57cec5SDimitry Andric         }
5270b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
5280b57cec5SDimitry Andric             (ga.Mask == 0)) {
5290b57cec5SDimitry Andric           return -1;
5300b57cec5SDimitry Andric         }
5310b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
5320b57cec5SDimitry Andric       } else {
5330b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
5340b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
5350b57cec5SDimitry Andric           DWORD error = GetLastError();
5360b57cec5SDimitry Andric           if (abort_on_error) {
5370b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
5380b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5390b57cec5SDimitry Andric           }
5400b57cec5SDimitry Andric           return error;
5410b57cec5SDimitry Andric         }
5420b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
5430b57cec5SDimitry Andric         if (!retval) {
5440b57cec5SDimitry Andric           DWORD error = GetLastError();
5450b57cec5SDimitry Andric           if (abort_on_error) {
5460b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5470b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5480b57cec5SDimitry Andric           }
5490b57cec5SDimitry Andric           return error;
5500b57cec5SDimitry Andric         }
5510b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
5520b57cec5SDimitry Andric         if (!newMask) {
5530b57cec5SDimitry Andric           DWORD error = GetLastError();
5540b57cec5SDimitry Andric           if (abort_on_error) {
5550b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5560b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5570b57cec5SDimitry Andric           }
5580b57cec5SDimitry Andric         }
5590b57cec5SDimitry Andric         *mask = retval;
5600b57cec5SDimitry Andric       }
5610b57cec5SDimitry Andric       return 0;
5620b57cec5SDimitry Andric     }
5630b57cec5SDimitry Andric     int get_proc_group() const override {
5640b57cec5SDimitry Andric       int group = -1;
5650b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
5660b57cec5SDimitry Andric         return 1;
5670b57cec5SDimitry Andric       }
5680b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
5690b57cec5SDimitry Andric         if (mask[i] == 0)
5700b57cec5SDimitry Andric           continue;
5710b57cec5SDimitry Andric         if (group >= 0)
5720b57cec5SDimitry Andric           return -1;
5730b57cec5SDimitry Andric         group = i;
5740b57cec5SDimitry Andric       }
5750b57cec5SDimitry Andric       return group;
5760b57cec5SDimitry Andric     }
5770b57cec5SDimitry Andric   };
5780b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
5790b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
5800b57cec5SDimitry Andric   }
5810b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
5820b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
5830b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
5840b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
5850b57cec5SDimitry Andric     return new Mask[num];
5860b57cec5SDimitry Andric   }
5870b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
5880b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5890b57cec5SDimitry Andric     delete[] windows_array;
5900b57cec5SDimitry Andric   }
5910b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
5920b57cec5SDimitry Andric                                       int index) override {
5930b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5940b57cec5SDimitry Andric     return &(windows_array[index]);
5950b57cec5SDimitry Andric   }
5960b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
5970b57cec5SDimitry Andric };
5980b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
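// Note that only one KMPNativeAffinity definition is compiled in at a time:
// the Linux/FreeBSD one or the Windows one above, selected by the KMP_OS_*
// guards. Both expose the same KMPAffinity interface.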
5990b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
6000b57cec5SDimitry Andric 
601*fe6060f1SDimitry Andric class kmp_hw_thread_t {
6020b57cec5SDimitry Andric public:
603*fe6060f1SDimitry Andric   static const int UNKNOWN_ID = -1;
604*fe6060f1SDimitry Andric   static int compare_ids(const void *a, const void *b);
605*fe6060f1SDimitry Andric   static int compare_compact(const void *a, const void *b);
606*fe6060f1SDimitry Andric   int ids[KMP_HW_LAST];
607*fe6060f1SDimitry Andric   int sub_ids[KMP_HW_LAST];
608*fe6060f1SDimitry Andric   bool leader;
609*fe6060f1SDimitry Andric   int os_id;
610*fe6060f1SDimitry Andric   void print() const;
611*fe6060f1SDimitry Andric   void clear() {
612*fe6060f1SDimitry Andric     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
613*fe6060f1SDimitry Andric       ids[i] = UNKNOWN_ID;
614*fe6060f1SDimitry Andric     leader = false;
6150b57cec5SDimitry Andric   }
6160b57cec5SDimitry Andric };
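// Illustrative reading of a kmp_hw_thread_t entry: ids[] holds the hardware
// id reported for each topology level (indexed consistently with the owning
// kmp_topology_t's types[] array), sub_ids[] holds the logical renumbering of
// those ids computed by kmp_topology_t, and os_id is the OS logical processor
// number backing this entry.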
6170b57cec5SDimitry Andric 
618*fe6060f1SDimitry Andric class kmp_topology_t {
619*fe6060f1SDimitry Andric 
620*fe6060f1SDimitry Andric   struct flags_t {
621*fe6060f1SDimitry Andric     int uniform : 1;
622*fe6060f1SDimitry Andric     int reserved : 31;
6230b57cec5SDimitry Andric   };
6240b57cec5SDimitry Andric 
625*fe6060f1SDimitry Andric   int depth;
626*fe6060f1SDimitry Andric 
627*fe6060f1SDimitry Andric   // The following arrays are all 'depth' long
628*fe6060f1SDimitry Andric 
629*fe6060f1SDimitry Andric   // Ordered array of the types in the topology
630*fe6060f1SDimitry Andric   kmp_hw_t *types;
631*fe6060f1SDimitry Andric 
632*fe6060f1SDimitry Andric   // Quick per-level topology ratios; for non-uniform topologies,
633*fe6060f1SDimitry Andric   // each entry holds the max number of itemA per itemB,
634*fe6060f1SDimitry Andric   // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
635*fe6060f1SDimitry Andric   int *ratio;
636*fe6060f1SDimitry Andric 
637*fe6060f1SDimitry Andric   // Storage containing the absolute number of objects at each topology layer
638*fe6060f1SDimitry Andric   int *count;
639*fe6060f1SDimitry Andric 
640*fe6060f1SDimitry Andric   // The hardware threads array
641*fe6060f1SDimitry Andric   // hw_threads is num_hw_threads long
642*fe6060f1SDimitry Andric   // Each hw_thread's ids and sub_ids are depth deep
643*fe6060f1SDimitry Andric   int num_hw_threads;
644*fe6060f1SDimitry Andric   kmp_hw_thread_t *hw_threads;
645*fe6060f1SDimitry Andric 
646*fe6060f1SDimitry Andric   // Equivalence hash where the key is the hardware topology item
647*fe6060f1SDimitry Andric   // and the value is the equivalent hardware topology type in the
648*fe6060f1SDimitry Andric   // types[] array; if the value is KMP_HW_UNKNOWN, then there is no
649*fe6060f1SDimitry Andric   // known equivalence for the topology type
650*fe6060f1SDimitry Andric   kmp_hw_t equivalent[KMP_HW_LAST];
651*fe6060f1SDimitry Andric 
652*fe6060f1SDimitry Andric   // Flags describing the topology
653*fe6060f1SDimitry Andric   flags_t flags;
654*fe6060f1SDimitry Andric 
655*fe6060f1SDimitry Andric   // Count each item & get the num x's per y
656*fe6060f1SDimitry Andric   // e.g., get the number of cores and the number of threads per core
657*fe6060f1SDimitry Andric   // for each (x, y) in (KMP_HW_* , KMP_HW_*)
658*fe6060f1SDimitry Andric   void _gather_enumeration_information();
659*fe6060f1SDimitry Andric 
660*fe6060f1SDimitry Andric   // Remove layers that don't add information to the topology.
661*fe6060f1SDimitry Andric   // This is done by having the layer take on the id = UNKNOWN_ID (-1)
662*fe6060f1SDimitry Andric   void _remove_radix1_layers();
663*fe6060f1SDimitry Andric 
664*fe6060f1SDimitry Andric   // Find out if the topology is uniform
665*fe6060f1SDimitry Andric   void _discover_uniformity();
666*fe6060f1SDimitry Andric 
667*fe6060f1SDimitry Andric   // Set all the sub_ids for each hardware thread
668*fe6060f1SDimitry Andric   void _set_sub_ids();
669*fe6060f1SDimitry Andric 
670*fe6060f1SDimitry Andric   // Set global affinity variables describing the number of threads per
671*fe6060f1SDimitry Andric   // core, the number of packages, the number of cores per package, and
672*fe6060f1SDimitry Andric   // the number of cores.
673*fe6060f1SDimitry Andric   void _set_globals();
674*fe6060f1SDimitry Andric 
675*fe6060f1SDimitry Andric   // Set the last level cache equivalent type
676*fe6060f1SDimitry Andric   void _set_last_level_cache();
677*fe6060f1SDimitry Andric 
678*fe6060f1SDimitry Andric public:
679*fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
680*fe6060f1SDimitry Andric   kmp_topology_t() = delete;
681*fe6060f1SDimitry Andric   kmp_topology_t(const kmp_topology_t &t) = delete;
682*fe6060f1SDimitry Andric   kmp_topology_t(kmp_topology_t &&t) = delete;
683*fe6060f1SDimitry Andric   kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
684*fe6060f1SDimitry Andric   kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
685*fe6060f1SDimitry Andric 
686*fe6060f1SDimitry Andric   static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
687*fe6060f1SDimitry Andric   static void deallocate(kmp_topology_t *);
688*fe6060f1SDimitry Andric 
689*fe6060f1SDimitry Andric   // Functions used in create_map() routines
690*fe6060f1SDimitry Andric   kmp_hw_thread_t &at(int index) {
691*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
692*fe6060f1SDimitry Andric     return hw_threads[index];
693*fe6060f1SDimitry Andric   }
694*fe6060f1SDimitry Andric   const kmp_hw_thread_t &at(int index) const {
695*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
696*fe6060f1SDimitry Andric     return hw_threads[index];
697*fe6060f1SDimitry Andric   }
698*fe6060f1SDimitry Andric   int get_num_hw_threads() const { return num_hw_threads; }
699*fe6060f1SDimitry Andric   void sort_ids() {
700*fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
701*fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_ids);
702*fe6060f1SDimitry Andric   }
703*fe6060f1SDimitry Andric   // Check if the hardware ids are unique; if they are,
704*fe6060f1SDimitry Andric   // return true, otherwise return false
705*fe6060f1SDimitry Andric   bool check_ids() const;
706*fe6060f1SDimitry Andric 
707*fe6060f1SDimitry Andric   // Function to call after the create_map() routine
708*fe6060f1SDimitry Andric   void canonicalize();
709*fe6060f1SDimitry Andric   void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
710*fe6060f1SDimitry Andric 
711*fe6060f1SDimitry Andric   // Functions used after canonicalize() is called
712*fe6060f1SDimitry Andric   bool filter_hw_subset();
713*fe6060f1SDimitry Andric   bool is_close(int hwt1, int hwt2, int level) const;
714*fe6060f1SDimitry Andric   bool is_uniform() const { return flags.uniform; }
715*fe6060f1SDimitry Andric   // Return the equivalent type for this type within the topology;
716*fe6060f1SDimitry Andric   // returns KMP_HW_UNKNOWN when there is no equivalent type
717*fe6060f1SDimitry Andric   kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
718*fe6060f1SDimitry Andric   // Set type1 = type2
719*fe6060f1SDimitry Andric   void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
720*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
721*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
722*fe6060f1SDimitry Andric     kmp_hw_t real_type2 = equivalent[type2];
723*fe6060f1SDimitry Andric     if (real_type2 == KMP_HW_UNKNOWN)
724*fe6060f1SDimitry Andric       real_type2 = type2;
725*fe6060f1SDimitry Andric     equivalent[type1] = real_type2;
726*fe6060f1SDimitry Andric     // This loop is required since any of the types may have been set to
727*fe6060f1SDimitry Andric     // be equivalent to type1.  They all must be checked and reset to type2.
728*fe6060f1SDimitry Andric     KMP_FOREACH_HW_TYPE(type) {
729*fe6060f1SDimitry Andric       if (equivalent[type] == type1) {
730*fe6060f1SDimitry Andric         equivalent[type] = real_type2;
731*fe6060f1SDimitry Andric       }
732*fe6060f1SDimitry Andric     }
733*fe6060f1SDimitry Andric   }
734*fe6060f1SDimitry Andric   // Calculate the number of objects of the type at level1
735*fe6060f1SDimitry Andric   // per object of the type at level2 (e.g., number of threads per core)
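  // For example (illustrative), with ratio = { 4, 6, 2 } for
  // { package, core, thread }, calculate_ratio(2, 0) == 2 * 6 == 12
  // hardware threads per package.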
736*fe6060f1SDimitry Andric   int calculate_ratio(int level1, int level2) const {
737*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
738*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
739*fe6060f1SDimitry Andric     int r = 1;
740*fe6060f1SDimitry Andric     for (int level = level1; level > level2; --level)
741*fe6060f1SDimitry Andric       r *= ratio[level];
742*fe6060f1SDimitry Andric     return r;
743*fe6060f1SDimitry Andric   }
744*fe6060f1SDimitry Andric   int get_ratio(int level) const {
745*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
746*fe6060f1SDimitry Andric     return ratio[level];
747*fe6060f1SDimitry Andric   }
748*fe6060f1SDimitry Andric   int get_depth() const { return depth; }
749*fe6060f1SDimitry Andric   kmp_hw_t get_type(int level) const {
750*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
751*fe6060f1SDimitry Andric     return types[level];
752*fe6060f1SDimitry Andric   }
753*fe6060f1SDimitry Andric   int get_level(kmp_hw_t type) const {
754*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
755*fe6060f1SDimitry Andric     int eq_type = equivalent[type];
756*fe6060f1SDimitry Andric     if (eq_type == KMP_HW_UNKNOWN)
7570b57cec5SDimitry Andric       return -1;
758*fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i)
759*fe6060f1SDimitry Andric       if (types[i] == eq_type)
760*fe6060f1SDimitry Andric         return i;
761*fe6060f1SDimitry Andric     return -1;
7620b57cec5SDimitry Andric   }
763*fe6060f1SDimitry Andric   int get_count(int level) const {
764*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
765*fe6060f1SDimitry Andric     return count[level];
7660b57cec5SDimitry Andric   }
767*fe6060f1SDimitry Andric #if KMP_AFFINITY_SUPPORTED
768*fe6060f1SDimitry Andric   void sort_compact() {
769*fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
770*fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_compact);
771*fe6060f1SDimitry Andric   }
772*fe6060f1SDimitry Andric #endif
773*fe6060f1SDimitry Andric   void print(const char *env_var = "KMP_AFFINITY") const;
774*fe6060f1SDimitry Andric   void dump() const;
775*fe6060f1SDimitry Andric };
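// Typical lifecycle, as an illustrative sketch only (the real flow lives in
// the create_map() routines that populate and publish the topology):
//
//   kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};
//   kmp_topology_t *topo = kmp_topology_t::allocate(nproc, 3, types);
//   for (int i = 0; i < nproc; ++i) {
//     kmp_hw_thread_t &hwt = topo->at(i);
//     hwt.clear();
//     // fill hwt.os_id and hwt.ids[] from the discovery method in use
//   }
//   topo->sort_ids();
//   topo->canonicalize(); // gathers counts/ratios and sets the globals
//   if (topo->check_ids())
//     __kmp_topology = topo;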
776*fe6060f1SDimitry Andric 
777*fe6060f1SDimitry Andric class kmp_hw_subset_t {
778*fe6060f1SDimitry Andric public:
779*fe6060f1SDimitry Andric   struct item_t {
780*fe6060f1SDimitry Andric     int num;
781*fe6060f1SDimitry Andric     kmp_hw_t type;
782*fe6060f1SDimitry Andric     int offset;
783*fe6060f1SDimitry Andric   };
784*fe6060f1SDimitry Andric 
785*fe6060f1SDimitry Andric private:
786*fe6060f1SDimitry Andric   int depth;
787*fe6060f1SDimitry Andric   int capacity;
788*fe6060f1SDimitry Andric   item_t *items;
789*fe6060f1SDimitry Andric   kmp_uint64 set;
790*fe6060f1SDimitry Andric   bool absolute;
791*fe6060f1SDimitry Andric   // The set must be able to handle up to KMP_HW_LAST number of layers
792*fe6060f1SDimitry Andric   KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
793*fe6060f1SDimitry Andric 
794*fe6060f1SDimitry Andric public:
795*fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
796*fe6060f1SDimitry Andric   kmp_hw_subset_t() = delete;
797*fe6060f1SDimitry Andric   kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
798*fe6060f1SDimitry Andric   kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
799*fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
800*fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
801*fe6060f1SDimitry Andric 
802*fe6060f1SDimitry Andric   static kmp_hw_subset_t *allocate() {
803*fe6060f1SDimitry Andric     int initial_capacity = 5;
804*fe6060f1SDimitry Andric     kmp_hw_subset_t *retval =
805*fe6060f1SDimitry Andric         (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
806*fe6060f1SDimitry Andric     retval->depth = 0;
807*fe6060f1SDimitry Andric     retval->capacity = initial_capacity;
808*fe6060f1SDimitry Andric     retval->set = 0ull;
809*fe6060f1SDimitry Andric     retval->absolute = false;
810*fe6060f1SDimitry Andric     retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
811*fe6060f1SDimitry Andric     return retval;
812*fe6060f1SDimitry Andric   }
813*fe6060f1SDimitry Andric   static void deallocate(kmp_hw_subset_t *subset) {
814*fe6060f1SDimitry Andric     __kmp_free(subset->items);
815*fe6060f1SDimitry Andric     __kmp_free(subset);
816*fe6060f1SDimitry Andric   }
817*fe6060f1SDimitry Andric   void set_absolute() { absolute = true; }
818*fe6060f1SDimitry Andric   bool is_absolute() const { return absolute; }
819*fe6060f1SDimitry Andric   void push_back(int num, kmp_hw_t type, int offset) {
820*fe6060f1SDimitry Andric     if (depth == capacity - 1) {
821*fe6060f1SDimitry Andric       capacity *= 2;
822*fe6060f1SDimitry Andric       item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
823*fe6060f1SDimitry Andric       for (int i = 0; i < depth; ++i)
824*fe6060f1SDimitry Andric         new_items[i] = items[i];
825*fe6060f1SDimitry Andric       __kmp_free(items);
826*fe6060f1SDimitry Andric       items = new_items;
827*fe6060f1SDimitry Andric     }
828*fe6060f1SDimitry Andric     items[depth].num = num;
829*fe6060f1SDimitry Andric     items[depth].type = type;
830*fe6060f1SDimitry Andric     items[depth].offset = offset;
831*fe6060f1SDimitry Andric     depth++;
832*fe6060f1SDimitry Andric     set |= (1ull << type);
833*fe6060f1SDimitry Andric   }
834*fe6060f1SDimitry Andric   int get_depth() const { return depth; }
835*fe6060f1SDimitry Andric   const item_t &at(int index) const {
836*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
837*fe6060f1SDimitry Andric     return items[index];
838*fe6060f1SDimitry Andric   }
839*fe6060f1SDimitry Andric   item_t &at(int index) {
840*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
841*fe6060f1SDimitry Andric     return items[index];
842*fe6060f1SDimitry Andric   }
843*fe6060f1SDimitry Andric   void remove(int index) {
844*fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
845*fe6060f1SDimitry Andric     set &= ~(1ull << items[index].type);
846*fe6060f1SDimitry Andric     for (int j = index + 1; j < depth; ++j) {
847*fe6060f1SDimitry Andric       items[j - 1] = items[j];
848*fe6060f1SDimitry Andric     }
849*fe6060f1SDimitry Andric     depth--;
850*fe6060f1SDimitry Andric   }
851*fe6060f1SDimitry Andric   bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
852*fe6060f1SDimitry Andric   void dump() const {
853*fe6060f1SDimitry Andric     printf("**********************\n");
854*fe6060f1SDimitry Andric     printf("*** kmp_hw_subset: ***\n");
855*fe6060f1SDimitry Andric     printf("* depth: %d\n", depth);
856*fe6060f1SDimitry Andric     printf("* items:\n");
857*fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i) {
858*fe6060f1SDimitry Andric       printf("num: %d, type: %s, offset: %d\n", items[i].num,
859*fe6060f1SDimitry Andric              __kmp_hw_get_keyword(items[i].type), items[i].offset);
860*fe6060f1SDimitry Andric     }
861*fe6060f1SDimitry Andric     printf("* set: 0x%llx\n", set);
862*fe6060f1SDimitry Andric     printf("* absolute: %d\n", absolute);
863*fe6060f1SDimitry Andric     printf("**********************\n");
864*fe6060f1SDimitry Andric   }
865*fe6060f1SDimitry Andric };
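// Illustrative sketch, assuming this class stores the parsed KMP_HW_SUBSET
// setting: a subset such as "2s,4c,2t" would be represented as three items,
// built roughly like this:
//
//   kmp_hw_subset_t *subset = kmp_hw_subset_t::allocate();
//   subset->push_back(2, KMP_HW_SOCKET, 0);
//   subset->push_back(4, KMP_HW_CORE, 0);
//   subset->push_back(2, KMP_HW_THREAD, 0);
//   // later consumed by kmp_topology_t::filter_hw_subset() via __kmp_hw_subset
//   kmp_hw_subset_t::deallocate(subset);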
866*fe6060f1SDimitry Andric 
867*fe6060f1SDimitry Andric extern kmp_topology_t *__kmp_topology;
868*fe6060f1SDimitry Andric extern kmp_hw_subset_t *__kmp_hw_subset;
8690b57cec5SDimitry Andric 
8700b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
8710b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
8720b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
8730b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
8740b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
8750b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
8760b57cec5SDimitry Andric class hierarchy_info {
8770b57cec5SDimitry Andric public:
8780b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
8790b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
8800b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
8810b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
8820b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
8830b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
8840b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
8850b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
8860b57cec5SDimitry Andric       hierarchy each time we add a level. */
8870b57cec5SDimitry Andric   kmp_uint32 maxLevels;
8880b57cec5SDimitry Andric 
8890b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
8900b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
8910b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
8920b57cec5SDimitry Andric       all but one trailing 1. */
8930b57cec5SDimitry Andric   kmp_uint32 depth;
8940b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
8950b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
8960b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
8970b57cec5SDimitry Andric   // 2=initialization in progress
8980b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
8990b57cec5SDimitry Andric 
9000b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
9010b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
9020b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
9030b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
9040b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
9050b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
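  /** skipPerLevel[i] is the stride, in thread ids, between adjacent subtrees
      rooted at level i (i.e., the number of leaf threads one such subtree
      covers); it is initialized alongside numPerLevel in init() and used by
      the hierarchical barrier to locate a thread's parent and children. */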
9060b57cec5SDimitry Andric 
907*fe6060f1SDimitry Andric   void deriveLevels() {
908*fe6060f1SDimitry Andric     int hier_depth = __kmp_topology->get_depth();
909*fe6060f1SDimitry Andric     for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
910*fe6060f1SDimitry Andric       numPerLevel[level] = __kmp_topology->get_ratio(i);
9110b57cec5SDimitry Andric     }
9120b57cec5SDimitry Andric   }
9130b57cec5SDimitry Andric 
9140b57cec5SDimitry Andric   hierarchy_info()
9150b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
9160b57cec5SDimitry Andric 
9170b57cec5SDimitry Andric   void fini() {
9180b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
9190b57cec5SDimitry Andric       __kmp_free(numPerLevel);
9200b57cec5SDimitry Andric       numPerLevel = NULL;
9210b57cec5SDimitry Andric       uninitialized = not_initialized;
9220b57cec5SDimitry Andric     }
9230b57cec5SDimitry Andric   }
9240b57cec5SDimitry Andric 
925*fe6060f1SDimitry Andric   void init(int num_addrs) {
9260b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
9270b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
9280b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
9290b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
9300b57cec5SDimitry Andric         KMP_CPU_PAUSE();
9310b57cec5SDimitry Andric       return;
9320b57cec5SDimitry Andric     }
9330b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
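    // Only the thread that won the CAS above (not_initialized -> initializing)
    // reaches this point; every other caller spins in the loop above until
    // 'uninitialized' flips to 'initialized' at the end of this function.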
9340b57cec5SDimitry Andric 
9350b57cec5SDimitry Andric     /* Explicitly initialize the data fields here to avoid reading dirty values
9360b57cec5SDimitry Andric        when the static library is re-initialized multiple times (e.g. when a
9370b57cec5SDimitry Andric        non-OpenMP thread repeatedly launches and joins a thread that uses
9380b57cec5SDimitry Andric        OpenMP). */
9390b57cec5SDimitry Andric     depth = 1;
9400b57cec5SDimitry Andric     resizing = 0;
9410b57cec5SDimitry Andric     maxLevels = 7;
9420b57cec5SDimitry Andric     numPerLevel =
9430b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
9440b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
9450b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
9460b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
9470b57cec5SDimitry Andric       numPerLevel[i] = 1;
9480b57cec5SDimitry Andric       skipPerLevel[i] = 1;
9490b57cec5SDimitry Andric     }
9500b57cec5SDimitry Andric 
9510b57cec5SDimitry Andric     // Derive the hierarchy from the machine topology when it is available
952*fe6060f1SDimitry Andric     if (__kmp_topology && __kmp_topology->get_depth() > 0) {
953*fe6060f1SDimitry Andric       deriveLevels();
9540b57cec5SDimitry Andric     } else {
9550b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
9560b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
9570b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
9580b57cec5SDimitry Andric         numPerLevel[1]++;
9590b57cec5SDimitry Andric     }
9600b57cec5SDimitry Andric 
9610b57cec5SDimitry Andric     base_num_threads = num_addrs;
9620b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
9630b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
9640b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
9650b57cec5SDimitry Andric         depth++;
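    // e.g. with no topology info and num_addrs = 64, numPerLevel = {4, 16, 1,
    // ...} gives depth = 3: the run of trailing 1s contributes a single level.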
9660b57cec5SDimitry Andric 
9670b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
9680b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
9690b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
9700b57cec5SDimitry Andric     if (branch < minBranch)
9710b57cec5SDimitry Andric       branch = minBranch;
9720b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
9730b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
9740b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
9750b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
9760b57cec5SDimitry Andric           numPerLevel[d]++;
9770b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
9780b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
9790b57cec5SDimitry Andric           depth++;
9800b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
9810b57cec5SDimitry Andric       }
9820b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
9830b57cec5SDimitry Andric         branch = branch >> 1;
9840b57cec5SDimitry Andric         if (branch < 4)
9850b57cec5SDimitry Andric           branch = minBranch;
9860b57cec5SDimitry Andric       }
9870b57cec5SDimitry Andric     }
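    // Illustrative trace: continuing the num_addrs = 64 case, the loop halves
    // the over-wide level 1 and pushes the factor upward, turning
    // numPerLevel = {4, 16, 1, ...} into {4, 4, 4, 1, ...} with depth = 4, so
    // no level fans out wider than 'branch' (and level 0 never exceeds
    // maxLeaves).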
9880b57cec5SDimitry Andric 
9890b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
9900b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
9910b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
9920b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
9930b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
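    // For the num_addrs = 64 example this yields
    //   skipPerLevel = {1, 4, 16, 64, 128, 256, 512}
    // where the entries past the real depth (128, 256, 512) are the
    // pre-doubled oversubscription levels described in the maxLevels comment:
    // each extra level doubles the thread capacity of the tree.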
9940b57cec5SDimitry Andric 
9950b57cec5SDimitry Andric     uninitialized = initialized; // One writer
9960b57cec5SDimitry Andric   }
9970b57cec5SDimitry Andric 
9980b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
9990b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
10000b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
10010b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
10020b57cec5SDimitry Andric       KMP_CPU_PAUSE();
10030b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
10040b57cec5SDimitry Andric         return;
10050b57cec5SDimitry Andric       else // try to resize
10060b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
10070b57cec5SDimitry Andric     }
10080b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
10090b57cec5SDimitry Andric     if (nproc <= base_num_threads)
10100b57cec5SDimitry Andric       return; // happy with other thread's resize
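    // Past this point this thread owns the 'resizing' flag and is the only
    // resizer; contenders either accepted another thread's larger hierarchy or
    // retried the CAS in the loop above.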
10110b57cec5SDimitry Andric 
10120b57cec5SDimitry Andric     // Calculate new maxLevels
10130b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
10140b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
10150b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
10160b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
10170b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
10180b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
10190b57cec5SDimitry Andric       old_sz *= 2;
10200b57cec5SDimitry Andric       depth++;
10210b57cec5SDimitry Andric     }
10220b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
10230b57cec5SDimitry Andric       while (nproc > old_sz) {
10240b57cec5SDimitry Andric         old_sz *= 2;
10250b57cec5SDimitry Andric         incs++;
10260b57cec5SDimitry Andric         depth++;
10270b57cec5SDimitry Andric       }
10280b57cec5SDimitry Andric       maxLevels += incs;
10290b57cec5SDimitry Andric 
10300b57cec5SDimitry Andric       // Resize arrays
10310b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
10320b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
10330b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
10340b57cec5SDimitry Andric       numPerLevel =
10350b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
10360b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
10370b57cec5SDimitry Andric 
10380b57cec5SDimitry Andric       // Copy old elements from old arrays
1039e8d8bef9SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1040e8d8bef9SDimitry Andric         // carry over the per-level values computed so far
10410b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
10420b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
10430b57cec5SDimitry Andric       }
10440b57cec5SDimitry Andric 
10450b57cec5SDimitry Andric       // Init new elements in arrays to 1
1046e8d8bef9SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1047e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
10480b57cec5SDimitry Andric         numPerLevel[i] = 1;
10490b57cec5SDimitry Andric         skipPerLevel[i] = 1;
10500b57cec5SDimitry Andric       }
10510b57cec5SDimitry Andric 
10520b57cec5SDimitry Andric       // Free old arrays
10530b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
10540b57cec5SDimitry Andric     }
10550b57cec5SDimitry Andric 
10560b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
10570b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
10580b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
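    // Illustrative trace: starting from the init(64) state sketched above
    // (depth = 4, maxLevels = 7, skipPerLevel = {..., 64, 128, 256, 512}),
    // resize(1000) first activates the pre-doubled levels (old_sz 64 -> 512,
    // depth 4 -> 7), then grows the arrays by one level (maxLevels 7 -> 8,
    // depth = 8) and fills skipPerLevel[7] = 1024 >= 1000.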
10590b57cec5SDimitry Andric 
10600b57cec5SDimitry Andric     base_num_threads = nproc;
10610b57cec5SDimitry Andric     resizing = 0; // One writer
10620b57cec5SDimitry Andric   }
10630b57cec5SDimitry Andric };
10640b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
1065