xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
140b57cec5SDimitry Andric #define KMP_AFFINITY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp.h"
170b57cec5SDimitry Andric #include "kmp_os.h"
180eae32dcSDimitry Andric #include <limits>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
210b57cec5SDimitry Andric #if KMP_USE_HWLOC
220b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
230b57cec5SDimitry Andric public:
240b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
250b57cec5SDimitry Andric     hwloc_cpuset_t mask;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric   public:
280b57cec5SDimitry Andric     Mask() {
290b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
300b57cec5SDimitry Andric       this->zero();
310b57cec5SDimitry Andric     }
320b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
330b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
340b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
350b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
360b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
370b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
380b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
390b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
400b57cec5SDimitry Andric     }
410b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
420b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
430b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
440b57cec5SDimitry Andric     }
450b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
460b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
470b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
480b57cec5SDimitry Andric     }
490b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
500b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
510b57cec5SDimitry Andric     int end() const override { return -1; }
520b57cec5SDimitry Andric     int next(int previous) const override {
530b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
540b57cec5SDimitry Andric     }
550b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
560b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
570b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
58e8d8bef9SDimitry Andric       long retval =
590b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
600b57cec5SDimitry Andric       if (retval >= 0) {
610b57cec5SDimitry Andric         return 0;
620b57cec5SDimitry Andric       }
630b57cec5SDimitry Andric       int error = errno;
640b57cec5SDimitry Andric       if (abort_on_error) {
650b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
660b57cec5SDimitry Andric       }
670b57cec5SDimitry Andric       return error;
680b57cec5SDimitry Andric     }
690b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
700b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
72e8d8bef9SDimitry Andric       long retval =
730b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
740b57cec5SDimitry Andric       if (retval >= 0) {
750b57cec5SDimitry Andric         return 0;
760b57cec5SDimitry Andric       }
770b57cec5SDimitry Andric       int error = errno;
780b57cec5SDimitry Andric       if (abort_on_error) {
790b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
800b57cec5SDimitry Andric       }
810b57cec5SDimitry Andric       return error;
820b57cec5SDimitry Andric     }
83e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS
84e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
85e8d8bef9SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
86e8d8bef9SDimitry Andric                   "Illegal set process affinity operation when not capable");
87e8d8bef9SDimitry Andric       int error = 0;
88e8d8bef9SDimitry Andric       const hwloc_topology_support *support =
89e8d8bef9SDimitry Andric           hwloc_topology_get_support(__kmp_hwloc_topology);
90e8d8bef9SDimitry Andric       if (support->cpubind->set_proc_cpubind) {
91e8d8bef9SDimitry Andric         int retval;
92e8d8bef9SDimitry Andric         retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
93e8d8bef9SDimitry Andric                                    HWLOC_CPUBIND_PROCESS);
94e8d8bef9SDimitry Andric         if (retval >= 0)
95e8d8bef9SDimitry Andric           return 0;
96e8d8bef9SDimitry Andric         error = errno;
97e8d8bef9SDimitry Andric         if (abort_on_error)
98e8d8bef9SDimitry Andric           __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
99e8d8bef9SDimitry Andric       }
100e8d8bef9SDimitry Andric       return error;
101e8d8bef9SDimitry Andric     }
102e8d8bef9SDimitry Andric #endif
1030b57cec5SDimitry Andric     int get_proc_group() const override {
1040b57cec5SDimitry Andric       int group = -1;
1050b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1060b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
1070b57cec5SDimitry Andric         return 1;
1080b57cec5SDimitry Andric       }
1090b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
1100b57cec5SDimitry Andric         // On windows, the long type is always 32 bits
1110b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
1120b57cec5SDimitry Andric         unsigned long second_32_bits =
1130b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
1140b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
1150b57cec5SDimitry Andric           continue;
1160b57cec5SDimitry Andric         }
1170b57cec5SDimitry Andric         if (group >= 0) {
1180b57cec5SDimitry Andric           return -1;
1190b57cec5SDimitry Andric         }
1200b57cec5SDimitry Andric         group = i;
1210b57cec5SDimitry Andric       }
1220b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1230b57cec5SDimitry Andric       return group;
1240b57cec5SDimitry Andric     }
1250b57cec5SDimitry Andric   };
1260b57cec5SDimitry Andric   void determine_capable(const char *var) override {
1270b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
1280b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
1290b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
1300b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
131*bdd1243dSDimitry Andric         if (__kmp_affinity.flags.verbose) {
1320b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
1330b57cec5SDimitry Andric         }
134*bdd1243dSDimitry Andric       }
1350b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
1360b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
137*bdd1243dSDimitry Andric         if (__kmp_affinity.flags.verbose) {
1380b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
1390b57cec5SDimitry Andric         }
1400b57cec5SDimitry Andric       }
141*bdd1243dSDimitry Andric     }
1420b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
1430b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
1440b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
1450b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
1460b57cec5SDimitry Andric     // hwloc_* API functions?
1470b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
1480b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
1490b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
1500b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
1510b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
1520b57cec5SDimitry Andric     } else {
1530b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
1540b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
1550b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
1560b57cec5SDimitry Andric     }
1570b57cec5SDimitry Andric   }
1580b57cec5SDimitry Andric   void bind_thread(int which) override {
1590b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
1600b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
1610b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
1620b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
1630b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
1640b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
1650b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
1660b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
1670b57cec5SDimitry Andric   }
1680b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
1690b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
1700b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
1710b57cec5SDimitry Andric     return new Mask[num];
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
1740b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1750b57cec5SDimitry Andric     delete[] hwloc_array;
1760b57cec5SDimitry Andric   }
1770b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
1780b57cec5SDimitry Andric                                       int index) override {
1790b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1800b57cec5SDimitry Andric     return &(hwloc_array[index]);
1810b57cec5SDimitry Andric   }
1820b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
1830b57cec5SDimitry Andric };
1840b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
1850b57cec5SDimitry Andric 
186489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD
1870b57cec5SDimitry Andric #if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change:
   they are set in stone forever. */
1920b57cec5SDimitry Andric #include <sys/syscall.h>
1930b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM
1940b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
1950b57cec5SDimitry Andric #define __NR_sched_setaffinity 241
1960b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241
1970b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
1980b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
1990b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2000b57cec5SDimitry Andric #define __NR_sched_getaffinity 242
2010b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242
2020b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2030b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2040b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64
2050b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2060b57cec5SDimitry Andric #define __NR_sched_setaffinity 122
2070b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122
2080b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2090b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2100b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2110b57cec5SDimitry Andric #define __NR_sched_getaffinity 123
2120b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123
2130b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2140b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2150b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
2160b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2170b57cec5SDimitry Andric #define __NR_sched_setaffinity 203
2180b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203
2190b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2200b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2210b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2220b57cec5SDimitry Andric #define __NR_sched_getaffinity 204
2230b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204
2240b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2250b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2260b57cec5SDimitry Andric #elif KMP_ARCH_PPC64
2270b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2280b57cec5SDimitry Andric #define __NR_sched_setaffinity 222
2290b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222
2300b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2310b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2320b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2330b57cec5SDimitry Andric #define __NR_sched_getaffinity 223
2340b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223
2350b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2360b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2370b57cec5SDimitry Andric #elif KMP_ARCH_MIPS
2380b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2390b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239
2400b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239
2410b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2420b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2430b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2440b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240
2450b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240
2460b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2470b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
2480b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64
2490b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
2500b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195
2510b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195
2520b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
2530b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
2540b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
2550b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196
2560b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196
2570b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
2580b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
259*bdd1243dSDimitry Andric #elif KMP_ARCH_LOONGARCH64
260*bdd1243dSDimitry Andric #ifndef __NR_sched_setaffinity
261*bdd1243dSDimitry Andric #define __NR_sched_setaffinity 122
262*bdd1243dSDimitry Andric #elif __NR_sched_setaffinity != 122
263*bdd1243dSDimitry Andric #error Wrong code for setaffinity system call.
264*bdd1243dSDimitry Andric #endif /* __NR_sched_setaffinity */
265*bdd1243dSDimitry Andric #ifndef __NR_sched_getaffinity
266*bdd1243dSDimitry Andric #define __NR_sched_getaffinity 123
267*bdd1243dSDimitry Andric #elif __NR_sched_getaffinity != 123
268*bdd1243dSDimitry Andric #error Wrong code for getaffinity system call.
269*bdd1243dSDimitry Andric #endif /* __NR_sched_getaffinity */
270*bdd1243dSDimitry Andric #elif KMP_ARCH_RISCV64
271*bdd1243dSDimitry Andric #ifndef __NR_sched_setaffinity
272*bdd1243dSDimitry Andric #define __NR_sched_setaffinity 122
273*bdd1243dSDimitry Andric #elif __NR_sched_setaffinity != 122
274*bdd1243dSDimitry Andric #error Wrong code for setaffinity system call.
275*bdd1243dSDimitry Andric #endif /* __NR_sched_setaffinity */
276*bdd1243dSDimitry Andric #ifndef __NR_sched_getaffinity
277*bdd1243dSDimitry Andric #define __NR_sched_getaffinity 123
278*bdd1243dSDimitry Andric #elif __NR_sched_getaffinity != 123
279*bdd1243dSDimitry Andric #error Wrong code for getaffinity system call.
280*bdd1243dSDimitry Andric #endif /* __NR_sched_getaffinity */
281*bdd1243dSDimitry Andric #else
2820b57cec5SDimitry Andric #error Unknown or unsupported architecture
2830b57cec5SDimitry Andric #endif /* KMP_ARCH_* */
284489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
285489b1cf2SDimitry Andric #include <pthread.h>
286489b1cf2SDimitry Andric #include <pthread_np.h>
287489b1cf2SDimitry Andric #endif
2880b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
2890b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
290e8d8bef9SDimitry Andric     typedef unsigned long mask_t;
291e8d8bef9SDimitry Andric     typedef decltype(__kmp_affin_mask_size) mask_size_type;
292e8d8bef9SDimitry Andric     static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
293e8d8bef9SDimitry Andric     static const mask_t ONE = 1;
294e8d8bef9SDimitry Andric     mask_size_type get_num_mask_types() const {
295e8d8bef9SDimitry Andric       return __kmp_affin_mask_size / sizeof(mask_t);
296e8d8bef9SDimitry Andric     }
2970b57cec5SDimitry Andric 
2980b57cec5SDimitry Andric   public:
2990b57cec5SDimitry Andric     mask_t *mask;
3000b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
3010b57cec5SDimitry Andric     ~Mask() {
3020b57cec5SDimitry Andric       if (mask)
3030b57cec5SDimitry Andric         __kmp_free(mask);
3040b57cec5SDimitry Andric     }
3050b57cec5SDimitry Andric     void set(int i) override {
306e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
3070b57cec5SDimitry Andric     }
3080b57cec5SDimitry Andric     bool is_set(int i) const override {
309e8d8bef9SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
3100b57cec5SDimitry Andric     }
3110b57cec5SDimitry Andric     void clear(int i) override {
312e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
3130b57cec5SDimitry Andric     }
3140b57cec5SDimitry Andric     void zero() override {
315e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
316e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
317e8d8bef9SDimitry Andric         mask[i] = (mask_t)0;
3180b57cec5SDimitry Andric     }
3190b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
3200b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
321e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
322e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3230b57cec5SDimitry Andric         mask[i] = convert->mask[i];
3240b57cec5SDimitry Andric     }
3250b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
3260b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
327e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
328e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3290b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
3300b57cec5SDimitry Andric     }
3310b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
3320b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
333e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
334e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3350b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
3360b57cec5SDimitry Andric     }
3370b57cec5SDimitry Andric     void bitwise_not() override {
338e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
339e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3400b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
3410b57cec5SDimitry Andric     }
3420b57cec5SDimitry Andric     int begin() const override {
3430b57cec5SDimitry Andric       int retval = 0;
3440b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3450b57cec5SDimitry Andric         ++retval;
3460b57cec5SDimitry Andric       return retval;
3470b57cec5SDimitry Andric     }
348e8d8bef9SDimitry Andric     int end() const override {
349e8d8bef9SDimitry Andric       int e;
350e8d8bef9SDimitry Andric       __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
351e8d8bef9SDimitry Andric       return e;
352e8d8bef9SDimitry Andric     }
3530b57cec5SDimitry Andric     int next(int previous) const override {
3540b57cec5SDimitry Andric       int retval = previous + 1;
3550b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3560b57cec5SDimitry Andric         ++retval;
3570b57cec5SDimitry Andric       return retval;
3580b57cec5SDimitry Andric     }
3590b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
3600b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
3610b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
362489b1cf2SDimitry Andric #if KMP_OS_LINUX
363e8d8bef9SDimitry Andric       long retval =
3640b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
365489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
366fe6060f1SDimitry Andric       int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
367fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
3685ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
369489b1cf2SDimitry Andric #endif
3700b57cec5SDimitry Andric       if (retval >= 0) {
3710b57cec5SDimitry Andric         return 0;
3720b57cec5SDimitry Andric       }
3730b57cec5SDimitry Andric       int error = errno;
3740b57cec5SDimitry Andric       if (abort_on_error) {
3750b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3760b57cec5SDimitry Andric       }
3770b57cec5SDimitry Andric       return error;
3780b57cec5SDimitry Andric     }
3790b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
3800b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
381e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
382489b1cf2SDimitry Andric #if KMP_OS_LINUX
383e8d8bef9SDimitry Andric       long retval =
3840b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
385489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
386fe6060f1SDimitry Andric       int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
387fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
3885ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
389489b1cf2SDimitry Andric #endif
3900b57cec5SDimitry Andric       if (retval >= 0) {
3910b57cec5SDimitry Andric         return 0;
3920b57cec5SDimitry Andric       }
3930b57cec5SDimitry Andric       int error = errno;
3940b57cec5SDimitry Andric       if (abort_on_error) {
3950b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3960b57cec5SDimitry Andric       }
3970b57cec5SDimitry Andric       return error;
3980b57cec5SDimitry Andric     }
3990b57cec5SDimitry Andric   };
4000b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
4010b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
4020b57cec5SDimitry Andric   }
4030b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
4040b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
4050b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
4060b57cec5SDimitry Andric     return retval;
4070b57cec5SDimitry Andric   }
4080b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
4090b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
4100b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
4110b57cec5SDimitry Andric     delete native_mask;
4120b57cec5SDimitry Andric   }
4130b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
4140b57cec5SDimitry Andric     return new Mask[num];
4150b57cec5SDimitry Andric   }
4160b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
4170b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
4180b57cec5SDimitry Andric     delete[] linux_array;
4190b57cec5SDimitry Andric   }
4200b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
4210b57cec5SDimitry Andric                                       int index) override {
4220b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
4230b57cec5SDimitry Andric     return &(linux_array[index]);
4240b57cec5SDimitry Andric   }
4250b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
4260b57cec5SDimitry Andric };
427489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
4280b57cec5SDimitry Andric 
4290b57cec5SDimitry Andric #if KMP_OS_WINDOWS
4300b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
4310b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
4320b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
4330b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
4340b57cec5SDimitry Andric     mask_t *mask;
4350b57cec5SDimitry Andric 
4360b57cec5SDimitry Andric   public:
4370b57cec5SDimitry Andric     Mask() {
4380b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
4390b57cec5SDimitry Andric     }
4400b57cec5SDimitry Andric     ~Mask() {
4410b57cec5SDimitry Andric       if (mask)
4420b57cec5SDimitry Andric         __kmp_free(mask);
4430b57cec5SDimitry Andric     }
4440b57cec5SDimitry Andric     void set(int i) override {
4450b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
4460b57cec5SDimitry Andric     }
4470b57cec5SDimitry Andric     bool is_set(int i) const override {
4480b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
4490b57cec5SDimitry Andric     }
4500b57cec5SDimitry Andric     void clear(int i) override {
4510b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
4520b57cec5SDimitry Andric     }
4530b57cec5SDimitry Andric     void zero() override {
4540b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4550b57cec5SDimitry Andric         mask[i] = 0;
4560b57cec5SDimitry Andric     }
4570b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
4580b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
4590b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4600b57cec5SDimitry Andric         mask[i] = convert->mask[i];
4610b57cec5SDimitry Andric     }
4620b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
4630b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4640b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4650b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
4660b57cec5SDimitry Andric     }
4670b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
4680b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4690b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4700b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
4710b57cec5SDimitry Andric     }
4720b57cec5SDimitry Andric     void bitwise_not() override {
4730b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4740b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
4750b57cec5SDimitry Andric     }
4760b57cec5SDimitry Andric     int begin() const override {
4770b57cec5SDimitry Andric       int retval = 0;
4780b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4790b57cec5SDimitry Andric         ++retval;
4800b57cec5SDimitry Andric       return retval;
4810b57cec5SDimitry Andric     }
4820b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
4830b57cec5SDimitry Andric     int next(int previous) const override {
4840b57cec5SDimitry Andric       int retval = previous + 1;
4850b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4860b57cec5SDimitry Andric         ++retval;
4870b57cec5SDimitry Andric       return retval;
4880b57cec5SDimitry Andric     }
489e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
490e8d8bef9SDimitry Andric       if (__kmp_num_proc_groups <= 1) {
491e8d8bef9SDimitry Andric         if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
492e8d8bef9SDimitry Andric           DWORD error = GetLastError();
493e8d8bef9SDimitry Andric           if (abort_on_error) {
494e8d8bef9SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
495e8d8bef9SDimitry Andric                         __kmp_msg_null);
496e8d8bef9SDimitry Andric           }
497e8d8bef9SDimitry Andric           return error;
498e8d8bef9SDimitry Andric         }
499e8d8bef9SDimitry Andric       }
500e8d8bef9SDimitry Andric       return 0;
501e8d8bef9SDimitry Andric     }
5020b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
5030b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
5040b57cec5SDimitry Andric         // Check for a valid mask.
5050b57cec5SDimitry Andric         GROUP_AFFINITY ga;
5060b57cec5SDimitry Andric         int group = get_proc_group();
5070b57cec5SDimitry Andric         if (group < 0) {
5080b57cec5SDimitry Andric           if (abort_on_error) {
5090b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
5100b57cec5SDimitry Andric           }
5110b57cec5SDimitry Andric           return -1;
5120b57cec5SDimitry Andric         }
5130b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
5140b57cec5SDimitry Andric         // and make the system call to set affinity.
5150b57cec5SDimitry Andric         ga.Group = group;
5160b57cec5SDimitry Andric         ga.Mask = mask[group];
5170b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
5180b57cec5SDimitry Andric 
5190b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
5200b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
5210b57cec5SDimitry Andric           DWORD error = GetLastError();
5220b57cec5SDimitry Andric           if (abort_on_error) {
5230b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
5240b57cec5SDimitry Andric                         __kmp_msg_null);
5250b57cec5SDimitry Andric           }
5260b57cec5SDimitry Andric           return error;
5270b57cec5SDimitry Andric         }
5280b57cec5SDimitry Andric       } else {
5290b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
5300b57cec5SDimitry Andric           DWORD error = GetLastError();
5310b57cec5SDimitry Andric           if (abort_on_error) {
5320b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
5330b57cec5SDimitry Andric                         __kmp_msg_null);
5340b57cec5SDimitry Andric           }
5350b57cec5SDimitry Andric           return error;
5360b57cec5SDimitry Andric         }
5370b57cec5SDimitry Andric       }
5380b57cec5SDimitry Andric       return 0;
5390b57cec5SDimitry Andric     }
5400b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
5410b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
5420b57cec5SDimitry Andric         this->zero();
5430b57cec5SDimitry Andric         GROUP_AFFINITY ga;
5440b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
5450b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
5460b57cec5SDimitry Andric           DWORD error = GetLastError();
5470b57cec5SDimitry Andric           if (abort_on_error) {
5480b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
5490b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5500b57cec5SDimitry Andric           }
5510b57cec5SDimitry Andric           return error;
5520b57cec5SDimitry Andric         }
5530b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
5540b57cec5SDimitry Andric             (ga.Mask == 0)) {
5550b57cec5SDimitry Andric           return -1;
5560b57cec5SDimitry Andric         }
5570b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
5580b57cec5SDimitry Andric       } else {
5590b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
5600b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
5610b57cec5SDimitry Andric           DWORD error = GetLastError();
5620b57cec5SDimitry Andric           if (abort_on_error) {
5630b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
5640b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5650b57cec5SDimitry Andric           }
5660b57cec5SDimitry Andric           return error;
5670b57cec5SDimitry Andric         }
5680b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
5690b57cec5SDimitry Andric         if (!retval) {
5700b57cec5SDimitry Andric           DWORD error = GetLastError();
5710b57cec5SDimitry Andric           if (abort_on_error) {
5720b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5730b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5740b57cec5SDimitry Andric           }
5750b57cec5SDimitry Andric           return error;
5760b57cec5SDimitry Andric         }
5770b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
5780b57cec5SDimitry Andric         if (!newMask) {
5790b57cec5SDimitry Andric           DWORD error = GetLastError();
5800b57cec5SDimitry Andric           if (abort_on_error) {
5810b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5820b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5830b57cec5SDimitry Andric           }
5840b57cec5SDimitry Andric         }
5850b57cec5SDimitry Andric         *mask = retval;
5860b57cec5SDimitry Andric       }
5870b57cec5SDimitry Andric       return 0;
5880b57cec5SDimitry Andric     }
5890b57cec5SDimitry Andric     int get_proc_group() const override {
5900b57cec5SDimitry Andric       int group = -1;
5910b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
5920b57cec5SDimitry Andric         return 1;
5930b57cec5SDimitry Andric       }
5940b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
5950b57cec5SDimitry Andric         if (mask[i] == 0)
5960b57cec5SDimitry Andric           continue;
5970b57cec5SDimitry Andric         if (group >= 0)
5980b57cec5SDimitry Andric           return -1;
5990b57cec5SDimitry Andric         group = i;
6000b57cec5SDimitry Andric       }
6010b57cec5SDimitry Andric       return group;
6020b57cec5SDimitry Andric     }
6030b57cec5SDimitry Andric   };
6040b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
6050b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
6060b57cec5SDimitry Andric   }
6070b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
6080b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
6090b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
6100b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
6110b57cec5SDimitry Andric     return new Mask[num];
6120b57cec5SDimitry Andric   }
6130b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
6140b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
6150b57cec5SDimitry Andric     delete[] windows_array;
6160b57cec5SDimitry Andric   }
6170b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
6180b57cec5SDimitry Andric                                       int index) override {
6190b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
6200b57cec5SDimitry Andric     return &(windows_array[index]);
6210b57cec5SDimitry Andric   }
6220b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
6230b57cec5SDimitry Andric };
6240b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
6250b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
6260b57cec5SDimitry Andric 
6270eae32dcSDimitry Andric // Describe an attribute for a level in the machine topology
6280eae32dcSDimitry Andric struct kmp_hw_attr_t {
6290eae32dcSDimitry Andric   int core_type : 8;
6300eae32dcSDimitry Andric   int core_eff : 8;
6310eae32dcSDimitry Andric   unsigned valid : 1;
6320eae32dcSDimitry Andric   unsigned reserved : 15;
6330eae32dcSDimitry Andric 
6340eae32dcSDimitry Andric   static const int UNKNOWN_CORE_EFF = -1;
6350eae32dcSDimitry Andric 
6360eae32dcSDimitry Andric   kmp_hw_attr_t()
6370eae32dcSDimitry Andric       : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
6380eae32dcSDimitry Andric         valid(0), reserved(0) {}
6390eae32dcSDimitry Andric   void set_core_type(kmp_hw_core_type_t type) {
6400eae32dcSDimitry Andric     valid = 1;
6410eae32dcSDimitry Andric     core_type = type;
6420eae32dcSDimitry Andric   }
6430eae32dcSDimitry Andric   void set_core_eff(int eff) {
6440eae32dcSDimitry Andric     valid = 1;
6450eae32dcSDimitry Andric     core_eff = eff;
6460eae32dcSDimitry Andric   }
6470eae32dcSDimitry Andric   kmp_hw_core_type_t get_core_type() const {
6480eae32dcSDimitry Andric     return (kmp_hw_core_type_t)core_type;
6490eae32dcSDimitry Andric   }
6500eae32dcSDimitry Andric   int get_core_eff() const { return core_eff; }
6510eae32dcSDimitry Andric   bool is_core_type_valid() const {
6520eae32dcSDimitry Andric     return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
6530eae32dcSDimitry Andric   }
6540eae32dcSDimitry Andric   bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
6550eae32dcSDimitry Andric   operator bool() const { return valid; }
6560eae32dcSDimitry Andric   void clear() {
6570eae32dcSDimitry Andric     core_type = KMP_HW_CORE_TYPE_UNKNOWN;
6580eae32dcSDimitry Andric     core_eff = UNKNOWN_CORE_EFF;
6590eae32dcSDimitry Andric     valid = 0;
6600eae32dcSDimitry Andric   }
6610eae32dcSDimitry Andric   bool contains(const kmp_hw_attr_t &other) const {
6620eae32dcSDimitry Andric     if (!valid && !other.valid)
6630eae32dcSDimitry Andric       return true;
6640eae32dcSDimitry Andric     if (valid && other.valid) {
6650eae32dcSDimitry Andric       if (other.is_core_type_valid()) {
6660eae32dcSDimitry Andric         if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
6670eae32dcSDimitry Andric           return false;
6680eae32dcSDimitry Andric       }
6690eae32dcSDimitry Andric       if (other.is_core_eff_valid()) {
6700eae32dcSDimitry Andric         if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
6710eae32dcSDimitry Andric           return false;
6720eae32dcSDimitry Andric       }
6730eae32dcSDimitry Andric       return true;
6740eae32dcSDimitry Andric     }
6750eae32dcSDimitry Andric     return false;
6760eae32dcSDimitry Andric   }
6770eae32dcSDimitry Andric   bool operator==(const kmp_hw_attr_t &rhs) const {
6780eae32dcSDimitry Andric     return (rhs.valid == valid && rhs.core_eff == core_eff &&
6790eae32dcSDimitry Andric             rhs.core_type == core_type);
6800eae32dcSDimitry Andric   }
6810eae32dcSDimitry Andric   bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
6820eae32dcSDimitry Andric };
683349cc55cSDimitry Andric 
684*bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED
685*bdd1243dSDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
686*bdd1243dSDimitry Andric #endif
687*bdd1243dSDimitry Andric 
688fe6060f1SDimitry Andric class kmp_hw_thread_t {
6890b57cec5SDimitry Andric public:
690fe6060f1SDimitry Andric   static const int UNKNOWN_ID = -1;
691*bdd1243dSDimitry Andric   static const int MULTIPLE_ID = -2;
692fe6060f1SDimitry Andric   static int compare_ids(const void *a, const void *b);
693fe6060f1SDimitry Andric   static int compare_compact(const void *a, const void *b);
694fe6060f1SDimitry Andric   int ids[KMP_HW_LAST];
695fe6060f1SDimitry Andric   int sub_ids[KMP_HW_LAST];
696fe6060f1SDimitry Andric   bool leader;
697fe6060f1SDimitry Andric   int os_id;
6980eae32dcSDimitry Andric   kmp_hw_attr_t attrs;
699349cc55cSDimitry Andric 
700fe6060f1SDimitry Andric   void print() const;
701fe6060f1SDimitry Andric   void clear() {
702fe6060f1SDimitry Andric     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
703fe6060f1SDimitry Andric       ids[i] = UNKNOWN_ID;
704fe6060f1SDimitry Andric     leader = false;
7050eae32dcSDimitry Andric     attrs.clear();
7060b57cec5SDimitry Andric   }
7070b57cec5SDimitry Andric };
7080b57cec5SDimitry Andric 
709fe6060f1SDimitry Andric class kmp_topology_t {
710fe6060f1SDimitry Andric 
711fe6060f1SDimitry Andric   struct flags_t {
712fe6060f1SDimitry Andric     int uniform : 1;
713fe6060f1SDimitry Andric     int reserved : 31;
7140b57cec5SDimitry Andric   };
7150b57cec5SDimitry Andric 
716fe6060f1SDimitry Andric   int depth;
717fe6060f1SDimitry Andric 
718349cc55cSDimitry Andric   // The following arrays are all 'depth' long and have been
719349cc55cSDimitry Andric   // allocated to hold up to KMP_HW_LAST number of objects if
720349cc55cSDimitry Andric   // needed so layers can be added without reallocation of any array
721fe6060f1SDimitry Andric 
722fe6060f1SDimitry Andric   // Orderd array of the types in the topology
723fe6060f1SDimitry Andric   kmp_hw_t *types;
724fe6060f1SDimitry Andric 
725fe6060f1SDimitry Andric   // Keep quick topology ratios, for non-uniform topologies,
726fe6060f1SDimitry Andric   // this ratio holds the max number of itemAs per itemB
727fe6060f1SDimitry Andric   // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
728fe6060f1SDimitry Andric   int *ratio;
729fe6060f1SDimitry Andric 
730fe6060f1SDimitry Andric   // Storage containing the absolute number of each topology layer
731fe6060f1SDimitry Andric   int *count;
732fe6060f1SDimitry Andric 
7330eae32dcSDimitry Andric   // The number of core efficiencies. This is only useful for hybrid
7340eae32dcSDimitry Andric   // topologies. Core efficiencies will range from 0 to num efficiencies - 1
7350eae32dcSDimitry Andric   int num_core_efficiencies;
7360eae32dcSDimitry Andric   int num_core_types;
737349cc55cSDimitry Andric   kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
738349cc55cSDimitry Andric 
739fe6060f1SDimitry Andric   // The hardware threads array
740fe6060f1SDimitry Andric   // hw_threads is num_hw_threads long
741fe6060f1SDimitry Andric   // Each hw_thread's ids and sub_ids are depth deep
742fe6060f1SDimitry Andric   int num_hw_threads;
743fe6060f1SDimitry Andric   kmp_hw_thread_t *hw_threads;
744fe6060f1SDimitry Andric 
745fe6060f1SDimitry Andric   // Equivalence hash where the key is the hardware topology item
746fe6060f1SDimitry Andric   // and the value is the equivalent hardware topology type in the
747fe6060f1SDimitry Andric   // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
748fe6060f1SDimitry Andric   // known equivalence for the topology type
749fe6060f1SDimitry Andric   kmp_hw_t equivalent[KMP_HW_LAST];
750fe6060f1SDimitry Andric 
751fe6060f1SDimitry Andric   // Flags describing the topology
752fe6060f1SDimitry Andric   flags_t flags;
753fe6060f1SDimitry Andric 
754*bdd1243dSDimitry Andric   // Compact value used during sort_compact()
755*bdd1243dSDimitry Andric   int compact;
756*bdd1243dSDimitry Andric 
757349cc55cSDimitry Andric   // Insert a new topology layer after allocation
758349cc55cSDimitry Andric   void _insert_layer(kmp_hw_t type, const int *ids);
759349cc55cSDimitry Andric 
760349cc55cSDimitry Andric #if KMP_GROUP_AFFINITY
761349cc55cSDimitry Andric   // Insert topology information about Windows Processor groups
762349cc55cSDimitry Andric   void _insert_windows_proc_groups();
763349cc55cSDimitry Andric #endif
764349cc55cSDimitry Andric 
765fe6060f1SDimitry Andric   // Count each item & get the num x's per y
766fe6060f1SDimitry Andric   // e.g., get the number of cores and the number of threads per core
767fe6060f1SDimitry Andric   // for each (x, y) in (KMP_HW_* , KMP_HW_*)
768fe6060f1SDimitry Andric   void _gather_enumeration_information();
769fe6060f1SDimitry Andric 
770fe6060f1SDimitry Andric   // Remove layers that don't add information to the topology.
771fe6060f1SDimitry Andric   // This is done by having the layer take on the id = UNKNOWN_ID (-1)
772fe6060f1SDimitry Andric   void _remove_radix1_layers();
773fe6060f1SDimitry Andric 
774fe6060f1SDimitry Andric   // Find out if the topology is uniform
775fe6060f1SDimitry Andric   void _discover_uniformity();
776fe6060f1SDimitry Andric 
777fe6060f1SDimitry Andric   // Set all the sub_ids for each hardware thread
778fe6060f1SDimitry Andric   void _set_sub_ids();
779fe6060f1SDimitry Andric 
780fe6060f1SDimitry Andric   // Set global affinity variables describing the number of threads per
781fe6060f1SDimitry Andric   // core, the number of packages, the number of cores per package, and
782fe6060f1SDimitry Andric   // the number of cores.
783fe6060f1SDimitry Andric   void _set_globals();
784fe6060f1SDimitry Andric 
785fe6060f1SDimitry Andric   // Set the last level cache equivalent type
786fe6060f1SDimitry Andric   void _set_last_level_cache();
787fe6060f1SDimitry Andric 
7880eae32dcSDimitry Andric   // Return the number of cores with a particular attribute, 'attr'.
7890eae32dcSDimitry Andric   // If 'find_all' is true, then find all cores on the machine, otherwise find
7900eae32dcSDimitry Andric   // all cores per the layer 'above'
7910eae32dcSDimitry Andric   int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
7920eae32dcSDimitry Andric                             bool find_all = false) const;
793349cc55cSDimitry Andric 
794fe6060f1SDimitry Andric public:
795fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
796fe6060f1SDimitry Andric   kmp_topology_t() = delete;
797fe6060f1SDimitry Andric   kmp_topology_t(const kmp_topology_t &t) = delete;
798fe6060f1SDimitry Andric   kmp_topology_t(kmp_topology_t &&t) = delete;
799fe6060f1SDimitry Andric   kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
800fe6060f1SDimitry Andric   kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
801fe6060f1SDimitry Andric 
802fe6060f1SDimitry Andric   static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
803fe6060f1SDimitry Andric   static void deallocate(kmp_topology_t *);
804fe6060f1SDimitry Andric 
805fe6060f1SDimitry Andric   // Functions used in create_map() routines
806fe6060f1SDimitry Andric   kmp_hw_thread_t &at(int index) {
807fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
808fe6060f1SDimitry Andric     return hw_threads[index];
809fe6060f1SDimitry Andric   }
810fe6060f1SDimitry Andric   const kmp_hw_thread_t &at(int index) const {
811fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
812fe6060f1SDimitry Andric     return hw_threads[index];
813fe6060f1SDimitry Andric   }
814fe6060f1SDimitry Andric   int get_num_hw_threads() const { return num_hw_threads; }
815fe6060f1SDimitry Andric   void sort_ids() {
816fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
817fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_ids);
818fe6060f1SDimitry Andric   }
819fe6060f1SDimitry Andric   // Check if the hardware ids are unique, if they are
820fe6060f1SDimitry Andric   // return true, otherwise return false
821fe6060f1SDimitry Andric   bool check_ids() const;
822fe6060f1SDimitry Andric 
823fe6060f1SDimitry Andric   // Function to call after the create_map() routine
824fe6060f1SDimitry Andric   void canonicalize();
825fe6060f1SDimitry Andric   void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
826fe6060f1SDimitry Andric 
827fe6060f1SDimitry Andric // Functions used after canonicalize() called
828*bdd1243dSDimitry Andric 
829*bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED
830*bdd1243dSDimitry Andric   // Set the granularity for affinity settings
831*bdd1243dSDimitry Andric   void set_granularity(kmp_affinity_t &stgs) const;
832*bdd1243dSDimitry Andric #endif
833fe6060f1SDimitry Andric   bool filter_hw_subset();
834fe6060f1SDimitry Andric   bool is_close(int hwt1, int hwt2, int level) const;
835fe6060f1SDimitry Andric   bool is_uniform() const { return flags.uniform; }
836fe6060f1SDimitry Andric   // Tell whether a type is a valid type in the topology
837fe6060f1SDimitry Andric   // returns KMP_HW_UNKNOWN when there is no equivalent type
838fe6060f1SDimitry Andric   kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
839fe6060f1SDimitry Andric   // Set type1 = type2
840fe6060f1SDimitry Andric   void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
841fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
842fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
843fe6060f1SDimitry Andric     kmp_hw_t real_type2 = equivalent[type2];
844fe6060f1SDimitry Andric     if (real_type2 == KMP_HW_UNKNOWN)
845fe6060f1SDimitry Andric       real_type2 = type2;
846fe6060f1SDimitry Andric     equivalent[type1] = real_type2;
847fe6060f1SDimitry Andric     // This loop is required since any of the types may have been set to
848fe6060f1SDimitry Andric     // be equivalent to type1.  They all must be checked and reset to type2.
849fe6060f1SDimitry Andric     KMP_FOREACH_HW_TYPE(type) {
850fe6060f1SDimitry Andric       if (equivalent[type] == type1) {
851fe6060f1SDimitry Andric         equivalent[type] = real_type2;
852fe6060f1SDimitry Andric       }
853fe6060f1SDimitry Andric     }
854fe6060f1SDimitry Andric   }
855fe6060f1SDimitry Andric   // Calculate number of types corresponding to level1
856fe6060f1SDimitry Andric   // per types corresponding to level2 (e.g., number of threads per core)
857fe6060f1SDimitry Andric   int calculate_ratio(int level1, int level2) const {
858fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
859fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
860fe6060f1SDimitry Andric     int r = 1;
861fe6060f1SDimitry Andric     for (int level = level1; level > level2; --level)
862fe6060f1SDimitry Andric       r *= ratio[level];
863fe6060f1SDimitry Andric     return r;
864fe6060f1SDimitry Andric   }
865fe6060f1SDimitry Andric   int get_ratio(int level) const {
866fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
867fe6060f1SDimitry Andric     return ratio[level];
868fe6060f1SDimitry Andric   }
869fe6060f1SDimitry Andric   int get_depth() const { return depth; };
870fe6060f1SDimitry Andric   kmp_hw_t get_type(int level) const {
871fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
872fe6060f1SDimitry Andric     return types[level];
873fe6060f1SDimitry Andric   }
874fe6060f1SDimitry Andric   int get_level(kmp_hw_t type) const {
875fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
876fe6060f1SDimitry Andric     int eq_type = equivalent[type];
877fe6060f1SDimitry Andric     if (eq_type == KMP_HW_UNKNOWN)
8780b57cec5SDimitry Andric       return -1;
879fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i)
880fe6060f1SDimitry Andric       if (types[i] == eq_type)
881fe6060f1SDimitry Andric         return i;
882fe6060f1SDimitry Andric     return -1;
8830b57cec5SDimitry Andric   }
884fe6060f1SDimitry Andric   int get_count(int level) const {
885fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
886fe6060f1SDimitry Andric     return count[level];
8870b57cec5SDimitry Andric   }
8880eae32dcSDimitry Andric   // Return the total number of cores with attribute 'attr'
8890eae32dcSDimitry Andric   int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
8900eae32dcSDimitry Andric     return _get_ncores_with_attr(attr, -1, true);
8910eae32dcSDimitry Andric   }
8920eae32dcSDimitry Andric   // Return the number of cores with attribute
8930eae32dcSDimitry Andric   // 'attr' per topology level 'above'
8940eae32dcSDimitry Andric   int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
8950eae32dcSDimitry Andric     return _get_ncores_with_attr(attr, above, false);
8960eae32dcSDimitry Andric   }
8970eae32dcSDimitry Andric 
898fe6060f1SDimitry Andric #if KMP_AFFINITY_SUPPORTED
899*bdd1243dSDimitry Andric   friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
900*bdd1243dSDimitry Andric   void sort_compact(kmp_affinity_t &affinity) {
901*bdd1243dSDimitry Andric     compact = affinity.compact;
902fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
903fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_compact);
904fe6060f1SDimitry Andric   }
905fe6060f1SDimitry Andric #endif
906fe6060f1SDimitry Andric   void print(const char *env_var = "KMP_AFFINITY") const;
907fe6060f1SDimitry Andric   void dump() const;
908fe6060f1SDimitry Andric };
909349cc55cSDimitry Andric extern kmp_topology_t *__kmp_topology;
910fe6060f1SDimitry Andric 
911fe6060f1SDimitry Andric class kmp_hw_subset_t {
9120eae32dcSDimitry Andric   const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
9130eae32dcSDimitry Andric 
914fe6060f1SDimitry Andric public:
9150eae32dcSDimitry Andric   // Describe a machine topology item in KMP_HW_SUBSET
916fe6060f1SDimitry Andric   struct item_t {
917fe6060f1SDimitry Andric     kmp_hw_t type;
9180eae32dcSDimitry Andric     int num_attrs;
9190eae32dcSDimitry Andric     int num[MAX_ATTRS];
9200eae32dcSDimitry Andric     int offset[MAX_ATTRS];
9210eae32dcSDimitry Andric     kmp_hw_attr_t attr[MAX_ATTRS];
922fe6060f1SDimitry Andric   };
9230eae32dcSDimitry Andric   // Put parenthesis around max to avoid accidental use of Windows max macro.
9240eae32dcSDimitry Andric   const static int USE_ALL = (std::numeric_limits<int>::max)();
925fe6060f1SDimitry Andric 
926fe6060f1SDimitry Andric private:
927fe6060f1SDimitry Andric   int depth;
928fe6060f1SDimitry Andric   int capacity;
929fe6060f1SDimitry Andric   item_t *items;
930fe6060f1SDimitry Andric   kmp_uint64 set;
931fe6060f1SDimitry Andric   bool absolute;
932fe6060f1SDimitry Andric   // The set must be able to handle up to KMP_HW_LAST number of layers
933fe6060f1SDimitry Andric   KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
934349cc55cSDimitry Andric   // Sorting the KMP_HW_SUBSET items to follow topology order
935349cc55cSDimitry Andric   // All unknown topology types will be at the beginning of the subset
936349cc55cSDimitry Andric   static int hw_subset_compare(const void *i1, const void *i2) {
937349cc55cSDimitry Andric     kmp_hw_t type1 = ((const item_t *)i1)->type;
938349cc55cSDimitry Andric     kmp_hw_t type2 = ((const item_t *)i2)->type;
939349cc55cSDimitry Andric     int level1 = __kmp_topology->get_level(type1);
940349cc55cSDimitry Andric     int level2 = __kmp_topology->get_level(type2);
941349cc55cSDimitry Andric     return level1 - level2;
942349cc55cSDimitry Andric   }
943fe6060f1SDimitry Andric 
944fe6060f1SDimitry Andric public:
945fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
946fe6060f1SDimitry Andric   kmp_hw_subset_t() = delete;
947fe6060f1SDimitry Andric   kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
948fe6060f1SDimitry Andric   kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
949fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
950fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
951fe6060f1SDimitry Andric 
952fe6060f1SDimitry Andric   static kmp_hw_subset_t *allocate() {
953fe6060f1SDimitry Andric     int initial_capacity = 5;
954fe6060f1SDimitry Andric     kmp_hw_subset_t *retval =
955fe6060f1SDimitry Andric         (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
956fe6060f1SDimitry Andric     retval->depth = 0;
957fe6060f1SDimitry Andric     retval->capacity = initial_capacity;
958fe6060f1SDimitry Andric     retval->set = 0ull;
959fe6060f1SDimitry Andric     retval->absolute = false;
960fe6060f1SDimitry Andric     retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
961fe6060f1SDimitry Andric     return retval;
962fe6060f1SDimitry Andric   }
963fe6060f1SDimitry Andric   static void deallocate(kmp_hw_subset_t *subset) {
964fe6060f1SDimitry Andric     __kmp_free(subset->items);
965fe6060f1SDimitry Andric     __kmp_free(subset);
966fe6060f1SDimitry Andric   }
967fe6060f1SDimitry Andric   void set_absolute() { absolute = true; }
968fe6060f1SDimitry Andric   bool is_absolute() const { return absolute; }
9690eae32dcSDimitry Andric   void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
9700eae32dcSDimitry Andric     for (int i = 0; i < depth; ++i) {
9710eae32dcSDimitry Andric       // Found an existing item for this layer type
9720eae32dcSDimitry Andric       // Add the num, offset, and attr to this item
9730eae32dcSDimitry Andric       if (items[i].type == type) {
9740eae32dcSDimitry Andric         int idx = items[i].num_attrs++;
9750eae32dcSDimitry Andric         if ((size_t)idx >= MAX_ATTRS)
9760eae32dcSDimitry Andric           return;
9770eae32dcSDimitry Andric         items[i].num[idx] = num;
9780eae32dcSDimitry Andric         items[i].offset[idx] = offset;
9790eae32dcSDimitry Andric         items[i].attr[idx] = attr;
9800eae32dcSDimitry Andric         return;
9810eae32dcSDimitry Andric       }
9820eae32dcSDimitry Andric     }
983fe6060f1SDimitry Andric     if (depth == capacity - 1) {
984fe6060f1SDimitry Andric       capacity *= 2;
985fe6060f1SDimitry Andric       item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
986fe6060f1SDimitry Andric       for (int i = 0; i < depth; ++i)
987fe6060f1SDimitry Andric         new_items[i] = items[i];
988fe6060f1SDimitry Andric       __kmp_free(items);
989fe6060f1SDimitry Andric       items = new_items;
990fe6060f1SDimitry Andric     }
9910eae32dcSDimitry Andric     items[depth].num_attrs = 1;
992fe6060f1SDimitry Andric     items[depth].type = type;
9930eae32dcSDimitry Andric     items[depth].num[0] = num;
9940eae32dcSDimitry Andric     items[depth].offset[0] = offset;
9950eae32dcSDimitry Andric     items[depth].attr[0] = attr;
996fe6060f1SDimitry Andric     depth++;
997fe6060f1SDimitry Andric     set |= (1ull << type);
998fe6060f1SDimitry Andric   }
999fe6060f1SDimitry Andric   int get_depth() const { return depth; }
1000fe6060f1SDimitry Andric   const item_t &at(int index) const {
1001fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1002fe6060f1SDimitry Andric     return items[index];
1003fe6060f1SDimitry Andric   }
1004fe6060f1SDimitry Andric   item_t &at(int index) {
1005fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1006fe6060f1SDimitry Andric     return items[index];
1007fe6060f1SDimitry Andric   }
1008fe6060f1SDimitry Andric   void remove(int index) {
1009fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1010fe6060f1SDimitry Andric     set &= ~(1ull << items[index].type);
1011fe6060f1SDimitry Andric     for (int j = index + 1; j < depth; ++j) {
1012fe6060f1SDimitry Andric       items[j - 1] = items[j];
1013fe6060f1SDimitry Andric     }
1014fe6060f1SDimitry Andric     depth--;
1015fe6060f1SDimitry Andric   }
1016349cc55cSDimitry Andric   void sort() {
1017349cc55cSDimitry Andric     KMP_DEBUG_ASSERT(__kmp_topology);
1018349cc55cSDimitry Andric     qsort(items, depth, sizeof(item_t), hw_subset_compare);
1019349cc55cSDimitry Andric   }
1020fe6060f1SDimitry Andric   bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
1021fe6060f1SDimitry Andric   void dump() const {
1022fe6060f1SDimitry Andric     printf("**********************\n");
1023fe6060f1SDimitry Andric     printf("*** kmp_hw_subset: ***\n");
1024fe6060f1SDimitry Andric     printf("* depth: %d\n", depth);
1025fe6060f1SDimitry Andric     printf("* items:\n");
1026fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i) {
10270eae32dcSDimitry Andric       printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
10280eae32dcSDimitry Andric       for (int j = 0; j < items[i].num_attrs; ++j) {
10290eae32dcSDimitry Andric         printf("  num: %d, offset: %d, attr: ", items[i].num[j],
10300eae32dcSDimitry Andric                items[i].offset[j]);
10310eae32dcSDimitry Andric         if (!items[i].attr[j]) {
10320eae32dcSDimitry Andric           printf(" (none)\n");
10330eae32dcSDimitry Andric         } else {
10340eae32dcSDimitry Andric           printf(
10350eae32dcSDimitry Andric               " core_type = %s, core_eff = %d\n",
10360eae32dcSDimitry Andric               __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
10370eae32dcSDimitry Andric               items[i].attr[j].get_core_eff());
10380eae32dcSDimitry Andric         }
10390eae32dcSDimitry Andric       }
1040fe6060f1SDimitry Andric     }
1041fe6060f1SDimitry Andric     printf("* set: 0x%llx\n", set);
1042fe6060f1SDimitry Andric     printf("* absolute: %d\n", absolute);
1043fe6060f1SDimitry Andric     printf("**********************\n");
1044fe6060f1SDimitry Andric   }
1045fe6060f1SDimitry Andric };
1046fe6060f1SDimitry Andric extern kmp_hw_subset_t *__kmp_hw_subset;
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
10490b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
10500b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
10510b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
10520b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
10530b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
10540b57cec5SDimitry Andric class hierarchy_info {
10550b57cec5SDimitry Andric public:
10560b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
10570b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
10580b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
10590b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
10600b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
10610b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
10620b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
10630b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
10640b57cec5SDimitry Andric       hierarchy each time we add a level. */
10650b57cec5SDimitry Andric   kmp_uint32 maxLevels;
10660b57cec5SDimitry Andric 
10670b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
10680b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
10690b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
10700b57cec5SDimitry Andric       all but one trailing 1. */
10710b57cec5SDimitry Andric   kmp_uint32 depth;
10720b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
10730b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
10740b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
10750b57cec5SDimitry Andric   // 2=initialization in progress
10760b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
10790b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
10800b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
10810b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
10820b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
10830b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
10840b57cec5SDimitry Andric 
1085fe6060f1SDimitry Andric   void deriveLevels() {
1086fe6060f1SDimitry Andric     int hier_depth = __kmp_topology->get_depth();
1087fe6060f1SDimitry Andric     for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1088fe6060f1SDimitry Andric       numPerLevel[level] = __kmp_topology->get_ratio(i);
10890b57cec5SDimitry Andric     }
10900b57cec5SDimitry Andric   }
10910b57cec5SDimitry Andric 
10920b57cec5SDimitry Andric   hierarchy_info()
10930b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
10940b57cec5SDimitry Andric 
10950b57cec5SDimitry Andric   void fini() {
10960b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
10970b57cec5SDimitry Andric       __kmp_free(numPerLevel);
10980b57cec5SDimitry Andric       numPerLevel = NULL;
10990b57cec5SDimitry Andric       uninitialized = not_initialized;
11000b57cec5SDimitry Andric     }
11010b57cec5SDimitry Andric   }
11020b57cec5SDimitry Andric 
1103fe6060f1SDimitry Andric   void init(int num_addrs) {
11040b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
11050b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
11060b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
11070b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
11080b57cec5SDimitry Andric         KMP_CPU_PAUSE();
11090b57cec5SDimitry Andric       return;
11100b57cec5SDimitry Andric     }
11110b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
11120b57cec5SDimitry Andric 
11130b57cec5SDimitry Andric     /* Added explicit initialization of the data fields here to prevent usage of
11140b57cec5SDimitry Andric        dirty value observed when static library is re-initialized multiple times
11150b57cec5SDimitry Andric        (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
11160b57cec5SDimitry Andric        OpenMP). */
11170b57cec5SDimitry Andric     depth = 1;
11180b57cec5SDimitry Andric     resizing = 0;
11190b57cec5SDimitry Andric     maxLevels = 7;
11200b57cec5SDimitry Andric     numPerLevel =
11210b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
11220b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
11230b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
11240b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
11250b57cec5SDimitry Andric       numPerLevel[i] = 1;
11260b57cec5SDimitry Andric       skipPerLevel[i] = 1;
11270b57cec5SDimitry Andric     }
11280b57cec5SDimitry Andric 
11290b57cec5SDimitry Andric     // Sort table by physical ID
1130fe6060f1SDimitry Andric     if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1131fe6060f1SDimitry Andric       deriveLevels();
11320b57cec5SDimitry Andric     } else {
11330b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
11340b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
11350b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
11360b57cec5SDimitry Andric         numPerLevel[1]++;
11370b57cec5SDimitry Andric     }
11380b57cec5SDimitry Andric 
11390b57cec5SDimitry Andric     base_num_threads = num_addrs;
11400b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
11410b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
11420b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
11430b57cec5SDimitry Andric         depth++;
11440b57cec5SDimitry Andric 
11450b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
11460b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
11470b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
11480b57cec5SDimitry Andric     if (branch < minBranch)
11490b57cec5SDimitry Andric       branch = minBranch;
11500b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
11510b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
11520b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
11530b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
11540b57cec5SDimitry Andric           numPerLevel[d]++;
11550b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
11560b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
11570b57cec5SDimitry Andric           depth++;
11580b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
11590b57cec5SDimitry Andric       }
11600b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
11610b57cec5SDimitry Andric         branch = branch >> 1;
11620b57cec5SDimitry Andric         if (branch < 4)
11630b57cec5SDimitry Andric           branch = minBranch;
11640b57cec5SDimitry Andric       }
11650b57cec5SDimitry Andric     }
11660b57cec5SDimitry Andric 
11670b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
11680b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
11690b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
11700b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
11710b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11720b57cec5SDimitry Andric 
11730b57cec5SDimitry Andric     uninitialized = initialized; // One writer
11740b57cec5SDimitry Andric   }
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
11770b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
11780b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
11790b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
11800b57cec5SDimitry Andric       KMP_CPU_PAUSE();
11810b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
11820b57cec5SDimitry Andric         return;
11830b57cec5SDimitry Andric       else // try to resize
11840b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
11850b57cec5SDimitry Andric     }
11860b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
11870b57cec5SDimitry Andric     if (nproc <= base_num_threads)
11880b57cec5SDimitry Andric       return; // happy with other thread's resize
11890b57cec5SDimitry Andric 
11900b57cec5SDimitry Andric     // Calculate new maxLevels
11910b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
11920b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
11930b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
11940b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
11950b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11960b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
11970b57cec5SDimitry Andric       old_sz *= 2;
11980b57cec5SDimitry Andric       depth++;
11990b57cec5SDimitry Andric     }
12000b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
12010b57cec5SDimitry Andric       while (nproc > old_sz) {
12020b57cec5SDimitry Andric         old_sz *= 2;
12030b57cec5SDimitry Andric         incs++;
12040b57cec5SDimitry Andric         depth++;
12050b57cec5SDimitry Andric       }
12060b57cec5SDimitry Andric       maxLevels += incs;
12070b57cec5SDimitry Andric 
12080b57cec5SDimitry Andric       // Resize arrays
12090b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
12100b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
12110b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
12120b57cec5SDimitry Andric       numPerLevel =
12130b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
12140b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
12150b57cec5SDimitry Andric 
12160b57cec5SDimitry Andric       // Copy old elements from old arrays
1217e8d8bef9SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1218e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
12190b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
12200b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
12210b57cec5SDimitry Andric       }
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric       // Init new elements in arrays to 1
1224e8d8bef9SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1225e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
12260b57cec5SDimitry Andric         numPerLevel[i] = 1;
12270b57cec5SDimitry Andric         skipPerLevel[i] = 1;
12280b57cec5SDimitry Andric       }
12290b57cec5SDimitry Andric 
12300b57cec5SDimitry Andric       // Free old arrays
12310b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
12320b57cec5SDimitry Andric     }
12330b57cec5SDimitry Andric 
12340b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
12350b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
12360b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric     base_num_threads = nproc;
12390b57cec5SDimitry Andric     resizing = 0; // One writer
12400b57cec5SDimitry Andric   }
12410b57cec5SDimitry Andric };
12420b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
1243