xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
140b57cec5SDimitry Andric #define KMP_AFFINITY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp.h"
170b57cec5SDimitry Andric #include "kmp_os.h"
18*0eae32dcSDimitry Andric #include <limits>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
210b57cec5SDimitry Andric #if KMP_USE_HWLOC
220b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
230b57cec5SDimitry Andric public:
240b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
250b57cec5SDimitry Andric     hwloc_cpuset_t mask;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric   public:
280b57cec5SDimitry Andric     Mask() {
290b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
300b57cec5SDimitry Andric       this->zero();
310b57cec5SDimitry Andric     }
320b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
330b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
340b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
350b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
360b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
370b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
380b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
390b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
400b57cec5SDimitry Andric     }
410b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
420b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
430b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
440b57cec5SDimitry Andric     }
450b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
460b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
470b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
480b57cec5SDimitry Andric     }
490b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
500b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
510b57cec5SDimitry Andric     int end() const override { return -1; }
520b57cec5SDimitry Andric     int next(int previous) const override {
530b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
540b57cec5SDimitry Andric     }
550b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
560b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
570b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
58e8d8bef9SDimitry Andric       long retval =
590b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
600b57cec5SDimitry Andric       if (retval >= 0) {
610b57cec5SDimitry Andric         return 0;
620b57cec5SDimitry Andric       }
630b57cec5SDimitry Andric       int error = errno;
640b57cec5SDimitry Andric       if (abort_on_error) {
650b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
660b57cec5SDimitry Andric       }
670b57cec5SDimitry Andric       return error;
680b57cec5SDimitry Andric     }
690b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
700b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
72e8d8bef9SDimitry Andric       long retval =
730b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
740b57cec5SDimitry Andric       if (retval >= 0) {
750b57cec5SDimitry Andric         return 0;
760b57cec5SDimitry Andric       }
770b57cec5SDimitry Andric       int error = errno;
780b57cec5SDimitry Andric       if (abort_on_error) {
790b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
800b57cec5SDimitry Andric       }
810b57cec5SDimitry Andric       return error;
820b57cec5SDimitry Andric     }
83e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS
84e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
85e8d8bef9SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
86e8d8bef9SDimitry Andric                   "Illegal set process affinity operation when not capable");
87e8d8bef9SDimitry Andric       int error = 0;
88e8d8bef9SDimitry Andric       const hwloc_topology_support *support =
89e8d8bef9SDimitry Andric           hwloc_topology_get_support(__kmp_hwloc_topology);
90e8d8bef9SDimitry Andric       if (support->cpubind->set_proc_cpubind) {
91e8d8bef9SDimitry Andric         int retval;
92e8d8bef9SDimitry Andric         retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
93e8d8bef9SDimitry Andric                                    HWLOC_CPUBIND_PROCESS);
94e8d8bef9SDimitry Andric         if (retval >= 0)
95e8d8bef9SDimitry Andric           return 0;
96e8d8bef9SDimitry Andric         error = errno;
97e8d8bef9SDimitry Andric         if (abort_on_error)
98e8d8bef9SDimitry Andric           __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
99e8d8bef9SDimitry Andric       }
100e8d8bef9SDimitry Andric       return error;
101e8d8bef9SDimitry Andric     }
102e8d8bef9SDimitry Andric #endif
1030b57cec5SDimitry Andric     int get_proc_group() const override {
1040b57cec5SDimitry Andric       int group = -1;
1050b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1060b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
1070b57cec5SDimitry Andric         return 1;
1080b57cec5SDimitry Andric       }
1090b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
1100b57cec5SDimitry Andric         // On windows, the long type is always 32 bits
1110b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
1120b57cec5SDimitry Andric         unsigned long second_32_bits =
1130b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
1140b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
1150b57cec5SDimitry Andric           continue;
1160b57cec5SDimitry Andric         }
1170b57cec5SDimitry Andric         if (group >= 0) {
1180b57cec5SDimitry Andric           return -1;
1190b57cec5SDimitry Andric         }
1200b57cec5SDimitry Andric         group = i;
1210b57cec5SDimitry Andric       }
1220b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1230b57cec5SDimitry Andric       return group;
1240b57cec5SDimitry Andric     }
1250b57cec5SDimitry Andric   };
1260b57cec5SDimitry Andric   void determine_capable(const char *var) override {
1270b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
1280b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
1290b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
1300b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1310b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1320b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
1330b57cec5SDimitry Andric       }
1340b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
1350b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1360b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1370b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
1380b57cec5SDimitry Andric       }
1390b57cec5SDimitry Andric     }
1400b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
1410b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
1420b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
1430b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
1440b57cec5SDimitry Andric     // hwloc_* API functions?
1450b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
1460b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
1470b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
1480b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
1490b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
1500b57cec5SDimitry Andric     } else {
1510b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
1520b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
1530b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
1540b57cec5SDimitry Andric     }
1550b57cec5SDimitry Andric   }
1560b57cec5SDimitry Andric   void bind_thread(int which) override {
1570b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
1580b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
1590b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
1600b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
1610b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
1620b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
1630b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
1640b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
1650b57cec5SDimitry Andric   }
1660b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
1670b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
1680b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
1690b57cec5SDimitry Andric     return new Mask[num];
1700b57cec5SDimitry Andric   }
1710b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
1720b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1730b57cec5SDimitry Andric     delete[] hwloc_array;
1740b57cec5SDimitry Andric   }
1750b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
1760b57cec5SDimitry Andric                                       int index) override {
1770b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1780b57cec5SDimitry Andric     return &(hwloc_array[index]);
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
1810b57cec5SDimitry Andric };
1820b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
1830b57cec5SDimitry Andric 
184489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD
1850b57cec5SDimitry Andric #if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change:
   they are set in stone forever. */
1900b57cec5SDimitry Andric #include <sys/syscall.h>
// Per-architecture syscall numbers for sched_setaffinity/sched_getaffinity.
// Each branch defines the number when the system headers did not, and fails
// the build when the headers define a different value. Architectures not
// listed here are rejected by the final #else.
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
// Without this #else the #error below sat inside the MIPS64 branch, so MIPS64
// builds always failed and unknown architectures were silently accepted.
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
259489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
260489b1cf2SDimitry Andric #include <pthread.h>
261489b1cf2SDimitry Andric #include <pthread_np.h>
262489b1cf2SDimitry Andric #endif
2630b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
2640b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
265e8d8bef9SDimitry Andric     typedef unsigned long mask_t;
266e8d8bef9SDimitry Andric     typedef decltype(__kmp_affin_mask_size) mask_size_type;
267e8d8bef9SDimitry Andric     static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
268e8d8bef9SDimitry Andric     static const mask_t ONE = 1;
269e8d8bef9SDimitry Andric     mask_size_type get_num_mask_types() const {
270e8d8bef9SDimitry Andric       return __kmp_affin_mask_size / sizeof(mask_t);
271e8d8bef9SDimitry Andric     }
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric   public:
2740b57cec5SDimitry Andric     mask_t *mask;
2750b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
2760b57cec5SDimitry Andric     ~Mask() {
2770b57cec5SDimitry Andric       if (mask)
2780b57cec5SDimitry Andric         __kmp_free(mask);
2790b57cec5SDimitry Andric     }
2800b57cec5SDimitry Andric     void set(int i) override {
281e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
2820b57cec5SDimitry Andric     }
2830b57cec5SDimitry Andric     bool is_set(int i) const override {
284e8d8bef9SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
2850b57cec5SDimitry Andric     }
2860b57cec5SDimitry Andric     void clear(int i) override {
287e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
2880b57cec5SDimitry Andric     }
2890b57cec5SDimitry Andric     void zero() override {
290e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
291e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
292e8d8bef9SDimitry Andric         mask[i] = (mask_t)0;
2930b57cec5SDimitry Andric     }
2940b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
2950b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
296e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
297e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
2980b57cec5SDimitry Andric         mask[i] = convert->mask[i];
2990b57cec5SDimitry Andric     }
3000b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
3010b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
302e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
303e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3040b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
3050b57cec5SDimitry Andric     }
3060b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
3070b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
308e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
309e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3100b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
3110b57cec5SDimitry Andric     }
3120b57cec5SDimitry Andric     void bitwise_not() override {
313e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
314e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3150b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
3160b57cec5SDimitry Andric     }
3170b57cec5SDimitry Andric     int begin() const override {
3180b57cec5SDimitry Andric       int retval = 0;
3190b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3200b57cec5SDimitry Andric         ++retval;
3210b57cec5SDimitry Andric       return retval;
3220b57cec5SDimitry Andric     }
323e8d8bef9SDimitry Andric     int end() const override {
324e8d8bef9SDimitry Andric       int e;
325e8d8bef9SDimitry Andric       __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
326e8d8bef9SDimitry Andric       return e;
327e8d8bef9SDimitry Andric     }
3280b57cec5SDimitry Andric     int next(int previous) const override {
3290b57cec5SDimitry Andric       int retval = previous + 1;
3300b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3310b57cec5SDimitry Andric         ++retval;
3320b57cec5SDimitry Andric       return retval;
3330b57cec5SDimitry Andric     }
3340b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
3350b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
3360b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
337489b1cf2SDimitry Andric #if KMP_OS_LINUX
338e8d8bef9SDimitry Andric       long retval =
3390b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
340489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
341fe6060f1SDimitry Andric       int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
342fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
3435ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
344489b1cf2SDimitry Andric #endif
3450b57cec5SDimitry Andric       if (retval >= 0) {
3460b57cec5SDimitry Andric         return 0;
3470b57cec5SDimitry Andric       }
3480b57cec5SDimitry Andric       int error = errno;
3490b57cec5SDimitry Andric       if (abort_on_error) {
3500b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3510b57cec5SDimitry Andric       }
3520b57cec5SDimitry Andric       return error;
3530b57cec5SDimitry Andric     }
3540b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
3550b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
356e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
357489b1cf2SDimitry Andric #if KMP_OS_LINUX
358e8d8bef9SDimitry Andric       long retval =
3590b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
360489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
361fe6060f1SDimitry Andric       int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
362fe6060f1SDimitry Andric                                      reinterpret_cast<cpuset_t *>(mask));
3635ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
364489b1cf2SDimitry Andric #endif
3650b57cec5SDimitry Andric       if (retval >= 0) {
3660b57cec5SDimitry Andric         return 0;
3670b57cec5SDimitry Andric       }
3680b57cec5SDimitry Andric       int error = errno;
3690b57cec5SDimitry Andric       if (abort_on_error) {
3700b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3710b57cec5SDimitry Andric       }
3720b57cec5SDimitry Andric       return error;
3730b57cec5SDimitry Andric     }
3740b57cec5SDimitry Andric   };
3750b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
3760b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
3770b57cec5SDimitry Andric   }
3780b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
3790b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
3800b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
3810b57cec5SDimitry Andric     return retval;
3820b57cec5SDimitry Andric   }
3830b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
3840b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
3850b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
3860b57cec5SDimitry Andric     delete native_mask;
3870b57cec5SDimitry Andric   }
3880b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
3890b57cec5SDimitry Andric     return new Mask[num];
3900b57cec5SDimitry Andric   }
3910b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
3920b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3930b57cec5SDimitry Andric     delete[] linux_array;
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
3960b57cec5SDimitry Andric                                       int index) override {
3970b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3980b57cec5SDimitry Andric     return &(linux_array[index]);
3990b57cec5SDimitry Andric   }
4000b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
4010b57cec5SDimitry Andric };
402489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric #if KMP_OS_WINDOWS
4050b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
4060b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
4070b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
4080b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
4090b57cec5SDimitry Andric     mask_t *mask;
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric   public:
4120b57cec5SDimitry Andric     Mask() {
4130b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
4140b57cec5SDimitry Andric     }
4150b57cec5SDimitry Andric     ~Mask() {
4160b57cec5SDimitry Andric       if (mask)
4170b57cec5SDimitry Andric         __kmp_free(mask);
4180b57cec5SDimitry Andric     }
4190b57cec5SDimitry Andric     void set(int i) override {
4200b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
4210b57cec5SDimitry Andric     }
4220b57cec5SDimitry Andric     bool is_set(int i) const override {
4230b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
4240b57cec5SDimitry Andric     }
4250b57cec5SDimitry Andric     void clear(int i) override {
4260b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
4270b57cec5SDimitry Andric     }
4280b57cec5SDimitry Andric     void zero() override {
4290b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4300b57cec5SDimitry Andric         mask[i] = 0;
4310b57cec5SDimitry Andric     }
4320b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
4330b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
4340b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4350b57cec5SDimitry Andric         mask[i] = convert->mask[i];
4360b57cec5SDimitry Andric     }
4370b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
4380b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4390b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4400b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
4410b57cec5SDimitry Andric     }
4420b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
4430b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4440b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4450b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
4460b57cec5SDimitry Andric     }
4470b57cec5SDimitry Andric     void bitwise_not() override {
4480b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4490b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
4500b57cec5SDimitry Andric     }
4510b57cec5SDimitry Andric     int begin() const override {
4520b57cec5SDimitry Andric       int retval = 0;
4530b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4540b57cec5SDimitry Andric         ++retval;
4550b57cec5SDimitry Andric       return retval;
4560b57cec5SDimitry Andric     }
4570b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
4580b57cec5SDimitry Andric     int next(int previous) const override {
4590b57cec5SDimitry Andric       int retval = previous + 1;
4600b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4610b57cec5SDimitry Andric         ++retval;
4620b57cec5SDimitry Andric       return retval;
4630b57cec5SDimitry Andric     }
464e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
465e8d8bef9SDimitry Andric       if (__kmp_num_proc_groups <= 1) {
466e8d8bef9SDimitry Andric         if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
467e8d8bef9SDimitry Andric           DWORD error = GetLastError();
468e8d8bef9SDimitry Andric           if (abort_on_error) {
469e8d8bef9SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
470e8d8bef9SDimitry Andric                         __kmp_msg_null);
471e8d8bef9SDimitry Andric           }
472e8d8bef9SDimitry Andric           return error;
473e8d8bef9SDimitry Andric         }
474e8d8bef9SDimitry Andric       }
475e8d8bef9SDimitry Andric       return 0;
476e8d8bef9SDimitry Andric     }
4770b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
4780b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
4790b57cec5SDimitry Andric         // Check for a valid mask.
4800b57cec5SDimitry Andric         GROUP_AFFINITY ga;
4810b57cec5SDimitry Andric         int group = get_proc_group();
4820b57cec5SDimitry Andric         if (group < 0) {
4830b57cec5SDimitry Andric           if (abort_on_error) {
4840b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4850b57cec5SDimitry Andric           }
4860b57cec5SDimitry Andric           return -1;
4870b57cec5SDimitry Andric         }
4880b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
4890b57cec5SDimitry Andric         // and make the system call to set affinity.
4900b57cec5SDimitry Andric         ga.Group = group;
4910b57cec5SDimitry Andric         ga.Mask = mask[group];
4920b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
4930b57cec5SDimitry Andric 
4940b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
4950b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
4960b57cec5SDimitry Andric           DWORD error = GetLastError();
4970b57cec5SDimitry Andric           if (abort_on_error) {
4980b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
4990b57cec5SDimitry Andric                         __kmp_msg_null);
5000b57cec5SDimitry Andric           }
5010b57cec5SDimitry Andric           return error;
5020b57cec5SDimitry Andric         }
5030b57cec5SDimitry Andric       } else {
5040b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
5050b57cec5SDimitry Andric           DWORD error = GetLastError();
5060b57cec5SDimitry Andric           if (abort_on_error) {
5070b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
5080b57cec5SDimitry Andric                         __kmp_msg_null);
5090b57cec5SDimitry Andric           }
5100b57cec5SDimitry Andric           return error;
5110b57cec5SDimitry Andric         }
5120b57cec5SDimitry Andric       }
5130b57cec5SDimitry Andric       return 0;
5140b57cec5SDimitry Andric     }
5150b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
5160b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
5170b57cec5SDimitry Andric         this->zero();
5180b57cec5SDimitry Andric         GROUP_AFFINITY ga;
5190b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
5200b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
5210b57cec5SDimitry Andric           DWORD error = GetLastError();
5220b57cec5SDimitry Andric           if (abort_on_error) {
5230b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
5240b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5250b57cec5SDimitry Andric           }
5260b57cec5SDimitry Andric           return error;
5270b57cec5SDimitry Andric         }
5280b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
5290b57cec5SDimitry Andric             (ga.Mask == 0)) {
5300b57cec5SDimitry Andric           return -1;
5310b57cec5SDimitry Andric         }
5320b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
5330b57cec5SDimitry Andric       } else {
5340b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
5350b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
5360b57cec5SDimitry Andric           DWORD error = GetLastError();
5370b57cec5SDimitry Andric           if (abort_on_error) {
5380b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
5390b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5400b57cec5SDimitry Andric           }
5410b57cec5SDimitry Andric           return error;
5420b57cec5SDimitry Andric         }
5430b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
5440b57cec5SDimitry Andric         if (!retval) {
5450b57cec5SDimitry Andric           DWORD error = GetLastError();
5460b57cec5SDimitry Andric           if (abort_on_error) {
5470b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5480b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5490b57cec5SDimitry Andric           }
5500b57cec5SDimitry Andric           return error;
5510b57cec5SDimitry Andric         }
5520b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
5530b57cec5SDimitry Andric         if (!newMask) {
5540b57cec5SDimitry Andric           DWORD error = GetLastError();
5550b57cec5SDimitry Andric           if (abort_on_error) {
5560b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5570b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5580b57cec5SDimitry Andric           }
5590b57cec5SDimitry Andric         }
5600b57cec5SDimitry Andric         *mask = retval;
5610b57cec5SDimitry Andric       }
5620b57cec5SDimitry Andric       return 0;
5630b57cec5SDimitry Andric     }
5640b57cec5SDimitry Andric     int get_proc_group() const override {
5650b57cec5SDimitry Andric       int group = -1;
5660b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
5670b57cec5SDimitry Andric         return 1;
5680b57cec5SDimitry Andric       }
5690b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
5700b57cec5SDimitry Andric         if (mask[i] == 0)
5710b57cec5SDimitry Andric           continue;
5720b57cec5SDimitry Andric         if (group >= 0)
5730b57cec5SDimitry Andric           return -1;
5740b57cec5SDimitry Andric         group = i;
5750b57cec5SDimitry Andric       }
5760b57cec5SDimitry Andric       return group;
5770b57cec5SDimitry Andric     }
5780b57cec5SDimitry Andric   };
5790b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
5800b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
5810b57cec5SDimitry Andric   }
5820b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
5830b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
5840b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
5850b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
5860b57cec5SDimitry Andric     return new Mask[num];
5870b57cec5SDimitry Andric   }
5880b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
5890b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5900b57cec5SDimitry Andric     delete[] windows_array;
5910b57cec5SDimitry Andric   }
5920b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
5930b57cec5SDimitry Andric                                       int index) override {
5940b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5950b57cec5SDimitry Andric     return &(windows_array[index]);
5960b57cec5SDimitry Andric   }
5970b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
5980b57cec5SDimitry Andric };
5990b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
6000b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
6010b57cec5SDimitry Andric 
602*0eae32dcSDimitry Andric // Describe an attribute for a level in the machine topology
603*0eae32dcSDimitry Andric struct kmp_hw_attr_t {
604*0eae32dcSDimitry Andric   int core_type : 8;
605*0eae32dcSDimitry Andric   int core_eff : 8;
606*0eae32dcSDimitry Andric   unsigned valid : 1;
607*0eae32dcSDimitry Andric   unsigned reserved : 15;
608*0eae32dcSDimitry Andric 
609*0eae32dcSDimitry Andric   static const int UNKNOWN_CORE_EFF = -1;
610*0eae32dcSDimitry Andric 
611*0eae32dcSDimitry Andric   kmp_hw_attr_t()
612*0eae32dcSDimitry Andric       : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
613*0eae32dcSDimitry Andric         valid(0), reserved(0) {}
614*0eae32dcSDimitry Andric   void set_core_type(kmp_hw_core_type_t type) {
615*0eae32dcSDimitry Andric     valid = 1;
616*0eae32dcSDimitry Andric     core_type = type;
617*0eae32dcSDimitry Andric   }
618*0eae32dcSDimitry Andric   void set_core_eff(int eff) {
619*0eae32dcSDimitry Andric     valid = 1;
620*0eae32dcSDimitry Andric     core_eff = eff;
621*0eae32dcSDimitry Andric   }
622*0eae32dcSDimitry Andric   kmp_hw_core_type_t get_core_type() const {
623*0eae32dcSDimitry Andric     return (kmp_hw_core_type_t)core_type;
624*0eae32dcSDimitry Andric   }
625*0eae32dcSDimitry Andric   int get_core_eff() const { return core_eff; }
626*0eae32dcSDimitry Andric   bool is_core_type_valid() const {
627*0eae32dcSDimitry Andric     return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
628*0eae32dcSDimitry Andric   }
629*0eae32dcSDimitry Andric   bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
630*0eae32dcSDimitry Andric   operator bool() const { return valid; }
631*0eae32dcSDimitry Andric   void clear() {
632*0eae32dcSDimitry Andric     core_type = KMP_HW_CORE_TYPE_UNKNOWN;
633*0eae32dcSDimitry Andric     core_eff = UNKNOWN_CORE_EFF;
634*0eae32dcSDimitry Andric     valid = 0;
635*0eae32dcSDimitry Andric   }
636*0eae32dcSDimitry Andric   bool contains(const kmp_hw_attr_t &other) const {
637*0eae32dcSDimitry Andric     if (!valid && !other.valid)
638*0eae32dcSDimitry Andric       return true;
639*0eae32dcSDimitry Andric     if (valid && other.valid) {
640*0eae32dcSDimitry Andric       if (other.is_core_type_valid()) {
641*0eae32dcSDimitry Andric         if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
642*0eae32dcSDimitry Andric           return false;
643*0eae32dcSDimitry Andric       }
644*0eae32dcSDimitry Andric       if (other.is_core_eff_valid()) {
645*0eae32dcSDimitry Andric         if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
646*0eae32dcSDimitry Andric           return false;
647*0eae32dcSDimitry Andric       }
648*0eae32dcSDimitry Andric       return true;
649*0eae32dcSDimitry Andric     }
650*0eae32dcSDimitry Andric     return false;
651*0eae32dcSDimitry Andric   }
652*0eae32dcSDimitry Andric   bool operator==(const kmp_hw_attr_t &rhs) const {
653*0eae32dcSDimitry Andric     return (rhs.valid == valid && rhs.core_eff == core_eff &&
654*0eae32dcSDimitry Andric             rhs.core_type == core_type);
655*0eae32dcSDimitry Andric   }
656*0eae32dcSDimitry Andric   bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
657*0eae32dcSDimitry Andric };
658349cc55cSDimitry Andric 
659fe6060f1SDimitry Andric class kmp_hw_thread_t {
6600b57cec5SDimitry Andric public:
661fe6060f1SDimitry Andric   static const int UNKNOWN_ID = -1;
662fe6060f1SDimitry Andric   static int compare_ids(const void *a, const void *b);
663fe6060f1SDimitry Andric   static int compare_compact(const void *a, const void *b);
664fe6060f1SDimitry Andric   int ids[KMP_HW_LAST];
665fe6060f1SDimitry Andric   int sub_ids[KMP_HW_LAST];
666fe6060f1SDimitry Andric   bool leader;
667fe6060f1SDimitry Andric   int os_id;
668*0eae32dcSDimitry Andric   kmp_hw_attr_t attrs;
669349cc55cSDimitry Andric 
670fe6060f1SDimitry Andric   void print() const;
671fe6060f1SDimitry Andric   void clear() {
672fe6060f1SDimitry Andric     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
673fe6060f1SDimitry Andric       ids[i] = UNKNOWN_ID;
674fe6060f1SDimitry Andric     leader = false;
675*0eae32dcSDimitry Andric     attrs.clear();
6760b57cec5SDimitry Andric   }
6770b57cec5SDimitry Andric };
6780b57cec5SDimitry Andric 
679fe6060f1SDimitry Andric class kmp_topology_t {
680fe6060f1SDimitry Andric 
681fe6060f1SDimitry Andric   struct flags_t {
682fe6060f1SDimitry Andric     int uniform : 1;
683fe6060f1SDimitry Andric     int reserved : 31;
6840b57cec5SDimitry Andric   };
6850b57cec5SDimitry Andric 
686fe6060f1SDimitry Andric   int depth;
687fe6060f1SDimitry Andric 
688349cc55cSDimitry Andric   // The following arrays are all 'depth' long and have been
689349cc55cSDimitry Andric   // allocated to hold up to KMP_HW_LAST number of objects if
690349cc55cSDimitry Andric   // needed so layers can be added without reallocation of any array
691fe6060f1SDimitry Andric 
692fe6060f1SDimitry Andric   // Orderd array of the types in the topology
693fe6060f1SDimitry Andric   kmp_hw_t *types;
694fe6060f1SDimitry Andric 
695fe6060f1SDimitry Andric   // Keep quick topology ratios, for non-uniform topologies,
696fe6060f1SDimitry Andric   // this ratio holds the max number of itemAs per itemB
697fe6060f1SDimitry Andric   // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
698fe6060f1SDimitry Andric   int *ratio;
699fe6060f1SDimitry Andric 
700fe6060f1SDimitry Andric   // Storage containing the absolute number of each topology layer
701fe6060f1SDimitry Andric   int *count;
702fe6060f1SDimitry Andric 
703*0eae32dcSDimitry Andric   // The number of core efficiencies. This is only useful for hybrid
704*0eae32dcSDimitry Andric   // topologies. Core efficiencies will range from 0 to num efficiencies - 1
705*0eae32dcSDimitry Andric   int num_core_efficiencies;
706*0eae32dcSDimitry Andric   int num_core_types;
707349cc55cSDimitry Andric   kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
708349cc55cSDimitry Andric 
709fe6060f1SDimitry Andric   // The hardware threads array
710fe6060f1SDimitry Andric   // hw_threads is num_hw_threads long
711fe6060f1SDimitry Andric   // Each hw_thread's ids and sub_ids are depth deep
712fe6060f1SDimitry Andric   int num_hw_threads;
713fe6060f1SDimitry Andric   kmp_hw_thread_t *hw_threads;
714fe6060f1SDimitry Andric 
715fe6060f1SDimitry Andric   // Equivalence hash where the key is the hardware topology item
716fe6060f1SDimitry Andric   // and the value is the equivalent hardware topology type in the
717fe6060f1SDimitry Andric   // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
718fe6060f1SDimitry Andric   // known equivalence for the topology type
719fe6060f1SDimitry Andric   kmp_hw_t equivalent[KMP_HW_LAST];
720fe6060f1SDimitry Andric 
721fe6060f1SDimitry Andric   // Flags describing the topology
722fe6060f1SDimitry Andric   flags_t flags;
723fe6060f1SDimitry Andric 
724349cc55cSDimitry Andric   // Insert a new topology layer after allocation
725349cc55cSDimitry Andric   void _insert_layer(kmp_hw_t type, const int *ids);
726349cc55cSDimitry Andric 
727349cc55cSDimitry Andric #if KMP_GROUP_AFFINITY
728349cc55cSDimitry Andric   // Insert topology information about Windows Processor groups
729349cc55cSDimitry Andric   void _insert_windows_proc_groups();
730349cc55cSDimitry Andric #endif
731349cc55cSDimitry Andric 
732fe6060f1SDimitry Andric   // Count each item & get the num x's per y
733fe6060f1SDimitry Andric   // e.g., get the number of cores and the number of threads per core
734fe6060f1SDimitry Andric   // for each (x, y) in (KMP_HW_* , KMP_HW_*)
735fe6060f1SDimitry Andric   void _gather_enumeration_information();
736fe6060f1SDimitry Andric 
737fe6060f1SDimitry Andric   // Remove layers that don't add information to the topology.
738fe6060f1SDimitry Andric   // This is done by having the layer take on the id = UNKNOWN_ID (-1)
739fe6060f1SDimitry Andric   void _remove_radix1_layers();
740fe6060f1SDimitry Andric 
741fe6060f1SDimitry Andric   // Find out if the topology is uniform
742fe6060f1SDimitry Andric   void _discover_uniformity();
743fe6060f1SDimitry Andric 
744fe6060f1SDimitry Andric   // Set all the sub_ids for each hardware thread
745fe6060f1SDimitry Andric   void _set_sub_ids();
746fe6060f1SDimitry Andric 
747fe6060f1SDimitry Andric   // Set global affinity variables describing the number of threads per
748fe6060f1SDimitry Andric   // core, the number of packages, the number of cores per package, and
749fe6060f1SDimitry Andric   // the number of cores.
750fe6060f1SDimitry Andric   void _set_globals();
751fe6060f1SDimitry Andric 
752fe6060f1SDimitry Andric   // Set the last level cache equivalent type
753fe6060f1SDimitry Andric   void _set_last_level_cache();
754fe6060f1SDimitry Andric 
755*0eae32dcSDimitry Andric   // Return the number of cores with a particular attribute, 'attr'.
756*0eae32dcSDimitry Andric   // If 'find_all' is true, then find all cores on the machine, otherwise find
757*0eae32dcSDimitry Andric   // all cores per the layer 'above'
758*0eae32dcSDimitry Andric   int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
759*0eae32dcSDimitry Andric                             bool find_all = false) const;
760349cc55cSDimitry Andric 
761fe6060f1SDimitry Andric public:
762fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
763fe6060f1SDimitry Andric   kmp_topology_t() = delete;
764fe6060f1SDimitry Andric   kmp_topology_t(const kmp_topology_t &t) = delete;
765fe6060f1SDimitry Andric   kmp_topology_t(kmp_topology_t &&t) = delete;
766fe6060f1SDimitry Andric   kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
767fe6060f1SDimitry Andric   kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
768fe6060f1SDimitry Andric 
769fe6060f1SDimitry Andric   static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
770fe6060f1SDimitry Andric   static void deallocate(kmp_topology_t *);
771fe6060f1SDimitry Andric 
772fe6060f1SDimitry Andric   // Functions used in create_map() routines
773fe6060f1SDimitry Andric   kmp_hw_thread_t &at(int index) {
774fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
775fe6060f1SDimitry Andric     return hw_threads[index];
776fe6060f1SDimitry Andric   }
777fe6060f1SDimitry Andric   const kmp_hw_thread_t &at(int index) const {
778fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
779fe6060f1SDimitry Andric     return hw_threads[index];
780fe6060f1SDimitry Andric   }
781fe6060f1SDimitry Andric   int get_num_hw_threads() const { return num_hw_threads; }
782fe6060f1SDimitry Andric   void sort_ids() {
783fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
784fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_ids);
785fe6060f1SDimitry Andric   }
786fe6060f1SDimitry Andric   // Check if the hardware ids are unique, if they are
787fe6060f1SDimitry Andric   // return true, otherwise return false
788fe6060f1SDimitry Andric   bool check_ids() const;
789fe6060f1SDimitry Andric 
790fe6060f1SDimitry Andric   // Function to call after the create_map() routine
791fe6060f1SDimitry Andric   void canonicalize();
792fe6060f1SDimitry Andric   void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
793fe6060f1SDimitry Andric 
794fe6060f1SDimitry Andric   // Functions used after canonicalize() called
795fe6060f1SDimitry Andric   bool filter_hw_subset();
796fe6060f1SDimitry Andric   bool is_close(int hwt1, int hwt2, int level) const;
797fe6060f1SDimitry Andric   bool is_uniform() const { return flags.uniform; }
798fe6060f1SDimitry Andric   // Tell whether a type is a valid type in the topology
799fe6060f1SDimitry Andric   // returns KMP_HW_UNKNOWN when there is no equivalent type
800fe6060f1SDimitry Andric   kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
801fe6060f1SDimitry Andric   // Set type1 = type2
802fe6060f1SDimitry Andric   void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
803fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
804fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
805fe6060f1SDimitry Andric     kmp_hw_t real_type2 = equivalent[type2];
806fe6060f1SDimitry Andric     if (real_type2 == KMP_HW_UNKNOWN)
807fe6060f1SDimitry Andric       real_type2 = type2;
808fe6060f1SDimitry Andric     equivalent[type1] = real_type2;
809fe6060f1SDimitry Andric     // This loop is required since any of the types may have been set to
810fe6060f1SDimitry Andric     // be equivalent to type1.  They all must be checked and reset to type2.
811fe6060f1SDimitry Andric     KMP_FOREACH_HW_TYPE(type) {
812fe6060f1SDimitry Andric       if (equivalent[type] == type1) {
813fe6060f1SDimitry Andric         equivalent[type] = real_type2;
814fe6060f1SDimitry Andric       }
815fe6060f1SDimitry Andric     }
816fe6060f1SDimitry Andric   }
817fe6060f1SDimitry Andric   // Calculate number of types corresponding to level1
818fe6060f1SDimitry Andric   // per types corresponding to level2 (e.g., number of threads per core)
819fe6060f1SDimitry Andric   int calculate_ratio(int level1, int level2) const {
820fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
821fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
822fe6060f1SDimitry Andric     int r = 1;
823fe6060f1SDimitry Andric     for (int level = level1; level > level2; --level)
824fe6060f1SDimitry Andric       r *= ratio[level];
825fe6060f1SDimitry Andric     return r;
826fe6060f1SDimitry Andric   }
827fe6060f1SDimitry Andric   int get_ratio(int level) const {
828fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
829fe6060f1SDimitry Andric     return ratio[level];
830fe6060f1SDimitry Andric   }
831fe6060f1SDimitry Andric   int get_depth() const { return depth; };
832fe6060f1SDimitry Andric   kmp_hw_t get_type(int level) const {
833fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
834fe6060f1SDimitry Andric     return types[level];
835fe6060f1SDimitry Andric   }
836fe6060f1SDimitry Andric   int get_level(kmp_hw_t type) const {
837fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
838fe6060f1SDimitry Andric     int eq_type = equivalent[type];
839fe6060f1SDimitry Andric     if (eq_type == KMP_HW_UNKNOWN)
8400b57cec5SDimitry Andric       return -1;
841fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i)
842fe6060f1SDimitry Andric       if (types[i] == eq_type)
843fe6060f1SDimitry Andric         return i;
844fe6060f1SDimitry Andric     return -1;
8450b57cec5SDimitry Andric   }
846fe6060f1SDimitry Andric   int get_count(int level) const {
847fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
848fe6060f1SDimitry Andric     return count[level];
8490b57cec5SDimitry Andric   }
850*0eae32dcSDimitry Andric   // Return the total number of cores with attribute 'attr'
851*0eae32dcSDimitry Andric   int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
852*0eae32dcSDimitry Andric     return _get_ncores_with_attr(attr, -1, true);
853*0eae32dcSDimitry Andric   }
854*0eae32dcSDimitry Andric   // Return the number of cores with attribute
855*0eae32dcSDimitry Andric   // 'attr' per topology level 'above'
856*0eae32dcSDimitry Andric   int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
857*0eae32dcSDimitry Andric     return _get_ncores_with_attr(attr, above, false);
858*0eae32dcSDimitry Andric   }
859*0eae32dcSDimitry Andric 
860fe6060f1SDimitry Andric #if KMP_AFFINITY_SUPPORTED
861fe6060f1SDimitry Andric   void sort_compact() {
862fe6060f1SDimitry Andric     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
863fe6060f1SDimitry Andric           kmp_hw_thread_t::compare_compact);
864fe6060f1SDimitry Andric   }
865fe6060f1SDimitry Andric #endif
866fe6060f1SDimitry Andric   void print(const char *env_var = "KMP_AFFINITY") const;
867fe6060f1SDimitry Andric   void dump() const;
868fe6060f1SDimitry Andric };
869349cc55cSDimitry Andric extern kmp_topology_t *__kmp_topology;
870fe6060f1SDimitry Andric 
871fe6060f1SDimitry Andric class kmp_hw_subset_t {
872*0eae32dcSDimitry Andric   const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
873*0eae32dcSDimitry Andric 
874fe6060f1SDimitry Andric public:
875*0eae32dcSDimitry Andric   // Describe a machine topology item in KMP_HW_SUBSET
876fe6060f1SDimitry Andric   struct item_t {
877fe6060f1SDimitry Andric     kmp_hw_t type;
878*0eae32dcSDimitry Andric     int num_attrs;
879*0eae32dcSDimitry Andric     int num[MAX_ATTRS];
880*0eae32dcSDimitry Andric     int offset[MAX_ATTRS];
881*0eae32dcSDimitry Andric     kmp_hw_attr_t attr[MAX_ATTRS];
882fe6060f1SDimitry Andric   };
883*0eae32dcSDimitry Andric   // Put parenthesis around max to avoid accidental use of Windows max macro.
884*0eae32dcSDimitry Andric   const static int USE_ALL = (std::numeric_limits<int>::max)();
885fe6060f1SDimitry Andric 
886fe6060f1SDimitry Andric private:
887fe6060f1SDimitry Andric   int depth;
888fe6060f1SDimitry Andric   int capacity;
889fe6060f1SDimitry Andric   item_t *items;
890fe6060f1SDimitry Andric   kmp_uint64 set;
891fe6060f1SDimitry Andric   bool absolute;
892fe6060f1SDimitry Andric   // The set must be able to handle up to KMP_HW_LAST number of layers
893fe6060f1SDimitry Andric   KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
894349cc55cSDimitry Andric   // Sorting the KMP_HW_SUBSET items to follow topology order
895349cc55cSDimitry Andric   // All unknown topology types will be at the beginning of the subset
896349cc55cSDimitry Andric   static int hw_subset_compare(const void *i1, const void *i2) {
897349cc55cSDimitry Andric     kmp_hw_t type1 = ((const item_t *)i1)->type;
898349cc55cSDimitry Andric     kmp_hw_t type2 = ((const item_t *)i2)->type;
899349cc55cSDimitry Andric     int level1 = __kmp_topology->get_level(type1);
900349cc55cSDimitry Andric     int level2 = __kmp_topology->get_level(type2);
901349cc55cSDimitry Andric     return level1 - level2;
902349cc55cSDimitry Andric   }
903fe6060f1SDimitry Andric 
904fe6060f1SDimitry Andric public:
905fe6060f1SDimitry Andric   // Force use of allocate()/deallocate()
906fe6060f1SDimitry Andric   kmp_hw_subset_t() = delete;
907fe6060f1SDimitry Andric   kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
908fe6060f1SDimitry Andric   kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
909fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
910fe6060f1SDimitry Andric   kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
911fe6060f1SDimitry Andric 
912fe6060f1SDimitry Andric   static kmp_hw_subset_t *allocate() {
913fe6060f1SDimitry Andric     int initial_capacity = 5;
914fe6060f1SDimitry Andric     kmp_hw_subset_t *retval =
915fe6060f1SDimitry Andric         (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
916fe6060f1SDimitry Andric     retval->depth = 0;
917fe6060f1SDimitry Andric     retval->capacity = initial_capacity;
918fe6060f1SDimitry Andric     retval->set = 0ull;
919fe6060f1SDimitry Andric     retval->absolute = false;
920fe6060f1SDimitry Andric     retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
921fe6060f1SDimitry Andric     return retval;
922fe6060f1SDimitry Andric   }
923fe6060f1SDimitry Andric   static void deallocate(kmp_hw_subset_t *subset) {
924fe6060f1SDimitry Andric     __kmp_free(subset->items);
925fe6060f1SDimitry Andric     __kmp_free(subset);
926fe6060f1SDimitry Andric   }
927fe6060f1SDimitry Andric   void set_absolute() { absolute = true; }
928fe6060f1SDimitry Andric   bool is_absolute() const { return absolute; }
929*0eae32dcSDimitry Andric   void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
930*0eae32dcSDimitry Andric     for (int i = 0; i < depth; ++i) {
931*0eae32dcSDimitry Andric       // Found an existing item for this layer type
932*0eae32dcSDimitry Andric       // Add the num, offset, and attr to this item
933*0eae32dcSDimitry Andric       if (items[i].type == type) {
934*0eae32dcSDimitry Andric         int idx = items[i].num_attrs++;
935*0eae32dcSDimitry Andric         if ((size_t)idx >= MAX_ATTRS)
936*0eae32dcSDimitry Andric           return;
937*0eae32dcSDimitry Andric         items[i].num[idx] = num;
938*0eae32dcSDimitry Andric         items[i].offset[idx] = offset;
939*0eae32dcSDimitry Andric         items[i].attr[idx] = attr;
940*0eae32dcSDimitry Andric         return;
941*0eae32dcSDimitry Andric       }
942*0eae32dcSDimitry Andric     }
943fe6060f1SDimitry Andric     if (depth == capacity - 1) {
944fe6060f1SDimitry Andric       capacity *= 2;
945fe6060f1SDimitry Andric       item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
946fe6060f1SDimitry Andric       for (int i = 0; i < depth; ++i)
947fe6060f1SDimitry Andric         new_items[i] = items[i];
948fe6060f1SDimitry Andric       __kmp_free(items);
949fe6060f1SDimitry Andric       items = new_items;
950fe6060f1SDimitry Andric     }
951*0eae32dcSDimitry Andric     items[depth].num_attrs = 1;
952fe6060f1SDimitry Andric     items[depth].type = type;
953*0eae32dcSDimitry Andric     items[depth].num[0] = num;
954*0eae32dcSDimitry Andric     items[depth].offset[0] = offset;
955*0eae32dcSDimitry Andric     items[depth].attr[0] = attr;
956fe6060f1SDimitry Andric     depth++;
957fe6060f1SDimitry Andric     set |= (1ull << type);
958fe6060f1SDimitry Andric   }
959fe6060f1SDimitry Andric   int get_depth() const { return depth; }
960fe6060f1SDimitry Andric   const item_t &at(int index) const {
961fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
962fe6060f1SDimitry Andric     return items[index];
963fe6060f1SDimitry Andric   }
964fe6060f1SDimitry Andric   item_t &at(int index) {
965fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
966fe6060f1SDimitry Andric     return items[index];
967fe6060f1SDimitry Andric   }
968fe6060f1SDimitry Andric   void remove(int index) {
969fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
970fe6060f1SDimitry Andric     set &= ~(1ull << items[index].type);
971fe6060f1SDimitry Andric     for (int j = index + 1; j < depth; ++j) {
972fe6060f1SDimitry Andric       items[j - 1] = items[j];
973fe6060f1SDimitry Andric     }
974fe6060f1SDimitry Andric     depth--;
975fe6060f1SDimitry Andric   }
976349cc55cSDimitry Andric   void sort() {
977349cc55cSDimitry Andric     KMP_DEBUG_ASSERT(__kmp_topology);
978349cc55cSDimitry Andric     qsort(items, depth, sizeof(item_t), hw_subset_compare);
979349cc55cSDimitry Andric   }
980fe6060f1SDimitry Andric   bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
981fe6060f1SDimitry Andric   void dump() const {
982fe6060f1SDimitry Andric     printf("**********************\n");
983fe6060f1SDimitry Andric     printf("*** kmp_hw_subset: ***\n");
984fe6060f1SDimitry Andric     printf("* depth: %d\n", depth);
985fe6060f1SDimitry Andric     printf("* items:\n");
986fe6060f1SDimitry Andric     for (int i = 0; i < depth; ++i) {
987*0eae32dcSDimitry Andric       printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
988*0eae32dcSDimitry Andric       for (int j = 0; j < items[i].num_attrs; ++j) {
989*0eae32dcSDimitry Andric         printf("  num: %d, offset: %d, attr: ", items[i].num[j],
990*0eae32dcSDimitry Andric                items[i].offset[j]);
991*0eae32dcSDimitry Andric         if (!items[i].attr[j]) {
992*0eae32dcSDimitry Andric           printf(" (none)\n");
993*0eae32dcSDimitry Andric         } else {
994*0eae32dcSDimitry Andric           printf(
995*0eae32dcSDimitry Andric               " core_type = %s, core_eff = %d\n",
996*0eae32dcSDimitry Andric               __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
997*0eae32dcSDimitry Andric               items[i].attr[j].get_core_eff());
998*0eae32dcSDimitry Andric         }
999*0eae32dcSDimitry Andric       }
1000fe6060f1SDimitry Andric     }
1001fe6060f1SDimitry Andric     printf("* set: 0x%llx\n", set);
1002fe6060f1SDimitry Andric     printf("* absolute: %d\n", absolute);
1003fe6060f1SDimitry Andric     printf("**********************\n");
1004fe6060f1SDimitry Andric   }
1005fe6060f1SDimitry Andric };
1006fe6060f1SDimitry Andric extern kmp_hw_subset_t *__kmp_hw_subset;
10070b57cec5SDimitry Andric 
10080b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
10090b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
10100b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
10110b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
10120b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
10130b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
10140b57cec5SDimitry Andric class hierarchy_info {
10150b57cec5SDimitry Andric public:
10160b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
10170b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
10180b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
10190b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
10200b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
10210b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
10220b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
10230b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
10240b57cec5SDimitry Andric       hierarchy each time we add a level. */
10250b57cec5SDimitry Andric   kmp_uint32 maxLevels;
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
10280b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
10290b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
10300b57cec5SDimitry Andric       all but one trailing 1. */
10310b57cec5SDimitry Andric   kmp_uint32 depth;
10320b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
10330b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
10340b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
10350b57cec5SDimitry Andric   // 2=initialization in progress
10360b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
10370b57cec5SDimitry Andric 
10380b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
10390b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
10400b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
10410b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
10420b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
10430b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
10440b57cec5SDimitry Andric 
1045fe6060f1SDimitry Andric   void deriveLevels() {
1046fe6060f1SDimitry Andric     int hier_depth = __kmp_topology->get_depth();
1047fe6060f1SDimitry Andric     for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1048fe6060f1SDimitry Andric       numPerLevel[level] = __kmp_topology->get_ratio(i);
10490b57cec5SDimitry Andric     }
10500b57cec5SDimitry Andric   }
10510b57cec5SDimitry Andric 
10520b57cec5SDimitry Andric   hierarchy_info()
10530b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
10540b57cec5SDimitry Andric 
10550b57cec5SDimitry Andric   void fini() {
10560b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
10570b57cec5SDimitry Andric       __kmp_free(numPerLevel);
10580b57cec5SDimitry Andric       numPerLevel = NULL;
10590b57cec5SDimitry Andric       uninitialized = not_initialized;
10600b57cec5SDimitry Andric     }
10610b57cec5SDimitry Andric   }
10620b57cec5SDimitry Andric 
1063fe6060f1SDimitry Andric   void init(int num_addrs) {
10640b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
10650b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
10660b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
10670b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
10680b57cec5SDimitry Andric         KMP_CPU_PAUSE();
10690b57cec5SDimitry Andric       return;
10700b57cec5SDimitry Andric     }
10710b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
10720b57cec5SDimitry Andric 
10730b57cec5SDimitry Andric     /* Added explicit initialization of the data fields here to prevent usage of
10740b57cec5SDimitry Andric        dirty value observed when static library is re-initialized multiple times
10750b57cec5SDimitry Andric        (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
10760b57cec5SDimitry Andric        OpenMP). */
10770b57cec5SDimitry Andric     depth = 1;
10780b57cec5SDimitry Andric     resizing = 0;
10790b57cec5SDimitry Andric     maxLevels = 7;
10800b57cec5SDimitry Andric     numPerLevel =
10810b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
10820b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
10830b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
10840b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
10850b57cec5SDimitry Andric       numPerLevel[i] = 1;
10860b57cec5SDimitry Andric       skipPerLevel[i] = 1;
10870b57cec5SDimitry Andric     }
10880b57cec5SDimitry Andric 
10890b57cec5SDimitry Andric     // Sort table by physical ID
1090fe6060f1SDimitry Andric     if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1091fe6060f1SDimitry Andric       deriveLevels();
10920b57cec5SDimitry Andric     } else {
10930b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
10940b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
10950b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
10960b57cec5SDimitry Andric         numPerLevel[1]++;
10970b57cec5SDimitry Andric     }
10980b57cec5SDimitry Andric 
10990b57cec5SDimitry Andric     base_num_threads = num_addrs;
11000b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
11010b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
11020b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
11030b57cec5SDimitry Andric         depth++;
11040b57cec5SDimitry Andric 
11050b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
11060b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
11070b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
11080b57cec5SDimitry Andric     if (branch < minBranch)
11090b57cec5SDimitry Andric       branch = minBranch;
11100b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
11110b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
11120b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
11130b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
11140b57cec5SDimitry Andric           numPerLevel[d]++;
11150b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
11160b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
11170b57cec5SDimitry Andric           depth++;
11180b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
11190b57cec5SDimitry Andric       }
11200b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
11210b57cec5SDimitry Andric         branch = branch >> 1;
11220b57cec5SDimitry Andric         if (branch < 4)
11230b57cec5SDimitry Andric           branch = minBranch;
11240b57cec5SDimitry Andric       }
11250b57cec5SDimitry Andric     }
11260b57cec5SDimitry Andric 
11270b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
11280b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
11290b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
11300b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
11310b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11320b57cec5SDimitry Andric 
11330b57cec5SDimitry Andric     uninitialized = initialized; // One writer
11340b57cec5SDimitry Andric   }
11350b57cec5SDimitry Andric 
11360b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
11370b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
11380b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
11390b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
11400b57cec5SDimitry Andric       KMP_CPU_PAUSE();
11410b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
11420b57cec5SDimitry Andric         return;
11430b57cec5SDimitry Andric       else // try to resize
11440b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
11450b57cec5SDimitry Andric     }
11460b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
11470b57cec5SDimitry Andric     if (nproc <= base_num_threads)
11480b57cec5SDimitry Andric       return; // happy with other thread's resize
11490b57cec5SDimitry Andric 
11500b57cec5SDimitry Andric     // Calculate new maxLevels
11510b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
11520b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
11530b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
11540b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
11550b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11560b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
11570b57cec5SDimitry Andric       old_sz *= 2;
11580b57cec5SDimitry Andric       depth++;
11590b57cec5SDimitry Andric     }
11600b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
11610b57cec5SDimitry Andric       while (nproc > old_sz) {
11620b57cec5SDimitry Andric         old_sz *= 2;
11630b57cec5SDimitry Andric         incs++;
11640b57cec5SDimitry Andric         depth++;
11650b57cec5SDimitry Andric       }
11660b57cec5SDimitry Andric       maxLevels += incs;
11670b57cec5SDimitry Andric 
11680b57cec5SDimitry Andric       // Resize arrays
11690b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
11700b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
11710b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
11720b57cec5SDimitry Andric       numPerLevel =
11730b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
11740b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric       // Copy old elements from old arrays
1177e8d8bef9SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1178e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
11790b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
11800b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
11810b57cec5SDimitry Andric       }
11820b57cec5SDimitry Andric 
11830b57cec5SDimitry Andric       // Init new elements in arrays to 1
1184e8d8bef9SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1185e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
11860b57cec5SDimitry Andric         numPerLevel[i] = 1;
11870b57cec5SDimitry Andric         skipPerLevel[i] = 1;
11880b57cec5SDimitry Andric       }
11890b57cec5SDimitry Andric 
11900b57cec5SDimitry Andric       // Free old arrays
11910b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
11920b57cec5SDimitry Andric     }
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
11950b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
11960b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
11970b57cec5SDimitry Andric 
11980b57cec5SDimitry Andric     base_num_threads = nproc;
11990b57cec5SDimitry Andric     resizing = 0; // One writer
12000b57cec5SDimitry Andric   }
12010b57cec5SDimitry Andric };
12020b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
1203