xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
140b57cec5SDimitry Andric #define KMP_AFFINITY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp.h"
170b57cec5SDimitry Andric #include "kmp_os.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
200b57cec5SDimitry Andric #if KMP_USE_HWLOC
210b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
220b57cec5SDimitry Andric public:
230b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
240b57cec5SDimitry Andric     hwloc_cpuset_t mask;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric   public:
270b57cec5SDimitry Andric     Mask() {
280b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
290b57cec5SDimitry Andric       this->zero();
300b57cec5SDimitry Andric     }
310b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
320b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
330b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
340b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
350b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
360b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
370b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
380b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
390b57cec5SDimitry Andric     }
400b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
410b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
420b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
430b57cec5SDimitry Andric     }
440b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
450b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
460b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
470b57cec5SDimitry Andric     }
480b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
490b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
500b57cec5SDimitry Andric     int end() const override { return -1; }
510b57cec5SDimitry Andric     int next(int previous) const override {
520b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
530b57cec5SDimitry Andric     }
540b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
550b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
560b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
57*e8d8bef9SDimitry Andric       long retval =
580b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
590b57cec5SDimitry Andric       if (retval >= 0) {
600b57cec5SDimitry Andric         return 0;
610b57cec5SDimitry Andric       }
620b57cec5SDimitry Andric       int error = errno;
630b57cec5SDimitry Andric       if (abort_on_error) {
640b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
650b57cec5SDimitry Andric       }
660b57cec5SDimitry Andric       return error;
670b57cec5SDimitry Andric     }
680b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
690b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70*e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
71*e8d8bef9SDimitry Andric       long retval =
720b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
730b57cec5SDimitry Andric       if (retval >= 0) {
740b57cec5SDimitry Andric         return 0;
750b57cec5SDimitry Andric       }
760b57cec5SDimitry Andric       int error = errno;
770b57cec5SDimitry Andric       if (abort_on_error) {
780b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
790b57cec5SDimitry Andric       }
800b57cec5SDimitry Andric       return error;
810b57cec5SDimitry Andric     }
82*e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS
83*e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
84*e8d8bef9SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
85*e8d8bef9SDimitry Andric                   "Illegal set process affinity operation when not capable");
86*e8d8bef9SDimitry Andric       int error = 0;
87*e8d8bef9SDimitry Andric       const hwloc_topology_support *support =
88*e8d8bef9SDimitry Andric           hwloc_topology_get_support(__kmp_hwloc_topology);
89*e8d8bef9SDimitry Andric       if (support->cpubind->set_proc_cpubind) {
90*e8d8bef9SDimitry Andric         int retval;
91*e8d8bef9SDimitry Andric         retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
92*e8d8bef9SDimitry Andric                                    HWLOC_CPUBIND_PROCESS);
93*e8d8bef9SDimitry Andric         if (retval >= 0)
94*e8d8bef9SDimitry Andric           return 0;
95*e8d8bef9SDimitry Andric         error = errno;
96*e8d8bef9SDimitry Andric         if (abort_on_error)
97*e8d8bef9SDimitry Andric           __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
98*e8d8bef9SDimitry Andric       }
99*e8d8bef9SDimitry Andric       return error;
100*e8d8bef9SDimitry Andric     }
101*e8d8bef9SDimitry Andric #endif
1020b57cec5SDimitry Andric     int get_proc_group() const override {
1030b57cec5SDimitry Andric       int group = -1;
1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS
1050b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
1060b57cec5SDimitry Andric         return 1;
1070b57cec5SDimitry Andric       }
1080b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
1090b57cec5SDimitry Andric         // On windows, the long type is always 32 bits
1100b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
1110b57cec5SDimitry Andric         unsigned long second_32_bits =
1120b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
1130b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
1140b57cec5SDimitry Andric           continue;
1150b57cec5SDimitry Andric         }
1160b57cec5SDimitry Andric         if (group >= 0) {
1170b57cec5SDimitry Andric           return -1;
1180b57cec5SDimitry Andric         }
1190b57cec5SDimitry Andric         group = i;
1200b57cec5SDimitry Andric       }
1210b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
1220b57cec5SDimitry Andric       return group;
1230b57cec5SDimitry Andric     }
1240b57cec5SDimitry Andric   };
1250b57cec5SDimitry Andric   void determine_capable(const char *var) override {
1260b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
1270b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
1280b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
1290b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1300b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1310b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
1320b57cec5SDimitry Andric       }
1330b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
1340b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
1350b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
1360b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
1370b57cec5SDimitry Andric       }
1380b57cec5SDimitry Andric     }
1390b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
1400b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
1410b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
1420b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
1430b57cec5SDimitry Andric     // hwloc_* API functions?
1440b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
1450b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
1460b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
1470b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
1480b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
1490b57cec5SDimitry Andric     } else {
1500b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
1510b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
1520b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
1530b57cec5SDimitry Andric     }
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric   void bind_thread(int which) override {
1560b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
1570b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
1580b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
1590b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
1600b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
1610b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
1620b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
1630b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
1640b57cec5SDimitry Andric   }
1650b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
1660b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
1670b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
1680b57cec5SDimitry Andric     return new Mask[num];
1690b57cec5SDimitry Andric   }
1700b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
1710b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1720b57cec5SDimitry Andric     delete[] hwloc_array;
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
1750b57cec5SDimitry Andric                                       int index) override {
1760b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
1770b57cec5SDimitry Andric     return &(hwloc_array[index]);
1780b57cec5SDimitry Andric   }
1790b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
1800b57cec5SDimitry Andric };
1810b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
1820b57cec5SDimitry Andric 
183489b1cf2SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
/* No fallback numbers are known for this architecture. That is only an error
   when <sys/syscall.h> did not provide the constants itself. */
#if !defined(__NR_sched_setaffinity) || !defined(__NR_sched_getaffinity)
#error Unknown or unsupported architecture
#endif
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
2620b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
2630b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
264*e8d8bef9SDimitry Andric     typedef unsigned long mask_t;
265*e8d8bef9SDimitry Andric     typedef decltype(__kmp_affin_mask_size) mask_size_type;
266*e8d8bef9SDimitry Andric     static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
267*e8d8bef9SDimitry Andric     static const mask_t ONE = 1;
268*e8d8bef9SDimitry Andric     mask_size_type get_num_mask_types() const {
269*e8d8bef9SDimitry Andric       return __kmp_affin_mask_size / sizeof(mask_t);
270*e8d8bef9SDimitry Andric     }
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric   public:
2730b57cec5SDimitry Andric     mask_t *mask;
2740b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
2750b57cec5SDimitry Andric     ~Mask() {
2760b57cec5SDimitry Andric       if (mask)
2770b57cec5SDimitry Andric         __kmp_free(mask);
2780b57cec5SDimitry Andric     }
2790b57cec5SDimitry Andric     void set(int i) override {
280*e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
2810b57cec5SDimitry Andric     }
2820b57cec5SDimitry Andric     bool is_set(int i) const override {
283*e8d8bef9SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
2840b57cec5SDimitry Andric     }
2850b57cec5SDimitry Andric     void clear(int i) override {
286*e8d8bef9SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
2870b57cec5SDimitry Andric     }
2880b57cec5SDimitry Andric     void zero() override {
289*e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
290*e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
291*e8d8bef9SDimitry Andric         mask[i] = (mask_t)0;
2920b57cec5SDimitry Andric     }
2930b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
2940b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
295*e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
296*e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
2970b57cec5SDimitry Andric         mask[i] = convert->mask[i];
2980b57cec5SDimitry Andric     }
2990b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
3000b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
301*e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
302*e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3030b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
3040b57cec5SDimitry Andric     }
3050b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
3060b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
307*e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
308*e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3090b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
3100b57cec5SDimitry Andric     }
3110b57cec5SDimitry Andric     void bitwise_not() override {
312*e8d8bef9SDimitry Andric       mask_size_type e = get_num_mask_types();
313*e8d8bef9SDimitry Andric       for (mask_size_type i = 0; i < e; ++i)
3140b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
3150b57cec5SDimitry Andric     }
3160b57cec5SDimitry Andric     int begin() const override {
3170b57cec5SDimitry Andric       int retval = 0;
3180b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3190b57cec5SDimitry Andric         ++retval;
3200b57cec5SDimitry Andric       return retval;
3210b57cec5SDimitry Andric     }
322*e8d8bef9SDimitry Andric     int end() const override {
323*e8d8bef9SDimitry Andric       int e;
324*e8d8bef9SDimitry Andric       __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
325*e8d8bef9SDimitry Andric       return e;
326*e8d8bef9SDimitry Andric     }
3270b57cec5SDimitry Andric     int next(int previous) const override {
3280b57cec5SDimitry Andric       int retval = previous + 1;
3290b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
3300b57cec5SDimitry Andric         ++retval;
3310b57cec5SDimitry Andric       return retval;
3320b57cec5SDimitry Andric     }
3330b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
3340b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
3350b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
336489b1cf2SDimitry Andric #if KMP_OS_LINUX
337*e8d8bef9SDimitry Andric       long retval =
3380b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
339489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
3405ffd83dbSDimitry Andric       int r =
341489b1cf2SDimitry Andric           pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
3425ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
343489b1cf2SDimitry Andric #endif
3440b57cec5SDimitry Andric       if (retval >= 0) {
3450b57cec5SDimitry Andric         return 0;
3460b57cec5SDimitry Andric       }
3470b57cec5SDimitry Andric       int error = errno;
3480b57cec5SDimitry Andric       if (abort_on_error) {
3490b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3500b57cec5SDimitry Andric       }
3510b57cec5SDimitry Andric       return error;
3520b57cec5SDimitry Andric     }
3530b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
3540b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
355*e8d8bef9SDimitry Andric                   "Illegal set affinity operation when not capable");
356489b1cf2SDimitry Andric #if KMP_OS_LINUX
357*e8d8bef9SDimitry Andric       long retval =
3580b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
359489b1cf2SDimitry Andric #elif KMP_OS_FREEBSD
3605ffd83dbSDimitry Andric       int r =
361489b1cf2SDimitry Andric           pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
3625ffd83dbSDimitry Andric       int retval = (r == 0 ? 0 : -1);
363489b1cf2SDimitry Andric #endif
3640b57cec5SDimitry Andric       if (retval >= 0) {
3650b57cec5SDimitry Andric         return 0;
3660b57cec5SDimitry Andric       }
3670b57cec5SDimitry Andric       int error = errno;
3680b57cec5SDimitry Andric       if (abort_on_error) {
3690b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
3700b57cec5SDimitry Andric       }
3710b57cec5SDimitry Andric       return error;
3720b57cec5SDimitry Andric     }
3730b57cec5SDimitry Andric   };
3740b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
3750b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
3760b57cec5SDimitry Andric   }
3770b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
3780b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
3790b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
3800b57cec5SDimitry Andric     return retval;
3810b57cec5SDimitry Andric   }
3820b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
3830b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
3840b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
3850b57cec5SDimitry Andric     delete native_mask;
3860b57cec5SDimitry Andric   }
3870b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
3880b57cec5SDimitry Andric     return new Mask[num];
3890b57cec5SDimitry Andric   }
3900b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
3910b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3920b57cec5SDimitry Andric     delete[] linux_array;
3930b57cec5SDimitry Andric   }
3940b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
3950b57cec5SDimitry Andric                                       int index) override {
3960b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
3970b57cec5SDimitry Andric     return &(linux_array[index]);
3980b57cec5SDimitry Andric   }
3990b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
4000b57cec5SDimitry Andric };
401489b1cf2SDimitry Andric #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric #if KMP_OS_WINDOWS
4040b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
4050b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
4060b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
4070b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
4080b57cec5SDimitry Andric     mask_t *mask;
4090b57cec5SDimitry Andric 
4100b57cec5SDimitry Andric   public:
4110b57cec5SDimitry Andric     Mask() {
4120b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
4130b57cec5SDimitry Andric     }
4140b57cec5SDimitry Andric     ~Mask() {
4150b57cec5SDimitry Andric       if (mask)
4160b57cec5SDimitry Andric         __kmp_free(mask);
4170b57cec5SDimitry Andric     }
4180b57cec5SDimitry Andric     void set(int i) override {
4190b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
4200b57cec5SDimitry Andric     }
4210b57cec5SDimitry Andric     bool is_set(int i) const override {
4220b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
4230b57cec5SDimitry Andric     }
4240b57cec5SDimitry Andric     void clear(int i) override {
4250b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
4260b57cec5SDimitry Andric     }
4270b57cec5SDimitry Andric     void zero() override {
4280b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4290b57cec5SDimitry Andric         mask[i] = 0;
4300b57cec5SDimitry Andric     }
4310b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
4320b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
4330b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4340b57cec5SDimitry Andric         mask[i] = convert->mask[i];
4350b57cec5SDimitry Andric     }
4360b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
4370b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4380b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4390b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
4400b57cec5SDimitry Andric     }
4410b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
4420b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
4430b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4440b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
4450b57cec5SDimitry Andric     }
4460b57cec5SDimitry Andric     void bitwise_not() override {
4470b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
4480b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
4490b57cec5SDimitry Andric     }
4500b57cec5SDimitry Andric     int begin() const override {
4510b57cec5SDimitry Andric       int retval = 0;
4520b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4530b57cec5SDimitry Andric         ++retval;
4540b57cec5SDimitry Andric       return retval;
4550b57cec5SDimitry Andric     }
4560b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
4570b57cec5SDimitry Andric     int next(int previous) const override {
4580b57cec5SDimitry Andric       int retval = previous + 1;
4590b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
4600b57cec5SDimitry Andric         ++retval;
4610b57cec5SDimitry Andric       return retval;
4620b57cec5SDimitry Andric     }
463*e8d8bef9SDimitry Andric     int set_process_affinity(bool abort_on_error) const override {
464*e8d8bef9SDimitry Andric       if (__kmp_num_proc_groups <= 1) {
465*e8d8bef9SDimitry Andric         if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
466*e8d8bef9SDimitry Andric           DWORD error = GetLastError();
467*e8d8bef9SDimitry Andric           if (abort_on_error) {
468*e8d8bef9SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
469*e8d8bef9SDimitry Andric                         __kmp_msg_null);
470*e8d8bef9SDimitry Andric           }
471*e8d8bef9SDimitry Andric           return error;
472*e8d8bef9SDimitry Andric         }
473*e8d8bef9SDimitry Andric       }
474*e8d8bef9SDimitry Andric       return 0;
475*e8d8bef9SDimitry Andric     }
4760b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
4770b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
4780b57cec5SDimitry Andric         // Check for a valid mask.
4790b57cec5SDimitry Andric         GROUP_AFFINITY ga;
4800b57cec5SDimitry Andric         int group = get_proc_group();
4810b57cec5SDimitry Andric         if (group < 0) {
4820b57cec5SDimitry Andric           if (abort_on_error) {
4830b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4840b57cec5SDimitry Andric           }
4850b57cec5SDimitry Andric           return -1;
4860b57cec5SDimitry Andric         }
4870b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
4880b57cec5SDimitry Andric         // and make the system call to set affinity.
4890b57cec5SDimitry Andric         ga.Group = group;
4900b57cec5SDimitry Andric         ga.Mask = mask[group];
4910b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
4940b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
4950b57cec5SDimitry Andric           DWORD error = GetLastError();
4960b57cec5SDimitry Andric           if (abort_on_error) {
4970b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
4980b57cec5SDimitry Andric                         __kmp_msg_null);
4990b57cec5SDimitry Andric           }
5000b57cec5SDimitry Andric           return error;
5010b57cec5SDimitry Andric         }
5020b57cec5SDimitry Andric       } else {
5030b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
5040b57cec5SDimitry Andric           DWORD error = GetLastError();
5050b57cec5SDimitry Andric           if (abort_on_error) {
5060b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
5070b57cec5SDimitry Andric                         __kmp_msg_null);
5080b57cec5SDimitry Andric           }
5090b57cec5SDimitry Andric           return error;
5100b57cec5SDimitry Andric         }
5110b57cec5SDimitry Andric       }
5120b57cec5SDimitry Andric       return 0;
5130b57cec5SDimitry Andric     }
5140b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
5150b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
5160b57cec5SDimitry Andric         this->zero();
5170b57cec5SDimitry Andric         GROUP_AFFINITY ga;
5180b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
5190b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
5200b57cec5SDimitry Andric           DWORD error = GetLastError();
5210b57cec5SDimitry Andric           if (abort_on_error) {
5220b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
5230b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5240b57cec5SDimitry Andric           }
5250b57cec5SDimitry Andric           return error;
5260b57cec5SDimitry Andric         }
5270b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
5280b57cec5SDimitry Andric             (ga.Mask == 0)) {
5290b57cec5SDimitry Andric           return -1;
5300b57cec5SDimitry Andric         }
5310b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
5320b57cec5SDimitry Andric       } else {
5330b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
5340b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
5350b57cec5SDimitry Andric           DWORD error = GetLastError();
5360b57cec5SDimitry Andric           if (abort_on_error) {
5370b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
5380b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5390b57cec5SDimitry Andric           }
5400b57cec5SDimitry Andric           return error;
5410b57cec5SDimitry Andric         }
5420b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
5430b57cec5SDimitry Andric         if (!retval) {
5440b57cec5SDimitry Andric           DWORD error = GetLastError();
5450b57cec5SDimitry Andric           if (abort_on_error) {
5460b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5470b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5480b57cec5SDimitry Andric           }
5490b57cec5SDimitry Andric           return error;
5500b57cec5SDimitry Andric         }
5510b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
5520b57cec5SDimitry Andric         if (!newMask) {
5530b57cec5SDimitry Andric           DWORD error = GetLastError();
5540b57cec5SDimitry Andric           if (abort_on_error) {
5550b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
5560b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
5570b57cec5SDimitry Andric           }
5580b57cec5SDimitry Andric         }
5590b57cec5SDimitry Andric         *mask = retval;
5600b57cec5SDimitry Andric       }
5610b57cec5SDimitry Andric       return 0;
5620b57cec5SDimitry Andric     }
5630b57cec5SDimitry Andric     int get_proc_group() const override {
5640b57cec5SDimitry Andric       int group = -1;
5650b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
5660b57cec5SDimitry Andric         return 1;
5670b57cec5SDimitry Andric       }
5680b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
5690b57cec5SDimitry Andric         if (mask[i] == 0)
5700b57cec5SDimitry Andric           continue;
5710b57cec5SDimitry Andric         if (group >= 0)
5720b57cec5SDimitry Andric           return -1;
5730b57cec5SDimitry Andric         group = i;
5740b57cec5SDimitry Andric       }
5750b57cec5SDimitry Andric       return group;
5760b57cec5SDimitry Andric     }
5770b57cec5SDimitry Andric   };
5780b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
5790b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
5800b57cec5SDimitry Andric   }
5810b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
5820b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
5830b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
5840b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
5850b57cec5SDimitry Andric     return new Mask[num];
5860b57cec5SDimitry Andric   }
5870b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
5880b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5890b57cec5SDimitry Andric     delete[] windows_array;
5900b57cec5SDimitry Andric   }
5910b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
5920b57cec5SDimitry Andric                                       int index) override {
5930b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
5940b57cec5SDimitry Andric     return &(windows_array[index]);
5950b57cec5SDimitry Andric   }
5960b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
5970b57cec5SDimitry Andric };
5980b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
5990b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric class Address {
6020b57cec5SDimitry Andric public:
6030b57cec5SDimitry Andric   static const unsigned maxDepth = 32;
6040b57cec5SDimitry Andric   unsigned labels[maxDepth];
6050b57cec5SDimitry Andric   unsigned childNums[maxDepth];
6060b57cec5SDimitry Andric   unsigned depth;
6070b57cec5SDimitry Andric   unsigned leader;
6080b57cec5SDimitry Andric   Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
6090b57cec5SDimitry Andric   Address &operator=(const Address &b) {
6100b57cec5SDimitry Andric     depth = b.depth;
6110b57cec5SDimitry Andric     for (unsigned i = 0; i < depth; i++) {
6120b57cec5SDimitry Andric       labels[i] = b.labels[i];
6130b57cec5SDimitry Andric       childNums[i] = b.childNums[i];
6140b57cec5SDimitry Andric     }
6150b57cec5SDimitry Andric     leader = FALSE;
6160b57cec5SDimitry Andric     return *this;
6170b57cec5SDimitry Andric   }
6180b57cec5SDimitry Andric   bool operator==(const Address &b) const {
6190b57cec5SDimitry Andric     if (depth != b.depth)
6200b57cec5SDimitry Andric       return false;
6210b57cec5SDimitry Andric     for (unsigned i = 0; i < depth; i++)
6220b57cec5SDimitry Andric       if (labels[i] != b.labels[i])
6230b57cec5SDimitry Andric         return false;
6240b57cec5SDimitry Andric     return true;
6250b57cec5SDimitry Andric   }
6260b57cec5SDimitry Andric   bool isClose(const Address &b, int level) const {
6270b57cec5SDimitry Andric     if (depth != b.depth)
6280b57cec5SDimitry Andric       return false;
6290b57cec5SDimitry Andric     if ((unsigned)level >= depth)
6300b57cec5SDimitry Andric       return true;
6310b57cec5SDimitry Andric     for (unsigned i = 0; i < (depth - level); i++)
6320b57cec5SDimitry Andric       if (labels[i] != b.labels[i])
6330b57cec5SDimitry Andric         return false;
6340b57cec5SDimitry Andric     return true;
6350b57cec5SDimitry Andric   }
6360b57cec5SDimitry Andric   bool operator!=(const Address &b) const { return !operator==(b); }
6370b57cec5SDimitry Andric   void print() const {
6380b57cec5SDimitry Andric     unsigned i;
6390b57cec5SDimitry Andric     printf("Depth: %u --- ", depth);
6400b57cec5SDimitry Andric     for (i = 0; i < depth; i++) {
6410b57cec5SDimitry Andric       printf("%u ", labels[i]);
6420b57cec5SDimitry Andric     }
6430b57cec5SDimitry Andric   }
6440b57cec5SDimitry Andric };
6450b57cec5SDimitry Andric 
6460b57cec5SDimitry Andric class AddrUnsPair {
6470b57cec5SDimitry Andric public:
6480b57cec5SDimitry Andric   Address first;
6490b57cec5SDimitry Andric   unsigned second;
6500b57cec5SDimitry Andric   AddrUnsPair(Address _first, unsigned _second)
6510b57cec5SDimitry Andric       : first(_first), second(_second) {}
6520b57cec5SDimitry Andric   AddrUnsPair &operator=(const AddrUnsPair &b) {
6530b57cec5SDimitry Andric     first = b.first;
6540b57cec5SDimitry Andric     second = b.second;
6550b57cec5SDimitry Andric     return *this;
6560b57cec5SDimitry Andric   }
6570b57cec5SDimitry Andric   void print() const {
6580b57cec5SDimitry Andric     printf("first = ");
6590b57cec5SDimitry Andric     first.print();
6600b57cec5SDimitry Andric     printf(" --- second = %u", second);
6610b57cec5SDimitry Andric   }
6620b57cec5SDimitry Andric   bool operator==(const AddrUnsPair &b) const {
6630b57cec5SDimitry Andric     if (first != b.first)
6640b57cec5SDimitry Andric       return false;
6650b57cec5SDimitry Andric     if (second != b.second)
6660b57cec5SDimitry Andric       return false;
6670b57cec5SDimitry Andric     return true;
6680b57cec5SDimitry Andric   }
6690b57cec5SDimitry Andric   bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
6700b57cec5SDimitry Andric };
6710b57cec5SDimitry Andric 
6720b57cec5SDimitry Andric static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
6730b57cec5SDimitry Andric   const Address *aa = &(((const AddrUnsPair *)a)->first);
6740b57cec5SDimitry Andric   const Address *bb = &(((const AddrUnsPair *)b)->first);
6750b57cec5SDimitry Andric   unsigned depth = aa->depth;
6760b57cec5SDimitry Andric   unsigned i;
6770b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(depth == bb->depth);
6780b57cec5SDimitry Andric   for (i = 0; i < depth; i++) {
6790b57cec5SDimitry Andric     if (aa->labels[i] < bb->labels[i])
6800b57cec5SDimitry Andric       return -1;
6810b57cec5SDimitry Andric     if (aa->labels[i] > bb->labels[i])
6820b57cec5SDimitry Andric       return 1;
6830b57cec5SDimitry Andric   }
6840b57cec5SDimitry Andric   return 0;
6850b57cec5SDimitry Andric }
6860b57cec5SDimitry Andric 
6870b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
6880b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
6890b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
6900b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
6910b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
6920b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
6930b57cec5SDimitry Andric class hierarchy_info {
6940b57cec5SDimitry Andric public:
6950b57cec5SDimitry Andric   /* Good default values for number of leaves and branching factor, given no
6960b57cec5SDimitry Andric      affinity information. Behaves a bit like hyper barrier. */
6970b57cec5SDimitry Andric   static const kmp_uint32 maxLeaves = 4;
6980b57cec5SDimitry Andric   static const kmp_uint32 minBranch = 4;
6990b57cec5SDimitry Andric   /** Number of levels in the hierarchy. Typical levels are threads/core,
7000b57cec5SDimitry Andric       cores/package or socket, packages/node, nodes/machine, etc. We don't want
7010b57cec5SDimitry Andric       to get specific with nomenclature. When the machine is oversubscribed we
7020b57cec5SDimitry Andric       add levels to duplicate the hierarchy, doubling the thread capacity of the
7030b57cec5SDimitry Andric       hierarchy each time we add a level. */
7040b57cec5SDimitry Andric   kmp_uint32 maxLevels;
7050b57cec5SDimitry Andric 
7060b57cec5SDimitry Andric   /** This is specifically the depth of the machine configuration hierarchy, in
7070b57cec5SDimitry Andric       terms of the number of levels along the longest path from root to any
7080b57cec5SDimitry Andric       leaf. It corresponds to the number of entries in numPerLevel if we exclude
7090b57cec5SDimitry Andric       all but one trailing 1. */
7100b57cec5SDimitry Andric   kmp_uint32 depth;
7110b57cec5SDimitry Andric   kmp_uint32 base_num_threads;
7120b57cec5SDimitry Andric   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
7130b57cec5SDimitry Andric   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
7140b57cec5SDimitry Andric   // 2=initialization in progress
7150b57cec5SDimitry Andric   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
7160b57cec5SDimitry Andric 
7170b57cec5SDimitry Andric   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
7180b57cec5SDimitry Andric       the parent of a node at level i has. For example, if we have a machine
7190b57cec5SDimitry Andric       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
7200b57cec5SDimitry Andric       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
7210b57cec5SDimitry Andric   kmp_uint32 *numPerLevel;
7220b57cec5SDimitry Andric   kmp_uint32 *skipPerLevel;
7230b57cec5SDimitry Andric 
7240b57cec5SDimitry Andric   void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
7250b57cec5SDimitry Andric     int hier_depth = adr2os[0].first.depth;
7260b57cec5SDimitry Andric     int level = 0;
7270b57cec5SDimitry Andric     for (int i = hier_depth - 1; i >= 0; --i) {
7280b57cec5SDimitry Andric       int max = -1;
7290b57cec5SDimitry Andric       for (int j = 0; j < num_addrs; ++j) {
7300b57cec5SDimitry Andric         int next = adr2os[j].first.childNums[i];
7310b57cec5SDimitry Andric         if (next > max)
7320b57cec5SDimitry Andric           max = next;
7330b57cec5SDimitry Andric       }
7340b57cec5SDimitry Andric       numPerLevel[level] = max + 1;
7350b57cec5SDimitry Andric       ++level;
7360b57cec5SDimitry Andric     }
7370b57cec5SDimitry Andric   }
7380b57cec5SDimitry Andric 
7390b57cec5SDimitry Andric   hierarchy_info()
7400b57cec5SDimitry Andric       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
7410b57cec5SDimitry Andric 
7420b57cec5SDimitry Andric   void fini() {
7430b57cec5SDimitry Andric     if (!uninitialized && numPerLevel) {
7440b57cec5SDimitry Andric       __kmp_free(numPerLevel);
7450b57cec5SDimitry Andric       numPerLevel = NULL;
7460b57cec5SDimitry Andric       uninitialized = not_initialized;
7470b57cec5SDimitry Andric     }
7480b57cec5SDimitry Andric   }
7490b57cec5SDimitry Andric 
7500b57cec5SDimitry Andric   void init(AddrUnsPair *adr2os, int num_addrs) {
7510b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
7520b57cec5SDimitry Andric         &uninitialized, not_initialized, initializing);
7530b57cec5SDimitry Andric     if (bool_result == 0) { // Wait for initialization
7540b57cec5SDimitry Andric       while (TCR_1(uninitialized) != initialized)
7550b57cec5SDimitry Andric         KMP_CPU_PAUSE();
7560b57cec5SDimitry Andric       return;
7570b57cec5SDimitry Andric     }
7580b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result == 1);
7590b57cec5SDimitry Andric 
7600b57cec5SDimitry Andric     /* Added explicit initialization of the data fields here to prevent usage of
7610b57cec5SDimitry Andric        dirty value observed when static library is re-initialized multiple times
7620b57cec5SDimitry Andric        (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
7630b57cec5SDimitry Andric        OpenMP). */
7640b57cec5SDimitry Andric     depth = 1;
7650b57cec5SDimitry Andric     resizing = 0;
7660b57cec5SDimitry Andric     maxLevels = 7;
7670b57cec5SDimitry Andric     numPerLevel =
7680b57cec5SDimitry Andric         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
7690b57cec5SDimitry Andric     skipPerLevel = &(numPerLevel[maxLevels]);
7700b57cec5SDimitry Andric     for (kmp_uint32 i = 0; i < maxLevels;
7710b57cec5SDimitry Andric          ++i) { // init numPerLevel[*] to 1 item per level
7720b57cec5SDimitry Andric       numPerLevel[i] = 1;
7730b57cec5SDimitry Andric       skipPerLevel[i] = 1;
7740b57cec5SDimitry Andric     }
7750b57cec5SDimitry Andric 
7760b57cec5SDimitry Andric     // Sort table by physical ID
7770b57cec5SDimitry Andric     if (adr2os) {
7780b57cec5SDimitry Andric       qsort(adr2os, num_addrs, sizeof(*adr2os),
7790b57cec5SDimitry Andric             __kmp_affinity_cmp_Address_labels);
7800b57cec5SDimitry Andric       deriveLevels(adr2os, num_addrs);
7810b57cec5SDimitry Andric     } else {
7820b57cec5SDimitry Andric       numPerLevel[0] = maxLeaves;
7830b57cec5SDimitry Andric       numPerLevel[1] = num_addrs / maxLeaves;
7840b57cec5SDimitry Andric       if (num_addrs % maxLeaves)
7850b57cec5SDimitry Andric         numPerLevel[1]++;
7860b57cec5SDimitry Andric     }
7870b57cec5SDimitry Andric 
7880b57cec5SDimitry Andric     base_num_threads = num_addrs;
7890b57cec5SDimitry Andric     for (int i = maxLevels - 1; i >= 0;
7900b57cec5SDimitry Andric          --i) // count non-empty levels to get depth
7910b57cec5SDimitry Andric       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
7920b57cec5SDimitry Andric         depth++;
7930b57cec5SDimitry Andric 
7940b57cec5SDimitry Andric     kmp_uint32 branch = minBranch;
7950b57cec5SDimitry Andric     if (numPerLevel[0] == 1)
7960b57cec5SDimitry Andric       branch = num_addrs / maxLeaves;
7970b57cec5SDimitry Andric     if (branch < minBranch)
7980b57cec5SDimitry Andric       branch = minBranch;
7990b57cec5SDimitry Andric     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
8000b57cec5SDimitry Andric       while (numPerLevel[d] > branch ||
8010b57cec5SDimitry Andric              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
8020b57cec5SDimitry Andric         if (numPerLevel[d] & 1)
8030b57cec5SDimitry Andric           numPerLevel[d]++;
8040b57cec5SDimitry Andric         numPerLevel[d] = numPerLevel[d] >> 1;
8050b57cec5SDimitry Andric         if (numPerLevel[d + 1] == 1)
8060b57cec5SDimitry Andric           depth++;
8070b57cec5SDimitry Andric         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
8080b57cec5SDimitry Andric       }
8090b57cec5SDimitry Andric       if (numPerLevel[0] == 1) {
8100b57cec5SDimitry Andric         branch = branch >> 1;
8110b57cec5SDimitry Andric         if (branch < 4)
8120b57cec5SDimitry Andric           branch = minBranch;
8130b57cec5SDimitry Andric       }
8140b57cec5SDimitry Andric     }
8150b57cec5SDimitry Andric 
8160b57cec5SDimitry Andric     for (kmp_uint32 i = 1; i < depth; ++i)
8170b57cec5SDimitry Andric       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
8180b57cec5SDimitry Andric     // Fill in hierarchy in the case of oversubscription
8190b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels; ++i)
8200b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
8210b57cec5SDimitry Andric 
8220b57cec5SDimitry Andric     uninitialized = initialized; // One writer
8230b57cec5SDimitry Andric   }
8240b57cec5SDimitry Andric 
8250b57cec5SDimitry Andric   // Resize the hierarchy if nproc changes to something larger than before
8260b57cec5SDimitry Andric   void resize(kmp_uint32 nproc) {
8270b57cec5SDimitry Andric     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
8280b57cec5SDimitry Andric     while (bool_result == 0) { // someone else is trying to resize
8290b57cec5SDimitry Andric       KMP_CPU_PAUSE();
8300b57cec5SDimitry Andric       if (nproc <= base_num_threads) // happy with other thread's resize
8310b57cec5SDimitry Andric         return;
8320b57cec5SDimitry Andric       else // try to resize
8330b57cec5SDimitry Andric         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
8340b57cec5SDimitry Andric     }
8350b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(bool_result != 0);
8360b57cec5SDimitry Andric     if (nproc <= base_num_threads)
8370b57cec5SDimitry Andric       return; // happy with other thread's resize
8380b57cec5SDimitry Andric 
8390b57cec5SDimitry Andric     // Calculate new maxLevels
8400b57cec5SDimitry Andric     kmp_uint32 old_sz = skipPerLevel[depth - 1];
8410b57cec5SDimitry Andric     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
8420b57cec5SDimitry Andric     // First see if old maxLevels is enough to contain new size
8430b57cec5SDimitry Andric     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
8440b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
8450b57cec5SDimitry Andric       numPerLevel[i - 1] *= 2;
8460b57cec5SDimitry Andric       old_sz *= 2;
8470b57cec5SDimitry Andric       depth++;
8480b57cec5SDimitry Andric     }
8490b57cec5SDimitry Andric     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
8500b57cec5SDimitry Andric       while (nproc > old_sz) {
8510b57cec5SDimitry Andric         old_sz *= 2;
8520b57cec5SDimitry Andric         incs++;
8530b57cec5SDimitry Andric         depth++;
8540b57cec5SDimitry Andric       }
8550b57cec5SDimitry Andric       maxLevels += incs;
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric       // Resize arrays
8580b57cec5SDimitry Andric       kmp_uint32 *old_numPerLevel = numPerLevel;
8590b57cec5SDimitry Andric       kmp_uint32 *old_skipPerLevel = skipPerLevel;
8600b57cec5SDimitry Andric       numPerLevel = skipPerLevel = NULL;
8610b57cec5SDimitry Andric       numPerLevel =
8620b57cec5SDimitry Andric           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
8630b57cec5SDimitry Andric       skipPerLevel = &(numPerLevel[maxLevels]);
8640b57cec5SDimitry Andric 
8650b57cec5SDimitry Andric       // Copy old elements from old arrays
866*e8d8bef9SDimitry Andric       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
867*e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
8680b57cec5SDimitry Andric         numPerLevel[i] = old_numPerLevel[i];
8690b57cec5SDimitry Andric         skipPerLevel[i] = old_skipPerLevel[i];
8700b57cec5SDimitry Andric       }
8710b57cec5SDimitry Andric 
8720b57cec5SDimitry Andric       // Init new elements in arrays to 1
873*e8d8bef9SDimitry Andric       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
874*e8d8bef9SDimitry Andric         // init numPerLevel[*] to 1 item per level
8750b57cec5SDimitry Andric         numPerLevel[i] = 1;
8760b57cec5SDimitry Andric         skipPerLevel[i] = 1;
8770b57cec5SDimitry Andric       }
8780b57cec5SDimitry Andric 
8790b57cec5SDimitry Andric       // Free old arrays
8800b57cec5SDimitry Andric       __kmp_free(old_numPerLevel);
8810b57cec5SDimitry Andric     }
8820b57cec5SDimitry Andric 
8830b57cec5SDimitry Andric     // Fill in oversubscription levels of hierarchy
8840b57cec5SDimitry Andric     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
8850b57cec5SDimitry Andric       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
8860b57cec5SDimitry Andric 
8870b57cec5SDimitry Andric     base_num_threads = nproc;
8880b57cec5SDimitry Andric     resizing = 0; // One writer
8890b57cec5SDimitry Andric   }
8900b57cec5SDimitry Andric };
8910b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
892