xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric /*
2*0b57cec5SDimitry Andric  * kmp_affinity.h -- header for affinity management
3*0b57cec5SDimitry Andric  */
4*0b57cec5SDimitry Andric 
5*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
9*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10*0b57cec5SDimitry Andric //
11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
12*0b57cec5SDimitry Andric 
13*0b57cec5SDimitry Andric #ifndef KMP_AFFINITY_H
14*0b57cec5SDimitry Andric #define KMP_AFFINITY_H
15*0b57cec5SDimitry Andric 
16*0b57cec5SDimitry Andric #include "kmp.h"
17*0b57cec5SDimitry Andric #include "kmp_os.h"
18*0b57cec5SDimitry Andric 
19*0b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
20*0b57cec5SDimitry Andric #if KMP_USE_HWLOC
21*0b57cec5SDimitry Andric class KMPHwlocAffinity : public KMPAffinity {
22*0b57cec5SDimitry Andric public:
23*0b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
24*0b57cec5SDimitry Andric     hwloc_cpuset_t mask;
25*0b57cec5SDimitry Andric 
26*0b57cec5SDimitry Andric   public:
27*0b57cec5SDimitry Andric     Mask() {
28*0b57cec5SDimitry Andric       mask = hwloc_bitmap_alloc();
29*0b57cec5SDimitry Andric       this->zero();
30*0b57cec5SDimitry Andric     }
31*0b57cec5SDimitry Andric     ~Mask() { hwloc_bitmap_free(mask); }
32*0b57cec5SDimitry Andric     void set(int i) override { hwloc_bitmap_set(mask, i); }
33*0b57cec5SDimitry Andric     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
34*0b57cec5SDimitry Andric     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
35*0b57cec5SDimitry Andric     void zero() override { hwloc_bitmap_zero(mask); }
36*0b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
37*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
38*0b57cec5SDimitry Andric       hwloc_bitmap_copy(mask, convert->mask);
39*0b57cec5SDimitry Andric     }
40*0b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
41*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
42*0b57cec5SDimitry Andric       hwloc_bitmap_and(mask, mask, convert->mask);
43*0b57cec5SDimitry Andric     }
44*0b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
45*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
46*0b57cec5SDimitry Andric       hwloc_bitmap_or(mask, mask, convert->mask);
47*0b57cec5SDimitry Andric     }
48*0b57cec5SDimitry Andric     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
49*0b57cec5SDimitry Andric     int begin() const override { return hwloc_bitmap_first(mask); }
50*0b57cec5SDimitry Andric     int end() const override { return -1; }
51*0b57cec5SDimitry Andric     int next(int previous) const override {
52*0b57cec5SDimitry Andric       return hwloc_bitmap_next(mask, previous);
53*0b57cec5SDimitry Andric     }
54*0b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
55*0b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
56*0b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
57*0b57cec5SDimitry Andric       int retval =
58*0b57cec5SDimitry Andric           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
59*0b57cec5SDimitry Andric       if (retval >= 0) {
60*0b57cec5SDimitry Andric         return 0;
61*0b57cec5SDimitry Andric       }
62*0b57cec5SDimitry Andric       int error = errno;
63*0b57cec5SDimitry Andric       if (abort_on_error) {
64*0b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
65*0b57cec5SDimitry Andric       }
66*0b57cec5SDimitry Andric       return error;
67*0b57cec5SDimitry Andric     }
68*0b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
69*0b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70*0b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
71*0b57cec5SDimitry Andric       int retval =
72*0b57cec5SDimitry Andric           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
73*0b57cec5SDimitry Andric       if (retval >= 0) {
74*0b57cec5SDimitry Andric         return 0;
75*0b57cec5SDimitry Andric       }
76*0b57cec5SDimitry Andric       int error = errno;
77*0b57cec5SDimitry Andric       if (abort_on_error) {
78*0b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
79*0b57cec5SDimitry Andric       }
80*0b57cec5SDimitry Andric       return error;
81*0b57cec5SDimitry Andric     }
82*0b57cec5SDimitry Andric     int get_proc_group() const override {
83*0b57cec5SDimitry Andric       int group = -1;
84*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
85*0b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
86*0b57cec5SDimitry Andric         return 1;
87*0b57cec5SDimitry Andric       }
88*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
89*0b57cec5SDimitry Andric         // On windows, the long type is always 32 bits
90*0b57cec5SDimitry Andric         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
91*0b57cec5SDimitry Andric         unsigned long second_32_bits =
92*0b57cec5SDimitry Andric             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
93*0b57cec5SDimitry Andric         if (first_32_bits == 0 && second_32_bits == 0) {
94*0b57cec5SDimitry Andric           continue;
95*0b57cec5SDimitry Andric         }
96*0b57cec5SDimitry Andric         if (group >= 0) {
97*0b57cec5SDimitry Andric           return -1;
98*0b57cec5SDimitry Andric         }
99*0b57cec5SDimitry Andric         group = i;
100*0b57cec5SDimitry Andric       }
101*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
102*0b57cec5SDimitry Andric       return group;
103*0b57cec5SDimitry Andric     }
104*0b57cec5SDimitry Andric   };
105*0b57cec5SDimitry Andric   void determine_capable(const char *var) override {
106*0b57cec5SDimitry Andric     const hwloc_topology_support *topology_support;
107*0b57cec5SDimitry Andric     if (__kmp_hwloc_topology == NULL) {
108*0b57cec5SDimitry Andric       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
109*0b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
110*0b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
111*0b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
112*0b57cec5SDimitry Andric       }
113*0b57cec5SDimitry Andric       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
114*0b57cec5SDimitry Andric         __kmp_hwloc_error = TRUE;
115*0b57cec5SDimitry Andric         if (__kmp_affinity_verbose)
116*0b57cec5SDimitry Andric           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
117*0b57cec5SDimitry Andric       }
118*0b57cec5SDimitry Andric     }
119*0b57cec5SDimitry Andric     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
120*0b57cec5SDimitry Andric     // Is the system capable of setting/getting this thread's affinity?
121*0b57cec5SDimitry Andric     // Also, is topology discovery possible? (pu indicates ability to discover
122*0b57cec5SDimitry Andric     // processing units). And finally, were there no errors when calling any
123*0b57cec5SDimitry Andric     // hwloc_* API functions?
124*0b57cec5SDimitry Andric     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
125*0b57cec5SDimitry Andric         topology_support->cpubind->get_thisthread_cpubind &&
126*0b57cec5SDimitry Andric         topology_support->discovery->pu && !__kmp_hwloc_error) {
127*0b57cec5SDimitry Andric       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
128*0b57cec5SDimitry Andric       KMP_AFFINITY_ENABLE(TRUE);
129*0b57cec5SDimitry Andric     } else {
130*0b57cec5SDimitry Andric       // indicate that hwloc didn't work and disable affinity
131*0b57cec5SDimitry Andric       __kmp_hwloc_error = TRUE;
132*0b57cec5SDimitry Andric       KMP_AFFINITY_DISABLE();
133*0b57cec5SDimitry Andric     }
134*0b57cec5SDimitry Andric   }
135*0b57cec5SDimitry Andric   void bind_thread(int which) override {
136*0b57cec5SDimitry Andric     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
137*0b57cec5SDimitry Andric                 "Illegal set affinity operation when not capable");
138*0b57cec5SDimitry Andric     KMPAffinity::Mask *mask;
139*0b57cec5SDimitry Andric     KMP_CPU_ALLOC_ON_STACK(mask);
140*0b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
141*0b57cec5SDimitry Andric     KMP_CPU_SET(which, mask);
142*0b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
143*0b57cec5SDimitry Andric     KMP_CPU_FREE_FROM_STACK(mask);
144*0b57cec5SDimitry Andric   }
145*0b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
146*0b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
147*0b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
148*0b57cec5SDimitry Andric     return new Mask[num];
149*0b57cec5SDimitry Andric   }
150*0b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
151*0b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
152*0b57cec5SDimitry Andric     delete[] hwloc_array;
153*0b57cec5SDimitry Andric   }
154*0b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
155*0b57cec5SDimitry Andric                                       int index) override {
156*0b57cec5SDimitry Andric     Mask *hwloc_array = static_cast<Mask *>(array);
157*0b57cec5SDimitry Andric     return &(hwloc_array[index]);
158*0b57cec5SDimitry Andric   }
159*0b57cec5SDimitry Andric   api_type get_api_type() const override { return HWLOC; }
160*0b57cec5SDimitry Andric };
161*0b57cec5SDimitry Andric #endif /* KMP_USE_HWLOC */
162*0b57cec5SDimitry Andric 
163*0b57cec5SDimitry Andric #if KMP_OS_LINUX
164*0b57cec5SDimitry Andric /* On some of the older OS's that we build on, these constants aren't present
165*0b57cec5SDimitry Andric    in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
166*0b57cec5SDimitry Andric    all systems of the same arch where they are defined, and they cannot change.
167*0b57cec5SDimitry Andric    stone forever. */
168*0b57cec5SDimitry Andric #include <sys/syscall.h>
169*0b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_ARM
170*0b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
171*0b57cec5SDimitry Andric #define __NR_sched_setaffinity 241
172*0b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 241
173*0b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
174*0b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
175*0b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
176*0b57cec5SDimitry Andric #define __NR_sched_getaffinity 242
177*0b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 242
178*0b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
179*0b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
180*0b57cec5SDimitry Andric #elif KMP_ARCH_AARCH64
181*0b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
182*0b57cec5SDimitry Andric #define __NR_sched_setaffinity 122
183*0b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 122
184*0b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
185*0b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
186*0b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
187*0b57cec5SDimitry Andric #define __NR_sched_getaffinity 123
188*0b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 123
189*0b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
190*0b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
191*0b57cec5SDimitry Andric #elif KMP_ARCH_X86_64
192*0b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
193*0b57cec5SDimitry Andric #define __NR_sched_setaffinity 203
194*0b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 203
195*0b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
196*0b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
197*0b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
198*0b57cec5SDimitry Andric #define __NR_sched_getaffinity 204
199*0b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 204
200*0b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
201*0b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
202*0b57cec5SDimitry Andric #elif KMP_ARCH_PPC64
203*0b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
204*0b57cec5SDimitry Andric #define __NR_sched_setaffinity 222
205*0b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 222
206*0b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
207*0b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
208*0b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
209*0b57cec5SDimitry Andric #define __NR_sched_getaffinity 223
210*0b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 223
211*0b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
212*0b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
213*0b57cec5SDimitry Andric #elif KMP_ARCH_MIPS
214*0b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
215*0b57cec5SDimitry Andric #define __NR_sched_setaffinity 4239
216*0b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 4239
217*0b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
218*0b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
219*0b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
220*0b57cec5SDimitry Andric #define __NR_sched_getaffinity 4240
221*0b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 4240
222*0b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
223*0b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
224*0b57cec5SDimitry Andric #elif KMP_ARCH_MIPS64
225*0b57cec5SDimitry Andric #ifndef __NR_sched_setaffinity
226*0b57cec5SDimitry Andric #define __NR_sched_setaffinity 5195
227*0b57cec5SDimitry Andric #elif __NR_sched_setaffinity != 5195
228*0b57cec5SDimitry Andric #error Wrong code for setaffinity system call.
229*0b57cec5SDimitry Andric #endif /* __NR_sched_setaffinity */
230*0b57cec5SDimitry Andric #ifndef __NR_sched_getaffinity
231*0b57cec5SDimitry Andric #define __NR_sched_getaffinity 5196
232*0b57cec5SDimitry Andric #elif __NR_sched_getaffinity != 5196
233*0b57cec5SDimitry Andric #error Wrong code for getaffinity system call.
234*0b57cec5SDimitry Andric #endif /* __NR_sched_getaffinity */
235*0b57cec5SDimitry Andric #error Unknown or unsupported architecture
236*0b57cec5SDimitry Andric #endif /* KMP_ARCH_* */
237*0b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
238*0b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
239*0b57cec5SDimitry Andric     typedef unsigned char mask_t;
240*0b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
241*0b57cec5SDimitry Andric 
242*0b57cec5SDimitry Andric   public:
243*0b57cec5SDimitry Andric     mask_t *mask;
244*0b57cec5SDimitry Andric     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
245*0b57cec5SDimitry Andric     ~Mask() {
246*0b57cec5SDimitry Andric       if (mask)
247*0b57cec5SDimitry Andric         __kmp_free(mask);
248*0b57cec5SDimitry Andric     }
249*0b57cec5SDimitry Andric     void set(int i) override {
250*0b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
251*0b57cec5SDimitry Andric     }
252*0b57cec5SDimitry Andric     bool is_set(int i) const override {
253*0b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
254*0b57cec5SDimitry Andric     }
255*0b57cec5SDimitry Andric     void clear(int i) override {
256*0b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
257*0b57cec5SDimitry Andric     }
258*0b57cec5SDimitry Andric     void zero() override {
259*0b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
260*0b57cec5SDimitry Andric         mask[i] = 0;
261*0b57cec5SDimitry Andric     }
262*0b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
263*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
264*0b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
265*0b57cec5SDimitry Andric         mask[i] = convert->mask[i];
266*0b57cec5SDimitry Andric     }
267*0b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
268*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
269*0b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
270*0b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
271*0b57cec5SDimitry Andric     }
272*0b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
273*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
274*0b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
275*0b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
276*0b57cec5SDimitry Andric     }
277*0b57cec5SDimitry Andric     void bitwise_not() override {
278*0b57cec5SDimitry Andric       for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
279*0b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
280*0b57cec5SDimitry Andric     }
281*0b57cec5SDimitry Andric     int begin() const override {
282*0b57cec5SDimitry Andric       int retval = 0;
283*0b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
284*0b57cec5SDimitry Andric         ++retval;
285*0b57cec5SDimitry Andric       return retval;
286*0b57cec5SDimitry Andric     }
287*0b57cec5SDimitry Andric     int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
288*0b57cec5SDimitry Andric     int next(int previous) const override {
289*0b57cec5SDimitry Andric       int retval = previous + 1;
290*0b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
291*0b57cec5SDimitry Andric         ++retval;
292*0b57cec5SDimitry Andric       return retval;
293*0b57cec5SDimitry Andric     }
294*0b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
295*0b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
296*0b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
297*0b57cec5SDimitry Andric       int retval =
298*0b57cec5SDimitry Andric           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
299*0b57cec5SDimitry Andric       if (retval >= 0) {
300*0b57cec5SDimitry Andric         return 0;
301*0b57cec5SDimitry Andric       }
302*0b57cec5SDimitry Andric       int error = errno;
303*0b57cec5SDimitry Andric       if (abort_on_error) {
304*0b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
305*0b57cec5SDimitry Andric       }
306*0b57cec5SDimitry Andric       return error;
307*0b57cec5SDimitry Andric     }
308*0b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
309*0b57cec5SDimitry Andric       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
310*0b57cec5SDimitry Andric                   "Illegal get affinity operation when not capable");
311*0b57cec5SDimitry Andric       int retval =
312*0b57cec5SDimitry Andric           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
313*0b57cec5SDimitry Andric       if (retval >= 0) {
314*0b57cec5SDimitry Andric         return 0;
315*0b57cec5SDimitry Andric       }
316*0b57cec5SDimitry Andric       int error = errno;
317*0b57cec5SDimitry Andric       if (abort_on_error) {
318*0b57cec5SDimitry Andric         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
319*0b57cec5SDimitry Andric       }
320*0b57cec5SDimitry Andric       return error;
321*0b57cec5SDimitry Andric     }
322*0b57cec5SDimitry Andric   };
323*0b57cec5SDimitry Andric   void determine_capable(const char *env_var) override {
324*0b57cec5SDimitry Andric     __kmp_affinity_determine_capable(env_var);
325*0b57cec5SDimitry Andric   }
326*0b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
327*0b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override {
328*0b57cec5SDimitry Andric     KMPNativeAffinity::Mask *retval = new Mask();
329*0b57cec5SDimitry Andric     return retval;
330*0b57cec5SDimitry Andric   }
331*0b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override {
332*0b57cec5SDimitry Andric     KMPNativeAffinity::Mask *native_mask =
333*0b57cec5SDimitry Andric         static_cast<KMPNativeAffinity::Mask *>(m);
334*0b57cec5SDimitry Andric     delete native_mask;
335*0b57cec5SDimitry Andric   }
336*0b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
337*0b57cec5SDimitry Andric     return new Mask[num];
338*0b57cec5SDimitry Andric   }
339*0b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
340*0b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
341*0b57cec5SDimitry Andric     delete[] linux_array;
342*0b57cec5SDimitry Andric   }
343*0b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
344*0b57cec5SDimitry Andric                                       int index) override {
345*0b57cec5SDimitry Andric     Mask *linux_array = static_cast<Mask *>(array);
346*0b57cec5SDimitry Andric     return &(linux_array[index]);
347*0b57cec5SDimitry Andric   }
348*0b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
349*0b57cec5SDimitry Andric };
350*0b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */
351*0b57cec5SDimitry Andric 
352*0b57cec5SDimitry Andric #if KMP_OS_WINDOWS
353*0b57cec5SDimitry Andric class KMPNativeAffinity : public KMPAffinity {
354*0b57cec5SDimitry Andric   class Mask : public KMPAffinity::Mask {
355*0b57cec5SDimitry Andric     typedef ULONG_PTR mask_t;
356*0b57cec5SDimitry Andric     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
357*0b57cec5SDimitry Andric     mask_t *mask;
358*0b57cec5SDimitry Andric 
359*0b57cec5SDimitry Andric   public:
360*0b57cec5SDimitry Andric     Mask() {
361*0b57cec5SDimitry Andric       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
362*0b57cec5SDimitry Andric     }
363*0b57cec5SDimitry Andric     ~Mask() {
364*0b57cec5SDimitry Andric       if (mask)
365*0b57cec5SDimitry Andric         __kmp_free(mask);
366*0b57cec5SDimitry Andric     }
367*0b57cec5SDimitry Andric     void set(int i) override {
368*0b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
369*0b57cec5SDimitry Andric     }
370*0b57cec5SDimitry Andric     bool is_set(int i) const override {
371*0b57cec5SDimitry Andric       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
372*0b57cec5SDimitry Andric     }
373*0b57cec5SDimitry Andric     void clear(int i) override {
374*0b57cec5SDimitry Andric       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
375*0b57cec5SDimitry Andric     }
376*0b57cec5SDimitry Andric     void zero() override {
377*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
378*0b57cec5SDimitry Andric         mask[i] = 0;
379*0b57cec5SDimitry Andric     }
380*0b57cec5SDimitry Andric     void copy(const KMPAffinity::Mask *src) override {
381*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(src);
382*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
383*0b57cec5SDimitry Andric         mask[i] = convert->mask[i];
384*0b57cec5SDimitry Andric     }
385*0b57cec5SDimitry Andric     void bitwise_and(const KMPAffinity::Mask *rhs) override {
386*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
387*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
388*0b57cec5SDimitry Andric         mask[i] &= convert->mask[i];
389*0b57cec5SDimitry Andric     }
390*0b57cec5SDimitry Andric     void bitwise_or(const KMPAffinity::Mask *rhs) override {
391*0b57cec5SDimitry Andric       const Mask *convert = static_cast<const Mask *>(rhs);
392*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
393*0b57cec5SDimitry Andric         mask[i] |= convert->mask[i];
394*0b57cec5SDimitry Andric     }
395*0b57cec5SDimitry Andric     void bitwise_not() override {
396*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; ++i)
397*0b57cec5SDimitry Andric         mask[i] = ~(mask[i]);
398*0b57cec5SDimitry Andric     }
399*0b57cec5SDimitry Andric     int begin() const override {
400*0b57cec5SDimitry Andric       int retval = 0;
401*0b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
402*0b57cec5SDimitry Andric         ++retval;
403*0b57cec5SDimitry Andric       return retval;
404*0b57cec5SDimitry Andric     }
405*0b57cec5SDimitry Andric     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
406*0b57cec5SDimitry Andric     int next(int previous) const override {
407*0b57cec5SDimitry Andric       int retval = previous + 1;
408*0b57cec5SDimitry Andric       while (retval < end() && !is_set(retval))
409*0b57cec5SDimitry Andric         ++retval;
410*0b57cec5SDimitry Andric       return retval;
411*0b57cec5SDimitry Andric     }
412*0b57cec5SDimitry Andric     int set_system_affinity(bool abort_on_error) const override {
413*0b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
414*0b57cec5SDimitry Andric         // Check for a valid mask.
415*0b57cec5SDimitry Andric         GROUP_AFFINITY ga;
416*0b57cec5SDimitry Andric         int group = get_proc_group();
417*0b57cec5SDimitry Andric         if (group < 0) {
418*0b57cec5SDimitry Andric           if (abort_on_error) {
419*0b57cec5SDimitry Andric             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
420*0b57cec5SDimitry Andric           }
421*0b57cec5SDimitry Andric           return -1;
422*0b57cec5SDimitry Andric         }
423*0b57cec5SDimitry Andric         // Transform the bit vector into a GROUP_AFFINITY struct
424*0b57cec5SDimitry Andric         // and make the system call to set affinity.
425*0b57cec5SDimitry Andric         ga.Group = group;
426*0b57cec5SDimitry Andric         ga.Mask = mask[group];
427*0b57cec5SDimitry Andric         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
428*0b57cec5SDimitry Andric 
429*0b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
430*0b57cec5SDimitry Andric         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
431*0b57cec5SDimitry Andric           DWORD error = GetLastError();
432*0b57cec5SDimitry Andric           if (abort_on_error) {
433*0b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
434*0b57cec5SDimitry Andric                         __kmp_msg_null);
435*0b57cec5SDimitry Andric           }
436*0b57cec5SDimitry Andric           return error;
437*0b57cec5SDimitry Andric         }
438*0b57cec5SDimitry Andric       } else {
439*0b57cec5SDimitry Andric         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
440*0b57cec5SDimitry Andric           DWORD error = GetLastError();
441*0b57cec5SDimitry Andric           if (abort_on_error) {
442*0b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
443*0b57cec5SDimitry Andric                         __kmp_msg_null);
444*0b57cec5SDimitry Andric           }
445*0b57cec5SDimitry Andric           return error;
446*0b57cec5SDimitry Andric         }
447*0b57cec5SDimitry Andric       }
448*0b57cec5SDimitry Andric       return 0;
449*0b57cec5SDimitry Andric     }
450*0b57cec5SDimitry Andric     int get_system_affinity(bool abort_on_error) override {
451*0b57cec5SDimitry Andric       if (__kmp_num_proc_groups > 1) {
452*0b57cec5SDimitry Andric         this->zero();
453*0b57cec5SDimitry Andric         GROUP_AFFINITY ga;
454*0b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
455*0b57cec5SDimitry Andric         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
456*0b57cec5SDimitry Andric           DWORD error = GetLastError();
457*0b57cec5SDimitry Andric           if (abort_on_error) {
458*0b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
459*0b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
460*0b57cec5SDimitry Andric           }
461*0b57cec5SDimitry Andric           return error;
462*0b57cec5SDimitry Andric         }
463*0b57cec5SDimitry Andric         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
464*0b57cec5SDimitry Andric             (ga.Mask == 0)) {
465*0b57cec5SDimitry Andric           return -1;
466*0b57cec5SDimitry Andric         }
467*0b57cec5SDimitry Andric         mask[ga.Group] = ga.Mask;
468*0b57cec5SDimitry Andric       } else {
469*0b57cec5SDimitry Andric         mask_t newMask, sysMask, retval;
470*0b57cec5SDimitry Andric         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
471*0b57cec5SDimitry Andric           DWORD error = GetLastError();
472*0b57cec5SDimitry Andric           if (abort_on_error) {
473*0b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
474*0b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
475*0b57cec5SDimitry Andric           }
476*0b57cec5SDimitry Andric           return error;
477*0b57cec5SDimitry Andric         }
478*0b57cec5SDimitry Andric         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
479*0b57cec5SDimitry Andric         if (!retval) {
480*0b57cec5SDimitry Andric           DWORD error = GetLastError();
481*0b57cec5SDimitry Andric           if (abort_on_error) {
482*0b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
483*0b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
484*0b57cec5SDimitry Andric           }
485*0b57cec5SDimitry Andric           return error;
486*0b57cec5SDimitry Andric         }
487*0b57cec5SDimitry Andric         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
488*0b57cec5SDimitry Andric         if (!newMask) {
489*0b57cec5SDimitry Andric           DWORD error = GetLastError();
490*0b57cec5SDimitry Andric           if (abort_on_error) {
491*0b57cec5SDimitry Andric             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
492*0b57cec5SDimitry Andric                         KMP_ERR(error), __kmp_msg_null);
493*0b57cec5SDimitry Andric           }
494*0b57cec5SDimitry Andric         }
495*0b57cec5SDimitry Andric         *mask = retval;
496*0b57cec5SDimitry Andric       }
497*0b57cec5SDimitry Andric       return 0;
498*0b57cec5SDimitry Andric     }
499*0b57cec5SDimitry Andric     int get_proc_group() const override {
500*0b57cec5SDimitry Andric       int group = -1;
501*0b57cec5SDimitry Andric       if (__kmp_num_proc_groups == 1) {
502*0b57cec5SDimitry Andric         return 1;
503*0b57cec5SDimitry Andric       }
504*0b57cec5SDimitry Andric       for (int i = 0; i < __kmp_num_proc_groups; i++) {
505*0b57cec5SDimitry Andric         if (mask[i] == 0)
506*0b57cec5SDimitry Andric           continue;
507*0b57cec5SDimitry Andric         if (group >= 0)
508*0b57cec5SDimitry Andric           return -1;
509*0b57cec5SDimitry Andric         group = i;
510*0b57cec5SDimitry Andric       }
511*0b57cec5SDimitry Andric       return group;
512*0b57cec5SDimitry Andric     }
513*0b57cec5SDimitry Andric   };
  // Delegate affinity-capability detection to the runtime helper; env_var is
  // the name of the controlling environment variable (used in diagnostics).
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
517*0b57cec5SDimitry Andric   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
518*0b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
519*0b57cec5SDimitry Andric   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
520*0b57cec5SDimitry Andric   KMPAffinity::Mask *allocate_mask_array(int num) override {
521*0b57cec5SDimitry Andric     return new Mask[num];
522*0b57cec5SDimitry Andric   }
523*0b57cec5SDimitry Andric   void deallocate_mask_array(KMPAffinity::Mask *array) override {
524*0b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
525*0b57cec5SDimitry Andric     delete[] windows_array;
526*0b57cec5SDimitry Andric   }
527*0b57cec5SDimitry Andric   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
528*0b57cec5SDimitry Andric                                       int index) override {
529*0b57cec5SDimitry Andric     Mask *windows_array = static_cast<Mask *>(array);
530*0b57cec5SDimitry Andric     return &(windows_array[index]);
531*0b57cec5SDimitry Andric   }
532*0b57cec5SDimitry Andric   api_type get_api_type() const override { return NATIVE_OS; }
533*0b57cec5SDimitry Andric };
534*0b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */
535*0b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */
536*0b57cec5SDimitry Andric 
537*0b57cec5SDimitry Andric class Address {
538*0b57cec5SDimitry Andric public:
539*0b57cec5SDimitry Andric   static const unsigned maxDepth = 32;
540*0b57cec5SDimitry Andric   unsigned labels[maxDepth];
541*0b57cec5SDimitry Andric   unsigned childNums[maxDepth];
542*0b57cec5SDimitry Andric   unsigned depth;
543*0b57cec5SDimitry Andric   unsigned leader;
544*0b57cec5SDimitry Andric   Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
545*0b57cec5SDimitry Andric   Address &operator=(const Address &b) {
546*0b57cec5SDimitry Andric     depth = b.depth;
547*0b57cec5SDimitry Andric     for (unsigned i = 0; i < depth; i++) {
548*0b57cec5SDimitry Andric       labels[i] = b.labels[i];
549*0b57cec5SDimitry Andric       childNums[i] = b.childNums[i];
550*0b57cec5SDimitry Andric     }
551*0b57cec5SDimitry Andric     leader = FALSE;
552*0b57cec5SDimitry Andric     return *this;
553*0b57cec5SDimitry Andric   }
554*0b57cec5SDimitry Andric   bool operator==(const Address &b) const {
555*0b57cec5SDimitry Andric     if (depth != b.depth)
556*0b57cec5SDimitry Andric       return false;
557*0b57cec5SDimitry Andric     for (unsigned i = 0; i < depth; i++)
558*0b57cec5SDimitry Andric       if (labels[i] != b.labels[i])
559*0b57cec5SDimitry Andric         return false;
560*0b57cec5SDimitry Andric     return true;
561*0b57cec5SDimitry Andric   }
562*0b57cec5SDimitry Andric   bool isClose(const Address &b, int level) const {
563*0b57cec5SDimitry Andric     if (depth != b.depth)
564*0b57cec5SDimitry Andric       return false;
565*0b57cec5SDimitry Andric     if ((unsigned)level >= depth)
566*0b57cec5SDimitry Andric       return true;
567*0b57cec5SDimitry Andric     for (unsigned i = 0; i < (depth - level); i++)
568*0b57cec5SDimitry Andric       if (labels[i] != b.labels[i])
569*0b57cec5SDimitry Andric         return false;
570*0b57cec5SDimitry Andric     return true;
571*0b57cec5SDimitry Andric   }
572*0b57cec5SDimitry Andric   bool operator!=(const Address &b) const { return !operator==(b); }
573*0b57cec5SDimitry Andric   void print() const {
574*0b57cec5SDimitry Andric     unsigned i;
575*0b57cec5SDimitry Andric     printf("Depth: %u --- ", depth);
576*0b57cec5SDimitry Andric     for (i = 0; i < depth; i++) {
577*0b57cec5SDimitry Andric       printf("%u ", labels[i]);
578*0b57cec5SDimitry Andric     }
579*0b57cec5SDimitry Andric   }
580*0b57cec5SDimitry Andric };
581*0b57cec5SDimitry Andric 
582*0b57cec5SDimitry Andric class AddrUnsPair {
583*0b57cec5SDimitry Andric public:
584*0b57cec5SDimitry Andric   Address first;
585*0b57cec5SDimitry Andric   unsigned second;
586*0b57cec5SDimitry Andric   AddrUnsPair(Address _first, unsigned _second)
587*0b57cec5SDimitry Andric       : first(_first), second(_second) {}
588*0b57cec5SDimitry Andric   AddrUnsPair &operator=(const AddrUnsPair &b) {
589*0b57cec5SDimitry Andric     first = b.first;
590*0b57cec5SDimitry Andric     second = b.second;
591*0b57cec5SDimitry Andric     return *this;
592*0b57cec5SDimitry Andric   }
593*0b57cec5SDimitry Andric   void print() const {
594*0b57cec5SDimitry Andric     printf("first = ");
595*0b57cec5SDimitry Andric     first.print();
596*0b57cec5SDimitry Andric     printf(" --- second = %u", second);
597*0b57cec5SDimitry Andric   }
598*0b57cec5SDimitry Andric   bool operator==(const AddrUnsPair &b) const {
599*0b57cec5SDimitry Andric     if (first != b.first)
600*0b57cec5SDimitry Andric       return false;
601*0b57cec5SDimitry Andric     if (second != b.second)
602*0b57cec5SDimitry Andric       return false;
603*0b57cec5SDimitry Andric     return true;
604*0b57cec5SDimitry Andric   }
605*0b57cec5SDimitry Andric   bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
606*0b57cec5SDimitry Andric };
607*0b57cec5SDimitry Andric 
608*0b57cec5SDimitry Andric static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
609*0b57cec5SDimitry Andric   const Address *aa = &(((const AddrUnsPair *)a)->first);
610*0b57cec5SDimitry Andric   const Address *bb = &(((const AddrUnsPair *)b)->first);
611*0b57cec5SDimitry Andric   unsigned depth = aa->depth;
612*0b57cec5SDimitry Andric   unsigned i;
613*0b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(depth == bb->depth);
614*0b57cec5SDimitry Andric   for (i = 0; i < depth; i++) {
615*0b57cec5SDimitry Andric     if (aa->labels[i] < bb->labels[i])
616*0b57cec5SDimitry Andric       return -1;
617*0b57cec5SDimitry Andric     if (aa->labels[i] > bb->labels[i])
618*0b57cec5SDimitry Andric       return 1;
619*0b57cec5SDimitry Andric   }
620*0b57cec5SDimitry Andric   return 0;
621*0b57cec5SDimitry Andric }
622*0b57cec5SDimitry Andric 
623*0b57cec5SDimitry Andric /* A structure for holding machine-specific hierarchy info to be computed once
624*0b57cec5SDimitry Andric    at init. This structure represents a mapping of threads to the actual machine
625*0b57cec5SDimitry Andric    hierarchy, or to our best guess at what the hierarchy might be, for the
626*0b57cec5SDimitry Andric    purpose of performing an efficient barrier. In the worst case, when there is
627*0b57cec5SDimitry Andric    no machine hierarchy information, it produces a tree suitable for a barrier,
628*0b57cec5SDimitry Andric    similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of the
      hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we exclude
      all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads; // thread count the current hierarchy covers
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel; // second half of the numPerLevel allocation

  // Populate numPerLevel from real topology data: walk the machine levels
  // leaf-first and record, for each, the maximum child index seen plus one.
  // Assumes adr2os is non-empty and all entries share the same depth.
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  // NOTE(review): numPerLevel is left unset here; init() allocates it, and
  // fini() only frees when uninitialized==0 — confirm no path calls fini()
  // on a constructed-but-never-init'ed object with a dirty pointer.
  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  // Free the level arrays (one allocation backs both numPerLevel and
  // skipPerLevel) and return to not_initialized so init() may run again.
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  // One-time construction of the hierarchy for num_addrs threads. Exactly one
  // caller wins the CAS below and builds the tables; all other callers spin
  // until the winner publishes uninitialized==initialized.
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage of
       dirty value observed when static library is re-initialized multiple times
       (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
       OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    // Single allocation sized for both arrays; skipPerLevel aliases the
    // second half.
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      // No topology info: fall back to a flat guess of maxLeaves-wide groups.
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    // Rebalance: halve over-wide levels, pushing capacity up a level, until
    // every level's fan-out is within 'branch' (and level 0 within maxLeaves).
    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    // skipPerLevel[i] = product of fan-outs below level i (stride per level).
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    // CAS-acquire the resizing flag; losers spin, rechecking whether the
    // winner's resize already covers nproc.
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    // (no-op when the expansion branch was skipped: old_maxLevels==maxLevels)
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
827*0b57cec5SDimitry Andric #endif // KMP_AFFINITY_H
828