xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.h (revision 62ff619dcc3540659a319be71c9a489f1659e14a)
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15 
16 #include "kmp.h"
17 #include "kmp_os.h"
18 #include <limits>
19 
20 #if KMP_AFFINITY_SUPPORTED
21 #if KMP_USE_HWLOC
22 class KMPHwlocAffinity : public KMPAffinity {
23 public:
24   class Mask : public KMPAffinity::Mask {
25     hwloc_cpuset_t mask;
26 
27   public:
28     Mask() {
29       mask = hwloc_bitmap_alloc();
30       this->zero();
31     }
32     ~Mask() { hwloc_bitmap_free(mask); }
33     void set(int i) override { hwloc_bitmap_set(mask, i); }
34     bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35     void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36     void zero() override { hwloc_bitmap_zero(mask); }
37     void copy(const KMPAffinity::Mask *src) override {
38       const Mask *convert = static_cast<const Mask *>(src);
39       hwloc_bitmap_copy(mask, convert->mask);
40     }
41     void bitwise_and(const KMPAffinity::Mask *rhs) override {
42       const Mask *convert = static_cast<const Mask *>(rhs);
43       hwloc_bitmap_and(mask, mask, convert->mask);
44     }
45     void bitwise_or(const KMPAffinity::Mask *rhs) override {
46       const Mask *convert = static_cast<const Mask *>(rhs);
47       hwloc_bitmap_or(mask, mask, convert->mask);
48     }
49     void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
50     int begin() const override { return hwloc_bitmap_first(mask); }
51     int end() const override { return -1; }
52     int next(int previous) const override {
53       return hwloc_bitmap_next(mask, previous);
54     }
55     int get_system_affinity(bool abort_on_error) override {
56       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
57                   "Illegal get affinity operation when not capable");
58       long retval =
59           hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
60       if (retval >= 0) {
61         return 0;
62       }
63       int error = errno;
64       if (abort_on_error) {
65         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
66       }
67       return error;
68     }
69     int set_system_affinity(bool abort_on_error) const override {
70       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71                   "Illegal set affinity operation when not capable");
72       long retval =
73           hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
74       if (retval >= 0) {
75         return 0;
76       }
77       int error = errno;
78       if (abort_on_error) {
79         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
80       }
81       return error;
82     }
83 #if KMP_OS_WINDOWS
84     int set_process_affinity(bool abort_on_error) const override {
85       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
86                   "Illegal set process affinity operation when not capable");
87       int error = 0;
88       const hwloc_topology_support *support =
89           hwloc_topology_get_support(__kmp_hwloc_topology);
90       if (support->cpubind->set_proc_cpubind) {
91         int retval;
92         retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
93                                    HWLOC_CPUBIND_PROCESS);
94         if (retval >= 0)
95           return 0;
96         error = errno;
97         if (abort_on_error)
98           __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
99       }
100       return error;
101     }
102 #endif
103     int get_proc_group() const override {
104       int group = -1;
105 #if KMP_OS_WINDOWS
106       if (__kmp_num_proc_groups == 1) {
107         return 1;
108       }
109       for (int i = 0; i < __kmp_num_proc_groups; i++) {
110         // On windows, the long type is always 32 bits
111         unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
112         unsigned long second_32_bits =
113             hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
114         if (first_32_bits == 0 && second_32_bits == 0) {
115           continue;
116         }
117         if (group >= 0) {
118           return -1;
119         }
120         group = i;
121       }
122 #endif /* KMP_OS_WINDOWS */
123       return group;
124     }
125   };
126   void determine_capable(const char *var) override {
127     const hwloc_topology_support *topology_support;
128     if (__kmp_hwloc_topology == NULL) {
129       if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
130         __kmp_hwloc_error = TRUE;
131         if (__kmp_affinity_verbose)
132           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
133       }
134       if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
135         __kmp_hwloc_error = TRUE;
136         if (__kmp_affinity_verbose)
137           KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
138       }
139     }
140     topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
141     // Is the system capable of setting/getting this thread's affinity?
142     // Also, is topology discovery possible? (pu indicates ability to discover
143     // processing units). And finally, were there no errors when calling any
144     // hwloc_* API functions?
145     if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
146         topology_support->cpubind->get_thisthread_cpubind &&
147         topology_support->discovery->pu && !__kmp_hwloc_error) {
148       // enables affinity according to KMP_AFFINITY_CAPABLE() macro
149       KMP_AFFINITY_ENABLE(TRUE);
150     } else {
151       // indicate that hwloc didn't work and disable affinity
152       __kmp_hwloc_error = TRUE;
153       KMP_AFFINITY_DISABLE();
154     }
155   }
156   void bind_thread(int which) override {
157     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
158                 "Illegal set affinity operation when not capable");
159     KMPAffinity::Mask *mask;
160     KMP_CPU_ALLOC_ON_STACK(mask);
161     KMP_CPU_ZERO(mask);
162     KMP_CPU_SET(which, mask);
163     __kmp_set_system_affinity(mask, TRUE);
164     KMP_CPU_FREE_FROM_STACK(mask);
165   }
166   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
167   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
168   KMPAffinity::Mask *allocate_mask_array(int num) override {
169     return new Mask[num];
170   }
171   void deallocate_mask_array(KMPAffinity::Mask *array) override {
172     Mask *hwloc_array = static_cast<Mask *>(array);
173     delete[] hwloc_array;
174   }
175   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
176                                       int index) override {
177     Mask *hwloc_array = static_cast<Mask *>(array);
178     return &(hwloc_array[index]);
179   }
180   api_type get_api_type() const override { return HWLOC; }
181 };
182 #endif /* KMP_USE_HWLOC */
183 
184 #if KMP_OS_LINUX || KMP_OS_FREEBSD
185 #if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change:
   they are set in stone forever. */
#include <sys/syscall.h>
// For every known architecture, define the sched_{set,get}affinity syscall
// numbers when the system headers do not, and reject headers that disagree
// with the expected value.
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
// No built-in numbers for this architecture. That is acceptable as long as
// <sys/syscall.h> supplied them; otherwise the build cannot proceed.
// (Previously this #error lacked the #else and therefore fired on every
// MIPS64 build while never firing for unknown architectures.)
#if !defined(__NR_sched_setaffinity) || !defined(__NR_sched_getaffinity)
#error Unknown or unsupported architecture
#endif
#endif /* KMP_ARCH_* */
259 #elif KMP_OS_FREEBSD
260 #include <pthread.h>
261 #include <pthread_np.h>
262 #endif
263 class KMPNativeAffinity : public KMPAffinity {
264   class Mask : public KMPAffinity::Mask {
265     typedef unsigned long mask_t;
266     typedef decltype(__kmp_affin_mask_size) mask_size_type;
267     static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
268     static const mask_t ONE = 1;
269     mask_size_type get_num_mask_types() const {
270       return __kmp_affin_mask_size / sizeof(mask_t);
271     }
272 
273   public:
274     mask_t *mask;
275     Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
276     ~Mask() {
277       if (mask)
278         __kmp_free(mask);
279     }
280     void set(int i) override {
281       mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
282     }
283     bool is_set(int i) const override {
284       return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
285     }
286     void clear(int i) override {
287       mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
288     }
289     void zero() override {
290       mask_size_type e = get_num_mask_types();
291       for (mask_size_type i = 0; i < e; ++i)
292         mask[i] = (mask_t)0;
293     }
294     void copy(const KMPAffinity::Mask *src) override {
295       const Mask *convert = static_cast<const Mask *>(src);
296       mask_size_type e = get_num_mask_types();
297       for (mask_size_type i = 0; i < e; ++i)
298         mask[i] = convert->mask[i];
299     }
300     void bitwise_and(const KMPAffinity::Mask *rhs) override {
301       const Mask *convert = static_cast<const Mask *>(rhs);
302       mask_size_type e = get_num_mask_types();
303       for (mask_size_type i = 0; i < e; ++i)
304         mask[i] &= convert->mask[i];
305     }
306     void bitwise_or(const KMPAffinity::Mask *rhs) override {
307       const Mask *convert = static_cast<const Mask *>(rhs);
308       mask_size_type e = get_num_mask_types();
309       for (mask_size_type i = 0; i < e; ++i)
310         mask[i] |= convert->mask[i];
311     }
312     void bitwise_not() override {
313       mask_size_type e = get_num_mask_types();
314       for (mask_size_type i = 0; i < e; ++i)
315         mask[i] = ~(mask[i]);
316     }
317     int begin() const override {
318       int retval = 0;
319       while (retval < end() && !is_set(retval))
320         ++retval;
321       return retval;
322     }
323     int end() const override {
324       int e;
325       __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
326       return e;
327     }
328     int next(int previous) const override {
329       int retval = previous + 1;
330       while (retval < end() && !is_set(retval))
331         ++retval;
332       return retval;
333     }
334     int get_system_affinity(bool abort_on_error) override {
335       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
336                   "Illegal get affinity operation when not capable");
337 #if KMP_OS_LINUX
338       long retval =
339           syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
340 #elif KMP_OS_FREEBSD
341       int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
342                                      reinterpret_cast<cpuset_t *>(mask));
343       int retval = (r == 0 ? 0 : -1);
344 #endif
345       if (retval >= 0) {
346         return 0;
347       }
348       int error = errno;
349       if (abort_on_error) {
350         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
351       }
352       return error;
353     }
354     int set_system_affinity(bool abort_on_error) const override {
355       KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
356                   "Illegal set affinity operation when not capable");
357 #if KMP_OS_LINUX
358       long retval =
359           syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
360 #elif KMP_OS_FREEBSD
361       int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
362                                      reinterpret_cast<cpuset_t *>(mask));
363       int retval = (r == 0 ? 0 : -1);
364 #endif
365       if (retval >= 0) {
366         return 0;
367       }
368       int error = errno;
369       if (abort_on_error) {
370         __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
371       }
372       return error;
373     }
374   };
375   void determine_capable(const char *env_var) override {
376     __kmp_affinity_determine_capable(env_var);
377   }
378   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
379   KMPAffinity::Mask *allocate_mask() override {
380     KMPNativeAffinity::Mask *retval = new Mask();
381     return retval;
382   }
383   void deallocate_mask(KMPAffinity::Mask *m) override {
384     KMPNativeAffinity::Mask *native_mask =
385         static_cast<KMPNativeAffinity::Mask *>(m);
386     delete native_mask;
387   }
388   KMPAffinity::Mask *allocate_mask_array(int num) override {
389     return new Mask[num];
390   }
391   void deallocate_mask_array(KMPAffinity::Mask *array) override {
392     Mask *linux_array = static_cast<Mask *>(array);
393     delete[] linux_array;
394   }
395   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
396                                       int index) override {
397     Mask *linux_array = static_cast<Mask *>(array);
398     return &(linux_array[index]);
399   }
400   api_type get_api_type() const override { return NATIVE_OS; }
401 };
402 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
403 
404 #if KMP_OS_WINDOWS
405 class KMPNativeAffinity : public KMPAffinity {
406   class Mask : public KMPAffinity::Mask {
407     typedef ULONG_PTR mask_t;
408     static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
409     mask_t *mask;
410 
411   public:
412     Mask() {
413       mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
414     }
415     ~Mask() {
416       if (mask)
417         __kmp_free(mask);
418     }
419     void set(int i) override {
420       mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
421     }
422     bool is_set(int i) const override {
423       return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
424     }
425     void clear(int i) override {
426       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
427     }
428     void zero() override {
429       for (int i = 0; i < __kmp_num_proc_groups; ++i)
430         mask[i] = 0;
431     }
432     void copy(const KMPAffinity::Mask *src) override {
433       const Mask *convert = static_cast<const Mask *>(src);
434       for (int i = 0; i < __kmp_num_proc_groups; ++i)
435         mask[i] = convert->mask[i];
436     }
437     void bitwise_and(const KMPAffinity::Mask *rhs) override {
438       const Mask *convert = static_cast<const Mask *>(rhs);
439       for (int i = 0; i < __kmp_num_proc_groups; ++i)
440         mask[i] &= convert->mask[i];
441     }
442     void bitwise_or(const KMPAffinity::Mask *rhs) override {
443       const Mask *convert = static_cast<const Mask *>(rhs);
444       for (int i = 0; i < __kmp_num_proc_groups; ++i)
445         mask[i] |= convert->mask[i];
446     }
447     void bitwise_not() override {
448       for (int i = 0; i < __kmp_num_proc_groups; ++i)
449         mask[i] = ~(mask[i]);
450     }
451     int begin() const override {
452       int retval = 0;
453       while (retval < end() && !is_set(retval))
454         ++retval;
455       return retval;
456     }
457     int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
458     int next(int previous) const override {
459       int retval = previous + 1;
460       while (retval < end() && !is_set(retval))
461         ++retval;
462       return retval;
463     }
464     int set_process_affinity(bool abort_on_error) const override {
465       if (__kmp_num_proc_groups <= 1) {
466         if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
467           DWORD error = GetLastError();
468           if (abort_on_error) {
469             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
470                         __kmp_msg_null);
471           }
472           return error;
473         }
474       }
475       return 0;
476     }
477     int set_system_affinity(bool abort_on_error) const override {
478       if (__kmp_num_proc_groups > 1) {
479         // Check for a valid mask.
480         GROUP_AFFINITY ga;
481         int group = get_proc_group();
482         if (group < 0) {
483           if (abort_on_error) {
484             KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
485           }
486           return -1;
487         }
488         // Transform the bit vector into a GROUP_AFFINITY struct
489         // and make the system call to set affinity.
490         ga.Group = group;
491         ga.Mask = mask[group];
492         ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
493 
494         KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
495         if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
496           DWORD error = GetLastError();
497           if (abort_on_error) {
498             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
499                         __kmp_msg_null);
500           }
501           return error;
502         }
503       } else {
504         if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
505           DWORD error = GetLastError();
506           if (abort_on_error) {
507             __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
508                         __kmp_msg_null);
509           }
510           return error;
511         }
512       }
513       return 0;
514     }
515     int get_system_affinity(bool abort_on_error) override {
516       if (__kmp_num_proc_groups > 1) {
517         this->zero();
518         GROUP_AFFINITY ga;
519         KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
520         if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
521           DWORD error = GetLastError();
522           if (abort_on_error) {
523             __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
524                         KMP_ERR(error), __kmp_msg_null);
525           }
526           return error;
527         }
528         if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
529             (ga.Mask == 0)) {
530           return -1;
531         }
532         mask[ga.Group] = ga.Mask;
533       } else {
534         mask_t newMask, sysMask, retval;
535         if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
536           DWORD error = GetLastError();
537           if (abort_on_error) {
538             __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
539                         KMP_ERR(error), __kmp_msg_null);
540           }
541           return error;
542         }
543         retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
544         if (!retval) {
545           DWORD error = GetLastError();
546           if (abort_on_error) {
547             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
548                         KMP_ERR(error), __kmp_msg_null);
549           }
550           return error;
551         }
552         newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
553         if (!newMask) {
554           DWORD error = GetLastError();
555           if (abort_on_error) {
556             __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
557                         KMP_ERR(error), __kmp_msg_null);
558           }
559         }
560         *mask = retval;
561       }
562       return 0;
563     }
564     int get_proc_group() const override {
565       int group = -1;
566       if (__kmp_num_proc_groups == 1) {
567         return 1;
568       }
569       for (int i = 0; i < __kmp_num_proc_groups; i++) {
570         if (mask[i] == 0)
571           continue;
572         if (group >= 0)
573           return -1;
574         group = i;
575       }
576       return group;
577     }
578   };
579   void determine_capable(const char *env_var) override {
580     __kmp_affinity_determine_capable(env_var);
581   }
582   void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
583   KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
584   void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
585   KMPAffinity::Mask *allocate_mask_array(int num) override {
586     return new Mask[num];
587   }
588   void deallocate_mask_array(KMPAffinity::Mask *array) override {
589     Mask *windows_array = static_cast<Mask *>(array);
590     delete[] windows_array;
591   }
592   KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
593                                       int index) override {
594     Mask *windows_array = static_cast<Mask *>(array);
595     return &(windows_array[index]);
596   }
597   api_type get_api_type() const override { return NATIVE_OS; }
598 };
599 #endif /* KMP_OS_WINDOWS */
600 #endif /* KMP_AFFINITY_SUPPORTED */
601 
602 // Describe an attribute for a level in the machine topology
603 struct kmp_hw_attr_t {
604   int core_type : 8;
605   int core_eff : 8;
606   unsigned valid : 1;
607   unsigned reserved : 15;
608 
609   static const int UNKNOWN_CORE_EFF = -1;
610 
611   kmp_hw_attr_t()
612       : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
613         valid(0), reserved(0) {}
614   void set_core_type(kmp_hw_core_type_t type) {
615     valid = 1;
616     core_type = type;
617   }
618   void set_core_eff(int eff) {
619     valid = 1;
620     core_eff = eff;
621   }
622   kmp_hw_core_type_t get_core_type() const {
623     return (kmp_hw_core_type_t)core_type;
624   }
625   int get_core_eff() const { return core_eff; }
626   bool is_core_type_valid() const {
627     return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
628   }
629   bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
630   operator bool() const { return valid; }
631   void clear() {
632     core_type = KMP_HW_CORE_TYPE_UNKNOWN;
633     core_eff = UNKNOWN_CORE_EFF;
634     valid = 0;
635   }
636   bool contains(const kmp_hw_attr_t &other) const {
637     if (!valid && !other.valid)
638       return true;
639     if (valid && other.valid) {
640       if (other.is_core_type_valid()) {
641         if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
642           return false;
643       }
644       if (other.is_core_eff_valid()) {
645         if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
646           return false;
647       }
648       return true;
649     }
650     return false;
651   }
652   bool operator==(const kmp_hw_attr_t &rhs) const {
653     return (rhs.valid == valid && rhs.core_eff == core_eff &&
654             rhs.core_type == core_type);
655   }
656   bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
657 };
658 
659 class kmp_hw_thread_t {
660 public:
661   static const int UNKNOWN_ID = -1;
662   static int compare_ids(const void *a, const void *b);
663   static int compare_compact(const void *a, const void *b);
664   int ids[KMP_HW_LAST];
665   int sub_ids[KMP_HW_LAST];
666   bool leader;
667   int os_id;
668   kmp_hw_attr_t attrs;
669 
670   void print() const;
671   void clear() {
672     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
673       ids[i] = UNKNOWN_ID;
674     leader = false;
675     attrs.clear();
676   }
677 };
678 
679 class kmp_topology_t {
680 
681   struct flags_t {
682     int uniform : 1;
683     int reserved : 31;
684   };
685 
686   int depth;
687 
688   // The following arrays are all 'depth' long and have been
689   // allocated to hold up to KMP_HW_LAST number of objects if
690   // needed so layers can be added without reallocation of any array
691 
692   // Orderd array of the types in the topology
693   kmp_hw_t *types;
694 
695   // Keep quick topology ratios, for non-uniform topologies,
696   // this ratio holds the max number of itemAs per itemB
697   // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
698   int *ratio;
699 
700   // Storage containing the absolute number of each topology layer
701   int *count;
702 
703   // The number of core efficiencies. This is only useful for hybrid
704   // topologies. Core efficiencies will range from 0 to num efficiencies - 1
705   int num_core_efficiencies;
706   int num_core_types;
707   kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
708 
709   // The hardware threads array
710   // hw_threads is num_hw_threads long
711   // Each hw_thread's ids and sub_ids are depth deep
712   int num_hw_threads;
713   kmp_hw_thread_t *hw_threads;
714 
715   // Equivalence hash where the key is the hardware topology item
716   // and the value is the equivalent hardware topology type in the
717   // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
718   // known equivalence for the topology type
719   kmp_hw_t equivalent[KMP_HW_LAST];
720 
721   // Flags describing the topology
722   flags_t flags;
723 
724   // Insert a new topology layer after allocation
725   void _insert_layer(kmp_hw_t type, const int *ids);
726 
727 #if KMP_GROUP_AFFINITY
728   // Insert topology information about Windows Processor groups
729   void _insert_windows_proc_groups();
730 #endif
731 
732   // Count each item & get the num x's per y
733   // e.g., get the number of cores and the number of threads per core
734   // for each (x, y) in (KMP_HW_* , KMP_HW_*)
735   void _gather_enumeration_information();
736 
737   // Remove layers that don't add information to the topology.
738   // This is done by having the layer take on the id = UNKNOWN_ID (-1)
739   void _remove_radix1_layers();
740 
741   // Find out if the topology is uniform
742   void _discover_uniformity();
743 
744   // Set all the sub_ids for each hardware thread
745   void _set_sub_ids();
746 
747   // Set global affinity variables describing the number of threads per
748   // core, the number of packages, the number of cores per package, and
749   // the number of cores.
750   void _set_globals();
751 
752   // Set the last level cache equivalent type
753   void _set_last_level_cache();
754 
755   // Return the number of cores with a particular attribute, 'attr'.
756   // If 'find_all' is true, then find all cores on the machine, otherwise find
757   // all cores per the layer 'above'
758   int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
759                             bool find_all = false) const;
760 
761 public:
762   // Force use of allocate()/deallocate()
763   kmp_topology_t() = delete;
764   kmp_topology_t(const kmp_topology_t &t) = delete;
765   kmp_topology_t(kmp_topology_t &&t) = delete;
766   kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
767   kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
768 
769   static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
770   static void deallocate(kmp_topology_t *);
771 
772   // Functions used in create_map() routines
773   kmp_hw_thread_t &at(int index) {
774     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
775     return hw_threads[index];
776   }
777   const kmp_hw_thread_t &at(int index) const {
778     KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
779     return hw_threads[index];
780   }
781   int get_num_hw_threads() const { return num_hw_threads; }
782   void sort_ids() {
783     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
784           kmp_hw_thread_t::compare_ids);
785   }
786   // Check if the hardware ids are unique, if they are
787   // return true, otherwise return false
788   bool check_ids() const;
789 
790   // Function to call after the create_map() routine
791   void canonicalize();
792   void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
793 
794   // Functions used after canonicalize() called
795   bool filter_hw_subset();
796   bool is_close(int hwt1, int hwt2, int level) const;
797   bool is_uniform() const { return flags.uniform; }
798   // Tell whether a type is a valid type in the topology
799   // returns KMP_HW_UNKNOWN when there is no equivalent type
800   kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
801   // Set type1 = type2
802   void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
803     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
804     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
805     kmp_hw_t real_type2 = equivalent[type2];
806     if (real_type2 == KMP_HW_UNKNOWN)
807       real_type2 = type2;
808     equivalent[type1] = real_type2;
809     // This loop is required since any of the types may have been set to
810     // be equivalent to type1.  They all must be checked and reset to type2.
811     KMP_FOREACH_HW_TYPE(type) {
812       if (equivalent[type] == type1) {
813         equivalent[type] = real_type2;
814       }
815     }
816   }
817   // Calculate number of types corresponding to level1
818   // per types corresponding to level2 (e.g., number of threads per core)
819   int calculate_ratio(int level1, int level2) const {
820     KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
821     KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
822     int r = 1;
823     for (int level = level1; level > level2; --level)
824       r *= ratio[level];
825     return r;
826   }
827   int get_ratio(int level) const {
828     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
829     return ratio[level];
830   }
831   int get_depth() const { return depth; };
832   kmp_hw_t get_type(int level) const {
833     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
834     return types[level];
835   }
836   int get_level(kmp_hw_t type) const {
837     KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
838     int eq_type = equivalent[type];
839     if (eq_type == KMP_HW_UNKNOWN)
840       return -1;
841     for (int i = 0; i < depth; ++i)
842       if (types[i] == eq_type)
843         return i;
844     return -1;
845   }
846   int get_count(int level) const {
847     KMP_DEBUG_ASSERT(level >= 0 && level < depth);
848     return count[level];
849   }
850   // Return the total number of cores with attribute 'attr'
851   int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
852     return _get_ncores_with_attr(attr, -1, true);
853   }
854   // Return the number of cores with attribute
855   // 'attr' per topology level 'above'
856   int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
857     return _get_ncores_with_attr(attr, above, false);
858   }
859 
860 #if KMP_AFFINITY_SUPPORTED
861   void sort_compact() {
862     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
863           kmp_hw_thread_t::compare_compact);
864   }
865 #endif
866   void print(const char *env_var = "KMP_AFFINITY") const;
867   void dump() const;
868 };
869 extern kmp_topology_t *__kmp_topology;
870 
871 class kmp_hw_subset_t {
872   const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
873 
874 public:
875   // Describe a machine topology item in KMP_HW_SUBSET
876   struct item_t {
877     kmp_hw_t type;
878     int num_attrs;
879     int num[MAX_ATTRS];
880     int offset[MAX_ATTRS];
881     kmp_hw_attr_t attr[MAX_ATTRS];
882   };
883   // Put parenthesis around max to avoid accidental use of Windows max macro.
884   const static int USE_ALL = (std::numeric_limits<int>::max)();
885 
886 private:
887   int depth;
888   int capacity;
889   item_t *items;
890   kmp_uint64 set;
891   bool absolute;
892   // The set must be able to handle up to KMP_HW_LAST number of layers
893   KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
894   // Sorting the KMP_HW_SUBSET items to follow topology order
895   // All unknown topology types will be at the beginning of the subset
896   static int hw_subset_compare(const void *i1, const void *i2) {
897     kmp_hw_t type1 = ((const item_t *)i1)->type;
898     kmp_hw_t type2 = ((const item_t *)i2)->type;
899     int level1 = __kmp_topology->get_level(type1);
900     int level2 = __kmp_topology->get_level(type2);
901     return level1 - level2;
902   }
903 
904 public:
905   // Force use of allocate()/deallocate()
906   kmp_hw_subset_t() = delete;
907   kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
908   kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
909   kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
910   kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
911 
912   static kmp_hw_subset_t *allocate() {
913     int initial_capacity = 5;
914     kmp_hw_subset_t *retval =
915         (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
916     retval->depth = 0;
917     retval->capacity = initial_capacity;
918     retval->set = 0ull;
919     retval->absolute = false;
920     retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
921     return retval;
922   }
923   static void deallocate(kmp_hw_subset_t *subset) {
924     __kmp_free(subset->items);
925     __kmp_free(subset);
926   }
927   void set_absolute() { absolute = true; }
928   bool is_absolute() const { return absolute; }
929   void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
930     for (int i = 0; i < depth; ++i) {
931       // Found an existing item for this layer type
932       // Add the num, offset, and attr to this item
933       if (items[i].type == type) {
934         int idx = items[i].num_attrs++;
935         if ((size_t)idx >= MAX_ATTRS)
936           return;
937         items[i].num[idx] = num;
938         items[i].offset[idx] = offset;
939         items[i].attr[idx] = attr;
940         return;
941       }
942     }
943     if (depth == capacity - 1) {
944       capacity *= 2;
945       item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
946       for (int i = 0; i < depth; ++i)
947         new_items[i] = items[i];
948       __kmp_free(items);
949       items = new_items;
950     }
951     items[depth].num_attrs = 1;
952     items[depth].type = type;
953     items[depth].num[0] = num;
954     items[depth].offset[0] = offset;
955     items[depth].attr[0] = attr;
956     depth++;
957     set |= (1ull << type);
958   }
959   int get_depth() const { return depth; }
960   const item_t &at(int index) const {
961     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
962     return items[index];
963   }
964   item_t &at(int index) {
965     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
966     return items[index];
967   }
968   void remove(int index) {
969     KMP_DEBUG_ASSERT(index >= 0 && index < depth);
970     set &= ~(1ull << items[index].type);
971     for (int j = index + 1; j < depth; ++j) {
972       items[j - 1] = items[j];
973     }
974     depth--;
975   }
976   void sort() {
977     KMP_DEBUG_ASSERT(__kmp_topology);
978     qsort(items, depth, sizeof(item_t), hw_subset_compare);
979   }
980   bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
981   void dump() const {
982     printf("**********************\n");
983     printf("*** kmp_hw_subset: ***\n");
984     printf("* depth: %d\n", depth);
985     printf("* items:\n");
986     for (int i = 0; i < depth; ++i) {
987       printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
988       for (int j = 0; j < items[i].num_attrs; ++j) {
989         printf("  num: %d, offset: %d, attr: ", items[i].num[j],
990                items[i].offset[j]);
991         if (!items[i].attr[j]) {
992           printf(" (none)\n");
993         } else {
994           printf(
995               " core_type = %s, core_eff = %d\n",
996               __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
997               items[i].attr[j].get_core_eff());
998         }
999       }
1000     }
1001     printf("* set: 0x%llx\n", set);
1002     printf("* absolute: %d\n", absolute);
1003     printf("**********************\n");
1004   }
1005 };
1006 extern kmp_hw_subset_t *__kmp_hw_subset;
1007 
1008 /* A structure for holding machine-specific hierarchy info to be computed once
1009    at init. This structure represents a mapping of threads to the actual machine
1010    hierarchy, or to our best guess at what the hierarchy might be, for the
1011    purpose of performing an efficient barrier. In the worst case, when there is
1012    no machine hierarchy information, it produces a tree suitable for a barrier,
1013    similar to the tree used in the hyper barrier. */
1014 class hierarchy_info {
1015 public:
1016   /* Good default values for number of leaves and branching factor, given no
1017      affinity information. Behaves a bit like hyper barrier. */
1018   static const kmp_uint32 maxLeaves = 4;
1019   static const kmp_uint32 minBranch = 4;
1020   /** Number of levels in the hierarchy. Typical levels are threads/core,
1021       cores/package or socket, packages/node, nodes/machine, etc. We don't want
1022       to get specific with nomenclature. When the machine is oversubscribed we
1023       add levels to duplicate the hierarchy, doubling the thread capacity of the
1024       hierarchy each time we add a level. */
1025   kmp_uint32 maxLevels;
1026 
1027   /** This is specifically the depth of the machine configuration hierarchy, in
1028       terms of the number of levels along the longest path from root to any
1029       leaf. It corresponds to the number of entries in numPerLevel if we exclude
1030       all but one trailing 1. */
1031   kmp_uint32 depth;
1032   kmp_uint32 base_num_threads;
1033   enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
1034   volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
1035   // 2=initialization in progress
1036   volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
1037 
1038   /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
1039       the parent of a node at level i has. For example, if we have a machine
1040       with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
1041       {2, 4, 4, 1, 1}. All empty levels are set to 1. */
1042   kmp_uint32 *numPerLevel;
1043   kmp_uint32 *skipPerLevel;
1044 
1045   void deriveLevels() {
1046     int hier_depth = __kmp_topology->get_depth();
1047     for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1048       numPerLevel[level] = __kmp_topology->get_ratio(i);
1049     }
1050   }
1051 
1052   hierarchy_info()
1053       : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
1054 
1055   void fini() {
1056     if (!uninitialized && numPerLevel) {
1057       __kmp_free(numPerLevel);
1058       numPerLevel = NULL;
1059       uninitialized = not_initialized;
1060     }
1061   }
1062 
1063   void init(int num_addrs) {
1064     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
1065         &uninitialized, not_initialized, initializing);
1066     if (bool_result == 0) { // Wait for initialization
1067       while (TCR_1(uninitialized) != initialized)
1068         KMP_CPU_PAUSE();
1069       return;
1070     }
1071     KMP_DEBUG_ASSERT(bool_result == 1);
1072 
1073     /* Added explicit initialization of the data fields here to prevent usage of
1074        dirty value observed when static library is re-initialized multiple times
1075        (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
1076        OpenMP). */
1077     depth = 1;
1078     resizing = 0;
1079     maxLevels = 7;
1080     numPerLevel =
1081         (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1082     skipPerLevel = &(numPerLevel[maxLevels]);
1083     for (kmp_uint32 i = 0; i < maxLevels;
1084          ++i) { // init numPerLevel[*] to 1 item per level
1085       numPerLevel[i] = 1;
1086       skipPerLevel[i] = 1;
1087     }
1088 
1089     // Sort table by physical ID
1090     if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1091       deriveLevels();
1092     } else {
1093       numPerLevel[0] = maxLeaves;
1094       numPerLevel[1] = num_addrs / maxLeaves;
1095       if (num_addrs % maxLeaves)
1096         numPerLevel[1]++;
1097     }
1098 
1099     base_num_threads = num_addrs;
1100     for (int i = maxLevels - 1; i >= 0;
1101          --i) // count non-empty levels to get depth
1102       if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
1103         depth++;
1104 
1105     kmp_uint32 branch = minBranch;
1106     if (numPerLevel[0] == 1)
1107       branch = num_addrs / maxLeaves;
1108     if (branch < minBranch)
1109       branch = minBranch;
1110     for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
1111       while (numPerLevel[d] > branch ||
1112              (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
1113         if (numPerLevel[d] & 1)
1114           numPerLevel[d]++;
1115         numPerLevel[d] = numPerLevel[d] >> 1;
1116         if (numPerLevel[d + 1] == 1)
1117           depth++;
1118         numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
1119       }
1120       if (numPerLevel[0] == 1) {
1121         branch = branch >> 1;
1122         if (branch < 4)
1123           branch = minBranch;
1124       }
1125     }
1126 
1127     for (kmp_uint32 i = 1; i < depth; ++i)
1128       skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
1129     // Fill in hierarchy in the case of oversubscription
1130     for (kmp_uint32 i = depth; i < maxLevels; ++i)
1131       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1132 
1133     uninitialized = initialized; // One writer
1134   }
1135 
1136   // Resize the hierarchy if nproc changes to something larger than before
1137   void resize(kmp_uint32 nproc) {
1138     kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1139     while (bool_result == 0) { // someone else is trying to resize
1140       KMP_CPU_PAUSE();
1141       if (nproc <= base_num_threads) // happy with other thread's resize
1142         return;
1143       else // try to resize
1144         bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1145     }
1146     KMP_DEBUG_ASSERT(bool_result != 0);
1147     if (nproc <= base_num_threads)
1148       return; // happy with other thread's resize
1149 
1150     // Calculate new maxLevels
1151     kmp_uint32 old_sz = skipPerLevel[depth - 1];
1152     kmp_uint32 incs = 0, old_maxLevels = maxLevels;
1153     // First see if old maxLevels is enough to contain new size
1154     for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
1155       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1156       numPerLevel[i - 1] *= 2;
1157       old_sz *= 2;
1158       depth++;
1159     }
1160     if (nproc > old_sz) { // Not enough space, need to expand hierarchy
1161       while (nproc > old_sz) {
1162         old_sz *= 2;
1163         incs++;
1164         depth++;
1165       }
1166       maxLevels += incs;
1167 
1168       // Resize arrays
1169       kmp_uint32 *old_numPerLevel = numPerLevel;
1170       kmp_uint32 *old_skipPerLevel = skipPerLevel;
1171       numPerLevel = skipPerLevel = NULL;
1172       numPerLevel =
1173           (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1174       skipPerLevel = &(numPerLevel[maxLevels]);
1175 
1176       // Copy old elements from old arrays
1177       for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1178         // init numPerLevel[*] to 1 item per level
1179         numPerLevel[i] = old_numPerLevel[i];
1180         skipPerLevel[i] = old_skipPerLevel[i];
1181       }
1182 
1183       // Init new elements in arrays to 1
1184       for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1185         // init numPerLevel[*] to 1 item per level
1186         numPerLevel[i] = 1;
1187         skipPerLevel[i] = 1;
1188       }
1189 
1190       // Free old arrays
1191       __kmp_free(old_numPerLevel);
1192     }
1193 
1194     // Fill in oversubscription levels of hierarchy
1195     for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
1196       skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1197 
1198     base_num_threads = nproc;
1199     resizing = 0; // One writer
1200   }
1201 };
1202 #endif // KMP_AFFINITY_H
1203