xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_affinity.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric  * kmp_affinity.cpp -- affinity management
30b57cec5SDimitry Andric  */
40b57cec5SDimitry Andric 
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "kmp.h"
140b57cec5SDimitry Andric #include "kmp_affinity.h"
150b57cec5SDimitry Andric #include "kmp_i18n.h"
160b57cec5SDimitry Andric #include "kmp_io.h"
170b57cec5SDimitry Andric #include "kmp_str.h"
180b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h"
190b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
200b57cec5SDimitry Andric #include "kmp_dispatch_hier.h"
210b57cec5SDimitry Andric #endif
22fe6060f1SDimitry Andric #if KMP_USE_HWLOC
23fe6060f1SDimitry Andric // Copied from hwloc
24fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_INTEL_MODULE 102
25fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_INTEL_TILE 103
26fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_INTEL_DIE 104
27fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220
28fe6060f1SDimitry Andric #endif
29349cc55cSDimitry Andric #include <ctype.h>
30fe6060f1SDimitry Andric 
31fe6060f1SDimitry Andric // The machine topology
32fe6060f1SDimitry Andric kmp_topology_t *__kmp_topology = nullptr;
33fe6060f1SDimitry Andric // KMP_HW_SUBSET environment variable
34fe6060f1SDimitry Andric kmp_hw_subset_t *__kmp_hw_subset = nullptr;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric // Store the real or imagined machine hierarchy here
370b57cec5SDimitry Andric static hierarchy_info machine_hierarchy;
380b57cec5SDimitry Andric 
__kmp_cleanup_hierarchy()390b57cec5SDimitry Andric void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
400b57cec5SDimitry Andric 
415f757f3fSDimitry Andric #if KMP_AFFINITY_SUPPORTED
425f757f3fSDimitry Andric // Helper class to see if place lists further restrict the fullMask
class kmp_full_mask_modifier_t {
  // Accumulates the union of every mask passed to include(); starts zeroed.
  kmp_affin_mask_t *mask;

public:
  // Allocate and zero the accumulator mask.
  kmp_full_mask_modifier_t() {
    KMP_CPU_ALLOC(mask);
    KMP_CPU_ZERO(mask);
  }
  // Free the accumulator mask (RAII: no explicit cleanup needed by callers).
  ~kmp_full_mask_modifier_t() {
    KMP_CPU_FREE(mask);
    mask = nullptr;
  }
  // Fold the bits of 'other' into the accumulated mask.
  void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, other); }
  // If the new full mask is different from the current full mask,
  // then switch them. Returns true if full mask was affected, false otherwise.
  bool restrict_to_mask() {
    // See if the new mask further restricts or changes the full mask.
    // An empty accumulated mask or one identical to the current full mask
    // leaves the topology untouched.
    if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask))
      return false;
    return __kmp_topology->restrict_to_mask(mask);
  }
};
655f757f3fSDimitry Andric 
665f757f3fSDimitry Andric static inline const char *
__kmp_get_affinity_env_var(const kmp_affinity_t & affinity,bool for_binding=false)675f757f3fSDimitry Andric __kmp_get_affinity_env_var(const kmp_affinity_t &affinity,
685f757f3fSDimitry Andric                            bool for_binding = false) {
695f757f3fSDimitry Andric   if (affinity.flags.omp_places) {
705f757f3fSDimitry Andric     if (for_binding)
715f757f3fSDimitry Andric       return "OMP_PROC_BIND";
725f757f3fSDimitry Andric     return "OMP_PLACES";
735f757f3fSDimitry Andric   }
745f757f3fSDimitry Andric   return affinity.env_var;
755f757f3fSDimitry Andric }
765f757f3fSDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
775f757f3fSDimitry Andric 
// Populate thr_bar's hierarchical-barrier fields (depth, leaf kid count,
// per-level skip table) from the machine hierarchy, initializing or growing
// the hierarchy as needed for nproc threads.
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need to
  // init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  // Leaf kids = branching factor at the lowest level, minus self.
  __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
                     &(thr_bar->base_leaf_kids));
  // Shared pointer into the hierarchy's table; not a copy.
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
970b57cec5SDimitry Andric 
// File-scope topology summary counters.
// NOTE(review): presumably populated by the topology detection code later in
// this file -- confirm against the enumeration routines.
static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
103fe6060f1SDimitry Andric 
// Return the localized (message-catalog) display string for a hardware
// topology level, in singular or plural form.
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural) {
  switch (type) {
  case KMP_HW_SOCKET:
    return ((plural) ? KMP_I18N_STR(Sockets) : KMP_I18N_STR(Socket));
  case KMP_HW_DIE:
    return ((plural) ? KMP_I18N_STR(Dice) : KMP_I18N_STR(Die));
  case KMP_HW_MODULE:
    return ((plural) ? KMP_I18N_STR(Modules) : KMP_I18N_STR(Module));
  case KMP_HW_TILE:
    return ((plural) ? KMP_I18N_STR(Tiles) : KMP_I18N_STR(Tile));
  case KMP_HW_NUMA:
    return ((plural) ? KMP_I18N_STR(NumaDomains) : KMP_I18N_STR(NumaDomain));
  case KMP_HW_L3:
    return ((plural) ? KMP_I18N_STR(L3Caches) : KMP_I18N_STR(L3Cache));
  case KMP_HW_L2:
    return ((plural) ? KMP_I18N_STR(L2Caches) : KMP_I18N_STR(L2Cache));
  case KMP_HW_L1:
    return ((plural) ? KMP_I18N_STR(L1Caches) : KMP_I18N_STR(L1Cache));
  case KMP_HW_LLC:
    return ((plural) ? KMP_I18N_STR(LLCaches) : KMP_I18N_STR(LLCache));
  case KMP_HW_CORE:
    return ((plural) ? KMP_I18N_STR(Cores) : KMP_I18N_STR(Core));
  case KMP_HW_THREAD:
    return ((plural) ? KMP_I18N_STR(Threads) : KMP_I18N_STR(Thread));
  case KMP_HW_PROC_GROUP:
    return ((plural) ? KMP_I18N_STR(ProcGroups) : KMP_I18N_STR(ProcGroup));
  case KMP_HW_UNKNOWN:
  case KMP_HW_LAST:
    return KMP_I18N_STR(Unknown);
  }
  // Every valid enumerator returns above; reaching here means a value outside
  // the enumeration was passed in.
  KMP_ASSERT2(false, "Unhandled kmp_hw_t enumeration");
  KMP_BUILTIN_UNREACHABLE;
}
137fe6060f1SDimitry Andric 
__kmp_hw_get_keyword(kmp_hw_t type,bool plural)138fe6060f1SDimitry Andric const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
139fe6060f1SDimitry Andric   switch (type) {
140fe6060f1SDimitry Andric   case KMP_HW_SOCKET:
141fe6060f1SDimitry Andric     return ((plural) ? "sockets" : "socket");
142fe6060f1SDimitry Andric   case KMP_HW_DIE:
143fe6060f1SDimitry Andric     return ((plural) ? "dice" : "die");
144fe6060f1SDimitry Andric   case KMP_HW_MODULE:
145fe6060f1SDimitry Andric     return ((plural) ? "modules" : "module");
146fe6060f1SDimitry Andric   case KMP_HW_TILE:
147fe6060f1SDimitry Andric     return ((plural) ? "tiles" : "tile");
148fe6060f1SDimitry Andric   case KMP_HW_NUMA:
149fe6060f1SDimitry Andric     return ((plural) ? "numa_domains" : "numa_domain");
150fe6060f1SDimitry Andric   case KMP_HW_L3:
151fe6060f1SDimitry Andric     return ((plural) ? "l3_caches" : "l3_cache");
152fe6060f1SDimitry Andric   case KMP_HW_L2:
153fe6060f1SDimitry Andric     return ((plural) ? "l2_caches" : "l2_cache");
154fe6060f1SDimitry Andric   case KMP_HW_L1:
155fe6060f1SDimitry Andric     return ((plural) ? "l1_caches" : "l1_cache");
156fe6060f1SDimitry Andric   case KMP_HW_LLC:
157fe6060f1SDimitry Andric     return ((plural) ? "ll_caches" : "ll_cache");
158fe6060f1SDimitry Andric   case KMP_HW_CORE:
159fe6060f1SDimitry Andric     return ((plural) ? "cores" : "core");
160fe6060f1SDimitry Andric   case KMP_HW_THREAD:
161fe6060f1SDimitry Andric     return ((plural) ? "threads" : "thread");
162fe6060f1SDimitry Andric   case KMP_HW_PROC_GROUP:
163fe6060f1SDimitry Andric     return ((plural) ? "proc_groups" : "proc_group");
1647a6dacacSDimitry Andric   case KMP_HW_UNKNOWN:
1657a6dacacSDimitry Andric   case KMP_HW_LAST:
166fe6060f1SDimitry Andric     return ((plural) ? "unknowns" : "unknown");
167fe6060f1SDimitry Andric   }
1687a6dacacSDimitry Andric   KMP_ASSERT2(false, "Unhandled kmp_hw_t enumeration");
1697a6dacacSDimitry Andric   KMP_BUILTIN_UNREACHABLE;
1707a6dacacSDimitry Andric }
171fe6060f1SDimitry Andric 
__kmp_hw_get_core_type_string(kmp_hw_core_type_t type)172349cc55cSDimitry Andric const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
173349cc55cSDimitry Andric   switch (type) {
174349cc55cSDimitry Andric   case KMP_HW_CORE_TYPE_UNKNOWN:
1757a6dacacSDimitry Andric   case KMP_HW_MAX_NUM_CORE_TYPES:
176349cc55cSDimitry Andric     return "unknown";
177349cc55cSDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
178349cc55cSDimitry Andric   case KMP_HW_CORE_TYPE_ATOM:
179349cc55cSDimitry Andric     return "Intel Atom(R) processor";
180349cc55cSDimitry Andric   case KMP_HW_CORE_TYPE_CORE:
181349cc55cSDimitry Andric     return "Intel(R) Core(TM) processor";
182349cc55cSDimitry Andric #endif
183349cc55cSDimitry Andric   }
1847a6dacacSDimitry Andric   KMP_ASSERT2(false, "Unhandled kmp_hw_core_type_t enumeration");
1857a6dacacSDimitry Andric   KMP_BUILTIN_UNREACHABLE;
186349cc55cSDimitry Andric }
187349cc55cSDimitry Andric 
#if KMP_AFFINITY_SUPPORTED
// If affinity is supported, check the affinity
// verbose and warning flags before printing warning.
// Wrapped in do { } while (0) so the macro expands to exactly one statement:
// the previous bare `if { }` form could mis-bind a caller's trailing `else`
// (dangling-else hazard) or break `if (c) KMP_AFF_WARNING(...); else ...`
// call sites with the stray `;` after the closing brace.
#define KMP_AFF_WARNING(s, ...)                                                \
  do {                                                                         \
    if (s.flags.verbose || (s.flags.warnings && (s.type != affinity_none))) {  \
      KMP_WARNING(__VA_ARGS__);                                                \
    }                                                                          \
  } while (0)
#else
#define KMP_AFF_WARNING(s, ...) KMP_WARNING(__VA_ARGS__)
#endif
198fcaf7f86SDimitry Andric 
199fe6060f1SDimitry Andric ////////////////////////////////////////////////////////////////////////////////
200fe6060f1SDimitry Andric // kmp_hw_thread_t methods
compare_ids(const void * a,const void * b)201fe6060f1SDimitry Andric int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
202fe6060f1SDimitry Andric   const kmp_hw_thread_t *ahwthread = (const kmp_hw_thread_t *)a;
203fe6060f1SDimitry Andric   const kmp_hw_thread_t *bhwthread = (const kmp_hw_thread_t *)b;
204fe6060f1SDimitry Andric   int depth = __kmp_topology->get_depth();
205fe6060f1SDimitry Andric   for (int level = 0; level < depth; ++level) {
206fe6060f1SDimitry Andric     if (ahwthread->ids[level] < bhwthread->ids[level])
207fe6060f1SDimitry Andric       return -1;
208fe6060f1SDimitry Andric     else if (ahwthread->ids[level] > bhwthread->ids[level])
209fe6060f1SDimitry Andric       return 1;
210fe6060f1SDimitry Andric   }
211fe6060f1SDimitry Andric   if (ahwthread->os_id < bhwthread->os_id)
212fe6060f1SDimitry Andric     return -1;
213fe6060f1SDimitry Andric   else if (ahwthread->os_id > bhwthread->os_id)
214fe6060f1SDimitry Andric     return 1;
215fe6060f1SDimitry Andric   return 0;
216fe6060f1SDimitry Andric }
217fe6060f1SDimitry Andric 
2180b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
// qsort(3)-style comparator over sub_ids[]: the 'compact' deepest levels are
// compared first (deepest level taking highest precedence), then the
// remaining outer levels (outermost first). Returns -1, 0, or 1.
int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
  int i;
  const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a;
  const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b;
  int depth = __kmp_topology->get_depth();
  int compact = __kmp_topology->compact;
  KMP_DEBUG_ASSERT(compact >= 0);
  KMP_DEBUG_ASSERT(compact <= depth);
  // Compare the 'compact' deepest levels, starting at depth-1 and moving up.
  for (i = 0; i < compact; i++) {
    int j = depth - i - 1;
    if (aa->sub_ids[j] < bb->sub_ids[j])
      return -1;
    if (aa->sub_ids[j] > bb->sub_ids[j])
      return 1;
  }
  // Then compare the remaining outer levels 0 .. depth-compact-1, in order.
  for (; i < depth; i++) {
    int j = i - compact;
    if (aa->sub_ids[j] < bb->sub_ids[j])
      return -1;
    if (aa->sub_ids[j] > bb->sub_ids[j])
      return 1;
  }
  return 0;
}
243fe6060f1SDimitry Andric #endif
244fe6060f1SDimitry Andric 
print() const245fe6060f1SDimitry Andric void kmp_hw_thread_t::print() const {
246fe6060f1SDimitry Andric   int depth = __kmp_topology->get_depth();
247fe6060f1SDimitry Andric   printf("%4d ", os_id);
248fe6060f1SDimitry Andric   for (int i = 0; i < depth; ++i) {
249fe6060f1SDimitry Andric     printf("%4d ", ids[i]);
250fe6060f1SDimitry Andric   }
2510eae32dcSDimitry Andric   if (attrs) {
2520eae32dcSDimitry Andric     if (attrs.is_core_type_valid())
2530eae32dcSDimitry Andric       printf(" (%s)", __kmp_hw_get_core_type_string(attrs.get_core_type()));
2540eae32dcSDimitry Andric     if (attrs.is_core_eff_valid())
2550eae32dcSDimitry Andric       printf(" (eff=%d)", attrs.get_core_eff());
256349cc55cSDimitry Andric   }
2575f757f3fSDimitry Andric   if (leader)
2585f757f3fSDimitry Andric     printf(" (leader)");
259fe6060f1SDimitry Andric   printf("\n");
260fe6060f1SDimitry Andric }
261fe6060f1SDimitry Andric 
262fe6060f1SDimitry Andric ////////////////////////////////////////////////////////////////////////////////
263fe6060f1SDimitry Andric // kmp_topology_t methods
264fe6060f1SDimitry Andric 
// Add a layer to the topology based on the ids. Assume the topology
// is perfectly nested (i.e., so no object has more than one parent).
// 'ids' has one entry per hardware thread giving that thread's id in the
// new layer.
void kmp_topology_t::_insert_layer(kmp_hw_t type, const int *ids) {
  // Figure out where the layer should go by comparing the ids of the current
  // layers with the new ids
  int target_layer;
  int previous_id = kmp_hw_thread_t::UNKNOWN_ID;
  int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID;

  // Start from the highest layer and work down to find target layer
  // If new layer is equal to another layer then put the new layer above
  for (target_layer = 0; target_layer < depth; ++target_layer) {
    bool layers_equal = true;
    bool strictly_above_target_layer = false;
    for (int i = 0; i < num_hw_threads; ++i) {
      int id = hw_threads[i].ids[target_layer];
      int new_id = ids[i];
      // Existing id changed while the new id did not: the new layer groups
      // more threads together than this one does.
      if (id != previous_id && new_id == previous_new_id) {
        // Found the layer we are strictly above
        strictly_above_target_layer = true;
        layers_equal = false;
        break;
      } else if (id == previous_id && new_id != previous_new_id) {
        // Found a layer we are below. Move to next layer and check.
        layers_equal = false;
        break;
      }
      previous_id = id;
      previous_new_id = new_id;
    }
    if (strictly_above_target_layer || layers_equal)
      break;
  }

  // Found the layer we are above. Now move everything to accommodate the new
  // layer. And put the new ids and type into the topology.
  // Shift types[target_layer..depth-1] down one slot to open a gap...
  for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
    types[j] = types[i];
  types[target_layer] = type;
  // ...and make the matching per-thread id shift before inserting new ids.
  for (int k = 0; k < num_hw_threads; ++k) {
    for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
      hw_threads[k].ids[j] = hw_threads[k].ids[i];
    hw_threads[k].ids[target_layer] = ids[k];
  }
  // The new layer is (so far) only equivalent to itself.
  equivalent[type] = type;
  depth++;
}
312349cc55cSDimitry Andric 
313349cc55cSDimitry Andric #if KMP_GROUP_AFFINITY
// Insert the Windows Processor Group structure into the topology
void kmp_topology_t::_insert_windows_proc_groups() {
  // Do not insert the processor group structure for a single group
  if (__kmp_num_proc_groups == 1)
    return;
  kmp_affin_mask_t *mask;
  // One id per hardware thread: the processor group its OS proc belongs to.
  int *ids = (int *)__kmp_allocate(sizeof(int) * num_hw_threads);
  KMP_CPU_ALLOC(mask);
  for (int i = 0; i < num_hw_threads; ++i) {
    // Build a single-proc mask to query that proc's group.
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(hw_threads[i].os_id, mask);
    ids[i] = __kmp_get_proc_group(mask);
  }
  KMP_CPU_FREE(mask);
  _insert_layer(KMP_HW_PROC_GROUP, ids);
  __kmp_free(ids);

  // sort topology after adding proc groups
  __kmp_topology->sort_ids();
}
334349cc55cSDimitry Andric #endif
335349cc55cSDimitry Andric 
// Remove layers that don't add information to the topology.
// This is done by having the layer take on the id = UNKNOWN_ID (-1)
void kmp_topology_t::_remove_radix1_layers() {
  int preference[KMP_HW_LAST];
  int top_index1, top_index2;
  // Set up preference associative array: when two adjacent layers are
  // radix-1 (one-to-one), the layer with the LOWER preference is removed.
  preference[KMP_HW_SOCKET] = 110;
  preference[KMP_HW_PROC_GROUP] = 100;
  preference[KMP_HW_CORE] = 95;
  preference[KMP_HW_THREAD] = 90;
  preference[KMP_HW_NUMA] = 85;
  preference[KMP_HW_DIE] = 80;
  preference[KMP_HW_TILE] = 75;
  preference[KMP_HW_MODULE] = 73;
  preference[KMP_HW_L3] = 70;
  preference[KMP_HW_L2] = 65;
  preference[KMP_HW_L1] = 60;
  preference[KMP_HW_LLC] = 5;
  // Slide a window of two adjacent layers down the topology.
  top_index1 = 0;
  top_index2 = 1;
  while (top_index1 < depth - 1 && top_index2 < depth) {
    kmp_hw_t type1 = types[top_index1];
    kmp_hw_t type2 = types[top_index2];
    KMP_ASSERT_VALID_HW_TYPE(type1);
    KMP_ASSERT_VALID_HW_TYPE(type2);
    // Do not allow the three main topology levels (sockets, cores, threads) to
    // be compacted down
    if ((type1 == KMP_HW_THREAD || type1 == KMP_HW_CORE ||
         type1 == KMP_HW_SOCKET) &&
        (type2 == KMP_HW_THREAD || type2 == KMP_HW_CORE ||
         type2 == KMP_HW_SOCKET)) {
      top_index1 = top_index2++;
      continue;
    }
    // radix1 stays true when the deeper layer's id never changes without the
    // upper layer's id changing too, i.e. the pair is one-to-one.
    // all_same additionally tracks whether the deeper layer's id is identical
    // across every hardware thread.
    bool radix1 = true;
    bool all_same = true;
    int id1 = hw_threads[0].ids[top_index1];
    int id2 = hw_threads[0].ids[top_index2];
    int pref1 = preference[type1];
    int pref2 = preference[type2];
    for (int hwidx = 1; hwidx < num_hw_threads; ++hwidx) {
      if (hw_threads[hwidx].ids[top_index1] == id1 &&
          hw_threads[hwidx].ids[top_index2] != id2) {
        radix1 = false;
        break;
      }
      if (hw_threads[hwidx].ids[top_index2] != id2)
        all_same = false;
      id1 = hw_threads[hwidx].ids[top_index1];
      id2 = hw_threads[hwidx].ids[top_index2];
    }
    if (radix1) {
      // Select the layer to remove based on preference
      kmp_hw_t remove_type, keep_type;
      int remove_layer, remove_layer_ids;
      if (pref1 > pref2) {
        remove_type = type2;
        remove_layer = remove_layer_ids = top_index2;
        keep_type = type1;
      } else {
        remove_type = type1;
        remove_layer = remove_layer_ids = top_index1;
        keep_type = type2;
      }
      // If all the indexes for the second (deeper) layer are the same.
      // e.g., all are zero, then make sure to keep the first layer's ids
      if (all_same)
        remove_layer_ids = top_index2;
      // Remove radix one type by setting the equivalence, removing the id from
      // the hw threads and removing the layer from types and depth
      set_equivalent_type(remove_type, keep_type);
      for (int idx = 0; idx < num_hw_threads; ++idx) {
        kmp_hw_thread_t &hw_thread = hw_threads[idx];
        for (int d = remove_layer_ids; d < depth - 1; ++d)
          hw_thread.ids[d] = hw_thread.ids[d + 1];
      }
      for (int idx = remove_layer; idx < depth - 1; ++idx)
        types[idx] = types[idx + 1];
      depth--;
      // Note: do NOT advance the window here; re-test the same position,
      // since the layer that slid into top_index2 may also be radix-1.
    } else {
      top_index1 = top_index2++;
    }
  }
  KMP_ASSERT(depth > 0);
}
421fe6060f1SDimitry Andric 
// Establish which detected level stands in for the "last level cache" by
// aliasing KMP_HW_LLC, preferring L3, then L2, then (on KNL) tile/L1, then
// L1, with socket/core as the final fallback.
void kmp_topology_t::_set_last_level_cache() {
  if (get_equivalent_type(KMP_HW_L3) != KMP_HW_UNKNOWN)
    set_equivalent_type(KMP_HW_LLC, KMP_HW_L3);
  else if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
    set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
#if KMP_MIC_SUPPORTED
  else if (__kmp_mic_type == mic3) {
    if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
    else if (get_equivalent_type(KMP_HW_TILE) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_TILE);
    // L2/Tile wasn't detected so just say L1
    else
      set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
  }
#endif
  else if (get_equivalent_type(KMP_HW_L1) != KMP_HW_UNKNOWN)
    set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
  // Fallback is to set last level cache to socket or core
  if (get_equivalent_type(KMP_HW_LLC) == KMP_HW_UNKNOWN) {
    if (get_equivalent_type(KMP_HW_SOCKET) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_SOCKET);
    else if (get_equivalent_type(KMP_HW_CORE) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_CORE);
  }
  // Some LLC equivalence must exist by this point.
  KMP_ASSERT(get_equivalent_type(KMP_HW_LLC) != KMP_HW_UNKNOWN);
}
449fe6060f1SDimitry Andric 
// Gather the count of each topology layer and the ratio.
// After this runs: count[l] = number of distinct objects seen at level l,
// ratio[l] = maximum number of level-l objects under one level-(l-1) parent.
// For hybrid CPUs, also collects the distinct core types and the number of
// core efficiency classes.
// NOTE(review): counting by "id changed since previous thread" assumes
// hw_threads is sorted so equal ids are adjacent -- confirm against callers.
void kmp_topology_t::_gather_enumeration_information() {
  int previous_id[KMP_HW_LAST];
  int max[KMP_HW_LAST];

  for (int i = 0; i < depth; ++i) {
    previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
    max[i] = 0;
    count[i] = 0;
    ratio[i] = 0;
  }
  int core_level = get_level(KMP_HW_CORE);
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    for (int layer = 0; layer < depth; ++layer) {
      int id = hw_thread.ids[layer];
      if (id != previous_id[layer]) {
        // Add an additional increment to each count:
        // a new object at this layer implies new objects at every layer below.
        for (int l = layer; l < depth; ++l)
          count[l]++;
        // Keep track of topology layer ratio statistics:
        // fold the running per-parent maxima of the deeper layers into ratio
        // and restart their counts under the new parent.
        max[layer]++;
        for (int l = layer + 1; l < depth; ++l) {
          if (max[l] > ratio[l])
            ratio[l] = max[l];
          max[l] = 1;
        }
        // Figure out the number of different core types
        // and efficiencies for hybrid CPUs
        if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level) {
          if (hw_thread.attrs.is_core_eff_valid() &&
              hw_thread.attrs.core_eff >= num_core_efficiencies) {
            // Because efficiencies can range from 0 to max efficiency - 1,
            // the number of efficiencies is max efficiency + 1
            num_core_efficiencies = hw_thread.attrs.core_eff + 1;
          }
          if (hw_thread.attrs.is_core_type_valid()) {
            // Record the core type if it has not been seen before.
            bool found = false;
            for (int j = 0; j < num_core_types; ++j) {
              if (hw_thread.attrs.get_core_type() == core_types[j]) {
                found = true;
                break;
              }
            }
            if (!found) {
              KMP_ASSERT(num_core_types < KMP_HW_MAX_NUM_CORE_TYPES);
              core_types[num_core_types++] = hw_thread.attrs.get_core_type();
            }
          }
        }
        break;
      }
    }
    for (int layer = 0; layer < depth; ++layer) {
      previous_id[layer] = hw_thread.ids[layer];
    }
  }
  // Fold in the per-parent maxima still pending from the final parent group.
  for (int layer = 0; layer < depth; ++layer) {
    if (max[layer] > ratio[layer])
      ratio[layer] = max[layer];
  }
}
512fe6060f1SDimitry Andric 
// Count the number of cores whose attributes match attr.
// When find_all is false, cores are counted separately within each object at
// topology level above_level, and the maximum of those per-object counts is
// returned.  When find_all is true, above_level is ignored (forced to -1) and
// a single machine-wide count is returned.
int kmp_topology_t::_get_ncores_with_attr(const kmp_hw_attr_t &attr,
                                          int above_level,
                                          bool find_all) const {
  int current, current_max;
  int previous_id[KMP_HW_LAST];
  for (int i = 0; i < depth; ++i)
    previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
  int core_level = get_level(KMP_HW_CORE);
  if (find_all)
    above_level = -1;
  KMP_ASSERT(above_level < core_level);
  current_max = 0;
  current = 0;
  // Walk the hardware threads (assumed sorted by ids) and count each core
  // exactly once: a new core begins whenever any id between above_level+1 and
  // the core level changes relative to the previous thread.
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    if (!find_all && hw_thread.ids[above_level] != previous_id[above_level]) {
      // Entered a new object at above_level: fold the finished object's count
      // into the running maximum and restart with this thread's core.
      if (current > current_max)
        current_max = current;
      current = hw_thread.attrs.contains(attr);
    } else {
      // Same object (or machine-wide scan): only count when a new core starts.
      for (int level = above_level + 1; level <= core_level; ++level) {
        if (hw_thread.ids[level] != previous_id[level]) {
          if (hw_thread.attrs.contains(attr))
            current++;
          break;
        }
      }
    }
    for (int level = 0; level < depth; ++level)
      previous_id[level] = hw_thread.ids[level];
  }
  // Account for the last object scanned.
  if (current > current_max)
    current_max = current;
  return current_max;
}
5480eae32dcSDimitry Andric 
549fe6060f1SDimitry Andric // Find out if the topology is uniform
_discover_uniformity()550fe6060f1SDimitry Andric void kmp_topology_t::_discover_uniformity() {
551fe6060f1SDimitry Andric   int num = 1;
552fe6060f1SDimitry Andric   for (int level = 0; level < depth; ++level)
553fe6060f1SDimitry Andric     num *= ratio[level];
554fe6060f1SDimitry Andric   flags.uniform = (num == count[depth - 1]);
555fe6060f1SDimitry Andric }
556fe6060f1SDimitry Andric 
557fe6060f1SDimitry Andric // Set all the sub_ids for each hardware thread
_set_sub_ids()558fe6060f1SDimitry Andric void kmp_topology_t::_set_sub_ids() {
559fe6060f1SDimitry Andric   int previous_id[KMP_HW_LAST];
560fe6060f1SDimitry Andric   int sub_id[KMP_HW_LAST];
561fe6060f1SDimitry Andric 
562fe6060f1SDimitry Andric   for (int i = 0; i < depth; ++i) {
563fe6060f1SDimitry Andric     previous_id[i] = -1;
564fe6060f1SDimitry Andric     sub_id[i] = -1;
565fe6060f1SDimitry Andric   }
566fe6060f1SDimitry Andric   for (int i = 0; i < num_hw_threads; ++i) {
567fe6060f1SDimitry Andric     kmp_hw_thread_t &hw_thread = hw_threads[i];
568fe6060f1SDimitry Andric     // Setup the sub_id
569fe6060f1SDimitry Andric     for (int j = 0; j < depth; ++j) {
570fe6060f1SDimitry Andric       if (hw_thread.ids[j] != previous_id[j]) {
571fe6060f1SDimitry Andric         sub_id[j]++;
572fe6060f1SDimitry Andric         for (int k = j + 1; k < depth; ++k) {
573fe6060f1SDimitry Andric           sub_id[k] = 0;
574fe6060f1SDimitry Andric         }
575fe6060f1SDimitry Andric         break;
576fe6060f1SDimitry Andric       }
577fe6060f1SDimitry Andric     }
578fe6060f1SDimitry Andric     // Set previous_id
579fe6060f1SDimitry Andric     for (int j = 0; j < depth; ++j) {
580fe6060f1SDimitry Andric       previous_id[j] = hw_thread.ids[j];
581fe6060f1SDimitry Andric     }
582fe6060f1SDimitry Andric     // Set the sub_ids field
583fe6060f1SDimitry Andric     for (int j = 0; j < depth; ++j) {
584fe6060f1SDimitry Andric       hw_thread.sub_ids[j] = sub_id[j];
585fe6060f1SDimitry Andric     }
586fe6060f1SDimitry Andric   }
587fe6060f1SDimitry Andric }
588fe6060f1SDimitry Andric 
_set_globals()589fe6060f1SDimitry Andric void kmp_topology_t::_set_globals() {
590fe6060f1SDimitry Andric   // Set nCoresPerPkg, nPackages, __kmp_nThreadsPerCore, __kmp_ncores
591fe6060f1SDimitry Andric   int core_level, thread_level, package_level;
592fe6060f1SDimitry Andric   package_level = get_level(KMP_HW_SOCKET);
593fe6060f1SDimitry Andric #if KMP_GROUP_AFFINITY
594fe6060f1SDimitry Andric   if (package_level == -1)
595fe6060f1SDimitry Andric     package_level = get_level(KMP_HW_PROC_GROUP);
596fe6060f1SDimitry Andric #endif
597fe6060f1SDimitry Andric   core_level = get_level(KMP_HW_CORE);
598fe6060f1SDimitry Andric   thread_level = get_level(KMP_HW_THREAD);
599fe6060f1SDimitry Andric 
600fe6060f1SDimitry Andric   KMP_ASSERT(core_level != -1);
601fe6060f1SDimitry Andric   KMP_ASSERT(thread_level != -1);
602fe6060f1SDimitry Andric 
603fe6060f1SDimitry Andric   __kmp_nThreadsPerCore = calculate_ratio(thread_level, core_level);
604fe6060f1SDimitry Andric   if (package_level != -1) {
605fe6060f1SDimitry Andric     nCoresPerPkg = calculate_ratio(core_level, package_level);
606fe6060f1SDimitry Andric     nPackages = get_count(package_level);
607fe6060f1SDimitry Andric   } else {
608fe6060f1SDimitry Andric     // assume one socket
609fe6060f1SDimitry Andric     nCoresPerPkg = get_count(core_level);
610fe6060f1SDimitry Andric     nPackages = 1;
611fe6060f1SDimitry Andric   }
612fe6060f1SDimitry Andric #ifndef KMP_DFLT_NTH_CORES
613fe6060f1SDimitry Andric   __kmp_ncores = get_count(core_level);
614fe6060f1SDimitry Andric #endif
615fe6060f1SDimitry Andric }
616fe6060f1SDimitry Andric 
allocate(int nproc,int ndepth,const kmp_hw_t * types)617fe6060f1SDimitry Andric kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
618fe6060f1SDimitry Andric                                          const kmp_hw_t *types) {
619fe6060f1SDimitry Andric   kmp_topology_t *retval;
620fe6060f1SDimitry Andric   // Allocate all data in one large allocation
621fe6060f1SDimitry Andric   size_t size = sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc +
622349cc55cSDimitry Andric                 sizeof(int) * (size_t)KMP_HW_LAST * 3;
623fe6060f1SDimitry Andric   char *bytes = (char *)__kmp_allocate(size);
624fe6060f1SDimitry Andric   retval = (kmp_topology_t *)bytes;
625fe6060f1SDimitry Andric   if (nproc > 0) {
626fe6060f1SDimitry Andric     retval->hw_threads = (kmp_hw_thread_t *)(bytes + sizeof(kmp_topology_t));
627fe6060f1SDimitry Andric   } else {
628fe6060f1SDimitry Andric     retval->hw_threads = nullptr;
629fe6060f1SDimitry Andric   }
630fe6060f1SDimitry Andric   retval->num_hw_threads = nproc;
631fe6060f1SDimitry Andric   retval->depth = ndepth;
632fe6060f1SDimitry Andric   int *arr =
633fe6060f1SDimitry Andric       (int *)(bytes + sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc);
634fe6060f1SDimitry Andric   retval->types = (kmp_hw_t *)arr;
635349cc55cSDimitry Andric   retval->ratio = arr + (size_t)KMP_HW_LAST;
636349cc55cSDimitry Andric   retval->count = arr + 2 * (size_t)KMP_HW_LAST;
6370eae32dcSDimitry Andric   retval->num_core_efficiencies = 0;
6380eae32dcSDimitry Andric   retval->num_core_types = 0;
639bdd1243dSDimitry Andric   retval->compact = 0;
6400eae32dcSDimitry Andric   for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
6410eae32dcSDimitry Andric     retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
642fe6060f1SDimitry Andric   KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
643fe6060f1SDimitry Andric   for (int i = 0; i < ndepth; ++i) {
644fe6060f1SDimitry Andric     retval->types[i] = types[i];
645fe6060f1SDimitry Andric     retval->equivalent[types[i]] = types[i];
646fe6060f1SDimitry Andric   }
647fe6060f1SDimitry Andric   return retval;
648fe6060f1SDimitry Andric }
649fe6060f1SDimitry Andric 
deallocate(kmp_topology_t * topology)650fe6060f1SDimitry Andric void kmp_topology_t::deallocate(kmp_topology_t *topology) {
651fe6060f1SDimitry Andric   if (topology)
652fe6060f1SDimitry Andric     __kmp_free(topology);
653fe6060f1SDimitry Andric }
654fe6060f1SDimitry Andric 
check_ids() const655fe6060f1SDimitry Andric bool kmp_topology_t::check_ids() const {
656fe6060f1SDimitry Andric   // Assume ids have been sorted
657fe6060f1SDimitry Andric   if (num_hw_threads == 0)
658fe6060f1SDimitry Andric     return true;
659fe6060f1SDimitry Andric   for (int i = 1; i < num_hw_threads; ++i) {
660fe6060f1SDimitry Andric     kmp_hw_thread_t &current_thread = hw_threads[i];
661fe6060f1SDimitry Andric     kmp_hw_thread_t &previous_thread = hw_threads[i - 1];
662fe6060f1SDimitry Andric     bool unique = false;
663fe6060f1SDimitry Andric     for (int j = 0; j < depth; ++j) {
664fe6060f1SDimitry Andric       if (previous_thread.ids[j] != current_thread.ids[j]) {
665fe6060f1SDimitry Andric         unique = true;
666fe6060f1SDimitry Andric         break;
667fe6060f1SDimitry Andric       }
668fe6060f1SDimitry Andric     }
669fe6060f1SDimitry Andric     if (unique)
670fe6060f1SDimitry Andric       continue;
671fe6060f1SDimitry Andric     return false;
672fe6060f1SDimitry Andric   }
673fe6060f1SDimitry Andric   return true;
674fe6060f1SDimitry Andric }
675fe6060f1SDimitry Andric 
dump() const676fe6060f1SDimitry Andric void kmp_topology_t::dump() const {
677fe6060f1SDimitry Andric   printf("***********************\n");
678fe6060f1SDimitry Andric   printf("*** __kmp_topology: ***\n");
679fe6060f1SDimitry Andric   printf("***********************\n");
680fe6060f1SDimitry Andric   printf("* depth: %d\n", depth);
681fe6060f1SDimitry Andric 
682fe6060f1SDimitry Andric   printf("* types: ");
683fe6060f1SDimitry Andric   for (int i = 0; i < depth; ++i)
684fe6060f1SDimitry Andric     printf("%15s ", __kmp_hw_get_keyword(types[i]));
685fe6060f1SDimitry Andric   printf("\n");
686fe6060f1SDimitry Andric 
687fe6060f1SDimitry Andric   printf("* ratio: ");
688fe6060f1SDimitry Andric   for (int i = 0; i < depth; ++i) {
689fe6060f1SDimitry Andric     printf("%15d ", ratio[i]);
690fe6060f1SDimitry Andric   }
691fe6060f1SDimitry Andric   printf("\n");
692fe6060f1SDimitry Andric 
693fe6060f1SDimitry Andric   printf("* count: ");
694fe6060f1SDimitry Andric   for (int i = 0; i < depth; ++i) {
695fe6060f1SDimitry Andric     printf("%15d ", count[i]);
696fe6060f1SDimitry Andric   }
697fe6060f1SDimitry Andric   printf("\n");
698fe6060f1SDimitry Andric 
6990eae32dcSDimitry Andric   printf("* num_core_eff: %d\n", num_core_efficiencies);
7000eae32dcSDimitry Andric   printf("* num_core_types: %d\n", num_core_types);
7010eae32dcSDimitry Andric   printf("* core_types: ");
7020eae32dcSDimitry Andric   for (int i = 0; i < num_core_types; ++i)
7030eae32dcSDimitry Andric     printf("%3d ", core_types[i]);
7040eae32dcSDimitry Andric   printf("\n");
705349cc55cSDimitry Andric 
706fe6060f1SDimitry Andric   printf("* equivalent map:\n");
707fe6060f1SDimitry Andric   KMP_FOREACH_HW_TYPE(i) {
708fe6060f1SDimitry Andric     const char *key = __kmp_hw_get_keyword(i);
709fe6060f1SDimitry Andric     const char *value = __kmp_hw_get_keyword(equivalent[i]);
710fe6060f1SDimitry Andric     printf("%-15s -> %-15s\n", key, value);
711fe6060f1SDimitry Andric   }
712fe6060f1SDimitry Andric 
713fe6060f1SDimitry Andric   printf("* uniform: %s\n", (is_uniform() ? "Yes" : "No"));
714fe6060f1SDimitry Andric 
715fe6060f1SDimitry Andric   printf("* num_hw_threads: %d\n", num_hw_threads);
716fe6060f1SDimitry Andric   printf("* hw_threads:\n");
717fe6060f1SDimitry Andric   for (int i = 0; i < num_hw_threads; ++i) {
718fe6060f1SDimitry Andric     hw_threads[i].print();
719fe6060f1SDimitry Andric   }
720fe6060f1SDimitry Andric   printf("***********************\n");
721fe6060f1SDimitry Andric }
722fe6060f1SDimitry Andric 
print(const char * env_var) const723fe6060f1SDimitry Andric void kmp_topology_t::print(const char *env_var) const {
724fe6060f1SDimitry Andric   kmp_str_buf_t buf;
725fe6060f1SDimitry Andric   int print_types_depth;
726fe6060f1SDimitry Andric   __kmp_str_buf_init(&buf);
727fe6060f1SDimitry Andric   kmp_hw_t print_types[KMP_HW_LAST + 2];
728fe6060f1SDimitry Andric 
729fe6060f1SDimitry Andric   // Num Available Threads
730bdd1243dSDimitry Andric   if (num_hw_threads) {
731fe6060f1SDimitry Andric     KMP_INFORM(AvailableOSProc, env_var, num_hw_threads);
732bdd1243dSDimitry Andric   } else {
733bdd1243dSDimitry Andric     KMP_INFORM(AvailableOSProc, env_var, __kmp_xproc);
734bdd1243dSDimitry Andric   }
735fe6060f1SDimitry Andric 
736fe6060f1SDimitry Andric   // Uniform or not
737fe6060f1SDimitry Andric   if (is_uniform()) {
738fe6060f1SDimitry Andric     KMP_INFORM(Uniform, env_var);
739fe6060f1SDimitry Andric   } else {
740fe6060f1SDimitry Andric     KMP_INFORM(NonUniform, env_var);
741fe6060f1SDimitry Andric   }
742fe6060f1SDimitry Andric 
743fe6060f1SDimitry Andric   // Equivalent types
744fe6060f1SDimitry Andric   KMP_FOREACH_HW_TYPE(type) {
745fe6060f1SDimitry Andric     kmp_hw_t eq_type = equivalent[type];
746fe6060f1SDimitry Andric     if (eq_type != KMP_HW_UNKNOWN && eq_type != type) {
747fe6060f1SDimitry Andric       KMP_INFORM(AffEqualTopologyTypes, env_var,
748fe6060f1SDimitry Andric                  __kmp_hw_get_catalog_string(type),
749fe6060f1SDimitry Andric                  __kmp_hw_get_catalog_string(eq_type));
750fe6060f1SDimitry Andric     }
751fe6060f1SDimitry Andric   }
752fe6060f1SDimitry Andric 
753fe6060f1SDimitry Andric   // Quick topology
754fe6060f1SDimitry Andric   KMP_ASSERT(depth > 0 && depth <= (int)KMP_HW_LAST);
755fe6060f1SDimitry Andric   // Create a print types array that always guarantees printing
756fe6060f1SDimitry Andric   // the core and thread level
757fe6060f1SDimitry Andric   print_types_depth = 0;
758fe6060f1SDimitry Andric   for (int level = 0; level < depth; ++level)
759fe6060f1SDimitry Andric     print_types[print_types_depth++] = types[level];
760fe6060f1SDimitry Andric   if (equivalent[KMP_HW_CORE] != KMP_HW_CORE) {
761fe6060f1SDimitry Andric     // Force in the core level for quick topology
762fe6060f1SDimitry Andric     if (print_types[print_types_depth - 1] == KMP_HW_THREAD) {
763fe6060f1SDimitry Andric       // Force core before thread e.g., 1 socket X 2 threads/socket
764fe6060f1SDimitry Andric       // becomes 1 socket X 1 core/socket X 2 threads/socket
765fe6060f1SDimitry Andric       print_types[print_types_depth - 1] = KMP_HW_CORE;
766fe6060f1SDimitry Andric       print_types[print_types_depth++] = KMP_HW_THREAD;
767fe6060f1SDimitry Andric     } else {
768fe6060f1SDimitry Andric       print_types[print_types_depth++] = KMP_HW_CORE;
769fe6060f1SDimitry Andric     }
770fe6060f1SDimitry Andric   }
771fe6060f1SDimitry Andric   // Always put threads at very end of quick topology
772fe6060f1SDimitry Andric   if (equivalent[KMP_HW_THREAD] != KMP_HW_THREAD)
773fe6060f1SDimitry Andric     print_types[print_types_depth++] = KMP_HW_THREAD;
774fe6060f1SDimitry Andric 
775fe6060f1SDimitry Andric   __kmp_str_buf_clear(&buf);
776fe6060f1SDimitry Andric   kmp_hw_t numerator_type;
777fe6060f1SDimitry Andric   kmp_hw_t denominator_type = KMP_HW_UNKNOWN;
778fe6060f1SDimitry Andric   int core_level = get_level(KMP_HW_CORE);
779fe6060f1SDimitry Andric   int ncores = get_count(core_level);
780fe6060f1SDimitry Andric 
781fe6060f1SDimitry Andric   for (int plevel = 0, level = 0; plevel < print_types_depth; ++plevel) {
782fe6060f1SDimitry Andric     int c;
783fe6060f1SDimitry Andric     bool plural;
784fe6060f1SDimitry Andric     numerator_type = print_types[plevel];
785fe6060f1SDimitry Andric     KMP_ASSERT_VALID_HW_TYPE(numerator_type);
786fe6060f1SDimitry Andric     if (equivalent[numerator_type] != numerator_type)
787fe6060f1SDimitry Andric       c = 1;
788fe6060f1SDimitry Andric     else
789fe6060f1SDimitry Andric       c = get_ratio(level++);
790fe6060f1SDimitry Andric     plural = (c > 1);
791fe6060f1SDimitry Andric     if (plevel == 0) {
792fe6060f1SDimitry Andric       __kmp_str_buf_print(&buf, "%d %s", c,
793fe6060f1SDimitry Andric                           __kmp_hw_get_catalog_string(numerator_type, plural));
794fe6060f1SDimitry Andric     } else {
795fe6060f1SDimitry Andric       __kmp_str_buf_print(&buf, " x %d %s/%s", c,
796fe6060f1SDimitry Andric                           __kmp_hw_get_catalog_string(numerator_type, plural),
797fe6060f1SDimitry Andric                           __kmp_hw_get_catalog_string(denominator_type));
798fe6060f1SDimitry Andric     }
799fe6060f1SDimitry Andric     denominator_type = numerator_type;
800fe6060f1SDimitry Andric   }
801fe6060f1SDimitry Andric   KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
802fe6060f1SDimitry Andric 
8030eae32dcSDimitry Andric   // Hybrid topology information
804349cc55cSDimitry Andric   if (__kmp_is_hybrid_cpu()) {
8050eae32dcSDimitry Andric     for (int i = 0; i < num_core_types; ++i) {
8060eae32dcSDimitry Andric       kmp_hw_core_type_t core_type = core_types[i];
8070eae32dcSDimitry Andric       kmp_hw_attr_t attr;
8080eae32dcSDimitry Andric       attr.clear();
8090eae32dcSDimitry Andric       attr.set_core_type(core_type);
8100eae32dcSDimitry Andric       int ncores = get_ncores_with_attr(attr);
8110eae32dcSDimitry Andric       if (ncores > 0) {
8120eae32dcSDimitry Andric         KMP_INFORM(TopologyHybrid, env_var, ncores,
8130eae32dcSDimitry Andric                    __kmp_hw_get_core_type_string(core_type));
8140eae32dcSDimitry Andric         KMP_ASSERT(num_core_efficiencies <= KMP_HW_MAX_NUM_CORE_EFFS)
8150eae32dcSDimitry Andric         for (int eff = 0; eff < num_core_efficiencies; ++eff) {
8160eae32dcSDimitry Andric           attr.set_core_eff(eff);
8170eae32dcSDimitry Andric           int ncores_with_eff = get_ncores_with_attr(attr);
8180eae32dcSDimitry Andric           if (ncores_with_eff > 0) {
8190eae32dcSDimitry Andric             KMP_INFORM(TopologyHybridCoreEff, env_var, ncores_with_eff, eff);
8200eae32dcSDimitry Andric           }
8210eae32dcSDimitry Andric         }
8220eae32dcSDimitry Andric       }
823349cc55cSDimitry Andric     }
824349cc55cSDimitry Andric   }
825349cc55cSDimitry Andric 
826fe6060f1SDimitry Andric   if (num_hw_threads <= 0) {
827fe6060f1SDimitry Andric     __kmp_str_buf_free(&buf);
828fe6060f1SDimitry Andric     return;
829fe6060f1SDimitry Andric   }
830fe6060f1SDimitry Andric 
831fe6060f1SDimitry Andric   // Full OS proc to hardware thread map
832fe6060f1SDimitry Andric   KMP_INFORM(OSProcToPhysicalThreadMap, env_var);
833fe6060f1SDimitry Andric   for (int i = 0; i < num_hw_threads; i++) {
834fe6060f1SDimitry Andric     __kmp_str_buf_clear(&buf);
835fe6060f1SDimitry Andric     for (int level = 0; level < depth; ++level) {
836fe6060f1SDimitry Andric       kmp_hw_t type = types[level];
837fe6060f1SDimitry Andric       __kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
838fe6060f1SDimitry Andric       __kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
839fe6060f1SDimitry Andric     }
840349cc55cSDimitry Andric     if (__kmp_is_hybrid_cpu())
841349cc55cSDimitry Andric       __kmp_str_buf_print(
8420eae32dcSDimitry Andric           &buf, "(%s)",
8430eae32dcSDimitry Andric           __kmp_hw_get_core_type_string(hw_threads[i].attrs.get_core_type()));
844fe6060f1SDimitry Andric     KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
845fe6060f1SDimitry Andric   }
846fe6060f1SDimitry Andric 
847fe6060f1SDimitry Andric   __kmp_str_buf_free(&buf);
848fe6060f1SDimitry Andric }
849fe6060f1SDimitry Andric 
#if KMP_AFFINITY_SUPPORTED
// Choose and validate the affinity granularity for this topology, storing the
// result in affinity.gran / affinity.gran_levels.  Three adjustments:
// 1) hybrid core-attribute granularities requested on a non-hybrid CPU are
//    demoted to core granularity (with a warning);
// 2) a requested granularity layer that was not detected is replaced by the
//    closest detected layer (core, then thread, then socket);
// 3) with multiple processor groups (KMP_GROUP_AFFINITY), a granularity
//    coarser than a processor group is restricted to the group level.
void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
  const char *env_var = __kmp_get_affinity_env_var(affinity);
  // If requested hybrid CPU attributes for granularity (either OMP_PLACES or
  // KMP_AFFINITY), but none exist, then reset granularity and have below method
  // select a granularity and warn user.
  if (!__kmp_is_hybrid_cpu()) {
    if (affinity.core_attr_gran.valid) {
      // OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
      // instead
      KMP_AFF_WARNING(
          affinity, AffIgnoringNonHybrid, env_var,
          __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
      affinity.gran = KMP_HW_CORE;
      affinity.gran_levels = -1;
      affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
      affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
    } else if (affinity.flags.core_types_gran ||
               affinity.flags.core_effs_gran) {
      // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
      if (affinity.flags.omp_places) {
        KMP_AFF_WARNING(
            affinity, AffIgnoringNonHybrid, env_var,
            __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
      } else {
        // KMP_AFFINITY=granularity=core_type|core_eff,...
        KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
                        "Intel(R) Hybrid Technology core attribute",
                        __kmp_hw_get_catalog_string(KMP_HW_CORE));
      }
      affinity.gran = KMP_HW_CORE;
      affinity.gran_levels = -1;
      affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
      affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
    }
  }
  // Set the number of affinity granularity levels
  // (gran_levels < 0 means "not yet computed").
  if (affinity.gran_levels < 0) {
    kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
    // Check if user's granularity request is valid
    if (gran_type == KMP_HW_UNKNOWN) {
      // First try core, then thread, then package
      kmp_hw_t gran_types[3] = {KMP_HW_CORE, KMP_HW_THREAD, KMP_HW_SOCKET};
      for (auto g : gran_types) {
        if (get_equivalent_type(g) != KMP_HW_UNKNOWN) {
          gran_type = g;
          break;
        }
      }
      KMP_ASSERT(gran_type != KMP_HW_UNKNOWN);
      // Warn user what granularity setting will be used instead
      KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
                      __kmp_hw_get_catalog_string(affinity.gran),
                      __kmp_hw_get_catalog_string(gran_type));
      affinity.gran = gran_type;
    }
#if KMP_GROUP_AFFINITY
    // If more than one processor group exists, and the level of
    // granularity specified by the user is too coarse, then the
    // granularity must be adjusted "down" to processor group affinity
    // because threads can only exist within one processor group.
    // For example, if a user sets granularity=socket and there are two
    // processor groups that cover a socket, then the runtime must
    // restrict the granularity down to the processor group level.
    if (__kmp_num_proc_groups > 1) {
      int gran_depth = get_level(gran_type);
      int proc_group_depth = get_level(KMP_HW_PROC_GROUP);
      if (gran_depth >= 0 && proc_group_depth >= 0 &&
          gran_depth < proc_group_depth) {
        KMP_AFF_WARNING(affinity, AffGranTooCoarseProcGroup, env_var,
                        __kmp_hw_get_catalog_string(affinity.gran));
        affinity.gran = gran_type = KMP_HW_PROC_GROUP;
      }
    }
#endif
    // gran_levels counts the topology levels below the granularity level.
    affinity.gran_levels = 0;
    for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
      affinity.gran_levels++;
  }
}
#endif
931bdd1243dSDimitry Andric 
// Normalize a freshly-built topology into canonical form.  The phases are
// order-dependent: enumeration data is gathered only after layers have been
// inserted/removed, and sub-ids/globals/last-level-cache derive from it.
void kmp_topology_t::canonicalize() {
#if KMP_GROUP_AFFINITY
  // Expose Windows processor groups as an explicit topology layer.
  _insert_windows_proc_groups();
#endif
  // Remove radix-1 (degenerate) layers before gathering counts/ratios.
  _remove_radix1_layers();
  _gather_enumeration_information();
  _discover_uniformity();
  _set_sub_ids();
  _set_globals();
  _set_last_level_cache();

#if KMP_MIC_SUPPORTED
  // Manually Add L2 = Tile equivalence
  if (__kmp_mic_type == mic3) {
    if (get_level(KMP_HW_L2) != -1)
      set_equivalent_type(KMP_HW_TILE, KMP_HW_L2);
    else if (get_level(KMP_HW_TILE) != -1)
      set_equivalent_type(KMP_HW_L2, KMP_HW_TILE);
  }
#endif

  // Perform post canonicalization checking
  KMP_ASSERT(depth > 0);
  for (int level = 0; level < depth; ++level) {
    // All counts, ratios, and types must be valid
    KMP_ASSERT(count[level] > 0 && ratio[level] > 0);
    KMP_ASSERT_VALID_HW_TYPE(types[level]);
    // Detected types must point to themselves
    KMP_ASSERT(equivalent[types[level]] == types[level]);
  }
}
963fe6060f1SDimitry Andric 
964fe6060f1SDimitry Andric // Canonicalize an explicit packages X cores/pkg X threads/core topology
canonicalize(int npackages,int ncores_per_pkg,int nthreads_per_core,int ncores)965fe6060f1SDimitry Andric void kmp_topology_t::canonicalize(int npackages, int ncores_per_pkg,
966fe6060f1SDimitry Andric                                   int nthreads_per_core, int ncores) {
967fe6060f1SDimitry Andric   int ndepth = 3;
968fe6060f1SDimitry Andric   depth = ndepth;
969fe6060f1SDimitry Andric   KMP_FOREACH_HW_TYPE(i) { equivalent[i] = KMP_HW_UNKNOWN; }
970fe6060f1SDimitry Andric   for (int level = 0; level < depth; ++level) {
971fe6060f1SDimitry Andric     count[level] = 0;
972fe6060f1SDimitry Andric     ratio[level] = 0;
973fe6060f1SDimitry Andric   }
974fe6060f1SDimitry Andric   count[0] = npackages;
975fe6060f1SDimitry Andric   count[1] = ncores;
976fe6060f1SDimitry Andric   count[2] = __kmp_xproc;
977fe6060f1SDimitry Andric   ratio[0] = npackages;
978fe6060f1SDimitry Andric   ratio[1] = ncores_per_pkg;
979fe6060f1SDimitry Andric   ratio[2] = nthreads_per_core;
980fe6060f1SDimitry Andric   equivalent[KMP_HW_SOCKET] = KMP_HW_SOCKET;
981fe6060f1SDimitry Andric   equivalent[KMP_HW_CORE] = KMP_HW_CORE;
982fe6060f1SDimitry Andric   equivalent[KMP_HW_THREAD] = KMP_HW_THREAD;
983fe6060f1SDimitry Andric   types[0] = KMP_HW_SOCKET;
984fe6060f1SDimitry Andric   types[1] = KMP_HW_CORE;
985fe6060f1SDimitry Andric   types[2] = KMP_HW_THREAD;
986fe6060f1SDimitry Andric   //__kmp_avail_proc = __kmp_xproc;
987fe6060f1SDimitry Andric   _discover_uniformity();
988fe6060f1SDimitry Andric }
989fe6060f1SDimitry Andric 
9905f757f3fSDimitry Andric #if KMP_AFFINITY_SUPPORTED
9910eae32dcSDimitry Andric static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t & attr,kmp_str_buf_t * buf,bool plural)9920eae32dcSDimitry Andric __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
9930eae32dcSDimitry Andric                                  bool plural) {
9940eae32dcSDimitry Andric   __kmp_str_buf_init(buf);
9950eae32dcSDimitry Andric   if (attr.is_core_type_valid())
9960eae32dcSDimitry Andric     __kmp_str_buf_print(buf, "%s %s",
9970eae32dcSDimitry Andric                         __kmp_hw_get_core_type_string(attr.get_core_type()),
9980eae32dcSDimitry Andric                         __kmp_hw_get_catalog_string(KMP_HW_CORE, plural));
9990eae32dcSDimitry Andric   else
10000eae32dcSDimitry Andric     __kmp_str_buf_print(buf, "%s eff=%d",
10010eae32dcSDimitry Andric                         __kmp_hw_get_catalog_string(KMP_HW_CORE, plural),
10020eae32dcSDimitry Andric                         attr.get_core_eff());
10030eae32dcSDimitry Andric   return buf;
10040eae32dcSDimitry Andric }
10050eae32dcSDimitry Andric 
// Remove every hardware thread whose OS proc id is not set in mask, keeping
// __kmp_affin_fullMask and __kmp_avail_proc in sync, then re-derive the
// topology summaries.  Returns true if any hardware thread was removed.
bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) {
  // Apply the filter
  bool affected;
  int new_index = 0;
  for (int i = 0; i < num_hw_threads; ++i) {
    int os_id = hw_threads[i].os_id;
    if (KMP_CPU_ISSET(os_id, mask)) {
      // Keep this thread: compact it toward the front of the array.
      if (i != new_index)
        hw_threads[new_index] = hw_threads[i];
      new_index++;
    } else {
      // Filtered out: clear it from the global full mask as well.
      KMP_CPU_CLR(os_id, __kmp_affin_fullMask);
      __kmp_avail_proc--;
    }
  }

  KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
  affected = (num_hw_threads != new_index);
  num_hw_threads = new_index;

  // Post hardware subset canonicalization
  if (affected) {
    _gather_enumeration_information();
    _discover_uniformity();
    _set_globals();
    _set_last_level_cache();
#if KMP_OS_WINDOWS
    // Copy filtered full mask if topology has single processor group
    if (__kmp_num_proc_groups <= 1)
#endif
      __kmp_affin_origMask->copy(__kmp_affin_fullMask);
  }
  return affected;
}
10405f757f3fSDimitry Andric 
1041fe6060f1SDimitry Andric // Apply the KMP_HW_SUBSET envirable to the topology
1042fe6060f1SDimitry Andric // Returns true if KMP_HW_SUBSET filtered any processors
1043fe6060f1SDimitry Andric // otherwise, returns false
filter_hw_subset()1044fe6060f1SDimitry Andric bool kmp_topology_t::filter_hw_subset() {
1045fe6060f1SDimitry Andric   // If KMP_HW_SUBSET wasn't requested, then do nothing.
1046fe6060f1SDimitry Andric   if (!__kmp_hw_subset)
1047fe6060f1SDimitry Andric     return false;
1048fe6060f1SDimitry Andric 
1049349cc55cSDimitry Andric   // First, sort the KMP_HW_SUBSET items by the machine topology
1050349cc55cSDimitry Andric   __kmp_hw_subset->sort();
1051349cc55cSDimitry Andric 
1052*0fca6ea1SDimitry Andric   __kmp_hw_subset->canonicalize(__kmp_topology);
1053*0fca6ea1SDimitry Andric 
1054fe6060f1SDimitry Andric   // Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
10550eae32dcSDimitry Andric   bool using_core_types = false;
10560eae32dcSDimitry Andric   bool using_core_effs = false;
1057*0fca6ea1SDimitry Andric   bool is_absolute = __kmp_hw_subset->is_absolute();
1058fe6060f1SDimitry Andric   int hw_subset_depth = __kmp_hw_subset->get_depth();
1059fe6060f1SDimitry Andric   kmp_hw_t specified[KMP_HW_LAST];
1060d56accc7SDimitry Andric   int *topology_levels = (int *)KMP_ALLOCA(sizeof(int) * hw_subset_depth);
1061fe6060f1SDimitry Andric   KMP_ASSERT(hw_subset_depth > 0);
1062fe6060f1SDimitry Andric   KMP_FOREACH_HW_TYPE(i) { specified[i] = KMP_HW_UNKNOWN; }
10630eae32dcSDimitry Andric   int core_level = get_level(KMP_HW_CORE);
1064fe6060f1SDimitry Andric   for (int i = 0; i < hw_subset_depth; ++i) {
1065fe6060f1SDimitry Andric     int max_count;
10660eae32dcSDimitry Andric     const kmp_hw_subset_t::item_t &item = __kmp_hw_subset->at(i);
10670eae32dcSDimitry Andric     int num = item.num[0];
10680eae32dcSDimitry Andric     int offset = item.offset[0];
10690eae32dcSDimitry Andric     kmp_hw_t type = item.type;
1070fe6060f1SDimitry Andric     kmp_hw_t equivalent_type = equivalent[type];
1071fe6060f1SDimitry Andric     int level = get_level(type);
10720eae32dcSDimitry Andric     topology_levels[i] = level;
1073fe6060f1SDimitry Andric 
1074fe6060f1SDimitry Andric     // Check to see if current layer is in detected machine topology
1075fe6060f1SDimitry Andric     if (equivalent_type != KMP_HW_UNKNOWN) {
1076fe6060f1SDimitry Andric       __kmp_hw_subset->at(i).type = equivalent_type;
1077fe6060f1SDimitry Andric     } else {
1078bdd1243dSDimitry Andric       KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetNotExistGeneric,
1079fe6060f1SDimitry Andric                       __kmp_hw_get_catalog_string(type));
1080fe6060f1SDimitry Andric       return false;
1081fe6060f1SDimitry Andric     }
1082fe6060f1SDimitry Andric 
10830eae32dcSDimitry Andric     // Check to see if current layer has already been
10840eae32dcSDimitry Andric     // specified either directly or through an equivalent type
1085fe6060f1SDimitry Andric     if (specified[equivalent_type] != KMP_HW_UNKNOWN) {
1086bdd1243dSDimitry Andric       KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetEqvLayers,
1087bdd1243dSDimitry Andric                       __kmp_hw_get_catalog_string(type),
1088fe6060f1SDimitry Andric                       __kmp_hw_get_catalog_string(specified[equivalent_type]));
1089fe6060f1SDimitry Andric       return false;
1090fe6060f1SDimitry Andric     }
1091fe6060f1SDimitry Andric     specified[equivalent_type] = type;
1092fe6060f1SDimitry Andric 
1093fe6060f1SDimitry Andric     // Check to see if each layer's num & offset parameters are valid
1094fe6060f1SDimitry Andric     max_count = get_ratio(level);
1095*0fca6ea1SDimitry Andric     if (!is_absolute) {
10960eae32dcSDimitry Andric       if (max_count < 0 ||
10970eae32dcSDimitry Andric           (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
1098fe6060f1SDimitry Andric         bool plural = (num > 1);
1099bdd1243dSDimitry Andric         KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric,
1100fe6060f1SDimitry Andric                         __kmp_hw_get_catalog_string(type, plural));
1101fe6060f1SDimitry Andric         return false;
1102fe6060f1SDimitry Andric       }
1103*0fca6ea1SDimitry Andric     }
11040eae32dcSDimitry Andric 
11050eae32dcSDimitry Andric     // Check to see if core attributes are consistent
11060eae32dcSDimitry Andric     if (core_level == level) {
11070eae32dcSDimitry Andric       // Determine which core attributes are specified
11080eae32dcSDimitry Andric       for (int j = 0; j < item.num_attrs; ++j) {
11090eae32dcSDimitry Andric         if (item.attr[j].is_core_type_valid())
11100eae32dcSDimitry Andric           using_core_types = true;
11110eae32dcSDimitry Andric         if (item.attr[j].is_core_eff_valid())
11120eae32dcSDimitry Andric           using_core_effs = true;
1113fe6060f1SDimitry Andric       }
1114fe6060f1SDimitry Andric 
11150eae32dcSDimitry Andric       // Check if using a single core attribute on non-hybrid arch.
11160eae32dcSDimitry Andric       // Do not ignore all of KMP_HW_SUBSET, just ignore the attribute.
11170eae32dcSDimitry Andric       //
11180eae32dcSDimitry Andric       // Check if using multiple core attributes on non-hyrbid arch.
11190eae32dcSDimitry Andric       // Ignore all of KMP_HW_SUBSET if this is the case.
11200eae32dcSDimitry Andric       if ((using_core_effs || using_core_types) && !__kmp_is_hybrid_cpu()) {
11210eae32dcSDimitry Andric         if (item.num_attrs == 1) {
11220eae32dcSDimitry Andric           if (using_core_effs) {
1123bdd1243dSDimitry Andric             KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIgnoringAttr,
1124bdd1243dSDimitry Andric                             "efficiency");
11250eae32dcSDimitry Andric           } else {
1126bdd1243dSDimitry Andric             KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIgnoringAttr,
1127bdd1243dSDimitry Andric                             "core_type");
11280eae32dcSDimitry Andric           }
11290eae32dcSDimitry Andric           using_core_effs = false;
11300eae32dcSDimitry Andric           using_core_types = false;
11310eae32dcSDimitry Andric         } else {
1132bdd1243dSDimitry Andric           KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAttrsNonHybrid);
11330eae32dcSDimitry Andric           return false;
11340eae32dcSDimitry Andric         }
11350eae32dcSDimitry Andric       }
11360eae32dcSDimitry Andric 
11370eae32dcSDimitry Andric       // Check if using both core types and core efficiencies together
11380eae32dcSDimitry Andric       if (using_core_types && using_core_effs) {
1139bdd1243dSDimitry Andric         KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIncompat, "core_type",
1140bdd1243dSDimitry Andric                         "efficiency");
11410eae32dcSDimitry Andric         return false;
11420eae32dcSDimitry Andric       }
11430eae32dcSDimitry Andric 
11440eae32dcSDimitry Andric       // Check that core efficiency values are valid
11450eae32dcSDimitry Andric       if (using_core_effs) {
11460eae32dcSDimitry Andric         for (int j = 0; j < item.num_attrs; ++j) {
11470eae32dcSDimitry Andric           if (item.attr[j].is_core_eff_valid()) {
11480eae32dcSDimitry Andric             int core_eff = item.attr[j].get_core_eff();
11490eae32dcSDimitry Andric             if (core_eff < 0 || core_eff >= num_core_efficiencies) {
11500eae32dcSDimitry Andric               kmp_str_buf_t buf;
11510eae32dcSDimitry Andric               __kmp_str_buf_init(&buf);
11520eae32dcSDimitry Andric               __kmp_str_buf_print(&buf, "%d", item.attr[j].get_core_eff());
11530eae32dcSDimitry Andric               __kmp_msg(kmp_ms_warning,
11540eae32dcSDimitry Andric                         KMP_MSG(AffHWSubsetAttrInvalid, "efficiency", buf.str),
11550eae32dcSDimitry Andric                         KMP_HNT(ValidValuesRange, 0, num_core_efficiencies - 1),
11560eae32dcSDimitry Andric                         __kmp_msg_null);
11570eae32dcSDimitry Andric               __kmp_str_buf_free(&buf);
11580eae32dcSDimitry Andric               return false;
11590eae32dcSDimitry Andric             }
11600eae32dcSDimitry Andric           }
11610eae32dcSDimitry Andric         }
11620eae32dcSDimitry Andric       }
11630eae32dcSDimitry Andric 
11640eae32dcSDimitry Andric       // Check that the number of requested cores with attributes is valid
1165*0fca6ea1SDimitry Andric       if ((using_core_types || using_core_effs) && !is_absolute) {
11660eae32dcSDimitry Andric         for (int j = 0; j < item.num_attrs; ++j) {
11670eae32dcSDimitry Andric           int num = item.num[j];
11680eae32dcSDimitry Andric           int offset = item.offset[j];
11690eae32dcSDimitry Andric           int level_above = core_level - 1;
11700eae32dcSDimitry Andric           if (level_above >= 0) {
11710eae32dcSDimitry Andric             max_count = get_ncores_with_attr_per(item.attr[j], level_above);
11720eae32dcSDimitry Andric             if (max_count <= 0 ||
11730eae32dcSDimitry Andric                 (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
11740eae32dcSDimitry Andric               kmp_str_buf_t buf;
11750eae32dcSDimitry Andric               __kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0);
1176bdd1243dSDimitry Andric               KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric, buf.str);
11770eae32dcSDimitry Andric               __kmp_str_buf_free(&buf);
11780eae32dcSDimitry Andric               return false;
11790eae32dcSDimitry Andric             }
11800eae32dcSDimitry Andric           }
11810eae32dcSDimitry Andric         }
11820eae32dcSDimitry Andric       }
11830eae32dcSDimitry Andric 
11840eae32dcSDimitry Andric       if ((using_core_types || using_core_effs) && item.num_attrs > 1) {
11850eae32dcSDimitry Andric         for (int j = 0; j < item.num_attrs; ++j) {
11860eae32dcSDimitry Andric           // Ambiguous use of specific core attribute + generic core
11870eae32dcSDimitry Andric           // e.g., 4c & 3c:intel_core or 4c & 3c:eff1
11880eae32dcSDimitry Andric           if (!item.attr[j]) {
11890eae32dcSDimitry Andric             kmp_hw_attr_t other_attr;
11900eae32dcSDimitry Andric             for (int k = 0; k < item.num_attrs; ++k) {
11910eae32dcSDimitry Andric               if (item.attr[k] != item.attr[j]) {
11920eae32dcSDimitry Andric                 other_attr = item.attr[k];
11930eae32dcSDimitry Andric                 break;
11940eae32dcSDimitry Andric               }
11950eae32dcSDimitry Andric             }
11960eae32dcSDimitry Andric             kmp_str_buf_t buf;
11970eae32dcSDimitry Andric             __kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0);
1198bdd1243dSDimitry Andric             KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIncompat,
11990eae32dcSDimitry Andric                             __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str);
12000eae32dcSDimitry Andric             __kmp_str_buf_free(&buf);
12010eae32dcSDimitry Andric             return false;
12020eae32dcSDimitry Andric           }
12030eae32dcSDimitry Andric           // Allow specifying a specific core type or core eff exactly once
12040eae32dcSDimitry Andric           for (int k = 0; k < j; ++k) {
12050eae32dcSDimitry Andric             if (!item.attr[j] || !item.attr[k])
12060eae32dcSDimitry Andric               continue;
12070eae32dcSDimitry Andric             if (item.attr[k] == item.attr[j]) {
12080eae32dcSDimitry Andric               kmp_str_buf_t buf;
12090eae32dcSDimitry Andric               __kmp_hw_get_catalog_core_string(item.attr[j], &buf,
12100eae32dcSDimitry Andric                                                item.num[j] > 0);
1211bdd1243dSDimitry Andric               KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAttrRepeat, buf.str);
12120eae32dcSDimitry Andric               __kmp_str_buf_free(&buf);
12130eae32dcSDimitry Andric               return false;
12140eae32dcSDimitry Andric             }
12150eae32dcSDimitry Andric           }
12160eae32dcSDimitry Andric         }
12170eae32dcSDimitry Andric       }
12180eae32dcSDimitry Andric     }
12190eae32dcSDimitry Andric   }
12200eae32dcSDimitry Andric 
1221*0fca6ea1SDimitry Andric   // For keeping track of sub_ids for an absolute KMP_HW_SUBSET
1222*0fca6ea1SDimitry Andric   // or core attributes (core type or efficiency)
1223*0fca6ea1SDimitry Andric   int prev_sub_ids[KMP_HW_LAST];
1224*0fca6ea1SDimitry Andric   int abs_sub_ids[KMP_HW_LAST];
1225*0fca6ea1SDimitry Andric   int core_eff_sub_ids[KMP_HW_MAX_NUM_CORE_EFFS];
1226*0fca6ea1SDimitry Andric   int core_type_sub_ids[KMP_HW_MAX_NUM_CORE_TYPES];
1227*0fca6ea1SDimitry Andric   for (size_t i = 0; i < KMP_HW_LAST; ++i) {
1228*0fca6ea1SDimitry Andric     abs_sub_ids[i] = -1;
1229*0fca6ea1SDimitry Andric     prev_sub_ids[i] = -1;
1230*0fca6ea1SDimitry Andric   }
1231*0fca6ea1SDimitry Andric   for (size_t i = 0; i < KMP_HW_MAX_NUM_CORE_EFFS; ++i)
1232*0fca6ea1SDimitry Andric     core_eff_sub_ids[i] = -1;
1233*0fca6ea1SDimitry Andric   for (size_t i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
1234*0fca6ea1SDimitry Andric     core_type_sub_ids[i] = -1;
1235*0fca6ea1SDimitry Andric 
1236*0fca6ea1SDimitry Andric   // Determine which hardware threads should be filtered.
1237*0fca6ea1SDimitry Andric 
1238*0fca6ea1SDimitry Andric   // Helpful to determine if a topology layer is targeted by an absolute subset
1239*0fca6ea1SDimitry Andric   auto is_targeted = [&](int level) {
1240*0fca6ea1SDimitry Andric     if (is_absolute) {
1241*0fca6ea1SDimitry Andric       for (int i = 0; i < hw_subset_depth; ++i)
1242*0fca6ea1SDimitry Andric         if (topology_levels[i] == level)
1243*0fca6ea1SDimitry Andric           return true;
1244*0fca6ea1SDimitry Andric       return false;
1245*0fca6ea1SDimitry Andric     }
1246*0fca6ea1SDimitry Andric     // If not absolute KMP_HW_SUBSET, then every layer is seen as targeted
1247*0fca6ea1SDimitry Andric     return true;
1248*0fca6ea1SDimitry Andric   };
1249*0fca6ea1SDimitry Andric 
1250*0fca6ea1SDimitry Andric   // Helpful to index into core type sub Ids array
1251*0fca6ea1SDimitry Andric   auto get_core_type_index = [](const kmp_hw_thread_t &t) {
12520eae32dcSDimitry Andric     switch (t.attrs.get_core_type()) {
12537a6dacacSDimitry Andric     case KMP_HW_CORE_TYPE_UNKNOWN:
12547a6dacacSDimitry Andric     case KMP_HW_MAX_NUM_CORE_TYPES:
12557a6dacacSDimitry Andric       return 0;
12560eae32dcSDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
12570eae32dcSDimitry Andric     case KMP_HW_CORE_TYPE_ATOM:
12580eae32dcSDimitry Andric       return 1;
12590eae32dcSDimitry Andric     case KMP_HW_CORE_TYPE_CORE:
12600eae32dcSDimitry Andric       return 2;
12610eae32dcSDimitry Andric #endif
12620eae32dcSDimitry Andric     }
12637a6dacacSDimitry Andric     KMP_ASSERT2(false, "Unhandled kmp_hw_thread_t enumeration");
12647a6dacacSDimitry Andric     KMP_BUILTIN_UNREACHABLE;
12650eae32dcSDimitry Andric   };
1266*0fca6ea1SDimitry Andric 
1267*0fca6ea1SDimitry Andric   // Helpful to index into core efficiencies sub Ids array
1268*0fca6ea1SDimitry Andric   auto get_core_eff_index = [](const kmp_hw_thread_t &t) {
12690eae32dcSDimitry Andric     return t.attrs.get_core_eff();
12700eae32dcSDimitry Andric   };
12710eae32dcSDimitry Andric 
12720eae32dcSDimitry Andric   int num_filtered = 0;
12735f757f3fSDimitry Andric   kmp_affin_mask_t *filtered_mask;
12745f757f3fSDimitry Andric   KMP_CPU_ALLOC(filtered_mask);
12755f757f3fSDimitry Andric   KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask);
1276fe6060f1SDimitry Andric   for (int i = 0; i < num_hw_threads; ++i) {
1277fe6060f1SDimitry Andric     kmp_hw_thread_t &hw_thread = hw_threads[i];
1278*0fca6ea1SDimitry Andric 
1279*0fca6ea1SDimitry Andric     // Figure out the absolute sub ids and core eff/type sub ids
1280*0fca6ea1SDimitry Andric     if (is_absolute || using_core_effs || using_core_types) {
1281*0fca6ea1SDimitry Andric       for (int level = 0; level < get_depth(); ++level) {
1282*0fca6ea1SDimitry Andric         if (hw_thread.sub_ids[level] != prev_sub_ids[level]) {
1283*0fca6ea1SDimitry Andric           bool found_targeted = false;
1284*0fca6ea1SDimitry Andric           for (int j = level; j < get_depth(); ++j) {
1285*0fca6ea1SDimitry Andric             bool targeted = is_targeted(j);
1286*0fca6ea1SDimitry Andric             if (!found_targeted && targeted) {
1287*0fca6ea1SDimitry Andric               found_targeted = true;
1288*0fca6ea1SDimitry Andric               abs_sub_ids[j]++;
1289*0fca6ea1SDimitry Andric               if (j == core_level && using_core_effs)
1290*0fca6ea1SDimitry Andric                 core_eff_sub_ids[get_core_eff_index(hw_thread)]++;
1291*0fca6ea1SDimitry Andric               if (j == core_level && using_core_types)
1292*0fca6ea1SDimitry Andric                 core_type_sub_ids[get_core_type_index(hw_thread)]++;
1293*0fca6ea1SDimitry Andric             } else if (targeted) {
1294*0fca6ea1SDimitry Andric               abs_sub_ids[j] = 0;
1295*0fca6ea1SDimitry Andric               if (j == core_level && using_core_effs)
1296*0fca6ea1SDimitry Andric                 core_eff_sub_ids[get_core_eff_index(hw_thread)] = 0;
1297*0fca6ea1SDimitry Andric               if (j == core_level && using_core_types)
1298*0fca6ea1SDimitry Andric                 core_type_sub_ids[get_core_type_index(hw_thread)] = 0;
1299*0fca6ea1SDimitry Andric             }
1300*0fca6ea1SDimitry Andric           }
1301*0fca6ea1SDimitry Andric           break;
1302*0fca6ea1SDimitry Andric         }
1303*0fca6ea1SDimitry Andric       }
1304*0fca6ea1SDimitry Andric       for (int level = 0; level < get_depth(); ++level)
1305*0fca6ea1SDimitry Andric         prev_sub_ids[level] = hw_thread.sub_ids[level];
1306*0fca6ea1SDimitry Andric     }
13070eae32dcSDimitry Andric 
1308fe6060f1SDimitry Andric     // Check to see if this hardware thread should be filtered
1309fe6060f1SDimitry Andric     bool should_be_filtered = false;
13100eae32dcSDimitry Andric     for (int hw_subset_index = 0; hw_subset_index < hw_subset_depth;
13110eae32dcSDimitry Andric          ++hw_subset_index) {
13120eae32dcSDimitry Andric       const auto &hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
13130eae32dcSDimitry Andric       int level = topology_levels[hw_subset_index];
13140eae32dcSDimitry Andric       if (level == -1)
1315fe6060f1SDimitry Andric         continue;
13160eae32dcSDimitry Andric       if ((using_core_effs || using_core_types) && level == core_level) {
13170eae32dcSDimitry Andric         // Look for the core attribute in KMP_HW_SUBSET which corresponds
13180eae32dcSDimitry Andric         // to this hardware thread's core attribute. Use this num,offset plus
13190eae32dcSDimitry Andric         // the running sub_id for the particular core attribute of this hardware
13200eae32dcSDimitry Andric         // thread to determine if the hardware thread should be filtered or not.
13210eae32dcSDimitry Andric         int attr_idx;
13220eae32dcSDimitry Andric         kmp_hw_core_type_t core_type = hw_thread.attrs.get_core_type();
13230eae32dcSDimitry Andric         int core_eff = hw_thread.attrs.get_core_eff();
13240eae32dcSDimitry Andric         for (attr_idx = 0; attr_idx < hw_subset_item.num_attrs; ++attr_idx) {
13250eae32dcSDimitry Andric           if (using_core_types &&
13260eae32dcSDimitry Andric               hw_subset_item.attr[attr_idx].get_core_type() == core_type)
13270eae32dcSDimitry Andric             break;
13280eae32dcSDimitry Andric           if (using_core_effs &&
13290eae32dcSDimitry Andric               hw_subset_item.attr[attr_idx].get_core_eff() == core_eff)
13300eae32dcSDimitry Andric             break;
13310eae32dcSDimitry Andric         }
13320eae32dcSDimitry Andric         // This core attribute isn't in the KMP_HW_SUBSET so always filter it.
13330eae32dcSDimitry Andric         if (attr_idx == hw_subset_item.num_attrs) {
13340eae32dcSDimitry Andric           should_be_filtered = true;
13350eae32dcSDimitry Andric           break;
13360eae32dcSDimitry Andric         }
13370eae32dcSDimitry Andric         int sub_id;
13380eae32dcSDimitry Andric         int num = hw_subset_item.num[attr_idx];
13390eae32dcSDimitry Andric         int offset = hw_subset_item.offset[attr_idx];
13400eae32dcSDimitry Andric         if (using_core_types)
1341*0fca6ea1SDimitry Andric           sub_id = core_type_sub_ids[get_core_type_index(hw_thread)];
13420eae32dcSDimitry Andric         else
1343*0fca6ea1SDimitry Andric           sub_id = core_eff_sub_ids[get_core_eff_index(hw_thread)];
13440eae32dcSDimitry Andric         if (sub_id < offset ||
13450eae32dcSDimitry Andric             (num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) {
13460eae32dcSDimitry Andric           should_be_filtered = true;
13470eae32dcSDimitry Andric           break;
13480eae32dcSDimitry Andric         }
13490eae32dcSDimitry Andric       } else {
1350*0fca6ea1SDimitry Andric         int sub_id;
13510eae32dcSDimitry Andric         int num = hw_subset_item.num[0];
13520eae32dcSDimitry Andric         int offset = hw_subset_item.offset[0];
1353*0fca6ea1SDimitry Andric         if (is_absolute)
1354*0fca6ea1SDimitry Andric           sub_id = abs_sub_ids[level];
1355*0fca6ea1SDimitry Andric         else
1356*0fca6ea1SDimitry Andric           sub_id = hw_thread.sub_ids[level];
1357*0fca6ea1SDimitry Andric         if (sub_id < offset ||
1358*0fca6ea1SDimitry Andric             (num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) {
1359fe6060f1SDimitry Andric           should_be_filtered = true;
1360fe6060f1SDimitry Andric           break;
1361fe6060f1SDimitry Andric         }
1362fe6060f1SDimitry Andric       }
13630eae32dcSDimitry Andric     }
13640eae32dcSDimitry Andric     // Collect filtering information
13655f757f3fSDimitry Andric     if (should_be_filtered) {
13665f757f3fSDimitry Andric       KMP_CPU_CLR(hw_thread.os_id, filtered_mask);
13670eae32dcSDimitry Andric       num_filtered++;
13680eae32dcSDimitry Andric     }
13695f757f3fSDimitry Andric   }
13700eae32dcSDimitry Andric 
13710eae32dcSDimitry Andric   // One last check that we shouldn't allow filtering entire machine
13720eae32dcSDimitry Andric   if (num_filtered == num_hw_threads) {
1373bdd1243dSDimitry Andric     KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered);
13740eae32dcSDimitry Andric     return false;
13750eae32dcSDimitry Andric   }
13760eae32dcSDimitry Andric 
13770eae32dcSDimitry Andric   // Apply the filter
13785f757f3fSDimitry Andric   restrict_to_mask(filtered_mask);
1379fe6060f1SDimitry Andric   return true;
1380fe6060f1SDimitry Andric }
1381fe6060f1SDimitry Andric 
is_close(int hwt1,int hwt2,const kmp_affinity_t & stgs) const13825f757f3fSDimitry Andric bool kmp_topology_t::is_close(int hwt1, int hwt2,
13835f757f3fSDimitry Andric                               const kmp_affinity_t &stgs) const {
13845f757f3fSDimitry Andric   int hw_level = stgs.gran_levels;
1385fe6060f1SDimitry Andric   if (hw_level >= depth)
1386fe6060f1SDimitry Andric     return true;
1387fe6060f1SDimitry Andric   bool retval = true;
1388fe6060f1SDimitry Andric   const kmp_hw_thread_t &t1 = hw_threads[hwt1];
1389fe6060f1SDimitry Andric   const kmp_hw_thread_t &t2 = hw_threads[hwt2];
13905f757f3fSDimitry Andric   if (stgs.flags.core_types_gran)
13915f757f3fSDimitry Andric     return t1.attrs.get_core_type() == t2.attrs.get_core_type();
13925f757f3fSDimitry Andric   if (stgs.flags.core_effs_gran)
13935f757f3fSDimitry Andric     return t1.attrs.get_core_eff() == t2.attrs.get_core_eff();
1394fe6060f1SDimitry Andric   for (int i = 0; i < (depth - hw_level); ++i) {
1395fe6060f1SDimitry Andric     if (t1.ids[i] != t2.ids[i])
1396fe6060f1SDimitry Andric       return false;
1397fe6060f1SDimitry Andric   }
1398fe6060f1SDimitry Andric   return retval;
1399fe6060f1SDimitry Andric }
1400fe6060f1SDimitry Andric 
1401fe6060f1SDimitry Andric ////////////////////////////////////////////////////////////////////////////////
1402fe6060f1SDimitry Andric 
// Set once pick_api() has chosen the dispatch implementation; reset by
// destroy_api().
bool KMPAffinity::picked_api = false;

// Route all affinity object/mask allocation through the runtime's internal
// allocator (__kmp_allocate/__kmp_free) rather than global new/delete.
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
14110b57cec5SDimitry Andric 
pick_api()14120b57cec5SDimitry Andric void KMPAffinity::pick_api() {
14130b57cec5SDimitry Andric   KMPAffinity *affinity_dispatch;
14140b57cec5SDimitry Andric   if (picked_api)
14150b57cec5SDimitry Andric     return;
14160b57cec5SDimitry Andric #if KMP_USE_HWLOC
14170b57cec5SDimitry Andric   // Only use Hwloc if affinity isn't explicitly disabled and
14180b57cec5SDimitry Andric   // user requests Hwloc topology method
14190b57cec5SDimitry Andric   if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
1420bdd1243dSDimitry Andric       __kmp_affinity.type != affinity_disabled) {
14210b57cec5SDimitry Andric     affinity_dispatch = new KMPHwlocAffinity();
14220b57cec5SDimitry Andric   } else
14230b57cec5SDimitry Andric #endif
14240b57cec5SDimitry Andric   {
14250b57cec5SDimitry Andric     affinity_dispatch = new KMPNativeAffinity();
14260b57cec5SDimitry Andric   }
14270b57cec5SDimitry Andric   __kmp_affinity_dispatch = affinity_dispatch;
14280b57cec5SDimitry Andric   picked_api = true;
14290b57cec5SDimitry Andric }
14300b57cec5SDimitry Andric 
destroy_api()14310b57cec5SDimitry Andric void KMPAffinity::destroy_api() {
14320b57cec5SDimitry Andric   if (__kmp_affinity_dispatch != NULL) {
14330b57cec5SDimitry Andric     delete __kmp_affinity_dispatch;
14340b57cec5SDimitry Andric     __kmp_affinity_dispatch = NULL;
14350b57cec5SDimitry Andric     picked_api = false;
14360b57cec5SDimitry Andric   }
14370b57cec5SDimitry Andric }
14380b57cec5SDimitry Andric 
// Advance scan to the terminating NUL of what was just printed so the next
// KMP_SNPRINTF appends at the end of the string.
#define KMP_ADVANCE_SCAN(scan)                                                 \
  while (*scan != '\0') {                                                      \
    scan++;                                                                    \
  }

// Print the affinity mask to the character array in a pretty format.
// The format is a comma separated list of non-negative integers or integer
// ranges: e.g., 1,2,3-5,7,9-15
// The format can also be the string "{<empty>}" if no bits are set in mask
// Returns buf. Output may be truncated if buf_len is too small; buf_len must
// be at least 40.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(buf_len >= 40);
  KMP_ASSERT(mask);
  char *scan = buf;
  // end points at the last writable byte (reserved for the NUL terminator).
  char *end = buf + buf_len - 1;

  // Check for empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find next range
    // [start, previous] is inclusive range of contiguous bits in mask
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do need a comma beforehand
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%u-%u", start, previous);
    } else {
      // Range with one or two contiguous bits in the affinity mask
      KMP_SNPRINTF(scan, end - scan + 1, "%u", start);
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%u", previous);
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with new start point
    start = finish;
    if (start == mask->end())
      break;
    // Check for overflow
    // NOTE(review): heuristic early-out when fewer than 2 bytes remain;
    // output is silently truncated rather than reported.
    if (end - scan < 2)
      break;
  }

  // Check for overflow
  KMP_ASSERT(scan <= end);
  return buf;
}
#undef KMP_ADVANCE_SCAN
15110b57cec5SDimitry Andric 
15120b57cec5SDimitry Andric // Print the affinity mask to the string buffer object in a pretty format
15130b57cec5SDimitry Andric // The format is a comma separated list of non-negative integers or integer
15140b57cec5SDimitry Andric // ranges: e.g., 1,2,3-5,7,9-15
15150b57cec5SDimitry Andric // The format can also be the string "{<empty>}" if no bits are set in mask
__kmp_affinity_str_buf_mask(kmp_str_buf_t * buf,kmp_affin_mask_t * mask)15160b57cec5SDimitry Andric kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
15170b57cec5SDimitry Andric                                            kmp_affin_mask_t *mask) {
15180b57cec5SDimitry Andric   int start = 0, finish = 0, previous = 0;
15190b57cec5SDimitry Andric   bool first_range;
15200b57cec5SDimitry Andric   KMP_ASSERT(buf);
15210b57cec5SDimitry Andric   KMP_ASSERT(mask);
15220b57cec5SDimitry Andric 
15230b57cec5SDimitry Andric   __kmp_str_buf_clear(buf);
15240b57cec5SDimitry Andric 
15250b57cec5SDimitry Andric   // Check for empty set.
15260b57cec5SDimitry Andric   if (mask->begin() == mask->end()) {
15270b57cec5SDimitry Andric     __kmp_str_buf_print(buf, "%s", "{<empty>}");
15280b57cec5SDimitry Andric     return buf;
15290b57cec5SDimitry Andric   }
15300b57cec5SDimitry Andric 
15310b57cec5SDimitry Andric   first_range = true;
15320b57cec5SDimitry Andric   start = mask->begin();
15330b57cec5SDimitry Andric   while (1) {
15340b57cec5SDimitry Andric     // Find next range
15350b57cec5SDimitry Andric     // [start, previous] is inclusive range of contiguous bits in mask
15360b57cec5SDimitry Andric     for (finish = mask->next(start), previous = start;
15370b57cec5SDimitry Andric          finish == previous + 1 && finish != mask->end();
15380b57cec5SDimitry Andric          finish = mask->next(finish)) {
15390b57cec5SDimitry Andric       previous = finish;
15400b57cec5SDimitry Andric     }
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric     // The first range does not need a comma printed before it, but the rest
15430b57cec5SDimitry Andric     // of the ranges do need a comma beforehand
15440b57cec5SDimitry Andric     if (!first_range) {
15450b57cec5SDimitry Andric       __kmp_str_buf_print(buf, "%s", ",");
15460b57cec5SDimitry Andric     } else {
15470b57cec5SDimitry Andric       first_range = false;
15480b57cec5SDimitry Andric     }
15490b57cec5SDimitry Andric     // Range with three or more contiguous bits in the affinity mask
15500b57cec5SDimitry Andric     if (previous - start > 1) {
1551e8d8bef9SDimitry Andric       __kmp_str_buf_print(buf, "%u-%u", start, previous);
15520b57cec5SDimitry Andric     } else {
15530b57cec5SDimitry Andric       // Range with one or two contiguous bits in the affinity mask
1554e8d8bef9SDimitry Andric       __kmp_str_buf_print(buf, "%u", start);
15550b57cec5SDimitry Andric       if (previous - start > 0) {
1556e8d8bef9SDimitry Andric         __kmp_str_buf_print(buf, ",%u", previous);
15570b57cec5SDimitry Andric       }
15580b57cec5SDimitry Andric     }
15590b57cec5SDimitry Andric     // Start over with new start point
15600b57cec5SDimitry Andric     start = finish;
15610b57cec5SDimitry Andric     if (start == mask->end())
15620b57cec5SDimitry Andric       break;
15630b57cec5SDimitry Andric   }
15640b57cec5SDimitry Andric   return buf;
15650b57cec5SDimitry Andric }
15660b57cec5SDimitry Andric 
// Return (possibly empty) affinity mask representing the offline CPUs
// Caller must free the mask
kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
  kmp_affin_mask_t *offline;
  KMP_CPU_ALLOC(offline);
  KMP_CPU_ZERO(offline);
#if KMP_OS_LINUX
  int n, begin_cpu, end_cpu;
  kmp_safe_raii_file_t offline_file;
  // Consume a run of whitespace, pushing the first non-space character back
  // so the next fscanf/fgetc sees it.
  auto skip_ws = [](FILE *f) {
    int c;
    do {
      c = fgetc(f);
    } while (isspace(c));
    if (c != EOF)
      ungetc(c, f);
  };
  // File contains CSV of integer ranges representing the offline CPUs
  // e.g., 1,2,4-7,9,11-15
  int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
  if (status != 0)
    // File missing or unreadable: treat as "no CPUs offline"
    return offline;
  while (!feof(offline_file)) {
    skip_ws(offline_file);
    n = fscanf(offline_file, "%d", &begin_cpu);
    if (n != 1)
      break;
    skip_ws(offline_file);
    // Peek at the delimiter to decide between "N" and "N-M" forms
    int c = fgetc(offline_file);
    if (c == EOF || c == ',') {
      // Just single CPU
      end_cpu = begin_cpu;
    } else if (c == '-') {
      // Range of CPUs
      skip_ws(offline_file);
      n = fscanf(offline_file, "%d", &end_cpu);
      if (n != 1)
        break;
      skip_ws(offline_file);
      c = fgetc(offline_file); // skip ','
    } else {
      // Syntax problem
      break;
    }
    // Ensure a valid range of CPUs; out-of-range entries are silently skipped
    if (begin_cpu < 0 || begin_cpu >= __kmp_xproc || end_cpu < 0 ||
        end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
      continue;
    }
    // Insert [begin_cpu, end_cpu] into offline mask
    for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
      KMP_CPU_SET(cpu, offline);
    }
  }
#endif
  return offline;
}
1624349cc55cSDimitry Andric 
// Fill 'mask' with every proc on the machine (skipping offline CPUs where
// detectable) and return the number of available procs.
int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  int avail_proc = 0;
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        // Procs are numbered group-major: group g owns the bit range
        // [g * bits-per-group, g * bits-per-group + num)
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
        avail_proc++;
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    kmp_affin_mask_t *offline_cpus = __kmp_affinity_get_offline_cpus();
    for (proc = 0; proc < __kmp_xproc; proc++) {
      // Skip offline CPUs
      if (KMP_CPU_ISSET(proc, offline_cpus))
        continue;
      KMP_CPU_SET(proc, mask);
      avail_proc++;
    }
    KMP_CPU_FREE(offline_cpus);
  }

  return avail_proc;
}
16620b57cec5SDimitry Andric 
1663fe6060f1SDimitry Andric // All of the __kmp_affinity_create_*_map() routines should allocate the
1664fe6060f1SDimitry Andric // internal topology object and set the layer ids for it.  Each routine
1665fe6060f1SDimitry Andric // returns a boolean on whether it was successful at doing so.
16660b57cec5SDimitry Andric kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
1667fcaf7f86SDimitry Andric // Original mask is a subset of full mask in multiple processor groups topology
1668fcaf7f86SDimitry Andric kmp_affin_mask_t *__kmp_affin_origMask = NULL;
16690b57cec5SDimitry Andric 
16700b57cec5SDimitry Andric #if KMP_USE_HWLOC
// True if 'obj' is any kind of cache object in the hwloc topology.
static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
#if HWLOC_API_VERSION >= 0x00020000
  // hwloc 2.x has several distinct cache object types; use the helper
  return hwloc_obj_type_is_cache(obj->type);
#else
  // hwloc 1.x has a single cache object type
  return obj->type == HWLOC_OBJ_CACHE;
#endif
}
16780b57cec5SDimitry Andric 
1679fe6060f1SDimitry Andric // Returns KMP_HW_* type derived from HWLOC_* type
__kmp_hwloc_type_2_topology_type(hwloc_obj_t obj)1680fe6060f1SDimitry Andric static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {
16810b57cec5SDimitry Andric 
1682fe6060f1SDimitry Andric   if (__kmp_hwloc_is_cache_type(obj)) {
1683fe6060f1SDimitry Andric     if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
1684fe6060f1SDimitry Andric       return KMP_HW_UNKNOWN;
1685fe6060f1SDimitry Andric     switch (obj->attr->cache.depth) {
1686fe6060f1SDimitry Andric     case 1:
1687fe6060f1SDimitry Andric       return KMP_HW_L1;
1688fe6060f1SDimitry Andric     case 2:
1689fe6060f1SDimitry Andric #if KMP_MIC_SUPPORTED
1690fe6060f1SDimitry Andric       if (__kmp_mic_type == mic3) {
1691fe6060f1SDimitry Andric         return KMP_HW_TILE;
16920b57cec5SDimitry Andric       }
1693fe6060f1SDimitry Andric #endif
1694fe6060f1SDimitry Andric       return KMP_HW_L2;
1695fe6060f1SDimitry Andric     case 3:
1696fe6060f1SDimitry Andric       return KMP_HW_L3;
16970b57cec5SDimitry Andric     }
1698fe6060f1SDimitry Andric     return KMP_HW_UNKNOWN;
16990b57cec5SDimitry Andric   }
1700fe6060f1SDimitry Andric 
1701fe6060f1SDimitry Andric   switch (obj->type) {
1702fe6060f1SDimitry Andric   case HWLOC_OBJ_PACKAGE:
1703fe6060f1SDimitry Andric     return KMP_HW_SOCKET;
1704fe6060f1SDimitry Andric   case HWLOC_OBJ_NUMANODE:
1705fe6060f1SDimitry Andric     return KMP_HW_NUMA;
1706fe6060f1SDimitry Andric   case HWLOC_OBJ_CORE:
1707fe6060f1SDimitry Andric     return KMP_HW_CORE;
1708fe6060f1SDimitry Andric   case HWLOC_OBJ_PU:
1709fe6060f1SDimitry Andric     return KMP_HW_THREAD;
1710fe6060f1SDimitry Andric   case HWLOC_OBJ_GROUP:
1711bdd1243dSDimitry Andric #if HWLOC_API_VERSION >= 0x00020000
1712fe6060f1SDimitry Andric     if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE)
1713fe6060f1SDimitry Andric       return KMP_HW_DIE;
1714fe6060f1SDimitry Andric     else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_TILE)
1715fe6060f1SDimitry Andric       return KMP_HW_TILE;
1716fe6060f1SDimitry Andric     else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_MODULE)
1717fe6060f1SDimitry Andric       return KMP_HW_MODULE;
1718fe6060f1SDimitry Andric     else if (obj->attr->group.kind == HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP)
1719fe6060f1SDimitry Andric       return KMP_HW_PROC_GROUP;
1720bdd1243dSDimitry Andric #endif
1721fe6060f1SDimitry Andric     return KMP_HW_UNKNOWN;
1722fe6060f1SDimitry Andric #if HWLOC_API_VERSION >= 0x00020100
1723fe6060f1SDimitry Andric   case HWLOC_OBJ_DIE:
1724fe6060f1SDimitry Andric     return KMP_HW_DIE;
1725fe6060f1SDimitry Andric #endif
17260b57cec5SDimitry Andric   }
1727fe6060f1SDimitry Andric   return KMP_HW_UNKNOWN;
17280b57cec5SDimitry Andric }
17290b57cec5SDimitry Andric 
17300b57cec5SDimitry Andric // Returns the number of objects of type 'type' below 'obj' within the topology
17310b57cec5SDimitry Andric // tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
17320b57cec5SDimitry Andric // HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
17330b57cec5SDimitry Andric // object.
__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,hwloc_obj_type_t type)17340b57cec5SDimitry Andric static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
17350b57cec5SDimitry Andric                                            hwloc_obj_type_t type) {
17360b57cec5SDimitry Andric   int retval = 0;
17370b57cec5SDimitry Andric   hwloc_obj_t first;
17380b57cec5SDimitry Andric   for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
17390b57cec5SDimitry Andric                                            obj->logical_index, type, 0);
1740fe6060f1SDimitry Andric        first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology,
1741fe6060f1SDimitry Andric                                                        obj->type, first) == obj;
17420b57cec5SDimitry Andric        first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
17430b57cec5SDimitry Andric                                           first)) {
17440b57cec5SDimitry Andric     ++retval;
17450b57cec5SDimitry Andric   }
17460b57cec5SDimitry Andric   return retval;
17470b57cec5SDimitry Andric }
17480b57cec5SDimitry Andric 
// This gets the sub_id for a lower object under a higher object in the
// topology tree. The sub_id is the lower object's ordinal among same-type
// siblings contained in 'higher'. Results are memoized in obj->userdata
// (stored as sub_id + 1 so that 0 can be distinguished from NULL).
static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
                                  hwloc_obj_t lower) {
  hwloc_obj_t obj;
  hwloc_obj_type_t ltype = lower->type;
  int lindex = lower->logical_index - 1;
  int sub_id = 0;
  // Get the previous lower object
  obj = hwloc_get_obj_by_type(t, ltype, lindex);
  // Walk backwards over same-type siblings still contained in 'higher',
  // counting steps until a sibling with a cached sub_id is found.
  // NOTE(review): reading the cached value as this object's sub_id (without
  // adding the steps walked) assumes the cache hit occurs on the immediately
  // preceding sibling, i.e. siblings are visited in logical order — confirm
  // against the caller (__kmp_affinity_create_hwloc_map's PU iteration).
  while (obj && lindex >= 0 &&
         hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
    if (obj->userdata) {
      sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
      break;
    }
    sub_id++;
    lindex--;
    obj = hwloc_get_obj_by_type(t, ltype, lindex);
  }
  // store sub_id + 1 so that a cached 0 is distinguished from NULL
  lower->userdata = RCAST(void *, sub_id + 1);
  return sub_id;
}
17730b57cec5SDimitry Andric 
// Build the internal topology object (__kmp_topology) from the hwloc
// topology tree. Returns true on success. On failure paths *msg_id would
// carry a diagnostic id; here it is always left as kmp_i18n_null.
static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
  kmp_hw_t type;
  int hw_thread_index, sub_id;
  int depth;
  hwloc_obj_t pu, obj, root, prev;
  kmp_hw_t types[KMP_HW_LAST];
  hwloc_obj_type_t hwloc_types[KMP_HW_LAST];

  hwloc_topology_t tp = __kmp_hwloc_topology;
  *msg_id = kmp_i18n_null;
  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
  }

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity.type == affinity_none);
    // hwloc only guarantees existence of PU object, so check PACKAGE and CORE
    hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
    if (o != NULL)
      nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
    else
      nCoresPerPkg = 1; // no PACKAGE found
    o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
    if (o != NULL)
      __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
    else
      __kmp_nThreadsPerCore = 1; // no CORE found
    if (__kmp_nThreadsPerCore == 0)
      __kmp_nThreadsPerCore = 1; // to prevent possible division by 0
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    if (nCoresPerPkg == 0)
      nCoresPerPkg = 1; // to prevent possible division by 0
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    return true;
  }

#if HWLOC_API_VERSION >= 0x00020400
  // Handle multiple types of cores if they exist on the system
  int nr_cpu_kinds = hwloc_cpukinds_get_nr(tp, 0);

  typedef struct kmp_hwloc_cpukinds_info_t {
    int efficiency;
    kmp_hw_core_type_t core_type;
    hwloc_bitmap_t mask;
  } kmp_hwloc_cpukinds_info_t;
  kmp_hwloc_cpukinds_info_t *cpukinds = nullptr;

  if (nr_cpu_kinds > 0) {
    unsigned nr_infos;
    struct hwloc_info_s *infos;
    cpukinds = (kmp_hwloc_cpukinds_info_t *)__kmp_allocate(
        sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds);
    // For each cpukind, record its efficiency and (on x86) translate the
    // "CoreType" info string into the KMP core type enum.
    for (unsigned idx = 0; idx < (unsigned)nr_cpu_kinds; ++idx) {
      cpukinds[idx].efficiency = -1;
      cpukinds[idx].core_type = KMP_HW_CORE_TYPE_UNKNOWN;
      cpukinds[idx].mask = hwloc_bitmap_alloc();
      if (hwloc_cpukinds_get_info(tp, idx, cpukinds[idx].mask,
                                  &cpukinds[idx].efficiency, &nr_infos, &infos,
                                  0) == 0) {
        for (unsigned i = 0; i < nr_infos; ++i) {
          if (__kmp_str_match("CoreType", 8, infos[i].name)) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
            if (__kmp_str_match("IntelAtom", 9, infos[i].value)) {
              cpukinds[idx].core_type = KMP_HW_CORE_TYPE_ATOM;
              break;
            } else if (__kmp_str_match("IntelCore", 9, infos[i].value)) {
              cpukinds[idx].core_type = KMP_HW_CORE_TYPE_CORE;
              break;
            }
#endif
          }
        }
      }
    }
  }
#endif

  root = hwloc_get_root_obj(tp);

  // Figure out the depth and types in the topology by walking upward from
  // the first PU in the full mask to the root.
  depth = 0;
  obj = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
  while (obj && obj != root) {
#if HWLOC_API_VERSION >= 0x00020000
    // NUMA nodes live on a separate memory-children list in hwloc 2.x,
    // so check obj's memory children for one at each level.
    if (obj->memory_arity) {
      hwloc_obj_t memory;
      for (memory = obj->memory_first_child; memory;
           memory = hwloc_get_next_child(tp, obj, memory)) {
        if (memory->type == HWLOC_OBJ_NUMANODE)
          break;
      }
      if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
        types[depth] = KMP_HW_NUMA;
        hwloc_types[depth] = memory->type;
        depth++;
      }
    }
#endif
    type = __kmp_hwloc_type_2_topology_type(obj);
    if (type != KMP_HW_UNKNOWN) {
      types[depth] = type;
      hwloc_types[depth] = obj->type;
      depth++;
    }
    obj = obj->parent;
  }
  KMP_ASSERT(depth > 0);

  // Get the order for the types correct: the upward walk recorded them
  // leaf-first, so reverse both arrays in place.
  for (int i = 0, j = depth - 1; i < j; ++i, --j) {
    hwloc_obj_type_t hwloc_temp = hwloc_types[i];
    kmp_hw_t temp = types[i];
    types[i] = types[j];
    types[j] = temp;
    hwloc_types[i] = hwloc_types[j];
    hwloc_types[j] = hwloc_temp;
  }

  // Allocate the data structure to be returned.
  __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);

  // Populate one hw_thread entry per PU in the full mask, filling ids[]
  // from leaf (PU) up to the topmost recorded layer.
  hw_thread_index = 0;
  pu = NULL;
  while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) {
    int index = depth - 1;
    bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
    if (included) {
      hw_thread.clear();
      hw_thread.ids[index] = pu->logical_index;
      hw_thread.os_id = pu->os_index;
      // If multiple core types, then set that attribute for the hardware thread
#if HWLOC_API_VERSION >= 0x00020400
      if (cpukinds) {
        int cpukind_index = -1;
        for (int i = 0; i < nr_cpu_kinds; ++i) {
          if (hwloc_bitmap_isset(cpukinds[i].mask, hw_thread.os_id)) {
            cpukind_index = i;
            break;
          }
        }
        if (cpukind_index >= 0) {
          hw_thread.attrs.set_core_type(cpukinds[cpukind_index].core_type);
          hw_thread.attrs.set_core_eff(cpukinds[cpukind_index].efficiency);
        }
      }
#endif
      index--;
    }
    // Walk from the PU up to the root, assigning a logical id and sub_id
    // at every layer that maps to a known KMP_HW_* type.
    obj = pu;
    prev = obj;
    while (obj != root && obj != NULL) {
      obj = obj->parent;
#if HWLOC_API_VERSION >= 0x00020000
      // NUMA Nodes are handled differently since they are not within the
      // parent/child structure anymore.  They are separate children
      // of obj (memory_first_child points to first memory child)
      if (obj->memory_arity) {
        hwloc_obj_t memory;
        for (memory = obj->memory_first_child; memory;
             memory = hwloc_get_next_child(tp, obj, memory)) {
          if (memory->type == HWLOC_OBJ_NUMANODE)
            break;
        }
        if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
          sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
          if (included) {
            hw_thread.ids[index] = memory->logical_index;
            hw_thread.ids[index + 1] = sub_id;
            index--;
          }
          prev = memory;
        }
        prev = obj;
      }
#endif
      type = __kmp_hwloc_type_2_topology_type(obj);
      if (type != KMP_HW_UNKNOWN) {
        sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
        if (included) {
          hw_thread.ids[index] = obj->logical_index;
          hw_thread.ids[index + 1] = sub_id;
          index--;
        }
        prev = obj;
      }
    }
    if (included)
      hw_thread_index++;
  }

#if HWLOC_API_VERSION >= 0x00020400
  // Free the core types information
  if (cpukinds) {
    for (int idx = 0; idx < nr_cpu_kinds; ++idx)
      hwloc_bitmap_free(cpukinds[idx].mask);
    __kmp_free(cpukinds);
  }
#endif
  __kmp_topology->sort_ids();
  return true;
}
19780b57cec5SDimitry Andric #endif // KMP_USE_HWLOC
19790b57cec5SDimitry Andric 
19800b57cec5SDimitry Andric // If we don't know how to retrieve the machine's processor topology, or
19810b57cec5SDimitry Andric // encounter an error in doing so, this routine is called to form a "flat"
19820b57cec5SDimitry Andric // mapping of os thread id's <-> processor id's.
__kmp_affinity_create_flat_map(kmp_i18n_id_t * const msg_id)1983fe6060f1SDimitry Andric static bool __kmp_affinity_create_flat_map(kmp_i18n_id_t *const msg_id) {
19840b57cec5SDimitry Andric   *msg_id = kmp_i18n_null;
1985fe6060f1SDimitry Andric   int depth = 3;
1986fe6060f1SDimitry Andric   kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};
1987fe6060f1SDimitry Andric 
1988bdd1243dSDimitry Andric   if (__kmp_affinity.flags.verbose) {
1989fe6060f1SDimitry Andric     KMP_INFORM(UsingFlatOS, "KMP_AFFINITY");
1990fe6060f1SDimitry Andric   }
19910b57cec5SDimitry Andric 
1992bdd1243dSDimitry Andric   // Even if __kmp_affinity.type == affinity_none, this routine might still
1993bdd1243dSDimitry Andric   // be called to set __kmp_ncores, as well as
19940b57cec5SDimitry Andric   // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
19950b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
1996bdd1243dSDimitry Andric     KMP_ASSERT(__kmp_affinity.type == affinity_none);
19970b57cec5SDimitry Andric     __kmp_ncores = nPackages = __kmp_xproc;
19980b57cec5SDimitry Andric     __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1999fe6060f1SDimitry Andric     return true;
20000b57cec5SDimitry Andric   }
20010b57cec5SDimitry Andric 
20020b57cec5SDimitry Andric   // When affinity is off, this routine will still be called to set
20030b57cec5SDimitry Andric   // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
20040b57cec5SDimitry Andric   // Make sure all these vars are set correctly, and return now if affinity is
20050b57cec5SDimitry Andric   // not enabled.
20060b57cec5SDimitry Andric   __kmp_ncores = nPackages = __kmp_avail_proc;
20070b57cec5SDimitry Andric   __kmp_nThreadsPerCore = nCoresPerPkg = 1;
20080b57cec5SDimitry Andric 
20095ffd83dbSDimitry Andric   // Construct the data structure to be returned.
2010fe6060f1SDimitry Andric   __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
20110b57cec5SDimitry Andric   int avail_ct = 0;
20120b57cec5SDimitry Andric   int i;
20130b57cec5SDimitry Andric   KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
20140b57cec5SDimitry Andric     // Skip this proc if it is not included in the machine model.
20150b57cec5SDimitry Andric     if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
20160b57cec5SDimitry Andric       continue;
20170b57cec5SDimitry Andric     }
2018fe6060f1SDimitry Andric     kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct);
2019fe6060f1SDimitry Andric     hw_thread.clear();
2020fe6060f1SDimitry Andric     hw_thread.os_id = i;
2021fe6060f1SDimitry Andric     hw_thread.ids[0] = i;
2022fe6060f1SDimitry Andric     hw_thread.ids[1] = 0;
2023fe6060f1SDimitry Andric     hw_thread.ids[2] = 0;
2024fe6060f1SDimitry Andric     avail_ct++;
20250b57cec5SDimitry Andric   }
2026bdd1243dSDimitry Andric   if (__kmp_affinity.flags.verbose) {
20270b57cec5SDimitry Andric     KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
20280b57cec5SDimitry Andric   }
2029fe6060f1SDimitry Andric   return true;
20300b57cec5SDimitry Andric }
20310b57cec5SDimitry Andric 
20320b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
20330b57cec5SDimitry Andric // If multiple Windows* OS processor groups exist, we can create a 2-level
20340b57cec5SDimitry Andric // topology map with the groups at level 0 and the individual procs at level 1.
20350b57cec5SDimitry Andric // This facilitates letting the threads float among all procs in a group,
20360b57cec5SDimitry Andric // if granularity=group (the default when there are multiple groups).
__kmp_affinity_create_proc_group_map(kmp_i18n_id_t * const msg_id)2037fe6060f1SDimitry Andric static bool __kmp_affinity_create_proc_group_map(kmp_i18n_id_t *const msg_id) {
20380b57cec5SDimitry Andric   *msg_id = kmp_i18n_null;
2039fe6060f1SDimitry Andric   int depth = 3;
2040fe6060f1SDimitry Andric   kmp_hw_t types[] = {KMP_HW_PROC_GROUP, KMP_HW_CORE, KMP_HW_THREAD};
2041fe6060f1SDimitry Andric   const static size_t BITS_PER_GROUP = CHAR_BIT * sizeof(DWORD_PTR);
20420b57cec5SDimitry Andric 
2043bdd1243dSDimitry Andric   if (__kmp_affinity.flags.verbose) {
2044fe6060f1SDimitry Andric     KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
2045fe6060f1SDimitry Andric   }
2046fe6060f1SDimitry Andric 
2047fe6060f1SDimitry Andric   // If we aren't affinity capable, then use flat topology
20480b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
2049bdd1243dSDimitry Andric     KMP_ASSERT(__kmp_affinity.type == affinity_none);
2050fe6060f1SDimitry Andric     nPackages = __kmp_num_proc_groups;
2051fe6060f1SDimitry Andric     __kmp_nThreadsPerCore = 1;
2052fe6060f1SDimitry Andric     __kmp_ncores = __kmp_xproc;
2053fe6060f1SDimitry Andric     nCoresPerPkg = nPackages / __kmp_ncores;
2054fe6060f1SDimitry Andric     return true;
20550b57cec5SDimitry Andric   }
20560b57cec5SDimitry Andric 
20575ffd83dbSDimitry Andric   // Construct the data structure to be returned.
2058fe6060f1SDimitry Andric   __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
20590b57cec5SDimitry Andric   int avail_ct = 0;
20600b57cec5SDimitry Andric   int i;
20610b57cec5SDimitry Andric   KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
20620b57cec5SDimitry Andric     // Skip this proc if it is not included in the machine model.
20630b57cec5SDimitry Andric     if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
20640b57cec5SDimitry Andric       continue;
20650b57cec5SDimitry Andric     }
2066fe6060f1SDimitry Andric     kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct++);
2067fe6060f1SDimitry Andric     hw_thread.clear();
2068fe6060f1SDimitry Andric     hw_thread.os_id = i;
2069fe6060f1SDimitry Andric     hw_thread.ids[0] = i / BITS_PER_GROUP;
2070fe6060f1SDimitry Andric     hw_thread.ids[1] = hw_thread.ids[2] = i % BITS_PER_GROUP;
20710b57cec5SDimitry Andric   }
2072fe6060f1SDimitry Andric   return true;
20730b57cec5SDimitry Andric }
20740b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
20750b57cec5SDimitry Andric 
20760b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
20770b57cec5SDimitry Andric 
2078fe6060f1SDimitry Andric template <kmp_uint32 LSB, kmp_uint32 MSB>
__kmp_extract_bits(kmp_uint32 v)2079fe6060f1SDimitry Andric static inline unsigned __kmp_extract_bits(kmp_uint32 v) {
2080fe6060f1SDimitry Andric   const kmp_uint32 SHIFT_LEFT = sizeof(kmp_uint32) * 8 - 1 - MSB;
2081fe6060f1SDimitry Andric   const kmp_uint32 SHIFT_RIGHT = LSB;
2082fe6060f1SDimitry Andric   kmp_uint32 retval = v;
2083fe6060f1SDimitry Andric   retval <<= SHIFT_LEFT;
2084fe6060f1SDimitry Andric   retval >>= (SHIFT_LEFT + SHIFT_RIGHT);
2085fe6060f1SDimitry Andric   return retval;
2086fe6060f1SDimitry Andric }
2087fe6060f1SDimitry Andric 
// Smallest number of bits r such that (1 << r) >= count, i.e. the field
// width needed to hold `count` distinct ids. Returns 0 for count <= 1.
static int __kmp_cpuid_mask_width(int count) {
  int width;
  for (width = 0; (1 << width) < count; ++width)
    ;
  return width;
}
20950b57cec5SDimitry Andric 
// Per-processor record built while the current thread is bound to each OS
// proc in turn and its legacy (8-bit) APIC id is decoded via cpuid(1)/cpuid(4).
class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; //      ""
  unsigned maxThreadsPerPkg; //      ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; //      ""
  unsigned threadId; //      ""
};
21060b57cec5SDimitry Andric 
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void * a,const void * b)21070b57cec5SDimitry Andric static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
21080b57cec5SDimitry Andric                                                      const void *b) {
21090b57cec5SDimitry Andric   const apicThreadInfo *aa = (const apicThreadInfo *)a;
21100b57cec5SDimitry Andric   const apicThreadInfo *bb = (const apicThreadInfo *)b;
21110b57cec5SDimitry Andric   if (aa->pkgId < bb->pkgId)
21120b57cec5SDimitry Andric     return -1;
21130b57cec5SDimitry Andric   if (aa->pkgId > bb->pkgId)
21140b57cec5SDimitry Andric     return 1;
21150b57cec5SDimitry Andric   if (aa->coreId < bb->coreId)
21160b57cec5SDimitry Andric     return -1;
21170b57cec5SDimitry Andric   if (aa->coreId > bb->coreId)
21180b57cec5SDimitry Andric     return 1;
21190b57cec5SDimitry Andric   if (aa->threadId < bb->threadId)
21200b57cec5SDimitry Andric     return -1;
21210b57cec5SDimitry Andric   if (aa->threadId > bb->threadId)
21220b57cec5SDimitry Andric     return 1;
21230b57cec5SDimitry Andric   return 0;
21240b57cec5SDimitry Andric }
21250b57cec5SDimitry Andric 
2126fe6060f1SDimitry Andric class kmp_cache_info_t {
2127fe6060f1SDimitry Andric public:
2128fe6060f1SDimitry Andric   struct info_t {
2129fe6060f1SDimitry Andric     unsigned level, mask;
2130fe6060f1SDimitry Andric   };
kmp_cache_info_t()2131fe6060f1SDimitry Andric   kmp_cache_info_t() : depth(0) { get_leaf4_levels(); }
get_depth() const2132fe6060f1SDimitry Andric   size_t get_depth() const { return depth; }
operator [](size_t index)2133fe6060f1SDimitry Andric   info_t &operator[](size_t index) { return table[index]; }
operator [](size_t index) const2134fe6060f1SDimitry Andric   const info_t &operator[](size_t index) const { return table[index]; }
2135fe6060f1SDimitry Andric 
get_topology_type(unsigned level)2136fe6060f1SDimitry Andric   static kmp_hw_t get_topology_type(unsigned level) {
2137fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(level >= 1 && level <= MAX_CACHE_LEVEL);
2138fe6060f1SDimitry Andric     switch (level) {
2139fe6060f1SDimitry Andric     case 1:
2140fe6060f1SDimitry Andric       return KMP_HW_L1;
2141fe6060f1SDimitry Andric     case 2:
2142fe6060f1SDimitry Andric       return KMP_HW_L2;
2143fe6060f1SDimitry Andric     case 3:
2144fe6060f1SDimitry Andric       return KMP_HW_L3;
2145fe6060f1SDimitry Andric     }
2146fe6060f1SDimitry Andric     return KMP_HW_UNKNOWN;
2147fe6060f1SDimitry Andric   }
2148fe6060f1SDimitry Andric 
2149fe6060f1SDimitry Andric private:
2150fe6060f1SDimitry Andric   static const int MAX_CACHE_LEVEL = 3;
2151fe6060f1SDimitry Andric 
2152fe6060f1SDimitry Andric   size_t depth;
2153fe6060f1SDimitry Andric   info_t table[MAX_CACHE_LEVEL];
2154fe6060f1SDimitry Andric 
get_leaf4_levels()2155fe6060f1SDimitry Andric   void get_leaf4_levels() {
2156fe6060f1SDimitry Andric     unsigned level = 0;
2157fe6060f1SDimitry Andric     while (depth < MAX_CACHE_LEVEL) {
2158fe6060f1SDimitry Andric       unsigned cache_type, max_threads_sharing;
2159fe6060f1SDimitry Andric       unsigned cache_level, cache_mask_width;
2160fe6060f1SDimitry Andric       kmp_cpuid buf2;
2161fe6060f1SDimitry Andric       __kmp_x86_cpuid(4, level, &buf2);
2162fe6060f1SDimitry Andric       cache_type = __kmp_extract_bits<0, 4>(buf2.eax);
2163fe6060f1SDimitry Andric       if (!cache_type)
2164fe6060f1SDimitry Andric         break;
2165fe6060f1SDimitry Andric       // Skip instruction caches
2166fe6060f1SDimitry Andric       if (cache_type == 2) {
2167fe6060f1SDimitry Andric         level++;
2168fe6060f1SDimitry Andric         continue;
2169fe6060f1SDimitry Andric       }
2170fe6060f1SDimitry Andric       max_threads_sharing = __kmp_extract_bits<14, 25>(buf2.eax) + 1;
2171fe6060f1SDimitry Andric       cache_mask_width = __kmp_cpuid_mask_width(max_threads_sharing);
2172fe6060f1SDimitry Andric       cache_level = __kmp_extract_bits<5, 7>(buf2.eax);
2173fe6060f1SDimitry Andric       table[depth].level = cache_level;
2174fe6060f1SDimitry Andric       table[depth].mask = ((-1) << cache_mask_width);
2175fe6060f1SDimitry Andric       depth++;
2176fe6060f1SDimitry Andric       level++;
2177fe6060f1SDimitry Andric     }
2178fe6060f1SDimitry Andric   }
2179fe6060f1SDimitry Andric };
2180fe6060f1SDimitry Andric 
21810b57cec5SDimitry Andric // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
21820b57cec5SDimitry Andric // an algorithm which cycles through the available os threads, setting
21830b57cec5SDimitry Andric // the current thread's affinity mask to that thread, and then retrieves
21840b57cec5SDimitry Andric // the Apic Id for each thread context using the cpuid instruction.
static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *msg_id = kmp_i18n_null;

  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
  }

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return false;
  }

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable of
  // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then
  // we need to do something else - use the defaults that we calculated from
  // issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity.type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but is
    // disabled, this value will be 2 on a single core chip. Usually, it will be
    // 2 if HT is enabled and 1 if HT is disabled.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // The author of cpu_count.cpp treated this only an upper bound on the
    // number of cores, but I haven't seen any cases where it was greater than
    // the actual number of cores, so we will treat it as exact in this block of
    // code.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
    // greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread, and correlating the cpuid info, so
    // if the machine is not affinity capable, we assume that HT is off. We have
    // seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
    // does not support HT.
    //
    // - Older OSes are usually found on machines with older chips, which do not
    //   support HT.
    // - The performance penalty for mistakenly identifying a machine as HT when
    //   it isn't (which results in blocktime being incorrectly set to 0) is
    //   greater than the penalty for mistakenly identifying a machine as
    //   being 1 thread/core when it is really HT enabled (which results in
    //   blocktime being incorrectly set to a positive value).
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    return true;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity.type = affinity_none.

  // Save the affinity mask for the current thread. RAII also restores it on
  // any early-return path; the success path restores explicitly below.
  kmp_affinity_raii_t previous_affinity;

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  //
  // The relevant information is:
  // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
  //     has a unique Apic Id, which is of the form pkg# : core# : thread#.
  // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
  //     of this field determines the width of the core# + thread# fields in the
  //     Apic Id. It is also an upper bound on the number of threads per
  //     package, but it has been verified that situations happen where it is
  //     not exact. In particular, on certain OS/chip combinations where
  //     Intel(R) Hyper-Threading Technology is supported by the chip but has
  //     been disabled, the value of this field will be 2 (for a single core
  //     chip). On other OS/chip combinations supporting Intel(R)
  //     Hyper-Threading Technology, the value of this field will be 1 when
  //     Intel(R) Hyper-Threading Technology is disabled and 2 when it is
  //     enabled.
  // - Max Cores Per Pkg:  Bits 26:31 of eax after issuing cpuid(4). The value
  //     of this field (+1) determines the width of the core# field in the Apic
  //     Id. The comments in "cpucount.cpp" say that this value is an upper
  //     bound, but the IA-32 architecture manual says that it is exactly the
  //     number of cores per package, and I haven't seen any case where it
  //     wasn't.
  //
  // From this information, deduce the package Id, core Id, and thread Id,
  // and set the corresponding fields in the apicThreadInfo struct.
  unsigned i;
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      // EDX bit 9 is the "APIC on-chip" feature flag; without a local APIC
      // there is no APIC id to decode.
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return false;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
    // or greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // I've never seen this one happen, but I suppose it could, if the cpuid
      // instruction on a chip was really screwed up. Make sure to restore the
      // affinity mask before the tail call.
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return false;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  previous_affinity.restore();

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned among
  // the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
  // total # packages) are at this point - we want to determine that now. We
  // only have an upper bound on the first two figures.
  //
  // We also perform a consistency check at this point: the values returned by
  // the cpuid instruction for any thread bound to a given package had better
  // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars, though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      // Two procs decoded to the identical pkg/core/thread triple: the legacy
      // APIC ids are not unique, so this decoding cannot be trusted.
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return false;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return false;
    }
  }
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;
  __kmp_ncores = nCores;
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);

  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int idx = 0;
  int pkgLevel = 0;
  int coreLevel = 1;
  int threadLevel = 2;
  // All three levels are always present in this legacy decoding, so depth is
  // always 3; the level variables are kept for symmetry with other builders.
  int depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
  kmp_hw_t types[3];
  if (pkgLevel >= 0)
    types[idx++] = KMP_HW_SOCKET;
  if (coreLevel >= 0)
    types[idx++] = KMP_HW_CORE;
  if (threadLevel >= 0)
    types[idx++] = KMP_HW_THREAD;

  KMP_ASSERT(depth > 0);
  __kmp_topology = kmp_topology_t::allocate(nApics, depth, types);

  for (i = 0; i < nApics; ++i) {
    idx = 0;
    unsigned os = threadInfo[i].osId;
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
    hw_thread.clear();

    if (pkgLevel >= 0) {
      hw_thread.ids[idx++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      hw_thread.ids[idx++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      hw_thread.ids[idx++] = threadInfo[i].threadId;
    }
    hw_thread.os_id = os;
  }

  __kmp_free(threadInfo);
  __kmp_topology->sort_ids();
  if (!__kmp_topology->check_ids()) {
    kmp_topology_t::deallocate(__kmp_topology);
    __kmp_topology = nullptr;
    *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
    return false;
  }
  return true;
}
24980b57cec5SDimitry Andric 
2499349cc55cSDimitry Andric // Hybrid cpu detection using CPUID.1A
2500349cc55cSDimitry Andric // Thread should be pinned to processor already
__kmp_get_hybrid_info(kmp_hw_core_type_t * type,int * efficiency,unsigned * native_model_id)25010eae32dcSDimitry Andric static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type, int *efficiency,
2502349cc55cSDimitry Andric                                   unsigned *native_model_id) {
2503349cc55cSDimitry Andric   kmp_cpuid buf;
2504349cc55cSDimitry Andric   __kmp_x86_cpuid(0x1a, 0, &buf);
2505349cc55cSDimitry Andric   *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
25060eae32dcSDimitry Andric   switch (*type) {
25070eae32dcSDimitry Andric   case KMP_HW_CORE_TYPE_ATOM:
25080eae32dcSDimitry Andric     *efficiency = 0;
25090eae32dcSDimitry Andric     break;
25100eae32dcSDimitry Andric   case KMP_HW_CORE_TYPE_CORE:
25110eae32dcSDimitry Andric     *efficiency = 1;
25120eae32dcSDimitry Andric     break;
25130eae32dcSDimitry Andric   default:
25140eae32dcSDimitry Andric     *efficiency = 0;
25150eae32dcSDimitry Andric   }
2516349cc55cSDimitry Andric   *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
2517349cc55cSDimitry Andric }
2518349cc55cSDimitry Andric 
25190b57cec5SDimitry Andric // Intel(R) microarchitecture code name Nehalem, Dunnington and later
25200b57cec5SDimitry Andric // architectures support a newer interface for specifying the x2APIC Ids,
2521fe6060f1SDimitry Andric // based on CPUID.B or CPUID.1F
2522fe6060f1SDimitry Andric /*
2523fe6060f1SDimitry Andric  * CPUID.B or 1F, Input ECX (sub leaf # aka level number)
2524fe6060f1SDimitry Andric     Bits            Bits            Bits           Bits
    31-16           15-8            7-5            4-0
2526fe6060f1SDimitry Andric ---+-----------+--------------+-------------+-----------------+
2527fe6060f1SDimitry Andric EAX| reserved  |   reserved   |   reserved  |  Bits to Shift  |
2528fe6060f1SDimitry Andric ---+-----------|--------------+-------------+-----------------|
2529fe6060f1SDimitry Andric EBX| reserved  | Num logical processors at level (16 bits)    |
2530fe6060f1SDimitry Andric ---+-----------|--------------+-------------------------------|
2531fe6060f1SDimitry Andric ECX| reserved  |   Level Type |      Level Number (8 bits)    |
2532fe6060f1SDimitry Andric ---+-----------+--------------+-------------------------------|
2533fe6060f1SDimitry Andric EDX|                    X2APIC ID (32 bits)                   |
2534fe6060f1SDimitry Andric ---+----------------------------------------------------------+
2535fe6060f1SDimitry Andric */
2536fe6060f1SDimitry Andric 
// Level-type encodings reported in ECX[15:8] of CPUID leaf 0xB / 0x1F
// sub-leaves. The INVALID (0) type terminates enumeration and is treated
// as the package level by __kmp_intel_type_2_topology_type().
enum {
  INTEL_LEVEL_TYPE_INVALID = 0, // Package level
  INTEL_LEVEL_TYPE_SMT = 1,
  INTEL_LEVEL_TYPE_CORE = 2,
  INTEL_LEVEL_TYPE_MODULE = 3,
  INTEL_LEVEL_TYPE_TILE = 4,
  INTEL_LEVEL_TYPE_DIE = 5,
  INTEL_LEVEL_TYPE_LAST = 6, // Count of level types, not a real level
};
2546fe6060f1SDimitry Andric 
// Information gathered for one topology level from a CPUID 0xB/0x1F sub-leaf.
//   level_type: one of the INTEL_LEVEL_TYPE_* values above
//   mask:       bits to AND with the x2APIC id to get this level's id
//   mask_width: number of low x2APIC id bits covering this level and below
//   nitems:     number of logical processors reported at this level
//   cache_mask: high bits shared by all threads within this level's domain;
//               compared against cache masks to find equivalent cache levels
struct cpuid_level_info_t {
  unsigned level_type, mask, mask_width, nitems, cache_mask;
};
2550fe6060f1SDimitry Andric 
__kmp_intel_type_2_topology_type(int intel_type)2551fe6060f1SDimitry Andric static kmp_hw_t __kmp_intel_type_2_topology_type(int intel_type) {
2552fe6060f1SDimitry Andric   switch (intel_type) {
2553fe6060f1SDimitry Andric   case INTEL_LEVEL_TYPE_INVALID:
2554fe6060f1SDimitry Andric     return KMP_HW_SOCKET;
2555fe6060f1SDimitry Andric   case INTEL_LEVEL_TYPE_SMT:
2556fe6060f1SDimitry Andric     return KMP_HW_THREAD;
2557fe6060f1SDimitry Andric   case INTEL_LEVEL_TYPE_CORE:
2558fe6060f1SDimitry Andric     return KMP_HW_CORE;
2559fe6060f1SDimitry Andric   case INTEL_LEVEL_TYPE_TILE:
2560fe6060f1SDimitry Andric     return KMP_HW_TILE;
2561fe6060f1SDimitry Andric   case INTEL_LEVEL_TYPE_MODULE:
2562fe6060f1SDimitry Andric     return KMP_HW_MODULE;
2563fe6060f1SDimitry Andric   case INTEL_LEVEL_TYPE_DIE:
2564fe6060f1SDimitry Andric     return KMP_HW_DIE;
2565fe6060f1SDimitry Andric   }
2566fe6060f1SDimitry Andric   return KMP_HW_UNKNOWN;
2567fe6060f1SDimitry Andric }
2568fe6060f1SDimitry Andric 
2569fe6060f1SDimitry Andric // This function takes the topology leaf, a levels array to store the levels
2570fe6060f1SDimitry Andric // detected and a bitmap of the known levels.
2571fe6060f1SDimitry Andric // Returns the number of levels in the topology
2572fe6060f1SDimitry Andric static unsigned
__kmp_x2apicid_get_levels(int leaf,cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],kmp_uint64 known_levels)2573fe6060f1SDimitry Andric __kmp_x2apicid_get_levels(int leaf,
2574fe6060f1SDimitry Andric                           cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],
2575fe6060f1SDimitry Andric                           kmp_uint64 known_levels) {
2576fe6060f1SDimitry Andric   unsigned level, levels_index;
2577fe6060f1SDimitry Andric   unsigned level_type, mask_width, nitems;
25780b57cec5SDimitry Andric   kmp_cpuid buf;
25790b57cec5SDimitry Andric 
2580fe6060f1SDimitry Andric   // New algorithm has known topology layers act as highest unknown topology
2581fe6060f1SDimitry Andric   // layers when unknown topology layers exist.
2582fe6060f1SDimitry Andric   // e.g., Suppose layers were SMT <X> CORE <Y> <Z> PACKAGE, where <X> <Y> <Z>
2583fe6060f1SDimitry Andric   // are unknown topology layers, Then SMT will take the characteristics of
2584fe6060f1SDimitry Andric   // (SMT x <X>) and CORE will take the characteristics of (CORE x <Y> x <Z>).
2585fe6060f1SDimitry Andric   // This eliminates unknown portions of the topology while still keeping the
2586fe6060f1SDimitry Andric   // correct structure.
2587fe6060f1SDimitry Andric   level = levels_index = 0;
2588fe6060f1SDimitry Andric   do {
2589fe6060f1SDimitry Andric     __kmp_x86_cpuid(leaf, level, &buf);
2590fe6060f1SDimitry Andric     level_type = __kmp_extract_bits<8, 15>(buf.ecx);
2591fe6060f1SDimitry Andric     mask_width = __kmp_extract_bits<0, 4>(buf.eax);
2592fe6060f1SDimitry Andric     nitems = __kmp_extract_bits<0, 15>(buf.ebx);
2593fe6060f1SDimitry Andric     if (level_type != INTEL_LEVEL_TYPE_INVALID && nitems == 0)
2594fe6060f1SDimitry Andric       return 0;
25950b57cec5SDimitry Andric 
2596fe6060f1SDimitry Andric     if (known_levels & (1ull << level_type)) {
2597fe6060f1SDimitry Andric       // Add a new level to the topology
2598fe6060f1SDimitry Andric       KMP_ASSERT(levels_index < INTEL_LEVEL_TYPE_LAST);
2599fe6060f1SDimitry Andric       levels[levels_index].level_type = level_type;
2600fe6060f1SDimitry Andric       levels[levels_index].mask_width = mask_width;
2601fe6060f1SDimitry Andric       levels[levels_index].nitems = nitems;
2602fe6060f1SDimitry Andric       levels_index++;
2603fe6060f1SDimitry Andric     } else {
2604fe6060f1SDimitry Andric       // If it is an unknown level, then logically move the previous layer up
2605fe6060f1SDimitry Andric       if (levels_index > 0) {
2606fe6060f1SDimitry Andric         levels[levels_index - 1].mask_width = mask_width;
2607fe6060f1SDimitry Andric         levels[levels_index - 1].nitems = nitems;
26080b57cec5SDimitry Andric       }
2609fe6060f1SDimitry Andric     }
26100b57cec5SDimitry Andric     level++;
2611fe6060f1SDimitry Andric   } while (level_type != INTEL_LEVEL_TYPE_INVALID);
2612fe6060f1SDimitry Andric 
261306c3fb27SDimitry Andric   // Ensure the INTEL_LEVEL_TYPE_INVALID (Socket) layer isn't first
261406c3fb27SDimitry Andric   if (levels_index == 0 || levels[0].level_type == INTEL_LEVEL_TYPE_INVALID)
261506c3fb27SDimitry Andric     return 0;
261606c3fb27SDimitry Andric 
2617fe6060f1SDimitry Andric   // Set the masks to & with apicid
2618fe6060f1SDimitry Andric   for (unsigned i = 0; i < levels_index; ++i) {
2619fe6060f1SDimitry Andric     if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) {
2620fe6060f1SDimitry Andric       levels[i].mask = ~((-1) << levels[i].mask_width);
2621fe6060f1SDimitry Andric       levels[i].cache_mask = (-1) << levels[i].mask_width;
2622fe6060f1SDimitry Andric       for (unsigned j = 0; j < i; ++j)
2623fe6060f1SDimitry Andric         levels[i].mask ^= levels[j].mask;
2624fe6060f1SDimitry Andric     } else {
262506c3fb27SDimitry Andric       KMP_DEBUG_ASSERT(i > 0);
2626fe6060f1SDimitry Andric       levels[i].mask = (-1) << levels[i - 1].mask_width;
2627fe6060f1SDimitry Andric       levels[i].cache_mask = 0;
26280b57cec5SDimitry Andric     }
2629fe6060f1SDimitry Andric   }
2630fe6060f1SDimitry Andric   return levels_index;
2631fe6060f1SDimitry Andric }
2632fe6060f1SDimitry Andric 
// Build the machine topology (__kmp_topology) by decoding x2APIC ids via
// CPUID leaf 0x1F (31) or 0xB (11): bind the current thread to each
// available processor in turn and read its topology levels. Returns true on
// success; on failure sets *msg_id and returns false.
static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {

  cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST];
  kmp_hw_t types[INTEL_LEVEL_TYPE_LAST];
  unsigned levels_index;
  kmp_cpuid buf;
  kmp_uint64 known_levels; // bitmask indexed by INTEL_LEVEL_TYPE_*
  int topology_leaf, highest_leaf, apic_id;
  int num_leaves;
  static int leaves[] = {0, 0};

  kmp_i18n_id_t leaf_message_id;

  // known_levels must have a bit for every topology layer type.
  KMP_BUILD_ASSERT(sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST);

  *msg_id = kmp_i18n_null;
  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
  }

  // Figure out the known topology levels
  known_levels = 0ull;
  for (int i = 0; i < INTEL_LEVEL_TYPE_LAST; ++i) {
    if (__kmp_intel_type_2_topology_type(i) != KMP_HW_UNKNOWN) {
      known_levels |= (1ull << i);
    }
  }

  // Get the highest cpuid leaf supported
  __kmp_x86_cpuid(0, 0, &buf);
  highest_leaf = buf.eax;

  // If a specific topology method was requested, only allow that specific leaf
  // otherwise, try both leaves 31 and 11 in that order
  num_leaves = 0;
  if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    num_leaves = 1;
    leaves[0] = 11;
    leaf_message_id = kmp_i18n_str_NoLeaf11Support;
  } else if (__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
    num_leaves = 1;
    leaves[0] = 31;
    leaf_message_id = kmp_i18n_str_NoLeaf31Support;
  } else {
    num_leaves = 2;
    leaves[0] = 31;
    leaves[1] = 11;
    leaf_message_id = kmp_i18n_str_NoLeaf11Support;
  }

  // Check to see if cpuid leaf 31 or 11 is supported.
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
  topology_leaf = -1;
  for (int i = 0; i < num_leaves; ++i) {
    int leaf = leaves[i];
    if (highest_leaf < leaf)
      continue;
    __kmp_x86_cpuid(leaf, 0, &buf);
    if (buf.ebx == 0) // leaf present but reports no processors
      continue;
    topology_leaf = leaf;
    levels_index = __kmp_x2apicid_get_levels(leaf, levels, known_levels);
    if (levels_index == 0)
      continue;
    break;
  }
  // NOTE: if no leaf was usable, topology_leaf is still -1 and levels_index
  // may be unwritten; the short-circuit below relies on checking the leaf
  // first.
  if (topology_leaf == -1 || levels_index == 0) {
    *msg_id = leaf_message_id;
    return false;
  }
  KMP_ASSERT(levels_index <= INTEL_LEVEL_TYPE_LAST);

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable of
  // calling __kmp_set_system_affinity() and __kmp_get_system_affinity(), then
  // we need to do something else - use the defaults that we calculated from
  // issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity.type == affinity_none);
    for (unsigned i = 0; i < levels_index; ++i) {
      if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
        __kmp_nThreadsPerCore = levels[i].nitems;
      } else if (levels[i].level_type == INTEL_LEVEL_TYPE_CORE) {
        nCoresPerPkg = levels[i].nitems;
      }
    }
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    return true;
  }

  // Allocate the data structure to be returned.
  int depth = levels_index;
  // levels[] is innermost-first (SMT first); types[] wants outermost-first.
  for (int i = depth - 1, j = 0; i >= 0; --i, ++j)
    types[j] = __kmp_intel_type_2_topology_type(levels[i].level_type);
  __kmp_topology =
      kmp_topology_t::allocate(__kmp_avail_proc, levels_index, types);

  // Insert equivalent cache types if they exist: a cache whose mask matches a
  // topology level's cache_mask is shared at that level's granularity.
  kmp_cache_info_t cache_info;
  for (size_t i = 0; i < cache_info.get_depth(); ++i) {
    const kmp_cache_info_t::info_t &info = cache_info[i];
    unsigned cache_mask = info.mask;
    unsigned cache_level = info.level;
    for (unsigned j = 0; j < levels_index; ++j) {
      unsigned hw_cache_mask = levels[j].cache_mask;
      kmp_hw_t cache_type = kmp_cache_info_t::get_topology_type(cache_level);
      if (hw_cache_mask == cache_mask && j < levels_index - 1) {
        kmp_hw_t type =
            __kmp_intel_type_2_topology_type(levels[j + 1].level_type);
        __kmp_topology->set_equivalent_type(cache_type, type);
      }
    }
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity.type = affinity_none.

  // Save the affinity mask for the current thread (restored by the RAII
  // object's destructor when this function returns).
  kmp_affinity_raii_t previous_affinity;

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int hw_thread_index = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    cpuid_level_info_t my_levels[INTEL_LEVEL_TYPE_LAST];
    unsigned my_levels_index;

    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(hw_thread_index < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // New algorithm: read this processor's x2APIC id and its per-level masks.
    __kmp_x86_cpuid(topology_leaf, 0, &buf);
    apic_id = buf.edx;
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
    my_levels_index =
        __kmp_x2apicid_get_levels(topology_leaf, my_levels, known_levels);
    // Every processor must report the same level structure.
    if (my_levels_index == 0 || my_levels_index != levels_index) {
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return false;
    }
    hw_thread.clear();
    hw_thread.os_id = proc;
    // Put in topology information: extract each level's id from the apic id,
    // shifting off the bits consumed by the inner levels.
    for (unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
      hw_thread.ids[idx] = apic_id & my_levels[j].mask;
      if (j > 0) {
        hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
      }
    }
    // Hybrid information (P-core/E-core) is available from leaf 0x1A.
    if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
      kmp_hw_core_type_t type;
      unsigned native_model_id;
      int efficiency;
      __kmp_get_hybrid_info(&type, &efficiency, &native_model_id);
      hw_thread.attrs.set_core_type(type);
      hw_thread.attrs.set_core_eff(efficiency);
    }
    hw_thread_index++;
  }
  KMP_ASSERT(hw_thread_index > 0);
  __kmp_topology->sort_ids();
  if (!__kmp_topology->check_ids()) {
    kmp_topology_t::deallocate(__kmp_topology);
    __kmp_topology = nullptr;
    *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
    return false;
  }
  return true;
}
28130b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
28140b57cec5SDimitry Andric 
// Field indices into each per-processor record parsed from /proc/cpuinfo.
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

// A record is an array of unsigned fields indexed by the constants above.
typedef unsigned *ProcCpuInfo;
// Highest field index in use; grows past pkgIdIndex when node_<n> fields
// are found while scanning the cpuinfo file.
static unsigned maxIndex = pkgIdIndex;
28230b57cec5SDimitry Andric 
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void * a,const void * b)28240b57cec5SDimitry Andric static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
28250b57cec5SDimitry Andric                                                   const void *b) {
28260b57cec5SDimitry Andric   unsigned i;
28270b57cec5SDimitry Andric   const unsigned *aa = *(unsigned *const *)a;
28280b57cec5SDimitry Andric   const unsigned *bb = *(unsigned *const *)b;
28290b57cec5SDimitry Andric   for (i = maxIndex;; i--) {
28300b57cec5SDimitry Andric     if (aa[i] < bb[i])
28310b57cec5SDimitry Andric       return -1;
28320b57cec5SDimitry Andric     if (aa[i] > bb[i])
28330b57cec5SDimitry Andric       return 1;
28340b57cec5SDimitry Andric     if (i == osIdIndex)
28350b57cec5SDimitry Andric       break;
28360b57cec5SDimitry Andric   }
28370b57cec5SDimitry Andric   return 0;
28380b57cec5SDimitry Andric }
28390b57cec5SDimitry Andric 
28400b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
// Set the array sizes for the hierarchy layers
// (globals __kmp_hier_max_units / __kmp_hier_threads_per, indexed by
// kmp_hier_layer_e + 1). Assumes nPackages, nCoresPerPkg, __kmp_ncores and
// __kmp_nThreadsPerCore have already been computed by topology detection.
static void __kmp_dispatch_set_hierarchy_values() {
  // Set the maximum number of L1's to number of cores
  // Set the maximum number of L2's to either number of cores / 2 for
  // Intel(R) Xeon Phi(TM) coprocessor formally codenamed Knights Landing
  // Or the number of cores for Intel(R) Xeon(R) processors
  // Set the maximum number of NUMA nodes and L3's to number of packages
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 &&                                                         \
    (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||    \
     KMP_OS_WINDOWS) &&                                                        \
    KMP_MIC_SUPPORTED
  // KNL-class parts (mic3 and newer): two cores share each L2.
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
  // Set the number of threads per unit
  // Number of hardware threads per L1/L2/L3/NUMA/LOOP
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
      __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 &&                                                         \
    (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||    \
     KMP_OS_WINDOWS) &&                                                        \
    KMP_MIC_SUPPORTED
  // Mirrors the max-units case above: two cores' worth of threads per L2.
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
}
28860b57cec5SDimitry Andric 
28870b57cec5SDimitry Andric // Return the index into the hierarchy for this tid and layer type (L1, L2, etc)
28880b57cec5SDimitry Andric // i.e., this thread's L1 or this thread's L2, etc.
__kmp_dispatch_get_index(int tid,kmp_hier_layer_e type)28890b57cec5SDimitry Andric int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
28900b57cec5SDimitry Andric   int index = type + 1;
28910b57cec5SDimitry Andric   int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
28920b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
28930b57cec5SDimitry Andric   if (type == kmp_hier_layer_e::LAYER_THREAD)
28940b57cec5SDimitry Andric     return tid;
28950b57cec5SDimitry Andric   else if (type == kmp_hier_layer_e::LAYER_LOOP)
28960b57cec5SDimitry Andric     return 0;
28970b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
28980b57cec5SDimitry Andric   if (tid >= num_hw_threads)
28990b57cec5SDimitry Andric     tid = tid % num_hw_threads;
29000b57cec5SDimitry Andric   return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
29010b57cec5SDimitry Andric }
29020b57cec5SDimitry Andric 
29030b57cec5SDimitry Andric // Return the number of t1's per t2
__kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1,kmp_hier_layer_e t2)29040b57cec5SDimitry Andric int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
29050b57cec5SDimitry Andric   int i1 = t1 + 1;
29060b57cec5SDimitry Andric   int i2 = t2 + 1;
29070b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(i1 <= i2);
29080b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
29090b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
29100b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
29110b57cec5SDimitry Andric   // (nthreads/t2) / (nthreads/t1) = t1 / t2
29120b57cec5SDimitry Andric   return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
29130b57cec5SDimitry Andric }
29140b57cec5SDimitry Andric #endif // KMP_USE_HIER_SCHED
29150b57cec5SDimitry Andric 
__kmp_cpuinfo_get_filename()2916fe6060f1SDimitry Andric static inline const char *__kmp_cpuinfo_get_filename() {
2917fe6060f1SDimitry Andric   const char *filename;
2918fe6060f1SDimitry Andric   if (__kmp_cpuinfo_file != nullptr)
2919fe6060f1SDimitry Andric     filename = __kmp_cpuinfo_file;
2920fe6060f1SDimitry Andric   else
2921fe6060f1SDimitry Andric     filename = "/proc/cpuinfo";
2922fe6060f1SDimitry Andric   return filename;
2923fe6060f1SDimitry Andric }
2924fe6060f1SDimitry Andric 
__kmp_cpuinfo_get_envvar()2925fe6060f1SDimitry Andric static inline const char *__kmp_cpuinfo_get_envvar() {
2926fe6060f1SDimitry Andric   const char *envvar = nullptr;
2927fe6060f1SDimitry Andric   if (__kmp_cpuinfo_file != nullptr)
2928fe6060f1SDimitry Andric     envvar = "KMP_CPUINFO_FILE";
2929fe6060f1SDimitry Andric   return envvar;
2930fe6060f1SDimitry Andric }
2931fe6060f1SDimitry Andric 
29320b57cec5SDimitry Andric // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
2933439352acSDimitry Andric // affinity map. On AIX, the map is obtained through system SRAD (Scheduler
2934439352acSDimitry Andric // Resource Allocation Domain).
__kmp_affinity_create_cpuinfo_map(int * line,kmp_i18n_id_t * const msg_id)2935fe6060f1SDimitry Andric static bool __kmp_affinity_create_cpuinfo_map(int *line,
2936fe6060f1SDimitry Andric                                               kmp_i18n_id_t *const msg_id) {
2937439352acSDimitry Andric   *msg_id = kmp_i18n_null;
2938439352acSDimitry Andric 
2939439352acSDimitry Andric #if KMP_OS_AIX
2940439352acSDimitry Andric   unsigned num_records = __kmp_xproc;
2941439352acSDimitry Andric #else
2942fe6060f1SDimitry Andric   const char *filename = __kmp_cpuinfo_get_filename();
2943fe6060f1SDimitry Andric   const char *envvar = __kmp_cpuinfo_get_envvar();
29440b57cec5SDimitry Andric 
2945bdd1243dSDimitry Andric   if (__kmp_affinity.flags.verbose) {
2946fe6060f1SDimitry Andric     KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
2947fe6060f1SDimitry Andric   }
2948fe6060f1SDimitry Andric 
2949fe6060f1SDimitry Andric   kmp_safe_raii_file_t f(filename, "r", envvar);
2950fe6060f1SDimitry Andric 
29510b57cec5SDimitry Andric   // Scan of the file, and count the number of "processor" (osId) fields,
29520b57cec5SDimitry Andric   // and find the highest value of <n> for a node_<n> field.
29530b57cec5SDimitry Andric   char buf[256];
29540b57cec5SDimitry Andric   unsigned num_records = 0;
29550b57cec5SDimitry Andric   while (!feof(f)) {
29560b57cec5SDimitry Andric     buf[sizeof(buf) - 1] = 1;
29570b57cec5SDimitry Andric     if (!fgets(buf, sizeof(buf), f)) {
29580b57cec5SDimitry Andric       // Read errors presumably because of EOF
29590b57cec5SDimitry Andric       break;
29600b57cec5SDimitry Andric     }
29610b57cec5SDimitry Andric 
29620b57cec5SDimitry Andric     char s1[] = "processor";
29630b57cec5SDimitry Andric     if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
29640b57cec5SDimitry Andric       num_records++;
29650b57cec5SDimitry Andric       continue;
29660b57cec5SDimitry Andric     }
29670b57cec5SDimitry Andric 
29680b57cec5SDimitry Andric     // FIXME - this will match "node_<n> <garbage>"
29690b57cec5SDimitry Andric     unsigned level;
29700b57cec5SDimitry Andric     if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      // validate the input first:
2972fe6060f1SDimitry Andric       if (level > (unsigned)__kmp_xproc) { // level is too big
2973fe6060f1SDimitry Andric         level = __kmp_xproc;
2974fe6060f1SDimitry Andric       }
29750b57cec5SDimitry Andric       if (nodeIdIndex + level >= maxIndex) {
29760b57cec5SDimitry Andric         maxIndex = nodeIdIndex + level;
29770b57cec5SDimitry Andric       }
29780b57cec5SDimitry Andric       continue;
29790b57cec5SDimitry Andric     }
29800b57cec5SDimitry Andric   }
29810b57cec5SDimitry Andric 
29820b57cec5SDimitry Andric   // Check for empty file / no valid processor records, or too many. The number
29830b57cec5SDimitry Andric   // of records can't exceed the number of valid bits in the affinity mask.
29840b57cec5SDimitry Andric   if (num_records == 0) {
29850b57cec5SDimitry Andric     *msg_id = kmp_i18n_str_NoProcRecords;
2986fe6060f1SDimitry Andric     return false;
29870b57cec5SDimitry Andric   }
29880b57cec5SDimitry Andric   if (num_records > (unsigned)__kmp_xproc) {
29890b57cec5SDimitry Andric     *msg_id = kmp_i18n_str_TooManyProcRecords;
2990fe6060f1SDimitry Andric     return false;
29910b57cec5SDimitry Andric   }
29920b57cec5SDimitry Andric 
2993480093f4SDimitry Andric   // Set the file pointer back to the beginning, so that we can scan the file
29940b57cec5SDimitry Andric   // again, this time performing a full parse of the data. Allocate a vector of
29950b57cec5SDimitry Andric   // ProcCpuInfo object, where we will place the data. Adding an extra element
29960b57cec5SDimitry Andric   // at the end allows us to remove a lot of extra checks for termination
29970b57cec5SDimitry Andric   // conditions.
29980b57cec5SDimitry Andric   if (fseek(f, 0, SEEK_SET) != 0) {
29990b57cec5SDimitry Andric     *msg_id = kmp_i18n_str_CantRewindCpuinfo;
3000fe6060f1SDimitry Andric     return false;
30010b57cec5SDimitry Andric   }
3002439352acSDimitry Andric #endif // KMP_OS_AIX
30030b57cec5SDimitry Andric 
30040b57cec5SDimitry Andric   // Allocate the array of records to store the proc info in.  The dummy
30050b57cec5SDimitry Andric   // element at the end makes the logic in filling them out easier to code.
30060b57cec5SDimitry Andric   unsigned **threadInfo =
30070b57cec5SDimitry Andric       (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
30080b57cec5SDimitry Andric   unsigned i;
30090b57cec5SDimitry Andric   for (i = 0; i <= num_records; i++) {
30100b57cec5SDimitry Andric     threadInfo[i] =
30110b57cec5SDimitry Andric         (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
30120b57cec5SDimitry Andric   }
30130b57cec5SDimitry Andric 
30140b57cec5SDimitry Andric #define CLEANUP_THREAD_INFO                                                    \
30150b57cec5SDimitry Andric   for (i = 0; i <= num_records; i++) {                                         \
30160b57cec5SDimitry Andric     __kmp_free(threadInfo[i]);                                                 \
30170b57cec5SDimitry Andric   }                                                                            \
30180b57cec5SDimitry Andric   __kmp_free(threadInfo);
30190b57cec5SDimitry Andric 
30200b57cec5SDimitry Andric   // A value of UINT_MAX means that we didn't find the field
30210b57cec5SDimitry Andric   unsigned __index;
30220b57cec5SDimitry Andric 
30230b57cec5SDimitry Andric #define INIT_PROC_INFO(p)                                                      \
30240b57cec5SDimitry Andric   for (__index = 0; __index <= maxIndex; __index++) {                          \
30250b57cec5SDimitry Andric     (p)[__index] = UINT_MAX;                                                   \
30260b57cec5SDimitry Andric   }
30270b57cec5SDimitry Andric 
30280b57cec5SDimitry Andric   for (i = 0; i <= num_records; i++) {
30290b57cec5SDimitry Andric     INIT_PROC_INFO(threadInfo[i]);
30300b57cec5SDimitry Andric   }
30310b57cec5SDimitry Andric 
3032439352acSDimitry Andric #if KMP_OS_AIX
3033439352acSDimitry Andric   int smt_threads;
3034439352acSDimitry Andric   lpar_info_format1_t cpuinfo;
3035439352acSDimitry Andric   unsigned num_avail = __kmp_xproc;
3036439352acSDimitry Andric 
3037439352acSDimitry Andric   if (__kmp_affinity.flags.verbose)
3038439352acSDimitry Andric     KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "system info for topology");
3039439352acSDimitry Andric 
3040439352acSDimitry Andric   // Get the number of SMT threads per core.
3041*0fca6ea1SDimitry Andric   smt_threads = syssmt(GET_NUMBER_SMT_SETS, 0, 0, NULL);
3042439352acSDimitry Andric 
3043439352acSDimitry Andric   // Allocate a resource set containing available system resourses.
3044439352acSDimitry Andric   rsethandle_t sys_rset = rs_alloc(RS_SYSTEM);
3045439352acSDimitry Andric   if (sys_rset == NULL) {
3046439352acSDimitry Andric     CLEANUP_THREAD_INFO;
3047439352acSDimitry Andric     *msg_id = kmp_i18n_str_UnknownTopology;
3048439352acSDimitry Andric     return false;
3049439352acSDimitry Andric   }
3050439352acSDimitry Andric   // Allocate a resource set for the SRAD info.
3051439352acSDimitry Andric   rsethandle_t srad = rs_alloc(RS_EMPTY);
3052439352acSDimitry Andric   if (srad == NULL) {
3053439352acSDimitry Andric     rs_free(sys_rset);
3054439352acSDimitry Andric     CLEANUP_THREAD_INFO;
3055439352acSDimitry Andric     *msg_id = kmp_i18n_str_UnknownTopology;
3056439352acSDimitry Andric     return false;
3057439352acSDimitry Andric   }
3058439352acSDimitry Andric 
3059439352acSDimitry Andric   // Get the SRAD system detail level.
3060439352acSDimitry Andric   int sradsdl = rs_getinfo(NULL, R_SRADSDL, 0);
3061439352acSDimitry Andric   if (sradsdl < 0) {
3062439352acSDimitry Andric     rs_free(sys_rset);
3063439352acSDimitry Andric     rs_free(srad);
3064439352acSDimitry Andric     CLEANUP_THREAD_INFO;
3065439352acSDimitry Andric     *msg_id = kmp_i18n_str_UnknownTopology;
3066439352acSDimitry Andric     return false;
3067439352acSDimitry Andric   }
3068439352acSDimitry Andric   // Get the number of RADs at that SRAD SDL.
3069439352acSDimitry Andric   int num_rads = rs_numrads(sys_rset, sradsdl, 0);
3070439352acSDimitry Andric   if (num_rads < 0) {
3071439352acSDimitry Andric     rs_free(sys_rset);
3072439352acSDimitry Andric     rs_free(srad);
3073439352acSDimitry Andric     CLEANUP_THREAD_INFO;
3074439352acSDimitry Andric     *msg_id = kmp_i18n_str_UnknownTopology;
3075439352acSDimitry Andric     return false;
3076439352acSDimitry Andric   }
3077439352acSDimitry Andric 
3078439352acSDimitry Andric   // Get the maximum number of procs that may be contained in a resource set.
3079439352acSDimitry Andric   int max_procs = rs_getinfo(NULL, R_MAXPROCS, 0);
3080439352acSDimitry Andric   if (max_procs < 0) {
3081439352acSDimitry Andric     rs_free(sys_rset);
3082439352acSDimitry Andric     rs_free(srad);
3083439352acSDimitry Andric     CLEANUP_THREAD_INFO;
3084439352acSDimitry Andric     *msg_id = kmp_i18n_str_UnknownTopology;
3085439352acSDimitry Andric     return false;
3086439352acSDimitry Andric   }
3087439352acSDimitry Andric 
3088439352acSDimitry Andric   int cur_rad = 0;
3089439352acSDimitry Andric   int num_set = 0;
3090439352acSDimitry Andric   for (int srad_idx = 0; cur_rad < num_rads && srad_idx < VMI_MAXRADS;
3091439352acSDimitry Andric        ++srad_idx) {
3092439352acSDimitry Andric     // Check if the SRAD is available in the RSET.
3093439352acSDimitry Andric     if (rs_getrad(sys_rset, srad, sradsdl, srad_idx, 0) < 0)
3094439352acSDimitry Andric       continue;
3095439352acSDimitry Andric 
3096439352acSDimitry Andric     for (int cpu = 0; cpu < max_procs; cpu++) {
3097439352acSDimitry Andric       // Set the info for the cpu if it is in the SRAD.
3098439352acSDimitry Andric       if (rs_op(RS_TESTRESOURCE, srad, NULL, R_PROCS, cpu)) {
3099439352acSDimitry Andric         threadInfo[cpu][osIdIndex] = cpu;
3100439352acSDimitry Andric         threadInfo[cpu][pkgIdIndex] = cur_rad;
3101439352acSDimitry Andric         threadInfo[cpu][coreIdIndex] = cpu / smt_threads;
3102439352acSDimitry Andric         ++num_set;
3103439352acSDimitry Andric         if (num_set >= num_avail) {
3104439352acSDimitry Andric           // Done if all available CPUs have been set.
3105439352acSDimitry Andric           break;
3106439352acSDimitry Andric         }
3107439352acSDimitry Andric       }
3108439352acSDimitry Andric     }
3109439352acSDimitry Andric     ++cur_rad;
3110439352acSDimitry Andric   }
3111439352acSDimitry Andric   rs_free(sys_rset);
3112439352acSDimitry Andric   rs_free(srad);
3113439352acSDimitry Andric 
3114439352acSDimitry Andric   // The topology is already sorted.
3115439352acSDimitry Andric 
3116439352acSDimitry Andric #else // !KMP_OS_AIX
31170b57cec5SDimitry Andric   unsigned num_avail = 0;
31180b57cec5SDimitry Andric   *line = 0;
31195f757f3fSDimitry Andric #if KMP_ARCH_S390X
31205f757f3fSDimitry Andric   bool reading_s390x_sys_info = true;
31215f757f3fSDimitry Andric #endif
31220b57cec5SDimitry Andric   while (!feof(f)) {
31230b57cec5SDimitry Andric     // Create an inner scoping level, so that all the goto targets at the end of
31240b57cec5SDimitry Andric     // the loop appear in an outer scoping level. This avoids warnings about
31250b57cec5SDimitry Andric     // jumping past an initialization to a target in the same block.
31260b57cec5SDimitry Andric     {
31270b57cec5SDimitry Andric       buf[sizeof(buf) - 1] = 1;
31280b57cec5SDimitry Andric       bool long_line = false;
31290b57cec5SDimitry Andric       if (!fgets(buf, sizeof(buf), f)) {
31300b57cec5SDimitry Andric         // Read errors presumably because of EOF
31310b57cec5SDimitry Andric         // If there is valid data in threadInfo[num_avail], then fake
31320b57cec5SDimitry Andric         // a blank line in ensure that the last address gets parsed.
31330b57cec5SDimitry Andric         bool valid = false;
31340b57cec5SDimitry Andric         for (i = 0; i <= maxIndex; i++) {
31350b57cec5SDimitry Andric           if (threadInfo[num_avail][i] != UINT_MAX) {
31360b57cec5SDimitry Andric             valid = true;
31370b57cec5SDimitry Andric           }
31380b57cec5SDimitry Andric         }
31390b57cec5SDimitry Andric         if (!valid) {
31400b57cec5SDimitry Andric           break;
31410b57cec5SDimitry Andric         }
31420b57cec5SDimitry Andric         buf[0] = 0;
31430b57cec5SDimitry Andric       } else if (!buf[sizeof(buf) - 1]) {
31440b57cec5SDimitry Andric         // The line is longer than the buffer.  Set a flag and don't
31450b57cec5SDimitry Andric         // emit an error if we were going to ignore the line, anyway.
31460b57cec5SDimitry Andric         long_line = true;
31470b57cec5SDimitry Andric 
31480b57cec5SDimitry Andric #define CHECK_LINE                                                             \
31490b57cec5SDimitry Andric   if (long_line) {                                                             \
31500b57cec5SDimitry Andric     CLEANUP_THREAD_INFO;                                                       \
31510b57cec5SDimitry Andric     *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
3152fe6060f1SDimitry Andric     return false;                                                              \
31530b57cec5SDimitry Andric   }
31540b57cec5SDimitry Andric       }
31550b57cec5SDimitry Andric       (*line)++;
31560b57cec5SDimitry Andric 
3157bdd1243dSDimitry Andric #if KMP_ARCH_LOONGARCH64
3158bdd1243dSDimitry Andric       // The parsing logic of /proc/cpuinfo in this function highly depends on
3159bdd1243dSDimitry Andric       // the blank lines between each processor info block. But on LoongArch a
3160bdd1243dSDimitry Andric       // blank line exists before the first processor info block (i.e. after the
3161bdd1243dSDimitry Andric       // "system type" line). This blank line was added because the "system
3162bdd1243dSDimitry Andric       // type" line is unrelated to any of the CPUs. We must skip this line so
3163bdd1243dSDimitry Andric       // that the original logic works on LoongArch.
3164bdd1243dSDimitry Andric       if (*buf == '\n' && *line == 2)
3165bdd1243dSDimitry Andric         continue;
3166bdd1243dSDimitry Andric #endif
31675f757f3fSDimitry Andric #if KMP_ARCH_S390X
31685f757f3fSDimitry Andric       // s390x /proc/cpuinfo starts with a variable number of lines containing
31695f757f3fSDimitry Andric       // the overall system information. Skip them.
31705f757f3fSDimitry Andric       if (reading_s390x_sys_info) {
31715f757f3fSDimitry Andric         if (*buf == '\n')
31725f757f3fSDimitry Andric           reading_s390x_sys_info = false;
31735f757f3fSDimitry Andric         continue;
31745f757f3fSDimitry Andric       }
31755f757f3fSDimitry Andric #endif
3176bdd1243dSDimitry Andric 
31775f757f3fSDimitry Andric #if KMP_ARCH_S390X
31785f757f3fSDimitry Andric       char s1[] = "cpu number";
31795f757f3fSDimitry Andric #else
31800b57cec5SDimitry Andric       char s1[] = "processor";
31815f757f3fSDimitry Andric #endif
31820b57cec5SDimitry Andric       if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
31830b57cec5SDimitry Andric         CHECK_LINE;
31840b57cec5SDimitry Andric         char *p = strchr(buf + sizeof(s1) - 1, ':');
31850b57cec5SDimitry Andric         unsigned val;
31860b57cec5SDimitry Andric         if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
31870b57cec5SDimitry Andric           goto no_val;
31880b57cec5SDimitry Andric         if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
31890b57cec5SDimitry Andric #if KMP_ARCH_AARCH64
31900b57cec5SDimitry Andric           // Handle the old AArch64 /proc/cpuinfo layout differently,
31910b57cec5SDimitry Andric           // it contains all of the 'processor' entries listed in a
31920b57cec5SDimitry Andric           // single 'Processor' section, therefore the normal looking
31930b57cec5SDimitry Andric           // for duplicates in that section will always fail.
31940b57cec5SDimitry Andric           num_avail++;
31950b57cec5SDimitry Andric #else
31960b57cec5SDimitry Andric           goto dup_field;
31970b57cec5SDimitry Andric #endif
31980b57cec5SDimitry Andric         threadInfo[num_avail][osIdIndex] = val;
31990b57cec5SDimitry Andric #if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
32000b57cec5SDimitry Andric         char path[256];
32010b57cec5SDimitry Andric         KMP_SNPRINTF(
32020b57cec5SDimitry Andric             path, sizeof(path),
32030b57cec5SDimitry Andric             "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
32040b57cec5SDimitry Andric             threadInfo[num_avail][osIdIndex]);
32050b57cec5SDimitry Andric         __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
32060b57cec5SDimitry Andric 
32075f757f3fSDimitry Andric #if KMP_ARCH_S390X
32085f757f3fSDimitry Andric         // Disambiguate physical_package_id.
32095f757f3fSDimitry Andric         unsigned book_id;
32105f757f3fSDimitry Andric         KMP_SNPRINTF(path, sizeof(path),
32115f757f3fSDimitry Andric                      "/sys/devices/system/cpu/cpu%u/topology/book_id",
32125f757f3fSDimitry Andric                      threadInfo[num_avail][osIdIndex]);
32135f757f3fSDimitry Andric         __kmp_read_from_file(path, "%u", &book_id);
32145f757f3fSDimitry Andric         threadInfo[num_avail][pkgIdIndex] |= (book_id << 8);
32155f757f3fSDimitry Andric 
32165f757f3fSDimitry Andric         unsigned drawer_id;
32175f757f3fSDimitry Andric         KMP_SNPRINTF(path, sizeof(path),
32185f757f3fSDimitry Andric                      "/sys/devices/system/cpu/cpu%u/topology/drawer_id",
32195f757f3fSDimitry Andric                      threadInfo[num_avail][osIdIndex]);
32205f757f3fSDimitry Andric         __kmp_read_from_file(path, "%u", &drawer_id);
32215f757f3fSDimitry Andric         threadInfo[num_avail][pkgIdIndex] |= (drawer_id << 16);
32225f757f3fSDimitry Andric #endif
32235f757f3fSDimitry Andric 
32240b57cec5SDimitry Andric         KMP_SNPRINTF(path, sizeof(path),
32250b57cec5SDimitry Andric                      "/sys/devices/system/cpu/cpu%u/topology/core_id",
32260b57cec5SDimitry Andric                      threadInfo[num_avail][osIdIndex]);
32270b57cec5SDimitry Andric         __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
32280b57cec5SDimitry Andric         continue;
32290b57cec5SDimitry Andric #else
32300b57cec5SDimitry Andric       }
32310b57cec5SDimitry Andric       char s2[] = "physical id";
32320b57cec5SDimitry Andric       if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
32330b57cec5SDimitry Andric         CHECK_LINE;
32340b57cec5SDimitry Andric         char *p = strchr(buf + sizeof(s2) - 1, ':');
32350b57cec5SDimitry Andric         unsigned val;
32360b57cec5SDimitry Andric         if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32370b57cec5SDimitry Andric           goto no_val;
32380b57cec5SDimitry Andric         if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
32390b57cec5SDimitry Andric           goto dup_field;
32400b57cec5SDimitry Andric         threadInfo[num_avail][pkgIdIndex] = val;
32410b57cec5SDimitry Andric         continue;
32420b57cec5SDimitry Andric       }
32430b57cec5SDimitry Andric       char s3[] = "core id";
32440b57cec5SDimitry Andric       if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
32450b57cec5SDimitry Andric         CHECK_LINE;
32460b57cec5SDimitry Andric         char *p = strchr(buf + sizeof(s3) - 1, ':');
32470b57cec5SDimitry Andric         unsigned val;
32480b57cec5SDimitry Andric         if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32490b57cec5SDimitry Andric           goto no_val;
32500b57cec5SDimitry Andric         if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
32510b57cec5SDimitry Andric           goto dup_field;
32520b57cec5SDimitry Andric         threadInfo[num_avail][coreIdIndex] = val;
32530b57cec5SDimitry Andric         continue;
32540b57cec5SDimitry Andric #endif // KMP_OS_LINUX && USE_SYSFS_INFO
32550b57cec5SDimitry Andric       }
32560b57cec5SDimitry Andric       char s4[] = "thread id";
32570b57cec5SDimitry Andric       if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
32580b57cec5SDimitry Andric         CHECK_LINE;
32590b57cec5SDimitry Andric         char *p = strchr(buf + sizeof(s4) - 1, ':');
32600b57cec5SDimitry Andric         unsigned val;
32610b57cec5SDimitry Andric         if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32620b57cec5SDimitry Andric           goto no_val;
32630b57cec5SDimitry Andric         if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
32640b57cec5SDimitry Andric           goto dup_field;
32650b57cec5SDimitry Andric         threadInfo[num_avail][threadIdIndex] = val;
32660b57cec5SDimitry Andric         continue;
32670b57cec5SDimitry Andric       }
32680b57cec5SDimitry Andric       unsigned level;
32690b57cec5SDimitry Andric       if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
32700b57cec5SDimitry Andric         CHECK_LINE;
32710b57cec5SDimitry Andric         char *p = strchr(buf + sizeof(s4) - 1, ':');
32720b57cec5SDimitry Andric         unsigned val;
32730b57cec5SDimitry Andric         if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32740b57cec5SDimitry Andric           goto no_val;
3275349cc55cSDimitry Andric         // validate the input before using level:
3276349cc55cSDimitry Andric         if (level > (unsigned)__kmp_xproc) { // level is too big
3277349cc55cSDimitry Andric           level = __kmp_xproc;
3278349cc55cSDimitry Andric         }
32790b57cec5SDimitry Andric         if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
32800b57cec5SDimitry Andric           goto dup_field;
32810b57cec5SDimitry Andric         threadInfo[num_avail][nodeIdIndex + level] = val;
32820b57cec5SDimitry Andric         continue;
32830b57cec5SDimitry Andric       }
32840b57cec5SDimitry Andric 
32850b57cec5SDimitry Andric       // We didn't recognize the leading token on the line. There are lots of
32860b57cec5SDimitry Andric       // leading tokens that we don't recognize - if the line isn't empty, go on
32870b57cec5SDimitry Andric       // to the next line.
32880b57cec5SDimitry Andric       if ((*buf != 0) && (*buf != '\n')) {
32890b57cec5SDimitry Andric         // If the line is longer than the buffer, read characters
32900b57cec5SDimitry Andric         // until we find a newline.
32910b57cec5SDimitry Andric         if (long_line) {
32920b57cec5SDimitry Andric           int ch;
32930b57cec5SDimitry Andric           while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
32940b57cec5SDimitry Andric             ;
32950b57cec5SDimitry Andric         }
32960b57cec5SDimitry Andric         continue;
32970b57cec5SDimitry Andric       }
32980b57cec5SDimitry Andric 
32990b57cec5SDimitry Andric       // A newline has signalled the end of the processor record.
33000b57cec5SDimitry Andric       // Check that there aren't too many procs specified.
33010b57cec5SDimitry Andric       if ((int)num_avail == __kmp_xproc) {
33020b57cec5SDimitry Andric         CLEANUP_THREAD_INFO;
33030b57cec5SDimitry Andric         *msg_id = kmp_i18n_str_TooManyEntries;
3304fe6060f1SDimitry Andric         return false;
33050b57cec5SDimitry Andric       }
33060b57cec5SDimitry Andric 
33070b57cec5SDimitry Andric       // Check for missing fields.  The osId field must be there, and we
33080b57cec5SDimitry Andric       // currently require that the physical id field is specified, also.
33090b57cec5SDimitry Andric       if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
33100b57cec5SDimitry Andric         CLEANUP_THREAD_INFO;
33110b57cec5SDimitry Andric         *msg_id = kmp_i18n_str_MissingProcField;
3312fe6060f1SDimitry Andric         return false;
33130b57cec5SDimitry Andric       }
33140b57cec5SDimitry Andric       if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
33150b57cec5SDimitry Andric         CLEANUP_THREAD_INFO;
33160b57cec5SDimitry Andric         *msg_id = kmp_i18n_str_MissingPhysicalIDField;
3317fe6060f1SDimitry Andric         return false;
33180b57cec5SDimitry Andric       }
33190b57cec5SDimitry Andric 
33200b57cec5SDimitry Andric       // Skip this proc if it is not included in the machine model.
3321bdd1243dSDimitry Andric       if (KMP_AFFINITY_CAPABLE() &&
3322bdd1243dSDimitry Andric           !KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
33230b57cec5SDimitry Andric                          __kmp_affin_fullMask)) {
33240b57cec5SDimitry Andric         INIT_PROC_INFO(threadInfo[num_avail]);
33250b57cec5SDimitry Andric         continue;
33260b57cec5SDimitry Andric       }
33270b57cec5SDimitry Andric 
33280b57cec5SDimitry Andric       // We have a successful parse of this proc's info.
33290b57cec5SDimitry Andric       // Increment the counter, and prepare for the next proc.
33300b57cec5SDimitry Andric       num_avail++;
33310b57cec5SDimitry Andric       KMP_ASSERT(num_avail <= num_records);
33320b57cec5SDimitry Andric       INIT_PROC_INFO(threadInfo[num_avail]);
33330b57cec5SDimitry Andric     }
33340b57cec5SDimitry Andric     continue;
33350b57cec5SDimitry Andric 
33360b57cec5SDimitry Andric   no_val:
33370b57cec5SDimitry Andric     CLEANUP_THREAD_INFO;
33380b57cec5SDimitry Andric     *msg_id = kmp_i18n_str_MissingValCpuinfo;
3339fe6060f1SDimitry Andric     return false;
33400b57cec5SDimitry Andric 
33410b57cec5SDimitry Andric   dup_field:
33420b57cec5SDimitry Andric     CLEANUP_THREAD_INFO;
33430b57cec5SDimitry Andric     *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
3344fe6060f1SDimitry Andric     return false;
33450b57cec5SDimitry Andric   }
33460b57cec5SDimitry Andric   *line = 0;
33470b57cec5SDimitry Andric 
33480b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
33490b57cec5SDimitry Andric   unsigned teamSize = 0;
33500b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
33510b57cec5SDimitry Andric 
33520b57cec5SDimitry Andric   // check for num_records == __kmp_xproc ???
33530b57cec5SDimitry Andric 
33540b57cec5SDimitry Andric   // If it is configured to omit the package level when there is only a single
33550b57cec5SDimitry Andric   // package, the logic at the end of this routine won't work if there is only a
3356fe6060f1SDimitry Andric   // single thread
33570b57cec5SDimitry Andric   KMP_ASSERT(num_avail > 0);
33580b57cec5SDimitry Andric   KMP_ASSERT(num_avail <= num_records);
33590b57cec5SDimitry Andric 
33600b57cec5SDimitry Andric   // Sort the threadInfo table by physical Id.
33610b57cec5SDimitry Andric   qsort(threadInfo, num_avail, sizeof(*threadInfo),
33620b57cec5SDimitry Andric         __kmp_affinity_cmp_ProcCpuInfo_phys_id);
33630b57cec5SDimitry Andric 
3364439352acSDimitry Andric #endif // KMP_OS_AIX
3365439352acSDimitry Andric 
33660b57cec5SDimitry Andric   // The table is now sorted by pkgId / coreId / threadId, but we really don't
33670b57cec5SDimitry Andric   // know the radix of any of the fields. pkgId's may be sparsely assigned among
33680b57cec5SDimitry Andric   // the chips on a system. Although coreId's are usually assigned
33690b57cec5SDimitry Andric   // [0 .. coresPerPkg-1] and threadId's are usually assigned
33700b57cec5SDimitry Andric   // [0..threadsPerCore-1], we don't want to make any such assumptions.
33710b57cec5SDimitry Andric   //
33720b57cec5SDimitry Andric   // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
33730b57cec5SDimitry Andric   // total # packages) are at this point - we want to determine that now. We
33740b57cec5SDimitry Andric   // only have an upper bound on the first two figures.
33750b57cec5SDimitry Andric   unsigned *counts =
33760b57cec5SDimitry Andric       (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33770b57cec5SDimitry Andric   unsigned *maxCt =
33780b57cec5SDimitry Andric       (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33790b57cec5SDimitry Andric   unsigned *totals =
33800b57cec5SDimitry Andric       (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33810b57cec5SDimitry Andric   unsigned *lastId =
33820b57cec5SDimitry Andric       (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33830b57cec5SDimitry Andric 
33840b57cec5SDimitry Andric   bool assign_thread_ids = false;
33850b57cec5SDimitry Andric   unsigned threadIdCt;
33860b57cec5SDimitry Andric   unsigned index;
33870b57cec5SDimitry Andric 
33880b57cec5SDimitry Andric restart_radix_check:
33890b57cec5SDimitry Andric   threadIdCt = 0;
33900b57cec5SDimitry Andric 
33910b57cec5SDimitry Andric   // Initialize the counter arrays with data from threadInfo[0].
33920b57cec5SDimitry Andric   if (assign_thread_ids) {
33930b57cec5SDimitry Andric     if (threadInfo[0][threadIdIndex] == UINT_MAX) {
33940b57cec5SDimitry Andric       threadInfo[0][threadIdIndex] = threadIdCt++;
33950b57cec5SDimitry Andric     } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
33960b57cec5SDimitry Andric       threadIdCt = threadInfo[0][threadIdIndex] + 1;
33970b57cec5SDimitry Andric     }
33980b57cec5SDimitry Andric   }
33990b57cec5SDimitry Andric   for (index = 0; index <= maxIndex; index++) {
34000b57cec5SDimitry Andric     counts[index] = 1;
34010b57cec5SDimitry Andric     maxCt[index] = 1;
34020b57cec5SDimitry Andric     totals[index] = 1;
34030b57cec5SDimitry Andric     lastId[index] = threadInfo[0][index];
34040b57cec5SDimitry Andric     ;
34050b57cec5SDimitry Andric   }
34060b57cec5SDimitry Andric 
34070b57cec5SDimitry Andric   // Run through the rest of the OS procs.
34080b57cec5SDimitry Andric   for (i = 1; i < num_avail; i++) {
34090b57cec5SDimitry Andric     // Find the most significant index whose id differs from the id for the
34100b57cec5SDimitry Andric     // previous OS proc.
34110b57cec5SDimitry Andric     for (index = maxIndex; index >= threadIdIndex; index--) {
34120b57cec5SDimitry Andric       if (assign_thread_ids && (index == threadIdIndex)) {
34130b57cec5SDimitry Andric         // Auto-assign the thread id field if it wasn't specified.
34140b57cec5SDimitry Andric         if (threadInfo[i][threadIdIndex] == UINT_MAX) {
34150b57cec5SDimitry Andric           threadInfo[i][threadIdIndex] = threadIdCt++;
34160b57cec5SDimitry Andric         }
34170b57cec5SDimitry Andric         // Apparently the thread id field was specified for some entries and not
34180b57cec5SDimitry Andric         // others. Start the thread id counter off at the next higher thread id.
34190b57cec5SDimitry Andric         else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
34200b57cec5SDimitry Andric           threadIdCt = threadInfo[i][threadIdIndex] + 1;
34210b57cec5SDimitry Andric         }
34220b57cec5SDimitry Andric       }
34230b57cec5SDimitry Andric       if (threadInfo[i][index] != lastId[index]) {
34240b57cec5SDimitry Andric         // Run through all indices which are less significant, and reset the
34250b57cec5SDimitry Andric         // counts to 1. At all levels up to and including index, we need to
34260b57cec5SDimitry Andric         // increment the totals and record the last id.
34270b57cec5SDimitry Andric         unsigned index2;
34280b57cec5SDimitry Andric         for (index2 = threadIdIndex; index2 < index; index2++) {
34290b57cec5SDimitry Andric           totals[index2]++;
34300b57cec5SDimitry Andric           if (counts[index2] > maxCt[index2]) {
34310b57cec5SDimitry Andric             maxCt[index2] = counts[index2];
34320b57cec5SDimitry Andric           }
34330b57cec5SDimitry Andric           counts[index2] = 1;
34340b57cec5SDimitry Andric           lastId[index2] = threadInfo[i][index2];
34350b57cec5SDimitry Andric         }
34360b57cec5SDimitry Andric         counts[index]++;
34370b57cec5SDimitry Andric         totals[index]++;
34380b57cec5SDimitry Andric         lastId[index] = threadInfo[i][index];
34390b57cec5SDimitry Andric 
34400b57cec5SDimitry Andric         if (assign_thread_ids && (index > threadIdIndex)) {
34410b57cec5SDimitry Andric 
34420b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
34430b57cec5SDimitry Andric           // The default team size is the total #threads in the machine
34440b57cec5SDimitry Andric           // minus 1 thread for every core that has 3 or more threads.
34450b57cec5SDimitry Andric           teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
34460b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
34470b57cec5SDimitry Andric 
34480b57cec5SDimitry Andric           // Restart the thread counter, as we are on a new core.
34490b57cec5SDimitry Andric           threadIdCt = 0;
34500b57cec5SDimitry Andric 
34510b57cec5SDimitry Andric           // Auto-assign the thread id field if it wasn't specified.
34520b57cec5SDimitry Andric           if (threadInfo[i][threadIdIndex] == UINT_MAX) {
34530b57cec5SDimitry Andric             threadInfo[i][threadIdIndex] = threadIdCt++;
34540b57cec5SDimitry Andric           }
34550b57cec5SDimitry Andric 
3456480093f4SDimitry Andric           // Apparently the thread id field was specified for some entries and
34570b57cec5SDimitry Andric           // not others. Start the thread id counter off at the next higher
34580b57cec5SDimitry Andric           // thread id.
34590b57cec5SDimitry Andric           else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
34600b57cec5SDimitry Andric             threadIdCt = threadInfo[i][threadIdIndex] + 1;
34610b57cec5SDimitry Andric           }
34620b57cec5SDimitry Andric         }
34630b57cec5SDimitry Andric         break;
34640b57cec5SDimitry Andric       }
34650b57cec5SDimitry Andric     }
34660b57cec5SDimitry Andric     if (index < threadIdIndex) {
34670b57cec5SDimitry Andric       // If thread ids were specified, it is an error if they are not unique.
34680b57cec5SDimitry Andric       // Also, check that we waven't already restarted the loop (to be safe -
34690b57cec5SDimitry Andric       // shouldn't need to).
34700b57cec5SDimitry Andric       if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
34710b57cec5SDimitry Andric         __kmp_free(lastId);
34720b57cec5SDimitry Andric         __kmp_free(totals);
34730b57cec5SDimitry Andric         __kmp_free(maxCt);
34740b57cec5SDimitry Andric         __kmp_free(counts);
34750b57cec5SDimitry Andric         CLEANUP_THREAD_INFO;
34760b57cec5SDimitry Andric         *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
3477fe6060f1SDimitry Andric         return false;
34780b57cec5SDimitry Andric       }
34790b57cec5SDimitry Andric 
34805f757f3fSDimitry Andric       // If the thread ids were not specified and we see entries that
34810b57cec5SDimitry Andric       // are duplicates, start the loop over and assign the thread ids manually.
34820b57cec5SDimitry Andric       assign_thread_ids = true;
34830b57cec5SDimitry Andric       goto restart_radix_check;
34840b57cec5SDimitry Andric     }
34850b57cec5SDimitry Andric   }
34860b57cec5SDimitry Andric 
34870b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
34880b57cec5SDimitry Andric   // The default team size is the total #threads in the machine
34890b57cec5SDimitry Andric   // minus 1 thread for every core that has 3 or more threads.
34900b57cec5SDimitry Andric   teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
34910b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
34920b57cec5SDimitry Andric 
34930b57cec5SDimitry Andric   for (index = threadIdIndex; index <= maxIndex; index++) {
34940b57cec5SDimitry Andric     if (counts[index] > maxCt[index]) {
34950b57cec5SDimitry Andric       maxCt[index] = counts[index];
34960b57cec5SDimitry Andric     }
34970b57cec5SDimitry Andric   }
34980b57cec5SDimitry Andric 
34990b57cec5SDimitry Andric   __kmp_nThreadsPerCore = maxCt[threadIdIndex];
35000b57cec5SDimitry Andric   nCoresPerPkg = maxCt[coreIdIndex];
35010b57cec5SDimitry Andric   nPackages = totals[pkgIdIndex];
35020b57cec5SDimitry Andric 
35030b57cec5SDimitry Andric   // When affinity is off, this routine will still be called to set
35040b57cec5SDimitry Andric   // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
35050b57cec5SDimitry Andric   // Make sure all these vars are set correctly, and return now if affinity is
35060b57cec5SDimitry Andric   // not enabled.
35070b57cec5SDimitry Andric   __kmp_ncores = totals[coreIdIndex];
35080b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
3509bdd1243dSDimitry Andric     KMP_ASSERT(__kmp_affinity.type == affinity_none);
3510fe6060f1SDimitry Andric     return true;
35110b57cec5SDimitry Andric   }
35120b57cec5SDimitry Andric 
35130b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
35140b57cec5SDimitry Andric   // Set the default team size.
35150b57cec5SDimitry Andric   if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
35160b57cec5SDimitry Andric     __kmp_dflt_team_nth = teamSize;
35170b57cec5SDimitry Andric     KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
35180b57cec5SDimitry Andric                   "__kmp_dflt_team_nth = %d\n",
35190b57cec5SDimitry Andric                   __kmp_dflt_team_nth));
35200b57cec5SDimitry Andric   }
35210b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
35220b57cec5SDimitry Andric 
35230b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
35240b57cec5SDimitry Andric 
35250b57cec5SDimitry Andric   // Count the number of levels which have more nodes at that level than at the
35260b57cec5SDimitry Andric   // parent's level (with there being an implicit root node of the top level).
35270b57cec5SDimitry Andric   // This is equivalent to saying that there is at least one node at this level
35280b57cec5SDimitry Andric   // which has a sibling. These levels are in the map, and the package level is
35290b57cec5SDimitry Andric   // always in the map.
35300b57cec5SDimitry Andric   bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
35310b57cec5SDimitry Andric   for (index = threadIdIndex; index < maxIndex; index++) {
35320b57cec5SDimitry Andric     KMP_ASSERT(totals[index] >= totals[index + 1]);
35330b57cec5SDimitry Andric     inMap[index] = (totals[index] > totals[index + 1]);
35340b57cec5SDimitry Andric   }
35350b57cec5SDimitry Andric   inMap[maxIndex] = (totals[maxIndex] > 1);
35360b57cec5SDimitry Andric   inMap[pkgIdIndex] = true;
3537fe6060f1SDimitry Andric   inMap[coreIdIndex] = true;
3538fe6060f1SDimitry Andric   inMap[threadIdIndex] = true;
35390b57cec5SDimitry Andric 
35400b57cec5SDimitry Andric   int depth = 0;
3541fe6060f1SDimitry Andric   int idx = 0;
3542fe6060f1SDimitry Andric   kmp_hw_t types[KMP_HW_LAST];
3543fe6060f1SDimitry Andric   int pkgLevel = -1;
3544fe6060f1SDimitry Andric   int coreLevel = -1;
3545fe6060f1SDimitry Andric   int threadLevel = -1;
35460b57cec5SDimitry Andric   for (index = threadIdIndex; index <= maxIndex; index++) {
35470b57cec5SDimitry Andric     if (inMap[index]) {
35480b57cec5SDimitry Andric       depth++;
35490b57cec5SDimitry Andric     }
35500b57cec5SDimitry Andric   }
3551fe6060f1SDimitry Andric   if (inMap[pkgIdIndex]) {
3552fe6060f1SDimitry Andric     pkgLevel = idx;
3553fe6060f1SDimitry Andric     types[idx++] = KMP_HW_SOCKET;
3554fe6060f1SDimitry Andric   }
3555fe6060f1SDimitry Andric   if (inMap[coreIdIndex]) {
3556fe6060f1SDimitry Andric     coreLevel = idx;
3557fe6060f1SDimitry Andric     types[idx++] = KMP_HW_CORE;
3558fe6060f1SDimitry Andric   }
3559fe6060f1SDimitry Andric   if (inMap[threadIdIndex]) {
3560fe6060f1SDimitry Andric     threadLevel = idx;
3561fe6060f1SDimitry Andric     types[idx++] = KMP_HW_THREAD;
3562fe6060f1SDimitry Andric   }
35630b57cec5SDimitry Andric   KMP_ASSERT(depth > 0);
35640b57cec5SDimitry Andric 
35650b57cec5SDimitry Andric   // Construct the data structure that is to be returned.
3566fe6060f1SDimitry Andric   __kmp_topology = kmp_topology_t::allocate(num_avail, depth, types);
35670b57cec5SDimitry Andric 
35680b57cec5SDimitry Andric   for (i = 0; i < num_avail; ++i) {
35690b57cec5SDimitry Andric     unsigned os = threadInfo[i][osIdIndex];
35700b57cec5SDimitry Andric     int src_index;
3571fe6060f1SDimitry Andric     kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
3572fe6060f1SDimitry Andric     hw_thread.clear();
3573fe6060f1SDimitry Andric     hw_thread.os_id = os;
35740b57cec5SDimitry Andric 
3575fe6060f1SDimitry Andric     idx = 0;
35760b57cec5SDimitry Andric     for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
35770b57cec5SDimitry Andric       if (!inMap[src_index]) {
35780b57cec5SDimitry Andric         continue;
35790b57cec5SDimitry Andric       }
35800b57cec5SDimitry Andric       if (src_index == pkgIdIndex) {
3581fe6060f1SDimitry Andric         hw_thread.ids[pkgLevel] = threadInfo[i][src_index];
35820b57cec5SDimitry Andric       } else if (src_index == coreIdIndex) {
3583fe6060f1SDimitry Andric         hw_thread.ids[coreLevel] = threadInfo[i][src_index];
35840b57cec5SDimitry Andric       } else if (src_index == threadIdIndex) {
3585fe6060f1SDimitry Andric         hw_thread.ids[threadLevel] = threadInfo[i][src_index];
35860b57cec5SDimitry Andric       }
35870b57cec5SDimitry Andric     }
35880b57cec5SDimitry Andric   }
35890b57cec5SDimitry Andric 
35900b57cec5SDimitry Andric   __kmp_free(inMap);
35910b57cec5SDimitry Andric   __kmp_free(lastId);
35920b57cec5SDimitry Andric   __kmp_free(totals);
35930b57cec5SDimitry Andric   __kmp_free(maxCt);
35940b57cec5SDimitry Andric   __kmp_free(counts);
35950b57cec5SDimitry Andric   CLEANUP_THREAD_INFO;
3596fe6060f1SDimitry Andric   __kmp_topology->sort_ids();
3597fe6060f1SDimitry Andric   if (!__kmp_topology->check_ids()) {
3598fe6060f1SDimitry Andric     kmp_topology_t::deallocate(__kmp_topology);
3599fe6060f1SDimitry Andric     __kmp_topology = nullptr;
3600fe6060f1SDimitry Andric     *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
3601fe6060f1SDimitry Andric     return false;
3602fe6060f1SDimitry Andric   }
3603fe6060f1SDimitry Andric   return true;
36040b57cec5SDimitry Andric }
36050b57cec5SDimitry Andric 
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
//
// find_next(idx) yields the index (in __kmp_topology order) of the next
// hardware thread to consider after idx; find_next(-1) yields the first.
// A result >= the number of hw threads means "none found". On success,
// affinity.os_id_masks is allocated and *numUnique is set to the number of
// distinct granularity groups formed.
template <typename FindNextFunctionType>
static void __kmp_create_os_id_masks(unsigned *numUnique,
                                     kmp_affinity_t &affinity,
                                     FindNextFunctionType find_next) {
  // First form a table of affinity masks in order of OS thread id.
  int maxOsId;
  int i;
  int numAddrs = __kmp_topology->get_num_hw_threads();
  int depth = __kmp_topology->get_depth();
  const char *env_var = __kmp_get_affinity_env_var(affinity);
  KMP_ASSERT(numAddrs);
  KMP_ASSERT(depth);

  i = find_next(-1);
  // If could not find HW thread location with attributes, then return and
  // fallback to increment find_next and disregard core attributes.
  if (i >= numAddrs)
    return;

  // Find the largest OS id so the mask table can be sized maxOsId + 1.
  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    int osId = __kmp_topology->at(i).os_id;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  affinity.num_os_id_masks = maxOsId + 1;
  KMP_CPU_ALLOC_ARRAY(affinity.os_id_masks, affinity.num_os_id_masks);
  KMP_ASSERT(affinity.gran_levels >= 0);
  if (affinity.flags.verbose && (affinity.gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, env_var, affinity.gran_levels);
  }
  if (affinity.gran_levels >= (int)depth) {
    KMP_AFF_WARNING(affinity, AffThreadsMayMigrate);
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical kmp_hw_thread_t objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
  int unique = 0;
  int j = 0; // index of 1st thread on core
  int leader = 0;
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);

  // Seed the first group with the first eligible hw thread.
  i = j = leader = find_next(-1);
  KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
  kmp_full_mask_modifier_t full_mask;
  for (i = find_next(i); i < numAddrs; i = find_next(i)) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (__kmp_topology->is_close(leader, i, affinity)) {
      KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the OS Id mask table. Mark the first address as a leader.
    for (; j < i; j = find_next(j)) {
      int osId = __kmp_topology->at(j).os_id;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
      KMP_CPU_COPY(mask, sum);
      __kmp_topology->at(j).leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    full_mask.include(sum);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
  }

  // For every thread in last group, copy the mask to the thread's
  // entry in the OS Id mask table.
  for (; j < i; j = find_next(j)) {
    int osId = __kmp_topology->at(j).os_id;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
    KMP_CPU_COPY(mask, sum);
    __kmp_topology->at(j).leader = (j == leader);
  }
  full_mask.include(sum);
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  // See if the OS Id mask table further restricts or changes the full mask
  if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
    __kmp_topology->print(env_var);
  }

  *numUnique = unique;
}
37080b57cec5SDimitry Andric 
// Stuff for the affinity proclist parsers.  It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks; // growable table of parsed affinity masks
static int numNewMasks; // current capacity (in masks) of newMasks
static int nextNewMask; // number of masks stored so far / next free slot
37150b57cec5SDimitry Andric 
// Append a copy of _mask to the file-static newMasks table, first doubling
// the table's capacity if it is full (manual grow-and-copy, since the table
// uses the KMP_CPU_* allocation macros rather than realloc).
#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }
37340b57cec5SDimitry Andric 
// Append the affinity mask for OS proc _osId to newMasks; if _osId is out of
// range or has no available mask, emit a warning and skip it. NOTE: expands a
// reference to a local `affinity` — only usable inside functions that have a
// kmp_affinity_t &affinity in scope.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, _osId);                \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }
37440b57cec5SDimitry Andric 
37450b57cec5SDimitry Andric // Re-parse the proclist (for the explicit affinity type), and form the list
37460b57cec5SDimitry Andric // of affinity newMasks indexed by gtid.
__kmp_affinity_process_proclist(kmp_affinity_t & affinity)3747bdd1243dSDimitry Andric static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) {
37480b57cec5SDimitry Andric   int i;
3749bdd1243dSDimitry Andric   kmp_affin_mask_t **out_masks = &affinity.masks;
3750bdd1243dSDimitry Andric   unsigned *out_numMasks = &affinity.num_masks;
3751bdd1243dSDimitry Andric   const char *proclist = affinity.proclist;
3752bdd1243dSDimitry Andric   kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
3753bdd1243dSDimitry Andric   int maxOsId = affinity.num_os_id_masks - 1;
37540b57cec5SDimitry Andric   const char *scan = proclist;
37550b57cec5SDimitry Andric   const char *next = proclist;
37560b57cec5SDimitry Andric 
37570b57cec5SDimitry Andric   // We use malloc() for the temporary mask vector, so that we can use
37580b57cec5SDimitry Andric   // realloc() to extend it.
37590b57cec5SDimitry Andric   numNewMasks = 2;
37600b57cec5SDimitry Andric   KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
37610b57cec5SDimitry Andric   nextNewMask = 0;
37620b57cec5SDimitry Andric   kmp_affin_mask_t *sumMask;
37630b57cec5SDimitry Andric   KMP_CPU_ALLOC(sumMask);
37640b57cec5SDimitry Andric   int setSize = 0;
37650b57cec5SDimitry Andric 
37660b57cec5SDimitry Andric   for (;;) {
37670b57cec5SDimitry Andric     int start, end, stride;
37680b57cec5SDimitry Andric 
37690b57cec5SDimitry Andric     SKIP_WS(scan);
37700b57cec5SDimitry Andric     next = scan;
37710b57cec5SDimitry Andric     if (*next == '\0') {
37720b57cec5SDimitry Andric       break;
37730b57cec5SDimitry Andric     }
37740b57cec5SDimitry Andric 
37750b57cec5SDimitry Andric     if (*next == '{') {
37760b57cec5SDimitry Andric       int num;
37770b57cec5SDimitry Andric       setSize = 0;
37780b57cec5SDimitry Andric       next++; // skip '{'
37790b57cec5SDimitry Andric       SKIP_WS(next);
37800b57cec5SDimitry Andric       scan = next;
37810b57cec5SDimitry Andric 
37820b57cec5SDimitry Andric       // Read the first integer in the set.
37830b57cec5SDimitry Andric       KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
37840b57cec5SDimitry Andric       SKIP_DIGITS(next);
37850b57cec5SDimitry Andric       num = __kmp_str_to_int(scan, *next);
37860b57cec5SDimitry Andric       KMP_ASSERT2(num >= 0, "bad explicit proc list");
37870b57cec5SDimitry Andric 
37880b57cec5SDimitry Andric       // Copy the mask for that osId to the sum (union) mask.
37890b57cec5SDimitry Andric       if ((num > maxOsId) ||
37900b57cec5SDimitry Andric           (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3791bdd1243dSDimitry Andric         KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num);
37920b57cec5SDimitry Andric         KMP_CPU_ZERO(sumMask);
37930b57cec5SDimitry Andric       } else {
37940b57cec5SDimitry Andric         KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
37950b57cec5SDimitry Andric         setSize = 1;
37960b57cec5SDimitry Andric       }
37970b57cec5SDimitry Andric 
37980b57cec5SDimitry Andric       for (;;) {
37990b57cec5SDimitry Andric         // Check for end of set.
38000b57cec5SDimitry Andric         SKIP_WS(next);
38010b57cec5SDimitry Andric         if (*next == '}') {
38020b57cec5SDimitry Andric           next++; // skip '}'
38030b57cec5SDimitry Andric           break;
38040b57cec5SDimitry Andric         }
38050b57cec5SDimitry Andric 
38060b57cec5SDimitry Andric         // Skip optional comma.
38070b57cec5SDimitry Andric         if (*next == ',') {
38080b57cec5SDimitry Andric           next++;
38090b57cec5SDimitry Andric         }
38100b57cec5SDimitry Andric         SKIP_WS(next);
38110b57cec5SDimitry Andric 
38120b57cec5SDimitry Andric         // Read the next integer in the set.
38130b57cec5SDimitry Andric         scan = next;
38140b57cec5SDimitry Andric         KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38150b57cec5SDimitry Andric 
38160b57cec5SDimitry Andric         SKIP_DIGITS(next);
38170b57cec5SDimitry Andric         num = __kmp_str_to_int(scan, *next);
38180b57cec5SDimitry Andric         KMP_ASSERT2(num >= 0, "bad explicit proc list");
38190b57cec5SDimitry Andric 
38200b57cec5SDimitry Andric         // Add the mask for that osId to the sum mask.
38210b57cec5SDimitry Andric         if ((num > maxOsId) ||
38220b57cec5SDimitry Andric             (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3823bdd1243dSDimitry Andric           KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num);
38240b57cec5SDimitry Andric         } else {
38250b57cec5SDimitry Andric           KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
38260b57cec5SDimitry Andric           setSize++;
38270b57cec5SDimitry Andric         }
38280b57cec5SDimitry Andric       }
38290b57cec5SDimitry Andric       if (setSize > 0) {
38300b57cec5SDimitry Andric         ADD_MASK(sumMask);
38310b57cec5SDimitry Andric       }
38320b57cec5SDimitry Andric 
38330b57cec5SDimitry Andric       SKIP_WS(next);
38340b57cec5SDimitry Andric       if (*next == ',') {
38350b57cec5SDimitry Andric         next++;
38360b57cec5SDimitry Andric       }
38370b57cec5SDimitry Andric       scan = next;
38380b57cec5SDimitry Andric       continue;
38390b57cec5SDimitry Andric     }
38400b57cec5SDimitry Andric 
38410b57cec5SDimitry Andric     // Read the first integer.
38420b57cec5SDimitry Andric     KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38430b57cec5SDimitry Andric     SKIP_DIGITS(next);
38440b57cec5SDimitry Andric     start = __kmp_str_to_int(scan, *next);
38450b57cec5SDimitry Andric     KMP_ASSERT2(start >= 0, "bad explicit proc list");
38460b57cec5SDimitry Andric     SKIP_WS(next);
38470b57cec5SDimitry Andric 
38480b57cec5SDimitry Andric     // If this isn't a range, then add a mask to the list and go on.
38490b57cec5SDimitry Andric     if (*next != '-') {
38500b57cec5SDimitry Andric       ADD_MASK_OSID(start, osId2Mask, maxOsId);
38510b57cec5SDimitry Andric 
38520b57cec5SDimitry Andric       // Skip optional comma.
38530b57cec5SDimitry Andric       if (*next == ',') {
38540b57cec5SDimitry Andric         next++;
38550b57cec5SDimitry Andric       }
38560b57cec5SDimitry Andric       scan = next;
38570b57cec5SDimitry Andric       continue;
38580b57cec5SDimitry Andric     }
38590b57cec5SDimitry Andric 
38600b57cec5SDimitry Andric     // This is a range.  Skip over the '-' and read in the 2nd int.
38610b57cec5SDimitry Andric     next++; // skip '-'
38620b57cec5SDimitry Andric     SKIP_WS(next);
38630b57cec5SDimitry Andric     scan = next;
38640b57cec5SDimitry Andric     KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38650b57cec5SDimitry Andric     SKIP_DIGITS(next);
38660b57cec5SDimitry Andric     end = __kmp_str_to_int(scan, *next);
38670b57cec5SDimitry Andric     KMP_ASSERT2(end >= 0, "bad explicit proc list");
38680b57cec5SDimitry Andric 
38690b57cec5SDimitry Andric     // Check for a stride parameter
38700b57cec5SDimitry Andric     stride = 1;
38710b57cec5SDimitry Andric     SKIP_WS(next);
38720b57cec5SDimitry Andric     if (*next == ':') {
38730b57cec5SDimitry Andric       // A stride is specified.  Skip over the ':" and read the 3rd int.
38740b57cec5SDimitry Andric       int sign = +1;
38750b57cec5SDimitry Andric       next++; // skip ':'
38760b57cec5SDimitry Andric       SKIP_WS(next);
38770b57cec5SDimitry Andric       scan = next;
38780b57cec5SDimitry Andric       if (*next == '-') {
38790b57cec5SDimitry Andric         sign = -1;
38800b57cec5SDimitry Andric         next++;
38810b57cec5SDimitry Andric         SKIP_WS(next);
38820b57cec5SDimitry Andric         scan = next;
38830b57cec5SDimitry Andric       }
38840b57cec5SDimitry Andric       KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38850b57cec5SDimitry Andric       SKIP_DIGITS(next);
38860b57cec5SDimitry Andric       stride = __kmp_str_to_int(scan, *next);
38870b57cec5SDimitry Andric       KMP_ASSERT2(stride >= 0, "bad explicit proc list");
38880b57cec5SDimitry Andric       stride *= sign;
38890b57cec5SDimitry Andric     }
38900b57cec5SDimitry Andric 
38910b57cec5SDimitry Andric     // Do some range checks.
38920b57cec5SDimitry Andric     KMP_ASSERT2(stride != 0, "bad explicit proc list");
38930b57cec5SDimitry Andric     if (stride > 0) {
38940b57cec5SDimitry Andric       KMP_ASSERT2(start <= end, "bad explicit proc list");
38950b57cec5SDimitry Andric     } else {
38960b57cec5SDimitry Andric       KMP_ASSERT2(start >= end, "bad explicit proc list");
38970b57cec5SDimitry Andric     }
38980b57cec5SDimitry Andric     KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
38990b57cec5SDimitry Andric 
39000b57cec5SDimitry Andric     // Add the mask for each OS proc # to the list.
39010b57cec5SDimitry Andric     if (stride > 0) {
39020b57cec5SDimitry Andric       do {
39030b57cec5SDimitry Andric         ADD_MASK_OSID(start, osId2Mask, maxOsId);
39040b57cec5SDimitry Andric         start += stride;
39050b57cec5SDimitry Andric       } while (start <= end);
39060b57cec5SDimitry Andric     } else {
39070b57cec5SDimitry Andric       do {
39080b57cec5SDimitry Andric         ADD_MASK_OSID(start, osId2Mask, maxOsId);
39090b57cec5SDimitry Andric         start += stride;
39100b57cec5SDimitry Andric       } while (start >= end);
39110b57cec5SDimitry Andric     }
39120b57cec5SDimitry Andric 
39130b57cec5SDimitry Andric     // Skip optional comma.
39140b57cec5SDimitry Andric     SKIP_WS(next);
39150b57cec5SDimitry Andric     if (*next == ',') {
39160b57cec5SDimitry Andric       next++;
39170b57cec5SDimitry Andric     }
39180b57cec5SDimitry Andric     scan = next;
39190b57cec5SDimitry Andric   }
39200b57cec5SDimitry Andric 
39210b57cec5SDimitry Andric   *out_numMasks = nextNewMask;
39220b57cec5SDimitry Andric   if (nextNewMask == 0) {
39230b57cec5SDimitry Andric     *out_masks = NULL;
39240b57cec5SDimitry Andric     KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
39250b57cec5SDimitry Andric     return;
39260b57cec5SDimitry Andric   }
39270b57cec5SDimitry Andric   KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
39280b57cec5SDimitry Andric   for (i = 0; i < nextNewMask; i++) {
39290b57cec5SDimitry Andric     kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
39300b57cec5SDimitry Andric     kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
39310b57cec5SDimitry Andric     KMP_CPU_COPY(dest, src);
39320b57cec5SDimitry Andric   }
39330b57cec5SDimitry Andric   KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
39340b57cec5SDimitry Andric   KMP_CPU_FREE(sumMask);
39350b57cec5SDimitry Andric }
39360b57cec5SDimitry Andric 
39370b57cec5SDimitry Andric /*-----------------------------------------------------------------------------
39380b57cec5SDimitry Andric Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places.  Again, here is the grammar:
39400b57cec5SDimitry Andric 
39410b57cec5SDimitry Andric place_list := place
39420b57cec5SDimitry Andric place_list := place , place_list
39430b57cec5SDimitry Andric place := num
39440b57cec5SDimitry Andric place := place : num
39450b57cec5SDimitry Andric place := place : num : signed
place := { subplace_list }
39470b57cec5SDimitry Andric place := ! place                  // (lowest priority)
39480b57cec5SDimitry Andric subplace_list := subplace
39490b57cec5SDimitry Andric subplace_list := subplace , subplace_list
39500b57cec5SDimitry Andric subplace := num
39510b57cec5SDimitry Andric subplace := num : num
39520b57cec5SDimitry Andric subplace := num : num : signed
39530b57cec5SDimitry Andric signed := num
39540b57cec5SDimitry Andric signed := + signed
39550b57cec5SDimitry Andric signed := - signed
39560b57cec5SDimitry Andric -----------------------------------------------------------------------------*/
// Parse one brace-enclosed subplace_list (see the grammar above) starting at
// *scan (just past the '{'), OR'ing the masks of all referenced OS procs into
// tempMask and bumping *setSize for each valid proc added. Leaves *scan
// pointing at the closing '}' for the caller to consume. Invalid proc ids
// produce a warning and are skipped; malformed syntax asserts.
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affinity_t &affinity, int maxOsId,
                                        kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;
  kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;

  // Each iteration consumes one subplace: num | num:count | num:count:stride
  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      // Bare "num" form: a single proc id.
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start);
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      // "num:count" form: count consecutive procs starting at start.
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start);
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter
    // Stride may carry any number of leading '+'/'-' signs; fold them into
    // a single sign before reading the digits.
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // valid follow sets are ',' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      // "num:count:stride" form: count procs stepping by stride.
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start);
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
40750b57cec5SDimitry Andric 
__kmp_process_place(const char ** scan,kmp_affinity_t & affinity,int maxOsId,kmp_affin_mask_t * tempMask,int * setSize)4076bdd1243dSDimitry Andric static void __kmp_process_place(const char **scan, kmp_affinity_t &affinity,
40770b57cec5SDimitry Andric                                 int maxOsId, kmp_affin_mask_t *tempMask,
40780b57cec5SDimitry Andric                                 int *setSize) {
40790b57cec5SDimitry Andric   const char *next;
4080bdd1243dSDimitry Andric   kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
40810b57cec5SDimitry Andric 
40820b57cec5SDimitry Andric   // valid follow sets are '{' '!' and num
40830b57cec5SDimitry Andric   SKIP_WS(*scan);
40840b57cec5SDimitry Andric   if (**scan == '{') {
40850b57cec5SDimitry Andric     (*scan)++; // skip '{'
4086bdd1243dSDimitry Andric     __kmp_process_subplace_list(scan, affinity, maxOsId, tempMask, setSize);
40870b57cec5SDimitry Andric     KMP_ASSERT2(**scan == '}', "bad explicit places list");
40880b57cec5SDimitry Andric     (*scan)++; // skip '}'
40890b57cec5SDimitry Andric   } else if (**scan == '!') {
40900b57cec5SDimitry Andric     (*scan)++; // skip '!'
4091bdd1243dSDimitry Andric     __kmp_process_place(scan, affinity, maxOsId, tempMask, setSize);
40920b57cec5SDimitry Andric     KMP_CPU_COMPLEMENT(maxOsId, tempMask);
40930b57cec5SDimitry Andric   } else if ((**scan >= '0') && (**scan <= '9')) {
40940b57cec5SDimitry Andric     next = *scan;
40950b57cec5SDimitry Andric     SKIP_DIGITS(next);
40960b57cec5SDimitry Andric     int num = __kmp_str_to_int(*scan, *next);
40970b57cec5SDimitry Andric     KMP_ASSERT(num >= 0);
40980b57cec5SDimitry Andric     if ((num > maxOsId) ||
40990b57cec5SDimitry Andric         (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
4100bdd1243dSDimitry Andric       KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num);
41010b57cec5SDimitry Andric     } else {
41020b57cec5SDimitry Andric       KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
41030b57cec5SDimitry Andric       (*setSize)++;
41040b57cec5SDimitry Andric     }
41050b57cec5SDimitry Andric     *scan = next; // skip num
41060b57cec5SDimitry Andric   } else {
41070b57cec5SDimitry Andric     KMP_ASSERT2(0, "bad explicit places list");
41080b57cec5SDimitry Andric   }
41090b57cec5SDimitry Andric }
41100b57cec5SDimitry Andric 
41110b57cec5SDimitry Andric // static void
// Build affinity.masks (the place list) from the explicit places string in
// affinity.proclist. The grammar, per place-list entry, is
//   place [ ':' count [ ':' stride ] ]
// where each place is parsed by __kmp_process_place() and count/stride
// replicate the place by shifting every OS proc id in it by stride, count-1
// times. Results accumulate in the file-scope newMasks/nextNewMask arrays
// (via the ADD_MASK macro) and are copied into affinity.masks at the end.
void __kmp_affinity_process_placelist(kmp_affinity_t &affinity) {
  int i, j, count, stride, sign;
  kmp_affin_mask_t **out_masks = &affinity.masks;
  unsigned *out_numMasks = &affinity.num_masks;
  const char *placelist = affinity.proclist;
  kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
  // Highest OS proc id for which an os-id mask exists.
  int maxOsId = affinity.num_os_id_masks - 1;
  const char *scan = placelist;
  const char *next = placelist;

  // newMasks grows on demand inside ADD_MASK; start small.
  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial
  //   place to form the current place
  // previousMask contains the previous place
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  // Number of valid OS procs currently collected in tempMask.
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, affinity, maxOsId, tempMask, &setSize);

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      // No count/stride suffix: emit the place as-is (if non-empty).
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      // No explicit stride: default to +1.
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter; any run of '+'/'-' prefixes is folded into
      // the sign (e.g. "--3" means +3).
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride
    for (i = 0; i < count; i++) {
      // An empty place cannot be shifted further; stop replicating.
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from that
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        // Drop procs whose shifted id falls outside the known range or is
        // not an available OS proc.
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          // Suppress the warning on the final iteration: walking off the
          // end of the machine there is expected, not a user error.
          if (i < count - 1) {
            KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // valid follow sets are ',' and EOL
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  // Publish the accumulated masks to the affinity settings object.
  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
42590b57cec5SDimitry Andric 
42600b57cec5SDimitry Andric #undef ADD_MASK
42610b57cec5SDimitry Andric #undef ADD_MASK_OSID
42620b57cec5SDimitry Andric 
42630b57cec5SDimitry Andric // This function figures out the deepest level at which there is at least one
42640b57cec5SDimitry Andric // cluster/core with more than one processing unit bound to it.
__kmp_affinity_find_core_level(int nprocs,int bottom_level)4265fe6060f1SDimitry Andric static int __kmp_affinity_find_core_level(int nprocs, int bottom_level) {
42660b57cec5SDimitry Andric   int core_level = 0;
42670b57cec5SDimitry Andric 
42680b57cec5SDimitry Andric   for (int i = 0; i < nprocs; i++) {
4269fe6060f1SDimitry Andric     const kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
42700b57cec5SDimitry Andric     for (int j = bottom_level; j > 0; j--) {
4271fe6060f1SDimitry Andric       if (hw_thread.ids[j] > 0) {
42720b57cec5SDimitry Andric         if (core_level < (j - 1)) {
42730b57cec5SDimitry Andric           core_level = j - 1;
42740b57cec5SDimitry Andric         }
42750b57cec5SDimitry Andric       }
42760b57cec5SDimitry Andric     }
42770b57cec5SDimitry Andric   }
42780b57cec5SDimitry Andric   return core_level;
42790b57cec5SDimitry Andric }
42800b57cec5SDimitry Andric 
42810b57cec5SDimitry Andric // This function counts number of clusters/cores at given level.
// Returns the number of clusters/cores at core_level in the topology.
// nprocs and bottom_level are unused here but retained so the helper keeps
// the same signature as its sibling topology-query functions.
static int __kmp_affinity_compute_ncores(int nprocs, int bottom_level,
                                         int core_level) {
  return __kmp_topology->get_count(core_level);
}
4286fe6060f1SDimitry Andric // This function finds to which cluster/core given processing unit is bound.
__kmp_affinity_find_core(int proc,int bottom_level,int core_level)4287fe6060f1SDimitry Andric static int __kmp_affinity_find_core(int proc, int bottom_level,
4288fe6060f1SDimitry Andric                                     int core_level) {
4289fe6060f1SDimitry Andric   int core = 0;
4290fe6060f1SDimitry Andric   KMP_DEBUG_ASSERT(proc >= 0 && proc < __kmp_topology->get_num_hw_threads());
4291fe6060f1SDimitry Andric   for (int i = 0; i <= proc; ++i) {
4292fe6060f1SDimitry Andric     if (i + 1 <= proc) {
4293fe6060f1SDimitry Andric       for (int j = 0; j <= core_level; ++j) {
4294fe6060f1SDimitry Andric         if (__kmp_topology->at(i + 1).sub_ids[j] !=
4295fe6060f1SDimitry Andric             __kmp_topology->at(i).sub_ids[j]) {
4296fe6060f1SDimitry Andric           core++;
42970b57cec5SDimitry Andric           break;
42980b57cec5SDimitry Andric         }
42990b57cec5SDimitry Andric       }
43000b57cec5SDimitry Andric     }
43010b57cec5SDimitry Andric   }
4302fe6060f1SDimitry Andric   return core;
43030b57cec5SDimitry Andric }
43040b57cec5SDimitry Andric 
43050b57cec5SDimitry Andric // This function finds maximal number of processing units bound to a
43060b57cec5SDimitry Andric // cluster/core at given level.
__kmp_affinity_max_proc_per_core(int nprocs,int bottom_level,int core_level)4307fe6060f1SDimitry Andric static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
43080b57cec5SDimitry Andric                                             int core_level) {
4309fe6060f1SDimitry Andric   if (core_level >= bottom_level)
4310fe6060f1SDimitry Andric     return 1;
4311fe6060f1SDimitry Andric   int thread_level = __kmp_topology->get_level(KMP_HW_THREAD);
4312fe6060f1SDimitry Andric   return __kmp_topology->calculate_ratio(thread_level, core_level);
43130b57cec5SDimitry Andric }
43140b57cec5SDimitry Andric 
// File-scope scratch state shared by the affinity-initialization code below.
static int *procarr = NULL;
static int __kmp_aff_depth = 0;
// Maps an OS proc id to its hw-thread index in __kmp_topology; built in
// __kmp_affinity_get_topology_info().
static int *__kmp_osid_to_hwthread_map = NULL;
4318bdd1243dSDimitry Andric 
__kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t * mask,kmp_affinity_ids_t & ids,kmp_affinity_attrs_t & attrs)4319bdd1243dSDimitry Andric static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
4320bdd1243dSDimitry Andric                                                   kmp_affinity_ids_t &ids,
4321bdd1243dSDimitry Andric                                                   kmp_affinity_attrs_t &attrs) {
4322bdd1243dSDimitry Andric   if (!KMP_AFFINITY_CAPABLE())
4323bdd1243dSDimitry Andric     return;
4324bdd1243dSDimitry Andric 
4325bdd1243dSDimitry Andric   // Initiailze ids and attrs thread data
4326bdd1243dSDimitry Andric   for (int i = 0; i < KMP_HW_LAST; ++i)
43275f757f3fSDimitry Andric     ids.ids[i] = kmp_hw_thread_t::UNKNOWN_ID;
4328bdd1243dSDimitry Andric   attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
4329bdd1243dSDimitry Andric 
4330bdd1243dSDimitry Andric   // Iterate through each os id within the mask and determine
4331bdd1243dSDimitry Andric   // the topology id and attribute information
4332bdd1243dSDimitry Andric   int cpu;
4333bdd1243dSDimitry Andric   int depth = __kmp_topology->get_depth();
4334bdd1243dSDimitry Andric   KMP_CPU_SET_ITERATE(cpu, mask) {
4335bdd1243dSDimitry Andric     int osid_idx = __kmp_osid_to_hwthread_map[cpu];
43365f757f3fSDimitry Andric     ids.os_id = cpu;
4337bdd1243dSDimitry Andric     const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx);
4338bdd1243dSDimitry Andric     for (int level = 0; level < depth; ++level) {
4339bdd1243dSDimitry Andric       kmp_hw_t type = __kmp_topology->get_type(level);
4340bdd1243dSDimitry Andric       int id = hw_thread.sub_ids[level];
43415f757f3fSDimitry Andric       if (ids.ids[type] == kmp_hw_thread_t::UNKNOWN_ID || ids.ids[type] == id) {
43425f757f3fSDimitry Andric         ids.ids[type] = id;
4343bdd1243dSDimitry Andric       } else {
4344bdd1243dSDimitry Andric         // This mask spans across multiple topology units, set it as such
4345bdd1243dSDimitry Andric         // and mark every level below as such as well.
43465f757f3fSDimitry Andric         ids.ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
4347bdd1243dSDimitry Andric         for (; level < depth; ++level) {
4348bdd1243dSDimitry Andric           kmp_hw_t type = __kmp_topology->get_type(level);
43495f757f3fSDimitry Andric           ids.ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
4350bdd1243dSDimitry Andric         }
4351bdd1243dSDimitry Andric       }
4352bdd1243dSDimitry Andric     }
4353bdd1243dSDimitry Andric     if (!attrs.valid) {
4354bdd1243dSDimitry Andric       attrs.core_type = hw_thread.attrs.get_core_type();
4355bdd1243dSDimitry Andric       attrs.core_eff = hw_thread.attrs.get_core_eff();
4356bdd1243dSDimitry Andric       attrs.valid = 1;
4357bdd1243dSDimitry Andric     } else {
4358bdd1243dSDimitry Andric       // This mask spans across multiple attributes, set it as such
4359bdd1243dSDimitry Andric       if (attrs.core_type != hw_thread.attrs.get_core_type())
4360bdd1243dSDimitry Andric         attrs.core_type = KMP_HW_CORE_TYPE_UNKNOWN;
4361bdd1243dSDimitry Andric       if (attrs.core_eff != hw_thread.attrs.get_core_eff())
4362bdd1243dSDimitry Andric         attrs.core_eff = kmp_hw_attr_t::UNKNOWN_CORE_EFF;
4363bdd1243dSDimitry Andric     }
4364bdd1243dSDimitry Andric   }
4365bdd1243dSDimitry Andric }
4366bdd1243dSDimitry Andric 
// Cache the topology ids and core attributes implied by a thread's current
// affinity mask into its thread descriptor (th_topology_ids/attrs).
static void __kmp_affinity_get_thread_topology_info(kmp_info_t *th) {
  if (!KMP_AFFINITY_CAPABLE())
    return;
  const kmp_affin_mask_t *mask = th->th.th_affin_mask;
  kmp_affinity_ids_t &ids = th->th.th_topology_ids;
  kmp_affinity_attrs_t &attrs = th->th.th_topology_attrs;
  __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
}
4375bdd1243dSDimitry Andric 
4376bdd1243dSDimitry Andric // Assign the topology information to each place in the place list
4377bdd1243dSDimitry Andric // A thread can then grab not only its affinity mask, but the topology
4378bdd1243dSDimitry Andric // information associated with that mask. e.g., Which socket is a thread on
static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
  if (!KMP_AFFINITY_CAPABLE())
    return;
  // os_id_masks are only required for real affinity types; for
  // affinity_none the place list is just the single full mask.
  if (affinity.type != affinity_none) {
    KMP_ASSERT(affinity.num_os_id_masks);
    KMP_ASSERT(affinity.os_id_masks);
  }
  KMP_ASSERT(affinity.num_masks);
  KMP_ASSERT(affinity.masks);
  KMP_ASSERT(__kmp_affin_fullMask);

  int max_cpu = __kmp_affin_fullMask->get_max_cpu();
  int num_hw_threads = __kmp_topology->get_num_hw_threads();

  // Allocate thread topology information (lazily; may already exist from a
  // previous initialization).
  if (!affinity.ids) {
    affinity.ids = (kmp_affinity_ids_t *)__kmp_allocate(
        sizeof(kmp_affinity_ids_t) * affinity.num_masks);
  }
  if (!affinity.attrs) {
    affinity.attrs = (kmp_affinity_attrs_t *)__kmp_allocate(
        sizeof(kmp_affinity_attrs_t) * affinity.num_masks);
  }
  if (!__kmp_osid_to_hwthread_map) {
    // Want the +1 because max_cpu should be valid index into map
    __kmp_osid_to_hwthread_map =
        (int *)__kmp_allocate(sizeof(int) * (max_cpu + 1));
  }

  // Create the OS proc to hardware thread map
  for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
    int os_id = __kmp_topology->at(hw_thread).os_id;
    if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask))
      __kmp_osid_to_hwthread_map[os_id] = hw_thread;
  }

  // Resolve the topology ids/attributes for every place (mask) in the list.
  for (unsigned i = 0; i < affinity.num_masks; ++i) {
    kmp_affinity_ids_t &ids = affinity.ids[i];
    kmp_affinity_attrs_t &attrs = affinity.attrs[i];
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.masks, i);
    __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
  }
}
44220b57cec5SDimitry Andric 
44235f757f3fSDimitry Andric // Called when __kmp_topology is ready
static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
  // Initialize other data structures which depend on the topology
  // (no-op if the topology is absent or empty).
  if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
    machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
    // Fill in per-place topology ids/attributes for this affinity setting.
    __kmp_affinity_get_topology_info(affinity);
#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
    __kmp_first_osid_with_ecore = __kmp_get_first_osid_with_ecore();
#endif
  }
}
44345f757f3fSDimitry Andric 
44350b57cec5SDimitry Andric // Create a one element mask array (set of places) which only contains the
44360b57cec5SDimitry Andric // initial process's affinity mask
static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
  KMP_ASSERT(__kmp_affin_fullMask != NULL);
  KMP_ASSERT(affinity.type == affinity_none);
  KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
  // The place list degenerates to one place covering every available proc.
  affinity.num_masks = 1;
  KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
  kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
  KMP_CPU_COPY(dest, __kmp_affin_fullMask);
  // Topology-dependent caches still need setting up even for "none".
  __kmp_aux_affinity_initialize_other_data(affinity);
}
44470b57cec5SDimitry Andric 
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors that
  // we know about on the machine.
  int verbose = affinity.flags.verbose;
  const char *env_var = affinity.env_var;

  // Already initialized
  if (__kmp_affin_fullMask && __kmp_affin_origMask)
    return;

  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (__kmp_affin_origMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_origMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
    // Make a copy before possibly expanding to the entire machine mask
    __kmp_affin_origMask->copy(__kmp_affin_fullMask);
    if (affinity.flags.respect) {
      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      // More procs in the mask than the system reports is an inconsistent
      // state; disable affinity entirely rather than proceed.
      if (__kmp_avail_proc > __kmp_xproc) {
        KMP_AFF_WARNING(affinity, ErrorInitializeAffinity);
        affinity.type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }

      if (verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetRespect, env_var, buf);
      }
    } else {
      if (verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetNotRespect, env_var, buf);
      }
      // Not respecting the initial mask: widen to the whole machine.
      __kmp_avail_proc =
          __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups <= 1) {
        // Copy expanded full mask if topology has single processor group
        __kmp_affin_origMask->copy(__kmp_affin_fullMask);
      }
      // Set the process affinity mask since threads' affinity
      // masks must be subset of process mask in Windows* OS
      __kmp_affin_fullMask->set_process_affinity(true);
#endif
    }
  }
}
45140b57cec5SDimitry Andric 
__kmp_aux_affinity_initialize_topology(kmp_affinity_t & affinity)4515bdd1243dSDimitry Andric static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
4516bdd1243dSDimitry Andric   bool success = false;
4517bdd1243dSDimitry Andric   const char *env_var = affinity.env_var;
45180b57cec5SDimitry Andric   kmp_i18n_id_t msg_id = kmp_i18n_null;
4519bdd1243dSDimitry Andric   int verbose = affinity.flags.verbose;
45200b57cec5SDimitry Andric 
45210b57cec5SDimitry Andric   // For backward compatibility, setting KMP_CPUINFO_FILE =>
45220b57cec5SDimitry Andric   // KMP_TOPOLOGY_METHOD=cpuinfo
45230b57cec5SDimitry Andric   if ((__kmp_cpuinfo_file != NULL) &&
45240b57cec5SDimitry Andric       (__kmp_affinity_top_method == affinity_top_method_all)) {
45250b57cec5SDimitry Andric     __kmp_affinity_top_method = affinity_top_method_cpuinfo;
45260b57cec5SDimitry Andric   }
45270b57cec5SDimitry Andric 
45280b57cec5SDimitry Andric   if (__kmp_affinity_top_method == affinity_top_method_all) {
45290b57cec5SDimitry Andric // In the default code path, errors are not fatal - we just try using
45300b57cec5SDimitry Andric // another method. We only emit a warning message if affinity is on, or the
// verbose flag is set, and the nowarnings flag was not set.
45320b57cec5SDimitry Andric #if KMP_USE_HWLOC
4533fe6060f1SDimitry Andric     if (!success &&
45340b57cec5SDimitry Andric         __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
45350b57cec5SDimitry Andric       if (!__kmp_hwloc_error) {
4536fe6060f1SDimitry Andric         success = __kmp_affinity_create_hwloc_map(&msg_id);
4537bdd1243dSDimitry Andric         if (!success && verbose) {
4538bdd1243dSDimitry Andric           KMP_INFORM(AffIgnoringHwloc, env_var);
45390b57cec5SDimitry Andric         }
4540bdd1243dSDimitry Andric       } else if (verbose) {
4541bdd1243dSDimitry Andric         KMP_INFORM(AffIgnoringHwloc, env_var);
45420b57cec5SDimitry Andric       }
45430b57cec5SDimitry Andric     }
45440b57cec5SDimitry Andric #endif
45450b57cec5SDimitry Andric 
45460b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4547fe6060f1SDimitry Andric     if (!success) {
4548fe6060f1SDimitry Andric       success = __kmp_affinity_create_x2apicid_map(&msg_id);
4549bdd1243dSDimitry Andric       if (!success && verbose && msg_id != kmp_i18n_null) {
4550bdd1243dSDimitry Andric         KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45510b57cec5SDimitry Andric       }
45520b57cec5SDimitry Andric     }
4553fe6060f1SDimitry Andric     if (!success) {
4554fe6060f1SDimitry Andric       success = __kmp_affinity_create_apicid_map(&msg_id);
4555bdd1243dSDimitry Andric       if (!success && verbose && msg_id != kmp_i18n_null) {
4556bdd1243dSDimitry Andric         KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45570b57cec5SDimitry Andric       }
45580b57cec5SDimitry Andric     }
45590b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
45600b57cec5SDimitry Andric 
4561439352acSDimitry Andric #if KMP_OS_LINUX || KMP_OS_AIX
4562fe6060f1SDimitry Andric     if (!success) {
4563fe6060f1SDimitry Andric       int line = 0;
4564fe6060f1SDimitry Andric       success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
4565bdd1243dSDimitry Andric       if (!success && verbose && msg_id != kmp_i18n_null) {
4566bdd1243dSDimitry Andric         KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45670b57cec5SDimitry Andric       }
45680b57cec5SDimitry Andric     }
#endif /* KMP_OS_LINUX || KMP_OS_AIX */
45700b57cec5SDimitry Andric 
45710b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
4572fe6060f1SDimitry Andric     if (!success && (__kmp_num_proc_groups > 1)) {
4573fe6060f1SDimitry Andric       success = __kmp_affinity_create_proc_group_map(&msg_id);
4574bdd1243dSDimitry Andric       if (!success && verbose && msg_id != kmp_i18n_null) {
4575bdd1243dSDimitry Andric         KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45760b57cec5SDimitry Andric       }
45770b57cec5SDimitry Andric     }
45780b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
45790b57cec5SDimitry Andric 
4580fe6060f1SDimitry Andric     if (!success) {
4581fe6060f1SDimitry Andric       success = __kmp_affinity_create_flat_map(&msg_id);
4582bdd1243dSDimitry Andric       if (!success && verbose && msg_id != kmp_i18n_null) {
4583bdd1243dSDimitry Andric         KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45840b57cec5SDimitry Andric       }
4585fe6060f1SDimitry Andric       KMP_ASSERT(success);
45860b57cec5SDimitry Andric     }
45870b57cec5SDimitry Andric   }
45880b57cec5SDimitry Andric 
// If the user has specified that a particular topology discovery method is to
// be used, then we abort if that method fails. The exception is group
// affinity, which might have been implicitly set.
45920b57cec5SDimitry Andric #if KMP_USE_HWLOC
45930b57cec5SDimitry Andric   else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
45940b57cec5SDimitry Andric     KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4595fe6060f1SDimitry Andric     success = __kmp_affinity_create_hwloc_map(&msg_id);
4596fe6060f1SDimitry Andric     if (!success) {
4597fe6060f1SDimitry Andric       KMP_ASSERT(msg_id != kmp_i18n_null);
4598fe6060f1SDimitry Andric       KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
45990b57cec5SDimitry Andric     }
46000b57cec5SDimitry Andric   }
46010b57cec5SDimitry Andric #endif // KMP_USE_HWLOC
46020b57cec5SDimitry Andric 
46030b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4604fe6060f1SDimitry Andric   else if (__kmp_affinity_top_method == affinity_top_method_x2apicid ||
4605fe6060f1SDimitry Andric            __kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
4606fe6060f1SDimitry Andric     success = __kmp_affinity_create_x2apicid_map(&msg_id);
4607fe6060f1SDimitry Andric     if (!success) {
46080b57cec5SDimitry Andric       KMP_ASSERT(msg_id != kmp_i18n_null);
46090b57cec5SDimitry Andric       KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
46100b57cec5SDimitry Andric     }
46110b57cec5SDimitry Andric   } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4612fe6060f1SDimitry Andric     success = __kmp_affinity_create_apicid_map(&msg_id);
4613fe6060f1SDimitry Andric     if (!success) {
46140b57cec5SDimitry Andric       KMP_ASSERT(msg_id != kmp_i18n_null);
46150b57cec5SDimitry Andric       KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
46160b57cec5SDimitry Andric     }
46170b57cec5SDimitry Andric   }
46180b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
46190b57cec5SDimitry Andric 
46200b57cec5SDimitry Andric   else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
46210b57cec5SDimitry Andric     int line = 0;
4622fe6060f1SDimitry Andric     success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
4623fe6060f1SDimitry Andric     if (!success) {
46240b57cec5SDimitry Andric       KMP_ASSERT(msg_id != kmp_i18n_null);
4625fe6060f1SDimitry Andric       const char *filename = __kmp_cpuinfo_get_filename();
46260b57cec5SDimitry Andric       if (line > 0) {
46270b57cec5SDimitry Andric         KMP_FATAL(FileLineMsgExiting, filename, line,
46280b57cec5SDimitry Andric                   __kmp_i18n_catgets(msg_id));
46290b57cec5SDimitry Andric       } else {
46300b57cec5SDimitry Andric         KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
46310b57cec5SDimitry Andric       }
46320b57cec5SDimitry Andric     }
46330b57cec5SDimitry Andric   }
46340b57cec5SDimitry Andric 
46350b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
46360b57cec5SDimitry Andric   else if (__kmp_affinity_top_method == affinity_top_method_group) {
4637fe6060f1SDimitry Andric     success = __kmp_affinity_create_proc_group_map(&msg_id);
4638fe6060f1SDimitry Andric     KMP_ASSERT(success);
4639fe6060f1SDimitry Andric     if (!success) {
46400b57cec5SDimitry Andric       KMP_ASSERT(msg_id != kmp_i18n_null);
46410b57cec5SDimitry Andric       KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
46420b57cec5SDimitry Andric     }
46430b57cec5SDimitry Andric   }
46440b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
46450b57cec5SDimitry Andric 
46460b57cec5SDimitry Andric   else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4647fe6060f1SDimitry Andric     success = __kmp_affinity_create_flat_map(&msg_id);
46480b57cec5SDimitry Andric     // should not fail
4649fe6060f1SDimitry Andric     KMP_ASSERT(success);
46500b57cec5SDimitry Andric   }
46510b57cec5SDimitry Andric 
4652fe6060f1SDimitry Andric   // Early exit if topology could not be created
4653fe6060f1SDimitry Andric   if (!__kmp_topology) {
4654fcaf7f86SDimitry Andric     if (KMP_AFFINITY_CAPABLE()) {
4655bdd1243dSDimitry Andric       KMP_AFF_WARNING(affinity, ErrorInitializeAffinity);
46560b57cec5SDimitry Andric     }
4657fe6060f1SDimitry Andric     if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 &&
4658fe6060f1SDimitry Andric         __kmp_ncores > 0) {
4659fe6060f1SDimitry Andric       __kmp_topology = kmp_topology_t::allocate(0, 0, NULL);
4660fe6060f1SDimitry Andric       __kmp_topology->canonicalize(nPackages, nCoresPerPkg,
4661fe6060f1SDimitry Andric                                    __kmp_nThreadsPerCore, __kmp_ncores);
4662bdd1243dSDimitry Andric       if (verbose) {
4663bdd1243dSDimitry Andric         __kmp_topology->print(env_var);
4664fe6060f1SDimitry Andric       }
4665fe6060f1SDimitry Andric     }
4666bdd1243dSDimitry Andric     return false;
46670b57cec5SDimitry Andric   }
46680b57cec5SDimitry Andric 
4669bdd1243dSDimitry Andric   // Canonicalize, print (if requested), apply KMP_HW_SUBSET
4670fe6060f1SDimitry Andric   __kmp_topology->canonicalize();
4671bdd1243dSDimitry Andric   if (verbose)
4672bdd1243dSDimitry Andric     __kmp_topology->print(env_var);
4673fe6060f1SDimitry Andric   bool filtered = __kmp_topology->filter_hw_subset();
4674bdd1243dSDimitry Andric   if (filtered && verbose)
4675fe6060f1SDimitry Andric     __kmp_topology->print("KMP_HW_SUBSET");
4676bdd1243dSDimitry Andric   return success;
4677bdd1243dSDimitry Andric }
4678bdd1243dSDimitry Andric 
// Initialize one kmp_affinity_t object: discover/validate the machine
// topology (for the regular affinity object only), build the table of
// affinity masks (affinity.masks) indexed by place, and mark the object
// initialized. Early-outs set affinity.type = affinity_none and still mark
// the object initialized so a later call is a no-op.
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
  // Distinguish the two global affinity objects this may be called with:
  // __kmp_affinity (regular threads) and __kmp_hh_affinity (hidden helpers).
  bool is_regular_affinity = (&affinity == &__kmp_affinity);
  bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
  const char *env_var = __kmp_get_affinity_env_var(affinity);

  // Already initialized; nothing to do.
  if (affinity.flags.initialized) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Full/original process masks and the topology are shared global state;
  // only the regular affinity object is responsible for creating them.
  if (is_regular_affinity && (!__kmp_affin_fullMask || !__kmp_affin_origMask))
    __kmp_aux_affinity_initialize_masks(affinity);

  if (is_regular_affinity && !__kmp_topology) {
    bool success = __kmp_aux_affinity_initialize_topology(affinity);
    if (success) {
      KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
    } else {
      // Topology discovery failed: fall back to "no affinity" and disable
      // the affinity API globally.
      affinity.type = affinity_none;
      KMP_AFFINITY_DISABLE();
    }
  }

  // If KMP_AFFINITY=none, then only create the single "none" place
  // which is the process's initial affinity mask or the number of
  // hardware threads depending on respect,norespect
  if (affinity.type == affinity_none) {
    __kmp_create_affinity_none_places(affinity);
#if KMP_USE_HIER_SCHED
    __kmp_dispatch_set_hierarchy_values();
#endif
    affinity.flags.initialized = TRUE;
    return;
  }

  __kmp_topology->set_granularity(affinity);
  int depth = __kmp_topology->get_depth();

  // Create the table of masks, indexed by thread Id.
  // numUnique is filled in by __kmp_create_os_id_masks below.
  unsigned numUnique;
  int numAddrs = __kmp_topology->get_num_hw_threads();
  // If OMP_PLACES=cores:<attribute> specified, then attempt
  // to make OS Id mask table using those attributes
  if (affinity.core_attr_gran.valid) {
    // The lambda returns, for a given hw-thread index, the index of the next
    // hw thread whose core attributes match the requested granularity
    // attribute (or numAddrs when none remain).
    __kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) {
      KMP_ASSERT(idx >= -1);
      for (int i = idx + 1; i < numAddrs; ++i)
        if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
          return i;
      return numAddrs;
    });
    if (!affinity.os_id_masks) {
      // No hw thread matched the attribute; warn and fall back below.
      const char *core_attribute;
      if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
        core_attribute = "core_efficiency";
      else
        core_attribute = "core_type";
      KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var,
                      core_attribute,
                      __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true))
    }
  }
  // If core attributes did not work, or none were specified,
  // then make OS Id mask table using typical incremental way.
  if (!affinity.os_id_masks) {
    __kmp_create_os_id_masks(&numUnique, affinity, [](int idx) {
      KMP_ASSERT(idx >= -1);
      return idx + 1;
    });
  }
  if (affinity.gran_levels == 0) {
    // At the finest granularity every available proc must be unique.
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  switch (affinity.type) {

  case affinity_explicit:
    KMP_DEBUG_ASSERT(affinity.proclist != NULL);
    if (is_hidden_helper_affinity ||
        __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
      __kmp_affinity_process_proclist(affinity);
    } else {
      __kmp_affinity_process_placelist(affinity);
    }
    if (affinity.num_masks == 0) {
      // The explicit list produced no usable places; degrade to "none".
      KMP_AFF_WARNING(affinity, AffNoValidProcID);
      affinity.type = affinity_none;
      __kmp_create_affinity_none_places(affinity);
      affinity.flags.initialized = TRUE;
      return;
    }
    break;

  // The other affinity types rely on sorting the hardware threads according to
  // some permutation of the machine topology tree. Set affinity.compact
  // and affinity.offset appropriately, then jump to a common code
  // fragment to do the sort and create the array of affinity masks.
  case affinity_logical:
    affinity.compact = 0;
    if (affinity.offset) {
      affinity.offset =
          __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
    }
    goto sortTopology;

  case affinity_physical:
    if (__kmp_nThreadsPerCore > 1) {
      affinity.compact = 1;
      if (affinity.compact >= depth) {
        affinity.compact = 0;
      }
    } else {
      affinity.compact = 0;
    }
    if (affinity.offset) {
      affinity.offset =
          __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
    }
    goto sortTopology;

  case affinity_scatter:
    // Scatter is the mirror image of compact: invert the compaction level.
    if (affinity.compact >= depth) {
      affinity.compact = 0;
    } else {
      affinity.compact = depth - 1 - affinity.compact;
    }
    goto sortTopology;

  case affinity_compact:
    if (affinity.compact >= depth) {
      affinity.compact = depth - 1;
    }
    goto sortTopology;

  case affinity_balanced:
    if (depth <= 1 || is_hidden_helper_affinity) {
      // Balanced needs a multi-level topology and is not supported for
      // hidden helper threads; degrade to "none".
      KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
      affinity.type = affinity_none;
      __kmp_create_affinity_none_places(affinity);
      affinity.flags.initialized = TRUE;
      return;
    } else if (!__kmp_topology->is_uniform()) {
      // Save the depth for further usage
      __kmp_aff_depth = depth;

      int core_level =
          __kmp_affinity_find_core_level(__kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc, depth - 1,
                                                 core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
        affinity.type = affinity_none;
        __kmp_create_affinity_none_places(affinity);
        affinity.flags.initialized = TRUE;
        return;
      }

      // procarr maps (core, slot-within-core) -> OS proc id; -1 = empty slot.
      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = __kmp_topology->at(i).os_id;
        int core = __kmp_affinity_find_core(i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
    }
    if (affinity.compact >= depth) {
      affinity.compact = depth - 1;
    }
    // Intentional fall-through into the common sortTopology code below.

  sortTopology:
    // Allocate the gtid->affinity mask table.
    if (affinity.flags.dups) {
      affinity.num_masks = __kmp_avail_proc;
    } else {
      affinity.num_masks = numUnique;
    }

    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < affinity.num_masks) &&
        !is_hidden_helper_affinity) {
      affinity.num_masks = __kmp_affinity_num_places;
    }

    KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);

    // Sort the topology table according to the current setting of
    // affinity.compact, then fill out affinity.masks.
    __kmp_topology->sort_compact(affinity);
    {
      int i;
      unsigned j;
      int num_hw_threads = __kmp_topology->get_num_hw_threads();
      kmp_full_mask_modifier_t full_mask;
      for (i = 0, j = 0; i < num_hw_threads; i++) {
        // When dups are disallowed, only the leader thread of each
        // granularity group contributes a mask.
        if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
          continue;
        }
        int osId = __kmp_topology->at(i).os_id;

        kmp_affin_mask_t *src = KMP_CPU_INDEX(affinity.os_id_masks, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        full_mask.include(src);
        if (++j >= affinity.num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == affinity.num_masks);
      // See if the places list further restricts or changes the full mask
      if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
        __kmp_topology->print(env_var);
      }
    }
    // Sort the topology back using ids
    __kmp_topology->sort_ids();
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }
  __kmp_aux_affinity_initialize_other_data(affinity);
  affinity.flags.initialized = TRUE;
}
49210b57cec5SDimitry Andric 
__kmp_affinity_initialize(kmp_affinity_t & affinity)4922bdd1243dSDimitry Andric void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
49235ffd83dbSDimitry Andric   // Much of the code above was written assuming that if a machine was not
4924bdd1243dSDimitry Andric   // affinity capable, then affinity type == affinity_none.
4925bdd1243dSDimitry Andric   // We now explicitly represent this as affinity type == affinity_disabled.
4926bdd1243dSDimitry Andric   // There are too many checks for affinity type == affinity_none in this code.
4927bdd1243dSDimitry Andric   // Instead of trying to change them all, check if
4928bdd1243dSDimitry Andric   // affinity type == affinity_disabled, and if so, slam it with affinity_none,
4929bdd1243dSDimitry Andric   // call the real initialization routine, then restore affinity type to
4930bdd1243dSDimitry Andric   // affinity_disabled.
4931bdd1243dSDimitry Andric   int disabled = (affinity.type == affinity_disabled);
4932bdd1243dSDimitry Andric   if (!KMP_AFFINITY_CAPABLE())
49330b57cec5SDimitry Andric     KMP_ASSERT(disabled);
4934bdd1243dSDimitry Andric   if (disabled)
4935bdd1243dSDimitry Andric     affinity.type = affinity_none;
4936bdd1243dSDimitry Andric   __kmp_aux_affinity_initialize(affinity);
4937bdd1243dSDimitry Andric   if (disabled)
4938bdd1243dSDimitry Andric     affinity.type = affinity_disabled;
49390b57cec5SDimitry Andric }
49400b57cec5SDimitry Andric 
__kmp_affinity_uninitialize(void)49410b57cec5SDimitry Andric void __kmp_affinity_uninitialize(void) {
4942bdd1243dSDimitry Andric   for (kmp_affinity_t *affinity : __kmp_affinities) {
4943bdd1243dSDimitry Andric     if (affinity->masks != NULL)
4944bdd1243dSDimitry Andric       KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
4945bdd1243dSDimitry Andric     if (affinity->os_id_masks != NULL)
4946bdd1243dSDimitry Andric       KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
4947bdd1243dSDimitry Andric     if (affinity->proclist != NULL)
4948bdd1243dSDimitry Andric       __kmp_free(affinity->proclist);
4949bdd1243dSDimitry Andric     if (affinity->ids != NULL)
4950bdd1243dSDimitry Andric       __kmp_free(affinity->ids);
4951bdd1243dSDimitry Andric     if (affinity->attrs != NULL)
4952bdd1243dSDimitry Andric       __kmp_free(affinity->attrs);
4953bdd1243dSDimitry Andric     *affinity = KMP_AFFINITY_INIT(affinity->env_var);
49540b57cec5SDimitry Andric   }
4955fcaf7f86SDimitry Andric   if (__kmp_affin_origMask != NULL) {
4956bdd1243dSDimitry Andric     if (KMP_AFFINITY_CAPABLE()) {
4957439352acSDimitry Andric #if KMP_OS_AIX
4958439352acSDimitry Andric       // Uninitialize by unbinding the thread.
4959439352acSDimitry Andric       bindprocessor(BINDTHREAD, thread_self(), PROCESSOR_CLASS_ANY);
4960439352acSDimitry Andric #else
4961bdd1243dSDimitry Andric       __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
4962439352acSDimitry Andric #endif
4963bdd1243dSDimitry Andric     }
4964fcaf7f86SDimitry Andric     KMP_CPU_FREE(__kmp_affin_origMask);
4965fcaf7f86SDimitry Andric     __kmp_affin_origMask = NULL;
4966fcaf7f86SDimitry Andric   }
49670b57cec5SDimitry Andric   __kmp_affinity_num_places = 0;
49680b57cec5SDimitry Andric   if (procarr != NULL) {
49690b57cec5SDimitry Andric     __kmp_free(procarr);
49700b57cec5SDimitry Andric     procarr = NULL;
49710b57cec5SDimitry Andric   }
4972bdd1243dSDimitry Andric   if (__kmp_osid_to_hwthread_map) {
4973bdd1243dSDimitry Andric     __kmp_free(__kmp_osid_to_hwthread_map);
4974bdd1243dSDimitry Andric     __kmp_osid_to_hwthread_map = NULL;
4975bdd1243dSDimitry Andric   }
49760b57cec5SDimitry Andric #if KMP_USE_HWLOC
49770b57cec5SDimitry Andric   if (__kmp_hwloc_topology != NULL) {
49780b57cec5SDimitry Andric     hwloc_topology_destroy(__kmp_hwloc_topology);
49790b57cec5SDimitry Andric     __kmp_hwloc_topology = NULL;
49800b57cec5SDimitry Andric   }
49810b57cec5SDimitry Andric #endif
4982fe6060f1SDimitry Andric   if (__kmp_hw_subset) {
4983fe6060f1SDimitry Andric     kmp_hw_subset_t::deallocate(__kmp_hw_subset);
4984fe6060f1SDimitry Andric     __kmp_hw_subset = nullptr;
4985fe6060f1SDimitry Andric   }
4986fe6060f1SDimitry Andric   if (__kmp_topology) {
4987fe6060f1SDimitry Andric     kmp_topology_t::deallocate(__kmp_topology);
4988fe6060f1SDimitry Andric     __kmp_topology = nullptr;
4989fe6060f1SDimitry Andric   }
49900b57cec5SDimitry Andric   KMPAffinity::destroy_api();
49910b57cec5SDimitry Andric }
49920b57cec5SDimitry Andric 
__kmp_select_mask_by_gtid(int gtid,const kmp_affinity_t * affinity,int * place,kmp_affin_mask_t ** mask)4993bdd1243dSDimitry Andric static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
4994bdd1243dSDimitry Andric                                       int *place, kmp_affin_mask_t **mask) {
4995bdd1243dSDimitry Andric   int mask_idx;
4996bdd1243dSDimitry Andric   bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
4997bdd1243dSDimitry Andric   if (is_hidden_helper)
4998bdd1243dSDimitry Andric     // The first gtid is the regular primary thread, the second gtid is the main
4999bdd1243dSDimitry Andric     // thread of hidden team which does not participate in task execution.
5000bdd1243dSDimitry Andric     mask_idx = gtid - 2;
5001bdd1243dSDimitry Andric   else
5002bdd1243dSDimitry Andric     mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
5003bdd1243dSDimitry Andric   KMP_DEBUG_ASSERT(affinity->num_masks > 0);
5004bdd1243dSDimitry Andric   *place = (mask_idx + affinity->offset) % affinity->num_masks;
5005bdd1243dSDimitry Andric   *mask = KMP_CPU_INDEX(affinity->masks, *place);
5006bdd1243dSDimitry Andric }
5007bdd1243dSDimitry Andric 
5008bdd1243dSDimitry Andric // This function initializes the per-thread data concerning affinity including
5009bdd1243dSDimitry Andric // the mask and topology information
// Initialize T#gtid's affinity-related per-thread data: reset its topology
// ids/attrs to "unknown", then select and store the thread's initial affinity
// mask and place in th->th. The mask is only stored here, not applied to the
// OS thread (binding happens in __kmp_affinity_bind_init_mask).
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  // Set the thread topology information to default of unknown
  for (int id = 0; id < KMP_HW_LAST; ++id)
    th->th.th_topology_ids.ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
  th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;

  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  // Allocate the thread's mask on first use; otherwise clear the old one.
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity.type == affinity_none, copy the "full" mask, i.e.
  // one that has all of the OS proc ids set, or if
  // __kmp_affinity.flags.respect is set, then the full mask is the
  // same as the mask of the initialization thread.
  kmp_affin_mask_t *mask;
  int i;
  const kmp_affinity_t *affinity;
  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);

  // Hidden helper threads use their own affinity settings object.
  if (is_hidden_helper)
    affinity = &__kmp_hh_affinity;
  else
    affinity = &__kmp_affinity;

  if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
    if ((affinity->type == affinity_none) ||
        (affinity->type == affinity_balanced) ||
        KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
      // With multiple Windows processor groups there is no single full mask;
      // leave the thread's mask untouched.
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = 0;
      mask = __kmp_affin_fullMask;
    } else {
      __kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
    }
  } else {
    if (!isa_root || __kmp_nested_proc_bind.bind_types[0] == proc_bind_false) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      __kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
    }
  }

  th->th.th_current_place = i;
  if (isa_root && !is_hidden_helper) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = affinity->num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
    // When using a Non-OMP_PROC_BIND affinity method,
    // set all threads' place-partition-var to the entire place list
    th->th.th_first_place = 0;
    th->th.th_last_place = affinity->num_masks - 1;
  }
  // Copy topology information associated with the place
  // NOTE(review): ids/attrs are always read from __kmp_affinity even when
  // `affinity` points at __kmp_hh_affinity and `i` was computed against the
  // hidden-helper mask table — looks like a potential index mismatch for
  // hidden helper threads; confirm against upstream intent.
  if (i >= 0) {
    th->th.th_topology_ids = __kmp_affinity.ids[i];
    th->th.th_topology_attrs = __kmp_affinity.attrs[i];
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to all places\n",
                   gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to place %d\n",
                   gtid, i));
  }

  KMP_CPU_COPY(th->th.th_affin_mask, mask);
}
51010b57cec5SDimitry Andric 
// Apply the initial affinity mask previously computed for thread `gtid`
// (stored in th->th.th_affin_mask) to the underlying OS thread, optionally
// printing the binding when the relevant *_AFFINITY verbose flag is set.
void __kmp_affinity_bind_init_mask(int gtid) {
  // No-op when the runtime cannot set affinity on this platform/configuration.
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }
  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  const kmp_affinity_t *affinity;
  const char *env_var;
  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);

  // Hidden helper threads are governed by their own affinity settings,
  // not the user-facing KMP_AFFINITY/OMP_* ones.
  if (is_hidden_helper)
    affinity = &__kmp_hh_affinity;
  else
    affinity = &__kmp_affinity;
  env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
  /* to avoid duplicate printing (will be correctly printed on barrier) */
  // Only print here for the cases not reported later at the barrier:
  // affinity_none, or a concrete (non-ALL, non-balanced) place binding.
  if (affinity->flags.verbose && (affinity->type == affinity_none ||
                                  (th->th.th_current_place != KMP_PLACE_ALL &&
                                   affinity->type != affinity_balanced)) &&
      !KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
               gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the user
  // didn't request affinity and this call fails, just continue silently.
  // See CQ171393.
  if (affinity->type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
#ifndef KMP_OS_AIX
    // Do not set the full mask as the init mask on AIX.
    // NOTE: on Windows this is the `else` arm of the check above; on other
    // non-AIX platforms it runs unconditionally (abort_on_error = TRUE).
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
#endif
}
51410b57cec5SDimitry Andric 
__kmp_affinity_bind_place(int gtid)51425f757f3fSDimitry Andric void __kmp_affinity_bind_place(int gtid) {
5143bdd1243dSDimitry Andric   // Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
5144bdd1243dSDimitry Andric   if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
51450b57cec5SDimitry Andric     return;
51460b57cec5SDimitry Andric   }
51470b57cec5SDimitry Andric 
51480b57cec5SDimitry Andric   kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
51490b57cec5SDimitry Andric 
51505f757f3fSDimitry Andric   KA_TRACE(100, ("__kmp_affinity_bind_place: binding T#%d to place %d (current "
51510b57cec5SDimitry Andric                  "place = %d)\n",
51520b57cec5SDimitry Andric                  gtid, th->th.th_new_place, th->th.th_current_place));
51530b57cec5SDimitry Andric 
51540b57cec5SDimitry Andric   // Check that the new place is within this thread's partition.
51550b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
51560b57cec5SDimitry Andric   KMP_ASSERT(th->th.th_new_place >= 0);
5157bdd1243dSDimitry Andric   KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks);
51580b57cec5SDimitry Andric   if (th->th.th_first_place <= th->th.th_last_place) {
51590b57cec5SDimitry Andric     KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
51600b57cec5SDimitry Andric                (th->th.th_new_place <= th->th.th_last_place));
51610b57cec5SDimitry Andric   } else {
51620b57cec5SDimitry Andric     KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
51630b57cec5SDimitry Andric                (th->th.th_new_place >= th->th.th_last_place));
51640b57cec5SDimitry Andric   }
51650b57cec5SDimitry Andric 
51665ffd83dbSDimitry Andric   // Copy the thread mask to the kmp_info_t structure,
51670b57cec5SDimitry Andric   // and set this thread's affinity.
51680b57cec5SDimitry Andric   kmp_affin_mask_t *mask =
5169bdd1243dSDimitry Andric       KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
51700b57cec5SDimitry Andric   KMP_CPU_COPY(th->th.th_affin_mask, mask);
51710b57cec5SDimitry Andric   th->th.th_current_place = th->th.th_new_place;
51720b57cec5SDimitry Andric 
5173bdd1243dSDimitry Andric   if (__kmp_affinity.flags.verbose) {
51740b57cec5SDimitry Andric     char buf[KMP_AFFIN_MASK_PRINT_LEN];
51750b57cec5SDimitry Andric     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
51760b57cec5SDimitry Andric                               th->th.th_affin_mask);
51770b57cec5SDimitry Andric     KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
51780b57cec5SDimitry Andric                __kmp_gettid(), gtid, buf);
51790b57cec5SDimitry Andric   }
51800b57cec5SDimitry Andric   __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
51810b57cec5SDimitry Andric }
51820b57cec5SDimitry Andric 
__kmp_aux_set_affinity(void ** mask)51830b57cec5SDimitry Andric int __kmp_aux_set_affinity(void **mask) {
51840b57cec5SDimitry Andric   int gtid;
51850b57cec5SDimitry Andric   kmp_info_t *th;
51860b57cec5SDimitry Andric   int retval;
51870b57cec5SDimitry Andric 
51880b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
51890b57cec5SDimitry Andric     return -1;
51900b57cec5SDimitry Andric   }
51910b57cec5SDimitry Andric 
51920b57cec5SDimitry Andric   gtid = __kmp_entry_gtid();
5193fe6060f1SDimitry Andric   KA_TRACE(
5194fe6060f1SDimitry Andric       1000, (""); {
51950b57cec5SDimitry Andric         char buf[KMP_AFFIN_MASK_PRINT_LEN];
51960b57cec5SDimitry Andric         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
51970b57cec5SDimitry Andric                                   (kmp_affin_mask_t *)(*mask));
51980b57cec5SDimitry Andric         __kmp_debug_printf(
5199fe6060f1SDimitry Andric             "kmp_set_affinity: setting affinity mask for thread %d = %s\n",
5200fe6060f1SDimitry Andric             gtid, buf);
52010b57cec5SDimitry Andric       });
52020b57cec5SDimitry Andric 
52030b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
52040b57cec5SDimitry Andric     if ((mask == NULL) || (*mask == NULL)) {
52050b57cec5SDimitry Andric       KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
52060b57cec5SDimitry Andric     } else {
52070b57cec5SDimitry Andric       unsigned proc;
52080b57cec5SDimitry Andric       int num_procs = 0;
52090b57cec5SDimitry Andric 
52100b57cec5SDimitry Andric       KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
52110b57cec5SDimitry Andric         if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
52120b57cec5SDimitry Andric           KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
52130b57cec5SDimitry Andric         }
52140b57cec5SDimitry Andric         if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
52150b57cec5SDimitry Andric           continue;
52160b57cec5SDimitry Andric         }
52170b57cec5SDimitry Andric         num_procs++;
52180b57cec5SDimitry Andric       }
52190b57cec5SDimitry Andric       if (num_procs == 0) {
52200b57cec5SDimitry Andric         KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
52210b57cec5SDimitry Andric       }
52220b57cec5SDimitry Andric 
52230b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
52240b57cec5SDimitry Andric       if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
52250b57cec5SDimitry Andric         KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
52260b57cec5SDimitry Andric       }
52270b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
52280b57cec5SDimitry Andric     }
52290b57cec5SDimitry Andric   }
52300b57cec5SDimitry Andric 
52310b57cec5SDimitry Andric   th = __kmp_threads[gtid];
52320b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
52330b57cec5SDimitry Andric   retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
52340b57cec5SDimitry Andric   if (retval == 0) {
52350b57cec5SDimitry Andric     KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
52360b57cec5SDimitry Andric   }
52370b57cec5SDimitry Andric 
52380b57cec5SDimitry Andric   th->th.th_current_place = KMP_PLACE_UNDEFINED;
52390b57cec5SDimitry Andric   th->th.th_new_place = KMP_PLACE_UNDEFINED;
52400b57cec5SDimitry Andric   th->th.th_first_place = 0;
5241bdd1243dSDimitry Andric   th->th.th_last_place = __kmp_affinity.num_masks - 1;
52420b57cec5SDimitry Andric 
52430b57cec5SDimitry Andric   // Turn off 4.0 affinity for the current tread at this parallel level.
52440b57cec5SDimitry Andric   th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
52450b57cec5SDimitry Andric 
52460b57cec5SDimitry Andric   return retval;
52470b57cec5SDimitry Andric }
52480b57cec5SDimitry Andric 
// kmp_get_affinity() API entry: fill *mask with the calling thread's
// affinity mask. On most platforms this queries the OS directly; on
// Windows/AIX it returns the mask stored in the thread descriptor.
// Returns the OS query status (or 0 on the stored-mask path), -1 when
// affinity is unsupported.
int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
// `th` is only needed where the stored mask is read (Windows/AIX) or for
// debug asserts/tracing.
#if KMP_OS_WINDOWS || KMP_OS_AIX || KMP_DEBUG
  kmp_info_t *th;
#endif
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
#if KMP_OS_WINDOWS || KMP_OS_AIX || KMP_DEBUG
  th = __kmp_threads[gtid];
#else
  (void)gtid; // unused variable
#endif
  // Safe in the non-KMP_DEBUG builds where `th` is undeclared:
  // KMP_DEBUG_ASSERT expands to nothing, so this reference disappears too.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  th->th.th_affin_mask);
        __kmp_printf(
            "kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,
            buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS && !KMP_OS_AIX

  // Query the OS for the thread's current system affinity mask.
  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_printf(
            "kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid,
            buf);
      });
  return retval;

#else
  (void)retval;

  // Windows/AIX: return the runtime's stored copy of the mask instead of
  // querying the OS.
  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* !KMP_OS_WINDOWS && !KMP_OS_AIX */
}
53050b57cec5SDimitry Andric 
__kmp_aux_get_affinity_max_proc()53060b57cec5SDimitry Andric int __kmp_aux_get_affinity_max_proc() {
53070b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
53080b57cec5SDimitry Andric     return 0;
53090b57cec5SDimitry Andric   }
53100b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
53110b57cec5SDimitry Andric   if (__kmp_num_proc_groups > 1) {
53120b57cec5SDimitry Andric     return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
53130b57cec5SDimitry Andric   }
53140b57cec5SDimitry Andric #endif
53150b57cec5SDimitry Andric   return __kmp_xproc;
53160b57cec5SDimitry Andric }
53170b57cec5SDimitry Andric 
__kmp_aux_set_affinity_mask_proc(int proc,void ** mask)53180b57cec5SDimitry Andric int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
53190b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
53200b57cec5SDimitry Andric     return -1;
53210b57cec5SDimitry Andric   }
53220b57cec5SDimitry Andric 
5323fe6060f1SDimitry Andric   KA_TRACE(
5324fe6060f1SDimitry Andric       1000, (""); {
53250b57cec5SDimitry Andric         int gtid = __kmp_entry_gtid();
53260b57cec5SDimitry Andric         char buf[KMP_AFFIN_MASK_PRINT_LEN];
53270b57cec5SDimitry Andric         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
53280b57cec5SDimitry Andric                                   (kmp_affin_mask_t *)(*mask));
53290b57cec5SDimitry Andric         __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
53300b57cec5SDimitry Andric                            "affinity mask for thread %d = %s\n",
53310b57cec5SDimitry Andric                            proc, gtid, buf);
53320b57cec5SDimitry Andric       });
53330b57cec5SDimitry Andric 
53340b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
53350b57cec5SDimitry Andric     if ((mask == NULL) || (*mask == NULL)) {
53360b57cec5SDimitry Andric       KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
53370b57cec5SDimitry Andric     }
53380b57cec5SDimitry Andric   }
53390b57cec5SDimitry Andric 
53400b57cec5SDimitry Andric   if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
53410b57cec5SDimitry Andric     return -1;
53420b57cec5SDimitry Andric   }
53430b57cec5SDimitry Andric   if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
53440b57cec5SDimitry Andric     return -2;
53450b57cec5SDimitry Andric   }
53460b57cec5SDimitry Andric 
53470b57cec5SDimitry Andric   KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
53480b57cec5SDimitry Andric   return 0;
53490b57cec5SDimitry Andric }
53500b57cec5SDimitry Andric 
__kmp_aux_unset_affinity_mask_proc(int proc,void ** mask)53510b57cec5SDimitry Andric int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
53520b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
53530b57cec5SDimitry Andric     return -1;
53540b57cec5SDimitry Andric   }
53550b57cec5SDimitry Andric 
5356fe6060f1SDimitry Andric   KA_TRACE(
5357fe6060f1SDimitry Andric       1000, (""); {
53580b57cec5SDimitry Andric         int gtid = __kmp_entry_gtid();
53590b57cec5SDimitry Andric         char buf[KMP_AFFIN_MASK_PRINT_LEN];
53600b57cec5SDimitry Andric         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
53610b57cec5SDimitry Andric                                   (kmp_affin_mask_t *)(*mask));
53620b57cec5SDimitry Andric         __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
53630b57cec5SDimitry Andric                            "affinity mask for thread %d = %s\n",
53640b57cec5SDimitry Andric                            proc, gtid, buf);
53650b57cec5SDimitry Andric       });
53660b57cec5SDimitry Andric 
53670b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
53680b57cec5SDimitry Andric     if ((mask == NULL) || (*mask == NULL)) {
53690b57cec5SDimitry Andric       KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
53700b57cec5SDimitry Andric     }
53710b57cec5SDimitry Andric   }
53720b57cec5SDimitry Andric 
53730b57cec5SDimitry Andric   if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
53740b57cec5SDimitry Andric     return -1;
53750b57cec5SDimitry Andric   }
53760b57cec5SDimitry Andric   if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
53770b57cec5SDimitry Andric     return -2;
53780b57cec5SDimitry Andric   }
53790b57cec5SDimitry Andric 
53800b57cec5SDimitry Andric   KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
53810b57cec5SDimitry Andric   return 0;
53820b57cec5SDimitry Andric }
53830b57cec5SDimitry Andric 
__kmp_aux_get_affinity_mask_proc(int proc,void ** mask)53840b57cec5SDimitry Andric int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
53850b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE()) {
53860b57cec5SDimitry Andric     return -1;
53870b57cec5SDimitry Andric   }
53880b57cec5SDimitry Andric 
5389fe6060f1SDimitry Andric   KA_TRACE(
5390fe6060f1SDimitry Andric       1000, (""); {
53910b57cec5SDimitry Andric         int gtid = __kmp_entry_gtid();
53920b57cec5SDimitry Andric         char buf[KMP_AFFIN_MASK_PRINT_LEN];
53930b57cec5SDimitry Andric         __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
53940b57cec5SDimitry Andric                                   (kmp_affin_mask_t *)(*mask));
53950b57cec5SDimitry Andric         __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
53960b57cec5SDimitry Andric                            "affinity mask for thread %d = %s\n",
53970b57cec5SDimitry Andric                            proc, gtid, buf);
53980b57cec5SDimitry Andric       });
53990b57cec5SDimitry Andric 
54000b57cec5SDimitry Andric   if (__kmp_env_consistency_check) {
54010b57cec5SDimitry Andric     if ((mask == NULL) || (*mask == NULL)) {
54020b57cec5SDimitry Andric       KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
54030b57cec5SDimitry Andric     }
54040b57cec5SDimitry Andric   }
54050b57cec5SDimitry Andric 
54060b57cec5SDimitry Andric   if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
54070b57cec5SDimitry Andric     return -1;
54080b57cec5SDimitry Andric   }
54090b57cec5SDimitry Andric   if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
54100b57cec5SDimitry Andric     return 0;
54110b57cec5SDimitry Andric   }
54120b57cec5SDimitry Andric 
54130b57cec5SDimitry Andric   return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
54140b57cec5SDimitry Andric }
54150b57cec5SDimitry Andric 
#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
// Returns the hw-thread index of the first ATOM (efficiency) core, or -1 if
// the topology contains none.
// Assumes the hw threads in __kmp_topology are ordered with all
// KMP_HW_CORE_TYPE_CORE entries before the ATOM entries, so a lower-bound
// binary search finds the first non-CORE entry.
// NOTE(review): despite the name, this returns a topology index, not an
// os_id — that matches the original behavior; confirm against callers.
int __kmp_get_first_osid_with_ecore(void) {
  int num_hw_threads = __kmp_topology->get_num_hw_threads();
  int low = 0;
  int high = num_hw_threads;
  // Invariant: every entry below `low` is CORE; every entry at or beyond
  // `high` is not. Converges with `low` at the first non-CORE entry.
  while (low < high) {
    int mid = low + (high - low) / 2;
    if (__kmp_topology->at(mid).attrs.get_core_type() ==
        KMP_HW_CORE_TYPE_CORE) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }
  // Bug fix: the previous code tested the last probed `mid` instead of the
  // converged boundary, missing cases such as a 2-entry topology
  // [CORE, ATOM] (loop never ran) or [CORE, CORE, CORE, ATOM] (last mid
  // landed on a CORE entry).
  if (low < num_hw_threads &&
      __kmp_topology->at(low).attrs.get_core_type() == KMP_HW_CORE_TYPE_ATOM) {
    return low;
  }
  return -1;
}
#endif
54375f757f3fSDimitry Andric 
54380b57cec5SDimitry Andric // Dynamic affinity settings - Affinity balanced
__kmp_balanced_affinity(kmp_info_t * th,int nthreads)54390b57cec5SDimitry Andric void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
54400b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th);
54410b57cec5SDimitry Andric   bool fine_gran = true;
54420b57cec5SDimitry Andric   int tid = th->th.th_info.ds.ds_tid;
5443bdd1243dSDimitry Andric   const char *env_var = "KMP_AFFINITY";
54440b57cec5SDimitry Andric 
5445fe6060f1SDimitry Andric   // Do not perform balanced affinity for the hidden helper threads
5446fe6060f1SDimitry Andric   if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th)))
5447fe6060f1SDimitry Andric     return;
5448fe6060f1SDimitry Andric 
5449bdd1243dSDimitry Andric   switch (__kmp_affinity.gran) {
5450fe6060f1SDimitry Andric   case KMP_HW_THREAD:
54510b57cec5SDimitry Andric     break;
5452fe6060f1SDimitry Andric   case KMP_HW_CORE:
54530b57cec5SDimitry Andric     if (__kmp_nThreadsPerCore > 1) {
54540b57cec5SDimitry Andric       fine_gran = false;
54550b57cec5SDimitry Andric     }
54560b57cec5SDimitry Andric     break;
5457fe6060f1SDimitry Andric   case KMP_HW_SOCKET:
54580b57cec5SDimitry Andric     if (nCoresPerPkg > 1) {
54590b57cec5SDimitry Andric       fine_gran = false;
54600b57cec5SDimitry Andric     }
54610b57cec5SDimitry Andric     break;
54620b57cec5SDimitry Andric   default:
54630b57cec5SDimitry Andric     fine_gran = false;
54640b57cec5SDimitry Andric   }
54650b57cec5SDimitry Andric 
5466fe6060f1SDimitry Andric   if (__kmp_topology->is_uniform()) {
54670b57cec5SDimitry Andric     int coreID;
54680b57cec5SDimitry Andric     int threadID;
54690b57cec5SDimitry Andric     // Number of hyper threads per core in HT machine
54700b57cec5SDimitry Andric     int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
54710b57cec5SDimitry Andric     // Number of cores
54720b57cec5SDimitry Andric     int ncores = __kmp_ncores;
54730b57cec5SDimitry Andric     if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
54740b57cec5SDimitry Andric       __kmp_nth_per_core = __kmp_avail_proc / nPackages;
54750b57cec5SDimitry Andric       ncores = nPackages;
54760b57cec5SDimitry Andric     }
54770b57cec5SDimitry Andric     // How many threads will be bound to each core
54780b57cec5SDimitry Andric     int chunk = nthreads / ncores;
54790b57cec5SDimitry Andric     // How many cores will have an additional thread bound to it - "big cores"
54800b57cec5SDimitry Andric     int big_cores = nthreads % ncores;
54810b57cec5SDimitry Andric     // Number of threads on the big cores
54820b57cec5SDimitry Andric     int big_nth = (chunk + 1) * big_cores;
54830b57cec5SDimitry Andric     if (tid < big_nth) {
54840b57cec5SDimitry Andric       coreID = tid / (chunk + 1);
54850b57cec5SDimitry Andric       threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
54860b57cec5SDimitry Andric     } else { // tid >= big_nth
54870b57cec5SDimitry Andric       coreID = (tid - big_cores) / chunk;
54880b57cec5SDimitry Andric       threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
54890b57cec5SDimitry Andric     }
54900b57cec5SDimitry Andric     KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
54910b57cec5SDimitry Andric                       "Illegal set affinity operation when not capable");
54920b57cec5SDimitry Andric 
54930b57cec5SDimitry Andric     kmp_affin_mask_t *mask = th->th.th_affin_mask;
54940b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
54950b57cec5SDimitry Andric 
54960b57cec5SDimitry Andric     if (fine_gran) {
5497fe6060f1SDimitry Andric       int osID =
5498fe6060f1SDimitry Andric           __kmp_topology->at(coreID * __kmp_nth_per_core + threadID).os_id;
54990b57cec5SDimitry Andric       KMP_CPU_SET(osID, mask);
55000b57cec5SDimitry Andric     } else {
55010b57cec5SDimitry Andric       for (int i = 0; i < __kmp_nth_per_core; i++) {
55020b57cec5SDimitry Andric         int osID;
5503fe6060f1SDimitry Andric         osID = __kmp_topology->at(coreID * __kmp_nth_per_core + i).os_id;
55040b57cec5SDimitry Andric         KMP_CPU_SET(osID, mask);
55050b57cec5SDimitry Andric       }
55060b57cec5SDimitry Andric     }
5507bdd1243dSDimitry Andric     if (__kmp_affinity.flags.verbose) {
55080b57cec5SDimitry Andric       char buf[KMP_AFFIN_MASK_PRINT_LEN];
55090b57cec5SDimitry Andric       __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5510bdd1243dSDimitry Andric       KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
5511bdd1243dSDimitry Andric                  tid, buf);
55120b57cec5SDimitry Andric     }
5513bdd1243dSDimitry Andric     __kmp_affinity_get_thread_topology_info(th);
55140b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
55150b57cec5SDimitry Andric   } else { // Non-uniform topology
55160b57cec5SDimitry Andric 
55170b57cec5SDimitry Andric     kmp_affin_mask_t *mask = th->th.th_affin_mask;
55180b57cec5SDimitry Andric     KMP_CPU_ZERO(mask);
55190b57cec5SDimitry Andric 
5520fe6060f1SDimitry Andric     int core_level =
5521fe6060f1SDimitry Andric         __kmp_affinity_find_core_level(__kmp_avail_proc, __kmp_aff_depth - 1);
5522fe6060f1SDimitry Andric     int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc,
55230b57cec5SDimitry Andric                                                __kmp_aff_depth - 1, core_level);
55240b57cec5SDimitry Andric     int nth_per_core = __kmp_affinity_max_proc_per_core(
5525fe6060f1SDimitry Andric         __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
55260b57cec5SDimitry Andric 
55270b57cec5SDimitry Andric     // For performance gain consider the special case nthreads ==
55280b57cec5SDimitry Andric     // __kmp_avail_proc
55290b57cec5SDimitry Andric     if (nthreads == __kmp_avail_proc) {
55300b57cec5SDimitry Andric       if (fine_gran) {
5531fe6060f1SDimitry Andric         int osID = __kmp_topology->at(tid).os_id;
55320b57cec5SDimitry Andric         KMP_CPU_SET(osID, mask);
55330b57cec5SDimitry Andric       } else {
5534fe6060f1SDimitry Andric         int core =
5535fe6060f1SDimitry Andric             __kmp_affinity_find_core(tid, __kmp_aff_depth - 1, core_level);
55360b57cec5SDimitry Andric         for (int i = 0; i < __kmp_avail_proc; i++) {
5537fe6060f1SDimitry Andric           int osID = __kmp_topology->at(i).os_id;
5538fe6060f1SDimitry Andric           if (__kmp_affinity_find_core(i, __kmp_aff_depth - 1, core_level) ==
5539fe6060f1SDimitry Andric               core) {
55400b57cec5SDimitry Andric             KMP_CPU_SET(osID, mask);
55410b57cec5SDimitry Andric           }
55420b57cec5SDimitry Andric         }
55430b57cec5SDimitry Andric       }
55440b57cec5SDimitry Andric     } else if (nthreads <= ncores) {
55450b57cec5SDimitry Andric 
55460b57cec5SDimitry Andric       int core = 0;
55470b57cec5SDimitry Andric       for (int i = 0; i < ncores; i++) {
55480b57cec5SDimitry Andric         // Check if this core from procarr[] is in the mask
55490b57cec5SDimitry Andric         int in_mask = 0;
55500b57cec5SDimitry Andric         for (int j = 0; j < nth_per_core; j++) {
55510b57cec5SDimitry Andric           if (procarr[i * nth_per_core + j] != -1) {
55520b57cec5SDimitry Andric             in_mask = 1;
55530b57cec5SDimitry Andric             break;
55540b57cec5SDimitry Andric           }
55550b57cec5SDimitry Andric         }
55560b57cec5SDimitry Andric         if (in_mask) {
55570b57cec5SDimitry Andric           if (tid == core) {
55580b57cec5SDimitry Andric             for (int j = 0; j < nth_per_core; j++) {
55590b57cec5SDimitry Andric               int osID = procarr[i * nth_per_core + j];
55600b57cec5SDimitry Andric               if (osID != -1) {
55610b57cec5SDimitry Andric                 KMP_CPU_SET(osID, mask);
55620b57cec5SDimitry Andric                 // For fine granularity it is enough to set the first available
55630b57cec5SDimitry Andric                 // osID for this core
55640b57cec5SDimitry Andric                 if (fine_gran) {
55650b57cec5SDimitry Andric                   break;
55660b57cec5SDimitry Andric                 }
55670b57cec5SDimitry Andric               }
55680b57cec5SDimitry Andric             }
55690b57cec5SDimitry Andric             break;
55700b57cec5SDimitry Andric           } else {
55710b57cec5SDimitry Andric             core++;
55720b57cec5SDimitry Andric           }
55730b57cec5SDimitry Andric         }
55740b57cec5SDimitry Andric       }
55750b57cec5SDimitry Andric     } else { // nthreads > ncores
55760b57cec5SDimitry Andric       // Array to save the number of processors at each core
55770b57cec5SDimitry Andric       int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
55780b57cec5SDimitry Andric       // Array to save the number of cores with "x" available processors;
55790b57cec5SDimitry Andric       int *ncores_with_x_procs =
55800b57cec5SDimitry Andric           (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
55810b57cec5SDimitry Andric       // Array to save the number of cores with # procs from x to nth_per_core
55820b57cec5SDimitry Andric       int *ncores_with_x_to_max_procs =
55830b57cec5SDimitry Andric           (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
55840b57cec5SDimitry Andric 
55850b57cec5SDimitry Andric       for (int i = 0; i <= nth_per_core; i++) {
55860b57cec5SDimitry Andric         ncores_with_x_procs[i] = 0;
55870b57cec5SDimitry Andric         ncores_with_x_to_max_procs[i] = 0;
55880b57cec5SDimitry Andric       }
55890b57cec5SDimitry Andric 
55900b57cec5SDimitry Andric       for (int i = 0; i < ncores; i++) {
55910b57cec5SDimitry Andric         int cnt = 0;
55920b57cec5SDimitry Andric         for (int j = 0; j < nth_per_core; j++) {
55930b57cec5SDimitry Andric           if (procarr[i * nth_per_core + j] != -1) {
55940b57cec5SDimitry Andric             cnt++;
55950b57cec5SDimitry Andric           }
55960b57cec5SDimitry Andric         }
55970b57cec5SDimitry Andric         nproc_at_core[i] = cnt;
55980b57cec5SDimitry Andric         ncores_with_x_procs[cnt]++;
55990b57cec5SDimitry Andric       }
56000b57cec5SDimitry Andric 
56010b57cec5SDimitry Andric       for (int i = 0; i <= nth_per_core; i++) {
56020b57cec5SDimitry Andric         for (int j = i; j <= nth_per_core; j++) {
56030b57cec5SDimitry Andric           ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
56040b57cec5SDimitry Andric         }
56050b57cec5SDimitry Andric       }
56060b57cec5SDimitry Andric 
56070b57cec5SDimitry Andric       // Max number of processors
56080b57cec5SDimitry Andric       int nproc = nth_per_core * ncores;
56090b57cec5SDimitry Andric       // An array to keep number of threads per each context
56100b57cec5SDimitry Andric       int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
56110b57cec5SDimitry Andric       for (int i = 0; i < nproc; i++) {
56120b57cec5SDimitry Andric         newarr[i] = 0;
56130b57cec5SDimitry Andric       }
56140b57cec5SDimitry Andric 
56150b57cec5SDimitry Andric       int nth = nthreads;
56160b57cec5SDimitry Andric       int flag = 0;
56170b57cec5SDimitry Andric       while (nth > 0) {
56180b57cec5SDimitry Andric         for (int j = 1; j <= nth_per_core; j++) {
56190b57cec5SDimitry Andric           int cnt = ncores_with_x_to_max_procs[j];
56200b57cec5SDimitry Andric           for (int i = 0; i < ncores; i++) {
56210b57cec5SDimitry Andric             // Skip the core with 0 processors
56220b57cec5SDimitry Andric             if (nproc_at_core[i] == 0) {
56230b57cec5SDimitry Andric               continue;
56240b57cec5SDimitry Andric             }
56250b57cec5SDimitry Andric             for (int k = 0; k < nth_per_core; k++) {
56260b57cec5SDimitry Andric               if (procarr[i * nth_per_core + k] != -1) {
56270b57cec5SDimitry Andric                 if (newarr[i * nth_per_core + k] == 0) {
56280b57cec5SDimitry Andric                   newarr[i * nth_per_core + k] = 1;
56290b57cec5SDimitry Andric                   cnt--;
56300b57cec5SDimitry Andric                   nth--;
56310b57cec5SDimitry Andric                   break;
56320b57cec5SDimitry Andric                 } else {
56330b57cec5SDimitry Andric                   if (flag != 0) {
56340b57cec5SDimitry Andric                     newarr[i * nth_per_core + k]++;
56350b57cec5SDimitry Andric                     cnt--;
56360b57cec5SDimitry Andric                     nth--;
56370b57cec5SDimitry Andric                     break;
56380b57cec5SDimitry Andric                   }
56390b57cec5SDimitry Andric                 }
56400b57cec5SDimitry Andric               }
56410b57cec5SDimitry Andric             }
56420b57cec5SDimitry Andric             if (cnt == 0 || nth == 0) {
56430b57cec5SDimitry Andric               break;
56440b57cec5SDimitry Andric             }
56450b57cec5SDimitry Andric           }
56460b57cec5SDimitry Andric           if (nth == 0) {
56470b57cec5SDimitry Andric             break;
56480b57cec5SDimitry Andric           }
56490b57cec5SDimitry Andric         }
56500b57cec5SDimitry Andric         flag = 1;
56510b57cec5SDimitry Andric       }
56520b57cec5SDimitry Andric       int sum = 0;
56530b57cec5SDimitry Andric       for (int i = 0; i < nproc; i++) {
56540b57cec5SDimitry Andric         sum += newarr[i];
56550b57cec5SDimitry Andric         if (sum > tid) {
56560b57cec5SDimitry Andric           if (fine_gran) {
56570b57cec5SDimitry Andric             int osID = procarr[i];
56580b57cec5SDimitry Andric             KMP_CPU_SET(osID, mask);
56590b57cec5SDimitry Andric           } else {
56600b57cec5SDimitry Andric             int coreID = i / nth_per_core;
56610b57cec5SDimitry Andric             for (int ii = 0; ii < nth_per_core; ii++) {
56620b57cec5SDimitry Andric               int osID = procarr[coreID * nth_per_core + ii];
56630b57cec5SDimitry Andric               if (osID != -1) {
56640b57cec5SDimitry Andric                 KMP_CPU_SET(osID, mask);
56650b57cec5SDimitry Andric               }
56660b57cec5SDimitry Andric             }
56670b57cec5SDimitry Andric           }
56680b57cec5SDimitry Andric           break;
56690b57cec5SDimitry Andric         }
56700b57cec5SDimitry Andric       }
56710b57cec5SDimitry Andric       __kmp_free(newarr);
56720b57cec5SDimitry Andric     }
56730b57cec5SDimitry Andric 
5674bdd1243dSDimitry Andric     if (__kmp_affinity.flags.verbose) {
56750b57cec5SDimitry Andric       char buf[KMP_AFFIN_MASK_PRINT_LEN];
56760b57cec5SDimitry Andric       __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5677bdd1243dSDimitry Andric       KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
5678bdd1243dSDimitry Andric                  tid, buf);
56790b57cec5SDimitry Andric     }
5680bdd1243dSDimitry Andric     __kmp_affinity_get_thread_topology_info(th);
56810b57cec5SDimitry Andric     __kmp_set_system_affinity(mask, TRUE);
56820b57cec5SDimitry Andric   }
56830b57cec5SDimitry Andric }
56840b57cec5SDimitry Andric 
5685*0fca6ea1SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
5686*0fca6ea1SDimitry Andric     KMP_OS_AIX
56870b57cec5SDimitry Andric // We don't need this entry for Windows because
56880b57cec5SDimitry Andric // there is GetProcessAffinityMask() api
56890b57cec5SDimitry Andric //
56900b57cec5SDimitry Andric // The intended usage is indicated by these steps:
56910b57cec5SDimitry Andric // 1) The user gets the current affinity mask
56920b57cec5SDimitry Andric // 2) Then sets the affinity by calling this function
56930b57cec5SDimitry Andric // 3) Error check the return value
56940b57cec5SDimitry Andric // 4) Use non-OpenMP parallelization
56950b57cec5SDimitry Andric // 5) Reset the affinity to what was stored in step 1)
56960b57cec5SDimitry Andric #ifdef __cplusplus
56970b57cec5SDimitry Andric extern "C"
56980b57cec5SDimitry Andric #endif
56990b57cec5SDimitry Andric     int
kmp_set_thread_affinity_mask_initial()57000b57cec5SDimitry Andric     kmp_set_thread_affinity_mask_initial()
57010b57cec5SDimitry Andric // the function returns 0 on success,
57020b57cec5SDimitry Andric //   -1 if we cannot bind thread
57030b57cec5SDimitry Andric //   >0 (errno) if an error happened during binding
57040b57cec5SDimitry Andric {
57050b57cec5SDimitry Andric   int gtid = __kmp_get_gtid();
57060b57cec5SDimitry Andric   if (gtid < 0) {
57070b57cec5SDimitry Andric     // Do not touch non-omp threads
57080b57cec5SDimitry Andric     KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
57090b57cec5SDimitry Andric                   "non-omp thread, returning\n"));
57100b57cec5SDimitry Andric     return -1;
57110b57cec5SDimitry Andric   }
57120b57cec5SDimitry Andric   if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
57130b57cec5SDimitry Andric     KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
57140b57cec5SDimitry Andric                   "affinity not initialized, returning\n"));
57150b57cec5SDimitry Andric     return -1;
57160b57cec5SDimitry Andric   }
57170b57cec5SDimitry Andric   KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
57180b57cec5SDimitry Andric                 "set full mask for thread %d\n",
57190b57cec5SDimitry Andric                 gtid));
57200b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5721439352acSDimitry Andric #if KMP_OS_AIX
5722439352acSDimitry Andric   return bindprocessor(BINDTHREAD, thread_self(), PROCESSOR_CLASS_ANY);
5723439352acSDimitry Andric #else
57240b57cec5SDimitry Andric   return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
5725439352acSDimitry Andric #endif
57260b57cec5SDimitry Andric }
57270b57cec5SDimitry Andric #endif
57280b57cec5SDimitry Andric 
57290b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
5730