10b57cec5SDimitry Andric /*
20b57cec5SDimitry Andric * kmp_affinity.cpp -- affinity management
30b57cec5SDimitry Andric */
40b57cec5SDimitry Andric
50b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "kmp.h"
140b57cec5SDimitry Andric #include "kmp_affinity.h"
150b57cec5SDimitry Andric #include "kmp_i18n.h"
160b57cec5SDimitry Andric #include "kmp_io.h"
170b57cec5SDimitry Andric #include "kmp_str.h"
180b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h"
190b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
200b57cec5SDimitry Andric #include "kmp_dispatch_hier.h"
210b57cec5SDimitry Andric #endif
22fe6060f1SDimitry Andric #if KMP_USE_HWLOC
23fe6060f1SDimitry Andric // Copied from hwloc
24fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_INTEL_MODULE 102
25fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_INTEL_TILE 103
26fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_INTEL_DIE 104
27fe6060f1SDimitry Andric #define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220
28fe6060f1SDimitry Andric #endif
29349cc55cSDimitry Andric #include <ctype.h>
30fe6060f1SDimitry Andric
31fe6060f1SDimitry Andric // The machine topology
32fe6060f1SDimitry Andric kmp_topology_t *__kmp_topology = nullptr;
33fe6060f1SDimitry Andric // KMP_HW_SUBSET environment variable
34fe6060f1SDimitry Andric kmp_hw_subset_t *__kmp_hw_subset = nullptr;
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric // Store the real or imagined machine hierarchy here
370b57cec5SDimitry Andric static hierarchy_info machine_hierarchy;
380b57cec5SDimitry Andric
// Tear down the machine hierarchy state (called at runtime shutdown).
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
400b57cec5SDimitry Andric
415f757f3fSDimitry Andric #if KMP_AFFINITY_SUPPORTED
425f757f3fSDimitry Andric // Helper class to see if place lists further restrict the fullMask
// Helper class to see if place lists further restrict the fullMask
// Usage: union one or more masks in via include(), then call
// restrict_to_mask() to apply the accumulated mask to the topology.
class kmp_full_mask_modifier_t {
  kmp_affin_mask_t *mask; // union of all masks passed to include()

public:
  // Allocate and zero the accumulator mask
  kmp_full_mask_modifier_t() {
    KMP_CPU_ALLOC(mask);
    KMP_CPU_ZERO(mask);
  }
  // Release the accumulator mask
  ~kmp_full_mask_modifier_t() {
    KMP_CPU_FREE(mask);
    mask = nullptr;
  }
  // Add the OS procs set in 'other' to the accumulated mask
  void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, other); }
  // If the new full mask is different from the current full mask,
  // then switch them. Returns true if full mask was affected, false otherwise.
  bool restrict_to_mask() {
    // See if the new mask further restricts or changes the full mask
    // (an empty accumulator means include() was never usefully called)
    if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask))
      return false;
    return __kmp_topology->restrict_to_mask(mask);
  }
};
655f757f3fSDimitry Andric
665f757f3fSDimitry Andric static inline const char *
__kmp_get_affinity_env_var(const kmp_affinity_t & affinity,bool for_binding=false)675f757f3fSDimitry Andric __kmp_get_affinity_env_var(const kmp_affinity_t &affinity,
685f757f3fSDimitry Andric bool for_binding = false) {
695f757f3fSDimitry Andric if (affinity.flags.omp_places) {
705f757f3fSDimitry Andric if (for_binding)
715f757f3fSDimitry Andric return "OMP_PROC_BIND";
725f757f3fSDimitry Andric return "OMP_PLACES";
735f757f3fSDimitry Andric }
745f757f3fSDimitry Andric return affinity.env_var;
755f757f3fSDimitry Andric }
765f757f3fSDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
775f757f3fSDimitry Andric
// Fill in the hierarchical-barrier fields of 'thr_bar' from the machine
// hierarchy, initializing or growing the hierarchy to cover 'nproc' threads.
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need to
  // init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  // Leaf fan-out: numPerLevel[0] children per leaf parent, minus self
  __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
                     &(thr_bar->base_leaf_kids));
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
970b57cec5SDimitry Andric
// Legacy topology summary counters; still read by older code paths that
// predate kmp_topology_t.
static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
103fe6060f1SDimitry Andric
__kmp_hw_get_catalog_string(kmp_hw_t type,bool plural)104fe6060f1SDimitry Andric const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural) {
105fe6060f1SDimitry Andric switch (type) {
106fe6060f1SDimitry Andric case KMP_HW_SOCKET:
107fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(Sockets) : KMP_I18N_STR(Socket));
108fe6060f1SDimitry Andric case KMP_HW_DIE:
109fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(Dice) : KMP_I18N_STR(Die));
110fe6060f1SDimitry Andric case KMP_HW_MODULE:
111fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(Modules) : KMP_I18N_STR(Module));
112fe6060f1SDimitry Andric case KMP_HW_TILE:
113fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(Tiles) : KMP_I18N_STR(Tile));
114fe6060f1SDimitry Andric case KMP_HW_NUMA:
115fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(NumaDomains) : KMP_I18N_STR(NumaDomain));
116fe6060f1SDimitry Andric case KMP_HW_L3:
117fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(L3Caches) : KMP_I18N_STR(L3Cache));
118fe6060f1SDimitry Andric case KMP_HW_L2:
119fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(L2Caches) : KMP_I18N_STR(L2Cache));
120fe6060f1SDimitry Andric case KMP_HW_L1:
121fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(L1Caches) : KMP_I18N_STR(L1Cache));
122fe6060f1SDimitry Andric case KMP_HW_LLC:
123fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(LLCaches) : KMP_I18N_STR(LLCache));
124fe6060f1SDimitry Andric case KMP_HW_CORE:
125fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(Cores) : KMP_I18N_STR(Core));
126fe6060f1SDimitry Andric case KMP_HW_THREAD:
127fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(Threads) : KMP_I18N_STR(Thread));
128fe6060f1SDimitry Andric case KMP_HW_PROC_GROUP:
129fe6060f1SDimitry Andric return ((plural) ? KMP_I18N_STR(ProcGroups) : KMP_I18N_STR(ProcGroup));
1307a6dacacSDimitry Andric case KMP_HW_UNKNOWN:
1317a6dacacSDimitry Andric case KMP_HW_LAST:
132fe6060f1SDimitry Andric return KMP_I18N_STR(Unknown);
133fe6060f1SDimitry Andric }
1347a6dacacSDimitry Andric KMP_ASSERT2(false, "Unhandled kmp_hw_t enumeration");
1357a6dacacSDimitry Andric KMP_BUILTIN_UNREACHABLE;
1367a6dacacSDimitry Andric }
137fe6060f1SDimitry Andric
__kmp_hw_get_keyword(kmp_hw_t type,bool plural)138fe6060f1SDimitry Andric const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
139fe6060f1SDimitry Andric switch (type) {
140fe6060f1SDimitry Andric case KMP_HW_SOCKET:
141fe6060f1SDimitry Andric return ((plural) ? "sockets" : "socket");
142fe6060f1SDimitry Andric case KMP_HW_DIE:
143fe6060f1SDimitry Andric return ((plural) ? "dice" : "die");
144fe6060f1SDimitry Andric case KMP_HW_MODULE:
145fe6060f1SDimitry Andric return ((plural) ? "modules" : "module");
146fe6060f1SDimitry Andric case KMP_HW_TILE:
147fe6060f1SDimitry Andric return ((plural) ? "tiles" : "tile");
148fe6060f1SDimitry Andric case KMP_HW_NUMA:
149fe6060f1SDimitry Andric return ((plural) ? "numa_domains" : "numa_domain");
150fe6060f1SDimitry Andric case KMP_HW_L3:
151fe6060f1SDimitry Andric return ((plural) ? "l3_caches" : "l3_cache");
152fe6060f1SDimitry Andric case KMP_HW_L2:
153fe6060f1SDimitry Andric return ((plural) ? "l2_caches" : "l2_cache");
154fe6060f1SDimitry Andric case KMP_HW_L1:
155fe6060f1SDimitry Andric return ((plural) ? "l1_caches" : "l1_cache");
156fe6060f1SDimitry Andric case KMP_HW_LLC:
157fe6060f1SDimitry Andric return ((plural) ? "ll_caches" : "ll_cache");
158fe6060f1SDimitry Andric case KMP_HW_CORE:
159fe6060f1SDimitry Andric return ((plural) ? "cores" : "core");
160fe6060f1SDimitry Andric case KMP_HW_THREAD:
161fe6060f1SDimitry Andric return ((plural) ? "threads" : "thread");
162fe6060f1SDimitry Andric case KMP_HW_PROC_GROUP:
163fe6060f1SDimitry Andric return ((plural) ? "proc_groups" : "proc_group");
1647a6dacacSDimitry Andric case KMP_HW_UNKNOWN:
1657a6dacacSDimitry Andric case KMP_HW_LAST:
166fe6060f1SDimitry Andric return ((plural) ? "unknowns" : "unknown");
167fe6060f1SDimitry Andric }
1687a6dacacSDimitry Andric KMP_ASSERT2(false, "Unhandled kmp_hw_t enumeration");
1697a6dacacSDimitry Andric KMP_BUILTIN_UNREACHABLE;
1707a6dacacSDimitry Andric }
171fe6060f1SDimitry Andric
// Map a hybrid-CPU core type to a human-readable description.
// Only x86/x86_64 builds define specific core types; elsewhere everything
// reports as "unknown".
const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
  switch (type) {
  case KMP_HW_CORE_TYPE_UNKNOWN:
  case KMP_HW_MAX_NUM_CORE_TYPES:
    return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  case KMP_HW_CORE_TYPE_ATOM:
    return "Intel Atom(R) processor";
  case KMP_HW_CORE_TYPE_CORE:
    return "Intel(R) Core(TM) processor";
#endif
  }
  // All enumerators handled above; reaching here is a programming error.
  KMP_ASSERT2(false, "Unhandled kmp_hw_core_type_t enumeration");
  KMP_BUILTIN_UNREACHABLE;
}
187349cc55cSDimitry Andric
#if KMP_AFFINITY_SUPPORTED
// If affinity is supported, check the affinity
// verbose and warning flags before printing warning
// NOTE: expands to a bare if-statement (no do/while(0) wrapper), so avoid
// using it as the body of an un-braced if/else; 's' is evaluated up to
// three times.
#define KMP_AFF_WARNING(s, ...)                                                \
  if (s.flags.verbose || (s.flags.warnings && (s.type != affinity_none))) {    \
    KMP_WARNING(__VA_ARGS__);                                                  \
  }
#else
// Without affinity support there are no affinity flags to consult;
// warn unconditionally.
#define KMP_AFF_WARNING(s, ...) KMP_WARNING(__VA_ARGS__)
#endif
198fcaf7f86SDimitry Andric
199fe6060f1SDimitry Andric ////////////////////////////////////////////////////////////////////////////////
200fe6060f1SDimitry Andric // kmp_hw_thread_t methods
compare_ids(const void * a,const void * b)201fe6060f1SDimitry Andric int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
202fe6060f1SDimitry Andric const kmp_hw_thread_t *ahwthread = (const kmp_hw_thread_t *)a;
203fe6060f1SDimitry Andric const kmp_hw_thread_t *bhwthread = (const kmp_hw_thread_t *)b;
204fe6060f1SDimitry Andric int depth = __kmp_topology->get_depth();
205fe6060f1SDimitry Andric for (int level = 0; level < depth; ++level) {
206fe6060f1SDimitry Andric if (ahwthread->ids[level] < bhwthread->ids[level])
207fe6060f1SDimitry Andric return -1;
208fe6060f1SDimitry Andric else if (ahwthread->ids[level] > bhwthread->ids[level])
209fe6060f1SDimitry Andric return 1;
210fe6060f1SDimitry Andric }
211fe6060f1SDimitry Andric if (ahwthread->os_id < bhwthread->os_id)
212fe6060f1SDimitry Andric return -1;
213fe6060f1SDimitry Andric else if (ahwthread->os_id > bhwthread->os_id)
214fe6060f1SDimitry Andric return 1;
215fe6060f1SDimitry Andric return 0;
216fe6060f1SDimitry Andric }
217fe6060f1SDimitry Andric
2180b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED
compare_compact(const void * a,const void * b)219fe6060f1SDimitry Andric int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
220fe6060f1SDimitry Andric int i;
221fe6060f1SDimitry Andric const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a;
222fe6060f1SDimitry Andric const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b;
223fe6060f1SDimitry Andric int depth = __kmp_topology->get_depth();
224bdd1243dSDimitry Andric int compact = __kmp_topology->compact;
225bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(compact >= 0);
226bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(compact <= depth);
227bdd1243dSDimitry Andric for (i = 0; i < compact; i++) {
228fe6060f1SDimitry Andric int j = depth - i - 1;
229fe6060f1SDimitry Andric if (aa->sub_ids[j] < bb->sub_ids[j])
230fe6060f1SDimitry Andric return -1;
231fe6060f1SDimitry Andric if (aa->sub_ids[j] > bb->sub_ids[j])
232fe6060f1SDimitry Andric return 1;
233fe6060f1SDimitry Andric }
234fe6060f1SDimitry Andric for (; i < depth; i++) {
235bdd1243dSDimitry Andric int j = i - compact;
236fe6060f1SDimitry Andric if (aa->sub_ids[j] < bb->sub_ids[j])
237fe6060f1SDimitry Andric return -1;
238fe6060f1SDimitry Andric if (aa->sub_ids[j] > bb->sub_ids[j])
239fe6060f1SDimitry Andric return 1;
240fe6060f1SDimitry Andric }
241fe6060f1SDimitry Andric return 0;
242fe6060f1SDimitry Andric }
243fe6060f1SDimitry Andric #endif
244fe6060f1SDimitry Andric
print() const245fe6060f1SDimitry Andric void kmp_hw_thread_t::print() const {
246fe6060f1SDimitry Andric int depth = __kmp_topology->get_depth();
247fe6060f1SDimitry Andric printf("%4d ", os_id);
248fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) {
249fe6060f1SDimitry Andric printf("%4d ", ids[i]);
250fe6060f1SDimitry Andric }
2510eae32dcSDimitry Andric if (attrs) {
2520eae32dcSDimitry Andric if (attrs.is_core_type_valid())
2530eae32dcSDimitry Andric printf(" (%s)", __kmp_hw_get_core_type_string(attrs.get_core_type()));
2540eae32dcSDimitry Andric if (attrs.is_core_eff_valid())
2550eae32dcSDimitry Andric printf(" (eff=%d)", attrs.get_core_eff());
256349cc55cSDimitry Andric }
2575f757f3fSDimitry Andric if (leader)
2585f757f3fSDimitry Andric printf(" (leader)");
259fe6060f1SDimitry Andric printf("\n");
260fe6060f1SDimitry Andric }
261fe6060f1SDimitry Andric
262fe6060f1SDimitry Andric ////////////////////////////////////////////////////////////////////////////////
263fe6060f1SDimitry Andric // kmp_topology_t methods
264fe6060f1SDimitry Andric
265349cc55cSDimitry Andric // Add a layer to the topology based on the ids. Assume the topology
266349cc55cSDimitry Andric // is perfectly nested (i.e., so no object has more than one parent)
// Add a layer to the topology based on the ids. Assume the topology
// is perfectly nested (i.e., so no object has more than one parent)
// 'ids' holds one id per hardware thread (num_hw_threads entries), in the
// same order as hw_threads[].
void kmp_topology_t::_insert_layer(kmp_hw_t type, const int *ids) {
  // Figure out where the layer should go by comparing the ids of the current
  // layers with the new ids
  int target_layer;
  int previous_id = kmp_hw_thread_t::UNKNOWN_ID;
  int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID;

  // Start from the highest layer and work down to find target layer
  // If new layer is equal to another layer then put the new layer above
  for (target_layer = 0; target_layer < depth; ++target_layer) {
    bool layers_equal = true;
    bool strictly_above_target_layer = false;
    for (int i = 0; i < num_hw_threads; ++i) {
      int id = hw_threads[i].ids[target_layer];
      int new_id = ids[i];
      // An existing-layer boundary without a new-layer boundary means the
      // new layer groups more threads, i.e. it sits strictly above.
      if (id != previous_id && new_id == previous_new_id) {
        // Found the layer we are strictly above
        strictly_above_target_layer = true;
        layers_equal = false;
        break;
      } else if (id == previous_id && new_id != previous_new_id) {
        // Found a layer we are below. Move to next layer and check.
        layers_equal = false;
        break;
      }
      previous_id = id;
      previous_new_id = new_id;
    }
    if (strictly_above_target_layer || layers_equal)
      break;
  }

  // Found the layer we are above. Now move everything to accommodate the new
  // layer. And put the new ids and type into the topology.
  for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
    types[j] = types[i];
  types[target_layer] = type;
  for (int k = 0; k < num_hw_threads; ++k) {
    for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
      hw_threads[k].ids[j] = hw_threads[k].ids[i];
    hw_threads[k].ids[target_layer] = ids[k];
  }
  // A newly inserted layer is equivalent only to itself
  equivalent[type] = type;
  depth++;
}
312349cc55cSDimitry Andric
313349cc55cSDimitry Andric #if KMP_GROUP_AFFINITY
314349cc55cSDimitry Andric // Insert the Windows Processor Group structure into the topology
// Insert the Windows Processor Group structure into the topology
// Each hardware thread's group id is looked up from a one-bit mask, then
// the ids are inserted as a new topology layer.
void kmp_topology_t::_insert_windows_proc_groups() {
  // Do not insert the processor group structure for a single group
  if (__kmp_num_proc_groups == 1)
    return;
  kmp_affin_mask_t *mask;
  int *ids = (int *)__kmp_allocate(sizeof(int) * num_hw_threads);
  KMP_CPU_ALLOC(mask);
  for (int i = 0; i < num_hw_threads; ++i) {
    // Build a mask containing only this thread's OS proc to query its group
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(hw_threads[i].os_id, mask);
    ids[i] = __kmp_get_proc_group(mask);
  }
  KMP_CPU_FREE(mask);
  _insert_layer(KMP_HW_PROC_GROUP, ids);
  __kmp_free(ids);

  // sort topology after adding proc groups
  __kmp_topology->sort_ids();
}
334349cc55cSDimitry Andric #endif
335349cc55cSDimitry Andric
336fe6060f1SDimitry Andric // Remove layers that don't add information to the topology.
337fe6060f1SDimitry Andric // This is done by having the layer take on the id = UNKNOWN_ID (-1)
// Remove layers that don't add information to the topology.
// This is done by having the layer take on the id = UNKNOWN_ID (-1)
// Two adjacent layers are "radix 1" when every object in the upper layer
// contains exactly one object of the lower layer; the less-preferred of the
// two is then folded away and recorded as equivalent to the kept one.
void kmp_topology_t::_remove_radix1_layers() {
  int preference[KMP_HW_LAST];
  int top_index1, top_index2;
  // Set up preference associative array
  // (higher value = keep this layer type when two layers are radix 1)
  preference[KMP_HW_SOCKET] = 110;
  preference[KMP_HW_PROC_GROUP] = 100;
  preference[KMP_HW_CORE] = 95;
  preference[KMP_HW_THREAD] = 90;
  preference[KMP_HW_NUMA] = 85;
  preference[KMP_HW_DIE] = 80;
  preference[KMP_HW_TILE] = 75;
  preference[KMP_HW_MODULE] = 73;
  preference[KMP_HW_L3] = 70;
  preference[KMP_HW_L2] = 65;
  preference[KMP_HW_L1] = 60;
  preference[KMP_HW_LLC] = 5;
  // Walk adjacent layer pairs from the top of the topology downward
  top_index1 = 0;
  top_index2 = 1;
  while (top_index1 < depth - 1 && top_index2 < depth) {
    kmp_hw_t type1 = types[top_index1];
    kmp_hw_t type2 = types[top_index2];
    KMP_ASSERT_VALID_HW_TYPE(type1);
    KMP_ASSERT_VALID_HW_TYPE(type2);
    // Do not allow the three main topology levels (sockets, cores, threads) to
    // be compacted down
    if ((type1 == KMP_HW_THREAD || type1 == KMP_HW_CORE ||
         type1 == KMP_HW_SOCKET) &&
        (type2 == KMP_HW_THREAD || type2 == KMP_HW_CORE ||
         type2 == KMP_HW_SOCKET)) {
      top_index1 = top_index2++;
      continue;
    }
    // Scan all hw threads: radix1 stays true only if the lower layer never
    // changes id while the upper layer's id stays the same.
    bool radix1 = true;
    bool all_same = true;
    int id1 = hw_threads[0].ids[top_index1];
    int id2 = hw_threads[0].ids[top_index2];
    int pref1 = preference[type1];
    int pref2 = preference[type2];
    for (int hwidx = 1; hwidx < num_hw_threads; ++hwidx) {
      if (hw_threads[hwidx].ids[top_index1] == id1 &&
          hw_threads[hwidx].ids[top_index2] != id2) {
        radix1 = false;
        break;
      }
      if (hw_threads[hwidx].ids[top_index2] != id2)
        all_same = false;
      id1 = hw_threads[hwidx].ids[top_index1];
      id2 = hw_threads[hwidx].ids[top_index2];
    }
    if (radix1) {
      // Select the layer to remove based on preference
      kmp_hw_t remove_type, keep_type;
      int remove_layer, remove_layer_ids;
      if (pref1 > pref2) {
        remove_type = type2;
        remove_layer = remove_layer_ids = top_index2;
        keep_type = type1;
      } else {
        remove_type = type1;
        remove_layer = remove_layer_ids = top_index1;
        keep_type = type2;
      }
      // If all the indexes for the second (deeper) layer are the same.
      // e.g., all are zero, then make sure to keep the first layer's ids
      if (all_same)
        remove_layer_ids = top_index2;
      // Remove radix one type by setting the equivalence, removing the id from
      // the hw threads and removing the layer from types and depth
      set_equivalent_type(remove_type, keep_type);
      for (int idx = 0; idx < num_hw_threads; ++idx) {
        kmp_hw_thread_t &hw_thread = hw_threads[idx];
        for (int d = remove_layer_ids; d < depth - 1; ++d)
          hw_thread.ids[d] = hw_thread.ids[d + 1];
      }
      for (int idx = remove_layer; idx < depth - 1; ++idx)
        types[idx] = types[idx + 1];
      depth--;
      // Note: indices are not advanced here so the (shifted) pair at the same
      // positions is re-examined on the next iteration.
    } else {
      top_index1 = top_index2++;
    }
  }
  KMP_ASSERT(depth > 0);
}
421fe6060f1SDimitry Andric
// Decide which detected topology layer acts as the last-level cache (LLC)
// and record it via the equivalence table. Preference order: L3, then L2,
// (KNL tiles on mic3), then L1, then socket/core as a fallback.
void kmp_topology_t::_set_last_level_cache() {
  if (get_equivalent_type(KMP_HW_L3) != KMP_HW_UNKNOWN)
    set_equivalent_type(KMP_HW_LLC, KMP_HW_L3);
  else if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
    set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
#if KMP_MIC_SUPPORTED
  else if (__kmp_mic_type == mic3) {
    // On KNL, the tile is the L2-sharing domain
    if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
    else if (get_equivalent_type(KMP_HW_TILE) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_TILE);
    // L2/Tile wasn't detected so just say L1
    else
      set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
  }
#endif
  else if (get_equivalent_type(KMP_HW_L1) != KMP_HW_UNKNOWN)
    set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
  // Fallback is to set last level cache to socket or core
  if (get_equivalent_type(KMP_HW_LLC) == KMP_HW_UNKNOWN) {
    if (get_equivalent_type(KMP_HW_SOCKET) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_SOCKET);
    else if (get_equivalent_type(KMP_HW_CORE) != KMP_HW_UNKNOWN)
      set_equivalent_type(KMP_HW_LLC, KMP_HW_CORE);
  }
  // By this point some layer must have been designated as the LLC
  KMP_ASSERT(get_equivalent_type(KMP_HW_LLC) != KMP_HW_UNKNOWN);
}
449fe6060f1SDimitry Andric
450fe6060f1SDimitry Andric // Gather the count of each topology layer and the ratio
// Gather the count of each topology layer and the ratio
// count[l]  = total number of objects at layer l
// ratio[l]  = max number of layer-l objects under a single parent object
// Also collects the set of core types and number of efficiency levels on
// hybrid CPUs. Assumes hw_threads[] is sorted by ids (boundary detection
// relies on id changes between consecutive threads).
void kmp_topology_t::_gather_enumeration_information() {
  int previous_id[KMP_HW_LAST];
  int max[KMP_HW_LAST];

  for (int i = 0; i < depth; ++i) {
    previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
    max[i] = 0;
    count[i] = 0;
    ratio[i] = 0;
  }
  int core_level = get_level(KMP_HW_CORE);
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    for (int layer = 0; layer < depth; ++layer) {
      int id = hw_thread.ids[layer];
      if (id != previous_id[layer]) {
        // Add an additional increment to each count
        for (int l = layer; l < depth; ++l)
          count[l]++;
        // Keep track of topology layer ratio statistics
        max[layer]++;
        for (int l = layer + 1; l < depth; ++l) {
          if (max[l] > ratio[l])
            ratio[l] = max[l];
          max[l] = 1;
        }
        // Figure out the number of different core types
        // and efficiencies for hybrid CPUs
        if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level) {
          if (hw_thread.attrs.is_core_eff_valid() &&
              hw_thread.attrs.core_eff >= num_core_efficiencies) {
            // Because efficiencies can range from 0 to max efficiency - 1,
            // the number of efficiencies is max efficiency + 1
            num_core_efficiencies = hw_thread.attrs.core_eff + 1;
          }
          if (hw_thread.attrs.is_core_type_valid()) {
            // Record the core type if it has not been seen before
            bool found = false;
            for (int j = 0; j < num_core_types; ++j) {
              if (hw_thread.attrs.get_core_type() == core_types[j]) {
                found = true;
                break;
              }
            }
            if (!found) {
              KMP_ASSERT(num_core_types < KMP_HW_MAX_NUM_CORE_TYPES);
              core_types[num_core_types++] = hw_thread.attrs.get_core_type();
            }
          }
        }
        // Deeper layers were handled by the inner count/ratio loops above
        break;
      }
    }
    for (int layer = 0; layer < depth; ++layer) {
      previous_id[layer] = hw_thread.ids[layer];
    }
  }
  // Flush the running maxima from the final parent object
  for (int layer = 0; layer < depth; ++layer) {
    if (max[layer] > ratio[layer])
      ratio[layer] = max[layer];
  }
}
512fe6060f1SDimitry Andric
// Return the maximum number of cores whose attributes contain 'attr' within
// any single object at layer 'above_level' (e.g., cores of a given type per
// socket). When 'find_all' is true, count across the whole machine instead.
// Assumes hw_threads[] is sorted by ids so that id changes mark boundaries.
int kmp_topology_t::_get_ncores_with_attr(const kmp_hw_attr_t &attr,
                                          int above_level,
                                          bool find_all) const {
  int current, current_max;
  int previous_id[KMP_HW_LAST];
  for (int i = 0; i < depth; ++i)
    previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
  int core_level = get_level(KMP_HW_CORE);
  if (find_all)
    above_level = -1;
  KMP_ASSERT(above_level < core_level);
  current_max = 0;
  current = 0;
  for (int i = 0; i < num_hw_threads; ++i) {
    kmp_hw_thread_t &hw_thread = hw_threads[i];
    if (!find_all && hw_thread.ids[above_level] != previous_id[above_level]) {
      // Crossed into a new 'above_level' object: record the finished
      // object's count and restart with this thread's core.
      if (current > current_max)
        current_max = current;
      current = hw_thread.attrs.contains(attr);
    } else {
      // Same parent object: count only when a new core starts (an id change
      // at or above the core level)
      for (int level = above_level + 1; level <= core_level; ++level) {
        if (hw_thread.ids[level] != previous_id[level]) {
          if (hw_thread.attrs.contains(attr))
            current++;
          break;
        }
      }
    }
    for (int level = 0; level < depth; ++level)
      previous_id[level] = hw_thread.ids[level];
  }
  // Account for the last (still open) object
  if (current > current_max)
    current_max = current;
  return current_max;
}
5480eae32dcSDimitry Andric
549fe6060f1SDimitry Andric // Find out if the topology is uniform
_discover_uniformity()550fe6060f1SDimitry Andric void kmp_topology_t::_discover_uniformity() {
551fe6060f1SDimitry Andric int num = 1;
552fe6060f1SDimitry Andric for (int level = 0; level < depth; ++level)
553fe6060f1SDimitry Andric num *= ratio[level];
554fe6060f1SDimitry Andric flags.uniform = (num == count[depth - 1]);
555fe6060f1SDimitry Andric }
556fe6060f1SDimitry Andric
557fe6060f1SDimitry Andric // Set all the sub_ids for each hardware thread
_set_sub_ids()558fe6060f1SDimitry Andric void kmp_topology_t::_set_sub_ids() {
559fe6060f1SDimitry Andric int previous_id[KMP_HW_LAST];
560fe6060f1SDimitry Andric int sub_id[KMP_HW_LAST];
561fe6060f1SDimitry Andric
562fe6060f1SDimitry Andric for (int i = 0; i < depth; ++i) {
563fe6060f1SDimitry Andric previous_id[i] = -1;
564fe6060f1SDimitry Andric sub_id[i] = -1;
565fe6060f1SDimitry Andric }
566fe6060f1SDimitry Andric for (int i = 0; i < num_hw_threads; ++i) {
567fe6060f1SDimitry Andric kmp_hw_thread_t &hw_thread = hw_threads[i];
568fe6060f1SDimitry Andric // Setup the sub_id
569fe6060f1SDimitry Andric for (int j = 0; j < depth; ++j) {
570fe6060f1SDimitry Andric if (hw_thread.ids[j] != previous_id[j]) {
571fe6060f1SDimitry Andric sub_id[j]++;
572fe6060f1SDimitry Andric for (int k = j + 1; k < depth; ++k) {
573fe6060f1SDimitry Andric sub_id[k] = 0;
574fe6060f1SDimitry Andric }
575fe6060f1SDimitry Andric break;
576fe6060f1SDimitry Andric }
577fe6060f1SDimitry Andric }
578fe6060f1SDimitry Andric // Set previous_id
579fe6060f1SDimitry Andric for (int j = 0; j < depth; ++j) {
580fe6060f1SDimitry Andric previous_id[j] = hw_thread.ids[j];
581fe6060f1SDimitry Andric }
582fe6060f1SDimitry Andric // Set the sub_ids field
583fe6060f1SDimitry Andric for (int j = 0; j < depth; ++j) {
584fe6060f1SDimitry Andric hw_thread.sub_ids[j] = sub_id[j];
585fe6060f1SDimitry Andric }
586fe6060f1SDimitry Andric }
587fe6060f1SDimitry Andric }
588fe6060f1SDimitry Andric
_set_globals()589fe6060f1SDimitry Andric void kmp_topology_t::_set_globals() {
590fe6060f1SDimitry Andric // Set nCoresPerPkg, nPackages, __kmp_nThreadsPerCore, __kmp_ncores
591fe6060f1SDimitry Andric int core_level, thread_level, package_level;
592fe6060f1SDimitry Andric package_level = get_level(KMP_HW_SOCKET);
593fe6060f1SDimitry Andric #if KMP_GROUP_AFFINITY
594fe6060f1SDimitry Andric if (package_level == -1)
595fe6060f1SDimitry Andric package_level = get_level(KMP_HW_PROC_GROUP);
596fe6060f1SDimitry Andric #endif
597fe6060f1SDimitry Andric core_level = get_level(KMP_HW_CORE);
598fe6060f1SDimitry Andric thread_level = get_level(KMP_HW_THREAD);
599fe6060f1SDimitry Andric
600fe6060f1SDimitry Andric KMP_ASSERT(core_level != -1);
601fe6060f1SDimitry Andric KMP_ASSERT(thread_level != -1);
602fe6060f1SDimitry Andric
603fe6060f1SDimitry Andric __kmp_nThreadsPerCore = calculate_ratio(thread_level, core_level);
604fe6060f1SDimitry Andric if (package_level != -1) {
605fe6060f1SDimitry Andric nCoresPerPkg = calculate_ratio(core_level, package_level);
606fe6060f1SDimitry Andric nPackages = get_count(package_level);
607fe6060f1SDimitry Andric } else {
608fe6060f1SDimitry Andric // assume one socket
609fe6060f1SDimitry Andric nCoresPerPkg = get_count(core_level);
610fe6060f1SDimitry Andric nPackages = 1;
611fe6060f1SDimitry Andric }
612fe6060f1SDimitry Andric #ifndef KMP_DFLT_NTH_CORES
613fe6060f1SDimitry Andric __kmp_ncores = get_count(core_level);
614fe6060f1SDimitry Andric #endif
615fe6060f1SDimitry Andric }
616fe6060f1SDimitry Andric
allocate(int nproc,int ndepth,const kmp_hw_t * types)617fe6060f1SDimitry Andric kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
618fe6060f1SDimitry Andric const kmp_hw_t *types) {
619fe6060f1SDimitry Andric kmp_topology_t *retval;
620fe6060f1SDimitry Andric // Allocate all data in one large allocation
621fe6060f1SDimitry Andric size_t size = sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc +
622349cc55cSDimitry Andric sizeof(int) * (size_t)KMP_HW_LAST * 3;
623fe6060f1SDimitry Andric char *bytes = (char *)__kmp_allocate(size);
624fe6060f1SDimitry Andric retval = (kmp_topology_t *)bytes;
625fe6060f1SDimitry Andric if (nproc > 0) {
626fe6060f1SDimitry Andric retval->hw_threads = (kmp_hw_thread_t *)(bytes + sizeof(kmp_topology_t));
627fe6060f1SDimitry Andric } else {
628fe6060f1SDimitry Andric retval->hw_threads = nullptr;
629fe6060f1SDimitry Andric }
630fe6060f1SDimitry Andric retval->num_hw_threads = nproc;
631fe6060f1SDimitry Andric retval->depth = ndepth;
632fe6060f1SDimitry Andric int *arr =
633fe6060f1SDimitry Andric (int *)(bytes + sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc);
634fe6060f1SDimitry Andric retval->types = (kmp_hw_t *)arr;
635349cc55cSDimitry Andric retval->ratio = arr + (size_t)KMP_HW_LAST;
636349cc55cSDimitry Andric retval->count = arr + 2 * (size_t)KMP_HW_LAST;
6370eae32dcSDimitry Andric retval->num_core_efficiencies = 0;
6380eae32dcSDimitry Andric retval->num_core_types = 0;
639bdd1243dSDimitry Andric retval->compact = 0;
6400eae32dcSDimitry Andric for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
6410eae32dcSDimitry Andric retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
642fe6060f1SDimitry Andric KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
643fe6060f1SDimitry Andric for (int i = 0; i < ndepth; ++i) {
644fe6060f1SDimitry Andric retval->types[i] = types[i];
645fe6060f1SDimitry Andric retval->equivalent[types[i]] = types[i];
646fe6060f1SDimitry Andric }
647fe6060f1SDimitry Andric return retval;
648fe6060f1SDimitry Andric }
649fe6060f1SDimitry Andric
deallocate(kmp_topology_t * topology)650fe6060f1SDimitry Andric void kmp_topology_t::deallocate(kmp_topology_t *topology) {
651fe6060f1SDimitry Andric if (topology)
652fe6060f1SDimitry Andric __kmp_free(topology);
653fe6060f1SDimitry Andric }
654fe6060f1SDimitry Andric
check_ids() const655fe6060f1SDimitry Andric bool kmp_topology_t::check_ids() const {
656fe6060f1SDimitry Andric // Assume ids have been sorted
657fe6060f1SDimitry Andric if (num_hw_threads == 0)
658fe6060f1SDimitry Andric return true;
659fe6060f1SDimitry Andric for (int i = 1; i < num_hw_threads; ++i) {
660fe6060f1SDimitry Andric kmp_hw_thread_t ¤t_thread = hw_threads[i];
661fe6060f1SDimitry Andric kmp_hw_thread_t &previous_thread = hw_threads[i - 1];
662fe6060f1SDimitry Andric bool unique = false;
663fe6060f1SDimitry Andric for (int j = 0; j < depth; ++j) {
664fe6060f1SDimitry Andric if (previous_thread.ids[j] != current_thread.ids[j]) {
665fe6060f1SDimitry Andric unique = true;
666fe6060f1SDimitry Andric break;
667fe6060f1SDimitry Andric }
668fe6060f1SDimitry Andric }
669fe6060f1SDimitry Andric if (unique)
670fe6060f1SDimitry Andric continue;
671fe6060f1SDimitry Andric return false;
672fe6060f1SDimitry Andric }
673fe6060f1SDimitry Andric return true;
674fe6060f1SDimitry Andric }
675fe6060f1SDimitry Andric
// Debug dump of the full topology to stdout: depth, per-level types/ratios/
// counts, hybrid core info, the type-equivalence map, uniformity, and every
// hardware thread's ids. Intended for developer debugging, not user output
// (user-facing reporting goes through print()).
void kmp_topology_t::dump() const {
  printf("***********************\n");
  printf("*** __kmp_topology: ***\n");
  printf("***********************\n");
  printf("* depth: %d\n", depth);

  printf("* types: ");
  for (int i = 0; i < depth; ++i)
    printf("%15s ", __kmp_hw_get_keyword(types[i]));
  printf("\n");

  printf("* ratio: ");
  for (int i = 0; i < depth; ++i) {
    printf("%15d ", ratio[i]);
  }
  printf("\n");

  printf("* count: ");
  for (int i = 0; i < depth; ++i) {
    printf("%15d ", count[i]);
  }
  printf("\n");

  printf("* num_core_eff: %d\n", num_core_efficiencies);
  printf("* num_core_types: %d\n", num_core_types);
  printf("* core_types: ");
  for (int i = 0; i < num_core_types; ++i)
    printf("%3d ", core_types[i]);
  printf("\n");

  // Which hardware types alias which detected levels
  printf("* equivalent map:\n");
  KMP_FOREACH_HW_TYPE(i) {
    const char *key = __kmp_hw_get_keyword(i);
    const char *value = __kmp_hw_get_keyword(equivalent[i]);
    printf("%-15s -> %-15s\n", key, value);
  }

  printf("* uniform: %s\n", (is_uniform() ? "Yes" : "No"));

  printf("* num_hw_threads: %d\n", num_hw_threads);
  printf("* hw_threads:\n");
  for (int i = 0; i < num_hw_threads; ++i) {
    hw_threads[i].print();
  }
  printf("***********************\n");
}
722fe6060f1SDimitry Andric
print(const char * env_var) const723fe6060f1SDimitry Andric void kmp_topology_t::print(const char *env_var) const {
724fe6060f1SDimitry Andric kmp_str_buf_t buf;
725fe6060f1SDimitry Andric int print_types_depth;
726fe6060f1SDimitry Andric __kmp_str_buf_init(&buf);
727fe6060f1SDimitry Andric kmp_hw_t print_types[KMP_HW_LAST + 2];
728fe6060f1SDimitry Andric
729fe6060f1SDimitry Andric // Num Available Threads
730bdd1243dSDimitry Andric if (num_hw_threads) {
731fe6060f1SDimitry Andric KMP_INFORM(AvailableOSProc, env_var, num_hw_threads);
732bdd1243dSDimitry Andric } else {
733bdd1243dSDimitry Andric KMP_INFORM(AvailableOSProc, env_var, __kmp_xproc);
734bdd1243dSDimitry Andric }
735fe6060f1SDimitry Andric
736fe6060f1SDimitry Andric // Uniform or not
737fe6060f1SDimitry Andric if (is_uniform()) {
738fe6060f1SDimitry Andric KMP_INFORM(Uniform, env_var);
739fe6060f1SDimitry Andric } else {
740fe6060f1SDimitry Andric KMP_INFORM(NonUniform, env_var);
741fe6060f1SDimitry Andric }
742fe6060f1SDimitry Andric
743fe6060f1SDimitry Andric // Equivalent types
744fe6060f1SDimitry Andric KMP_FOREACH_HW_TYPE(type) {
745fe6060f1SDimitry Andric kmp_hw_t eq_type = equivalent[type];
746fe6060f1SDimitry Andric if (eq_type != KMP_HW_UNKNOWN && eq_type != type) {
747fe6060f1SDimitry Andric KMP_INFORM(AffEqualTopologyTypes, env_var,
748fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(type),
749fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(eq_type));
750fe6060f1SDimitry Andric }
751fe6060f1SDimitry Andric }
752fe6060f1SDimitry Andric
753fe6060f1SDimitry Andric // Quick topology
754fe6060f1SDimitry Andric KMP_ASSERT(depth > 0 && depth <= (int)KMP_HW_LAST);
755fe6060f1SDimitry Andric // Create a print types array that always guarantees printing
756fe6060f1SDimitry Andric // the core and thread level
757fe6060f1SDimitry Andric print_types_depth = 0;
758fe6060f1SDimitry Andric for (int level = 0; level < depth; ++level)
759fe6060f1SDimitry Andric print_types[print_types_depth++] = types[level];
760fe6060f1SDimitry Andric if (equivalent[KMP_HW_CORE] != KMP_HW_CORE) {
761fe6060f1SDimitry Andric // Force in the core level for quick topology
762fe6060f1SDimitry Andric if (print_types[print_types_depth - 1] == KMP_HW_THREAD) {
763fe6060f1SDimitry Andric // Force core before thread e.g., 1 socket X 2 threads/socket
764fe6060f1SDimitry Andric // becomes 1 socket X 1 core/socket X 2 threads/socket
765fe6060f1SDimitry Andric print_types[print_types_depth - 1] = KMP_HW_CORE;
766fe6060f1SDimitry Andric print_types[print_types_depth++] = KMP_HW_THREAD;
767fe6060f1SDimitry Andric } else {
768fe6060f1SDimitry Andric print_types[print_types_depth++] = KMP_HW_CORE;
769fe6060f1SDimitry Andric }
770fe6060f1SDimitry Andric }
771fe6060f1SDimitry Andric // Always put threads at very end of quick topology
772fe6060f1SDimitry Andric if (equivalent[KMP_HW_THREAD] != KMP_HW_THREAD)
773fe6060f1SDimitry Andric print_types[print_types_depth++] = KMP_HW_THREAD;
774fe6060f1SDimitry Andric
775fe6060f1SDimitry Andric __kmp_str_buf_clear(&buf);
776fe6060f1SDimitry Andric kmp_hw_t numerator_type;
777fe6060f1SDimitry Andric kmp_hw_t denominator_type = KMP_HW_UNKNOWN;
778fe6060f1SDimitry Andric int core_level = get_level(KMP_HW_CORE);
779fe6060f1SDimitry Andric int ncores = get_count(core_level);
780fe6060f1SDimitry Andric
781fe6060f1SDimitry Andric for (int plevel = 0, level = 0; plevel < print_types_depth; ++plevel) {
782fe6060f1SDimitry Andric int c;
783fe6060f1SDimitry Andric bool plural;
784fe6060f1SDimitry Andric numerator_type = print_types[plevel];
785fe6060f1SDimitry Andric KMP_ASSERT_VALID_HW_TYPE(numerator_type);
786fe6060f1SDimitry Andric if (equivalent[numerator_type] != numerator_type)
787fe6060f1SDimitry Andric c = 1;
788fe6060f1SDimitry Andric else
789fe6060f1SDimitry Andric c = get_ratio(level++);
790fe6060f1SDimitry Andric plural = (c > 1);
791fe6060f1SDimitry Andric if (plevel == 0) {
792fe6060f1SDimitry Andric __kmp_str_buf_print(&buf, "%d %s", c,
793fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(numerator_type, plural));
794fe6060f1SDimitry Andric } else {
795fe6060f1SDimitry Andric __kmp_str_buf_print(&buf, " x %d %s/%s", c,
796fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(numerator_type, plural),
797fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(denominator_type));
798fe6060f1SDimitry Andric }
799fe6060f1SDimitry Andric denominator_type = numerator_type;
800fe6060f1SDimitry Andric }
801fe6060f1SDimitry Andric KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
802fe6060f1SDimitry Andric
8030eae32dcSDimitry Andric // Hybrid topology information
804349cc55cSDimitry Andric if (__kmp_is_hybrid_cpu()) {
8050eae32dcSDimitry Andric for (int i = 0; i < num_core_types; ++i) {
8060eae32dcSDimitry Andric kmp_hw_core_type_t core_type = core_types[i];
8070eae32dcSDimitry Andric kmp_hw_attr_t attr;
8080eae32dcSDimitry Andric attr.clear();
8090eae32dcSDimitry Andric attr.set_core_type(core_type);
8100eae32dcSDimitry Andric int ncores = get_ncores_with_attr(attr);
8110eae32dcSDimitry Andric if (ncores > 0) {
8120eae32dcSDimitry Andric KMP_INFORM(TopologyHybrid, env_var, ncores,
8130eae32dcSDimitry Andric __kmp_hw_get_core_type_string(core_type));
8140eae32dcSDimitry Andric KMP_ASSERT(num_core_efficiencies <= KMP_HW_MAX_NUM_CORE_EFFS)
8150eae32dcSDimitry Andric for (int eff = 0; eff < num_core_efficiencies; ++eff) {
8160eae32dcSDimitry Andric attr.set_core_eff(eff);
8170eae32dcSDimitry Andric int ncores_with_eff = get_ncores_with_attr(attr);
8180eae32dcSDimitry Andric if (ncores_with_eff > 0) {
8190eae32dcSDimitry Andric KMP_INFORM(TopologyHybridCoreEff, env_var, ncores_with_eff, eff);
8200eae32dcSDimitry Andric }
8210eae32dcSDimitry Andric }
8220eae32dcSDimitry Andric }
823349cc55cSDimitry Andric }
824349cc55cSDimitry Andric }
825349cc55cSDimitry Andric
826fe6060f1SDimitry Andric if (num_hw_threads <= 0) {
827fe6060f1SDimitry Andric __kmp_str_buf_free(&buf);
828fe6060f1SDimitry Andric return;
829fe6060f1SDimitry Andric }
830fe6060f1SDimitry Andric
831fe6060f1SDimitry Andric // Full OS proc to hardware thread map
832fe6060f1SDimitry Andric KMP_INFORM(OSProcToPhysicalThreadMap, env_var);
833fe6060f1SDimitry Andric for (int i = 0; i < num_hw_threads; i++) {
834fe6060f1SDimitry Andric __kmp_str_buf_clear(&buf);
835fe6060f1SDimitry Andric for (int level = 0; level < depth; ++level) {
836fe6060f1SDimitry Andric kmp_hw_t type = types[level];
837fe6060f1SDimitry Andric __kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
838fe6060f1SDimitry Andric __kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
839fe6060f1SDimitry Andric }
840349cc55cSDimitry Andric if (__kmp_is_hybrid_cpu())
841349cc55cSDimitry Andric __kmp_str_buf_print(
8420eae32dcSDimitry Andric &buf, "(%s)",
8430eae32dcSDimitry Andric __kmp_hw_get_core_type_string(hw_threads[i].attrs.get_core_type()));
844fe6060f1SDimitry Andric KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
845fe6060f1SDimitry Andric }
846fe6060f1SDimitry Andric
847fe6060f1SDimitry Andric __kmp_str_buf_free(&buf);
848fe6060f1SDimitry Andric }
849fe6060f1SDimitry Andric
850bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED
// Validate and finalize the user's affinity granularity request against the
// detected topology: downgrade unusable hybrid-attribute granularities on
// non-hybrid machines, substitute a valid level when the requested one does
// not exist, clamp to processor-group granularity on multi-group Windows,
// and finally compute affinity.gran_levels (number of topology levels below
// the granularity level). Mutates `affinity` in place.
void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
  const char *env_var = __kmp_get_affinity_env_var(affinity);
  // If requested hybrid CPU attributes for granularity (either OMP_PLACES or
  // KMP_AFFINITY), but none exist, then reset granularity and have below method
  // select a granularity and warn user.
  if (!__kmp_is_hybrid_cpu()) {
    if (affinity.core_attr_gran.valid) {
      // OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
      // instead
      KMP_AFF_WARNING(
          affinity, AffIgnoringNonHybrid, env_var,
          __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
      affinity.gran = KMP_HW_CORE;
      affinity.gran_levels = -1; // force recomputation below
      affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
      affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
    } else if (affinity.flags.core_types_gran ||
               affinity.flags.core_effs_gran) {
      // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
      if (affinity.flags.omp_places) {
        KMP_AFF_WARNING(
            affinity, AffIgnoringNonHybrid, env_var,
            __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
      } else {
        // KMP_AFFINITY=granularity=core_type|core_eff,...
        KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
                        "Intel(R) Hybrid Technology core attribute",
                        __kmp_hw_get_catalog_string(KMP_HW_CORE));
      }
      affinity.gran = KMP_HW_CORE;
      affinity.gran_levels = -1; // force recomputation below
      affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
      affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
    }
  }
  // Set the number of affinity granularity levels
  // (gran_levels < 0 means "not yet computed")
  if (affinity.gran_levels < 0) {
    kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
    // Check if user's granularity request is valid
    if (gran_type == KMP_HW_UNKNOWN) {
      // First try core, then thread, then package
      kmp_hw_t gran_types[3] = {KMP_HW_CORE, KMP_HW_THREAD, KMP_HW_SOCKET};
      for (auto g : gran_types) {
        if (get_equivalent_type(g) != KMP_HW_UNKNOWN) {
          gran_type = g;
          break;
        }
      }
      KMP_ASSERT(gran_type != KMP_HW_UNKNOWN);
      // Warn user what granularity setting will be used instead
      KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
                      __kmp_hw_get_catalog_string(affinity.gran),
                      __kmp_hw_get_catalog_string(gran_type));
      affinity.gran = gran_type;
    }
#if KMP_GROUP_AFFINITY
    // If more than one processor group exists, and the level of
    // granularity specified by the user is too coarse, then the
    // granularity must be adjusted "down" to processor group affinity
    // because threads can only exist within one processor group.
    // For example, if a user sets granularity=socket and there are two
    // processor groups that cover a socket, then the runtime must
    // restrict the granularity down to the processor group level.
    if (__kmp_num_proc_groups > 1) {
      int gran_depth = get_level(gran_type);
      int proc_group_depth = get_level(KMP_HW_PROC_GROUP);
      if (gran_depth >= 0 && proc_group_depth >= 0 &&
          gran_depth < proc_group_depth) {
        KMP_AFF_WARNING(affinity, AffGranTooCoarseProcGroup, env_var,
                        __kmp_hw_get_catalog_string(affinity.gran));
        affinity.gran = gran_type = KMP_HW_PROC_GROUP;
      }
    }
#endif
    // gran_levels = number of topology levels strictly below the
    // granularity level (0 if granularity is the deepest level)
    affinity.gran_levels = 0;
    for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
      affinity.gran_levels++;
  }
}
930bdd1243dSDimitry Andric #endif
931bdd1243dSDimitry Andric
// Canonicalize the detected topology: collapse trivial layers, recompute
// per-level counts/ratios, determine uniformity, assign sub_ids, publish the
// legacy globals, and record last-level-cache info. The phase order matters:
// each private step consumes state produced by the previous one.
void kmp_topology_t::canonicalize() {
#if KMP_GROUP_AFFINITY
  // On Windows, splice processor-group objects into the hierarchy first
  _insert_windows_proc_groups();
#endif
  _remove_radix1_layers();
  _gather_enumeration_information();
  _discover_uniformity();
  _set_sub_ids();
  _set_globals();
  _set_last_level_cache();

#if KMP_MIC_SUPPORTED
  // Manually Add L2 = Tile equivalence
  if (__kmp_mic_type == mic3) {
    if (get_level(KMP_HW_L2) != -1)
      set_equivalent_type(KMP_HW_TILE, KMP_HW_L2);
    else if (get_level(KMP_HW_TILE) != -1)
      set_equivalent_type(KMP_HW_L2, KMP_HW_TILE);
  }
#endif

  // Perform post canonicalization checking
  KMP_ASSERT(depth > 0);
  for (int level = 0; level < depth; ++level) {
    // All counts, ratios, and types must be valid
    KMP_ASSERT(count[level] > 0 && ratio[level] > 0);
    KMP_ASSERT_VALID_HW_TYPE(types[level]);
    // Detected types must point to themselves
    KMP_ASSERT(equivalent[types[level]] == types[level]);
  }
}
963fe6060f1SDimitry Andric
964fe6060f1SDimitry Andric // Canonicalize an explicit packages X cores/pkg X threads/core topology
canonicalize(int npackages,int ncores_per_pkg,int nthreads_per_core,int ncores)965fe6060f1SDimitry Andric void kmp_topology_t::canonicalize(int npackages, int ncores_per_pkg,
966fe6060f1SDimitry Andric int nthreads_per_core, int ncores) {
967fe6060f1SDimitry Andric int ndepth = 3;
968fe6060f1SDimitry Andric depth = ndepth;
969fe6060f1SDimitry Andric KMP_FOREACH_HW_TYPE(i) { equivalent[i] = KMP_HW_UNKNOWN; }
970fe6060f1SDimitry Andric for (int level = 0; level < depth; ++level) {
971fe6060f1SDimitry Andric count[level] = 0;
972fe6060f1SDimitry Andric ratio[level] = 0;
973fe6060f1SDimitry Andric }
974fe6060f1SDimitry Andric count[0] = npackages;
975fe6060f1SDimitry Andric count[1] = ncores;
976fe6060f1SDimitry Andric count[2] = __kmp_xproc;
977fe6060f1SDimitry Andric ratio[0] = npackages;
978fe6060f1SDimitry Andric ratio[1] = ncores_per_pkg;
979fe6060f1SDimitry Andric ratio[2] = nthreads_per_core;
980fe6060f1SDimitry Andric equivalent[KMP_HW_SOCKET] = KMP_HW_SOCKET;
981fe6060f1SDimitry Andric equivalent[KMP_HW_CORE] = KMP_HW_CORE;
982fe6060f1SDimitry Andric equivalent[KMP_HW_THREAD] = KMP_HW_THREAD;
983fe6060f1SDimitry Andric types[0] = KMP_HW_SOCKET;
984fe6060f1SDimitry Andric types[1] = KMP_HW_CORE;
985fe6060f1SDimitry Andric types[2] = KMP_HW_THREAD;
986fe6060f1SDimitry Andric //__kmp_avail_proc = __kmp_xproc;
987fe6060f1SDimitry Andric _discover_uniformity();
988fe6060f1SDimitry Andric }
989fe6060f1SDimitry Andric
9905f757f3fSDimitry Andric #if KMP_AFFINITY_SUPPORTED
9910eae32dcSDimitry Andric static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t & attr,kmp_str_buf_t * buf,bool plural)9920eae32dcSDimitry Andric __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
9930eae32dcSDimitry Andric bool plural) {
9940eae32dcSDimitry Andric __kmp_str_buf_init(buf);
9950eae32dcSDimitry Andric if (attr.is_core_type_valid())
9960eae32dcSDimitry Andric __kmp_str_buf_print(buf, "%s %s",
9970eae32dcSDimitry Andric __kmp_hw_get_core_type_string(attr.get_core_type()),
9980eae32dcSDimitry Andric __kmp_hw_get_catalog_string(KMP_HW_CORE, plural));
9990eae32dcSDimitry Andric else
10000eae32dcSDimitry Andric __kmp_str_buf_print(buf, "%s eff=%d",
10010eae32dcSDimitry Andric __kmp_hw_get_catalog_string(KMP_HW_CORE, plural),
10020eae32dcSDimitry Andric attr.get_core_eff());
10030eae32dcSDimitry Andric return buf;
10040eae32dcSDimitry Andric }
10050eae32dcSDimitry Andric
// Restrict the topology to the hardware threads whose OS procs are set in
// `mask`, compacting hw_threads in place. Also clears removed procs from the
// global __kmp_affin_fullMask and decrements __kmp_avail_proc. Returns true
// if any thread was filtered out (in which case derived topology state is
// recomputed).
bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) {
  // Apply the filter
  bool affected;
  int new_index = 0;
  for (int i = 0; i < num_hw_threads; ++i) {
    int os_id = hw_threads[i].os_id;
    if (KMP_CPU_ISSET(os_id, mask)) {
      // Keep this thread: compact it toward the front of the array
      if (i != new_index)
        hw_threads[new_index] = hw_threads[i];
      new_index++;
    } else {
      // Drop this thread and remove its proc from the global full mask
      KMP_CPU_CLR(os_id, __kmp_affin_fullMask);
      __kmp_avail_proc--;
    }
  }

  KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
  affected = (num_hw_threads != new_index);
  num_hw_threads = new_index;

  // Post hardware subset canonicalization
  // (counts, ratios, uniformity, globals, and LLC all depend on the set of
  // remaining hardware threads, so rebuild them when anything was removed)
  if (affected) {
    _gather_enumeration_information();
    _discover_uniformity();
    _set_globals();
    _set_last_level_cache();
#if KMP_OS_WINDOWS
    // Copy filtered full mask if topology has single processor group
    if (__kmp_num_proc_groups <= 1)
#endif
      __kmp_affin_origMask->copy(__kmp_affin_fullMask);
  }
  return affected;
}
10405f757f3fSDimitry Andric
1041fe6060f1SDimitry Andric // Apply the KMP_HW_SUBSET envirable to the topology
1042fe6060f1SDimitry Andric // Returns true if KMP_HW_SUBSET filtered any processors
1043fe6060f1SDimitry Andric // otherwise, returns false
filter_hw_subset()1044fe6060f1SDimitry Andric bool kmp_topology_t::filter_hw_subset() {
1045fe6060f1SDimitry Andric // If KMP_HW_SUBSET wasn't requested, then do nothing.
1046fe6060f1SDimitry Andric if (!__kmp_hw_subset)
1047fe6060f1SDimitry Andric return false;
1048fe6060f1SDimitry Andric
1049349cc55cSDimitry Andric // First, sort the KMP_HW_SUBSET items by the machine topology
1050349cc55cSDimitry Andric __kmp_hw_subset->sort();
1051349cc55cSDimitry Andric
1052*0fca6ea1SDimitry Andric __kmp_hw_subset->canonicalize(__kmp_topology);
1053*0fca6ea1SDimitry Andric
1054fe6060f1SDimitry Andric // Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
10550eae32dcSDimitry Andric bool using_core_types = false;
10560eae32dcSDimitry Andric bool using_core_effs = false;
1057*0fca6ea1SDimitry Andric bool is_absolute = __kmp_hw_subset->is_absolute();
1058fe6060f1SDimitry Andric int hw_subset_depth = __kmp_hw_subset->get_depth();
1059fe6060f1SDimitry Andric kmp_hw_t specified[KMP_HW_LAST];
1060d56accc7SDimitry Andric int *topology_levels = (int *)KMP_ALLOCA(sizeof(int) * hw_subset_depth);
1061fe6060f1SDimitry Andric KMP_ASSERT(hw_subset_depth > 0);
1062fe6060f1SDimitry Andric KMP_FOREACH_HW_TYPE(i) { specified[i] = KMP_HW_UNKNOWN; }
10630eae32dcSDimitry Andric int core_level = get_level(KMP_HW_CORE);
1064fe6060f1SDimitry Andric for (int i = 0; i < hw_subset_depth; ++i) {
1065fe6060f1SDimitry Andric int max_count;
10660eae32dcSDimitry Andric const kmp_hw_subset_t::item_t &item = __kmp_hw_subset->at(i);
10670eae32dcSDimitry Andric int num = item.num[0];
10680eae32dcSDimitry Andric int offset = item.offset[0];
10690eae32dcSDimitry Andric kmp_hw_t type = item.type;
1070fe6060f1SDimitry Andric kmp_hw_t equivalent_type = equivalent[type];
1071fe6060f1SDimitry Andric int level = get_level(type);
10720eae32dcSDimitry Andric topology_levels[i] = level;
1073fe6060f1SDimitry Andric
1074fe6060f1SDimitry Andric // Check to see if current layer is in detected machine topology
1075fe6060f1SDimitry Andric if (equivalent_type != KMP_HW_UNKNOWN) {
1076fe6060f1SDimitry Andric __kmp_hw_subset->at(i).type = equivalent_type;
1077fe6060f1SDimitry Andric } else {
1078bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetNotExistGeneric,
1079fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(type));
1080fe6060f1SDimitry Andric return false;
1081fe6060f1SDimitry Andric }
1082fe6060f1SDimitry Andric
10830eae32dcSDimitry Andric // Check to see if current layer has already been
10840eae32dcSDimitry Andric // specified either directly or through an equivalent type
1085fe6060f1SDimitry Andric if (specified[equivalent_type] != KMP_HW_UNKNOWN) {
1086bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetEqvLayers,
1087bdd1243dSDimitry Andric __kmp_hw_get_catalog_string(type),
1088fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(specified[equivalent_type]));
1089fe6060f1SDimitry Andric return false;
1090fe6060f1SDimitry Andric }
1091fe6060f1SDimitry Andric specified[equivalent_type] = type;
1092fe6060f1SDimitry Andric
1093fe6060f1SDimitry Andric // Check to see if each layer's num & offset parameters are valid
1094fe6060f1SDimitry Andric max_count = get_ratio(level);
1095*0fca6ea1SDimitry Andric if (!is_absolute) {
10960eae32dcSDimitry Andric if (max_count < 0 ||
10970eae32dcSDimitry Andric (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
1098fe6060f1SDimitry Andric bool plural = (num > 1);
1099bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric,
1100fe6060f1SDimitry Andric __kmp_hw_get_catalog_string(type, plural));
1101fe6060f1SDimitry Andric return false;
1102fe6060f1SDimitry Andric }
1103*0fca6ea1SDimitry Andric }
11040eae32dcSDimitry Andric
11050eae32dcSDimitry Andric // Check to see if core attributes are consistent
11060eae32dcSDimitry Andric if (core_level == level) {
11070eae32dcSDimitry Andric // Determine which core attributes are specified
11080eae32dcSDimitry Andric for (int j = 0; j < item.num_attrs; ++j) {
11090eae32dcSDimitry Andric if (item.attr[j].is_core_type_valid())
11100eae32dcSDimitry Andric using_core_types = true;
11110eae32dcSDimitry Andric if (item.attr[j].is_core_eff_valid())
11120eae32dcSDimitry Andric using_core_effs = true;
1113fe6060f1SDimitry Andric }
1114fe6060f1SDimitry Andric
11150eae32dcSDimitry Andric // Check if using a single core attribute on non-hybrid arch.
11160eae32dcSDimitry Andric // Do not ignore all of KMP_HW_SUBSET, just ignore the attribute.
11170eae32dcSDimitry Andric //
11180eae32dcSDimitry Andric // Check if using multiple core attributes on non-hyrbid arch.
11190eae32dcSDimitry Andric // Ignore all of KMP_HW_SUBSET if this is the case.
11200eae32dcSDimitry Andric if ((using_core_effs || using_core_types) && !__kmp_is_hybrid_cpu()) {
11210eae32dcSDimitry Andric if (item.num_attrs == 1) {
11220eae32dcSDimitry Andric if (using_core_effs) {
1123bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIgnoringAttr,
1124bdd1243dSDimitry Andric "efficiency");
11250eae32dcSDimitry Andric } else {
1126bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIgnoringAttr,
1127bdd1243dSDimitry Andric "core_type");
11280eae32dcSDimitry Andric }
11290eae32dcSDimitry Andric using_core_effs = false;
11300eae32dcSDimitry Andric using_core_types = false;
11310eae32dcSDimitry Andric } else {
1132bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAttrsNonHybrid);
11330eae32dcSDimitry Andric return false;
11340eae32dcSDimitry Andric }
11350eae32dcSDimitry Andric }
11360eae32dcSDimitry Andric
11370eae32dcSDimitry Andric // Check if using both core types and core efficiencies together
11380eae32dcSDimitry Andric if (using_core_types && using_core_effs) {
1139bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIncompat, "core_type",
1140bdd1243dSDimitry Andric "efficiency");
11410eae32dcSDimitry Andric return false;
11420eae32dcSDimitry Andric }
11430eae32dcSDimitry Andric
11440eae32dcSDimitry Andric // Check that core efficiency values are valid
11450eae32dcSDimitry Andric if (using_core_effs) {
11460eae32dcSDimitry Andric for (int j = 0; j < item.num_attrs; ++j) {
11470eae32dcSDimitry Andric if (item.attr[j].is_core_eff_valid()) {
11480eae32dcSDimitry Andric int core_eff = item.attr[j].get_core_eff();
11490eae32dcSDimitry Andric if (core_eff < 0 || core_eff >= num_core_efficiencies) {
11500eae32dcSDimitry Andric kmp_str_buf_t buf;
11510eae32dcSDimitry Andric __kmp_str_buf_init(&buf);
11520eae32dcSDimitry Andric __kmp_str_buf_print(&buf, "%d", item.attr[j].get_core_eff());
11530eae32dcSDimitry Andric __kmp_msg(kmp_ms_warning,
11540eae32dcSDimitry Andric KMP_MSG(AffHWSubsetAttrInvalid, "efficiency", buf.str),
11550eae32dcSDimitry Andric KMP_HNT(ValidValuesRange, 0, num_core_efficiencies - 1),
11560eae32dcSDimitry Andric __kmp_msg_null);
11570eae32dcSDimitry Andric __kmp_str_buf_free(&buf);
11580eae32dcSDimitry Andric return false;
11590eae32dcSDimitry Andric }
11600eae32dcSDimitry Andric }
11610eae32dcSDimitry Andric }
11620eae32dcSDimitry Andric }
11630eae32dcSDimitry Andric
11640eae32dcSDimitry Andric // Check that the number of requested cores with attributes is valid
1165*0fca6ea1SDimitry Andric if ((using_core_types || using_core_effs) && !is_absolute) {
11660eae32dcSDimitry Andric for (int j = 0; j < item.num_attrs; ++j) {
11670eae32dcSDimitry Andric int num = item.num[j];
11680eae32dcSDimitry Andric int offset = item.offset[j];
11690eae32dcSDimitry Andric int level_above = core_level - 1;
11700eae32dcSDimitry Andric if (level_above >= 0) {
11710eae32dcSDimitry Andric max_count = get_ncores_with_attr_per(item.attr[j], level_above);
11720eae32dcSDimitry Andric if (max_count <= 0 ||
11730eae32dcSDimitry Andric (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
11740eae32dcSDimitry Andric kmp_str_buf_t buf;
11750eae32dcSDimitry Andric __kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0);
1176bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric, buf.str);
11770eae32dcSDimitry Andric __kmp_str_buf_free(&buf);
11780eae32dcSDimitry Andric return false;
11790eae32dcSDimitry Andric }
11800eae32dcSDimitry Andric }
11810eae32dcSDimitry Andric }
11820eae32dcSDimitry Andric }
11830eae32dcSDimitry Andric
11840eae32dcSDimitry Andric if ((using_core_types || using_core_effs) && item.num_attrs > 1) {
11850eae32dcSDimitry Andric for (int j = 0; j < item.num_attrs; ++j) {
11860eae32dcSDimitry Andric // Ambiguous use of specific core attribute + generic core
11870eae32dcSDimitry Andric // e.g., 4c & 3c:intel_core or 4c & 3c:eff1
11880eae32dcSDimitry Andric if (!item.attr[j]) {
11890eae32dcSDimitry Andric kmp_hw_attr_t other_attr;
11900eae32dcSDimitry Andric for (int k = 0; k < item.num_attrs; ++k) {
11910eae32dcSDimitry Andric if (item.attr[k] != item.attr[j]) {
11920eae32dcSDimitry Andric other_attr = item.attr[k];
11930eae32dcSDimitry Andric break;
11940eae32dcSDimitry Andric }
11950eae32dcSDimitry Andric }
11960eae32dcSDimitry Andric kmp_str_buf_t buf;
11970eae32dcSDimitry Andric __kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0);
1198bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetIncompat,
11990eae32dcSDimitry Andric __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str);
12000eae32dcSDimitry Andric __kmp_str_buf_free(&buf);
12010eae32dcSDimitry Andric return false;
12020eae32dcSDimitry Andric }
12030eae32dcSDimitry Andric // Allow specifying a specific core type or core eff exactly once
12040eae32dcSDimitry Andric for (int k = 0; k < j; ++k) {
12050eae32dcSDimitry Andric if (!item.attr[j] || !item.attr[k])
12060eae32dcSDimitry Andric continue;
12070eae32dcSDimitry Andric if (item.attr[k] == item.attr[j]) {
12080eae32dcSDimitry Andric kmp_str_buf_t buf;
12090eae32dcSDimitry Andric __kmp_hw_get_catalog_core_string(item.attr[j], &buf,
12100eae32dcSDimitry Andric item.num[j] > 0);
1211bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAttrRepeat, buf.str);
12120eae32dcSDimitry Andric __kmp_str_buf_free(&buf);
12130eae32dcSDimitry Andric return false;
12140eae32dcSDimitry Andric }
12150eae32dcSDimitry Andric }
12160eae32dcSDimitry Andric }
12170eae32dcSDimitry Andric }
12180eae32dcSDimitry Andric }
12190eae32dcSDimitry Andric }
12200eae32dcSDimitry Andric
1221*0fca6ea1SDimitry Andric // For keeping track of sub_ids for an absolute KMP_HW_SUBSET
1222*0fca6ea1SDimitry Andric // or core attributes (core type or efficiency)
1223*0fca6ea1SDimitry Andric int prev_sub_ids[KMP_HW_LAST];
1224*0fca6ea1SDimitry Andric int abs_sub_ids[KMP_HW_LAST];
1225*0fca6ea1SDimitry Andric int core_eff_sub_ids[KMP_HW_MAX_NUM_CORE_EFFS];
1226*0fca6ea1SDimitry Andric int core_type_sub_ids[KMP_HW_MAX_NUM_CORE_TYPES];
1227*0fca6ea1SDimitry Andric for (size_t i = 0; i < KMP_HW_LAST; ++i) {
1228*0fca6ea1SDimitry Andric abs_sub_ids[i] = -1;
1229*0fca6ea1SDimitry Andric prev_sub_ids[i] = -1;
1230*0fca6ea1SDimitry Andric }
1231*0fca6ea1SDimitry Andric for (size_t i = 0; i < KMP_HW_MAX_NUM_CORE_EFFS; ++i)
1232*0fca6ea1SDimitry Andric core_eff_sub_ids[i] = -1;
1233*0fca6ea1SDimitry Andric for (size_t i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
1234*0fca6ea1SDimitry Andric core_type_sub_ids[i] = -1;
1235*0fca6ea1SDimitry Andric
1236*0fca6ea1SDimitry Andric // Determine which hardware threads should be filtered.
1237*0fca6ea1SDimitry Andric
1238*0fca6ea1SDimitry Andric // Helpful to determine if a topology layer is targeted by an absolute subset
1239*0fca6ea1SDimitry Andric auto is_targeted = [&](int level) {
1240*0fca6ea1SDimitry Andric if (is_absolute) {
1241*0fca6ea1SDimitry Andric for (int i = 0; i < hw_subset_depth; ++i)
1242*0fca6ea1SDimitry Andric if (topology_levels[i] == level)
1243*0fca6ea1SDimitry Andric return true;
1244*0fca6ea1SDimitry Andric return false;
1245*0fca6ea1SDimitry Andric }
1246*0fca6ea1SDimitry Andric // If not absolute KMP_HW_SUBSET, then every layer is seen as targeted
1247*0fca6ea1SDimitry Andric return true;
1248*0fca6ea1SDimitry Andric };
1249*0fca6ea1SDimitry Andric
1250*0fca6ea1SDimitry Andric // Helpful to index into core type sub Ids array
1251*0fca6ea1SDimitry Andric auto get_core_type_index = [](const kmp_hw_thread_t &t) {
12520eae32dcSDimitry Andric switch (t.attrs.get_core_type()) {
12537a6dacacSDimitry Andric case KMP_HW_CORE_TYPE_UNKNOWN:
12547a6dacacSDimitry Andric case KMP_HW_MAX_NUM_CORE_TYPES:
12557a6dacacSDimitry Andric return 0;
12560eae32dcSDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
12570eae32dcSDimitry Andric case KMP_HW_CORE_TYPE_ATOM:
12580eae32dcSDimitry Andric return 1;
12590eae32dcSDimitry Andric case KMP_HW_CORE_TYPE_CORE:
12600eae32dcSDimitry Andric return 2;
12610eae32dcSDimitry Andric #endif
12620eae32dcSDimitry Andric }
12637a6dacacSDimitry Andric KMP_ASSERT2(false, "Unhandled kmp_hw_thread_t enumeration");
12647a6dacacSDimitry Andric KMP_BUILTIN_UNREACHABLE;
12650eae32dcSDimitry Andric };
1266*0fca6ea1SDimitry Andric
1267*0fca6ea1SDimitry Andric // Helpful to index into core efficiencies sub Ids array
1268*0fca6ea1SDimitry Andric auto get_core_eff_index = [](const kmp_hw_thread_t &t) {
12690eae32dcSDimitry Andric return t.attrs.get_core_eff();
12700eae32dcSDimitry Andric };
12710eae32dcSDimitry Andric
12720eae32dcSDimitry Andric int num_filtered = 0;
12735f757f3fSDimitry Andric kmp_affin_mask_t *filtered_mask;
12745f757f3fSDimitry Andric KMP_CPU_ALLOC(filtered_mask);
12755f757f3fSDimitry Andric KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask);
1276fe6060f1SDimitry Andric for (int i = 0; i < num_hw_threads; ++i) {
1277fe6060f1SDimitry Andric kmp_hw_thread_t &hw_thread = hw_threads[i];
1278*0fca6ea1SDimitry Andric
1279*0fca6ea1SDimitry Andric // Figure out the absolute sub ids and core eff/type sub ids
1280*0fca6ea1SDimitry Andric if (is_absolute || using_core_effs || using_core_types) {
1281*0fca6ea1SDimitry Andric for (int level = 0; level < get_depth(); ++level) {
1282*0fca6ea1SDimitry Andric if (hw_thread.sub_ids[level] != prev_sub_ids[level]) {
1283*0fca6ea1SDimitry Andric bool found_targeted = false;
1284*0fca6ea1SDimitry Andric for (int j = level; j < get_depth(); ++j) {
1285*0fca6ea1SDimitry Andric bool targeted = is_targeted(j);
1286*0fca6ea1SDimitry Andric if (!found_targeted && targeted) {
1287*0fca6ea1SDimitry Andric found_targeted = true;
1288*0fca6ea1SDimitry Andric abs_sub_ids[j]++;
1289*0fca6ea1SDimitry Andric if (j == core_level && using_core_effs)
1290*0fca6ea1SDimitry Andric core_eff_sub_ids[get_core_eff_index(hw_thread)]++;
1291*0fca6ea1SDimitry Andric if (j == core_level && using_core_types)
1292*0fca6ea1SDimitry Andric core_type_sub_ids[get_core_type_index(hw_thread)]++;
1293*0fca6ea1SDimitry Andric } else if (targeted) {
1294*0fca6ea1SDimitry Andric abs_sub_ids[j] = 0;
1295*0fca6ea1SDimitry Andric if (j == core_level && using_core_effs)
1296*0fca6ea1SDimitry Andric core_eff_sub_ids[get_core_eff_index(hw_thread)] = 0;
1297*0fca6ea1SDimitry Andric if (j == core_level && using_core_types)
1298*0fca6ea1SDimitry Andric core_type_sub_ids[get_core_type_index(hw_thread)] = 0;
1299*0fca6ea1SDimitry Andric }
1300*0fca6ea1SDimitry Andric }
1301*0fca6ea1SDimitry Andric break;
1302*0fca6ea1SDimitry Andric }
1303*0fca6ea1SDimitry Andric }
1304*0fca6ea1SDimitry Andric for (int level = 0; level < get_depth(); ++level)
1305*0fca6ea1SDimitry Andric prev_sub_ids[level] = hw_thread.sub_ids[level];
1306*0fca6ea1SDimitry Andric }
13070eae32dcSDimitry Andric
1308fe6060f1SDimitry Andric // Check to see if this hardware thread should be filtered
1309fe6060f1SDimitry Andric bool should_be_filtered = false;
13100eae32dcSDimitry Andric for (int hw_subset_index = 0; hw_subset_index < hw_subset_depth;
13110eae32dcSDimitry Andric ++hw_subset_index) {
13120eae32dcSDimitry Andric const auto &hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
13130eae32dcSDimitry Andric int level = topology_levels[hw_subset_index];
13140eae32dcSDimitry Andric if (level == -1)
1315fe6060f1SDimitry Andric continue;
13160eae32dcSDimitry Andric if ((using_core_effs || using_core_types) && level == core_level) {
13170eae32dcSDimitry Andric // Look for the core attribute in KMP_HW_SUBSET which corresponds
13180eae32dcSDimitry Andric // to this hardware thread's core attribute. Use this num,offset plus
13190eae32dcSDimitry Andric // the running sub_id for the particular core attribute of this hardware
13200eae32dcSDimitry Andric // thread to determine if the hardware thread should be filtered or not.
13210eae32dcSDimitry Andric int attr_idx;
13220eae32dcSDimitry Andric kmp_hw_core_type_t core_type = hw_thread.attrs.get_core_type();
13230eae32dcSDimitry Andric int core_eff = hw_thread.attrs.get_core_eff();
13240eae32dcSDimitry Andric for (attr_idx = 0; attr_idx < hw_subset_item.num_attrs; ++attr_idx) {
13250eae32dcSDimitry Andric if (using_core_types &&
13260eae32dcSDimitry Andric hw_subset_item.attr[attr_idx].get_core_type() == core_type)
13270eae32dcSDimitry Andric break;
13280eae32dcSDimitry Andric if (using_core_effs &&
13290eae32dcSDimitry Andric hw_subset_item.attr[attr_idx].get_core_eff() == core_eff)
13300eae32dcSDimitry Andric break;
13310eae32dcSDimitry Andric }
13320eae32dcSDimitry Andric // This core attribute isn't in the KMP_HW_SUBSET so always filter it.
13330eae32dcSDimitry Andric if (attr_idx == hw_subset_item.num_attrs) {
13340eae32dcSDimitry Andric should_be_filtered = true;
13350eae32dcSDimitry Andric break;
13360eae32dcSDimitry Andric }
13370eae32dcSDimitry Andric int sub_id;
13380eae32dcSDimitry Andric int num = hw_subset_item.num[attr_idx];
13390eae32dcSDimitry Andric int offset = hw_subset_item.offset[attr_idx];
13400eae32dcSDimitry Andric if (using_core_types)
1341*0fca6ea1SDimitry Andric sub_id = core_type_sub_ids[get_core_type_index(hw_thread)];
13420eae32dcSDimitry Andric else
1343*0fca6ea1SDimitry Andric sub_id = core_eff_sub_ids[get_core_eff_index(hw_thread)];
13440eae32dcSDimitry Andric if (sub_id < offset ||
13450eae32dcSDimitry Andric (num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) {
13460eae32dcSDimitry Andric should_be_filtered = true;
13470eae32dcSDimitry Andric break;
13480eae32dcSDimitry Andric }
13490eae32dcSDimitry Andric } else {
1350*0fca6ea1SDimitry Andric int sub_id;
13510eae32dcSDimitry Andric int num = hw_subset_item.num[0];
13520eae32dcSDimitry Andric int offset = hw_subset_item.offset[0];
1353*0fca6ea1SDimitry Andric if (is_absolute)
1354*0fca6ea1SDimitry Andric sub_id = abs_sub_ids[level];
1355*0fca6ea1SDimitry Andric else
1356*0fca6ea1SDimitry Andric sub_id = hw_thread.sub_ids[level];
1357*0fca6ea1SDimitry Andric if (sub_id < offset ||
1358*0fca6ea1SDimitry Andric (num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) {
1359fe6060f1SDimitry Andric should_be_filtered = true;
1360fe6060f1SDimitry Andric break;
1361fe6060f1SDimitry Andric }
1362fe6060f1SDimitry Andric }
13630eae32dcSDimitry Andric }
13640eae32dcSDimitry Andric // Collect filtering information
13655f757f3fSDimitry Andric if (should_be_filtered) {
13665f757f3fSDimitry Andric KMP_CPU_CLR(hw_thread.os_id, filtered_mask);
13670eae32dcSDimitry Andric num_filtered++;
13680eae32dcSDimitry Andric }
13695f757f3fSDimitry Andric }
13700eae32dcSDimitry Andric
13710eae32dcSDimitry Andric // One last check that we shouldn't allow filtering entire machine
13720eae32dcSDimitry Andric if (num_filtered == num_hw_threads) {
1373bdd1243dSDimitry Andric KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered);
13740eae32dcSDimitry Andric return false;
13750eae32dcSDimitry Andric }
13760eae32dcSDimitry Andric
13770eae32dcSDimitry Andric // Apply the filter
13785f757f3fSDimitry Andric restrict_to_mask(filtered_mask);
1379fe6060f1SDimitry Andric return true;
1380fe6060f1SDimitry Andric }
1381fe6060f1SDimitry Andric
is_close(int hwt1,int hwt2,const kmp_affinity_t & stgs) const13825f757f3fSDimitry Andric bool kmp_topology_t::is_close(int hwt1, int hwt2,
13835f757f3fSDimitry Andric const kmp_affinity_t &stgs) const {
13845f757f3fSDimitry Andric int hw_level = stgs.gran_levels;
1385fe6060f1SDimitry Andric if (hw_level >= depth)
1386fe6060f1SDimitry Andric return true;
1387fe6060f1SDimitry Andric bool retval = true;
1388fe6060f1SDimitry Andric const kmp_hw_thread_t &t1 = hw_threads[hwt1];
1389fe6060f1SDimitry Andric const kmp_hw_thread_t &t2 = hw_threads[hwt2];
13905f757f3fSDimitry Andric if (stgs.flags.core_types_gran)
13915f757f3fSDimitry Andric return t1.attrs.get_core_type() == t2.attrs.get_core_type();
13925f757f3fSDimitry Andric if (stgs.flags.core_effs_gran)
13935f757f3fSDimitry Andric return t1.attrs.get_core_eff() == t2.attrs.get_core_eff();
1394fe6060f1SDimitry Andric for (int i = 0; i < (depth - hw_level); ++i) {
1395fe6060f1SDimitry Andric if (t1.ids[i] != t2.ids[i])
1396fe6060f1SDimitry Andric return false;
1397fe6060f1SDimitry Andric }
1398fe6060f1SDimitry Andric return retval;
1399fe6060f1SDimitry Andric }
1400fe6060f1SDimitry Andric
1401fe6060f1SDimitry Andric ////////////////////////////////////////////////////////////////////////////////
1402fe6060f1SDimitry Andric
14030b57cec5SDimitry Andric bool KMPAffinity::picked_api = false;
14040b57cec5SDimitry Andric
// Route all KMPAffinity and KMPAffinity::Mask heap (de)allocation through the
// runtime's own allocator (__kmp_allocate/__kmp_free) rather than the system
// heap, matching how the rest of the runtime manages memory.
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
14110b57cec5SDimitry Andric
// Select and install the affinity backend (__kmp_affinity_dispatch).
// Idempotent: does nothing once picked_api is set. Prefers the hwloc backend
// only when compiled in, explicitly requested, and affinity is not disabled;
// otherwise falls back to the native OS affinity implementation.
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // user requests Hwloc topology method
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity.type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}
14300b57cec5SDimitry Andric
destroy_api()14310b57cec5SDimitry Andric void KMPAffinity::destroy_api() {
14320b57cec5SDimitry Andric if (__kmp_affinity_dispatch != NULL) {
14330b57cec5SDimitry Andric delete __kmp_affinity_dispatch;
14340b57cec5SDimitry Andric __kmp_affinity_dispatch = NULL;
14350b57cec5SDimitry Andric picked_api = false;
14360b57cec5SDimitry Andric }
14370b57cec5SDimitry Andric }
14380b57cec5SDimitry Andric
// Advance the scan cursor to the terminating NUL of the string it points
// into; used after each KMP_SNPRINTF so the next write appends to the output.
#define KMP_ADVANCE_SCAN(scan)                                                 \
  while (*scan != '\0') {                                                      \
    scan++;                                                                    \
  }
14430b57cec5SDimitry Andric
// Print the affinity mask to the character array in a pretty format.
// The format is a comma separated list of non-negative integers or integer
// ranges: e.g., 1,2,3-5,7,9-15
// The format can also be the string "{<empty>}" if no bits are set in mask
// Returns buf. Output may be truncated (but remains NUL-terminated) if the
// mask does not fit into buf_len characters.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  // Minimum buffer size the formatting below assumes is available.
  KMP_ASSERT(buf_len >= 40);
  KMP_ASSERT(mask);
  char *scan = buf;
  char *end = buf + buf_len - 1; // last writable position (leaves room for NUL)

  // Check for empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find next range
    // [start, previous] is inclusive range of contiguous bits in mask
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do need a comma beforehand
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%u-%u", start, previous);
    } else {
      // Range with one or two contiguous bits in the affinity mask
      KMP_SNPRINTF(scan, end - scan + 1, "%u", start);
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%u", previous);
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with new start point
    start = finish;
    if (start == mask->end())
      break;
    // Check for overflow: stop early when fewer than two writable bytes
    // remain; the output produced so far stays valid and NUL-terminated.
    if (end - scan < 2)
      break;
  }

  // Check for overflow
  KMP_ASSERT(scan <= end);
  return buf;
}
15100b57cec5SDimitry Andric #undef KMP_ADVANCE_SCAN
15110b57cec5SDimitry Andric
15120b57cec5SDimitry Andric // Print the affinity mask to the string buffer object in a pretty format
15130b57cec5SDimitry Andric // The format is a comma separated list of non-negative integers or integer
15140b57cec5SDimitry Andric // ranges: e.g., 1,2,3-5,7,9-15
15150b57cec5SDimitry Andric // The format can also be the string "{<empty>}" if no bits are set in mask
__kmp_affinity_str_buf_mask(kmp_str_buf_t * buf,kmp_affin_mask_t * mask)15160b57cec5SDimitry Andric kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
15170b57cec5SDimitry Andric kmp_affin_mask_t *mask) {
15180b57cec5SDimitry Andric int start = 0, finish = 0, previous = 0;
15190b57cec5SDimitry Andric bool first_range;
15200b57cec5SDimitry Andric KMP_ASSERT(buf);
15210b57cec5SDimitry Andric KMP_ASSERT(mask);
15220b57cec5SDimitry Andric
15230b57cec5SDimitry Andric __kmp_str_buf_clear(buf);
15240b57cec5SDimitry Andric
15250b57cec5SDimitry Andric // Check for empty set.
15260b57cec5SDimitry Andric if (mask->begin() == mask->end()) {
15270b57cec5SDimitry Andric __kmp_str_buf_print(buf, "%s", "{<empty>}");
15280b57cec5SDimitry Andric return buf;
15290b57cec5SDimitry Andric }
15300b57cec5SDimitry Andric
15310b57cec5SDimitry Andric first_range = true;
15320b57cec5SDimitry Andric start = mask->begin();
15330b57cec5SDimitry Andric while (1) {
15340b57cec5SDimitry Andric // Find next range
15350b57cec5SDimitry Andric // [start, previous] is inclusive range of contiguous bits in mask
15360b57cec5SDimitry Andric for (finish = mask->next(start), previous = start;
15370b57cec5SDimitry Andric finish == previous + 1 && finish != mask->end();
15380b57cec5SDimitry Andric finish = mask->next(finish)) {
15390b57cec5SDimitry Andric previous = finish;
15400b57cec5SDimitry Andric }
15410b57cec5SDimitry Andric
15420b57cec5SDimitry Andric // The first range does not need a comma printed before it, but the rest
15430b57cec5SDimitry Andric // of the ranges do need a comma beforehand
15440b57cec5SDimitry Andric if (!first_range) {
15450b57cec5SDimitry Andric __kmp_str_buf_print(buf, "%s", ",");
15460b57cec5SDimitry Andric } else {
15470b57cec5SDimitry Andric first_range = false;
15480b57cec5SDimitry Andric }
15490b57cec5SDimitry Andric // Range with three or more contiguous bits in the affinity mask
15500b57cec5SDimitry Andric if (previous - start > 1) {
1551e8d8bef9SDimitry Andric __kmp_str_buf_print(buf, "%u-%u", start, previous);
15520b57cec5SDimitry Andric } else {
15530b57cec5SDimitry Andric // Range with one or two contiguous bits in the affinity mask
1554e8d8bef9SDimitry Andric __kmp_str_buf_print(buf, "%u", start);
15550b57cec5SDimitry Andric if (previous - start > 0) {
1556e8d8bef9SDimitry Andric __kmp_str_buf_print(buf, ",%u", previous);
15570b57cec5SDimitry Andric }
15580b57cec5SDimitry Andric }
15590b57cec5SDimitry Andric // Start over with new start point
15600b57cec5SDimitry Andric start = finish;
15610b57cec5SDimitry Andric if (start == mask->end())
15620b57cec5SDimitry Andric break;
15630b57cec5SDimitry Andric }
15640b57cec5SDimitry Andric return buf;
15650b57cec5SDimitry Andric }
15660b57cec5SDimitry Andric
1567349cc55cSDimitry Andric // Return (possibly empty) affinity mask representing the offline CPUs
1568349cc55cSDimitry Andric // Caller must free the mask
kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
  kmp_affin_mask_t *offline;
  KMP_CPU_ALLOC(offline);
  KMP_CPU_ZERO(offline);
#if KMP_OS_LINUX
  int n, begin_cpu, end_cpu;
  kmp_safe_raii_file_t offline_file;
  // Consume whitespace; push the first non-whitespace character back so the
  // next read sees it.
  auto skip_ws = [](FILE *f) {
    int c;
    do {
      c = fgetc(f);
    } while (isspace(c));
    if (c != EOF)
      ungetc(c, f);
  };
  // File contains CSV of integer ranges representing the offline CPUs
  // e.g., 1,2,4-7,9,11-15
  int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
  if (status != 0)
    return offline; // file absent/unreadable => report no offline CPUs
  while (!feof(offline_file)) {
    skip_ws(offline_file);
    n = fscanf(offline_file, "%d", &begin_cpu);
    if (n != 1)
      break;
    skip_ws(offline_file);
    int c = fgetc(offline_file);
    if (c == EOF || c == ',') {
      // Just single CPU
      end_cpu = begin_cpu;
    } else if (c == '-') {
      // Range of CPUs
      skip_ws(offline_file);
      n = fscanf(offline_file, "%d", &end_cpu);
      if (n != 1)
        break;
      skip_ws(offline_file);
      c = fgetc(offline_file); // skip ','
    } else {
      // Syntax problem
      break;
    }
    // Ensure a valid range of CPUs
    if (begin_cpu < 0 || begin_cpu >= __kmp_xproc || end_cpu < 0 ||
        end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
      // Skip malformed or out-of-range entries rather than aborting the parse.
      continue;
    }
    // Insert [begin_cpu, end_cpu] into offline mask
    for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
      KMP_CPU_SET(cpu, offline);
    }
  }
#endif
  return offline;
}
1624349cc55cSDimitry Andric
1625349cc55cSDimitry Andric // Return the number of available procs
// Fill 'mask' with every available (online) processor and return the count.
// With multiple Windows processor groups, all active processors of every
// group are included; otherwise all system procs minus the offline set.
int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  int avail_proc = 0;
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        // Each processor group occupies a fixed-width span of bits
        // (CHAR_BIT * sizeof(DWORD_PTR)) within the mask.
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
        avail_proc++;
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    kmp_affin_mask_t *offline_cpus = __kmp_affinity_get_offline_cpus();
    for (proc = 0; proc < __kmp_xproc; proc++) {
      // Skip offline CPUs
      if (KMP_CPU_ISSET(proc, offline_cpus))
        continue;
      KMP_CPU_SET(proc, mask);
      avail_proc++;
    }
    KMP_CPU_FREE(offline_cpus);
  }

  return avail_proc;
}
16620b57cec5SDimitry Andric
// All of the __kmp_affinity_create_*_map() routines should allocate the
// internal topology object and set the layer ids for it. Each routine
// returns a boolean on whether it was successful at doing so.
// Mask of all OS procs included in the machine model; used below to decide
// which hardware threads the topology records (see KMP_CPU_ISSET uses).
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
// Original mask is a subset of full mask in multiple processor groups topology
kmp_affin_mask_t *__kmp_affin_origMask = NULL;
16690b57cec5SDimitry Andric
16700b57cec5SDimitry Andric #if KMP_USE_HWLOC
// True if "obj" is a cache object of any kind. hwloc 2.x exposes several
// distinct cache object types; hwloc 1.x has the single HWLOC_OBJ_CACHE.
static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
#if HWLOC_API_VERSION >= 0x00020000
  return hwloc_obj_type_is_cache(obj->type);
#else
  return obj->type == HWLOC_OBJ_CACHE;
#endif
}
16780b57cec5SDimitry Andric
// Returns KMP_HW_* type derived from HWLOC_* type
// (KMP_HW_UNKNOWN for hwloc objects the KMP topology does not model).
static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {

  if (__kmp_hwloc_is_cache_type(obj)) {
    // Instruction caches are not represented in the topology.
    if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
      return KMP_HW_UNKNOWN;
    switch (obj->attr->cache.depth) {
    case 1:
      return KMP_HW_L1;
    case 2:
#if KMP_MIC_SUPPORTED
      // For mic3, the depth-2 cache level is modeled as a tile instead of L2.
      if (__kmp_mic_type == mic3) {
        return KMP_HW_TILE;
      }
#endif
      return KMP_HW_L2;
    case 3:
      return KMP_HW_L3;
    }
    return KMP_HW_UNKNOWN;
  }

  switch (obj->type) {
  case HWLOC_OBJ_PACKAGE:
    return KMP_HW_SOCKET;
  case HWLOC_OBJ_NUMANODE:
    return KMP_HW_NUMA;
  case HWLOC_OBJ_CORE:
    return KMP_HW_CORE;
  case HWLOC_OBJ_PU:
    return KMP_HW_THREAD;
  case HWLOC_OBJ_GROUP:
    // Group objects carry a "kind" distinguishing dies, tiles, modules, and
    // Windows processor groups (kind constants copied from hwloc above).
#if HWLOC_API_VERSION >= 0x00020000
    if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE)
      return KMP_HW_DIE;
    else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_TILE)
      return KMP_HW_TILE;
    else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_MODULE)
      return KMP_HW_MODULE;
    else if (obj->attr->group.kind == HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP)
      return KMP_HW_PROC_GROUP;
#endif
    return KMP_HW_UNKNOWN;
#if HWLOC_API_VERSION >= 0x00020100
  case HWLOC_OBJ_DIE:
    return KMP_HW_DIE;
#endif
  }
  return KMP_HW_UNKNOWN;
}
17290b57cec5SDimitry Andric
// Returns the number of objects of type 'type' below 'obj' within the topology
// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
// object.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  // Walk objects of 'type' in logical order starting with the first one below
  // 'obj', stopping once an object's ancestor of obj's type is no longer
  // 'obj' itself (i.e. the walk has left obj's subtree).
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology,
                                                       obj->type, first) == obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}
17480b57cec5SDimitry Andric
// This gets the sub_id for a lower object under a higher object in the
// topology tree
static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
                                  hwloc_obj_t lower) {
  hwloc_obj_t obj;
  hwloc_obj_type_t ltype = lower->type;
  int lindex = lower->logical_index - 1;
  int sub_id = 0;
  // Get the previous lower object
  obj = hwloc_get_obj_by_type(t, ltype, lindex);
  // Scan backwards over preceding same-type siblings still contained in
  // 'higher'. A non-NULL obj->userdata caches (that sibling's sub_id + 1),
  // which is exactly this object's sub_id, so the scan can stop early.
  while (obj && lindex >= 0 &&
         hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
    if (obj->userdata) {
      sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
      break;
    }
    sub_id++;
    lindex--;
    obj = hwloc_get_obj_by_type(t, ltype, lindex);
  }
  // store sub_id + 1 so that 0 is differentiated from NULL
  lower->userdata = RCAST(void *, sub_id + 1);
  return sub_id;
}
17730b57cec5SDimitry Andric
// Build __kmp_topology from the hwloc topology tree: walk from a sample PU up
// to the root to discover which layer types exist, then record per-PU layer
// ids for every hardware thread in __kmp_affin_fullMask. Returns true on
// success; *msg_id receives an i18n error id on failure paths.
static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
  kmp_hw_t type;
  int hw_thread_index, sub_id;
  int depth;
  hwloc_obj_t pu, obj, root, prev;
  kmp_hw_t types[KMP_HW_LAST];
  hwloc_obj_type_t hwloc_types[KMP_HW_LAST];

  hwloc_topology_t tp = __kmp_hwloc_topology;
  *msg_id = kmp_i18n_null;
  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
  }

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity.type == affinity_none);
    // hwloc only guarantees existence of PU object, so check PACKAGE and CORE
    hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
    if (o != NULL)
      nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
    else
      nCoresPerPkg = 1; // no PACKAGE found
    o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
    if (o != NULL)
      __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
    else
      __kmp_nThreadsPerCore = 1; // no CORE found
    if (__kmp_nThreadsPerCore == 0)
      __kmp_nThreadsPerCore = 1; // to prevent possible division by 0
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    if (nCoresPerPkg == 0)
      nCoresPerPkg = 1; // to prevent possible division by 0
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    return true;
  }

#if HWLOC_API_VERSION >= 0x00020400
  // Handle multiple types of cores if they exist on the system
  int nr_cpu_kinds = hwloc_cpukinds_get_nr(tp, 0);

  typedef struct kmp_hwloc_cpukinds_info_t {
    int efficiency;
    kmp_hw_core_type_t core_type;
    hwloc_bitmap_t mask;
  } kmp_hwloc_cpukinds_info_t;
  kmp_hwloc_cpukinds_info_t *cpukinds = nullptr;

  if (nr_cpu_kinds > 0) {
    unsigned nr_infos;
    struct hwloc_info_s *infos;
    cpukinds = (kmp_hwloc_cpukinds_info_t *)__kmp_allocate(
        sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds);
    for (unsigned idx = 0; idx < (unsigned)nr_cpu_kinds; ++idx) {
      cpukinds[idx].efficiency = -1;
      cpukinds[idx].core_type = KMP_HW_CORE_TYPE_UNKNOWN;
      cpukinds[idx].mask = hwloc_bitmap_alloc();
      if (hwloc_cpukinds_get_info(tp, idx, cpukinds[idx].mask,
                                  &cpukinds[idx].efficiency, &nr_infos, &infos,
                                  0) == 0) {
        // Look for hwloc's "CoreType" info key to classify hybrid cores.
        for (unsigned i = 0; i < nr_infos; ++i) {
          if (__kmp_str_match("CoreType", 8, infos[i].name)) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
            if (__kmp_str_match("IntelAtom", 9, infos[i].value)) {
              cpukinds[idx].core_type = KMP_HW_CORE_TYPE_ATOM;
              break;
            } else if (__kmp_str_match("IntelCore", 9, infos[i].value)) {
              cpukinds[idx].core_type = KMP_HW_CORE_TYPE_CORE;
              break;
            }
#endif
          }
        }
      }
    }
  }
#endif

  root = hwloc_get_root_obj(tp);

  // Figure out the depth and types in the topology
  depth = 0;
  obj = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
  while (obj && obj != root) {
#if HWLOC_API_VERSION >= 0x00020000
    // In hwloc 2.x, NUMA nodes hang off a separate memory-children list
    // rather than the main parent/child chain.
    if (obj->memory_arity) {
      hwloc_obj_t memory;
      for (memory = obj->memory_first_child; memory;
           memory = hwloc_get_next_child(tp, obj, memory)) {
        if (memory->type == HWLOC_OBJ_NUMANODE)
          break;
      }
      if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
        types[depth] = KMP_HW_NUMA;
        hwloc_types[depth] = memory->type;
        depth++;
      }
    }
#endif
    type = __kmp_hwloc_type_2_topology_type(obj);
    if (type != KMP_HW_UNKNOWN) {
      types[depth] = type;
      hwloc_types[depth] = obj->type;
      depth++;
    }
    obj = obj->parent;
  }
  KMP_ASSERT(depth > 0);

  // Get the order for the types correct: the walk above collected layers
  // bottom-up, so reverse both arrays in place.
  for (int i = 0, j = depth - 1; i < j; ++i, --j) {
    hwloc_obj_type_t hwloc_temp = hwloc_types[i];
    kmp_hw_t temp = types[i];
    types[i] = types[j];
    types[j] = temp;
    hwloc_types[i] = hwloc_types[j];
    hwloc_types[j] = hwloc_temp;
  }

  // Allocate the data structure to be returned.
  __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);

  hw_thread_index = 0;
  pu = NULL;
  while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) {
    int index = depth - 1;
    bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
    if (included) {
      hw_thread.clear();
      hw_thread.ids[index] = pu->logical_index;
      hw_thread.os_id = pu->os_index;
      // If multiple core types, then set that attribute for the hardware thread
#if HWLOC_API_VERSION >= 0x00020400
      if (cpukinds) {
        int cpukind_index = -1;
        for (int i = 0; i < nr_cpu_kinds; ++i) {
          if (hwloc_bitmap_isset(cpukinds[i].mask, hw_thread.os_id)) {
            cpukind_index = i;
            break;
          }
        }
        if (cpukind_index >= 0) {
          hw_thread.attrs.set_core_type(cpukinds[cpukind_index].core_type);
          hw_thread.attrs.set_core_eff(cpukinds[cpukind_index].efficiency);
        }
      }
#endif
      index--;
    }
    // Walk from this PU up to the root, filling in ids for each known layer.
    obj = pu;
    prev = obj;
    while (obj != root && obj != NULL) {
      obj = obj->parent;
#if HWLOC_API_VERSION >= 0x00020000
      // NUMA Nodes are handled differently since they are not within the
      // parent/child structure anymore. They are separate children
      // of obj (memory_first_child points to first memory child)
      if (obj->memory_arity) {
        hwloc_obj_t memory;
        for (memory = obj->memory_first_child; memory;
             memory = hwloc_get_next_child(tp, obj, memory)) {
          if (memory->type == HWLOC_OBJ_NUMANODE)
            break;
        }
        if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
          sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
          if (included) {
            hw_thread.ids[index] = memory->logical_index;
            hw_thread.ids[index + 1] = sub_id;
            index--;
          }
          prev = memory;
        }
        prev = obj;
      }
#endif
      type = __kmp_hwloc_type_2_topology_type(obj);
      if (type != KMP_HW_UNKNOWN) {
        sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
        if (included) {
          hw_thread.ids[index] = obj->logical_index;
          hw_thread.ids[index + 1] = sub_id;
          index--;
        }
        prev = obj;
      }
    }
    if (included)
      hw_thread_index++;
  }

#if HWLOC_API_VERSION >= 0x00020400
  // Free the core types information
  if (cpukinds) {
    for (int idx = 0; idx < nr_cpu_kinds; ++idx)
      hwloc_bitmap_free(cpukinds[idx].mask);
    __kmp_free(cpukinds);
  }
#endif
  __kmp_topology->sort_ids();
  return true;
}
19780b57cec5SDimitry Andric #endif // KMP_USE_HWLOC
19790b57cec5SDimitry Andric
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's: every proc is its own
// socket with one single-threaded core.
static bool __kmp_affinity_create_flat_map(kmp_i18n_id_t *const msg_id) {
  *msg_id = kmp_i18n_null;
  int depth = 3;
  kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};

  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(UsingFlatOS, "KMP_AFFINITY");
  }

  // Even if __kmp_affinity.type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as
  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity.type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    return true;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;

  // Construct the data structure to be returned.
  __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct);
    hw_thread.clear();
    hw_thread.os_id = i;
    hw_thread.ids[0] = i; // socket id == os proc id in the flat model
    hw_thread.ids[1] = 0;
    hw_thread.ids[2] = 0;
    avail_ct++;
  }
  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }
  return true;
}
20310b57cec5SDimitry Andric
20320b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
20330b57cec5SDimitry Andric // If multiple Windows* OS processor groups exist, we can create a 2-level
20340b57cec5SDimitry Andric // topology map with the groups at level 0 and the individual procs at level 1.
20350b57cec5SDimitry Andric // This facilitates letting the threads float among all procs in a group,
20360b57cec5SDimitry Andric // if granularity=group (the default when there are multiple groups).
__kmp_affinity_create_proc_group_map(kmp_i18n_id_t * const msg_id)2037fe6060f1SDimitry Andric static bool __kmp_affinity_create_proc_group_map(kmp_i18n_id_t *const msg_id) {
20380b57cec5SDimitry Andric *msg_id = kmp_i18n_null;
2039fe6060f1SDimitry Andric int depth = 3;
2040fe6060f1SDimitry Andric kmp_hw_t types[] = {KMP_HW_PROC_GROUP, KMP_HW_CORE, KMP_HW_THREAD};
2041fe6060f1SDimitry Andric const static size_t BITS_PER_GROUP = CHAR_BIT * sizeof(DWORD_PTR);
20420b57cec5SDimitry Andric
2043bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) {
2044fe6060f1SDimitry Andric KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
2045fe6060f1SDimitry Andric }
2046fe6060f1SDimitry Andric
2047fe6060f1SDimitry Andric // If we aren't affinity capable, then use flat topology
20480b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
2049bdd1243dSDimitry Andric KMP_ASSERT(__kmp_affinity.type == affinity_none);
2050fe6060f1SDimitry Andric nPackages = __kmp_num_proc_groups;
2051fe6060f1SDimitry Andric __kmp_nThreadsPerCore = 1;
2052fe6060f1SDimitry Andric __kmp_ncores = __kmp_xproc;
2053fe6060f1SDimitry Andric nCoresPerPkg = nPackages / __kmp_ncores;
2054fe6060f1SDimitry Andric return true;
20550b57cec5SDimitry Andric }
20560b57cec5SDimitry Andric
20575ffd83dbSDimitry Andric // Construct the data structure to be returned.
2058fe6060f1SDimitry Andric __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
20590b57cec5SDimitry Andric int avail_ct = 0;
20600b57cec5SDimitry Andric int i;
20610b57cec5SDimitry Andric KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
20620b57cec5SDimitry Andric // Skip this proc if it is not included in the machine model.
20630b57cec5SDimitry Andric if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
20640b57cec5SDimitry Andric continue;
20650b57cec5SDimitry Andric }
2066fe6060f1SDimitry Andric kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct++);
2067fe6060f1SDimitry Andric hw_thread.clear();
2068fe6060f1SDimitry Andric hw_thread.os_id = i;
2069fe6060f1SDimitry Andric hw_thread.ids[0] = i / BITS_PER_GROUP;
2070fe6060f1SDimitry Andric hw_thread.ids[1] = hw_thread.ids[2] = i % BITS_PER_GROUP;
20710b57cec5SDimitry Andric }
2072fe6060f1SDimitry Andric return true;
20730b57cec5SDimitry Andric }
20740b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
20750b57cec5SDimitry Andric
20760b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
20770b57cec5SDimitry Andric
2078fe6060f1SDimitry Andric template <kmp_uint32 LSB, kmp_uint32 MSB>
__kmp_extract_bits(kmp_uint32 v)2079fe6060f1SDimitry Andric static inline unsigned __kmp_extract_bits(kmp_uint32 v) {
2080fe6060f1SDimitry Andric const kmp_uint32 SHIFT_LEFT = sizeof(kmp_uint32) * 8 - 1 - MSB;
2081fe6060f1SDimitry Andric const kmp_uint32 SHIFT_RIGHT = LSB;
2082fe6060f1SDimitry Andric kmp_uint32 retval = v;
2083fe6060f1SDimitry Andric retval <<= SHIFT_LEFT;
2084fe6060f1SDimitry Andric retval >>= (SHIFT_LEFT + SHIFT_RIGHT);
2085fe6060f1SDimitry Andric return retval;
2086fe6060f1SDimitry Andric }
2087fe6060f1SDimitry Andric
// Returns the number of bits needed to index 'count' distinct values,
// i.e. ceil(log2(count)); returns 0 for count <= 1 (and for non-positive
// count, matching the old behavior).
static int __kmp_cpuid_mask_width(int count) {
  int r = 0;
  // Handle count <= 1 up front: the loop condition would be immediately
  // false anyway, and the early return lets the loop use unsigned
  // arithmetic, avoiding the signed-overflow UB that "1 << r" had for
  // counts above 2^30 (1 << 31 overflows int).
  if (count <= 1)
    return 0;
  while ((1U << r) < (unsigned)count)
    ++r;
  return r;
}
20950b57cec5SDimitry Andric
// Per-OS-proc record gathered while the current thread is bound to that proc;
// the pkg/core/thread ids are decoded afterwards from the raw cpuid values.
class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // from cpuid after binding
  unsigned maxThreadsPerPkg; // from cpuid after binding
  unsigned pkgId; // inferred from above values
  unsigned coreId; // inferred from above values
  unsigned threadId; // inferred from above values
};
21060b57cec5SDimitry Andric
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void * a,const void * b)21070b57cec5SDimitry Andric static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
21080b57cec5SDimitry Andric const void *b) {
21090b57cec5SDimitry Andric const apicThreadInfo *aa = (const apicThreadInfo *)a;
21100b57cec5SDimitry Andric const apicThreadInfo *bb = (const apicThreadInfo *)b;
21110b57cec5SDimitry Andric if (aa->pkgId < bb->pkgId)
21120b57cec5SDimitry Andric return -1;
21130b57cec5SDimitry Andric if (aa->pkgId > bb->pkgId)
21140b57cec5SDimitry Andric return 1;
21150b57cec5SDimitry Andric if (aa->coreId < bb->coreId)
21160b57cec5SDimitry Andric return -1;
21170b57cec5SDimitry Andric if (aa->coreId > bb->coreId)
21180b57cec5SDimitry Andric return 1;
21190b57cec5SDimitry Andric if (aa->threadId < bb->threadId)
21200b57cec5SDimitry Andric return -1;
21210b57cec5SDimitry Andric if (aa->threadId > bb->threadId)
21220b57cec5SDimitry Andric return 1;
21230b57cec5SDimitry Andric return 0;
21240b57cec5SDimitry Andric }
21250b57cec5SDimitry Andric
2126fe6060f1SDimitry Andric class kmp_cache_info_t {
2127fe6060f1SDimitry Andric public:
2128fe6060f1SDimitry Andric struct info_t {
2129fe6060f1SDimitry Andric unsigned level, mask;
2130fe6060f1SDimitry Andric };
kmp_cache_info_t()2131fe6060f1SDimitry Andric kmp_cache_info_t() : depth(0) { get_leaf4_levels(); }
get_depth() const2132fe6060f1SDimitry Andric size_t get_depth() const { return depth; }
operator [](size_t index)2133fe6060f1SDimitry Andric info_t &operator[](size_t index) { return table[index]; }
operator [](size_t index) const2134fe6060f1SDimitry Andric const info_t &operator[](size_t index) const { return table[index]; }
2135fe6060f1SDimitry Andric
get_topology_type(unsigned level)2136fe6060f1SDimitry Andric static kmp_hw_t get_topology_type(unsigned level) {
2137fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(level >= 1 && level <= MAX_CACHE_LEVEL);
2138fe6060f1SDimitry Andric switch (level) {
2139fe6060f1SDimitry Andric case 1:
2140fe6060f1SDimitry Andric return KMP_HW_L1;
2141fe6060f1SDimitry Andric case 2:
2142fe6060f1SDimitry Andric return KMP_HW_L2;
2143fe6060f1SDimitry Andric case 3:
2144fe6060f1SDimitry Andric return KMP_HW_L3;
2145fe6060f1SDimitry Andric }
2146fe6060f1SDimitry Andric return KMP_HW_UNKNOWN;
2147fe6060f1SDimitry Andric }
2148fe6060f1SDimitry Andric
2149fe6060f1SDimitry Andric private:
2150fe6060f1SDimitry Andric static const int MAX_CACHE_LEVEL = 3;
2151fe6060f1SDimitry Andric
2152fe6060f1SDimitry Andric size_t depth;
2153fe6060f1SDimitry Andric info_t table[MAX_CACHE_LEVEL];
2154fe6060f1SDimitry Andric
get_leaf4_levels()2155fe6060f1SDimitry Andric void get_leaf4_levels() {
2156fe6060f1SDimitry Andric unsigned level = 0;
2157fe6060f1SDimitry Andric while (depth < MAX_CACHE_LEVEL) {
2158fe6060f1SDimitry Andric unsigned cache_type, max_threads_sharing;
2159fe6060f1SDimitry Andric unsigned cache_level, cache_mask_width;
2160fe6060f1SDimitry Andric kmp_cpuid buf2;
2161fe6060f1SDimitry Andric __kmp_x86_cpuid(4, level, &buf2);
2162fe6060f1SDimitry Andric cache_type = __kmp_extract_bits<0, 4>(buf2.eax);
2163fe6060f1SDimitry Andric if (!cache_type)
2164fe6060f1SDimitry Andric break;
2165fe6060f1SDimitry Andric // Skip instruction caches
2166fe6060f1SDimitry Andric if (cache_type == 2) {
2167fe6060f1SDimitry Andric level++;
2168fe6060f1SDimitry Andric continue;
2169fe6060f1SDimitry Andric }
2170fe6060f1SDimitry Andric max_threads_sharing = __kmp_extract_bits<14, 25>(buf2.eax) + 1;
2171fe6060f1SDimitry Andric cache_mask_width = __kmp_cpuid_mask_width(max_threads_sharing);
2172fe6060f1SDimitry Andric cache_level = __kmp_extract_bits<5, 7>(buf2.eax);
2173fe6060f1SDimitry Andric table[depth].level = cache_level;
2174fe6060f1SDimitry Andric table[depth].mask = ((-1) << cache_mask_width);
2175fe6060f1SDimitry Andric depth++;
2176fe6060f1SDimitry Andric level++;
2177fe6060f1SDimitry Andric }
2178fe6060f1SDimitry Andric }
2179fe6060f1SDimitry Andric };
2180fe6060f1SDimitry Andric
21810b57cec5SDimitry Andric // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
21820b57cec5SDimitry Andric // an algorithm which cycles through the available os threads, setting
21830b57cec5SDimitry Andric // the current thread's affinity mask to that thread, and then retrieves
21840b57cec5SDimitry Andric // the Apic Id for each thread context using the cpuid instruction.
// Decode the machine topology from the legacy APIC ids (cpuid leaves 1 and 4):
// bind the current thread to each available proc in turn, read that proc's
// APIC id, and split the id into pkg# / core# / thread# bit fields. On
// success, builds __kmp_topology and sets the global counts (__kmp_ncores,
// nPackages, nCoresPerPkg, __kmp_nThreadsPerCore) and returns true. On
// failure, sets *msg_id to the reason and returns false.
static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *msg_id = kmp_i18n_null;

  if (__kmp_affinity.flags.verbose) {
    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
  }

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return false;
  }

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable of
  // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then
  // we need to do something else - use the defaults that we calculated from
  // issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity.type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but is
    // disabled, this value will be 2 on a single core chip. Usually, it will be
    // 2 if HT is enabled and 1 if HT is disabled.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // The author of cpu_count.cpp treated this as only an upper bound on the
    // number of cores, but I haven't seen any cases where it was greater than
    // the actual number of cores, so we will treat it as exact in this block of
    // code.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
    // greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread, and correlating the cpuid info, so
    // if the machine is not affinity capable, we assume that HT is off. We have
    // seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
    // does not support HT.
    //
    // - Older OSes are usually found on machines with older chips, which do not
    //   support HT.
    // - The performance penalty for mistakenly identifying a machine as HT when
    //   it isn't (which results in blocktime being incorrectly set to 0) is
    //   greater than the penalty for mistakenly identifying a machine as
    //   being 1 thread/core when it is really HT enabled (which results in
    //   blocktime being incorrectly set to a positive value).
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    return true;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity.type = affinity_none.

  // Save the affinity mask for the current thread.
  // (RAII: the previous mask is restored when this object is destroyed.)
  kmp_affinity_raii_t previous_affinity;

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  //
  // The relevant information is:
  // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
  //   has a unique Apic Id, which is of the form pkg# : core# : thread#.
  // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
  //   of this field determines the width of the core# + thread# fields in the
  //   Apic Id. It is also an upper bound on the number of threads per
  //   package, but it has been verified that situations happen where it is not
  //   exact. In particular, on certain OS/chip combinations where Intel(R)
  //   Hyper-Threading Technology is supported by the chip but has been
  //   disabled, the value of this field will be 2 (for a single core chip).
  //   On other OS/chip combinations supporting Intel(R) Hyper-Threading
  //   Technology, the value of this field will be 1 when Intel(R)
  //   Hyper-Threading Technology is disabled and 2 when it is enabled.
  // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
  //   of this field (+1) determines the width of the core# field in the Apic
  //   Id. The comments in "cpucount.cpp" say that this value is an upper
  //   bound, but the IA-32 architecture manual says that it is exactly the
  //   number of cores per package, and I haven't seen any case where it
  //   wasn't.
  //
  // From this information, deduce the package Id, core Id, and thread Id,
  // and set the corresponding fields in the apicThreadInfo struct.
  unsigned i;
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    // edx bit 9 is the on-chip APIC feature flag; without an APIC the
    // legacy decode cannot work.
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return false;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
    // or greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // I've never seen this one happen, but I suppose it could, if the cpuid
      // instruction on a chip was really screwed up. Make sure to restore the
      // affinity mask before the tail call.
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return false;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  previous_affinity.restore();

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned among
  // the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
  // total # packages) are at this point - we want to determine that now. We
  // only have an upper bound on the first two figures.
  //
  // We also perform a consistency check at this point: the values returned by
  // the cpuid instruction for any thread bound to a given package had better
  // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars, though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      // Duplicate (pkgId, coreId, threadId) triple: the APIC ids were not
      // unique, so the legacy decode is unusable.
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return false;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return false;
    }
  }
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;
  __kmp_ncores = nCores;
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);

  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int idx = 0;
  int pkgLevel = 0;
  int coreLevel = 1;
  int threadLevel = 2;
  //(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  int depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
  kmp_hw_t types[3];
  if (pkgLevel >= 0)
    types[idx++] = KMP_HW_SOCKET;
  if (coreLevel >= 0)
    types[idx++] = KMP_HW_CORE;
  if (threadLevel >= 0)
    types[idx++] = KMP_HW_THREAD;

  KMP_ASSERT(depth > 0);
  __kmp_topology = kmp_topology_t::allocate(nApics, depth, types);

  for (i = 0; i < nApics; ++i) {
    idx = 0;
    unsigned os = threadInfo[i].osId;
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
    hw_thread.clear();

    if (pkgLevel >= 0) {
      hw_thread.ids[idx++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      hw_thread.ids[idx++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      hw_thread.ids[idx++] = threadInfo[i].threadId;
    }
    hw_thread.os_id = os;
  }

  __kmp_free(threadInfo);
  __kmp_topology->sort_ids();
  if (!__kmp_topology->check_ids()) {
    kmp_topology_t::deallocate(__kmp_topology);
    __kmp_topology = nullptr;
    *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
    return false;
  }
  return true;
}
24980b57cec5SDimitry Andric
2499349cc55cSDimitry Andric // Hybrid cpu detection using CPUID.1A
2500349cc55cSDimitry Andric // Thread should be pinned to processor already
__kmp_get_hybrid_info(kmp_hw_core_type_t * type,int * efficiency,unsigned * native_model_id)25010eae32dcSDimitry Andric static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type, int *efficiency,
2502349cc55cSDimitry Andric unsigned *native_model_id) {
2503349cc55cSDimitry Andric kmp_cpuid buf;
2504349cc55cSDimitry Andric __kmp_x86_cpuid(0x1a, 0, &buf);
2505349cc55cSDimitry Andric *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
25060eae32dcSDimitry Andric switch (*type) {
25070eae32dcSDimitry Andric case KMP_HW_CORE_TYPE_ATOM:
25080eae32dcSDimitry Andric *efficiency = 0;
25090eae32dcSDimitry Andric break;
25100eae32dcSDimitry Andric case KMP_HW_CORE_TYPE_CORE:
25110eae32dcSDimitry Andric *efficiency = 1;
25120eae32dcSDimitry Andric break;
25130eae32dcSDimitry Andric default:
25140eae32dcSDimitry Andric *efficiency = 0;
25150eae32dcSDimitry Andric }
2516349cc55cSDimitry Andric *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
2517349cc55cSDimitry Andric }
2518349cc55cSDimitry Andric
25190b57cec5SDimitry Andric // Intel(R) microarchitecture code name Nehalem, Dunnington and later
25200b57cec5SDimitry Andric // architectures support a newer interface for specifying the x2APIC Ids,
2521fe6060f1SDimitry Andric // based on CPUID.B or CPUID.1F
2522fe6060f1SDimitry Andric /*
2523fe6060f1SDimitry Andric * CPUID.B or 1F, Input ECX (sub leaf # aka level number)
    Bits        Bits          Bits       Bits
    31-16       15-8          7-5        4-0
2526fe6060f1SDimitry Andric ---+-----------+--------------+-------------+-----------------+
2527fe6060f1SDimitry Andric EAX| reserved | reserved | reserved | Bits to Shift |
2528fe6060f1SDimitry Andric ---+-----------|--------------+-------------+-----------------|
2529fe6060f1SDimitry Andric EBX| reserved | Num logical processors at level (16 bits) |
2530fe6060f1SDimitry Andric ---+-----------|--------------+-------------------------------|
2531fe6060f1SDimitry Andric ECX| reserved | Level Type | Level Number (8 bits) |
2532fe6060f1SDimitry Andric ---+-----------+--------------+-------------------------------|
2533fe6060f1SDimitry Andric EDX| X2APIC ID (32 bits) |
2534fe6060f1SDimitry Andric ---+----------------------------------------------------------+
2535fe6060f1SDimitry Andric */
2536fe6060f1SDimitry Andric
// Level type values as reported in CPUID.B/1F ECX[15:8]. These values are
// also used as bit positions in the known_levels bitmap passed to
// __kmp_x2apicid_get_levels() (known_levels |= 1ull << type).
enum {
  INTEL_LEVEL_TYPE_INVALID = 0, // Package level
  INTEL_LEVEL_TYPE_SMT = 1,
  INTEL_LEVEL_TYPE_CORE = 2,
  INTEL_LEVEL_TYPE_MODULE = 3,
  INTEL_LEVEL_TYPE_TILE = 4,
  INTEL_LEVEL_TYPE_DIE = 5,
  INTEL_LEVEL_TYPE_LAST = 6, // Count of level types, not a real level
};
2546fe6060f1SDimitry Andric
// One topology level decoded from CPUID.B/1F:
//   level_type: one of the INTEL_LEVEL_TYPE_* values
//   mask: x2APIC-id bits that select the object id at this level
//   mask_width: shift count from EAX[4:0] for this level
//   nitems: EBX[15:0], number of logical processors at this level
//   cache_mask: x2APIC-id bits above this level; compared against cache
//               masks to find equivalent cache/topology levels
struct cpuid_level_info_t {
  unsigned level_type, mask, mask_width, nitems, cache_mask;
};
2550fe6060f1SDimitry Andric
__kmp_intel_type_2_topology_type(int intel_type)2551fe6060f1SDimitry Andric static kmp_hw_t __kmp_intel_type_2_topology_type(int intel_type) {
2552fe6060f1SDimitry Andric switch (intel_type) {
2553fe6060f1SDimitry Andric case INTEL_LEVEL_TYPE_INVALID:
2554fe6060f1SDimitry Andric return KMP_HW_SOCKET;
2555fe6060f1SDimitry Andric case INTEL_LEVEL_TYPE_SMT:
2556fe6060f1SDimitry Andric return KMP_HW_THREAD;
2557fe6060f1SDimitry Andric case INTEL_LEVEL_TYPE_CORE:
2558fe6060f1SDimitry Andric return KMP_HW_CORE;
2559fe6060f1SDimitry Andric case INTEL_LEVEL_TYPE_TILE:
2560fe6060f1SDimitry Andric return KMP_HW_TILE;
2561fe6060f1SDimitry Andric case INTEL_LEVEL_TYPE_MODULE:
2562fe6060f1SDimitry Andric return KMP_HW_MODULE;
2563fe6060f1SDimitry Andric case INTEL_LEVEL_TYPE_DIE:
2564fe6060f1SDimitry Andric return KMP_HW_DIE;
2565fe6060f1SDimitry Andric }
2566fe6060f1SDimitry Andric return KMP_HW_UNKNOWN;
2567fe6060f1SDimitry Andric }
2568fe6060f1SDimitry Andric
2569fe6060f1SDimitry Andric // This function takes the topology leaf, a levels array to store the levels
2570fe6060f1SDimitry Andric // detected and a bitmap of the known levels.
2571fe6060f1SDimitry Andric // Returns the number of levels in the topology
2572fe6060f1SDimitry Andric static unsigned
__kmp_x2apicid_get_levels(int leaf,cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],kmp_uint64 known_levels)2573fe6060f1SDimitry Andric __kmp_x2apicid_get_levels(int leaf,
2574fe6060f1SDimitry Andric cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],
2575fe6060f1SDimitry Andric kmp_uint64 known_levels) {
2576fe6060f1SDimitry Andric unsigned level, levels_index;
2577fe6060f1SDimitry Andric unsigned level_type, mask_width, nitems;
25780b57cec5SDimitry Andric kmp_cpuid buf;
25790b57cec5SDimitry Andric
2580fe6060f1SDimitry Andric // New algorithm has known topology layers act as highest unknown topology
2581fe6060f1SDimitry Andric // layers when unknown topology layers exist.
2582fe6060f1SDimitry Andric // e.g., Suppose layers were SMT <X> CORE <Y> <Z> PACKAGE, where <X> <Y> <Z>
2583fe6060f1SDimitry Andric // are unknown topology layers, Then SMT will take the characteristics of
2584fe6060f1SDimitry Andric // (SMT x <X>) and CORE will take the characteristics of (CORE x <Y> x <Z>).
2585fe6060f1SDimitry Andric // This eliminates unknown portions of the topology while still keeping the
2586fe6060f1SDimitry Andric // correct structure.
2587fe6060f1SDimitry Andric level = levels_index = 0;
2588fe6060f1SDimitry Andric do {
2589fe6060f1SDimitry Andric __kmp_x86_cpuid(leaf, level, &buf);
2590fe6060f1SDimitry Andric level_type = __kmp_extract_bits<8, 15>(buf.ecx);
2591fe6060f1SDimitry Andric mask_width = __kmp_extract_bits<0, 4>(buf.eax);
2592fe6060f1SDimitry Andric nitems = __kmp_extract_bits<0, 15>(buf.ebx);
2593fe6060f1SDimitry Andric if (level_type != INTEL_LEVEL_TYPE_INVALID && nitems == 0)
2594fe6060f1SDimitry Andric return 0;
25950b57cec5SDimitry Andric
2596fe6060f1SDimitry Andric if (known_levels & (1ull << level_type)) {
2597fe6060f1SDimitry Andric // Add a new level to the topology
2598fe6060f1SDimitry Andric KMP_ASSERT(levels_index < INTEL_LEVEL_TYPE_LAST);
2599fe6060f1SDimitry Andric levels[levels_index].level_type = level_type;
2600fe6060f1SDimitry Andric levels[levels_index].mask_width = mask_width;
2601fe6060f1SDimitry Andric levels[levels_index].nitems = nitems;
2602fe6060f1SDimitry Andric levels_index++;
2603fe6060f1SDimitry Andric } else {
2604fe6060f1SDimitry Andric // If it is an unknown level, then logically move the previous layer up
2605fe6060f1SDimitry Andric if (levels_index > 0) {
2606fe6060f1SDimitry Andric levels[levels_index - 1].mask_width = mask_width;
2607fe6060f1SDimitry Andric levels[levels_index - 1].nitems = nitems;
26080b57cec5SDimitry Andric }
2609fe6060f1SDimitry Andric }
26100b57cec5SDimitry Andric level++;
2611fe6060f1SDimitry Andric } while (level_type != INTEL_LEVEL_TYPE_INVALID);
2612fe6060f1SDimitry Andric
261306c3fb27SDimitry Andric // Ensure the INTEL_LEVEL_TYPE_INVALID (Socket) layer isn't first
261406c3fb27SDimitry Andric if (levels_index == 0 || levels[0].level_type == INTEL_LEVEL_TYPE_INVALID)
261506c3fb27SDimitry Andric return 0;
261606c3fb27SDimitry Andric
2617fe6060f1SDimitry Andric // Set the masks to & with apicid
2618fe6060f1SDimitry Andric for (unsigned i = 0; i < levels_index; ++i) {
2619fe6060f1SDimitry Andric if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) {
2620fe6060f1SDimitry Andric levels[i].mask = ~((-1) << levels[i].mask_width);
2621fe6060f1SDimitry Andric levels[i].cache_mask = (-1) << levels[i].mask_width;
2622fe6060f1SDimitry Andric for (unsigned j = 0; j < i; ++j)
2623fe6060f1SDimitry Andric levels[i].mask ^= levels[j].mask;
2624fe6060f1SDimitry Andric } else {
262506c3fb27SDimitry Andric KMP_DEBUG_ASSERT(i > 0);
2626fe6060f1SDimitry Andric levels[i].mask = (-1) << levels[i - 1].mask_width;
2627fe6060f1SDimitry Andric levels[i].cache_mask = 0;
26280b57cec5SDimitry Andric }
2629fe6060f1SDimitry Andric }
2630fe6060f1SDimitry Andric return levels_index;
2631fe6060f1SDimitry Andric }
2632fe6060f1SDimitry Andric
__kmp_affinity_create_x2apicid_map(kmp_i18n_id_t * const msg_id)2633fe6060f1SDimitry Andric static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
2634fe6060f1SDimitry Andric
2635fe6060f1SDimitry Andric cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST];
2636fe6060f1SDimitry Andric kmp_hw_t types[INTEL_LEVEL_TYPE_LAST];
2637fe6060f1SDimitry Andric unsigned levels_index;
2638fe6060f1SDimitry Andric kmp_cpuid buf;
2639fe6060f1SDimitry Andric kmp_uint64 known_levels;
2640fe6060f1SDimitry Andric int topology_leaf, highest_leaf, apic_id;
2641fe6060f1SDimitry Andric int num_leaves;
2642fe6060f1SDimitry Andric static int leaves[] = {0, 0};
2643fe6060f1SDimitry Andric
2644fe6060f1SDimitry Andric kmp_i18n_id_t leaf_message_id;
2645fe6060f1SDimitry Andric
2646fe6060f1SDimitry Andric KMP_BUILD_ASSERT(sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST);
2647fe6060f1SDimitry Andric
2648fe6060f1SDimitry Andric *msg_id = kmp_i18n_null;
2649bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) {
2650fe6060f1SDimitry Andric KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
2651fe6060f1SDimitry Andric }
2652fe6060f1SDimitry Andric
2653fe6060f1SDimitry Andric // Figure out the known topology levels
2654fe6060f1SDimitry Andric known_levels = 0ull;
2655fe6060f1SDimitry Andric for (int i = 0; i < INTEL_LEVEL_TYPE_LAST; ++i) {
2656fe6060f1SDimitry Andric if (__kmp_intel_type_2_topology_type(i) != KMP_HW_UNKNOWN) {
2657fe6060f1SDimitry Andric known_levels |= (1ull << i);
2658fe6060f1SDimitry Andric }
2659fe6060f1SDimitry Andric }
2660fe6060f1SDimitry Andric
2661fe6060f1SDimitry Andric // Get the highest cpuid leaf supported
2662fe6060f1SDimitry Andric __kmp_x86_cpuid(0, 0, &buf);
2663fe6060f1SDimitry Andric highest_leaf = buf.eax;
2664fe6060f1SDimitry Andric
2665fe6060f1SDimitry Andric // If a specific topology method was requested, only allow that specific leaf
2666fe6060f1SDimitry Andric // otherwise, try both leaves 31 and 11 in that order
2667fe6060f1SDimitry Andric num_leaves = 0;
2668fe6060f1SDimitry Andric if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
2669fe6060f1SDimitry Andric num_leaves = 1;
2670fe6060f1SDimitry Andric leaves[0] = 11;
2671fe6060f1SDimitry Andric leaf_message_id = kmp_i18n_str_NoLeaf11Support;
2672fe6060f1SDimitry Andric } else if (__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
2673fe6060f1SDimitry Andric num_leaves = 1;
2674fe6060f1SDimitry Andric leaves[0] = 31;
2675fe6060f1SDimitry Andric leaf_message_id = kmp_i18n_str_NoLeaf31Support;
2676fe6060f1SDimitry Andric } else {
2677fe6060f1SDimitry Andric num_leaves = 2;
2678fe6060f1SDimitry Andric leaves[0] = 31;
2679fe6060f1SDimitry Andric leaves[1] = 11;
2680fe6060f1SDimitry Andric leaf_message_id = kmp_i18n_str_NoLeaf11Support;
2681fe6060f1SDimitry Andric }
2682fe6060f1SDimitry Andric
2683fe6060f1SDimitry Andric // Check to see if cpuid leaf 31 or 11 is supported.
2684fe6060f1SDimitry Andric __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2685fe6060f1SDimitry Andric topology_leaf = -1;
2686fe6060f1SDimitry Andric for (int i = 0; i < num_leaves; ++i) {
2687fe6060f1SDimitry Andric int leaf = leaves[i];
2688fe6060f1SDimitry Andric if (highest_leaf < leaf)
2689fe6060f1SDimitry Andric continue;
2690fe6060f1SDimitry Andric __kmp_x86_cpuid(leaf, 0, &buf);
2691fe6060f1SDimitry Andric if (buf.ebx == 0)
2692fe6060f1SDimitry Andric continue;
2693fe6060f1SDimitry Andric topology_leaf = leaf;
2694fe6060f1SDimitry Andric levels_index = __kmp_x2apicid_get_levels(leaf, levels, known_levels);
2695fe6060f1SDimitry Andric if (levels_index == 0)
2696fe6060f1SDimitry Andric continue;
26970b57cec5SDimitry Andric break;
26980b57cec5SDimitry Andric }
2699fe6060f1SDimitry Andric if (topology_leaf == -1 || levels_index == 0) {
2700fe6060f1SDimitry Andric *msg_id = leaf_message_id;
2701fe6060f1SDimitry Andric return false;
27020b57cec5SDimitry Andric }
2703fe6060f1SDimitry Andric KMP_ASSERT(levels_index <= INTEL_LEVEL_TYPE_LAST);
27040b57cec5SDimitry Andric
27050b57cec5SDimitry Andric // The algorithm used starts by setting the affinity to each available thread
27060b57cec5SDimitry Andric // and retrieving info from the cpuid instruction, so if we are not capable of
2707fe6060f1SDimitry Andric // calling __kmp_get_system_affinity() and __kmp_get_system_affinity(), then
2708fe6060f1SDimitry Andric // we need to do something else - use the defaults that we calculated from
27090b57cec5SDimitry Andric // issuing cpuid without binding to each proc.
27100b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
27110b57cec5SDimitry Andric // Hack to try and infer the machine topology using only the data
27120b57cec5SDimitry Andric // available from cpuid on the current thread, and __kmp_xproc.
2713bdd1243dSDimitry Andric KMP_ASSERT(__kmp_affinity.type == affinity_none);
2714fe6060f1SDimitry Andric for (unsigned i = 0; i < levels_index; ++i) {
2715fe6060f1SDimitry Andric if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
2716fe6060f1SDimitry Andric __kmp_nThreadsPerCore = levels[i].nitems;
2717fe6060f1SDimitry Andric } else if (levels[i].level_type == INTEL_LEVEL_TYPE_CORE) {
2718fe6060f1SDimitry Andric nCoresPerPkg = levels[i].nitems;
2719fe6060f1SDimitry Andric }
2720fe6060f1SDimitry Andric }
27210b57cec5SDimitry Andric __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
27220b57cec5SDimitry Andric nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
2723fe6060f1SDimitry Andric return true;
27240b57cec5SDimitry Andric }
2725fe6060f1SDimitry Andric
2726fe6060f1SDimitry Andric // Allocate the data structure to be returned.
2727fe6060f1SDimitry Andric int depth = levels_index;
2728fe6060f1SDimitry Andric for (int i = depth - 1, j = 0; i >= 0; --i, ++j)
2729fe6060f1SDimitry Andric types[j] = __kmp_intel_type_2_topology_type(levels[i].level_type);
2730fe6060f1SDimitry Andric __kmp_topology =
2731fe6060f1SDimitry Andric kmp_topology_t::allocate(__kmp_avail_proc, levels_index, types);
2732fe6060f1SDimitry Andric
2733fe6060f1SDimitry Andric // Insert equivalent cache types if they exist
2734fe6060f1SDimitry Andric kmp_cache_info_t cache_info;
2735fe6060f1SDimitry Andric for (size_t i = 0; i < cache_info.get_depth(); ++i) {
2736fe6060f1SDimitry Andric const kmp_cache_info_t::info_t &info = cache_info[i];
2737fe6060f1SDimitry Andric unsigned cache_mask = info.mask;
2738fe6060f1SDimitry Andric unsigned cache_level = info.level;
2739fe6060f1SDimitry Andric for (unsigned j = 0; j < levels_index; ++j) {
2740fe6060f1SDimitry Andric unsigned hw_cache_mask = levels[j].cache_mask;
2741fe6060f1SDimitry Andric kmp_hw_t cache_type = kmp_cache_info_t::get_topology_type(cache_level);
2742fe6060f1SDimitry Andric if (hw_cache_mask == cache_mask && j < levels_index - 1) {
2743fe6060f1SDimitry Andric kmp_hw_t type =
2744fe6060f1SDimitry Andric __kmp_intel_type_2_topology_type(levels[j + 1].level_type);
2745fe6060f1SDimitry Andric __kmp_topology->set_equivalent_type(cache_type, type);
27460b57cec5SDimitry Andric }
2747fe6060f1SDimitry Andric }
27480b57cec5SDimitry Andric }
27490b57cec5SDimitry Andric
27500b57cec5SDimitry Andric // From here on, we can assume that it is safe to call
27510b57cec5SDimitry Andric // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
2752bdd1243dSDimitry Andric // __kmp_affinity.type = affinity_none.
27530b57cec5SDimitry Andric
27540b57cec5SDimitry Andric // Save the affinity mask for the current thread.
2755fe6060f1SDimitry Andric kmp_affinity_raii_t previous_affinity;
27560b57cec5SDimitry Andric
27570b57cec5SDimitry Andric // Run through each of the available contexts, binding the current thread
27580b57cec5SDimitry Andric // to it, and obtaining the pertinent information using the cpuid instr.
27590b57cec5SDimitry Andric unsigned int proc;
2760fe6060f1SDimitry Andric int hw_thread_index = 0;
27610b57cec5SDimitry Andric KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
2762fe6060f1SDimitry Andric cpuid_level_info_t my_levels[INTEL_LEVEL_TYPE_LAST];
2763fe6060f1SDimitry Andric unsigned my_levels_index;
2764fe6060f1SDimitry Andric
27650b57cec5SDimitry Andric // Skip this proc if it is not included in the machine model.
27660b57cec5SDimitry Andric if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
27670b57cec5SDimitry Andric continue;
27680b57cec5SDimitry Andric }
2769fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(hw_thread_index < __kmp_avail_proc);
27700b57cec5SDimitry Andric
27710b57cec5SDimitry Andric __kmp_affinity_dispatch->bind_thread(proc);
27720b57cec5SDimitry Andric
2773fe6060f1SDimitry Andric // New algorithm
2774fe6060f1SDimitry Andric __kmp_x86_cpuid(topology_leaf, 0, &buf);
2775fe6060f1SDimitry Andric apic_id = buf.edx;
2776fe6060f1SDimitry Andric kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
2777fe6060f1SDimitry Andric my_levels_index =
2778fe6060f1SDimitry Andric __kmp_x2apicid_get_levels(topology_leaf, my_levels, known_levels);
2779fe6060f1SDimitry Andric if (my_levels_index == 0 || my_levels_index != levels_index) {
2780fe6060f1SDimitry Andric *msg_id = kmp_i18n_str_InvalidCpuidInfo;
2781fe6060f1SDimitry Andric return false;
27820b57cec5SDimitry Andric }
2783fe6060f1SDimitry Andric hw_thread.clear();
2784fe6060f1SDimitry Andric hw_thread.os_id = proc;
2785fe6060f1SDimitry Andric // Put in topology information
2786fe6060f1SDimitry Andric for (unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
2787fe6060f1SDimitry Andric hw_thread.ids[idx] = apic_id & my_levels[j].mask;
2788fe6060f1SDimitry Andric if (j > 0) {
2789fe6060f1SDimitry Andric hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
27900b57cec5SDimitry Andric }
27910b57cec5SDimitry Andric }
2792349cc55cSDimitry Andric // Hybrid information
2793349cc55cSDimitry Andric if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
2794349cc55cSDimitry Andric kmp_hw_core_type_t type;
2795349cc55cSDimitry Andric unsigned native_model_id;
27960eae32dcSDimitry Andric int efficiency;
27970eae32dcSDimitry Andric __kmp_get_hybrid_info(&type, &efficiency, &native_model_id);
27980eae32dcSDimitry Andric hw_thread.attrs.set_core_type(type);
27990eae32dcSDimitry Andric hw_thread.attrs.set_core_eff(efficiency);
2800349cc55cSDimitry Andric }
2801fe6060f1SDimitry Andric hw_thread_index++;
28020b57cec5SDimitry Andric }
2803fe6060f1SDimitry Andric KMP_ASSERT(hw_thread_index > 0);
2804fe6060f1SDimitry Andric __kmp_topology->sort_ids();
2805fe6060f1SDimitry Andric if (!__kmp_topology->check_ids()) {
2806fe6060f1SDimitry Andric kmp_topology_t::deallocate(__kmp_topology);
2807fe6060f1SDimitry Andric __kmp_topology = nullptr;
28080b57cec5SDimitry Andric *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
2809fe6060f1SDimitry Andric return false;
28100b57cec5SDimitry Andric }
2811fe6060f1SDimitry Andric return true;
28120b57cec5SDimitry Andric }
28130b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
28140b57cec5SDimitry Andric
// Field indices into one /proc/cpuinfo record (an array of `unsigned`, one
// per logical CPU), ordered from least significant (osIdIndex) to most
// significant; the comparator below walks them from maxIndex downward.
#define osIdIndex 0 // OS processor number ("processor" field)
#define threadIdIndex 1 // thread id within a core ("thread id" field)
#define coreIdIndex 2 // core id within a package ("core id" field)
#define pkgIdIndex 3 // physical package id ("physical id" field)
#define nodeIdIndex 4 // first NUMA level; node_<n> is stored at nodeIdIndex + n

// One parsed cpuinfo record: fields indexed by the constants above.
typedef unsigned *ProcCpuInfo;
// Highest valid field index in a record; extended past pkgIdIndex when
// node_<n> fields are discovered while scanning the cpuinfo file.
static unsigned maxIndex = pkgIdIndex;
28230b57cec5SDimitry Andric
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void * a,const void * b)28240b57cec5SDimitry Andric static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
28250b57cec5SDimitry Andric const void *b) {
28260b57cec5SDimitry Andric unsigned i;
28270b57cec5SDimitry Andric const unsigned *aa = *(unsigned *const *)a;
28280b57cec5SDimitry Andric const unsigned *bb = *(unsigned *const *)b;
28290b57cec5SDimitry Andric for (i = maxIndex;; i--) {
28300b57cec5SDimitry Andric if (aa[i] < bb[i])
28310b57cec5SDimitry Andric return -1;
28320b57cec5SDimitry Andric if (aa[i] > bb[i])
28330b57cec5SDimitry Andric return 1;
28340b57cec5SDimitry Andric if (i == osIdIndex)
28350b57cec5SDimitry Andric break;
28360b57cec5SDimitry Andric }
28370b57cec5SDimitry Andric return 0;
28380b57cec5SDimitry Andric }
28390b57cec5SDimitry Andric
28400b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED
28410b57cec5SDimitry Andric // Set the array sizes for the hierarchy layers
// Populate the per-layer unit counts (__kmp_hier_max_units) and the number of
// hardware threads contained in one unit of each layer
// (__kmp_hier_threads_per), derived from the machine-shape globals
// nPackages, nCoresPerPkg, __kmp_nThreadsPerCore, and __kmp_ncores.
static void __kmp_dispatch_set_hierarchy_values() {
  // Set the maximum number of L1's to number of cores
  // Set the maximum number of L2's to either number of cores / 2 for
  // Intel(R) Xeon Phi(TM) coprocessor formally codenamed Knights Landing
  // Or the number of cores for Intel(R) Xeon(R) processors
  // Set the maximum number of NUMA nodes and L3's to number of packages
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 &&                                                         \
    (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||    \
     KMP_OS_WINDOWS) &&                                                        \
    KMP_MIC_SUPPORTED
  // KNL (mic3 and later): two cores share an L2, so half as many L2's as
  // cores.
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
       // || KMP_OS_DRAGONFLY || KMP_OS_WINDOWS) && KMP_MIC_SUPPORTED
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
  // Set the number of threads per unit
  // Number of hardware threads per L1/L2/L3/NUMA/LOOP
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
      __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 &&                                                         \
    (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||    \
     KMP_OS_WINDOWS) &&                                                        \
    KMP_MIC_SUPPORTED
  // KNL: an L2 spans two cores, so it holds twice the threads of one core.
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
       // || KMP_OS_DRAGONFLY || KMP_OS_WINDOWS) && KMP_MIC_SUPPORTED
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
}
28860b57cec5SDimitry Andric
28870b57cec5SDimitry Andric // Return the index into the hierarchy for this tid and layer type (L1, L2, etc)
28880b57cec5SDimitry Andric // i.e., this thread's L1 or this thread's L2, etc.
__kmp_dispatch_get_index(int tid,kmp_hier_layer_e type)28890b57cec5SDimitry Andric int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
28900b57cec5SDimitry Andric int index = type + 1;
28910b57cec5SDimitry Andric int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
28920b57cec5SDimitry Andric KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
28930b57cec5SDimitry Andric if (type == kmp_hier_layer_e::LAYER_THREAD)
28940b57cec5SDimitry Andric return tid;
28950b57cec5SDimitry Andric else if (type == kmp_hier_layer_e::LAYER_LOOP)
28960b57cec5SDimitry Andric return 0;
28970b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
28980b57cec5SDimitry Andric if (tid >= num_hw_threads)
28990b57cec5SDimitry Andric tid = tid % num_hw_threads;
29000b57cec5SDimitry Andric return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
29010b57cec5SDimitry Andric }
29020b57cec5SDimitry Andric
29030b57cec5SDimitry Andric // Return the number of t1's per t2
__kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1,kmp_hier_layer_e t2)29040b57cec5SDimitry Andric int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
29050b57cec5SDimitry Andric int i1 = t1 + 1;
29060b57cec5SDimitry Andric int i2 = t2 + 1;
29070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(i1 <= i2);
29080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
29090b57cec5SDimitry Andric KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
29100b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
29110b57cec5SDimitry Andric // (nthreads/t2) / (nthreads/t1) = t1 / t2
29120b57cec5SDimitry Andric return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
29130b57cec5SDimitry Andric }
29140b57cec5SDimitry Andric #endif // KMP_USE_HIER_SCHED
29150b57cec5SDimitry Andric
__kmp_cpuinfo_get_filename()2916fe6060f1SDimitry Andric static inline const char *__kmp_cpuinfo_get_filename() {
2917fe6060f1SDimitry Andric const char *filename;
2918fe6060f1SDimitry Andric if (__kmp_cpuinfo_file != nullptr)
2919fe6060f1SDimitry Andric filename = __kmp_cpuinfo_file;
2920fe6060f1SDimitry Andric else
2921fe6060f1SDimitry Andric filename = "/proc/cpuinfo";
2922fe6060f1SDimitry Andric return filename;
2923fe6060f1SDimitry Andric }
2924fe6060f1SDimitry Andric
__kmp_cpuinfo_get_envvar()2925fe6060f1SDimitry Andric static inline const char *__kmp_cpuinfo_get_envvar() {
2926fe6060f1SDimitry Andric const char *envvar = nullptr;
2927fe6060f1SDimitry Andric if (__kmp_cpuinfo_file != nullptr)
2928fe6060f1SDimitry Andric envvar = "KMP_CPUINFO_FILE";
2929fe6060f1SDimitry Andric return envvar;
2930fe6060f1SDimitry Andric }
2931fe6060f1SDimitry Andric
29320b57cec5SDimitry Andric // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
2933439352acSDimitry Andric // affinity map. On AIX, the map is obtained through system SRAD (Scheduler
2934439352acSDimitry Andric // Resource Allocation Domain).
__kmp_affinity_create_cpuinfo_map(int * line,kmp_i18n_id_t * const msg_id)2935fe6060f1SDimitry Andric static bool __kmp_affinity_create_cpuinfo_map(int *line,
2936fe6060f1SDimitry Andric kmp_i18n_id_t *const msg_id) {
2937439352acSDimitry Andric *msg_id = kmp_i18n_null;
2938439352acSDimitry Andric
2939439352acSDimitry Andric #if KMP_OS_AIX
2940439352acSDimitry Andric unsigned num_records = __kmp_xproc;
2941439352acSDimitry Andric #else
2942fe6060f1SDimitry Andric const char *filename = __kmp_cpuinfo_get_filename();
2943fe6060f1SDimitry Andric const char *envvar = __kmp_cpuinfo_get_envvar();
29440b57cec5SDimitry Andric
2945bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) {
2946fe6060f1SDimitry Andric KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
2947fe6060f1SDimitry Andric }
2948fe6060f1SDimitry Andric
2949fe6060f1SDimitry Andric kmp_safe_raii_file_t f(filename, "r", envvar);
2950fe6060f1SDimitry Andric
29510b57cec5SDimitry Andric // Scan of the file, and count the number of "processor" (osId) fields,
29520b57cec5SDimitry Andric // and find the highest value of <n> for a node_<n> field.
29530b57cec5SDimitry Andric char buf[256];
29540b57cec5SDimitry Andric unsigned num_records = 0;
29550b57cec5SDimitry Andric while (!feof(f)) {
29560b57cec5SDimitry Andric buf[sizeof(buf) - 1] = 1;
29570b57cec5SDimitry Andric if (!fgets(buf, sizeof(buf), f)) {
29580b57cec5SDimitry Andric // Read errors presumably because of EOF
29590b57cec5SDimitry Andric break;
29600b57cec5SDimitry Andric }
29610b57cec5SDimitry Andric
29620b57cec5SDimitry Andric char s1[] = "processor";
29630b57cec5SDimitry Andric if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
29640b57cec5SDimitry Andric num_records++;
29650b57cec5SDimitry Andric continue;
29660b57cec5SDimitry Andric }
29670b57cec5SDimitry Andric
29680b57cec5SDimitry Andric // FIXME - this will match "node_<n> <garbage>"
29690b57cec5SDimitry Andric unsigned level;
29700b57cec5SDimitry Andric if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
2971fe6060f1SDimitry Andric // validate the input first:
2972fe6060f1SDimitry Andric if (level > (unsigned)__kmp_xproc) { // level is too big
2973fe6060f1SDimitry Andric level = __kmp_xproc;
2974fe6060f1SDimitry Andric }
29750b57cec5SDimitry Andric if (nodeIdIndex + level >= maxIndex) {
29760b57cec5SDimitry Andric maxIndex = nodeIdIndex + level;
29770b57cec5SDimitry Andric }
29780b57cec5SDimitry Andric continue;
29790b57cec5SDimitry Andric }
29800b57cec5SDimitry Andric }
29810b57cec5SDimitry Andric
29820b57cec5SDimitry Andric // Check for empty file / no valid processor records, or too many. The number
29830b57cec5SDimitry Andric // of records can't exceed the number of valid bits in the affinity mask.
29840b57cec5SDimitry Andric if (num_records == 0) {
29850b57cec5SDimitry Andric *msg_id = kmp_i18n_str_NoProcRecords;
2986fe6060f1SDimitry Andric return false;
29870b57cec5SDimitry Andric }
29880b57cec5SDimitry Andric if (num_records > (unsigned)__kmp_xproc) {
29890b57cec5SDimitry Andric *msg_id = kmp_i18n_str_TooManyProcRecords;
2990fe6060f1SDimitry Andric return false;
29910b57cec5SDimitry Andric }
29920b57cec5SDimitry Andric
2993480093f4SDimitry Andric // Set the file pointer back to the beginning, so that we can scan the file
29940b57cec5SDimitry Andric // again, this time performing a full parse of the data. Allocate a vector of
29950b57cec5SDimitry Andric // ProcCpuInfo object, where we will place the data. Adding an extra element
29960b57cec5SDimitry Andric // at the end allows us to remove a lot of extra checks for termination
29970b57cec5SDimitry Andric // conditions.
29980b57cec5SDimitry Andric if (fseek(f, 0, SEEK_SET) != 0) {
29990b57cec5SDimitry Andric *msg_id = kmp_i18n_str_CantRewindCpuinfo;
3000fe6060f1SDimitry Andric return false;
30010b57cec5SDimitry Andric }
3002439352acSDimitry Andric #endif // KMP_OS_AIX
30030b57cec5SDimitry Andric
30040b57cec5SDimitry Andric // Allocate the array of records to store the proc info in. The dummy
30050b57cec5SDimitry Andric // element at the end makes the logic in filling them out easier to code.
30060b57cec5SDimitry Andric unsigned **threadInfo =
30070b57cec5SDimitry Andric (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
30080b57cec5SDimitry Andric unsigned i;
30090b57cec5SDimitry Andric for (i = 0; i <= num_records; i++) {
30100b57cec5SDimitry Andric threadInfo[i] =
30110b57cec5SDimitry Andric (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
30120b57cec5SDimitry Andric }
30130b57cec5SDimitry Andric
30140b57cec5SDimitry Andric #define CLEANUP_THREAD_INFO \
30150b57cec5SDimitry Andric for (i = 0; i <= num_records; i++) { \
30160b57cec5SDimitry Andric __kmp_free(threadInfo[i]); \
30170b57cec5SDimitry Andric } \
30180b57cec5SDimitry Andric __kmp_free(threadInfo);
30190b57cec5SDimitry Andric
30200b57cec5SDimitry Andric // A value of UINT_MAX means that we didn't find the field
30210b57cec5SDimitry Andric unsigned __index;
30220b57cec5SDimitry Andric
30230b57cec5SDimitry Andric #define INIT_PROC_INFO(p) \
30240b57cec5SDimitry Andric for (__index = 0; __index <= maxIndex; __index++) { \
30250b57cec5SDimitry Andric (p)[__index] = UINT_MAX; \
30260b57cec5SDimitry Andric }
30270b57cec5SDimitry Andric
30280b57cec5SDimitry Andric for (i = 0; i <= num_records; i++) {
30290b57cec5SDimitry Andric INIT_PROC_INFO(threadInfo[i]);
30300b57cec5SDimitry Andric }
30310b57cec5SDimitry Andric
3032439352acSDimitry Andric #if KMP_OS_AIX
3033439352acSDimitry Andric int smt_threads;
3034439352acSDimitry Andric lpar_info_format1_t cpuinfo;
3035439352acSDimitry Andric unsigned num_avail = __kmp_xproc;
3036439352acSDimitry Andric
3037439352acSDimitry Andric if (__kmp_affinity.flags.verbose)
3038439352acSDimitry Andric KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "system info for topology");
3039439352acSDimitry Andric
3040439352acSDimitry Andric // Get the number of SMT threads per core.
3041*0fca6ea1SDimitry Andric smt_threads = syssmt(GET_NUMBER_SMT_SETS, 0, 0, NULL);
3042439352acSDimitry Andric
3043439352acSDimitry Andric // Allocate a resource set containing available system resourses.
3044439352acSDimitry Andric rsethandle_t sys_rset = rs_alloc(RS_SYSTEM);
3045439352acSDimitry Andric if (sys_rset == NULL) {
3046439352acSDimitry Andric CLEANUP_THREAD_INFO;
3047439352acSDimitry Andric *msg_id = kmp_i18n_str_UnknownTopology;
3048439352acSDimitry Andric return false;
3049439352acSDimitry Andric }
3050439352acSDimitry Andric // Allocate a resource set for the SRAD info.
3051439352acSDimitry Andric rsethandle_t srad = rs_alloc(RS_EMPTY);
3052439352acSDimitry Andric if (srad == NULL) {
3053439352acSDimitry Andric rs_free(sys_rset);
3054439352acSDimitry Andric CLEANUP_THREAD_INFO;
3055439352acSDimitry Andric *msg_id = kmp_i18n_str_UnknownTopology;
3056439352acSDimitry Andric return false;
3057439352acSDimitry Andric }
3058439352acSDimitry Andric
3059439352acSDimitry Andric // Get the SRAD system detail level.
3060439352acSDimitry Andric int sradsdl = rs_getinfo(NULL, R_SRADSDL, 0);
3061439352acSDimitry Andric if (sradsdl < 0) {
3062439352acSDimitry Andric rs_free(sys_rset);
3063439352acSDimitry Andric rs_free(srad);
3064439352acSDimitry Andric CLEANUP_THREAD_INFO;
3065439352acSDimitry Andric *msg_id = kmp_i18n_str_UnknownTopology;
3066439352acSDimitry Andric return false;
3067439352acSDimitry Andric }
3068439352acSDimitry Andric // Get the number of RADs at that SRAD SDL.
3069439352acSDimitry Andric int num_rads = rs_numrads(sys_rset, sradsdl, 0);
3070439352acSDimitry Andric if (num_rads < 0) {
3071439352acSDimitry Andric rs_free(sys_rset);
3072439352acSDimitry Andric rs_free(srad);
3073439352acSDimitry Andric CLEANUP_THREAD_INFO;
3074439352acSDimitry Andric *msg_id = kmp_i18n_str_UnknownTopology;
3075439352acSDimitry Andric return false;
3076439352acSDimitry Andric }
3077439352acSDimitry Andric
3078439352acSDimitry Andric // Get the maximum number of procs that may be contained in a resource set.
3079439352acSDimitry Andric int max_procs = rs_getinfo(NULL, R_MAXPROCS, 0);
3080439352acSDimitry Andric if (max_procs < 0) {
3081439352acSDimitry Andric rs_free(sys_rset);
3082439352acSDimitry Andric rs_free(srad);
3083439352acSDimitry Andric CLEANUP_THREAD_INFO;
3084439352acSDimitry Andric *msg_id = kmp_i18n_str_UnknownTopology;
3085439352acSDimitry Andric return false;
3086439352acSDimitry Andric }
3087439352acSDimitry Andric
3088439352acSDimitry Andric int cur_rad = 0;
3089439352acSDimitry Andric int num_set = 0;
3090439352acSDimitry Andric for (int srad_idx = 0; cur_rad < num_rads && srad_idx < VMI_MAXRADS;
3091439352acSDimitry Andric ++srad_idx) {
3092439352acSDimitry Andric // Check if the SRAD is available in the RSET.
3093439352acSDimitry Andric if (rs_getrad(sys_rset, srad, sradsdl, srad_idx, 0) < 0)
3094439352acSDimitry Andric continue;
3095439352acSDimitry Andric
3096439352acSDimitry Andric for (int cpu = 0; cpu < max_procs; cpu++) {
3097439352acSDimitry Andric // Set the info for the cpu if it is in the SRAD.
3098439352acSDimitry Andric if (rs_op(RS_TESTRESOURCE, srad, NULL, R_PROCS, cpu)) {
3099439352acSDimitry Andric threadInfo[cpu][osIdIndex] = cpu;
3100439352acSDimitry Andric threadInfo[cpu][pkgIdIndex] = cur_rad;
3101439352acSDimitry Andric threadInfo[cpu][coreIdIndex] = cpu / smt_threads;
3102439352acSDimitry Andric ++num_set;
3103439352acSDimitry Andric if (num_set >= num_avail) {
3104439352acSDimitry Andric // Done if all available CPUs have been set.
3105439352acSDimitry Andric break;
3106439352acSDimitry Andric }
3107439352acSDimitry Andric }
3108439352acSDimitry Andric }
3109439352acSDimitry Andric ++cur_rad;
3110439352acSDimitry Andric }
3111439352acSDimitry Andric rs_free(sys_rset);
3112439352acSDimitry Andric rs_free(srad);
3113439352acSDimitry Andric
3114439352acSDimitry Andric // The topology is already sorted.
3115439352acSDimitry Andric
3116439352acSDimitry Andric #else // !KMP_OS_AIX
31170b57cec5SDimitry Andric unsigned num_avail = 0;
31180b57cec5SDimitry Andric *line = 0;
31195f757f3fSDimitry Andric #if KMP_ARCH_S390X
31205f757f3fSDimitry Andric bool reading_s390x_sys_info = true;
31215f757f3fSDimitry Andric #endif
31220b57cec5SDimitry Andric while (!feof(f)) {
31230b57cec5SDimitry Andric // Create an inner scoping level, so that all the goto targets at the end of
31240b57cec5SDimitry Andric // the loop appear in an outer scoping level. This avoids warnings about
31250b57cec5SDimitry Andric // jumping past an initialization to a target in the same block.
31260b57cec5SDimitry Andric {
31270b57cec5SDimitry Andric buf[sizeof(buf) - 1] = 1;
31280b57cec5SDimitry Andric bool long_line = false;
31290b57cec5SDimitry Andric if (!fgets(buf, sizeof(buf), f)) {
31300b57cec5SDimitry Andric // Read errors presumably because of EOF
31310b57cec5SDimitry Andric // If there is valid data in threadInfo[num_avail], then fake
31320b57cec5SDimitry Andric // a blank line to ensure that the last address gets parsed.
31330b57cec5SDimitry Andric bool valid = false;
31340b57cec5SDimitry Andric for (i = 0; i <= maxIndex; i++) {
31350b57cec5SDimitry Andric if (threadInfo[num_avail][i] != UINT_MAX) {
31360b57cec5SDimitry Andric valid = true;
31370b57cec5SDimitry Andric }
31380b57cec5SDimitry Andric }
31390b57cec5SDimitry Andric if (!valid) {
31400b57cec5SDimitry Andric break;
31410b57cec5SDimitry Andric }
31420b57cec5SDimitry Andric buf[0] = 0;
31430b57cec5SDimitry Andric } else if (!buf[sizeof(buf) - 1]) {
31440b57cec5SDimitry Andric // The line is longer than the buffer. Set a flag and don't
31450b57cec5SDimitry Andric // emit an error if we were going to ignore the line, anyway.
31460b57cec5SDimitry Andric long_line = true;
31470b57cec5SDimitry Andric
31480b57cec5SDimitry Andric #define CHECK_LINE \
31490b57cec5SDimitry Andric if (long_line) { \
31500b57cec5SDimitry Andric CLEANUP_THREAD_INFO; \
31510b57cec5SDimitry Andric *msg_id = kmp_i18n_str_LongLineCpuinfo; \
3152fe6060f1SDimitry Andric return false; \
31530b57cec5SDimitry Andric }
31540b57cec5SDimitry Andric }
31550b57cec5SDimitry Andric (*line)++;
31560b57cec5SDimitry Andric
3157bdd1243dSDimitry Andric #if KMP_ARCH_LOONGARCH64
3158bdd1243dSDimitry Andric // The parsing logic of /proc/cpuinfo in this function highly depends on
3159bdd1243dSDimitry Andric // the blank lines between each processor info block. But on LoongArch a
3160bdd1243dSDimitry Andric // blank line exists before the first processor info block (i.e. after the
3161bdd1243dSDimitry Andric // "system type" line). This blank line was added because the "system
3162bdd1243dSDimitry Andric // type" line is unrelated to any of the CPUs. We must skip this line so
3163bdd1243dSDimitry Andric // that the original logic works on LoongArch.
3164bdd1243dSDimitry Andric if (*buf == '\n' && *line == 2)
3165bdd1243dSDimitry Andric continue;
3166bdd1243dSDimitry Andric #endif
31675f757f3fSDimitry Andric #if KMP_ARCH_S390X
31685f757f3fSDimitry Andric // s390x /proc/cpuinfo starts with a variable number of lines containing
31695f757f3fSDimitry Andric // the overall system information. Skip them.
31705f757f3fSDimitry Andric if (reading_s390x_sys_info) {
31715f757f3fSDimitry Andric if (*buf == '\n')
31725f757f3fSDimitry Andric reading_s390x_sys_info = false;
31735f757f3fSDimitry Andric continue;
31745f757f3fSDimitry Andric }
31755f757f3fSDimitry Andric #endif
3176bdd1243dSDimitry Andric
31775f757f3fSDimitry Andric #if KMP_ARCH_S390X
31785f757f3fSDimitry Andric char s1[] = "cpu number";
31795f757f3fSDimitry Andric #else
31800b57cec5SDimitry Andric char s1[] = "processor";
31815f757f3fSDimitry Andric #endif
31820b57cec5SDimitry Andric if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
31830b57cec5SDimitry Andric CHECK_LINE;
31840b57cec5SDimitry Andric char *p = strchr(buf + sizeof(s1) - 1, ':');
31850b57cec5SDimitry Andric unsigned val;
31860b57cec5SDimitry Andric if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
31870b57cec5SDimitry Andric goto no_val;
31880b57cec5SDimitry Andric if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
31890b57cec5SDimitry Andric #if KMP_ARCH_AARCH64
31900b57cec5SDimitry Andric // Handle the old AArch64 /proc/cpuinfo layout differently,
31910b57cec5SDimitry Andric // it contains all of the 'processor' entries listed in a
31920b57cec5SDimitry Andric // single 'Processor' section, therefore the normal looking
31930b57cec5SDimitry Andric // for duplicates in that section will always fail.
31940b57cec5SDimitry Andric num_avail++;
31950b57cec5SDimitry Andric #else
31960b57cec5SDimitry Andric goto dup_field;
31970b57cec5SDimitry Andric #endif
31980b57cec5SDimitry Andric threadInfo[num_avail][osIdIndex] = val;
31990b57cec5SDimitry Andric #if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
32000b57cec5SDimitry Andric char path[256];
32010b57cec5SDimitry Andric KMP_SNPRINTF(
32020b57cec5SDimitry Andric path, sizeof(path),
32030b57cec5SDimitry Andric "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
32040b57cec5SDimitry Andric threadInfo[num_avail][osIdIndex]);
32050b57cec5SDimitry Andric __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
32060b57cec5SDimitry Andric
32075f757f3fSDimitry Andric #if KMP_ARCH_S390X
32085f757f3fSDimitry Andric // Disambiguate physical_package_id.
32095f757f3fSDimitry Andric unsigned book_id;
32105f757f3fSDimitry Andric KMP_SNPRINTF(path, sizeof(path),
32115f757f3fSDimitry Andric "/sys/devices/system/cpu/cpu%u/topology/book_id",
32125f757f3fSDimitry Andric threadInfo[num_avail][osIdIndex]);
32135f757f3fSDimitry Andric __kmp_read_from_file(path, "%u", &book_id);
32145f757f3fSDimitry Andric threadInfo[num_avail][pkgIdIndex] |= (book_id << 8);
32155f757f3fSDimitry Andric
32165f757f3fSDimitry Andric unsigned drawer_id;
32175f757f3fSDimitry Andric KMP_SNPRINTF(path, sizeof(path),
32185f757f3fSDimitry Andric "/sys/devices/system/cpu/cpu%u/topology/drawer_id",
32195f757f3fSDimitry Andric threadInfo[num_avail][osIdIndex]);
32205f757f3fSDimitry Andric __kmp_read_from_file(path, "%u", &drawer_id);
32215f757f3fSDimitry Andric threadInfo[num_avail][pkgIdIndex] |= (drawer_id << 16);
32225f757f3fSDimitry Andric #endif
32235f757f3fSDimitry Andric
32240b57cec5SDimitry Andric KMP_SNPRINTF(path, sizeof(path),
32250b57cec5SDimitry Andric "/sys/devices/system/cpu/cpu%u/topology/core_id",
32260b57cec5SDimitry Andric threadInfo[num_avail][osIdIndex]);
32270b57cec5SDimitry Andric __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
32280b57cec5SDimitry Andric continue;
32290b57cec5SDimitry Andric #else
32300b57cec5SDimitry Andric }
32310b57cec5SDimitry Andric char s2[] = "physical id";
32320b57cec5SDimitry Andric if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
32330b57cec5SDimitry Andric CHECK_LINE;
32340b57cec5SDimitry Andric char *p = strchr(buf + sizeof(s2) - 1, ':');
32350b57cec5SDimitry Andric unsigned val;
32360b57cec5SDimitry Andric if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32370b57cec5SDimitry Andric goto no_val;
32380b57cec5SDimitry Andric if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
32390b57cec5SDimitry Andric goto dup_field;
32400b57cec5SDimitry Andric threadInfo[num_avail][pkgIdIndex] = val;
32410b57cec5SDimitry Andric continue;
32420b57cec5SDimitry Andric }
32430b57cec5SDimitry Andric char s3[] = "core id";
32440b57cec5SDimitry Andric if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
32450b57cec5SDimitry Andric CHECK_LINE;
32460b57cec5SDimitry Andric char *p = strchr(buf + sizeof(s3) - 1, ':');
32470b57cec5SDimitry Andric unsigned val;
32480b57cec5SDimitry Andric if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32490b57cec5SDimitry Andric goto no_val;
32500b57cec5SDimitry Andric if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
32510b57cec5SDimitry Andric goto dup_field;
32520b57cec5SDimitry Andric threadInfo[num_avail][coreIdIndex] = val;
32530b57cec5SDimitry Andric continue;
32540b57cec5SDimitry Andric #endif // KMP_OS_LINUX && USE_SYSFS_INFO
32550b57cec5SDimitry Andric }
32560b57cec5SDimitry Andric char s4[] = "thread id";
32570b57cec5SDimitry Andric if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
32580b57cec5SDimitry Andric CHECK_LINE;
32590b57cec5SDimitry Andric char *p = strchr(buf + sizeof(s4) - 1, ':');
32600b57cec5SDimitry Andric unsigned val;
32610b57cec5SDimitry Andric if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32620b57cec5SDimitry Andric goto no_val;
32630b57cec5SDimitry Andric if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
32640b57cec5SDimitry Andric goto dup_field;
32650b57cec5SDimitry Andric threadInfo[num_avail][threadIdIndex] = val;
32660b57cec5SDimitry Andric continue;
32670b57cec5SDimitry Andric }
32680b57cec5SDimitry Andric unsigned level;
32690b57cec5SDimitry Andric if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
32700b57cec5SDimitry Andric CHECK_LINE;
32710b57cec5SDimitry Andric char *p = strchr(buf + sizeof(s4) - 1, ':');
32720b57cec5SDimitry Andric unsigned val;
32730b57cec5SDimitry Andric if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
32740b57cec5SDimitry Andric goto no_val;
3275349cc55cSDimitry Andric // validate the input before using level:
3276349cc55cSDimitry Andric if (level > (unsigned)__kmp_xproc) { // level is too big
3277349cc55cSDimitry Andric level = __kmp_xproc;
3278349cc55cSDimitry Andric }
32790b57cec5SDimitry Andric if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
32800b57cec5SDimitry Andric goto dup_field;
32810b57cec5SDimitry Andric threadInfo[num_avail][nodeIdIndex + level] = val;
32820b57cec5SDimitry Andric continue;
32830b57cec5SDimitry Andric }
32840b57cec5SDimitry Andric
32850b57cec5SDimitry Andric // We didn't recognize the leading token on the line. There are lots of
32860b57cec5SDimitry Andric // leading tokens that we don't recognize - if the line isn't empty, go on
32870b57cec5SDimitry Andric // to the next line.
32880b57cec5SDimitry Andric if ((*buf != 0) && (*buf != '\n')) {
32890b57cec5SDimitry Andric // If the line is longer than the buffer, read characters
32900b57cec5SDimitry Andric // until we find a newline.
32910b57cec5SDimitry Andric if (long_line) {
32920b57cec5SDimitry Andric int ch;
32930b57cec5SDimitry Andric while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
32940b57cec5SDimitry Andric ;
32950b57cec5SDimitry Andric }
32960b57cec5SDimitry Andric continue;
32970b57cec5SDimitry Andric }
32980b57cec5SDimitry Andric
32990b57cec5SDimitry Andric // A newline has signalled the end of the processor record.
33000b57cec5SDimitry Andric // Check that there aren't too many procs specified.
33010b57cec5SDimitry Andric if ((int)num_avail == __kmp_xproc) {
33020b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
33030b57cec5SDimitry Andric *msg_id = kmp_i18n_str_TooManyEntries;
3304fe6060f1SDimitry Andric return false;
33050b57cec5SDimitry Andric }
33060b57cec5SDimitry Andric
33070b57cec5SDimitry Andric // Check for missing fields. The osId field must be there, and we
33080b57cec5SDimitry Andric // currently require that the physical id field is specified, also.
33090b57cec5SDimitry Andric if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
33100b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
33110b57cec5SDimitry Andric *msg_id = kmp_i18n_str_MissingProcField;
3312fe6060f1SDimitry Andric return false;
33130b57cec5SDimitry Andric }
33140b57cec5SDimitry Andric if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
33150b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
33160b57cec5SDimitry Andric *msg_id = kmp_i18n_str_MissingPhysicalIDField;
3317fe6060f1SDimitry Andric return false;
33180b57cec5SDimitry Andric }
33190b57cec5SDimitry Andric
33200b57cec5SDimitry Andric // Skip this proc if it is not included in the machine model.
3321bdd1243dSDimitry Andric if (KMP_AFFINITY_CAPABLE() &&
3322bdd1243dSDimitry Andric !KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
33230b57cec5SDimitry Andric __kmp_affin_fullMask)) {
33240b57cec5SDimitry Andric INIT_PROC_INFO(threadInfo[num_avail]);
33250b57cec5SDimitry Andric continue;
33260b57cec5SDimitry Andric }
33270b57cec5SDimitry Andric
33280b57cec5SDimitry Andric // We have a successful parse of this proc's info.
33290b57cec5SDimitry Andric // Increment the counter, and prepare for the next proc.
33300b57cec5SDimitry Andric num_avail++;
33310b57cec5SDimitry Andric KMP_ASSERT(num_avail <= num_records);
33320b57cec5SDimitry Andric INIT_PROC_INFO(threadInfo[num_avail]);
33330b57cec5SDimitry Andric }
33340b57cec5SDimitry Andric continue;
33350b57cec5SDimitry Andric
33360b57cec5SDimitry Andric no_val:
33370b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
33380b57cec5SDimitry Andric *msg_id = kmp_i18n_str_MissingValCpuinfo;
3339fe6060f1SDimitry Andric return false;
33400b57cec5SDimitry Andric
33410b57cec5SDimitry Andric dup_field:
33420b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
33430b57cec5SDimitry Andric *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
3344fe6060f1SDimitry Andric return false;
33450b57cec5SDimitry Andric }
33460b57cec5SDimitry Andric *line = 0;
33470b57cec5SDimitry Andric
33480b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
33490b57cec5SDimitry Andric unsigned teamSize = 0;
33500b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
33510b57cec5SDimitry Andric
33520b57cec5SDimitry Andric // check for num_records == __kmp_xproc ???
33530b57cec5SDimitry Andric
33540b57cec5SDimitry Andric // If it is configured to omit the package level when there is only a single
33550b57cec5SDimitry Andric // package, the logic at the end of this routine won't work if there is only a
3356fe6060f1SDimitry Andric // single thread
33570b57cec5SDimitry Andric KMP_ASSERT(num_avail > 0);
33580b57cec5SDimitry Andric KMP_ASSERT(num_avail <= num_records);
33590b57cec5SDimitry Andric
33600b57cec5SDimitry Andric // Sort the threadInfo table by physical Id.
33610b57cec5SDimitry Andric qsort(threadInfo, num_avail, sizeof(*threadInfo),
33620b57cec5SDimitry Andric __kmp_affinity_cmp_ProcCpuInfo_phys_id);
33630b57cec5SDimitry Andric
3364439352acSDimitry Andric #endif // KMP_OS_AIX
3365439352acSDimitry Andric
33660b57cec5SDimitry Andric // The table is now sorted by pkgId / coreId / threadId, but we really don't
33670b57cec5SDimitry Andric // know the radix of any of the fields. pkgId's may be sparsely assigned among
33680b57cec5SDimitry Andric // the chips on a system. Although coreId's are usually assigned
33690b57cec5SDimitry Andric // [0 .. coresPerPkg-1] and threadId's are usually assigned
33700b57cec5SDimitry Andric // [0..threadsPerCore-1], we don't want to make any such assumptions.
33710b57cec5SDimitry Andric //
33720b57cec5SDimitry Andric // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
33730b57cec5SDimitry Andric // total # packages) are at this point - we want to determine that now. We
33740b57cec5SDimitry Andric // only have an upper bound on the first two figures.
33750b57cec5SDimitry Andric unsigned *counts =
33760b57cec5SDimitry Andric (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33770b57cec5SDimitry Andric unsigned *maxCt =
33780b57cec5SDimitry Andric (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33790b57cec5SDimitry Andric unsigned *totals =
33800b57cec5SDimitry Andric (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33810b57cec5SDimitry Andric unsigned *lastId =
33820b57cec5SDimitry Andric (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
33830b57cec5SDimitry Andric
33840b57cec5SDimitry Andric bool assign_thread_ids = false;
33850b57cec5SDimitry Andric unsigned threadIdCt;
33860b57cec5SDimitry Andric unsigned index;
33870b57cec5SDimitry Andric
33880b57cec5SDimitry Andric restart_radix_check:
33890b57cec5SDimitry Andric threadIdCt = 0;
33900b57cec5SDimitry Andric
33910b57cec5SDimitry Andric // Initialize the counter arrays with data from threadInfo[0].
33920b57cec5SDimitry Andric if (assign_thread_ids) {
33930b57cec5SDimitry Andric if (threadInfo[0][threadIdIndex] == UINT_MAX) {
33940b57cec5SDimitry Andric threadInfo[0][threadIdIndex] = threadIdCt++;
33950b57cec5SDimitry Andric } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
33960b57cec5SDimitry Andric threadIdCt = threadInfo[0][threadIdIndex] + 1;
33970b57cec5SDimitry Andric }
33980b57cec5SDimitry Andric }
33990b57cec5SDimitry Andric for (index = 0; index <= maxIndex; index++) {
34000b57cec5SDimitry Andric counts[index] = 1;
34010b57cec5SDimitry Andric maxCt[index] = 1;
34020b57cec5SDimitry Andric totals[index] = 1;
34030b57cec5SDimitry Andric lastId[index] = threadInfo[0][index];
34040b57cec5SDimitry Andric ;
34050b57cec5SDimitry Andric }
34060b57cec5SDimitry Andric
34070b57cec5SDimitry Andric // Run through the rest of the OS procs.
34080b57cec5SDimitry Andric for (i = 1; i < num_avail; i++) {
34090b57cec5SDimitry Andric // Find the most significant index whose id differs from the id for the
34100b57cec5SDimitry Andric // previous OS proc.
34110b57cec5SDimitry Andric for (index = maxIndex; index >= threadIdIndex; index--) {
34120b57cec5SDimitry Andric if (assign_thread_ids && (index == threadIdIndex)) {
34130b57cec5SDimitry Andric // Auto-assign the thread id field if it wasn't specified.
34140b57cec5SDimitry Andric if (threadInfo[i][threadIdIndex] == UINT_MAX) {
34150b57cec5SDimitry Andric threadInfo[i][threadIdIndex] = threadIdCt++;
34160b57cec5SDimitry Andric }
34170b57cec5SDimitry Andric // Apparently the thread id field was specified for some entries and not
34180b57cec5SDimitry Andric // others. Start the thread id counter off at the next higher thread id.
34190b57cec5SDimitry Andric else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
34200b57cec5SDimitry Andric threadIdCt = threadInfo[i][threadIdIndex] + 1;
34210b57cec5SDimitry Andric }
34220b57cec5SDimitry Andric }
34230b57cec5SDimitry Andric if (threadInfo[i][index] != lastId[index]) {
34240b57cec5SDimitry Andric // Run through all indices which are less significant, and reset the
34250b57cec5SDimitry Andric // counts to 1. At all levels up to and including index, we need to
34260b57cec5SDimitry Andric // increment the totals and record the last id.
34270b57cec5SDimitry Andric unsigned index2;
34280b57cec5SDimitry Andric for (index2 = threadIdIndex; index2 < index; index2++) {
34290b57cec5SDimitry Andric totals[index2]++;
34300b57cec5SDimitry Andric if (counts[index2] > maxCt[index2]) {
34310b57cec5SDimitry Andric maxCt[index2] = counts[index2];
34320b57cec5SDimitry Andric }
34330b57cec5SDimitry Andric counts[index2] = 1;
34340b57cec5SDimitry Andric lastId[index2] = threadInfo[i][index2];
34350b57cec5SDimitry Andric }
34360b57cec5SDimitry Andric counts[index]++;
34370b57cec5SDimitry Andric totals[index]++;
34380b57cec5SDimitry Andric lastId[index] = threadInfo[i][index];
34390b57cec5SDimitry Andric
34400b57cec5SDimitry Andric if (assign_thread_ids && (index > threadIdIndex)) {
34410b57cec5SDimitry Andric
34420b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
34430b57cec5SDimitry Andric // The default team size is the total #threads in the machine
34440b57cec5SDimitry Andric // minus 1 thread for every core that has 3 or more threads.
34450b57cec5SDimitry Andric teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
34460b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
34470b57cec5SDimitry Andric
34480b57cec5SDimitry Andric // Restart the thread counter, as we are on a new core.
34490b57cec5SDimitry Andric threadIdCt = 0;
34500b57cec5SDimitry Andric
34510b57cec5SDimitry Andric // Auto-assign the thread id field if it wasn't specified.
34520b57cec5SDimitry Andric if (threadInfo[i][threadIdIndex] == UINT_MAX) {
34530b57cec5SDimitry Andric threadInfo[i][threadIdIndex] = threadIdCt++;
34540b57cec5SDimitry Andric }
34550b57cec5SDimitry Andric
3456480093f4SDimitry Andric // Apparently the thread id field was specified for some entries and
34570b57cec5SDimitry Andric // not others. Start the thread id counter off at the next higher
34580b57cec5SDimitry Andric // thread id.
34590b57cec5SDimitry Andric else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
34600b57cec5SDimitry Andric threadIdCt = threadInfo[i][threadIdIndex] + 1;
34610b57cec5SDimitry Andric }
34620b57cec5SDimitry Andric }
34630b57cec5SDimitry Andric break;
34640b57cec5SDimitry Andric }
34650b57cec5SDimitry Andric }
34660b57cec5SDimitry Andric if (index < threadIdIndex) {
34670b57cec5SDimitry Andric // If thread ids were specified, it is an error if they are not unique.
// Also, check that we haven't already restarted the loop (to be safe -
34690b57cec5SDimitry Andric // shouldn't need to).
34700b57cec5SDimitry Andric if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
34710b57cec5SDimitry Andric __kmp_free(lastId);
34720b57cec5SDimitry Andric __kmp_free(totals);
34730b57cec5SDimitry Andric __kmp_free(maxCt);
34740b57cec5SDimitry Andric __kmp_free(counts);
34750b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
34760b57cec5SDimitry Andric *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
3477fe6060f1SDimitry Andric return false;
34780b57cec5SDimitry Andric }
34790b57cec5SDimitry Andric
34805f757f3fSDimitry Andric // If the thread ids were not specified and we see entries that
34810b57cec5SDimitry Andric // are duplicates, start the loop over and assign the thread ids manually.
34820b57cec5SDimitry Andric assign_thread_ids = true;
34830b57cec5SDimitry Andric goto restart_radix_check;
34840b57cec5SDimitry Andric }
34850b57cec5SDimitry Andric }
34860b57cec5SDimitry Andric
34870b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
34880b57cec5SDimitry Andric // The default team size is the total #threads in the machine
34890b57cec5SDimitry Andric // minus 1 thread for every core that has 3 or more threads.
34900b57cec5SDimitry Andric teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
34910b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
34920b57cec5SDimitry Andric
34930b57cec5SDimitry Andric for (index = threadIdIndex; index <= maxIndex; index++) {
34940b57cec5SDimitry Andric if (counts[index] > maxCt[index]) {
34950b57cec5SDimitry Andric maxCt[index] = counts[index];
34960b57cec5SDimitry Andric }
34970b57cec5SDimitry Andric }
34980b57cec5SDimitry Andric
34990b57cec5SDimitry Andric __kmp_nThreadsPerCore = maxCt[threadIdIndex];
35000b57cec5SDimitry Andric nCoresPerPkg = maxCt[coreIdIndex];
35010b57cec5SDimitry Andric nPackages = totals[pkgIdIndex];
35020b57cec5SDimitry Andric
35030b57cec5SDimitry Andric // When affinity is off, this routine will still be called to set
35040b57cec5SDimitry Andric // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
35050b57cec5SDimitry Andric // Make sure all these vars are set correctly, and return now if affinity is
35060b57cec5SDimitry Andric // not enabled.
35070b57cec5SDimitry Andric __kmp_ncores = totals[coreIdIndex];
35080b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
3509bdd1243dSDimitry Andric KMP_ASSERT(__kmp_affinity.type == affinity_none);
3510fe6060f1SDimitry Andric return true;
35110b57cec5SDimitry Andric }
35120b57cec5SDimitry Andric
35130b57cec5SDimitry Andric #if KMP_MIC && REDUCE_TEAM_SIZE
35140b57cec5SDimitry Andric // Set the default team size.
35150b57cec5SDimitry Andric if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
35160b57cec5SDimitry Andric __kmp_dflt_team_nth = teamSize;
35170b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
35180b57cec5SDimitry Andric "__kmp_dflt_team_nth = %d\n",
35190b57cec5SDimitry Andric __kmp_dflt_team_nth));
35200b57cec5SDimitry Andric }
35210b57cec5SDimitry Andric #endif // KMP_MIC && REDUCE_TEAM_SIZE
35220b57cec5SDimitry Andric
35230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
35240b57cec5SDimitry Andric
35250b57cec5SDimitry Andric // Count the number of levels which have more nodes at that level than at the
35260b57cec5SDimitry Andric // parent's level (with there being an implicit root node of the top level).
35270b57cec5SDimitry Andric // This is equivalent to saying that there is at least one node at this level
35280b57cec5SDimitry Andric // which has a sibling. These levels are in the map, and the package level is
35290b57cec5SDimitry Andric // always in the map.
35300b57cec5SDimitry Andric bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
35310b57cec5SDimitry Andric for (index = threadIdIndex; index < maxIndex; index++) {
35320b57cec5SDimitry Andric KMP_ASSERT(totals[index] >= totals[index + 1]);
35330b57cec5SDimitry Andric inMap[index] = (totals[index] > totals[index + 1]);
35340b57cec5SDimitry Andric }
35350b57cec5SDimitry Andric inMap[maxIndex] = (totals[maxIndex] > 1);
35360b57cec5SDimitry Andric inMap[pkgIdIndex] = true;
3537fe6060f1SDimitry Andric inMap[coreIdIndex] = true;
3538fe6060f1SDimitry Andric inMap[threadIdIndex] = true;
35390b57cec5SDimitry Andric
35400b57cec5SDimitry Andric int depth = 0;
3541fe6060f1SDimitry Andric int idx = 0;
3542fe6060f1SDimitry Andric kmp_hw_t types[KMP_HW_LAST];
3543fe6060f1SDimitry Andric int pkgLevel = -1;
3544fe6060f1SDimitry Andric int coreLevel = -1;
3545fe6060f1SDimitry Andric int threadLevel = -1;
35460b57cec5SDimitry Andric for (index = threadIdIndex; index <= maxIndex; index++) {
35470b57cec5SDimitry Andric if (inMap[index]) {
35480b57cec5SDimitry Andric depth++;
35490b57cec5SDimitry Andric }
35500b57cec5SDimitry Andric }
3551fe6060f1SDimitry Andric if (inMap[pkgIdIndex]) {
3552fe6060f1SDimitry Andric pkgLevel = idx;
3553fe6060f1SDimitry Andric types[idx++] = KMP_HW_SOCKET;
3554fe6060f1SDimitry Andric }
3555fe6060f1SDimitry Andric if (inMap[coreIdIndex]) {
3556fe6060f1SDimitry Andric coreLevel = idx;
3557fe6060f1SDimitry Andric types[idx++] = KMP_HW_CORE;
3558fe6060f1SDimitry Andric }
3559fe6060f1SDimitry Andric if (inMap[threadIdIndex]) {
3560fe6060f1SDimitry Andric threadLevel = idx;
3561fe6060f1SDimitry Andric types[idx++] = KMP_HW_THREAD;
3562fe6060f1SDimitry Andric }
35630b57cec5SDimitry Andric KMP_ASSERT(depth > 0);
35640b57cec5SDimitry Andric
35650b57cec5SDimitry Andric // Construct the data structure that is to be returned.
3566fe6060f1SDimitry Andric __kmp_topology = kmp_topology_t::allocate(num_avail, depth, types);
35670b57cec5SDimitry Andric
35680b57cec5SDimitry Andric for (i = 0; i < num_avail; ++i) {
35690b57cec5SDimitry Andric unsigned os = threadInfo[i][osIdIndex];
35700b57cec5SDimitry Andric int src_index;
3571fe6060f1SDimitry Andric kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
3572fe6060f1SDimitry Andric hw_thread.clear();
3573fe6060f1SDimitry Andric hw_thread.os_id = os;
35740b57cec5SDimitry Andric
3575fe6060f1SDimitry Andric idx = 0;
35760b57cec5SDimitry Andric for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
35770b57cec5SDimitry Andric if (!inMap[src_index]) {
35780b57cec5SDimitry Andric continue;
35790b57cec5SDimitry Andric }
35800b57cec5SDimitry Andric if (src_index == pkgIdIndex) {
3581fe6060f1SDimitry Andric hw_thread.ids[pkgLevel] = threadInfo[i][src_index];
35820b57cec5SDimitry Andric } else if (src_index == coreIdIndex) {
3583fe6060f1SDimitry Andric hw_thread.ids[coreLevel] = threadInfo[i][src_index];
35840b57cec5SDimitry Andric } else if (src_index == threadIdIndex) {
3585fe6060f1SDimitry Andric hw_thread.ids[threadLevel] = threadInfo[i][src_index];
35860b57cec5SDimitry Andric }
35870b57cec5SDimitry Andric }
35880b57cec5SDimitry Andric }
35890b57cec5SDimitry Andric
35900b57cec5SDimitry Andric __kmp_free(inMap);
35910b57cec5SDimitry Andric __kmp_free(lastId);
35920b57cec5SDimitry Andric __kmp_free(totals);
35930b57cec5SDimitry Andric __kmp_free(maxCt);
35940b57cec5SDimitry Andric __kmp_free(counts);
35950b57cec5SDimitry Andric CLEANUP_THREAD_INFO;
3596fe6060f1SDimitry Andric __kmp_topology->sort_ids();
3597fe6060f1SDimitry Andric if (!__kmp_topology->check_ids()) {
3598fe6060f1SDimitry Andric kmp_topology_t::deallocate(__kmp_topology);
3599fe6060f1SDimitry Andric __kmp_topology = nullptr;
3600fe6060f1SDimitry Andric *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
3601fe6060f1SDimitry Andric return false;
3602fe6060f1SDimitry Andric }
3603fe6060f1SDimitry Andric return true;
36040b57cec5SDimitry Andric }
36050b57cec5SDimitry Andric
36060b57cec5SDimitry Andric // Create and return a table of affinity masks, indexed by OS thread ID.
36070b57cec5SDimitry Andric // This routine handles OR'ing together all the affinity masks of threads
36080b57cec5SDimitry Andric // that are sufficiently close, if granularity > fine.
36095f757f3fSDimitry Andric template <typename FindNextFunctionType>
__kmp_create_os_id_masks(unsigned * numUnique,kmp_affinity_t & affinity,FindNextFunctionType find_next)3610bdd1243dSDimitry Andric static void __kmp_create_os_id_masks(unsigned *numUnique,
36115f757f3fSDimitry Andric kmp_affinity_t &affinity,
36125f757f3fSDimitry Andric FindNextFunctionType find_next) {
36130b57cec5SDimitry Andric // First form a table of affinity masks in order of OS thread id.
3614fe6060f1SDimitry Andric int maxOsId;
3615fe6060f1SDimitry Andric int i;
3616fe6060f1SDimitry Andric int numAddrs = __kmp_topology->get_num_hw_threads();
3617fe6060f1SDimitry Andric int depth = __kmp_topology->get_depth();
36185f757f3fSDimitry Andric const char *env_var = __kmp_get_affinity_env_var(affinity);
3619fe6060f1SDimitry Andric KMP_ASSERT(numAddrs);
3620fe6060f1SDimitry Andric KMP_ASSERT(depth);
36210b57cec5SDimitry Andric
36225f757f3fSDimitry Andric i = find_next(-1);
36235f757f3fSDimitry Andric // If could not find HW thread location with attributes, then return and
36245f757f3fSDimitry Andric // fallback to increment find_next and disregard core attributes.
36255f757f3fSDimitry Andric if (i >= numAddrs)
36265f757f3fSDimitry Andric return;
36275f757f3fSDimitry Andric
36280b57cec5SDimitry Andric maxOsId = 0;
36290b57cec5SDimitry Andric for (i = numAddrs - 1;; --i) {
3630fe6060f1SDimitry Andric int osId = __kmp_topology->at(i).os_id;
36310b57cec5SDimitry Andric if (osId > maxOsId) {
36320b57cec5SDimitry Andric maxOsId = osId;
36330b57cec5SDimitry Andric }
36340b57cec5SDimitry Andric if (i == 0)
36350b57cec5SDimitry Andric break;
36360b57cec5SDimitry Andric }
3637bdd1243dSDimitry Andric affinity.num_os_id_masks = maxOsId + 1;
3638bdd1243dSDimitry Andric KMP_CPU_ALLOC_ARRAY(affinity.os_id_masks, affinity.num_os_id_masks);
3639bdd1243dSDimitry Andric KMP_ASSERT(affinity.gran_levels >= 0);
3640bdd1243dSDimitry Andric if (affinity.flags.verbose && (affinity.gran_levels > 0)) {
3641bdd1243dSDimitry Andric KMP_INFORM(ThreadsMigrate, env_var, affinity.gran_levels);
36420b57cec5SDimitry Andric }
3643bdd1243dSDimitry Andric if (affinity.gran_levels >= (int)depth) {
3644bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffThreadsMayMigrate);
36450b57cec5SDimitry Andric }
36460b57cec5SDimitry Andric
36470b57cec5SDimitry Andric // Run through the table, forming the masks for all threads on each core.
3648fe6060f1SDimitry Andric // Threads on the same core will have identical kmp_hw_thread_t objects, not
36490b57cec5SDimitry Andric // considering the last level, which must be the thread id. All threads on a
36500b57cec5SDimitry Andric // core will appear consecutively.
3651fe6060f1SDimitry Andric int unique = 0;
3652fe6060f1SDimitry Andric int j = 0; // index of 1st thread on core
3653fe6060f1SDimitry Andric int leader = 0;
36540b57cec5SDimitry Andric kmp_affin_mask_t *sum;
36550b57cec5SDimitry Andric KMP_CPU_ALLOC_ON_STACK(sum);
36560b57cec5SDimitry Andric KMP_CPU_ZERO(sum);
36575f757f3fSDimitry Andric
36585f757f3fSDimitry Andric i = j = leader = find_next(-1);
36595f757f3fSDimitry Andric KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
36605f757f3fSDimitry Andric kmp_full_mask_modifier_t full_mask;
36615f757f3fSDimitry Andric for (i = find_next(i); i < numAddrs; i = find_next(i)) {
36620b57cec5SDimitry Andric // If this thread is sufficiently close to the leader (within the
36630b57cec5SDimitry Andric // granularity setting), then set the bit for this os thread in the
36640b57cec5SDimitry Andric // affinity mask for this group, and go on to the next thread.
36655f757f3fSDimitry Andric if (__kmp_topology->is_close(leader, i, affinity)) {
3666fe6060f1SDimitry Andric KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
36670b57cec5SDimitry Andric continue;
36680b57cec5SDimitry Andric }
36690b57cec5SDimitry Andric
36700b57cec5SDimitry Andric // For every thread in this group, copy the mask to the thread's entry in
3671bdd1243dSDimitry Andric // the OS Id mask table. Mark the first address as a leader.
36725f757f3fSDimitry Andric for (; j < i; j = find_next(j)) {
3673fe6060f1SDimitry Andric int osId = __kmp_topology->at(j).os_id;
36740b57cec5SDimitry Andric KMP_DEBUG_ASSERT(osId <= maxOsId);
3675bdd1243dSDimitry Andric kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
36760b57cec5SDimitry Andric KMP_CPU_COPY(mask, sum);
3677fe6060f1SDimitry Andric __kmp_topology->at(j).leader = (j == leader);
36780b57cec5SDimitry Andric }
36790b57cec5SDimitry Andric unique++;
36800b57cec5SDimitry Andric
36810b57cec5SDimitry Andric // Start a new mask.
36820b57cec5SDimitry Andric leader = i;
36835f757f3fSDimitry Andric full_mask.include(sum);
36840b57cec5SDimitry Andric KMP_CPU_ZERO(sum);
3685fe6060f1SDimitry Andric KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
36860b57cec5SDimitry Andric }
36870b57cec5SDimitry Andric
36880b57cec5SDimitry Andric // For every thread in last group, copy the mask to the thread's
3689bdd1243dSDimitry Andric // entry in the OS Id mask table.
36905f757f3fSDimitry Andric for (; j < i; j = find_next(j)) {
3691fe6060f1SDimitry Andric int osId = __kmp_topology->at(j).os_id;
36920b57cec5SDimitry Andric KMP_DEBUG_ASSERT(osId <= maxOsId);
3693bdd1243dSDimitry Andric kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
36940b57cec5SDimitry Andric KMP_CPU_COPY(mask, sum);
3695fe6060f1SDimitry Andric __kmp_topology->at(j).leader = (j == leader);
36960b57cec5SDimitry Andric }
36975f757f3fSDimitry Andric full_mask.include(sum);
36980b57cec5SDimitry Andric unique++;
36990b57cec5SDimitry Andric KMP_CPU_FREE_FROM_STACK(sum);
37000b57cec5SDimitry Andric
37015f757f3fSDimitry Andric // See if the OS Id mask table further restricts or changes the full mask
37025f757f3fSDimitry Andric if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
37035f757f3fSDimitry Andric __kmp_topology->print(env_var);
37045f757f3fSDimitry Andric }
37055f757f3fSDimitry Andric
37060b57cec5SDimitry Andric *numUnique = unique;
37070b57cec5SDimitry Andric }
37080b57cec5SDimitry Andric
37090b57cec5SDimitry Andric // Stuff for the affinity proclist parsers. It's easier to declare these vars
37100b57cec5SDimitry Andric // as file-static than to try and pass them through the calling sequence of
37110b57cec5SDimitry Andric // the recursive-descent OMP_PLACES parser.
37120b57cec5SDimitry Andric static kmp_affin_mask_t *newMasks;
37130b57cec5SDimitry Andric static int numNewMasks;
37140b57cec5SDimitry Andric static int nextNewMask;
37150b57cec5SDimitry Andric
37160b57cec5SDimitry Andric #define ADD_MASK(_mask) \
37170b57cec5SDimitry Andric { \
37180b57cec5SDimitry Andric if (nextNewMask >= numNewMasks) { \
37190b57cec5SDimitry Andric int i; \
37200b57cec5SDimitry Andric numNewMasks *= 2; \
37210b57cec5SDimitry Andric kmp_affin_mask_t *temp; \
37220b57cec5SDimitry Andric KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
37230b57cec5SDimitry Andric for (i = 0; i < numNewMasks / 2; i++) { \
37240b57cec5SDimitry Andric kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \
37250b57cec5SDimitry Andric kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \
37260b57cec5SDimitry Andric KMP_CPU_COPY(dest, src); \
37270b57cec5SDimitry Andric } \
37280b57cec5SDimitry Andric KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \
37290b57cec5SDimitry Andric newMasks = temp; \
37300b57cec5SDimitry Andric } \
37310b57cec5SDimitry Andric KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
37320b57cec5SDimitry Andric nextNewMask++; \
37330b57cec5SDimitry Andric }
37340b57cec5SDimitry Andric
37350b57cec5SDimitry Andric #define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
37360b57cec5SDimitry Andric { \
37370b57cec5SDimitry Andric if (((_osId) > _maxOsId) || \
37380b57cec5SDimitry Andric (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
3739bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, _osId); \
37400b57cec5SDimitry Andric } else { \
37410b57cec5SDimitry Andric ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
37420b57cec5SDimitry Andric } \
37430b57cec5SDimitry Andric }
37440b57cec5SDimitry Andric
37450b57cec5SDimitry Andric // Re-parse the proclist (for the explicit affinity type), and form the list
37460b57cec5SDimitry Andric // of affinity newMasks indexed by gtid.
__kmp_affinity_process_proclist(kmp_affinity_t & affinity)3747bdd1243dSDimitry Andric static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) {
37480b57cec5SDimitry Andric int i;
3749bdd1243dSDimitry Andric kmp_affin_mask_t **out_masks = &affinity.masks;
3750bdd1243dSDimitry Andric unsigned *out_numMasks = &affinity.num_masks;
3751bdd1243dSDimitry Andric const char *proclist = affinity.proclist;
3752bdd1243dSDimitry Andric kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
3753bdd1243dSDimitry Andric int maxOsId = affinity.num_os_id_masks - 1;
37540b57cec5SDimitry Andric const char *scan = proclist;
37550b57cec5SDimitry Andric const char *next = proclist;
37560b57cec5SDimitry Andric
37570b57cec5SDimitry Andric // We use malloc() for the temporary mask vector, so that we can use
37580b57cec5SDimitry Andric // realloc() to extend it.
37590b57cec5SDimitry Andric numNewMasks = 2;
37600b57cec5SDimitry Andric KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
37610b57cec5SDimitry Andric nextNewMask = 0;
37620b57cec5SDimitry Andric kmp_affin_mask_t *sumMask;
37630b57cec5SDimitry Andric KMP_CPU_ALLOC(sumMask);
37640b57cec5SDimitry Andric int setSize = 0;
37650b57cec5SDimitry Andric
37660b57cec5SDimitry Andric for (;;) {
37670b57cec5SDimitry Andric int start, end, stride;
37680b57cec5SDimitry Andric
37690b57cec5SDimitry Andric SKIP_WS(scan);
37700b57cec5SDimitry Andric next = scan;
37710b57cec5SDimitry Andric if (*next == '\0') {
37720b57cec5SDimitry Andric break;
37730b57cec5SDimitry Andric }
37740b57cec5SDimitry Andric
37750b57cec5SDimitry Andric if (*next == '{') {
37760b57cec5SDimitry Andric int num;
37770b57cec5SDimitry Andric setSize = 0;
37780b57cec5SDimitry Andric next++; // skip '{'
37790b57cec5SDimitry Andric SKIP_WS(next);
37800b57cec5SDimitry Andric scan = next;
37810b57cec5SDimitry Andric
37820b57cec5SDimitry Andric // Read the first integer in the set.
37830b57cec5SDimitry Andric KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
37840b57cec5SDimitry Andric SKIP_DIGITS(next);
37850b57cec5SDimitry Andric num = __kmp_str_to_int(scan, *next);
37860b57cec5SDimitry Andric KMP_ASSERT2(num >= 0, "bad explicit proc list");
37870b57cec5SDimitry Andric
37880b57cec5SDimitry Andric // Copy the mask for that osId to the sum (union) mask.
37890b57cec5SDimitry Andric if ((num > maxOsId) ||
37900b57cec5SDimitry Andric (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3791bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num);
37920b57cec5SDimitry Andric KMP_CPU_ZERO(sumMask);
37930b57cec5SDimitry Andric } else {
37940b57cec5SDimitry Andric KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
37950b57cec5SDimitry Andric setSize = 1;
37960b57cec5SDimitry Andric }
37970b57cec5SDimitry Andric
37980b57cec5SDimitry Andric for (;;) {
37990b57cec5SDimitry Andric // Check for end of set.
38000b57cec5SDimitry Andric SKIP_WS(next);
38010b57cec5SDimitry Andric if (*next == '}') {
38020b57cec5SDimitry Andric next++; // skip '}'
38030b57cec5SDimitry Andric break;
38040b57cec5SDimitry Andric }
38050b57cec5SDimitry Andric
38060b57cec5SDimitry Andric // Skip optional comma.
38070b57cec5SDimitry Andric if (*next == ',') {
38080b57cec5SDimitry Andric next++;
38090b57cec5SDimitry Andric }
38100b57cec5SDimitry Andric SKIP_WS(next);
38110b57cec5SDimitry Andric
38120b57cec5SDimitry Andric // Read the next integer in the set.
38130b57cec5SDimitry Andric scan = next;
38140b57cec5SDimitry Andric KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38150b57cec5SDimitry Andric
38160b57cec5SDimitry Andric SKIP_DIGITS(next);
38170b57cec5SDimitry Andric num = __kmp_str_to_int(scan, *next);
38180b57cec5SDimitry Andric KMP_ASSERT2(num >= 0, "bad explicit proc list");
38190b57cec5SDimitry Andric
38200b57cec5SDimitry Andric // Add the mask for that osId to the sum mask.
38210b57cec5SDimitry Andric if ((num > maxOsId) ||
38220b57cec5SDimitry Andric (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3823bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num);
38240b57cec5SDimitry Andric } else {
38250b57cec5SDimitry Andric KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
38260b57cec5SDimitry Andric setSize++;
38270b57cec5SDimitry Andric }
38280b57cec5SDimitry Andric }
38290b57cec5SDimitry Andric if (setSize > 0) {
38300b57cec5SDimitry Andric ADD_MASK(sumMask);
38310b57cec5SDimitry Andric }
38320b57cec5SDimitry Andric
38330b57cec5SDimitry Andric SKIP_WS(next);
38340b57cec5SDimitry Andric if (*next == ',') {
38350b57cec5SDimitry Andric next++;
38360b57cec5SDimitry Andric }
38370b57cec5SDimitry Andric scan = next;
38380b57cec5SDimitry Andric continue;
38390b57cec5SDimitry Andric }
38400b57cec5SDimitry Andric
38410b57cec5SDimitry Andric // Read the first integer.
38420b57cec5SDimitry Andric KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38430b57cec5SDimitry Andric SKIP_DIGITS(next);
38440b57cec5SDimitry Andric start = __kmp_str_to_int(scan, *next);
38450b57cec5SDimitry Andric KMP_ASSERT2(start >= 0, "bad explicit proc list");
38460b57cec5SDimitry Andric SKIP_WS(next);
38470b57cec5SDimitry Andric
38480b57cec5SDimitry Andric // If this isn't a range, then add a mask to the list and go on.
38490b57cec5SDimitry Andric if (*next != '-') {
38500b57cec5SDimitry Andric ADD_MASK_OSID(start, osId2Mask, maxOsId);
38510b57cec5SDimitry Andric
38520b57cec5SDimitry Andric // Skip optional comma.
38530b57cec5SDimitry Andric if (*next == ',') {
38540b57cec5SDimitry Andric next++;
38550b57cec5SDimitry Andric }
38560b57cec5SDimitry Andric scan = next;
38570b57cec5SDimitry Andric continue;
38580b57cec5SDimitry Andric }
38590b57cec5SDimitry Andric
38600b57cec5SDimitry Andric // This is a range. Skip over the '-' and read in the 2nd int.
38610b57cec5SDimitry Andric next++; // skip '-'
38620b57cec5SDimitry Andric SKIP_WS(next);
38630b57cec5SDimitry Andric scan = next;
38640b57cec5SDimitry Andric KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38650b57cec5SDimitry Andric SKIP_DIGITS(next);
38660b57cec5SDimitry Andric end = __kmp_str_to_int(scan, *next);
38670b57cec5SDimitry Andric KMP_ASSERT2(end >= 0, "bad explicit proc list");
38680b57cec5SDimitry Andric
38690b57cec5SDimitry Andric // Check for a stride parameter
38700b57cec5SDimitry Andric stride = 1;
38710b57cec5SDimitry Andric SKIP_WS(next);
38720b57cec5SDimitry Andric if (*next == ':') {
      // A stride is specified. Skip over the ':' and read the 3rd int.
38740b57cec5SDimitry Andric int sign = +1;
38750b57cec5SDimitry Andric next++; // skip ':'
38760b57cec5SDimitry Andric SKIP_WS(next);
38770b57cec5SDimitry Andric scan = next;
38780b57cec5SDimitry Andric if (*next == '-') {
38790b57cec5SDimitry Andric sign = -1;
38800b57cec5SDimitry Andric next++;
38810b57cec5SDimitry Andric SKIP_WS(next);
38820b57cec5SDimitry Andric scan = next;
38830b57cec5SDimitry Andric }
38840b57cec5SDimitry Andric KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
38850b57cec5SDimitry Andric SKIP_DIGITS(next);
38860b57cec5SDimitry Andric stride = __kmp_str_to_int(scan, *next);
38870b57cec5SDimitry Andric KMP_ASSERT2(stride >= 0, "bad explicit proc list");
38880b57cec5SDimitry Andric stride *= sign;
38890b57cec5SDimitry Andric }
38900b57cec5SDimitry Andric
38910b57cec5SDimitry Andric // Do some range checks.
38920b57cec5SDimitry Andric KMP_ASSERT2(stride != 0, "bad explicit proc list");
38930b57cec5SDimitry Andric if (stride > 0) {
38940b57cec5SDimitry Andric KMP_ASSERT2(start <= end, "bad explicit proc list");
38950b57cec5SDimitry Andric } else {
38960b57cec5SDimitry Andric KMP_ASSERT2(start >= end, "bad explicit proc list");
38970b57cec5SDimitry Andric }
38980b57cec5SDimitry Andric KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
38990b57cec5SDimitry Andric
39000b57cec5SDimitry Andric // Add the mask for each OS proc # to the list.
39010b57cec5SDimitry Andric if (stride > 0) {
39020b57cec5SDimitry Andric do {
39030b57cec5SDimitry Andric ADD_MASK_OSID(start, osId2Mask, maxOsId);
39040b57cec5SDimitry Andric start += stride;
39050b57cec5SDimitry Andric } while (start <= end);
39060b57cec5SDimitry Andric } else {
39070b57cec5SDimitry Andric do {
39080b57cec5SDimitry Andric ADD_MASK_OSID(start, osId2Mask, maxOsId);
39090b57cec5SDimitry Andric start += stride;
39100b57cec5SDimitry Andric } while (start >= end);
39110b57cec5SDimitry Andric }
39120b57cec5SDimitry Andric
39130b57cec5SDimitry Andric // Skip optional comma.
39140b57cec5SDimitry Andric SKIP_WS(next);
39150b57cec5SDimitry Andric if (*next == ',') {
39160b57cec5SDimitry Andric next++;
39170b57cec5SDimitry Andric }
39180b57cec5SDimitry Andric scan = next;
39190b57cec5SDimitry Andric }
39200b57cec5SDimitry Andric
39210b57cec5SDimitry Andric *out_numMasks = nextNewMask;
39220b57cec5SDimitry Andric if (nextNewMask == 0) {
39230b57cec5SDimitry Andric *out_masks = NULL;
39240b57cec5SDimitry Andric KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
39250b57cec5SDimitry Andric return;
39260b57cec5SDimitry Andric }
39270b57cec5SDimitry Andric KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
39280b57cec5SDimitry Andric for (i = 0; i < nextNewMask; i++) {
39290b57cec5SDimitry Andric kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
39300b57cec5SDimitry Andric kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
39310b57cec5SDimitry Andric KMP_CPU_COPY(dest, src);
39320b57cec5SDimitry Andric }
39330b57cec5SDimitry Andric KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
39340b57cec5SDimitry Andric KMP_CPU_FREE(sumMask);
39350b57cec5SDimitry Andric }
39360b57cec5SDimitry Andric
39370b57cec5SDimitry Andric /*-----------------------------------------------------------------------------
39380b57cec5SDimitry Andric Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places. Again, here is the grammar:
39400b57cec5SDimitry Andric
39410b57cec5SDimitry Andric place_list := place
39420b57cec5SDimitry Andric place_list := place , place_list
39430b57cec5SDimitry Andric place := num
39440b57cec5SDimitry Andric place := place : num
39450b57cec5SDimitry Andric place := place : num : signed
place := { subplace_list }
39470b57cec5SDimitry Andric place := ! place // (lowest priority)
39480b57cec5SDimitry Andric subplace_list := subplace
39490b57cec5SDimitry Andric subplace_list := subplace , subplace_list
39500b57cec5SDimitry Andric subplace := num
39510b57cec5SDimitry Andric subplace := num : num
39520b57cec5SDimitry Andric subplace := num : num : signed
39530b57cec5SDimitry Andric signed := num
39540b57cec5SDimitry Andric signed := + signed
39550b57cec5SDimitry Andric signed := - signed
39560b57cec5SDimitry Andric -----------------------------------------------------------------------------*/
__kmp_process_subplace_list(const char ** scan,kmp_affinity_t & affinity,int maxOsId,kmp_affin_mask_t * tempMask,int * setSize)39570b57cec5SDimitry Andric static void __kmp_process_subplace_list(const char **scan,
3958bdd1243dSDimitry Andric kmp_affinity_t &affinity, int maxOsId,
3959bdd1243dSDimitry Andric kmp_affin_mask_t *tempMask,
39600b57cec5SDimitry Andric int *setSize) {
39610b57cec5SDimitry Andric const char *next;
3962bdd1243dSDimitry Andric kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
39630b57cec5SDimitry Andric
39640b57cec5SDimitry Andric for (;;) {
39650b57cec5SDimitry Andric int start, count, stride, i;
39660b57cec5SDimitry Andric
39670b57cec5SDimitry Andric // Read in the starting proc id
39680b57cec5SDimitry Andric SKIP_WS(*scan);
39690b57cec5SDimitry Andric KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
39700b57cec5SDimitry Andric next = *scan;
39710b57cec5SDimitry Andric SKIP_DIGITS(next);
39720b57cec5SDimitry Andric start = __kmp_str_to_int(*scan, *next);
39730b57cec5SDimitry Andric KMP_ASSERT(start >= 0);
39740b57cec5SDimitry Andric *scan = next;
39750b57cec5SDimitry Andric
39760b57cec5SDimitry Andric // valid follow sets are ',' ':' and '}'
39770b57cec5SDimitry Andric SKIP_WS(*scan);
39780b57cec5SDimitry Andric if (**scan == '}' || **scan == ',') {
39790b57cec5SDimitry Andric if ((start > maxOsId) ||
39800b57cec5SDimitry Andric (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3981bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start);
39820b57cec5SDimitry Andric } else {
39830b57cec5SDimitry Andric KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
39840b57cec5SDimitry Andric (*setSize)++;
39850b57cec5SDimitry Andric }
39860b57cec5SDimitry Andric if (**scan == '}') {
39870b57cec5SDimitry Andric break;
39880b57cec5SDimitry Andric }
39890b57cec5SDimitry Andric (*scan)++; // skip ','
39900b57cec5SDimitry Andric continue;
39910b57cec5SDimitry Andric }
39920b57cec5SDimitry Andric KMP_ASSERT2(**scan == ':', "bad explicit places list");
39930b57cec5SDimitry Andric (*scan)++; // skip ':'
39940b57cec5SDimitry Andric
39950b57cec5SDimitry Andric // Read count parameter
39960b57cec5SDimitry Andric SKIP_WS(*scan);
39970b57cec5SDimitry Andric KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
39980b57cec5SDimitry Andric next = *scan;
39990b57cec5SDimitry Andric SKIP_DIGITS(next);
40000b57cec5SDimitry Andric count = __kmp_str_to_int(*scan, *next);
40010b57cec5SDimitry Andric KMP_ASSERT(count >= 0);
40020b57cec5SDimitry Andric *scan = next;
40030b57cec5SDimitry Andric
40040b57cec5SDimitry Andric // valid follow sets are ',' ':' and '}'
40050b57cec5SDimitry Andric SKIP_WS(*scan);
40060b57cec5SDimitry Andric if (**scan == '}' || **scan == ',') {
40070b57cec5SDimitry Andric for (i = 0; i < count; i++) {
40080b57cec5SDimitry Andric if ((start > maxOsId) ||
40090b57cec5SDimitry Andric (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
4010bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start);
40110b57cec5SDimitry Andric break; // don't proliferate warnings for large count
40120b57cec5SDimitry Andric } else {
40130b57cec5SDimitry Andric KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
40140b57cec5SDimitry Andric start++;
40150b57cec5SDimitry Andric (*setSize)++;
40160b57cec5SDimitry Andric }
40170b57cec5SDimitry Andric }
40180b57cec5SDimitry Andric if (**scan == '}') {
40190b57cec5SDimitry Andric break;
40200b57cec5SDimitry Andric }
40210b57cec5SDimitry Andric (*scan)++; // skip ','
40220b57cec5SDimitry Andric continue;
40230b57cec5SDimitry Andric }
40240b57cec5SDimitry Andric KMP_ASSERT2(**scan == ':', "bad explicit places list");
40250b57cec5SDimitry Andric (*scan)++; // skip ':'
40260b57cec5SDimitry Andric
40270b57cec5SDimitry Andric // Read stride parameter
40280b57cec5SDimitry Andric int sign = +1;
40290b57cec5SDimitry Andric for (;;) {
40300b57cec5SDimitry Andric SKIP_WS(*scan);
40310b57cec5SDimitry Andric if (**scan == '+') {
40320b57cec5SDimitry Andric (*scan)++; // skip '+'
40330b57cec5SDimitry Andric continue;
40340b57cec5SDimitry Andric }
40350b57cec5SDimitry Andric if (**scan == '-') {
40360b57cec5SDimitry Andric sign *= -1;
40370b57cec5SDimitry Andric (*scan)++; // skip '-'
40380b57cec5SDimitry Andric continue;
40390b57cec5SDimitry Andric }
40400b57cec5SDimitry Andric break;
40410b57cec5SDimitry Andric }
40420b57cec5SDimitry Andric SKIP_WS(*scan);
40430b57cec5SDimitry Andric KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
40440b57cec5SDimitry Andric next = *scan;
40450b57cec5SDimitry Andric SKIP_DIGITS(next);
40460b57cec5SDimitry Andric stride = __kmp_str_to_int(*scan, *next);
40470b57cec5SDimitry Andric KMP_ASSERT(stride >= 0);
40480b57cec5SDimitry Andric *scan = next;
40490b57cec5SDimitry Andric stride *= sign;
40500b57cec5SDimitry Andric
40510b57cec5SDimitry Andric // valid follow sets are ',' and '}'
40520b57cec5SDimitry Andric SKIP_WS(*scan);
40530b57cec5SDimitry Andric if (**scan == '}' || **scan == ',') {
40540b57cec5SDimitry Andric for (i = 0; i < count; i++) {
40550b57cec5SDimitry Andric if ((start > maxOsId) ||
40560b57cec5SDimitry Andric (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
4057bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, start);
40580b57cec5SDimitry Andric break; // don't proliferate warnings for large count
40590b57cec5SDimitry Andric } else {
40600b57cec5SDimitry Andric KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
40610b57cec5SDimitry Andric start += stride;
40620b57cec5SDimitry Andric (*setSize)++;
40630b57cec5SDimitry Andric }
40640b57cec5SDimitry Andric }
40650b57cec5SDimitry Andric if (**scan == '}') {
40660b57cec5SDimitry Andric break;
40670b57cec5SDimitry Andric }
40680b57cec5SDimitry Andric (*scan)++; // skip ','
40690b57cec5SDimitry Andric continue;
40700b57cec5SDimitry Andric }
40710b57cec5SDimitry Andric
40720b57cec5SDimitry Andric KMP_ASSERT2(0, "bad explicit places list");
40730b57cec5SDimitry Andric }
40740b57cec5SDimitry Andric }
40750b57cec5SDimitry Andric
__kmp_process_place(const char ** scan,kmp_affinity_t & affinity,int maxOsId,kmp_affin_mask_t * tempMask,int * setSize)4076bdd1243dSDimitry Andric static void __kmp_process_place(const char **scan, kmp_affinity_t &affinity,
40770b57cec5SDimitry Andric int maxOsId, kmp_affin_mask_t *tempMask,
40780b57cec5SDimitry Andric int *setSize) {
40790b57cec5SDimitry Andric const char *next;
4080bdd1243dSDimitry Andric kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
40810b57cec5SDimitry Andric
40820b57cec5SDimitry Andric // valid follow sets are '{' '!' and num
40830b57cec5SDimitry Andric SKIP_WS(*scan);
40840b57cec5SDimitry Andric if (**scan == '{') {
40850b57cec5SDimitry Andric (*scan)++; // skip '{'
4086bdd1243dSDimitry Andric __kmp_process_subplace_list(scan, affinity, maxOsId, tempMask, setSize);
40870b57cec5SDimitry Andric KMP_ASSERT2(**scan == '}', "bad explicit places list");
40880b57cec5SDimitry Andric (*scan)++; // skip '}'
40890b57cec5SDimitry Andric } else if (**scan == '!') {
40900b57cec5SDimitry Andric (*scan)++; // skip '!'
4091bdd1243dSDimitry Andric __kmp_process_place(scan, affinity, maxOsId, tempMask, setSize);
40920b57cec5SDimitry Andric KMP_CPU_COMPLEMENT(maxOsId, tempMask);
40930b57cec5SDimitry Andric } else if ((**scan >= '0') && (**scan <= '9')) {
40940b57cec5SDimitry Andric next = *scan;
40950b57cec5SDimitry Andric SKIP_DIGITS(next);
40960b57cec5SDimitry Andric int num = __kmp_str_to_int(*scan, *next);
40970b57cec5SDimitry Andric KMP_ASSERT(num >= 0);
40980b57cec5SDimitry Andric if ((num > maxOsId) ||
40990b57cec5SDimitry Andric (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
4100bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, num);
41010b57cec5SDimitry Andric } else {
41020b57cec5SDimitry Andric KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
41030b57cec5SDimitry Andric (*setSize)++;
41040b57cec5SDimitry Andric }
41050b57cec5SDimitry Andric *scan = next; // skip num
41060b57cec5SDimitry Andric } else {
41070b57cec5SDimitry Andric KMP_ASSERT2(0, "bad explicit places list");
41080b57cec5SDimitry Andric }
41090b57cec5SDimitry Andric }
41100b57cec5SDimitry Andric
41110b57cec5SDimitry Andric // static void
// Parse the OMP_PLACES place list (affinity.proclist) according to the
// grammar documented above, building affinity.masks / affinity.num_masks.
// Accumulates results in the file-scope newMasks/numNewMasks/nextNewMask
// buffers via the ADD_MASK macro (defined earlier in this file; #undef'd
// just below), then copies them out and frees the temporaries.
void __kmp_affinity_process_placelist(kmp_affinity_t &affinity) {
  int i, j, count, stride, sign;
  kmp_affin_mask_t **out_masks = &affinity.masks;
  unsigned *out_numMasks = &affinity.num_masks;
  const char *placelist = affinity.proclist;
  kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
  int maxOsId = affinity.num_os_id_masks - 1;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial
  // place to form the current place
  // previousMask contains the previous place
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    // Parse one place into tempMask/setSize.
    __kmp_process_place(&scan, affinity, maxOsId, tempMask, &setSize);

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      // No ":count" suffix: emit the place as-is (if non-empty) and reset.
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1; // "place : count" defaults to stride +1
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter
      sign = +1;
      for (;;) {
        // Consume any run of '+'/'-' signs; each '-' flips the sign.
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break; // previous place became empty; nothing left to shift
      }
      // Add the current place, then build the next place (tempMask) from that
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        // Shift each OS proc in the previous place by 'stride'; drop procs
        // that fall outside [0, maxOsId] or are not available to us.  The
        // warning is suppressed on the last iteration since its tempMask
        // is discarded anyway.
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if (i < count - 1) {
            KMP_AFF_WARNING(affinity, AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // valid follow sets are ',' and EOL
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  // Publish the accumulated masks to the affinity structure.
  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
42590b57cec5SDimitry Andric
42600b57cec5SDimitry Andric #undef ADD_MASK
42610b57cec5SDimitry Andric #undef ADD_MASK_OSID
42620b57cec5SDimitry Andric
42630b57cec5SDimitry Andric // This function figures out the deepest level at which there is at least one
42640b57cec5SDimitry Andric // cluster/core with more than one processing unit bound to it.
__kmp_affinity_find_core_level(int nprocs,int bottom_level)4265fe6060f1SDimitry Andric static int __kmp_affinity_find_core_level(int nprocs, int bottom_level) {
42660b57cec5SDimitry Andric int core_level = 0;
42670b57cec5SDimitry Andric
42680b57cec5SDimitry Andric for (int i = 0; i < nprocs; i++) {
4269fe6060f1SDimitry Andric const kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
42700b57cec5SDimitry Andric for (int j = bottom_level; j > 0; j--) {
4271fe6060f1SDimitry Andric if (hw_thread.ids[j] > 0) {
42720b57cec5SDimitry Andric if (core_level < (j - 1)) {
42730b57cec5SDimitry Andric core_level = j - 1;
42740b57cec5SDimitry Andric }
42750b57cec5SDimitry Andric }
42760b57cec5SDimitry Andric }
42770b57cec5SDimitry Andric }
42780b57cec5SDimitry Andric return core_level;
42790b57cec5SDimitry Andric }
42800b57cec5SDimitry Andric
42810b57cec5SDimitry Andric // This function counts number of clusters/cores at given level.
__kmp_affinity_compute_ncores(int nprocs,int bottom_level,int core_level)4282fe6060f1SDimitry Andric static int __kmp_affinity_compute_ncores(int nprocs, int bottom_level,
42830b57cec5SDimitry Andric int core_level) {
4284fe6060f1SDimitry Andric return __kmp_topology->get_count(core_level);
4285fe6060f1SDimitry Andric }
4286fe6060f1SDimitry Andric // This function finds to which cluster/core given processing unit is bound.
__kmp_affinity_find_core(int proc,int bottom_level,int core_level)4287fe6060f1SDimitry Andric static int __kmp_affinity_find_core(int proc, int bottom_level,
4288fe6060f1SDimitry Andric int core_level) {
4289fe6060f1SDimitry Andric int core = 0;
4290fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(proc >= 0 && proc < __kmp_topology->get_num_hw_threads());
4291fe6060f1SDimitry Andric for (int i = 0; i <= proc; ++i) {
4292fe6060f1SDimitry Andric if (i + 1 <= proc) {
4293fe6060f1SDimitry Andric for (int j = 0; j <= core_level; ++j) {
4294fe6060f1SDimitry Andric if (__kmp_topology->at(i + 1).sub_ids[j] !=
4295fe6060f1SDimitry Andric __kmp_topology->at(i).sub_ids[j]) {
4296fe6060f1SDimitry Andric core++;
42970b57cec5SDimitry Andric break;
42980b57cec5SDimitry Andric }
42990b57cec5SDimitry Andric }
43000b57cec5SDimitry Andric }
43010b57cec5SDimitry Andric }
4302fe6060f1SDimitry Andric return core;
43030b57cec5SDimitry Andric }
43040b57cec5SDimitry Andric
43050b57cec5SDimitry Andric // This function finds maximal number of processing units bound to a
43060b57cec5SDimitry Andric // cluster/core at given level.
__kmp_affinity_max_proc_per_core(int nprocs,int bottom_level,int core_level)4307fe6060f1SDimitry Andric static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
43080b57cec5SDimitry Andric int core_level) {
4309fe6060f1SDimitry Andric if (core_level >= bottom_level)
4310fe6060f1SDimitry Andric return 1;
4311fe6060f1SDimitry Andric int thread_level = __kmp_topology->get_level(KMP_HW_THREAD);
4312fe6060f1SDimitry Andric return __kmp_topology->calculate_ratio(thread_level, core_level);
43130b57cec5SDimitry Andric }
43140b57cec5SDimitry Andric
// File-scope affinity state, set up later in this file.
// NOTE(review): procarr/__kmp_aff_depth uses are not visible in this chunk.
static int *procarr = NULL;
static int __kmp_aff_depth = 0;
// Map from OS proc id to index into __kmp_topology's hw-thread array;
// built in __kmp_affinity_get_topology_info() below.
static int *__kmp_osid_to_hwthread_map = NULL;
4318bdd1243dSDimitry Andric
// Translate an affinity mask into topology ids (one per KMP_HW_* level) and
// core attributes.  For each level, ids.ids[type] is the id common to every
// OS proc in the mask, or MULTIPLE_ID when the mask spans more than one unit
// at that level (all deeper levels are then marked MULTIPLE_ID too).  attrs
// collapses to UNKNOWN when core type/efficiency differ across the mask.
// Requires __kmp_osid_to_hwthread_map to have been built already.
static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
                                                  kmp_affinity_ids_t &ids,
                                                  kmp_affinity_attrs_t &attrs) {
  if (!KMP_AFFINITY_CAPABLE())
    return;

  // Initialize ids and attrs thread data
  for (int i = 0; i < KMP_HW_LAST; ++i)
    ids.ids[i] = kmp_hw_thread_t::UNKNOWN_ID;
  attrs = KMP_AFFINITY_ATTRS_UNKNOWN;

  // Iterate through each os id within the mask and determine
  // the topology id and attribute information
  int cpu;
  int depth = __kmp_topology->get_depth();
  KMP_CPU_SET_ITERATE(cpu, mask) {
    int osid_idx = __kmp_osid_to_hwthread_map[cpu];
    ids.os_id = cpu; // note: ends up holding the last OS id visited
    const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx);
    for (int level = 0; level < depth; ++level) {
      kmp_hw_t type = __kmp_topology->get_type(level);
      int id = hw_thread.sub_ids[level];
      if (ids.ids[type] == kmp_hw_thread_t::UNKNOWN_ID || ids.ids[type] == id) {
        ids.ids[type] = id;
      } else {
        // This mask spans across multiple topology units, set it as such
        // and mark every level below as such as well.
        ids.ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
        for (; level < depth; ++level) {
          kmp_hw_t type = __kmp_topology->get_type(level);
          ids.ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
        }
      }
    }
    if (!attrs.valid) {
      // First proc in the mask: adopt its attributes as-is.
      attrs.core_type = hw_thread.attrs.get_core_type();
      attrs.core_eff = hw_thread.attrs.get_core_eff();
      attrs.valid = 1;
    } else {
      // This mask spans across multiple attributes, set it as such
      if (attrs.core_type != hw_thread.attrs.get_core_type())
        attrs.core_type = KMP_HW_CORE_TYPE_UNKNOWN;
      if (attrs.core_eff != hw_thread.attrs.get_core_eff())
        attrs.core_eff = kmp_hw_attr_t::UNKNOWN_CORE_EFF;
    }
  }
}
4366bdd1243dSDimitry Andric
__kmp_affinity_get_thread_topology_info(kmp_info_t * th)4367bdd1243dSDimitry Andric static void __kmp_affinity_get_thread_topology_info(kmp_info_t *th) {
4368bdd1243dSDimitry Andric if (!KMP_AFFINITY_CAPABLE())
4369bdd1243dSDimitry Andric return;
4370bdd1243dSDimitry Andric const kmp_affin_mask_t *mask = th->th.th_affin_mask;
4371bdd1243dSDimitry Andric kmp_affinity_ids_t &ids = th->th.th_topology_ids;
4372bdd1243dSDimitry Andric kmp_affinity_attrs_t &attrs = th->th.th_topology_attrs;
4373bdd1243dSDimitry Andric __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
4374bdd1243dSDimitry Andric }
4375bdd1243dSDimitry Andric
4376bdd1243dSDimitry Andric // Assign the topology information to each place in the place list
4377bdd1243dSDimitry Andric // A thread can then grab not only its affinity mask, but the topology
4378bdd1243dSDimitry Andric // information associated with that mask. e.g., Which socket is a thread on
static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
  if (!KMP_AFFINITY_CAPABLE())
    return;
  // os_id_masks are only required for a real affinity type; affinity_none
  // still gets per-mask ids/attrs computed below.
  if (affinity.type != affinity_none) {
    KMP_ASSERT(affinity.num_os_id_masks);
    KMP_ASSERT(affinity.os_id_masks);
  }
  KMP_ASSERT(affinity.num_masks);
  KMP_ASSERT(affinity.masks);
  KMP_ASSERT(__kmp_affin_fullMask);

  int max_cpu = __kmp_affin_fullMask->get_max_cpu();
  int num_hw_threads = __kmp_topology->get_num_hw_threads();

  // Allocate thread topology information (lazily; may already exist)
  if (!affinity.ids) {
    affinity.ids = (kmp_affinity_ids_t *)__kmp_allocate(
        sizeof(kmp_affinity_ids_t) * affinity.num_masks);
  }
  if (!affinity.attrs) {
    affinity.attrs = (kmp_affinity_attrs_t *)__kmp_allocate(
        sizeof(kmp_affinity_attrs_t) * affinity.num_masks);
  }
  if (!__kmp_osid_to_hwthread_map) {
    // Want the +1 because max_cpu should be valid index into map
    __kmp_osid_to_hwthread_map =
        (int *)__kmp_allocate(sizeof(int) * (max_cpu + 1));
  }

  // Create the OS proc to hardware thread map
  for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
    int os_id = __kmp_topology->at(hw_thread).os_id;
    if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask))
      __kmp_osid_to_hwthread_map[os_id] = hw_thread;
  }

  // Compute topology ids/attrs for every place (mask) in the place list.
  for (unsigned i = 0; i < affinity.num_masks; ++i) {
    kmp_affinity_ids_t &ids = affinity.ids[i];
    kmp_affinity_attrs_t &attrs = affinity.attrs[i];
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.masks, i);
    __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
  }
}
44220b57cec5SDimitry Andric
44235f757f3fSDimitry Andric // Called when __kmp_topology is ready
__kmp_aux_affinity_initialize_other_data(kmp_affinity_t & affinity)44245f757f3fSDimitry Andric static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
44255f757f3fSDimitry Andric // Initialize other data structures which depend on the topology
44265f757f3fSDimitry Andric if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
44275f757f3fSDimitry Andric machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
44285f757f3fSDimitry Andric __kmp_affinity_get_topology_info(affinity);
44295f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
44305f757f3fSDimitry Andric __kmp_first_osid_with_ecore = __kmp_get_first_osid_with_ecore();
44315f757f3fSDimitry Andric #endif
44325f757f3fSDimitry Andric }
44335f757f3fSDimitry Andric }
44345f757f3fSDimitry Andric
44350b57cec5SDimitry Andric // Create a one element mask array (set of places) which only contains the
44360b57cec5SDimitry Andric // initial process's affinity mask
__kmp_create_affinity_none_places(kmp_affinity_t & affinity)4437bdd1243dSDimitry Andric static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
44380b57cec5SDimitry Andric KMP_ASSERT(__kmp_affin_fullMask != NULL);
4439bdd1243dSDimitry Andric KMP_ASSERT(affinity.type == affinity_none);
44405f757f3fSDimitry Andric KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
4441bdd1243dSDimitry Andric affinity.num_masks = 1;
4442bdd1243dSDimitry Andric KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
4443bdd1243dSDimitry Andric kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
44440b57cec5SDimitry Andric KMP_CPU_COPY(dest, __kmp_affin_fullMask);
44455f757f3fSDimitry Andric __kmp_aux_affinity_initialize_other_data(affinity);
44460b57cec5SDimitry Andric }
44470b57cec5SDimitry Andric
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors that
  // we know about on the machine.
  int verbose = affinity.flags.verbose;
  const char *env_var = affinity.env_var;

  // Already initialized
  if (__kmp_affin_fullMask && __kmp_affin_origMask)
    return;

  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (__kmp_affin_origMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_origMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    // Start from the thread's current system affinity mask.
    __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
    // Make a copy before possible expanding to the entire machine mask
    __kmp_affin_origMask->copy(__kmp_affin_fullMask);
    if (affinity.flags.respect) {
      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      // Sanity check: the mask should not claim more procs than the system
      // reports; if it does, disable affinity entirely.
      if (__kmp_avail_proc > __kmp_xproc) {
        KMP_AFF_WARNING(affinity, ErrorInitializeAffinity);
        affinity.type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }

      if (verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetRespect, env_var, buf);
      }
    } else {
      // norespect: expand the full mask to cover the entire machine.
      if (verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetNotRespect, env_var, buf);
      }
      __kmp_avail_proc =
          __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups <= 1) {
        // Copy expanded full mask if topology has single processor group
        __kmp_affin_origMask->copy(__kmp_affin_fullMask);
      }
      // Set the process affinity mask since threads' affinity
      // masks must be subset of process mask in Windows* OS
      __kmp_affin_fullMask->set_process_affinity(true);
#endif
    }
  }
}
45140b57cec5SDimitry Andric
__kmp_aux_affinity_initialize_topology(kmp_affinity_t & affinity)4515bdd1243dSDimitry Andric static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
4516bdd1243dSDimitry Andric bool success = false;
4517bdd1243dSDimitry Andric const char *env_var = affinity.env_var;
45180b57cec5SDimitry Andric kmp_i18n_id_t msg_id = kmp_i18n_null;
4519bdd1243dSDimitry Andric int verbose = affinity.flags.verbose;
45200b57cec5SDimitry Andric
45210b57cec5SDimitry Andric // For backward compatibility, setting KMP_CPUINFO_FILE =>
45220b57cec5SDimitry Andric // KMP_TOPOLOGY_METHOD=cpuinfo
45230b57cec5SDimitry Andric if ((__kmp_cpuinfo_file != NULL) &&
45240b57cec5SDimitry Andric (__kmp_affinity_top_method == affinity_top_method_all)) {
45250b57cec5SDimitry Andric __kmp_affinity_top_method = affinity_top_method_cpuinfo;
45260b57cec5SDimitry Andric }
45270b57cec5SDimitry Andric
45280b57cec5SDimitry Andric if (__kmp_affinity_top_method == affinity_top_method_all) {
45290b57cec5SDimitry Andric // In the default code path, errors are not fatal - we just try using
45300b57cec5SDimitry Andric // another method. We only emit a warning message if affinity is on, or the
4531fe6060f1SDimitry Andric // verbose flag is set, an the nowarnings flag was not set.
45320b57cec5SDimitry Andric #if KMP_USE_HWLOC
4533fe6060f1SDimitry Andric if (!success &&
45340b57cec5SDimitry Andric __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
45350b57cec5SDimitry Andric if (!__kmp_hwloc_error) {
4536fe6060f1SDimitry Andric success = __kmp_affinity_create_hwloc_map(&msg_id);
4537bdd1243dSDimitry Andric if (!success && verbose) {
4538bdd1243dSDimitry Andric KMP_INFORM(AffIgnoringHwloc, env_var);
45390b57cec5SDimitry Andric }
4540bdd1243dSDimitry Andric } else if (verbose) {
4541bdd1243dSDimitry Andric KMP_INFORM(AffIgnoringHwloc, env_var);
45420b57cec5SDimitry Andric }
45430b57cec5SDimitry Andric }
45440b57cec5SDimitry Andric #endif
45450b57cec5SDimitry Andric
45460b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4547fe6060f1SDimitry Andric if (!success) {
4548fe6060f1SDimitry Andric success = __kmp_affinity_create_x2apicid_map(&msg_id);
4549bdd1243dSDimitry Andric if (!success && verbose && msg_id != kmp_i18n_null) {
4550bdd1243dSDimitry Andric KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45510b57cec5SDimitry Andric }
45520b57cec5SDimitry Andric }
4553fe6060f1SDimitry Andric if (!success) {
4554fe6060f1SDimitry Andric success = __kmp_affinity_create_apicid_map(&msg_id);
4555bdd1243dSDimitry Andric if (!success && verbose && msg_id != kmp_i18n_null) {
4556bdd1243dSDimitry Andric KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45570b57cec5SDimitry Andric }
45580b57cec5SDimitry Andric }
45590b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
45600b57cec5SDimitry Andric
4561439352acSDimitry Andric #if KMP_OS_LINUX || KMP_OS_AIX
4562fe6060f1SDimitry Andric if (!success) {
4563fe6060f1SDimitry Andric int line = 0;
4564fe6060f1SDimitry Andric success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
4565bdd1243dSDimitry Andric if (!success && verbose && msg_id != kmp_i18n_null) {
4566bdd1243dSDimitry Andric KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45670b57cec5SDimitry Andric }
45680b57cec5SDimitry Andric }
45690b57cec5SDimitry Andric #endif /* KMP_OS_LINUX */
45700b57cec5SDimitry Andric
45710b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
4572fe6060f1SDimitry Andric if (!success && (__kmp_num_proc_groups > 1)) {
4573fe6060f1SDimitry Andric success = __kmp_affinity_create_proc_group_map(&msg_id);
4574bdd1243dSDimitry Andric if (!success && verbose && msg_id != kmp_i18n_null) {
4575bdd1243dSDimitry Andric KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45760b57cec5SDimitry Andric }
45770b57cec5SDimitry Andric }
45780b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
45790b57cec5SDimitry Andric
4580fe6060f1SDimitry Andric if (!success) {
4581fe6060f1SDimitry Andric success = __kmp_affinity_create_flat_map(&msg_id);
4582bdd1243dSDimitry Andric if (!success && verbose && msg_id != kmp_i18n_null) {
4583bdd1243dSDimitry Andric KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
45840b57cec5SDimitry Andric }
4585fe6060f1SDimitry Andric KMP_ASSERT(success);
45860b57cec5SDimitry Andric }
45870b57cec5SDimitry Andric }
45880b57cec5SDimitry Andric
4589fe6060f1SDimitry Andric // If the user has specified that a paricular topology discovery method is to be
4590fe6060f1SDimitry Andric // used, then we abort if that method fails. The exception is group affinity,
4591fe6060f1SDimitry Andric // which might have been implicitly set.
45920b57cec5SDimitry Andric #if KMP_USE_HWLOC
45930b57cec5SDimitry Andric else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
45940b57cec5SDimitry Andric KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4595fe6060f1SDimitry Andric success = __kmp_affinity_create_hwloc_map(&msg_id);
4596fe6060f1SDimitry Andric if (!success) {
4597fe6060f1SDimitry Andric KMP_ASSERT(msg_id != kmp_i18n_null);
4598fe6060f1SDimitry Andric KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
45990b57cec5SDimitry Andric }
46000b57cec5SDimitry Andric }
46010b57cec5SDimitry Andric #endif // KMP_USE_HWLOC
46020b57cec5SDimitry Andric
46030b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4604fe6060f1SDimitry Andric else if (__kmp_affinity_top_method == affinity_top_method_x2apicid ||
4605fe6060f1SDimitry Andric __kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
4606fe6060f1SDimitry Andric success = __kmp_affinity_create_x2apicid_map(&msg_id);
4607fe6060f1SDimitry Andric if (!success) {
46080b57cec5SDimitry Andric KMP_ASSERT(msg_id != kmp_i18n_null);
46090b57cec5SDimitry Andric KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
46100b57cec5SDimitry Andric }
46110b57cec5SDimitry Andric } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4612fe6060f1SDimitry Andric success = __kmp_affinity_create_apicid_map(&msg_id);
4613fe6060f1SDimitry Andric if (!success) {
46140b57cec5SDimitry Andric KMP_ASSERT(msg_id != kmp_i18n_null);
46150b57cec5SDimitry Andric KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
46160b57cec5SDimitry Andric }
46170b57cec5SDimitry Andric }
46180b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
46190b57cec5SDimitry Andric
46200b57cec5SDimitry Andric else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
46210b57cec5SDimitry Andric int line = 0;
4622fe6060f1SDimitry Andric success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
4623fe6060f1SDimitry Andric if (!success) {
46240b57cec5SDimitry Andric KMP_ASSERT(msg_id != kmp_i18n_null);
4625fe6060f1SDimitry Andric const char *filename = __kmp_cpuinfo_get_filename();
46260b57cec5SDimitry Andric if (line > 0) {
46270b57cec5SDimitry Andric KMP_FATAL(FileLineMsgExiting, filename, line,
46280b57cec5SDimitry Andric __kmp_i18n_catgets(msg_id));
46290b57cec5SDimitry Andric } else {
46300b57cec5SDimitry Andric KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
46310b57cec5SDimitry Andric }
46320b57cec5SDimitry Andric }
46330b57cec5SDimitry Andric }
46340b57cec5SDimitry Andric
46350b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
46360b57cec5SDimitry Andric else if (__kmp_affinity_top_method == affinity_top_method_group) {
4637fe6060f1SDimitry Andric success = __kmp_affinity_create_proc_group_map(&msg_id);
4638fe6060f1SDimitry Andric KMP_ASSERT(success);
4639fe6060f1SDimitry Andric if (!success) {
46400b57cec5SDimitry Andric KMP_ASSERT(msg_id != kmp_i18n_null);
46410b57cec5SDimitry Andric KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
46420b57cec5SDimitry Andric }
46430b57cec5SDimitry Andric }
46440b57cec5SDimitry Andric #endif /* KMP_GROUP_AFFINITY */
46450b57cec5SDimitry Andric
46460b57cec5SDimitry Andric else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4647fe6060f1SDimitry Andric success = __kmp_affinity_create_flat_map(&msg_id);
46480b57cec5SDimitry Andric // should not fail
4649fe6060f1SDimitry Andric KMP_ASSERT(success);
46500b57cec5SDimitry Andric }
46510b57cec5SDimitry Andric
4652fe6060f1SDimitry Andric // Early exit if topology could not be created
4653fe6060f1SDimitry Andric if (!__kmp_topology) {
4654fcaf7f86SDimitry Andric if (KMP_AFFINITY_CAPABLE()) {
4655bdd1243dSDimitry Andric KMP_AFF_WARNING(affinity, ErrorInitializeAffinity);
46560b57cec5SDimitry Andric }
4657fe6060f1SDimitry Andric if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 &&
4658fe6060f1SDimitry Andric __kmp_ncores > 0) {
4659fe6060f1SDimitry Andric __kmp_topology = kmp_topology_t::allocate(0, 0, NULL);
4660fe6060f1SDimitry Andric __kmp_topology->canonicalize(nPackages, nCoresPerPkg,
4661fe6060f1SDimitry Andric __kmp_nThreadsPerCore, __kmp_ncores);
4662bdd1243dSDimitry Andric if (verbose) {
4663bdd1243dSDimitry Andric __kmp_topology->print(env_var);
4664fe6060f1SDimitry Andric }
4665fe6060f1SDimitry Andric }
4666bdd1243dSDimitry Andric return false;
46670b57cec5SDimitry Andric }
46680b57cec5SDimitry Andric
4669bdd1243dSDimitry Andric // Canonicalize, print (if requested), apply KMP_HW_SUBSET
4670fe6060f1SDimitry Andric __kmp_topology->canonicalize();
4671bdd1243dSDimitry Andric if (verbose)
4672bdd1243dSDimitry Andric __kmp_topology->print(env_var);
4673fe6060f1SDimitry Andric bool filtered = __kmp_topology->filter_hw_subset();
4674bdd1243dSDimitry Andric if (filtered && verbose)
4675fe6060f1SDimitry Andric __kmp_topology->print("KMP_HW_SUBSET");
4676bdd1243dSDimitry Andric return success;
4677bdd1243dSDimitry Andric }
4678bdd1243dSDimitry Andric
// Main affinity initialization: build masks and topology (for the regular
// affinity object only), then construct the place list (affinity.masks)
// according to affinity.type. Hidden-helper affinity reuses the topology
// built by the regular one.
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
  bool is_regular_affinity = (&affinity == &__kmp_affinity);
  bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
  const char *env_var = __kmp_get_affinity_env_var(affinity);

  // Idempotent: once initialized, only sanity-check and return.
  if (affinity.flags.initialized) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  if (is_regular_affinity && (!__kmp_affin_fullMask || !__kmp_affin_origMask))
    __kmp_aux_affinity_initialize_masks(affinity);

  if (is_regular_affinity && !__kmp_topology) {
    bool success = __kmp_aux_affinity_initialize_topology(affinity);
    if (success) {
      KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
    } else {
      // Topology discovery failed: degrade to no affinity.
      affinity.type = affinity_none;
      KMP_AFFINITY_DISABLE();
    }
  }

  // If KMP_AFFINITY=none, then only create the single "none" place
  // which is the process's initial affinity mask or the number of
  // hardware threads depending on respect,norespect
  if (affinity.type == affinity_none) {
    __kmp_create_affinity_none_places(affinity);
#if KMP_USE_HIER_SCHED
    __kmp_dispatch_set_hierarchy_values();
#endif
    affinity.flags.initialized = TRUE;
    return;
  }

  __kmp_topology->set_granularity(affinity);
  int depth = __kmp_topology->get_depth();

  // Create the table of masks, indexed by thread Id.
  unsigned numUnique;
  int numAddrs = __kmp_topology->get_num_hw_threads();
  // If OMP_PLACES=cores:<attribute> specified, then attempt
  // to make OS Id mask table using those attributes
  if (affinity.core_attr_gran.valid) {
    // The lambda maps hw-thread index idx to the next hw thread (after idx)
    // whose core attributes match the requested granularity attribute;
    // numAddrs acts as the end sentinel.
    __kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) {
      KMP_ASSERT(idx >= -1);
      for (int i = idx + 1; i < numAddrs; ++i)
        if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
          return i;
      return numAddrs;
    });
    if (!affinity.os_id_masks) {
      // No hw thread matched the attribute: warn that it will be ignored.
      const char *core_attribute;
      if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
        core_attribute = "core_efficiency";
      else
        core_attribute = "core_type";
      KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var,
                      core_attribute,
                      __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true))
    }
  }
  // If core attributes did not work, or none were specified,
  // then make OS Id mask table using typical incremental way.
  if (!affinity.os_id_masks) {
    __kmp_create_os_id_masks(&numUnique, affinity, [](int idx) {
      KMP_ASSERT(idx >= -1);
      return idx + 1;
    });
  }
  if (affinity.gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  switch (affinity.type) {

  case affinity_explicit:
    KMP_DEBUG_ASSERT(affinity.proclist != NULL);
    // Intel-style proclist vs OMP_PLACES-style placelist parsing.
    if (is_hidden_helper_affinity ||
        __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
      __kmp_affinity_process_proclist(affinity);
    } else {
      __kmp_affinity_process_placelist(affinity);
    }
    if (affinity.num_masks == 0) {
      // Nothing in the explicit list was usable: fall back to "none".
      KMP_AFF_WARNING(affinity, AffNoValidProcID);
      affinity.type = affinity_none;
      __kmp_create_affinity_none_places(affinity);
      affinity.flags.initialized = TRUE;
      return;
    }
    break;

  // The other affinity types rely on sorting the hardware threads according to
  // some permutation of the machine topology tree. Set affinity.compact
  // and affinity.offset appropriately, then jump to a common code
  // fragment to do the sort and create the array of affinity masks.
  case affinity_logical:
    affinity.compact = 0;
    if (affinity.offset) {
      affinity.offset =
          __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
    }
    goto sortTopology;

  case affinity_physical:
    if (__kmp_nThreadsPerCore > 1) {
      affinity.compact = 1;
      if (affinity.compact >= depth) {
        affinity.compact = 0;
      }
    } else {
      affinity.compact = 0;
    }
    if (affinity.offset) {
      affinity.offset =
          __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
    }
    goto sortTopology;

  case affinity_scatter:
    // scatter is the mirror image of compact.
    if (affinity.compact >= depth) {
      affinity.compact = 0;
    } else {
      affinity.compact = depth - 1 - affinity.compact;
    }
    goto sortTopology;

  case affinity_compact:
    if (affinity.compact >= depth) {
      affinity.compact = depth - 1;
    }
    goto sortTopology;

  case affinity_balanced:
    if (depth <= 1 || is_hidden_helper_affinity) {
      // balanced needs a multi-level topology and is not supported for the
      // hidden-helper team; degrade to "none".
      KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
      affinity.type = affinity_none;
      __kmp_create_affinity_none_places(affinity);
      affinity.flags.initialized = TRUE;
      return;
    } else if (!__kmp_topology->is_uniform()) {
      // Save the depth for further usage
      __kmp_aff_depth = depth;

      int core_level =
          __kmp_affinity_find_core_level(__kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc, depth - 1,
                                                 core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
        affinity.type = affinity_none;
        __kmp_create_affinity_none_places(affinity);
        affinity.flags.initialized = TRUE;
        return;
      }

      // procarr[core * maxprocpercore + k] = OS id of the k-th proc on that
      // core, or -1 for unused slots (non-uniform machines).
      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = __kmp_topology->at(i).os_id;
        int core = __kmp_affinity_find_core(i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
    }
    if (affinity.compact >= depth) {
      affinity.compact = depth - 1;
    }

  sortTopology:
    // Allocate the gtid->affinity mask table.
    if (affinity.flags.dups) {
      affinity.num_masks = __kmp_avail_proc;
    } else {
      affinity.num_masks = numUnique;
    }

    // OMP_PLACES=...(num) may further cap the number of places.
    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < affinity.num_masks) &&
        !is_hidden_helper_affinity) {
      affinity.num_masks = __kmp_affinity_num_places;
    }

    KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);

    // Sort the topology table according to the current setting of
    // affinity.compact, then fill out affinity.masks.
    __kmp_topology->sort_compact(affinity);
    {
      int i;
      unsigned j;
      int num_hw_threads = __kmp_topology->get_num_hw_threads();
      kmp_full_mask_modifier_t full_mask;
      for (i = 0, j = 0; i < num_hw_threads; i++) {
        // Without dups, only granule leaders contribute a place.
        if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
          continue;
        }
        int osId = __kmp_topology->at(i).os_id;

        kmp_affin_mask_t *src = KMP_CPU_INDEX(affinity.os_id_masks, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        full_mask.include(src);
        if (++j >= affinity.num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == affinity.num_masks);
      // See if the places list further restricts or changes the full mask
      if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
        __kmp_topology->print(env_var);
      }
    }
    // Sort the topology back using ids
    __kmp_topology->sort_ids();
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }
  __kmp_aux_affinity_initialize_other_data(affinity);
  affinity.flags.initialized = TRUE;
}
49210b57cec5SDimitry Andric
__kmp_affinity_initialize(kmp_affinity_t & affinity)4922bdd1243dSDimitry Andric void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
49235ffd83dbSDimitry Andric // Much of the code above was written assuming that if a machine was not
4924bdd1243dSDimitry Andric // affinity capable, then affinity type == affinity_none.
4925bdd1243dSDimitry Andric // We now explicitly represent this as affinity type == affinity_disabled.
4926bdd1243dSDimitry Andric // There are too many checks for affinity type == affinity_none in this code.
4927bdd1243dSDimitry Andric // Instead of trying to change them all, check if
4928bdd1243dSDimitry Andric // affinity type == affinity_disabled, and if so, slam it with affinity_none,
4929bdd1243dSDimitry Andric // call the real initialization routine, then restore affinity type to
4930bdd1243dSDimitry Andric // affinity_disabled.
4931bdd1243dSDimitry Andric int disabled = (affinity.type == affinity_disabled);
4932bdd1243dSDimitry Andric if (!KMP_AFFINITY_CAPABLE())
49330b57cec5SDimitry Andric KMP_ASSERT(disabled);
4934bdd1243dSDimitry Andric if (disabled)
4935bdd1243dSDimitry Andric affinity.type = affinity_none;
4936bdd1243dSDimitry Andric __kmp_aux_affinity_initialize(affinity);
4937bdd1243dSDimitry Andric if (disabled)
4938bdd1243dSDimitry Andric affinity.type = affinity_disabled;
49390b57cec5SDimitry Andric }
49400b57cec5SDimitry Andric
// Tear down all affinity-related state so the runtime can shut down (or be
// re-initialized) cleanly: per-affinity-object masks and arrays, the saved
// original thread mask, the proc array, the osid->hwthread map, the hwloc
// topology handle, the KMP_HW_SUBSET object, the machine topology, and
// finally the affinity API itself.
void __kmp_affinity_uninitialize(void) {
  // Free per-affinity-object resources and reset each object to its
  // initial (uninitialized) state.
  for (kmp_affinity_t *affinity : __kmp_affinities) {
    if (affinity->masks != NULL)
      KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
    if (affinity->os_id_masks != NULL)
      KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
    if (affinity->proclist != NULL)
      __kmp_free(affinity->proclist);
    if (affinity->ids != NULL)
      __kmp_free(affinity->ids);
    if (affinity->attrs != NULL)
      __kmp_free(affinity->attrs);
    *affinity = KMP_AFFINITY_INIT(affinity->env_var);
  }
  // Restore the thread's original affinity mask, then free the saved copy.
  if (__kmp_affin_origMask != NULL) {
    if (KMP_AFFINITY_CAPABLE()) {
#if KMP_OS_AIX
      // Uninitialize by unbinding the thread.
      bindprocessor(BINDTHREAD, thread_self(), PROCESSOR_CLASS_ANY);
#else
      __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
#endif
    }
    KMP_CPU_FREE(__kmp_affin_origMask);
    __kmp_affin_origMask = NULL;
  }
  __kmp_affinity_num_places = 0;
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
  if (__kmp_osid_to_hwthread_map) {
    __kmp_free(__kmp_osid_to_hwthread_map);
    __kmp_osid_to_hwthread_map = NULL;
  }
#if KMP_USE_HWLOC
  // Destroy the hwloc topology handle if one was created.
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  if (__kmp_hw_subset) {
    kmp_hw_subset_t::deallocate(__kmp_hw_subset);
    __kmp_hw_subset = nullptr;
  }
  if (__kmp_topology) {
    kmp_topology_t::deallocate(__kmp_topology);
    __kmp_topology = nullptr;
  }
  KMPAffinity::destroy_api();
}
49920b57cec5SDimitry Andric
__kmp_select_mask_by_gtid(int gtid,const kmp_affinity_t * affinity,int * place,kmp_affin_mask_t ** mask)4993bdd1243dSDimitry Andric static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
4994bdd1243dSDimitry Andric int *place, kmp_affin_mask_t **mask) {
4995bdd1243dSDimitry Andric int mask_idx;
4996bdd1243dSDimitry Andric bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
4997bdd1243dSDimitry Andric if (is_hidden_helper)
4998bdd1243dSDimitry Andric // The first gtid is the regular primary thread, the second gtid is the main
4999bdd1243dSDimitry Andric // thread of hidden team which does not participate in task execution.
5000bdd1243dSDimitry Andric mask_idx = gtid - 2;
5001bdd1243dSDimitry Andric else
5002bdd1243dSDimitry Andric mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
5003bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(affinity->num_masks > 0);
5004bdd1243dSDimitry Andric *place = (mask_idx + affinity->offset) % affinity->num_masks;
5005bdd1243dSDimitry Andric *mask = KMP_CPU_INDEX(affinity->masks, *place);
5006bdd1243dSDimitry Andric }
5007bdd1243dSDimitry Andric
// This function initializes the per-thread data concerning affinity including
// the mask and topology information. It selects the place/mask for the
// thread but does not bind to it (see __kmp_affinity_bind_init_mask).
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  // Set the thread topology information to default of unknown
  for (int id = 0; id < KMP_HW_LAST; ++id)
    th->th.th_topology_ids.ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
  th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;

  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  // Allocate the thread's mask on first use; otherwise clear the old one.
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity.type == affinity_none, copy the "full" mask, i.e.
  // one that has all of the OS proc ids set, or if
  // __kmp_affinity.flags.respect is set, then the full mask is the
  // same as the mask of the initialization thread.
  kmp_affin_mask_t *mask;
  int i; // place index selected for this thread (or KMP_PLACE_ALL)
  const kmp_affinity_t *affinity;
  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);

  // Hidden helper threads use their own affinity settings object.
  if (is_hidden_helper)
    affinity = &__kmp_hh_affinity;
  else
    affinity = &__kmp_affinity;

  if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
    if ((affinity->type == affinity_none) ||
        (affinity->type == affinity_balanced) ||
        KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
      // With multiple processor groups there is no single full mask to use.
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = 0;
      mask = __kmp_affin_fullMask;
    } else {
      __kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
    }
  } else {
    // OMP_PROC_BIND path: non-root threads (or proc_bind_false) get the
    // full mask; the root thread gets its place's mask.
    if (!isa_root || __kmp_nested_proc_bind.bind_types[0] == proc_bind_false) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      __kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
    }
  }

  th->th.th_current_place = i;
  if (isa_root && !is_hidden_helper) {
    // The root thread's place partition covers the entire place list.
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = affinity->num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
    // When using a Non-OMP_PROC_BIND affinity method,
    // set all threads' place-partition-var to the entire place list
    th->th.th_first_place = 0;
    th->th.th_last_place = affinity->num_masks - 1;
  }
  // Copy topology information associated with the place
  // NOTE(review): ids/attrs are read from __kmp_affinity even when
  // `affinity` points at __kmp_hh_affinity — confirm this is intentional.
  if (i >= 0) {
    th->th.th_topology_ids = __kmp_affinity.ids[i];
    th->th.th_topology_attrs = __kmp_affinity.attrs[i];
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to all places\n",
                   gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to place %d\n",
                   gtid, i));
  }

  KMP_CPU_COPY(th->th.th_affin_mask, mask);
}
51010b57cec5SDimitry Andric
// Bind the calling thread to the init mask previously computed by
// __kmp_affinity_set_init_mask, optionally reporting the binding when
// verbosity is enabled.
void __kmp_affinity_bind_init_mask(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }
  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  const kmp_affinity_t *affinity;
  const char *env_var;
  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);

  // Hidden helper threads have their own affinity settings object.
  if (is_hidden_helper)
    affinity = &__kmp_hh_affinity;
  else
    affinity = &__kmp_affinity;
  env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
  /* to avoid duplicate printing (will be correctly printed on barrier) */
  if (affinity->flags.verbose && (affinity->type == affinity_none ||
                                  (th->th.th_current_place != KMP_PLACE_ALL &&
                                   affinity->type != affinity_balanced)) &&
      !KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
               gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the user
  // didn't request affinity and this call fails, just continue silently.
  // See CQ171393.
  if (affinity->type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
#ifndef KMP_OS_AIX
  // Do not set the full mask as the init mask on AIX.
  // (On Windows this is the else-branch of the affinity_none check above.)
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
#endif
}
51410b57cec5SDimitry Andric
__kmp_affinity_bind_place(int gtid)51425f757f3fSDimitry Andric void __kmp_affinity_bind_place(int gtid) {
5143bdd1243dSDimitry Andric // Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
5144bdd1243dSDimitry Andric if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
51450b57cec5SDimitry Andric return;
51460b57cec5SDimitry Andric }
51470b57cec5SDimitry Andric
51480b57cec5SDimitry Andric kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
51490b57cec5SDimitry Andric
51505f757f3fSDimitry Andric KA_TRACE(100, ("__kmp_affinity_bind_place: binding T#%d to place %d (current "
51510b57cec5SDimitry Andric "place = %d)\n",
51520b57cec5SDimitry Andric gtid, th->th.th_new_place, th->th.th_current_place));
51530b57cec5SDimitry Andric
51540b57cec5SDimitry Andric // Check that the new place is within this thread's partition.
51550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
51560b57cec5SDimitry Andric KMP_ASSERT(th->th.th_new_place >= 0);
5157bdd1243dSDimitry Andric KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks);
51580b57cec5SDimitry Andric if (th->th.th_first_place <= th->th.th_last_place) {
51590b57cec5SDimitry Andric KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
51600b57cec5SDimitry Andric (th->th.th_new_place <= th->th.th_last_place));
51610b57cec5SDimitry Andric } else {
51620b57cec5SDimitry Andric KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
51630b57cec5SDimitry Andric (th->th.th_new_place >= th->th.th_last_place));
51640b57cec5SDimitry Andric }
51650b57cec5SDimitry Andric
51665ffd83dbSDimitry Andric // Copy the thread mask to the kmp_info_t structure,
51670b57cec5SDimitry Andric // and set this thread's affinity.
51680b57cec5SDimitry Andric kmp_affin_mask_t *mask =
5169bdd1243dSDimitry Andric KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
51700b57cec5SDimitry Andric KMP_CPU_COPY(th->th.th_affin_mask, mask);
51710b57cec5SDimitry Andric th->th.th_current_place = th->th.th_new_place;
51720b57cec5SDimitry Andric
5173bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) {
51740b57cec5SDimitry Andric char buf[KMP_AFFIN_MASK_PRINT_LEN];
51750b57cec5SDimitry Andric __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
51760b57cec5SDimitry Andric th->th.th_affin_mask);
51770b57cec5SDimitry Andric KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
51780b57cec5SDimitry Andric __kmp_gettid(), gtid, buf);
51790b57cec5SDimitry Andric }
51800b57cec5SDimitry Andric __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
51810b57cec5SDimitry Andric }
51820b57cec5SDimitry Andric
// Implementation of kmp_set_affinity(): validate *mask (when consistency
// checking is enabled), apply it as the calling thread's system affinity,
// and on success store it in the thread's kmp_info_t. Returns the system
// call's result (0 on success) or -1 when affinity is not supported.
int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf(
            "kmp_set_affinity: setting affinity mask for thread %d = %s\n",
            gtid, buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      // Every proc set in the user's mask must also be in the full mask,
      // and the mask must contain at least one proc.
      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      // On Windows, the mask must not span multiple processor groups.
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif /* KMP_GROUP_AFFINITY */
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

  // An explicit user mask invalidates any place-based binding.
  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity.num_masks - 1;

  // Turn off 4.0 affinity for the current thread at this parallel level.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

  return retval;
}
52480b57cec5SDimitry Andric
// Implementation of kmp_get_affinity(): copy the calling thread's affinity
// mask into *mask. On POSIX (non-AIX) the mask is queried from the OS and
// the system call's result is returned; on Windows/AIX the stored mask from
// kmp_info_t is copied and 0 is returned. Returns -1 when affinity is not
// supported.
int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
// th is only needed on the Windows/AIX copy path and for the debug assert.
#if KMP_OS_WINDOWS || KMP_OS_AIX || KMP_DEBUG
  kmp_info_t *th;
#endif
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
#if KMP_OS_WINDOWS || KMP_OS_AIX || KMP_DEBUG
  th = __kmp_threads[gtid];
#else
  (void)gtid; // unused variable
#endif
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  th->th.th_affin_mask);
        __kmp_printf(
            "kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,
            buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS && !KMP_OS_AIX

  // Query the live mask from the OS.
  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_printf(
            "kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid,
            buf);
      });
  return retval;

#else
  (void)retval;

  // Windows/AIX: return the mask stored in the thread descriptor.
  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* !KMP_OS_WINDOWS && !KMP_OS_AIX */
}
53050b57cec5SDimitry Andric
__kmp_aux_get_affinity_max_proc()53060b57cec5SDimitry Andric int __kmp_aux_get_affinity_max_proc() {
53070b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
53080b57cec5SDimitry Andric return 0;
53090b57cec5SDimitry Andric }
53100b57cec5SDimitry Andric #if KMP_GROUP_AFFINITY
53110b57cec5SDimitry Andric if (__kmp_num_proc_groups > 1) {
53120b57cec5SDimitry Andric return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
53130b57cec5SDimitry Andric }
53140b57cec5SDimitry Andric #endif
53150b57cec5SDimitry Andric return __kmp_xproc;
53160b57cec5SDimitry Andric }
53170b57cec5SDimitry Andric
__kmp_aux_set_affinity_mask_proc(int proc,void ** mask)53180b57cec5SDimitry Andric int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
53190b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
53200b57cec5SDimitry Andric return -1;
53210b57cec5SDimitry Andric }
53220b57cec5SDimitry Andric
5323fe6060f1SDimitry Andric KA_TRACE(
5324fe6060f1SDimitry Andric 1000, (""); {
53250b57cec5SDimitry Andric int gtid = __kmp_entry_gtid();
53260b57cec5SDimitry Andric char buf[KMP_AFFIN_MASK_PRINT_LEN];
53270b57cec5SDimitry Andric __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
53280b57cec5SDimitry Andric (kmp_affin_mask_t *)(*mask));
53290b57cec5SDimitry Andric __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
53300b57cec5SDimitry Andric "affinity mask for thread %d = %s\n",
53310b57cec5SDimitry Andric proc, gtid, buf);
53320b57cec5SDimitry Andric });
53330b57cec5SDimitry Andric
53340b57cec5SDimitry Andric if (__kmp_env_consistency_check) {
53350b57cec5SDimitry Andric if ((mask == NULL) || (*mask == NULL)) {
53360b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
53370b57cec5SDimitry Andric }
53380b57cec5SDimitry Andric }
53390b57cec5SDimitry Andric
53400b57cec5SDimitry Andric if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
53410b57cec5SDimitry Andric return -1;
53420b57cec5SDimitry Andric }
53430b57cec5SDimitry Andric if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
53440b57cec5SDimitry Andric return -2;
53450b57cec5SDimitry Andric }
53460b57cec5SDimitry Andric
53470b57cec5SDimitry Andric KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
53480b57cec5SDimitry Andric return 0;
53490b57cec5SDimitry Andric }
53500b57cec5SDimitry Andric
__kmp_aux_unset_affinity_mask_proc(int proc,void ** mask)53510b57cec5SDimitry Andric int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
53520b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
53530b57cec5SDimitry Andric return -1;
53540b57cec5SDimitry Andric }
53550b57cec5SDimitry Andric
5356fe6060f1SDimitry Andric KA_TRACE(
5357fe6060f1SDimitry Andric 1000, (""); {
53580b57cec5SDimitry Andric int gtid = __kmp_entry_gtid();
53590b57cec5SDimitry Andric char buf[KMP_AFFIN_MASK_PRINT_LEN];
53600b57cec5SDimitry Andric __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
53610b57cec5SDimitry Andric (kmp_affin_mask_t *)(*mask));
53620b57cec5SDimitry Andric __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
53630b57cec5SDimitry Andric "affinity mask for thread %d = %s\n",
53640b57cec5SDimitry Andric proc, gtid, buf);
53650b57cec5SDimitry Andric });
53660b57cec5SDimitry Andric
53670b57cec5SDimitry Andric if (__kmp_env_consistency_check) {
53680b57cec5SDimitry Andric if ((mask == NULL) || (*mask == NULL)) {
53690b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
53700b57cec5SDimitry Andric }
53710b57cec5SDimitry Andric }
53720b57cec5SDimitry Andric
53730b57cec5SDimitry Andric if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
53740b57cec5SDimitry Andric return -1;
53750b57cec5SDimitry Andric }
53760b57cec5SDimitry Andric if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
53770b57cec5SDimitry Andric return -2;
53780b57cec5SDimitry Andric }
53790b57cec5SDimitry Andric
53800b57cec5SDimitry Andric KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
53810b57cec5SDimitry Andric return 0;
53820b57cec5SDimitry Andric }
53830b57cec5SDimitry Andric
__kmp_aux_get_affinity_mask_proc(int proc,void ** mask)53840b57cec5SDimitry Andric int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
53850b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE()) {
53860b57cec5SDimitry Andric return -1;
53870b57cec5SDimitry Andric }
53880b57cec5SDimitry Andric
5389fe6060f1SDimitry Andric KA_TRACE(
5390fe6060f1SDimitry Andric 1000, (""); {
53910b57cec5SDimitry Andric int gtid = __kmp_entry_gtid();
53920b57cec5SDimitry Andric char buf[KMP_AFFIN_MASK_PRINT_LEN];
53930b57cec5SDimitry Andric __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
53940b57cec5SDimitry Andric (kmp_affin_mask_t *)(*mask));
53950b57cec5SDimitry Andric __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
53960b57cec5SDimitry Andric "affinity mask for thread %d = %s\n",
53970b57cec5SDimitry Andric proc, gtid, buf);
53980b57cec5SDimitry Andric });
53990b57cec5SDimitry Andric
54000b57cec5SDimitry Andric if (__kmp_env_consistency_check) {
54010b57cec5SDimitry Andric if ((mask == NULL) || (*mask == NULL)) {
54020b57cec5SDimitry Andric KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
54030b57cec5SDimitry Andric }
54040b57cec5SDimitry Andric }
54050b57cec5SDimitry Andric
54060b57cec5SDimitry Andric if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
54070b57cec5SDimitry Andric return -1;
54080b57cec5SDimitry Andric }
54090b57cec5SDimitry Andric if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
54100b57cec5SDimitry Andric return 0;
54110b57cec5SDimitry Andric }
54120b57cec5SDimitry Andric
54130b57cec5SDimitry Andric return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
54140b57cec5SDimitry Andric }
54150b57cec5SDimitry Andric
54165f757f3fSDimitry Andric #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
54175f757f3fSDimitry Andric // Returns first os proc id with ATOM core
__kmp_get_first_osid_with_ecore(void)54185f757f3fSDimitry Andric int __kmp_get_first_osid_with_ecore(void) {
54195f757f3fSDimitry Andric int low = 0;
54205f757f3fSDimitry Andric int high = __kmp_topology->get_num_hw_threads() - 1;
54215f757f3fSDimitry Andric int mid = 0;
54225f757f3fSDimitry Andric while (high - low > 1) {
54235f757f3fSDimitry Andric mid = (high + low) / 2;
54245f757f3fSDimitry Andric if (__kmp_topology->at(mid).attrs.get_core_type() ==
54255f757f3fSDimitry Andric KMP_HW_CORE_TYPE_CORE) {
54265f757f3fSDimitry Andric low = mid + 1;
54275f757f3fSDimitry Andric } else {
54285f757f3fSDimitry Andric high = mid;
54295f757f3fSDimitry Andric }
54305f757f3fSDimitry Andric }
54315f757f3fSDimitry Andric if (__kmp_topology->at(mid).attrs.get_core_type() == KMP_HW_CORE_TYPE_ATOM) {
54325f757f3fSDimitry Andric return mid;
54335f757f3fSDimitry Andric }
54345f757f3fSDimitry Andric return -1;
54355f757f3fSDimitry Andric }
54365f757f3fSDimitry Andric #endif
54375f757f3fSDimitry Andric
54380b57cec5SDimitry Andric // Dynamic affinity settings - Affinity balanced
__kmp_balanced_affinity(kmp_info_t * th,int nthreads)54390b57cec5SDimitry Andric void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
54400b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th);
54410b57cec5SDimitry Andric bool fine_gran = true;
54420b57cec5SDimitry Andric int tid = th->th.th_info.ds.ds_tid;
5443bdd1243dSDimitry Andric const char *env_var = "KMP_AFFINITY";
54440b57cec5SDimitry Andric
5445fe6060f1SDimitry Andric // Do not perform balanced affinity for the hidden helper threads
5446fe6060f1SDimitry Andric if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th)))
5447fe6060f1SDimitry Andric return;
5448fe6060f1SDimitry Andric
5449bdd1243dSDimitry Andric switch (__kmp_affinity.gran) {
5450fe6060f1SDimitry Andric case KMP_HW_THREAD:
54510b57cec5SDimitry Andric break;
5452fe6060f1SDimitry Andric case KMP_HW_CORE:
54530b57cec5SDimitry Andric if (__kmp_nThreadsPerCore > 1) {
54540b57cec5SDimitry Andric fine_gran = false;
54550b57cec5SDimitry Andric }
54560b57cec5SDimitry Andric break;
5457fe6060f1SDimitry Andric case KMP_HW_SOCKET:
54580b57cec5SDimitry Andric if (nCoresPerPkg > 1) {
54590b57cec5SDimitry Andric fine_gran = false;
54600b57cec5SDimitry Andric }
54610b57cec5SDimitry Andric break;
54620b57cec5SDimitry Andric default:
54630b57cec5SDimitry Andric fine_gran = false;
54640b57cec5SDimitry Andric }
54650b57cec5SDimitry Andric
5466fe6060f1SDimitry Andric if (__kmp_topology->is_uniform()) {
54670b57cec5SDimitry Andric int coreID;
54680b57cec5SDimitry Andric int threadID;
54690b57cec5SDimitry Andric // Number of hyper threads per core in HT machine
54700b57cec5SDimitry Andric int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
54710b57cec5SDimitry Andric // Number of cores
54720b57cec5SDimitry Andric int ncores = __kmp_ncores;
54730b57cec5SDimitry Andric if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
54740b57cec5SDimitry Andric __kmp_nth_per_core = __kmp_avail_proc / nPackages;
54750b57cec5SDimitry Andric ncores = nPackages;
54760b57cec5SDimitry Andric }
54770b57cec5SDimitry Andric // How many threads will be bound to each core
54780b57cec5SDimitry Andric int chunk = nthreads / ncores;
54790b57cec5SDimitry Andric // How many cores will have an additional thread bound to it - "big cores"
54800b57cec5SDimitry Andric int big_cores = nthreads % ncores;
54810b57cec5SDimitry Andric // Number of threads on the big cores
54820b57cec5SDimitry Andric int big_nth = (chunk + 1) * big_cores;
54830b57cec5SDimitry Andric if (tid < big_nth) {
54840b57cec5SDimitry Andric coreID = tid / (chunk + 1);
54850b57cec5SDimitry Andric threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
54860b57cec5SDimitry Andric } else { // tid >= big_nth
54870b57cec5SDimitry Andric coreID = (tid - big_cores) / chunk;
54880b57cec5SDimitry Andric threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
54890b57cec5SDimitry Andric }
54900b57cec5SDimitry Andric KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
54910b57cec5SDimitry Andric "Illegal set affinity operation when not capable");
54920b57cec5SDimitry Andric
54930b57cec5SDimitry Andric kmp_affin_mask_t *mask = th->th.th_affin_mask;
54940b57cec5SDimitry Andric KMP_CPU_ZERO(mask);
54950b57cec5SDimitry Andric
54960b57cec5SDimitry Andric if (fine_gran) {
5497fe6060f1SDimitry Andric int osID =
5498fe6060f1SDimitry Andric __kmp_topology->at(coreID * __kmp_nth_per_core + threadID).os_id;
54990b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
55000b57cec5SDimitry Andric } else {
55010b57cec5SDimitry Andric for (int i = 0; i < __kmp_nth_per_core; i++) {
55020b57cec5SDimitry Andric int osID;
5503fe6060f1SDimitry Andric osID = __kmp_topology->at(coreID * __kmp_nth_per_core + i).os_id;
55040b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
55050b57cec5SDimitry Andric }
55060b57cec5SDimitry Andric }
5507bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) {
55080b57cec5SDimitry Andric char buf[KMP_AFFIN_MASK_PRINT_LEN];
55090b57cec5SDimitry Andric __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5510bdd1243dSDimitry Andric KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
5511bdd1243dSDimitry Andric tid, buf);
55120b57cec5SDimitry Andric }
5513bdd1243dSDimitry Andric __kmp_affinity_get_thread_topology_info(th);
55140b57cec5SDimitry Andric __kmp_set_system_affinity(mask, TRUE);
55150b57cec5SDimitry Andric } else { // Non-uniform topology
55160b57cec5SDimitry Andric
55170b57cec5SDimitry Andric kmp_affin_mask_t *mask = th->th.th_affin_mask;
55180b57cec5SDimitry Andric KMP_CPU_ZERO(mask);
55190b57cec5SDimitry Andric
5520fe6060f1SDimitry Andric int core_level =
5521fe6060f1SDimitry Andric __kmp_affinity_find_core_level(__kmp_avail_proc, __kmp_aff_depth - 1);
5522fe6060f1SDimitry Andric int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc,
55230b57cec5SDimitry Andric __kmp_aff_depth - 1, core_level);
55240b57cec5SDimitry Andric int nth_per_core = __kmp_affinity_max_proc_per_core(
5525fe6060f1SDimitry Andric __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
55260b57cec5SDimitry Andric
55270b57cec5SDimitry Andric // For performance gain consider the special case nthreads ==
55280b57cec5SDimitry Andric // __kmp_avail_proc
55290b57cec5SDimitry Andric if (nthreads == __kmp_avail_proc) {
55300b57cec5SDimitry Andric if (fine_gran) {
5531fe6060f1SDimitry Andric int osID = __kmp_topology->at(tid).os_id;
55320b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
55330b57cec5SDimitry Andric } else {
5534fe6060f1SDimitry Andric int core =
5535fe6060f1SDimitry Andric __kmp_affinity_find_core(tid, __kmp_aff_depth - 1, core_level);
55360b57cec5SDimitry Andric for (int i = 0; i < __kmp_avail_proc; i++) {
5537fe6060f1SDimitry Andric int osID = __kmp_topology->at(i).os_id;
5538fe6060f1SDimitry Andric if (__kmp_affinity_find_core(i, __kmp_aff_depth - 1, core_level) ==
5539fe6060f1SDimitry Andric core) {
55400b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
55410b57cec5SDimitry Andric }
55420b57cec5SDimitry Andric }
55430b57cec5SDimitry Andric }
55440b57cec5SDimitry Andric } else if (nthreads <= ncores) {
55450b57cec5SDimitry Andric
55460b57cec5SDimitry Andric int core = 0;
55470b57cec5SDimitry Andric for (int i = 0; i < ncores; i++) {
55480b57cec5SDimitry Andric // Check if this core from procarr[] is in the mask
55490b57cec5SDimitry Andric int in_mask = 0;
55500b57cec5SDimitry Andric for (int j = 0; j < nth_per_core; j++) {
55510b57cec5SDimitry Andric if (procarr[i * nth_per_core + j] != -1) {
55520b57cec5SDimitry Andric in_mask = 1;
55530b57cec5SDimitry Andric break;
55540b57cec5SDimitry Andric }
55550b57cec5SDimitry Andric }
55560b57cec5SDimitry Andric if (in_mask) {
55570b57cec5SDimitry Andric if (tid == core) {
55580b57cec5SDimitry Andric for (int j = 0; j < nth_per_core; j++) {
55590b57cec5SDimitry Andric int osID = procarr[i * nth_per_core + j];
55600b57cec5SDimitry Andric if (osID != -1) {
55610b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
55620b57cec5SDimitry Andric // For fine granularity it is enough to set the first available
55630b57cec5SDimitry Andric // osID for this core
55640b57cec5SDimitry Andric if (fine_gran) {
55650b57cec5SDimitry Andric break;
55660b57cec5SDimitry Andric }
55670b57cec5SDimitry Andric }
55680b57cec5SDimitry Andric }
55690b57cec5SDimitry Andric break;
55700b57cec5SDimitry Andric } else {
55710b57cec5SDimitry Andric core++;
55720b57cec5SDimitry Andric }
55730b57cec5SDimitry Andric }
55740b57cec5SDimitry Andric }
55750b57cec5SDimitry Andric } else { // nthreads > ncores
55760b57cec5SDimitry Andric // Array to save the number of processors at each core
55770b57cec5SDimitry Andric int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
55780b57cec5SDimitry Andric // Array to save the number of cores with "x" available processors;
55790b57cec5SDimitry Andric int *ncores_with_x_procs =
55800b57cec5SDimitry Andric (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
55810b57cec5SDimitry Andric // Array to save the number of cores with # procs from x to nth_per_core
55820b57cec5SDimitry Andric int *ncores_with_x_to_max_procs =
55830b57cec5SDimitry Andric (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
55840b57cec5SDimitry Andric
55850b57cec5SDimitry Andric for (int i = 0; i <= nth_per_core; i++) {
55860b57cec5SDimitry Andric ncores_with_x_procs[i] = 0;
55870b57cec5SDimitry Andric ncores_with_x_to_max_procs[i] = 0;
55880b57cec5SDimitry Andric }
55890b57cec5SDimitry Andric
55900b57cec5SDimitry Andric for (int i = 0; i < ncores; i++) {
55910b57cec5SDimitry Andric int cnt = 0;
55920b57cec5SDimitry Andric for (int j = 0; j < nth_per_core; j++) {
55930b57cec5SDimitry Andric if (procarr[i * nth_per_core + j] != -1) {
55940b57cec5SDimitry Andric cnt++;
55950b57cec5SDimitry Andric }
55960b57cec5SDimitry Andric }
55970b57cec5SDimitry Andric nproc_at_core[i] = cnt;
55980b57cec5SDimitry Andric ncores_with_x_procs[cnt]++;
55990b57cec5SDimitry Andric }
56000b57cec5SDimitry Andric
56010b57cec5SDimitry Andric for (int i = 0; i <= nth_per_core; i++) {
56020b57cec5SDimitry Andric for (int j = i; j <= nth_per_core; j++) {
56030b57cec5SDimitry Andric ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
56040b57cec5SDimitry Andric }
56050b57cec5SDimitry Andric }
56060b57cec5SDimitry Andric
56070b57cec5SDimitry Andric // Max number of processors
56080b57cec5SDimitry Andric int nproc = nth_per_core * ncores;
56090b57cec5SDimitry Andric // An array to keep number of threads per each context
56100b57cec5SDimitry Andric int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
56110b57cec5SDimitry Andric for (int i = 0; i < nproc; i++) {
56120b57cec5SDimitry Andric newarr[i] = 0;
56130b57cec5SDimitry Andric }
56140b57cec5SDimitry Andric
56150b57cec5SDimitry Andric int nth = nthreads;
56160b57cec5SDimitry Andric int flag = 0;
56170b57cec5SDimitry Andric while (nth > 0) {
56180b57cec5SDimitry Andric for (int j = 1; j <= nth_per_core; j++) {
56190b57cec5SDimitry Andric int cnt = ncores_with_x_to_max_procs[j];
56200b57cec5SDimitry Andric for (int i = 0; i < ncores; i++) {
56210b57cec5SDimitry Andric // Skip the core with 0 processors
56220b57cec5SDimitry Andric if (nproc_at_core[i] == 0) {
56230b57cec5SDimitry Andric continue;
56240b57cec5SDimitry Andric }
56250b57cec5SDimitry Andric for (int k = 0; k < nth_per_core; k++) {
56260b57cec5SDimitry Andric if (procarr[i * nth_per_core + k] != -1) {
56270b57cec5SDimitry Andric if (newarr[i * nth_per_core + k] == 0) {
56280b57cec5SDimitry Andric newarr[i * nth_per_core + k] = 1;
56290b57cec5SDimitry Andric cnt--;
56300b57cec5SDimitry Andric nth--;
56310b57cec5SDimitry Andric break;
56320b57cec5SDimitry Andric } else {
56330b57cec5SDimitry Andric if (flag != 0) {
56340b57cec5SDimitry Andric newarr[i * nth_per_core + k]++;
56350b57cec5SDimitry Andric cnt--;
56360b57cec5SDimitry Andric nth--;
56370b57cec5SDimitry Andric break;
56380b57cec5SDimitry Andric }
56390b57cec5SDimitry Andric }
56400b57cec5SDimitry Andric }
56410b57cec5SDimitry Andric }
56420b57cec5SDimitry Andric if (cnt == 0 || nth == 0) {
56430b57cec5SDimitry Andric break;
56440b57cec5SDimitry Andric }
56450b57cec5SDimitry Andric }
56460b57cec5SDimitry Andric if (nth == 0) {
56470b57cec5SDimitry Andric break;
56480b57cec5SDimitry Andric }
56490b57cec5SDimitry Andric }
56500b57cec5SDimitry Andric flag = 1;
56510b57cec5SDimitry Andric }
56520b57cec5SDimitry Andric int sum = 0;
56530b57cec5SDimitry Andric for (int i = 0; i < nproc; i++) {
56540b57cec5SDimitry Andric sum += newarr[i];
56550b57cec5SDimitry Andric if (sum > tid) {
56560b57cec5SDimitry Andric if (fine_gran) {
56570b57cec5SDimitry Andric int osID = procarr[i];
56580b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
56590b57cec5SDimitry Andric } else {
56600b57cec5SDimitry Andric int coreID = i / nth_per_core;
56610b57cec5SDimitry Andric for (int ii = 0; ii < nth_per_core; ii++) {
56620b57cec5SDimitry Andric int osID = procarr[coreID * nth_per_core + ii];
56630b57cec5SDimitry Andric if (osID != -1) {
56640b57cec5SDimitry Andric KMP_CPU_SET(osID, mask);
56650b57cec5SDimitry Andric }
56660b57cec5SDimitry Andric }
56670b57cec5SDimitry Andric }
56680b57cec5SDimitry Andric break;
56690b57cec5SDimitry Andric }
56700b57cec5SDimitry Andric }
56710b57cec5SDimitry Andric __kmp_free(newarr);
56720b57cec5SDimitry Andric }
56730b57cec5SDimitry Andric
5674bdd1243dSDimitry Andric if (__kmp_affinity.flags.verbose) {
56750b57cec5SDimitry Andric char buf[KMP_AFFIN_MASK_PRINT_LEN];
56760b57cec5SDimitry Andric __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5677bdd1243dSDimitry Andric KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
5678bdd1243dSDimitry Andric tid, buf);
56790b57cec5SDimitry Andric }
5680bdd1243dSDimitry Andric __kmp_affinity_get_thread_topology_info(th);
56810b57cec5SDimitry Andric __kmp_set_system_affinity(mask, TRUE);
56820b57cec5SDimitry Andric }
56830b57cec5SDimitry Andric }
56840b57cec5SDimitry Andric
5685*0fca6ea1SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \
5686*0fca6ea1SDimitry Andric KMP_OS_AIX
56870b57cec5SDimitry Andric // We don't need this entry for Windows because
56880b57cec5SDimitry Andric // there is GetProcessAffinityMask() api
56890b57cec5SDimitry Andric //
56900b57cec5SDimitry Andric // The intended usage is indicated by these steps:
56910b57cec5SDimitry Andric // 1) The user gets the current affinity mask
56920b57cec5SDimitry Andric // 2) Then sets the affinity by calling this function
56930b57cec5SDimitry Andric // 3) Error check the return value
56940b57cec5SDimitry Andric // 4) Use non-OpenMP parallelization
56950b57cec5SDimitry Andric // 5) Reset the affinity to what was stored in step 1)
56960b57cec5SDimitry Andric #ifdef __cplusplus
56970b57cec5SDimitry Andric extern "C"
56980b57cec5SDimitry Andric #endif
56990b57cec5SDimitry Andric int
kmp_set_thread_affinity_mask_initial()57000b57cec5SDimitry Andric kmp_set_thread_affinity_mask_initial()
57010b57cec5SDimitry Andric // the function returns 0 on success,
57020b57cec5SDimitry Andric // -1 if we cannot bind thread
57030b57cec5SDimitry Andric // >0 (errno) if an error happened during binding
57040b57cec5SDimitry Andric {
57050b57cec5SDimitry Andric int gtid = __kmp_get_gtid();
57060b57cec5SDimitry Andric if (gtid < 0) {
57070b57cec5SDimitry Andric // Do not touch non-omp threads
57080b57cec5SDimitry Andric KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
57090b57cec5SDimitry Andric "non-omp thread, returning\n"));
57100b57cec5SDimitry Andric return -1;
57110b57cec5SDimitry Andric }
57120b57cec5SDimitry Andric if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
57130b57cec5SDimitry Andric KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
57140b57cec5SDimitry Andric "affinity not initialized, returning\n"));
57150b57cec5SDimitry Andric return -1;
57160b57cec5SDimitry Andric }
57170b57cec5SDimitry Andric KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
57180b57cec5SDimitry Andric "set full mask for thread %d\n",
57190b57cec5SDimitry Andric gtid));
57200b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5721439352acSDimitry Andric #if KMP_OS_AIX
5722439352acSDimitry Andric return bindprocessor(BINDTHREAD, thread_self(), PROCESSOR_CLASS_ANY);
5723439352acSDimitry Andric #else
57240b57cec5SDimitry Andric return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
5725439352acSDimitry Andric #endif
57260b57cec5SDimitry Andric }
57270b57cec5SDimitry Andric #endif
57280b57cec5SDimitry Andric
57290b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED
5730