Lines Matching +full:ecx +full:- +full:1000
2 * kmp_affinity.cpp -- affinity management
5 //===----------------------------------------------------------------------===//
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
62 return __kmp_topology->restrict_to_mask(mask); in restrict_to_mask()
92 thr_bar->depth = depth; in __kmp_get_hierarchy()
93 __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1, in __kmp_get_hierarchy()
94 &(thr_bar->base_leaf_kids)); in __kmp_get_hierarchy()
95 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; in __kmp_get_hierarchy()
204 int depth = __kmp_topology->get_depth(); in compare_ids()
206 if (ahwthread->ids[level] < bhwthread->ids[level]) in compare_ids()
207 return -1; in compare_ids()
208 else if (ahwthread->ids[level] > bhwthread->ids[level]) in compare_ids()
211 if (ahwthread->os_id < bhwthread->os_id) in compare_ids()
212 return -1; in compare_ids()
213 else if (ahwthread->os_id > bhwthread->os_id) in compare_ids()
223 int depth = __kmp_topology->get_depth(); in compare_compact()
224 int compact = __kmp_topology->compact; in compare_compact()
228 int j = depth - i - 1; in compare_compact()
229 if (aa->sub_ids[j] < bb->sub_ids[j]) in compare_compact()
230 return -1; in compare_compact()
231 if (aa->sub_ids[j] > bb->sub_ids[j]) in compare_compact()
235 int j = i - compact; in compare_compact()
236 if (aa->sub_ids[j] < bb->sub_ids[j]) in compare_compact()
237 return -1; in compare_compact()
238 if (aa->sub_ids[j] > bb->sub_ids[j]) in compare_compact()
246 int depth = __kmp_topology->get_depth(); in print()
301 for (int i = depth - 1, j = depth; i >= target_layer; --i, --j) in _insert_layer()
305 for (int i = depth - 1, j = depth; i >= target_layer; --i, --j) in _insert_layer()
332 __kmp_topology->sort_ids(); in _insert_windows_proc_groups()
337 // This is done by having the layer take on the id = UNKNOWN_ID (-1)
356 while (top_index1 < depth - 1 && top_index2 < depth) { in _remove_radix1_layers()
409 for (int d = remove_layer_ids; d < depth - 1; ++d) in _remove_radix1_layers()
412 for (int idx = remove_layer; idx < depth - 1; ++idx) in _remove_radix1_layers()
414 depth--; in _remove_radix1_layers()
482 // Because efficiencies can range from 0 to max efficiency - 1, in _gather_enumeration_information()
522 above_level = -1; in _get_ncores_with_attr()
554 flags.uniform = (num == count[depth - 1]); in _discover_uniformity()
563 previous_id[i] = -1; in _set_sub_ids()
564 sub_id[i] = -1; in _set_sub_ids()
594 if (package_level == -1) in _set_globals()
600 KMP_ASSERT(core_level != -1); in _set_globals()
601 KMP_ASSERT(thread_level != -1); in _set_globals()
604 if (package_level != -1) { in _set_globals()
626 retval->hw_threads = (kmp_hw_thread_t *)(bytes + sizeof(kmp_topology_t)); in allocate()
628 retval->hw_threads = nullptr; in allocate()
630 retval->num_hw_threads = nproc; in allocate()
631 retval->depth = ndepth; in allocate()
634 retval->types = (kmp_hw_t *)arr; in allocate()
635 retval->ratio = arr + (size_t)KMP_HW_LAST; in allocate()
636 retval->count = arr + 2 * (size_t)KMP_HW_LAST; in allocate()
637 retval->num_core_efficiencies = 0; in allocate()
638 retval->num_core_types = 0; in allocate()
639 retval->compact = 0; in allocate()
641 retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN; in allocate()
642 KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; } in allocate()
644 retval->types[i] = types[i]; in allocate()
645 retval->equivalent[types[i]] = types[i]; in allocate()
661 kmp_hw_thread_t &previous_thread = hw_threads[i - 1]; in check_ids()
710 printf("%-15s -> %-15s\n", key, value); in dump()
762 if (print_types[print_types_depth - 1] == KMP_HW_THREAD) { in print()
765 print_types[print_types_depth - 1] = KMP_HW_CORE; in print()
858 // OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores in set_granularity()
864 affinity.gran_levels = -1; in set_granularity()
869 // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead in set_granularity()
881 affinity.gran_levels = -1; in set_granularity()
926 for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i) in set_granularity()
946 if (get_level(KMP_HW_L2) != -1) in canonicalize()
948 else if (get_level(KMP_HW_TILE) != -1) in canonicalize()
1018 __kmp_avail_proc--; in restrict_to_mask()
1036 __kmp_affin_origMask->copy(__kmp_affin_fullMask); in restrict_to_mask()
1050 __kmp_hw_subset->sort(); in filter_hw_subset()
1052 __kmp_hw_subset->canonicalize(__kmp_topology); in filter_hw_subset()
1057 bool is_absolute = __kmp_hw_subset->is_absolute(); in filter_hw_subset()
1058 int hw_subset_depth = __kmp_hw_subset->get_depth(); in filter_hw_subset()
1066 const kmp_hw_subset_t::item_t &item = __kmp_hw_subset->at(i); in filter_hw_subset()
1076 __kmp_hw_subset->at(i).type = equivalent_type; in filter_hw_subset()
1115 // Check if using a single core attribute on non-hybrid arch. in filter_hw_subset()
2118 // Check if using multiple core attributes on non-hybrid arch. in filter_hw_subset()
1155 KMP_HNT(ValidValuesRange, 0, num_core_efficiencies - 1), in filter_hw_subset()
1169 int level_above = core_level - 1; in filter_hw_subset()
1228 abs_sub_ids[i] = -1; in filter_hw_subset()
1229 prev_sub_ids[i] = -1; in filter_hw_subset()
1232 core_eff_sub_ids[i] = -1; in filter_hw_subset()
1234 core_type_sub_ids[i] = -1; in filter_hw_subset()
1312 const auto &hw_subset_item = __kmp_hw_subset->at(hw_subset_index); in filter_hw_subset()
1314 if (level == -1) in filter_hw_subset()
1394 for (int i = 0; i < (depth - hw_level); ++i) { in is_close()
1445 // The format is a comma separated list of non-negative integers or integer
1446 // ranges: e.g., 1,2,3-5,7,9-15
1456 char *end = buf + buf_len - 1; in __kmp_affinity_print_mask()
1459 if (mask->begin() == mask->end()) { in __kmp_affinity_print_mask()
1460 KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}"); in __kmp_affinity_print_mask()
1467 start = mask->begin(); in __kmp_affinity_print_mask()
1471 for (finish = mask->next(start), previous = start; in __kmp_affinity_print_mask()
1472 finish == previous + 1 && finish != mask->end(); in __kmp_affinity_print_mask()
1473 finish = mask->next(finish)) { in __kmp_affinity_print_mask()
1480 KMP_SNPRINTF(scan, end - scan + 1, "%s", ","); in __kmp_affinity_print_mask()
1486 if (previous - start > 1) { in __kmp_affinity_print_mask()
1487 KMP_SNPRINTF(scan, end - scan + 1, "%u-%u", start, previous); in __kmp_affinity_print_mask()
1490 KMP_SNPRINTF(scan, end - scan + 1, "%u", start); in __kmp_affinity_print_mask()
1492 if (previous - start > 0) { in __kmp_affinity_print_mask()
1493 KMP_SNPRINTF(scan, end - scan + 1, ",%u", previous); in __kmp_affinity_print_mask()
1499 if (start == mask->end()) in __kmp_affinity_print_mask()
1502 if (end - scan < 2) in __kmp_affinity_print_mask()
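The two mask-printing helpers matched above collapse consecutive processor ids into ranges before emitting them. A minimal standalone sketch of that range-collapsing idea, with illustrative names and none of the runtime's buffer bookkeeping:

#include <cstddef>
#include <cstdio>
#include <vector>

// Print a sorted id list as "a,b,c-e,...": runs of two ids are printed as a
// pair, longer runs as "start-end", mirroring the matched comparisons above.
static void print_ranges(const std::vector<unsigned> &ids) {
  for (std::size_t i = 0; i < ids.size();) {
    std::size_t j = i;
    while (j + 1 < ids.size() && ids[j + 1] == ids[j] + 1)
      ++j; // extend the run while ids stay consecutive
    if (i)
      printf(",");
    if (j == i)
      printf("%u", ids[i]);
    else if (j == i + 1)
      printf("%u,%u", ids[i], ids[j]);
    else
      printf("%u-%u", ids[i], ids[j]);
    i = j + 1;
  }
  printf("\n");
}

int main() {
  print_ranges({1, 2, 3, 4, 5, 7, 9, 10, 11}); // prints 1-5,7,9-11
  return 0;
}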
1513 // The format is a comma separated list of non-negative integers or integer
1514 // ranges: e.g., 1,2,3-5,7,9-15
1526 if (mask->begin() == mask->end()) { in __kmp_affinity_str_buf_mask()
1532 start = mask->begin(); in __kmp_affinity_str_buf_mask()
1536 for (finish = mask->next(start), previous = start; in __kmp_affinity_str_buf_mask()
1537 finish == previous + 1 && finish != mask->end(); in __kmp_affinity_str_buf_mask()
1538 finish = mask->next(finish)) { in __kmp_affinity_str_buf_mask()
1550 if (previous - start > 1) { in __kmp_affinity_str_buf_mask()
1551 __kmp_str_buf_print(buf, "%u-%u", start, previous); in __kmp_affinity_str_buf_mask()
1555 if (previous - start > 0) { in __kmp_affinity_str_buf_mask()
1561 if (start == mask->end()) in __kmp_affinity_str_buf_mask()
1585 // e.g., 1,2,4-7,9,11-15 in __kmp_affinity_get_offline_cpus()
1599 } else if (c == '-') { in __kmp_affinity_get_offline_cpus()
1673 return hwloc_obj_type_is_cache(obj->type); in __kmp_hwloc_is_cache_type()
1675 return obj->type == HWLOC_OBJ_CACHE; in __kmp_hwloc_is_cache_type()
1683 if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION) in __kmp_hwloc_type_2_topology_type()
1685 switch (obj->attr->cache.depth) { in __kmp_hwloc_type_2_topology_type()
1701 switch (obj->type) { in __kmp_hwloc_type_2_topology_type()
1712 if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE) in __kmp_hwloc_type_2_topology_type()
1714 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_TILE) in __kmp_hwloc_type_2_topology_type()
1716 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_MODULE) in __kmp_hwloc_type_2_topology_type()
1718 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP) in __kmp_hwloc_type_2_topology_type()
1738 for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, in __kmp_hwloc_get_nobjs_under_obj()
1739 obj->logical_index, type, 0); in __kmp_hwloc_get_nobjs_under_obj()
1741 obj->type, first) == obj; in __kmp_hwloc_get_nobjs_under_obj()
1742 first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, in __kmp_hwloc_get_nobjs_under_obj()
1754 hwloc_obj_type_t ltype = lower->type; in __kmp_hwloc_get_sub_id()
1755 int lindex = lower->logical_index - 1; in __kmp_hwloc_get_sub_id()
1760 hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) { in __kmp_hwloc_get_sub_id()
1761 if (obj->userdata) { in __kmp_hwloc_get_sub_id()
1762 sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata)); in __kmp_hwloc_get_sub_id()
1766 lindex--; in __kmp_hwloc_get_sub_id()
1770 lower->userdata = RCAST(void *, sub_id + 1); in __kmp_hwloc_get_sub_id()
1808 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; in __kmp_affinity_create_hwloc_map()
1829 cpukinds[idx].efficiency = -1; in __kmp_affinity_create_hwloc_map()
1857 obj = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin()); in __kmp_affinity_create_hwloc_map()
1860 if (obj->memory_arity) { in __kmp_affinity_create_hwloc_map()
1862 for (memory = obj->memory_first_child; memory; in __kmp_affinity_create_hwloc_map()
1864 if (memory->type == HWLOC_OBJ_NUMANODE) in __kmp_affinity_create_hwloc_map()
1867 if (memory && memory->type == HWLOC_OBJ_NUMANODE) { in __kmp_affinity_create_hwloc_map()
1869 hwloc_types[depth] = memory->type; in __kmp_affinity_create_hwloc_map()
1877 hwloc_types[depth] = obj->type; in __kmp_affinity_create_hwloc_map()
1880 obj = obj->parent; in __kmp_affinity_create_hwloc_map()
1885 for (int i = 0, j = depth - 1; i < j; ++i, --j) { in __kmp_affinity_create_hwloc_map()
1900 int index = depth - 1; in __kmp_affinity_create_hwloc_map()
1901 bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask); in __kmp_affinity_create_hwloc_map()
1902 kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index); in __kmp_affinity_create_hwloc_map()
1905 hw_thread.ids[index] = pu->logical_index; in __kmp_affinity_create_hwloc_map()
1906 hw_thread.os_id = pu->os_index; in __kmp_affinity_create_hwloc_map()
1910 int cpukind_index = -1; in __kmp_affinity_create_hwloc_map()
1923 index--; in __kmp_affinity_create_hwloc_map()
1928 obj = obj->parent; in __kmp_affinity_create_hwloc_map()
1933 if (obj->memory_arity) { in __kmp_affinity_create_hwloc_map()
1935 for (memory = obj->memory_first_child; memory; in __kmp_affinity_create_hwloc_map()
1937 if (memory->type == HWLOC_OBJ_NUMANODE) in __kmp_affinity_create_hwloc_map()
1940 if (memory && memory->type == HWLOC_OBJ_NUMANODE) { in __kmp_affinity_create_hwloc_map()
1943 hw_thread.ids[index] = memory->logical_index; in __kmp_affinity_create_hwloc_map()
1945 index--; in __kmp_affinity_create_hwloc_map()
1956 hw_thread.ids[index] = obj->logical_index; in __kmp_affinity_create_hwloc_map()
1958 index--; in __kmp_affinity_create_hwloc_map()
1975 __kmp_topology->sort_ids(); in __kmp_affinity_create_hwloc_map()
1982 // mapping of OS thread ids <-> processor ids.
2018 kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct); in __kmp_affinity_create_flat_map()
2033 // If multiple Windows* OS processor groups exist, we can create a 2-level
2066 kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct++); in __kmp_affinity_create_proc_group_map()
2080 const kmp_uint32 SHIFT_LEFT = sizeof(kmp_uint32) * 8 - 1 - MSB; in __kmp_extract_bits()
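The SHIFT_LEFT line above comes from a small bit-field helper. A sketch of the shift-left-then-right idiom that line suggests, extracting bits LSB..MSB of a 32-bit value (a reconstruction for illustration, not the runtime's exact template):

#include <cassert>
#include <cstdint>

// Shift left to discard the bits above MSB, then shift right to drop the bits
// below LSB; assumes 0 <= LSB <= MSB < 32.
template <int LSB, int MSB> static std::uint32_t extract_bits(std::uint32_t v) {
  const unsigned shift_left = sizeof(std::uint32_t) * 8 - 1 - MSB;
  return (v << shift_left) >> (shift_left + LSB);
}

int main() {
  assert(extract_bits<8, 15>(0x00ABCDEFu) == 0xCDu); // bits 8..15 of 0x00ABCDEF
  return 0;
}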
2111 if (aa->pkgId < bb->pkgId) in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2112 return -1; in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2113 if (aa->pkgId > bb->pkgId) in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2115 if (aa->coreId < bb->coreId) in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2116 return -1; in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2117 if (aa->coreId > bb->coreId) in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2119 if (aa->threadId < bb->threadId) in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2120 return -1; in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2121 if (aa->threadId > bb->threadId) in __kmp_affinity_cmp_apicThreadInfo_phys_id()
2174 table[depth].mask = ((-1) << cache_mask_width); in get_leaf4_levels()
2181 // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
2203 // need to do something else - use the defaults that we calculated from in __kmp_affinity_create_apicid_map()
2245 // - Older OSes are usually found on machines with older chips, which do not in __kmp_affinity_create_apicid_map()
2247 // - The performance penalty for mistakenly identifying a machine as HT when in __kmp_affinity_create_apicid_map()
2253 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; in __kmp_affinity_create_apicid_map()
2269 // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context in __kmp_affinity_create_apicid_map()
2271 // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value in __kmp_affinity_create_apicid_map()
2276 // Hyper-Threading Technology is supported by the chip but has been in __kmp_affinity_create_apicid_map()
2278 // On other OS/chip combinations supporting Intel(R) Hyper-Threading in __kmp_affinity_create_apicid_map()
2280 // Hyper-Threading Technology is disabled and 2 when it is enabled. in __kmp_affinity_create_apicid_map()
2281 // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value in __kmp_affinity_create_apicid_map()
2284 // bound, but the IA-32 architecture manual says that it is exactly the in __kmp_affinity_create_apicid_map()
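The comment block above spells out the legacy leaf-1/leaf-4 CPUID fields this detection method relies on. A minimal sketch of reading those fields with GCC/Clang's <cpuid.h>, using the bit positions from the comments (the program is illustrative and is not the runtime's own code path):

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return 1;
  unsigned apic_id = (ebx >> 24) & 0xff;             // initial APIC id, EBX bits 24:31
  unsigned max_threads_per_pkg = (ebx >> 16) & 0xff; // EBX bits 16:23
  unsigned max_cores_per_pkg = 1;
  // Leaf 4, sub-leaf 0: EAX bits 26:31 hold (max addressable core ids per pkg) - 1.
  if (__get_cpuid_count(4, 0, &eax, &ebx, &ecx, &edx))
    max_cores_per_pkg = ((eax >> 26) & 0x3f) + 1;
  printf("apic id %u, max threads/pkg %u, max cores/pkg %u\n", apic_id,
         max_threads_per_pkg, max_cores_per_pkg);
  return 0;
}

The runtime turns these maxima into the widthC/widthT bit widths and the maskC/maskT masks seen further down, which carve the APIC id into package, core, and thread ids.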
2301 __kmp_affinity_dispatch->bind_thread(i); in __kmp_affinity_create_apicid_map()
2336 int widthT = widthCT - widthC; in __kmp_affinity_create_apicid_map()
2346 int maskC = (1 << widthC) - 1; in __kmp_affinity_create_apicid_map()
2349 int maskT = (1 << widthT) - 1; in __kmp_affinity_create_apicid_map()
2366 // [0 .. coresPerPkg-1] and threadId's are usually assigned in __kmp_affinity_create_apicid_map()
2367 // [0..threadsPerCore-1], we don't want to make any such assumptions. in __kmp_affinity_create_apicid_map()
2370 // total # packages) are at this point - we want to determine that now. We in __kmp_affinity_create_apicid_map()
2388 // intra-pkg consistency checks in __kmp_affinity_create_apicid_map()
2457 //(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1); in __kmp_affinity_create_apicid_map()
2473 kmp_hw_thread_t &hw_thread = __kmp_topology->at(i); in __kmp_affinity_create_apicid_map()
2489 __kmp_topology->sort_ids(); in __kmp_affinity_create_apicid_map()
2490 if (!__kmp_topology->check_ids()) { in __kmp_affinity_create_apicid_map()
2523 * CPUID.B or 1F, Input ECX (sub leaf # aka level number)
2525        31-16          15-8           7-4             4-0
2526 ---+-----------+--------------+-------------+-----------------+
2528 ---+-----------+--------------+-------------+-----------------+
2530 ---+-----------+--------------+-------------------------------+
2531 ECX| reserved  |  Level Type  |     Level Number (8 bits)     |
2532 ---+-----------+--------------+-------------------------------+
2534 ---+----------------------------------------------------------+
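As a companion to the register layout above, a minimal sketch of walking the CPUID.0BH sub-leaves with GCC/Clang's <cpuid.h>; leaf 0x1F is walked the same way, just with more level types (the loop and names are illustrative, not lifted from the file):

#include <cpuid.h>
#include <cstdio>

int main() {
  for (unsigned subleaf = 0;; ++subleaf) {
    unsigned eax, ebx, ecx, edx;
    if (!__get_cpuid_count(0x0B, subleaf, &eax, &ebx, &ecx, &edx))
      return 1;
    unsigned level_type = (ecx >> 8) & 0xff; // ECX bits 8:15: 1 = SMT, 2 = core
    if (level_type == 0)                     // 0 = invalid: enumeration is done
      break;
    unsigned shift = eax & 0x1f;             // EAX bits 0:4: x2APIC id shift width
    unsigned nitems = ebx & 0xffff;          // EBX bits 0:15: logical CPUs at this level
    printf("sub-leaf %u: type %u, shift %u, count %u, x2apic id %u\n", subleaf,
           level_type, shift, nitems, edx);
  }
  return 0;
}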
2590 level_type = __kmp_extract_bits<8, 15>(buf.ecx); in __kmp_x2apicid_get_levels()
2606 levels[levels_index - 1].mask_width = mask_width; in __kmp_x2apicid_get_levels()
2607 levels[levels_index - 1].nitems = nitems; in __kmp_x2apicid_get_levels()
2620 levels[i].mask = ~((-1) << levels[i].mask_width); in __kmp_x2apicid_get_levels()
2621 levels[i].cache_mask = (-1) << levels[i].mask_width; in __kmp_x2apicid_get_levels()
2626 levels[i].mask = (-1) << levels[i - 1].mask_width; in __kmp_x2apicid_get_levels()
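A tiny worked example of the two mask formulas matched just above, with an assumed mask_width of 4 (the real width comes from the sub-leaf enumeration; the value here is only illustrative):

#include <cstdio>

int main() {
  const unsigned mask_width = 4;            // assumed width, not a measured value
  const unsigned all = ~0u;                 // stands in for the (-1) in the source
  unsigned mask = ~(all << mask_width);     // 0x0000000f: id bits at this level
  unsigned cache_mask = all << mask_width;  // 0xfffffff0: the bits above this level
  printf("mask=%#010x cache_mask=%#010x\n", mask, cache_mask);
  return 0;
}

ANDing an x2APIC id with mask isolates the id at that level, while cache_mask keeps everything above it.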
2685 topology_leaf = -1; in __kmp_affinity_create_x2apicid_map()
2699 if (topology_leaf == -1 || levels_index == 0) { in __kmp_affinity_create_x2apicid_map()
2708 // we need to do something else - use the defaults that we calculated from in __kmp_affinity_create_x2apicid_map()
2722 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; in __kmp_affinity_create_x2apicid_map()
2728 for (int i = depth - 1, j = 0; i >= 0; --i, ++j) in __kmp_affinity_create_x2apicid_map()
2742 if (hw_cache_mask == cache_mask && j < levels_index - 1) { in __kmp_affinity_create_x2apicid_map()
2745 __kmp_topology->set_equivalent_type(cache_type, type); in __kmp_affinity_create_x2apicid_map()
2771 __kmp_affinity_dispatch->bind_thread(proc); in __kmp_affinity_create_x2apicid_map()
2776 kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index); in __kmp_affinity_create_x2apicid_map()
2786 for (unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) { in __kmp_affinity_create_x2apicid_map()
2789 hw_thread.ids[idx] >>= my_levels[j - 1].mask_width; in __kmp_affinity_create_x2apicid_map()
2804 __kmp_topology->sort_ids(); in __kmp_affinity_create_x2apicid_map()
2805 if (!__kmp_topology->check_ids()) { in __kmp_affinity_create_x2apicid_map()
2829 for (i = maxIndex;; i--) { in __kmp_affinity_cmp_ProcCpuInfo_phys_id()
2831 return -1; in __kmp_affinity_cmp_ProcCpuInfo_phys_id()
2956 buf[sizeof(buf) - 1] = 1; in __kmp_affinity_create_cpuinfo_map()
2963 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { in __kmp_affinity_create_cpuinfo_map()
2968 // FIXME - this will match "node_<n> <garbage>" in __kmp_affinity_create_cpuinfo_map()
3127 buf[sizeof(buf) - 1] = 1; in __kmp_affinity_create_cpuinfo_map()
3143 } else if (!buf[sizeof(buf) - 1]) { in __kmp_affinity_create_cpuinfo_map()
3182 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { in __kmp_affinity_create_cpuinfo_map()
3184 char *p = strchr(buf + sizeof(s1) - 1, ':'); in __kmp_affinity_create_cpuinfo_map()
3232 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { in __kmp_affinity_create_cpuinfo_map()
3234 char *p = strchr(buf + sizeof(s2) - 1, ':'); in __kmp_affinity_create_cpuinfo_map()
3244 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { in __kmp_affinity_create_cpuinfo_map()
3246 char *p = strchr(buf + sizeof(s3) - 1, ':'); in __kmp_affinity_create_cpuinfo_map()
3257 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { in __kmp_affinity_create_cpuinfo_map()
3259 char *p = strchr(buf + sizeof(s4) - 1, ':'); in __kmp_affinity_create_cpuinfo_map()
3271 char *p = strchr(buf + sizeof(s4) - 1, ':'); in __kmp_affinity_create_cpuinfo_map()
3286 // leading tokens that we don't recognize - if the line isn't empty, go on in __kmp_affinity_create_cpuinfo_map()
3369 // [0 .. coresPerPkg-1] and threadId's are usually assigned in __kmp_affinity_create_cpuinfo_map()
3370 // [0..threadsPerCore-1], we don't want to make any such assumptions. in __kmp_affinity_create_cpuinfo_map()
3373 // total # packages) are at this point - we want to determine that now. We in __kmp_affinity_create_cpuinfo_map()
3411 for (index = maxIndex; index >= threadIdIndex; index--) { in __kmp_affinity_create_cpuinfo_map()
3413 // Auto-assign the thread id field if it wasn't specified. in __kmp_affinity_create_cpuinfo_map()
3445 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1); in __kmp_affinity_create_cpuinfo_map()
3451 // Auto-assign the thread id field if it wasn't specified. in __kmp_affinity_create_cpuinfo_map()
3468 // Also, check that we haven't already restarted the loop (to be safe - in __kmp_affinity_create_cpuinfo_map()
3490 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1); in __kmp_affinity_create_cpuinfo_map()
3543 int pkgLevel = -1; in __kmp_affinity_create_cpuinfo_map()
3544 int coreLevel = -1; in __kmp_affinity_create_cpuinfo_map()
3545 int threadLevel = -1; in __kmp_affinity_create_cpuinfo_map()
3571 kmp_hw_thread_t &hw_thread = __kmp_topology->at(i); in __kmp_affinity_create_cpuinfo_map()
3576 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) { in __kmp_affinity_create_cpuinfo_map()
3596 __kmp_topology->sort_ids(); in __kmp_affinity_create_cpuinfo_map()
3597 if (!__kmp_topology->check_ids()) { in __kmp_affinity_create_cpuinfo_map()
3616 int numAddrs = __kmp_topology->get_num_hw_threads(); in __kmp_create_os_id_masks()
3617 int depth = __kmp_topology->get_depth(); in __kmp_create_os_id_masks()
3622 i = find_next(-1); in __kmp_create_os_id_masks()
3629 for (i = numAddrs - 1;; --i) { in __kmp_create_os_id_masks()
3630 int osId = __kmp_topology->at(i).os_id; in __kmp_create_os_id_masks()
3658 i = j = leader = find_next(-1); in __kmp_create_os_id_masks()
3659 KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); in __kmp_create_os_id_masks()
3665 if (__kmp_topology->is_close(leader, i, affinity)) { in __kmp_create_os_id_masks()
3666 KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); in __kmp_create_os_id_masks()
3673 int osId = __kmp_topology->at(j).os_id; in __kmp_create_os_id_masks()
3677 __kmp_topology->at(j).leader = (j == leader); in __kmp_create_os_id_masks()
3685 KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); in __kmp_create_os_id_masks()
3691 int osId = __kmp_topology->at(j).os_id; in __kmp_create_os_id_masks()
3695 __kmp_topology->at(j).leader = (j == leader); in __kmp_create_os_id_masks()
3703 __kmp_topology->print(env_var); in __kmp_create_os_id_masks()
3710 // as file-static than to try and pass them through the calling sequence of
3711 // the recursive-descent OMP_PLACES parser.
3745 // Re-parse the proclist (for the explicit affinity type), and form the list
3753 int maxOsId = affinity.num_os_id_masks - 1; in __kmp_affinity_process_proclist()
3849 if (*next != '-') { in __kmp_affinity_process_proclist()
3860 // This is a range. Skip over the '-' and read in the 2nd int. in __kmp_affinity_process_proclist()
3861 next++; // skip '-' in __kmp_affinity_process_proclist()
3878 if (*next == '-') { in __kmp_affinity_process_proclist()
3879 sign = -1; in __kmp_affinity_process_proclist()
3898 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list"); in __kmp_affinity_process_proclist()
3937 /*-----------------------------------------------------------------------------
3938 Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3955 signed := - signed
3956 -----------------------------------------------------------------------------*/
4035 if (**scan == '-') { in __kmp_process_subplace_list()
4036 sign *= -1; in __kmp_process_subplace_list()
4037 (*scan)++; // skip '-' in __kmp_process_subplace_list()
4118 int maxOsId = affinity.num_os_id_masks - 1; in __kmp_affinity_process_placelist()
4183 if (*scan == '-') { in __kmp_affinity_process_placelist()
4184 sign *= -1; in __kmp_affinity_process_placelist()
4185 scan++; // skip '-' in __kmp_affinity_process_placelist()
4218 if (i < count - 1) { in __kmp_affinity_process_placelist()
4269 const kmp_hw_thread_t &hw_thread = __kmp_topology->at(i); in __kmp_affinity_find_core_level()
4270 for (int j = bottom_level; j > 0; j--) { in __kmp_affinity_find_core_level()
4272 if (core_level < (j - 1)) { in __kmp_affinity_find_core_level()
4273 core_level = j - 1; in __kmp_affinity_find_core_level()
4284 return __kmp_topology->get_count(core_level); in __kmp_affinity_compute_ncores()
4290 KMP_DEBUG_ASSERT(proc >= 0 && proc < __kmp_topology->get_num_hw_threads()); in __kmp_affinity_find_core()
4294 if (__kmp_topology->at(i + 1).sub_ids[j] != in __kmp_affinity_find_core()
4295 __kmp_topology->at(i).sub_ids[j]) { in __kmp_affinity_find_core()
4311 int thread_level = __kmp_topology->get_level(KMP_HW_THREAD); in __kmp_affinity_max_proc_per_core()
4312 return __kmp_topology->calculate_ratio(thread_level, core_level); in __kmp_affinity_max_proc_per_core()
4333 int depth = __kmp_topology->get_depth(); in __kmp_affinity_get_mask_topology_info()
4337 const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx); in __kmp_affinity_get_mask_topology_info()
4339 kmp_hw_t type = __kmp_topology->get_type(level); in __kmp_affinity_get_mask_topology_info()
4348 kmp_hw_t type = __kmp_topology->get_type(level); in __kmp_affinity_get_mask_topology_info()
4370 const kmp_affin_mask_t *mask = th->th.th_affin_mask; in __kmp_affinity_get_thread_topology_info()
4371 kmp_affinity_ids_t &ids = th->th.th_topology_ids; in __kmp_affinity_get_thread_topology_info()
4372 kmp_affinity_attrs_t &attrs = th->th.th_topology_attrs; in __kmp_affinity_get_thread_topology_info()
4390 int max_cpu = __kmp_affin_fullMask->get_max_cpu(); in __kmp_affinity_get_topology_info()
4391 int num_hw_threads = __kmp_topology->get_num_hw_threads(); in __kmp_affinity_get_topology_info()
4410 int os_id = __kmp_topology->at(hw_thread).os_id; in __kmp_affinity_get_topology_info()
4426 if (__kmp_topology && __kmp_topology->get_num_hw_threads()) { in __kmp_aux_affinity_initialize_other_data()
4427 machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); in __kmp_aux_affinity_initialize_other_data()
4440 KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads()); in __kmp_create_affinity_none_places()
4449 // Create the "full" mask - this defines all of the processors that we in __kmp_aux_affinity_initialize_masks()
4469 __kmp_affin_origMask->copy(__kmp_affin_fullMask); in __kmp_aux_affinity_initialize_masks()
4505 __kmp_affin_origMask->copy(__kmp_affin_fullMask); in __kmp_aux_affinity_initialize_masks()
4509 __kmp_affin_fullMask->set_process_affinity(true); in __kmp_aux_affinity_initialize_masks()
4529 // In the default code path, errors are not fatal - we just try using in __kmp_aux_affinity_initialize_topology()
4534 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { in __kmp_aux_affinity_initialize_topology()
4594 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC); in __kmp_aux_affinity_initialize_topology()
4660 __kmp_topology->canonicalize(nPackages, nCoresPerPkg, in __kmp_aux_affinity_initialize_topology()
4663 __kmp_topology->print(env_var); in __kmp_aux_affinity_initialize_topology()
4670 __kmp_topology->canonicalize(); in __kmp_aux_affinity_initialize_topology()
4672 __kmp_topology->print(env_var); in __kmp_aux_affinity_initialize_topology()
4673 bool filtered = __kmp_topology->filter_hw_subset(); in __kmp_aux_affinity_initialize_topology()
4675 __kmp_topology->print("KMP_HW_SUBSET"); in __kmp_aux_affinity_initialize_topology()
4695 KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads()); in __kmp_aux_affinity_initialize()
4714 __kmp_topology->set_granularity(affinity); in __kmp_aux_affinity_initialize()
4715 int depth = __kmp_topology->get_depth(); in __kmp_aux_affinity_initialize()
4719 int numAddrs = __kmp_topology->get_num_hw_threads(); in __kmp_aux_affinity_initialize()
4724 KMP_ASSERT(idx >= -1); in __kmp_aux_affinity_initialize()
4726 if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran)) in __kmp_aux_affinity_initialize()
4745 KMP_ASSERT(idx >= -1); in __kmp_aux_affinity_initialize()
4803 affinity.compact = depth - 1 - affinity.compact; in __kmp_aux_affinity_initialize()
4809 affinity.compact = depth - 1; in __kmp_aux_affinity_initialize()
4820 } else if (!__kmp_topology->is_uniform()) { in __kmp_aux_affinity_initialize()
4825 __kmp_affinity_find_core_level(__kmp_avail_proc, depth - 1); in __kmp_aux_affinity_initialize()
4826 int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc, depth - 1, in __kmp_aux_affinity_initialize()
4829 __kmp_avail_proc, depth - 1, core_level); in __kmp_aux_affinity_initialize()
4842 procarr[i] = -1; in __kmp_aux_affinity_initialize()
4845 int lastcore = -1; in __kmp_aux_affinity_initialize()
4848 int proc = __kmp_topology->at(i).os_id; in __kmp_aux_affinity_initialize()
4849 int core = __kmp_affinity_find_core(i, depth - 1, core_level); in __kmp_aux_affinity_initialize()
4862 affinity.compact = depth - 1; in __kmp_aux_affinity_initialize()
4866 // Allocate the gtid->affinity mask table. in __kmp_aux_affinity_initialize()
4884 __kmp_topology->sort_compact(affinity); in __kmp_aux_affinity_initialize()
4888 int num_hw_threads = __kmp_topology->get_num_hw_threads(); in __kmp_aux_affinity_initialize()
4891 if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) { in __kmp_aux_affinity_initialize()
4894 int osId = __kmp_topology->at(i).os_id; in __kmp_aux_affinity_initialize()
4908 __kmp_topology->print(env_var); in __kmp_aux_affinity_initialize()
4912 __kmp_topology->sort_ids(); in __kmp_aux_affinity_initialize()
4943 if (affinity->masks != NULL) in __kmp_affinity_uninitialize()
4944 KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks); in __kmp_affinity_uninitialize()
4945 if (affinity->os_id_masks != NULL) in __kmp_affinity_uninitialize()
4946 KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks); in __kmp_affinity_uninitialize()
4947 if (affinity->proclist != NULL) in __kmp_affinity_uninitialize()
4948 __kmp_free(affinity->proclist); in __kmp_affinity_uninitialize()
4949 if (affinity->ids != NULL) in __kmp_affinity_uninitialize()
4950 __kmp_free(affinity->ids); in __kmp_affinity_uninitialize()
4951 if (affinity->attrs != NULL) in __kmp_affinity_uninitialize()
4952 __kmp_free(affinity->attrs); in __kmp_affinity_uninitialize()
4953 *affinity = KMP_AFFINITY_INIT(affinity->env_var); in __kmp_affinity_uninitialize()
5000 mask_idx = gtid - 2; in __kmp_select_mask_by_gtid()
5003 KMP_DEBUG_ASSERT(affinity->num_masks > 0); in __kmp_select_mask_by_gtid()
5004 *place = (mask_idx + affinity->offset) % affinity->num_masks; in __kmp_select_mask_by_gtid()
5005 *mask = KMP_CPU_INDEX(affinity->masks, *place); in __kmp_select_mask_by_gtid()
5008 // This function initializes the per-thread data concerning affinity including
5016 th->th.th_topology_ids.ids[id] = kmp_hw_thread_t::UNKNOWN_ID; in __kmp_affinity_set_init_mask()
5017 th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN; in __kmp_affinity_set_init_mask()
5023 if (th->th.th_affin_mask == NULL) { in __kmp_affinity_set_init_mask()
5024 KMP_CPU_ALLOC(th->th.th_affin_mask); in __kmp_affinity_set_init_mask()
5026 KMP_CPU_ZERO(th->th.th_affin_mask); in __kmp_affinity_set_init_mask()
5045 if ((affinity->type == affinity_none) || in __kmp_affinity_set_init_mask()
5046 (affinity->type == affinity_balanced) || in __kmp_affinity_set_init_mask()
5074 th->th.th_current_place = i; in __kmp_affinity_set_init_mask()
5076 th->th.th_new_place = i; in __kmp_affinity_set_init_mask()
5077 th->th.th_first_place = 0; in __kmp_affinity_set_init_mask()
5078 th->th.th_last_place = affinity->num_masks - 1; in __kmp_affinity_set_init_mask()
5080 // When using a Non-OMP_PROC_BIND affinity method, in __kmp_affinity_set_init_mask()
5081 // set all threads' place-partition-var to the entire place list in __kmp_affinity_set_init_mask()
5082 th->th.th_first_place = 0; in __kmp_affinity_set_init_mask()
5083 th->th.th_last_place = affinity->num_masks - 1; in __kmp_affinity_set_init_mask()
5087 th->th.th_topology_ids = __kmp_affinity.ids[i]; in __kmp_affinity_set_init_mask()
5088 th->th.th_topology_attrs = __kmp_affinity.attrs[i]; in __kmp_affinity_set_init_mask()
5099 KMP_CPU_COPY(th->th.th_affin_mask, mask); in __kmp_affinity_set_init_mask()
5117 if (affinity->flags.verbose && (affinity->type == affinity_none || in __kmp_affinity_bind_init_mask()
5118 (th->th.th_current_place != KMP_PLACE_ALL && in __kmp_affinity_bind_init_mask()
5119 affinity->type != affinity_balanced)) && in __kmp_affinity_bind_init_mask()
5123 th->th.th_affin_mask); in __kmp_affinity_bind_init_mask()
5132 if (affinity->type == affinity_none) { in __kmp_affinity_bind_init_mask()
5133 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); in __kmp_affinity_bind_init_mask()
5138 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); in __kmp_affinity_bind_init_mask()
5152 gtid, th->th.th_new_place, th->th.th_current_place)); in __kmp_affinity_bind_place()
5155 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); in __kmp_affinity_bind_place()
5156 KMP_ASSERT(th->th.th_new_place >= 0); in __kmp_affinity_bind_place()
5157 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks); in __kmp_affinity_bind_place()
5158 if (th->th.th_first_place <= th->th.th_last_place) { in __kmp_affinity_bind_place()
5159 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) && in __kmp_affinity_bind_place()
5160 (th->th.th_new_place <= th->th.th_last_place)); in __kmp_affinity_bind_place()
5162 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) || in __kmp_affinity_bind_place()
5163 (th->th.th_new_place >= th->th.th_last_place)); in __kmp_affinity_bind_place()
5169 KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place); in __kmp_affinity_bind_place()
5170 KMP_CPU_COPY(th->th.th_affin_mask, mask); in __kmp_affinity_bind_place()
5171 th->th.th_current_place = th->th.th_new_place; in __kmp_affinity_bind_place()
5176 th->th.th_affin_mask); in __kmp_affinity_bind_place()
5180 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); in __kmp_affinity_bind_place()
5189 return -1; in __kmp_aux_set_affinity()
5194 1000, (""); { in __kmp_aux_set_affinity()
5232 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); in __kmp_aux_set_affinity()
5235 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask)); in __kmp_aux_set_affinity()
5238 th->th.th_current_place = KMP_PLACE_UNDEFINED; in __kmp_aux_set_affinity()
5239 th->th.th_new_place = KMP_PLACE_UNDEFINED; in __kmp_aux_set_affinity()
5240 th->th.th_first_place = 0; in __kmp_aux_set_affinity()
5241 th->th.th_last_place = __kmp_affinity.num_masks - 1; in __kmp_aux_set_affinity()
5244 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; in __kmp_aux_set_affinity()
5256 return -1; in __kmp_aux_get_affinity()
5265 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); in __kmp_aux_get_affinity()
5268 1000, (""); { in __kmp_aux_get_affinity()
5271 th->th.th_affin_mask); in __kmp_aux_get_affinity()
5287 1000, (""); { in __kmp_aux_get_affinity()
5300 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); in __kmp_aux_get_affinity()
5320 return -1; in __kmp_aux_set_affinity_mask_proc()
5324 1000, (""); { in __kmp_aux_set_affinity_mask_proc()
5341 return -1; in __kmp_aux_set_affinity_mask_proc()
5344 return -2; in __kmp_aux_set_affinity_mask_proc()
5353 return -1; in __kmp_aux_unset_affinity_mask_proc()
5357 1000, (""); { in __kmp_aux_unset_affinity_mask_proc()
5374 return -1; in __kmp_aux_unset_affinity_mask_proc()
5377 return -2; in __kmp_aux_unset_affinity_mask_proc()
5386 return -1; in __kmp_aux_get_affinity_mask_proc()
5390 1000, (""); { in __kmp_aux_get_affinity_mask_proc()
5407 return -1; in __kmp_aux_get_affinity_mask_proc()
5420 int high = __kmp_topology->get_num_hw_threads() - 1; in __kmp_get_first_osid_with_ecore()
5422 while (high - low > 1) { in __kmp_get_first_osid_with_ecore()
5424 if (__kmp_topology->at(mid).attrs.get_core_type() == in __kmp_get_first_osid_with_ecore()
5431 if (__kmp_topology->at(mid).attrs.get_core_type() == KMP_HW_CORE_TYPE_ATOM) { in __kmp_get_first_osid_with_ecore()
5434 return -1; in __kmp_get_first_osid_with_ecore()
5438 // Dynamic affinity settings - Affinity balanced
5442 int tid = th->th.th_info.ds.ds_tid; in __kmp_balanced_affinity()
5466 if (__kmp_topology->is_uniform()) { in __kmp_balanced_affinity()
5479 // How many cores will have an additional thread bound to it - "big cores" in __kmp_balanced_affinity()
5487 coreID = (tid - big_cores) / chunk; in __kmp_balanced_affinity()
5488 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core; in __kmp_balanced_affinity()
5493 kmp_affin_mask_t *mask = th->th.th_affin_mask; in __kmp_balanced_affinity()
5498 __kmp_topology->at(coreID * __kmp_nth_per_core + threadID).os_id; in __kmp_balanced_affinity()
5503 osID = __kmp_topology->at(coreID * __kmp_nth_per_core + i).os_id; in __kmp_balanced_affinity()
5515 } else { // Non-uniform topology in __kmp_balanced_affinity()
5517 kmp_affin_mask_t *mask = th->th.th_affin_mask; in __kmp_balanced_affinity()
5521 __kmp_affinity_find_core_level(__kmp_avail_proc, __kmp_aff_depth - 1); in __kmp_balanced_affinity()
5523 __kmp_aff_depth - 1, core_level); in __kmp_balanced_affinity()
5525 __kmp_avail_proc, __kmp_aff_depth - 1, core_level); in __kmp_balanced_affinity()
5531 int osID = __kmp_topology->at(tid).os_id; in __kmp_balanced_affinity()
5535 __kmp_affinity_find_core(tid, __kmp_aff_depth - 1, core_level); in __kmp_balanced_affinity()
5537 int osID = __kmp_topology->at(i).os_id; in __kmp_balanced_affinity()
5538 if (__kmp_affinity_find_core(i, __kmp_aff_depth - 1, core_level) == in __kmp_balanced_affinity()
5551 if (procarr[i * nth_per_core + j] != -1) { in __kmp_balanced_affinity()
5560 if (osID != -1) { in __kmp_balanced_affinity()
5593 if (procarr[i * nth_per_core + j] != -1) { in __kmp_balanced_affinity()
5626 if (procarr[i * nth_per_core + k] != -1) { in __kmp_balanced_affinity()
5629 cnt--; in __kmp_balanced_affinity()
5630 nth--; in __kmp_balanced_affinity()
5635 cnt--; in __kmp_balanced_affinity()
5636 nth--; in __kmp_balanced_affinity()
5663 if (osID != -1) { in __kmp_balanced_affinity()
5694 // 4) Use non-OpenMP parallelization
5702 // -1 if we cannot bind thread in kmp_set_thread_affinity_mask_initial()
5707 // Do not touch non-omp threads in kmp_set_thread_affinity_mask_initial()
5709 "non-omp thread, returning\n")); in kmp_set_thread_affinity_mask_initial()
5710 return -1; in kmp_set_thread_affinity_mask_initial()
5715 return -1; in kmp_set_thread_affinity_mask_initial()