xref: /linux/arch/x86/kernel/cpu/topology.c (revision 705c09bb3cdffb141986598ad4ff9c9b0a66c3bd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * CPU/APIC topology
4  *
5  * The APIC IDs describe the system topology in multiple domain levels.
6  * The CPUID topology parser provides the information which part of the
7  * APIC ID is associated to the individual levels:
8  *
9  * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
10  *
11  * The root space contains the package (socket) IDs.
12  *
13  * Not enumerated levels consume 0 bits space, but conceptually they are
14  * always represented. If e.g. only CORE and THREAD levels are enumerated
15  * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE.
16  *
17  * If SMT is not supported, then the THREAD domain is still used. It then
18  * has the same physical ID as the CORE domain and is the only child of
19  * the core domain.
20  *
21  * This allows a unified view on the system independent of the enumerated
22  * domain levels without requiring any conditionals in the code.
23  */
24 #define pr_fmt(fmt) "CPU topo: " fmt
25 #include <linux/cpu.h>
26 
27 #include <xen/xen.h>
28 
29 #include <asm/apic.h>
30 #include <asm/hypervisor.h>
31 #include <asm/io_apic.h>
32 #include <asm/mpspec.h>
33 #include <asm/smp.h>
34 
35 #include "cpu.h"
36 
37 /*
38  * Map cpu index to physical APIC ID
39  */
40 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
41 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
42 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
43 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
44 
45 /* Bitmap of physically present CPUs. */
46 DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
47 
48 /* Used for CPU number allocation and parallel CPU bringup */
49 u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
50 
51 /* Bitmaps to mark registered APICs at each topology domain */
52 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
53 
54 /*
55  * Keep track of assigned, disabled and rejected CPUs. Present assigned
56  * with 1 as CPU #0 is reserved for the boot CPU.
57  */
58 static struct {
59 	unsigned int		nr_assigned_cpus;
60 	unsigned int		nr_disabled_cpus;
61 	unsigned int		nr_rejected_cpus;
62 	u32			boot_cpu_apic_id;
63 	u32			real_bsp_apic_id;
64 } topo_info __ro_after_init = {
65 	.nr_assigned_cpus	= 1,
66 	.boot_cpu_apic_id	= BAD_APICID,
67 	.real_bsp_apic_id	= BAD_APICID,
68 };
69 
70 #define domain_weight(_dom)	bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)
71 
72 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
73 {
74 	return phys_id == (u64)cpuid_to_apicid[cpu];
75 }
76 
#ifdef CONFIG_SMP
/*
 * Set @cpu in the primary thread mask when the bits of @apicid which are
 * covered by (__max_threads_per_core - 1) are all zero.
 */
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	unsigned int thread_bits = __max_threads_per_core - 1;

	if (apicid & thread_bits)
		return;

	cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
#else
/* Nothing to track on !SMP builds */
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
86 
87 /*
88  * Convert the APIC ID to a domain level ID by masking out the low bits
89  * below the domain level @dom.
90  */
91 static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
92 {
93 	if (dom == TOPO_SMT_DOMAIN)
94 		return apicid;
95 	return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
96 }
97 
98 static int topo_lookup_cpuid(u32 apic_id)
99 {
100 	int i;
101 
102 	/* CPU# to APICID mapping is persistent once it is established */
103 	for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
104 		if (cpuid_to_apicid[i] == apic_id)
105 			return i;
106 	}
107 	return -ENODEV;
108 }
109 
110 static __init int topo_get_cpunr(u32 apic_id)
111 {
112 	int cpu = topo_lookup_cpuid(apic_id);
113 
114 	if (cpu >= 0)
115 		return cpu;
116 
117 	return topo_info.nr_assigned_cpus++;
118 }
119 
120 static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
121 {
122 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
123 	early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
124 	early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
125 #endif
126 	set_cpu_possible(cpu, true);
127 	set_cpu_present(cpu, true);
128 }
129 
130 static __init bool check_for_real_bsp(u32 apic_id)
131 {
132 	/*
133 	 * There is no real good way to detect whether this a kdump()
134 	 * kernel, but except on the Voyager SMP monstrosity which is not
135 	 * longer supported, the real BSP APIC ID is the first one which is
136 	 * enumerated by firmware. That allows to detect whether the boot
137 	 * CPU is the real BSP. If it is not, then do not register the APIC
138 	 * because sending INIT to the real BSP would reset the whole
139 	 * system.
140 	 *
141 	 * The first APIC ID which is enumerated by firmware is detectable
142 	 * because the boot CPU APIC ID is registered before that without
143 	 * invoking this code.
144 	 */
145 	if (topo_info.real_bsp_apic_id != BAD_APICID)
146 		return false;
147 
148 	if (apic_id == topo_info.boot_cpu_apic_id) {
149 		topo_info.real_bsp_apic_id = apic_id;
150 		return false;
151 	}
152 
153 	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
154 		topo_info.boot_cpu_apic_id, apic_id);
155 	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
156 
157 	topo_info.real_bsp_apic_id = apic_id;
158 	return true;
159 }
160 
161 static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
162 				    unsigned long *map)
163 {
164 	unsigned int id, end, cnt = 0;
165 
166 	/* Calculate the exclusive end */
167 	end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);
168 
169 	/* Unfortunately there is no bitmap_weight_range() */
170 	for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
171 		cnt++;
172 	return cnt;
173 }
174 
175 static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
176 {
177 	int cpu, dom;
178 
179 	if (present) {
180 		set_bit(apic_id, phys_cpu_present_map);
181 
182 		/*
183 		 * Double registration is valid in case of the boot CPU
184 		 * APIC because that is registered before the enumeration
185 		 * of the APICs via firmware parsers or VM guest
186 		 * mechanisms.
187 		 */
188 		if (apic_id == topo_info.boot_cpu_apic_id)
189 			cpu = 0;
190 		else
191 			cpu = topo_get_cpunr(apic_id);
192 
193 		cpuid_to_apicid[cpu] = apic_id;
194 		topo_set_cpuids(cpu, apic_id, acpi_id);
195 	} else {
196 		u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
197 
198 		/*
199 		 * Check for present APICs in the same package when running
200 		 * on bare metal. Allow the bogosity in a guest.
201 		 */
202 		if (hypervisor_is_type(X86_HYPER_NATIVE) &&
203 		    topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
204 			pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
205 				     apic_id);
206 			topo_info.nr_rejected_cpus++;
207 			return;
208 		}
209 
210 		topo_info.nr_disabled_cpus++;
211 	}
212 
213 	/* Register present and possible CPUs in the domain maps */
214 	for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
215 		set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
216 }
217 
218 /**
219  * topology_register_apic - Register an APIC in early topology maps
220  * @apic_id:	The APIC ID to set up
221  * @acpi_id:	The ACPI ID associated to the APIC
222  * @present:	True if the corresponding CPU is present
223  */
224 void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
225 {
226 	if (apic_id >= MAX_LOCAL_APIC) {
227 		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
228 		topo_info.nr_rejected_cpus++;
229 		return;
230 	}
231 
232 	if (check_for_real_bsp(apic_id)) {
233 		topo_info.nr_rejected_cpus++;
234 		return;
235 	}
236 
237 	/* CPU numbers exhausted? */
238 	if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
239 		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
240 		topo_info.nr_rejected_cpus++;
241 		return;
242 	}
243 
244 	topo_register_apic(apic_id, acpi_id, present);
245 }
246 
247 /**
248  * topology_register_boot_apic - Register the boot CPU APIC
249  * @apic_id:	The APIC ID to set up
250  *
251  * Separate so CPU #0 can be assigned
252  */
253 void __init topology_register_boot_apic(u32 apic_id)
254 {
255 	WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);
256 
257 	topo_info.boot_cpu_apic_id = apic_id;
258 	topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
259 }
260 
261 /**
262  * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
263  * @apicid:		The APIC ID for which to lookup the logical ID
264  * @at_level:		The topology domain level to use
265  *
266  * @apicid must be a full APIC ID, not the normalized variant. It's valid to have
267  * all bits below the domain level specified by @at_level to be clear. So both
268  * real APIC IDs and backshifted normalized APIC IDs work correctly.
269  *
270  * Returns:
271  *  - >= 0:	The requested logical ID
272  *  - -ERANGE:	@apicid is out of range
273  *  - -ENODEV:	@apicid is not registered
274  */
275 int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
276 {
277 	/* Remove the bits below @at_level to get the proper level ID of @apicid */
278 	unsigned int lvlid = topo_apicid(apicid, at_level);
279 
280 	if (lvlid >= MAX_LOCAL_APIC)
281 		return -ERANGE;
282 	if (!test_bit(lvlid, apic_maps[at_level].map))
283 		return -ENODEV;
284 	/* Get the number of set bits before @lvlid. */
285 	return bitmap_weight(apic_maps[at_level].map, lvlid);
286 }
287 EXPORT_SYMBOL_GPL(topology_get_logical_id);
288 
289 /**
290  * topology_unit_count - Retrieve the count of specified units at a given topology domain level
291  * @apicid:		The APIC ID which specifies the search range
292  * @which_units:	The domain level specifying the units to count
293  * @at_level:		The domain level at which @which_units have to be counted
294  *
295  * This returns the number of possible units according to the enumerated
296  * information.
297  *
298  * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
299  * counts the number of possible cores in the package to which @apicid
300  * belongs.
301  *
302  * @at_level must obviously be greater than @which_level to produce useful
303  * results.  If @at_level is equal to @which_units the result is
304  * unsurprisingly 1. If @at_level is less than @which_units the results
305  * is by definition undefined and the function returns 0.
306  */
307 unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
308 				 enum x86_topology_domains at_level)
309 {
310 	/* Remove the bits below @at_level to get the proper level ID of @apicid */
311 	unsigned int lvlid = topo_apicid(apicid, at_level);
312 
313 	if (lvlid >= MAX_LOCAL_APIC)
314 		return 0;
315 	if (!test_bit(lvlid, apic_maps[at_level].map))
316 		return 0;
317 	if (which_units > at_level)
318 		return 0;
319 	if (which_units == at_level)
320 		return 1;
321 	return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
322 }
323 
324 #ifdef CONFIG_ACPI_HOTPLUG_CPU
325 /**
326  * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
327  * @apic_id:	The APIC ID to set up
328  * @acpi_id:	The ACPI ID associated to the APIC
329  */
330 int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
331 {
332 	int cpu;
333 
334 	if (apic_id >= MAX_LOCAL_APIC)
335 		return -EINVAL;
336 
337 	/* Reject if the APIC ID was not registered during enumeration. */
338 	if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
339 		return -ENODEV;
340 
341 	cpu = topo_lookup_cpuid(apic_id);
342 	if (cpu < 0)
343 		return -ENOSPC;
344 
345 	set_bit(apic_id, phys_cpu_present_map);
346 	topo_set_cpuids(cpu, apic_id, acpi_id);
347 	cpu_mark_primary_thread(cpu, apic_id);
348 	return cpu;
349 }
350 
351 /**
352  * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
353  * @cpu:	The CPU number for which the APIC ID is removed
354  */
355 void topology_hotunplug_apic(unsigned int cpu)
356 {
357 	u32 apic_id = cpuid_to_apicid[cpu];
358 
359 	if (apic_id == BAD_APICID)
360 		return;
361 
362 	per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
363 	clear_bit(apic_id, phys_cpu_present_map);
364 	set_cpu_present(cpu, false);
365 }
366 #endif
367 
368 #ifdef CONFIG_X86_LOCAL_APIC
369 static unsigned int max_possible_cpus __initdata = NR_CPUS;
370 
371 /**
372  * topology_apply_cmdline_limits_early - Apply topology command line limits early
373  *
374  * Ensure that command line limits are in effect before firmware parsing
375  * takes place.
376  */
377 void __init topology_apply_cmdline_limits_early(void)
378 {
379 	unsigned int possible = nr_cpu_ids;
380 
381 	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
382 	if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
383 		possible = 1;
384 
385 	/* 'possible_cpus=N' */
386 	possible = min_t(unsigned int, max_possible_cpus, possible);
387 
388 	if (possible < nr_cpu_ids) {
389 		pr_info("Limiting to %u possible CPUs\n", possible);
390 		set_nr_cpu_ids(possible);
391 	}
392 }
393 
394 static __init bool restrict_to_up(void)
395 {
396 	if (!smp_found_config || ioapic_is_disabled)
397 		return true;
398 	/*
399 	 * XEN PV is special as it does not advertise the local APIC
400 	 * properly, but provides a fake topology for it so that the
401 	 * infrastructure works. So don't apply the restrictions vs. APIC
402 	 * here.
403 	 */
404 	if (xen_pv_domain())
405 		return false;
406 
407 	return apic_is_disabled;
408 }
409 
410 void __init topology_init_possible_cpus(void)
411 {
412 	unsigned int assigned = topo_info.nr_assigned_cpus;
413 	unsigned int disabled = topo_info.nr_disabled_cpus;
414 	unsigned int cnta, cntb, cpu, allowed = 1;
415 	unsigned int total = assigned + disabled;
416 	u32 apicid, firstid;
417 
418 	if (!restrict_to_up()) {
419 		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
420 			disabled += assigned - nr_cpu_ids;
421 			assigned = nr_cpu_ids;
422 		}
423 		allowed = min_t(unsigned int, total, nr_cpu_ids);
424 	}
425 
426 	if (total > allowed)
427 		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
428 
429 	assigned = min_t(unsigned int, allowed, assigned);
430 	disabled = allowed - assigned;
431 
432 	topo_info.nr_assigned_cpus = assigned;
433 	topo_info.nr_disabled_cpus = disabled;
434 
435 	total_cpus = allowed;
436 	set_nr_cpu_ids(allowed);
437 
438 	cnta = domain_weight(TOPO_PKG_DOMAIN);
439 	cntb = domain_weight(TOPO_DIE_DOMAIN);
440 	__max_logical_packages = cnta;
441 	__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
442 
443 	pr_info("Max. logical packages: %3u\n", cnta);
444 	pr_info("Max. logical dies:     %3u\n", cntb);
445 	pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
446 
447 	cnta = domain_weight(TOPO_CORE_DOMAIN);
448 	cntb = domain_weight(TOPO_SMT_DOMAIN);
449 	/*
450 	 * Can't use order delta here as order(cnta) can be equal
451 	 * order(cntb) even if cnta != cntb.
452 	 */
453 	__max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
454 	pr_info("Max. threads per core: %3u\n", __max_threads_per_core);
455 
456 	firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
457 	__num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
458 	pr_info("Num. cores per package:   %3u\n", __num_cores_per_package);
459 	__num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
460 	pr_info("Num. threads per package: %3u\n", __num_threads_per_package);
461 
462 	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
463 	if (topo_info.nr_rejected_cpus)
464 		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
465 
466 	init_cpu_present(cpumask_of(0));
467 	init_cpu_possible(cpumask_of(0));
468 
469 	/* Assign CPU numbers to non-present CPUs */
470 	for (apicid = 0; disabled; disabled--, apicid++) {
471 		apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
472 					      MAX_LOCAL_APIC, apicid);
473 		if (apicid >= MAX_LOCAL_APIC)
474 			break;
475 		cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
476 	}
477 
478 	for (cpu = 0; cpu < allowed; cpu++) {
479 		apicid = cpuid_to_apicid[cpu];
480 
481 		set_cpu_possible(cpu, true);
482 
483 		if (apicid == BAD_APICID)
484 			continue;
485 
486 		cpu_mark_primary_thread(cpu, apicid);
487 		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
488 	}
489 }
490 
491 /*
492  * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
493  */
494 void __init topology_reset_possible_cpus_up(void)
495 {
496 	init_cpu_present(cpumask_of(0));
497 	init_cpu_possible(cpumask_of(0));
498 
499 	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
500 	if (topo_info.boot_cpu_apic_id != BAD_APICID)
501 		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
502 }
503 
504 static int __init setup_possible_cpus(char *str)
505 {
506 	get_option(&str, &max_possible_cpus);
507 	return 0;
508 }
509 early_param("possible_cpus", setup_possible_cpus);
510 #endif
511