// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU/APIC topology
 *
 * The APIC IDs describe the system topology in multiple domain levels.
 * The CPUID topology parser provides the information which part of the
 * APIC ID is associated to the individual levels:
 *
 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
 *
 * The root space contains the package (socket) IDs.
 *
 * Levels which are not enumerated consume 0 bits of space, but
 * conceptually they are always represented. If e.g. only CORE and THREAD
 * levels are enumerated then the DIE, MODULE and TILE have the same
 * physical ID as the PACKAGE.
 *
 * If SMT is not supported, then the THREAD domain is still used. It then
 * has the same physical ID as the CORE domain and is the only child of
 * the core domain.
 *
 * This allows a unified view on the system independent of the enumerated
 * domain levels without requiring any conditionals in the code.
 */
#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>

#include <xen/xen.h>

#include <asm/apic.h>
#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/smp.h>

#include "cpu.h"

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);

/* Bitmap of physically present CPUs. */
DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;

/* Used for CPU number allocation and parallel CPU bringup */
u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };

/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;

/*
 * Keep track of assigned, disabled and rejected CPUs. nr_assigned_cpus
 * is preset to 1 as CPU #0 is reserved for the boot CPU.
 */
static struct {
	unsigned int	nr_assigned_cpus;
	unsigned int	nr_disabled_cpus;
	unsigned int	nr_rejected_cpus;
	u32		boot_cpu_apic_id;
	u32		real_bsp_apic_id;
} topo_info __ro_after_init = {
	.nr_assigned_cpus	= 1,
	.boot_cpu_apic_id	= BAD_APICID,
	.real_bsp_apic_id	= BAD_APICID,
};

/* Number of APIC IDs registered in the bitmap of topology domain @_dom */
#define domain_weight(_dom)	bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)

/* Used by the device tree / OF layer to match a CPU node to a CPU number */
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == (u64)cpuid_to_apicid[cpu];
}

#ifdef CONFIG_SMP
/*
 * Mark @cpu as a primary SMT thread. A primary thread is the one whose
 * APIC ID has all thread-level bits clear (__max_threads_per_core is a
 * power of two, so the mask below selects exactly those bits).
 */
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	if (!(apicid & (__max_threads_per_core - 1)))
		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif

/*
 * Convert the APIC ID to a domain level ID by masking out the low bits
 * below the domain level @dom.
 */
static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
{
	if (dom == TOPO_SMT_DOMAIN)
		return apicid;
	return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
}

/* Find the CPU number already associated with @apic_id, -ENODEV if none */
static int topo_lookup_cpuid(u32 apic_id)
{
	int i;

	/* CPU# to APICID mapping is persistent once it is established */
	for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
		if (cpuid_to_apicid[i] == apic_id)
			return i;
	}
	return -ENODEV;
}

/* Return the existing CPU number of @apic_id or allocate the next free one */
static __init int topo_get_cpunr(u32 apic_id)
{
	int cpu = topo_lookup_cpuid(apic_id);

	if (cpu >= 0)
		return cpu;

	return topo_info.nr_assigned_cpus++;
}

/* Store the CPU number to APIC/ACPI ID mappings and mark @cpu present */
static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
{
#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
	early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
	set_cpu_present(cpu, true);
}

/*
 * Validate that the boot CPU is the real BSP. Returns true when @apic_id
 * must be rejected because INITing it would reset the machine.
 */
static __init bool check_for_real_bsp(u32 apic_id)
{
	bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6;
	u64 msr;

	/*
	 * There is no real good way to detect whether this is a kdump()
	 * kernel, but except on the Voyager SMP monstrosity which is no
	 * longer supported, the real BSP APIC ID is the first one which is
	 * enumerated by firmware. That allows to detect whether the boot
	 * CPU is the real BSP. If it is not, then do not register the APIC
	 * because sending INIT to the real BSP would reset the whole
	 * system.
	 *
	 * The first APIC ID which is enumerated by firmware is detectable
	 * because the boot CPU APIC ID is registered before that without
	 * invoking this code.
	 */
	if (topo_info.real_bsp_apic_id != BAD_APICID)
		return false;

	/*
	 * Check whether the enumeration order is broken by evaluating the
	 * BSP bit in the APICBASE MSR. If the CPU does not have the
	 * APICBASE MSR then the BSP detection is not possible and the
	 * kernel must rely on the firmware enumeration order.
	 */
	if (has_apic_base) {
		rdmsrq(MSR_IA32_APICBASE, msr);
		is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
	}

	if (apic_id == topo_info.boot_cpu_apic_id) {
		/*
		 * If the boot CPU has the APIC BSP bit set then the
		 * firmware enumeration is agreeing. If the CPU does not
		 * have the APICBASE MSR then the only choice is to trust
		 * the enumeration order.
		 */
		if (is_bsp || !has_apic_base) {
			topo_info.real_bsp_apic_id = apic_id;
			return false;
		}
		/*
		 * If the boot APIC is enumerated first, but the APICBASE
		 * MSR does not have the BSP bit set, then there is no way
		 * to discover the real BSP here. Assume a crash kernel and
		 * limit the number of CPUs to 1 as an INIT to the real BSP
		 * would reset the machine.
		 */
		pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id);
		pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n");
		set_nr_cpu_ids(1);
		goto fwbug;
	}

	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
		topo_info.boot_cpu_apic_id, apic_id);

	if (is_bsp) {
		/*
		 * The boot CPU has the APIC BSP bit set. Use it and complain
		 * about the broken firmware enumeration.
		 */
		topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
		goto fwbug;
	}

	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");

	topo_info.real_bsp_apic_id = apic_id;
	return true;

fwbug:
	pr_warn(FW_BUG "APIC enumeration order not specification compliant\n");
	return false;
}

/* Count the bits set in @map within the @at_level range starting at @lvlid */
static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
				    unsigned long *map)
{
	unsigned int id, end, cnt = 0;

	/* Calculate the exclusive end */
	end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);

	/* Unfortunately there is no bitmap_weight_range() */
	for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
		cnt++;
	return cnt;
}

/* Record @apic_id in the per-domain maps and the CPU accounting */
static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	int cpu, dom;

	if (present) {
		set_bit(apic_id, phys_cpu_present_map);

		/*
		 * Double registration is valid in case of the boot CPU
		 * APIC because that is registered before the enumeration
		 * of the APICs via firmware parsers or VM guest
		 * mechanisms.
		 */
		if (apic_id == topo_info.boot_cpu_apic_id)
			cpu = 0;
		else
			cpu = topo_get_cpunr(apic_id);

		cpuid_to_apicid[cpu] = apic_id;
		topo_set_cpuids(cpu, apic_id, acpi_id);
	} else {
		u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);

		/*
		 * Check for present APICs in the same package when running
		 * on bare metal. Allow the bogosity in a guest.
		 */
		if (hypervisor_is_type(X86_HYPER_NATIVE) &&
		    topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
			pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
				     apic_id);
			topo_info.nr_rejected_cpus++;
			return;
		}

		topo_info.nr_disabled_cpus++;
	}

	/*
	 * Register present and possible CPUs in the domain
	 * maps. cpu_possible_map will be updated in
	 * topology_init_possible_cpus() after enumeration is done.
	 */
	for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
		set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}

/**
 * topology_register_apic - Register an APIC in early topology maps
 * @apic_id: The APIC ID to set up
 * @acpi_id: The ACPI ID associated to the APIC
 * @present: True if the corresponding CPU is present
 */
void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	if (apic_id >= MAX_LOCAL_APIC) {
		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
		topo_info.nr_rejected_cpus++;
		return;
	}

	if (check_for_real_bsp(apic_id)) {
		topo_info.nr_rejected_cpus++;
		return;
	}

	/* CPU numbers exhausted? */
	if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
		topo_info.nr_rejected_cpus++;
		return;
	}

	topo_register_apic(apic_id, acpi_id, present);
}

/**
 * topology_register_boot_apic - Register the boot CPU APIC
 * @apic_id: The APIC ID to set up
 *
 * Separate so CPU #0 can be assigned
 */
void __init topology_register_boot_apic(u32 apic_id)
{
	WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);

	topo_info.boot_cpu_apic_id = apic_id;
	topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
}

/**
 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
 * @apicid: The APIC ID for which to lookup the logical ID
 * @at_level: The topology domain level to use
 *
 * @apicid must be a full APIC ID, not the normalized variant. It's valid to have
 * all bits below the domain level specified by @at_level to be clear. So both
 * real APIC IDs and backshifted normalized APIC IDs work correctly.
320 * 321 * Returns: 322 * - >= 0: The requested logical ID 323 * - -ERANGE: @apicid is out of range 324 * - -ENODEV: @apicid is not registered 325 */ 326 int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) 327 { 328 /* Remove the bits below @at_level to get the proper level ID of @apicid */ 329 unsigned int lvlid = topo_apicid(apicid, at_level); 330 331 if (lvlid >= MAX_LOCAL_APIC) 332 return -ERANGE; 333 if (!test_bit(lvlid, apic_maps[at_level].map)) 334 return -ENODEV; 335 /* Get the number of set bits before @lvlid. */ 336 return bitmap_weight(apic_maps[at_level].map, lvlid); 337 } 338 EXPORT_SYMBOL_GPL(topology_get_logical_id); 339 340 /** 341 * topology_unit_count - Retrieve the count of specified units at a given topology domain level 342 * @apicid: The APIC ID which specifies the search range 343 * @which_units: The domain level specifying the units to count 344 * @at_level: The domain level at which @which_units have to be counted 345 * 346 * This returns the number of possible units according to the enumerated 347 * information. 348 * 349 * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN) 350 * counts the number of possible cores in the package to which @apicid 351 * belongs. 352 * 353 * @at_level must obviously be greater than @which_level to produce useful 354 * results. If @at_level is equal to @which_units the result is 355 * unsurprisingly 1. If @at_level is less than @which_units the results 356 * is by definition undefined and the function returns 0. 
 */
unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
				 enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return 0;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return 0;
	if (which_units > at_level)
		return 0;
	if (which_units == at_level)
		return 1;
	/* Count @which_units entries within the @at_level range of @lvlid */
	return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
}

#ifdef CONFIG_ACPI_HOTPLUG_CPU
/**
 * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
 * @apic_id: The APIC ID to set up
 * @acpi_id: The ACPI ID associated to the APIC
 *
 * Returns: The CPU number on success, or a negative error code:
 *  -EINVAL if @apic_id exceeds the kernel APIC ID limit,
 *  -ENODEV if @apic_id was not registered during enumeration,
 *  -ENOSPC if no CPU number was assigned to @apic_id at boot.
 */
int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
{
	int cpu;

	if (apic_id >= MAX_LOCAL_APIC)
		return -EINVAL;

	/* Reject if the APIC ID was not registered during enumeration. */
	if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
		return -ENODEV;

	cpu = topo_lookup_cpuid(apic_id);
	if (cpu < 0)
		return -ENOSPC;

	set_bit(apic_id, phys_cpu_present_map);
	topo_set_cpuids(cpu, apic_id, acpi_id);
	cpu_mark_primary_thread(cpu, apic_id);
	return cpu;
}

/**
 * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
 * @cpu: The CPU number for which the APIC ID is removed
 *
 * Invalidates the CPU to APIC ID mapping and clears the present state.
 */
void topology_hotunplug_apic(unsigned int cpu)
{
	u32 apic_id = cpuid_to_apicid[cpu];

	if (apic_id == BAD_APICID)
		return;

	per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
	clear_bit(apic_id, phys_cpu_present_map);
	set_cpu_present(cpu, false);
}
#endif

#ifdef CONFIG_X86_LOCAL_APIC
/* Upper limit set by the 'possible_cpus=' command line parameter */
static unsigned int max_possible_cpus __initdata = NR_CPUS;

/**
 * topology_apply_cmdline_limits_early - Apply topology command line limits early
 *
 * Ensure that command line limits
 * are in effect before firmware parsing
 * takes place.
 */
void __init topology_apply_cmdline_limits_early(void)
{
	unsigned int possible = nr_cpu_ids;

	/* 'maxcpus=0' 'nosmp' 'nolapic' */
	if (!setup_max_cpus || apic_is_disabled)
		possible = 1;

	/* 'possible_cpus=N' */
	possible = min_t(unsigned int, max_possible_cpus, possible);

	if (possible < nr_cpu_ids) {
		pr_info("Limiting to %u possible CPUs\n", possible);
		set_nr_cpu_ids(possible);
	}
}

/* True if the system must be restricted to uniprocessor operation */
static __init bool restrict_to_up(void)
{
	if (!smp_found_config)
		return true;
	/*
	 * XEN PV is special as it does not advertise the local APIC
	 * properly, but provides a fake topology for it so that the
	 * infrastructure works. So don't apply the restrictions vs. APIC
	 * here.
	 */
	if (xen_pv_domain())
		return false;

	return apic_is_disabled;
}

/*
 * Finalize CPU accounting after enumeration: size nr_cpu_ids, compute the
 * package/die/core/thread limits and populate the present/possible masks.
 */
void __init topology_init_possible_cpus(void)
{
	unsigned int assigned = topo_info.nr_assigned_cpus;
	unsigned int disabled = topo_info.nr_disabled_cpus;
	unsigned int cnta, cntb, cpu, allowed = 1;
	unsigned int total = assigned + disabled;
	u32 apicid, firstid;

	/*
	 * If there was no APIC registered, then fake one so that the
	 * topology bitmap is populated. That ensures that the code below
	 * is valid and the various query interfaces can be used
	 * unconditionally. This does not affect the actual APIC code in
	 * any way because either the local APIC address has not been
	 * registered or the local APIC was disabled on the command line.
	 */
	if (topo_info.boot_cpu_apic_id == BAD_APICID)
		topology_register_boot_apic(0);

	if (!restrict_to_up()) {
		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
			disabled += assigned - nr_cpu_ids;
			assigned = nr_cpu_ids;
		}
		allowed = min_t(unsigned int, total, nr_cpu_ids);
	}

	if (total > allowed)
		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);

	/* Excess assigned CPUs are accounted as disabled (hotpluggable) */
	assigned = min_t(unsigned int, allowed, assigned);
	disabled = allowed - assigned;

	topo_info.nr_assigned_cpus = assigned;
	topo_info.nr_disabled_cpus = disabled;

	total_cpus = allowed;
	set_nr_cpu_ids(allowed);

	cnta = domain_weight(TOPO_PKG_DOMAIN);
	cntb = domain_weight(TOPO_DIE_DOMAIN);
	__max_logical_packages = cnta;
	__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));

	pr_info("Max. logical packages: %3u\n", cnta);
	pr_info("Max. logical dies: %3u\n", cntb);
	pr_info("Max. dies per package: %3u\n", __max_dies_per_package);

	cnta = domain_weight(TOPO_CORE_DOMAIN);
	cntb = domain_weight(TOPO_SMT_DOMAIN);
	/*
	 * Can't use order delta here as order(cnta) can be equal
	 * order(cntb) even if cnta != cntb.
	 */
	__max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
	pr_info("Max. threads per core: %3u\n", __max_threads_per_core);

	firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
	__num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. cores per package: %3u\n", __num_cores_per_package);
	__num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. threads per package: %3u\n", __num_threads_per_package);

	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
	if (topo_info.nr_rejected_cpus)
		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	/* Assign CPU numbers to non-present CPUs */
	for (apicid = 0; disabled; disabled--, apicid++) {
		apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
					      MAX_LOCAL_APIC, apicid);
		if (apicid >= MAX_LOCAL_APIC)
			break;
		cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
	}

	for (cpu = 0; cpu < allowed; cpu++) {
		apicid = cpuid_to_apicid[cpu];

		set_cpu_possible(cpu, true);

		if (apicid == BAD_APICID)
			continue;

		cpu_mark_primary_thread(cpu, apicid);
		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
	}
}

/*
 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
 */
void __init topology_reset_possible_cpus_up(void)
{
	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
	if (topo_info.boot_cpu_apic_id != BAD_APICID)
		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
}

/* 'possible_cpus=N' command line parameter */
static int __init setup_possible_cpus(char *str)
{
	get_option(&str, &max_possible_cpus);
	return 0;
}
early_param("possible_cpus", setup_possible_cpus);
#endif