1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * CPU/APIC topology 4 * 5 * The APIC IDs describe the system topology in multiple domain levels. 6 * The CPUID topology parser provides the information which part of the 7 * APIC ID is associated to the individual levels: 8 * 9 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD] 10 * 11 * The root space contains the package (socket) IDs. 12 * 13 * Not enumerated levels consume 0 bits space, but conceptually they are 14 * always represented. If e.g. only CORE and THREAD levels are enumerated 15 * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE. 16 * 17 * If SMT is not supported, then the THREAD domain is still used. It then 18 * has the same physical ID as the CORE domain and is the only child of 19 * the core domain. 20 * 21 * This allows a unified view on the system independent of the enumerated 22 * domain levels without requiring any conditionals in the code. 23 */ 24 #define pr_fmt(fmt) "CPU topo: " fmt 25 #include <linux/cpu.h> 26 27 #include <xen/xen.h> 28 29 #include <asm/apic.h> 30 #include <asm/io_apic.h> 31 #include <asm/mpspec.h> 32 #include <asm/msr.h> 33 #include <asm/smp.h> 34 35 #include "cpu.h" 36 37 /* 38 * Map cpu index to physical APIC ID 39 */ 40 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); 41 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); 42 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 43 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); 44 45 /* Bitmap of physically present CPUs. */ 46 DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; 47 48 /* Used for CPU number allocation and parallel CPU bringup */ 49 u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, }; 50 51 /* Bitmaps to mark registered APICs at each topology domain */ 52 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init; 53 54 /* 55 * Keep track of assigned, disabled and rejected CPUs. Present assigned 56 * with 1 as CPU #0 is reserved for the boot CPU. 57 */ 58 static struct { 59 unsigned int nr_assigned_cpus; 60 unsigned int nr_disabled_cpus; 61 unsigned int nr_rejected_cpus; 62 u32 boot_cpu_apic_id; 63 u32 real_bsp_apic_id; 64 } topo_info __ro_after_init = { 65 .nr_assigned_cpus = 1, 66 .boot_cpu_apic_id = BAD_APICID, 67 .real_bsp_apic_id = BAD_APICID, 68 }; 69 70 #define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC) 71 72 bool arch_match_cpu_phys_id(int cpu, u64 phys_id) 73 { 74 return phys_id == (u64)cpuid_to_apicid[cpu]; 75 } 76 77 static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) 78 { 79 if (!(apicid & (__max_threads_per_core - 1))) 80 cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); 81 } 82 83 /* 84 * Convert the APIC ID to a domain level ID by masking out the low bits 85 * below the domain level @dom. 86 */ 87 static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom) 88 { 89 if (dom == TOPO_SMT_DOMAIN) 90 return apicid; 91 return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]); 92 } 93 94 static int topo_lookup_cpuid(u32 apic_id) 95 { 96 int i; 97 98 /* CPU# to APICID mapping is persistent once it is established */ 99 for (i = 0; i < topo_info.nr_assigned_cpus; i++) { 100 if (cpuid_to_apicid[i] == apic_id) 101 return i; 102 } 103 return -ENODEV; 104 } 105 106 static __init int topo_get_cpunr(u32 apic_id) 107 { 108 int cpu = topo_lookup_cpuid(apic_id); 109 110 if (cpu >= 0) 111 return cpu; 112 113 return topo_info.nr_assigned_cpus++; 114 } 115 116 static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) 117 { 118 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 119 early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; 120 early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; 121 #endif 122 set_cpu_present(cpu, true); 123 } 124 125 static __init bool check_for_real_bsp(u32 apic_id) 126 { 127 bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6; 128 u64 msr; 129 130 /* 131 * There is no real good way to detect whether this a kdump() 132 * kernel, but except on the Voyager SMP monstrosity which is not 133 * longer supported, the real BSP APIC ID is the first one which is 134 * enumerated by firmware. That allows to detect whether the boot 135 * CPU is the real BSP. If it is not, then do not register the APIC 136 * because sending INIT to the real BSP would reset the whole 137 * system. 138 * 139 * The first APIC ID which is enumerated by firmware is detectable 140 * because the boot CPU APIC ID is registered before that without 141 * invoking this code. 142 */ 143 if (topo_info.real_bsp_apic_id != BAD_APICID) 144 return false; 145 146 /* 147 * Check whether the enumeration order is broken by evaluating the 148 * BSP bit in the APICBASE MSR. If the CPU does not have the 149 * APICBASE MSR then the BSP detection is not possible and the 150 * kernel must rely on the firmware enumeration order. 151 */ 152 if (has_apic_base) { 153 rdmsrq(MSR_IA32_APICBASE, msr); 154 is_bsp = !!(msr & MSR_IA32_APICBASE_BSP); 155 } 156 157 if (apic_id == topo_info.boot_cpu_apic_id) { 158 /* 159 * If the boot CPU has the APIC BSP bit set then the 160 * firmware enumeration is agreeing. If the CPU does not 161 * have the APICBASE MSR then the only choice is to trust 162 * the enumeration order. 163 */ 164 if (is_bsp || !has_apic_base) { 165 topo_info.real_bsp_apic_id = apic_id; 166 return false; 167 } 168 /* 169 * If the boot APIC is enumerated first, but the APICBASE 170 * MSR does not have the BSP bit set, then there is no way 171 * to discover the real BSP here. Assume a crash kernel and 172 * limit the number of CPUs to 1 as an INIT to the real BSP 173 * would reset the machine. 174 */ 175 pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id); 176 pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n"); 177 set_nr_cpu_ids(1); 178 goto fwbug; 179 } 180 181 pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n", 182 topo_info.boot_cpu_apic_id, apic_id); 183 184 if (is_bsp) { 185 /* 186 * The boot CPU has the APIC BSP bit set. Use it and complain 187 * about the broken firmware enumeration. 188 */ 189 topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id; 190 goto fwbug; 191 } 192 193 pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); 194 195 topo_info.real_bsp_apic_id = apic_id; 196 return true; 197 198 fwbug: 199 pr_warn(FW_BUG "APIC enumeration order not specification compliant\n"); 200 return false; 201 } 202 203 static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, 204 unsigned long *map) 205 { 206 unsigned int id, end, cnt = 0; 207 208 /* Calculate the exclusive end */ 209 end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); 210 211 /* Unfortunately there is no bitmap_weight_range() */ 212 for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id)) 213 cnt++; 214 return cnt; 215 } 216 217 static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) 218 { 219 int cpu, dom; 220 221 if (present) { 222 set_bit(apic_id, phys_cpu_present_map); 223 224 /* 225 * Double registration is valid in case of the boot CPU 226 * APIC because that is registered before the enumeration 227 * of the APICs via firmware parsers or VM guest 228 * mechanisms. 229 */ 230 if (apic_id == topo_info.boot_cpu_apic_id) 231 cpu = 0; 232 else 233 cpu = topo_get_cpunr(apic_id); 234 235 cpuid_to_apicid[cpu] = apic_id; 236 topo_set_cpuids(cpu, apic_id, acpi_id); 237 } else { 238 topo_info.nr_disabled_cpus++; 239 } 240 241 /* 242 * Register present and possible CPUs in the domain 243 * maps. cpu_possible_map will be updated in 244 * topology_init_possible_cpus() after enumeration is done. 245 */ 246 for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) 247 set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); 248 } 249 250 /** 251 * topology_register_apic - Register an APIC in early topology maps 252 * @apic_id: The APIC ID to set up 253 * @acpi_id: The ACPI ID associated to the APIC 254 * @present: True if the corresponding CPU is present 255 */ 256 void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) 257 { 258 if (apic_id >= MAX_LOCAL_APIC) { 259 pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); 260 topo_info.nr_rejected_cpus++; 261 return; 262 } 263 264 if (check_for_real_bsp(apic_id)) { 265 topo_info.nr_rejected_cpus++; 266 return; 267 } 268 269 /* CPU numbers exhausted? */ 270 if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) { 271 pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); 272 topo_info.nr_rejected_cpus++; 273 return; 274 } 275 276 topo_register_apic(apic_id, acpi_id, present); 277 } 278 279 /** 280 * topology_register_boot_apic - Register the boot CPU APIC 281 * @apic_id: The APIC ID to set up 282 * 283 * Separate so CPU #0 can be assigned 284 */ 285 void __init topology_register_boot_apic(u32 apic_id) 286 { 287 WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID); 288 289 topo_info.boot_cpu_apic_id = apic_id; 290 topo_register_apic(apic_id, CPU_ACPIID_INVALID, true); 291 } 292 293 /** 294 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level 295 * @apicid: The APIC ID for which to lookup the logical ID 296 * @at_level: The topology domain level to use 297 * 298 * @apicid must be a full APIC ID, not the normalized variant. It's valid to have 299 * all bits below the domain level specified by @at_level to be clear. So both 300 * real APIC IDs and backshifted normalized APIC IDs work correctly. 301 * 302 * Returns: 303 * - >= 0: The requested logical ID 304 * - -ERANGE: @apicid is out of range 305 * - -ENODEV: @apicid is not registered 306 */ 307 int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) 308 { 309 /* Remove the bits below @at_level to get the proper level ID of @apicid */ 310 unsigned int lvlid = topo_apicid(apicid, at_level); 311 312 if (lvlid >= MAX_LOCAL_APIC) 313 return -ERANGE; 314 if (!test_bit(lvlid, apic_maps[at_level].map)) 315 return -ENODEV; 316 /* Get the number of set bits before @lvlid. */ 317 return bitmap_weight(apic_maps[at_level].map, lvlid); 318 } 319 EXPORT_SYMBOL_GPL(topology_get_logical_id); 320 321 /** 322 * topology_unit_count - Retrieve the count of specified units at a given topology domain level 323 * @apicid: The APIC ID which specifies the search range 324 * @which_units: The domain level specifying the units to count 325 * @at_level: The domain level at which @which_units have to be counted 326 * 327 * This returns the number of possible units according to the enumerated 328 * information. 329 * 330 * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN) 331 * counts the number of possible cores in the package to which @apicid 332 * belongs. 333 * 334 * @at_level must obviously be greater than @which_level to produce useful 335 * results. If @at_level is equal to @which_units the result is 336 * unsurprisingly 1. If @at_level is less than @which_units the results 337 * is by definition undefined and the function returns 0. 338 */ 339 unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, 340 enum x86_topology_domains at_level) 341 { 342 /* Remove the bits below @at_level to get the proper level ID of @apicid */ 343 unsigned int lvlid = topo_apicid(apicid, at_level); 344 345 if (lvlid >= MAX_LOCAL_APIC) 346 return 0; 347 if (!test_bit(lvlid, apic_maps[at_level].map)) 348 return 0; 349 if (which_units > at_level) 350 return 0; 351 if (which_units == at_level) 352 return 1; 353 return topo_unit_count(lvlid, at_level, apic_maps[which_units].map); 354 } 355 356 #ifdef CONFIG_SMP 357 int topology_get_primary_thread(unsigned int cpu) 358 { 359 u32 apic_id = cpuid_to_apicid[cpu]; 360 361 /* 362 * Get the core domain level APIC id, which is the primary thread 363 * and return the CPU number assigned to it. 364 */ 365 return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN)); 366 } 367 #endif 368 369 #ifdef CONFIG_ACPI_HOTPLUG_CPU 370 /** 371 * topology_hotplug_apic - Handle a physical hotplugged APIC after boot 372 * @apic_id: The APIC ID to set up 373 * @acpi_id: The ACPI ID associated to the APIC 374 */ 375 int topology_hotplug_apic(u32 apic_id, u32 acpi_id) 376 { 377 int cpu; 378 379 if (apic_id >= MAX_LOCAL_APIC) 380 return -EINVAL; 381 382 /* Reject if the APIC ID was not registered during enumeration. */ 383 if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map)) 384 return -ENODEV; 385 386 cpu = topo_lookup_cpuid(apic_id); 387 if (cpu < 0) 388 return -ENOSPC; 389 390 set_bit(apic_id, phys_cpu_present_map); 391 topo_set_cpuids(cpu, apic_id, acpi_id); 392 cpu_mark_primary_thread(cpu, apic_id); 393 return cpu; 394 } 395 396 /** 397 * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot 398 * @cpu: The CPU number for which the APIC ID is removed 399 */ 400 void topology_hotunplug_apic(unsigned int cpu) 401 { 402 u32 apic_id = cpuid_to_apicid[cpu]; 403 404 if (apic_id == BAD_APICID) 405 return; 406 407 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; 408 clear_bit(apic_id, phys_cpu_present_map); 409 set_cpu_present(cpu, false); 410 } 411 #endif 412 413 #ifdef CONFIG_X86_LOCAL_APIC 414 static unsigned int max_possible_cpus __initdata = NR_CPUS; 415 416 /** 417 * topology_apply_cmdline_limits_early - Apply topology command line limits early 418 * 419 * Ensure that command line limits are in effect before firmware parsing 420 * takes place. 421 */ 422 void __init topology_apply_cmdline_limits_early(void) 423 { 424 unsigned int possible = nr_cpu_ids; 425 426 /* 'maxcpus=0' 'nosmp' 'nolapic' */ 427 if (!setup_max_cpus || apic_is_disabled) 428 possible = 1; 429 430 /* 'possible_cpus=N' */ 431 possible = min_t(unsigned int, max_possible_cpus, possible); 432 433 if (possible < nr_cpu_ids) { 434 pr_info("Limiting to %u possible CPUs\n", possible); 435 set_nr_cpu_ids(possible); 436 } 437 } 438 439 static __init bool restrict_to_up(void) 440 { 441 if (!smp_found_config) 442 return true; 443 /* 444 * XEN PV is special as it does not advertise the local APIC 445 * properly, but provides a fake topology for it so that the 446 * infrastructure works. So don't apply the restrictions vs. APIC 447 * here. 448 */ 449 if (xen_pv_domain()) 450 return false; 451 452 return apic_is_disabled; 453 } 454 455 void __init topology_init_possible_cpus(void) 456 { 457 unsigned int assigned = topo_info.nr_assigned_cpus; 458 unsigned int disabled = topo_info.nr_disabled_cpus; 459 unsigned int cnta, cntb, cpu, allowed = 1; 460 unsigned int total = assigned + disabled; 461 u32 apicid, firstid; 462 463 /* 464 * If there was no APIC registered, then fake one so that the 465 * topology bitmap is populated. That ensures that the code below 466 * is valid and the various query interfaces can be used 467 * unconditionally. This does not affect the actual APIC code in 468 * any way because either the local APIC address has not been 469 * registered or the local APIC was disabled on the command line. 470 */ 471 if (topo_info.boot_cpu_apic_id == BAD_APICID) 472 topology_register_boot_apic(0); 473 474 if (!restrict_to_up()) { 475 if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { 476 disabled += assigned - nr_cpu_ids; 477 assigned = nr_cpu_ids; 478 } 479 allowed = min_t(unsigned int, total, nr_cpu_ids); 480 } 481 482 if (total > allowed) 483 pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed); 484 485 assigned = min_t(unsigned int, allowed, assigned); 486 disabled = allowed - assigned; 487 488 topo_info.nr_assigned_cpus = assigned; 489 topo_info.nr_disabled_cpus = disabled; 490 491 total_cpus = allowed; 492 set_nr_cpu_ids(allowed); 493 494 cnta = domain_weight(TOPO_PKG_DOMAIN); 495 cntb = domain_weight(TOPO_DIE_DOMAIN); 496 __max_logical_packages = cnta; 497 __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); 498 499 pr_info("Max. logical packages: %3u\n", cnta); 500 pr_info("Max. logical dies: %3u\n", cntb); 501 pr_info("Max. dies per package: %3u\n", __max_dies_per_package); 502 503 cnta = domain_weight(TOPO_CORE_DOMAIN); 504 cntb = domain_weight(TOPO_SMT_DOMAIN); 505 /* 506 * Can't use order delta here as order(cnta) can be equal 507 * order(cntb) even if cnta != cntb. 508 */ 509 __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); 510 pr_info("Max. threads per core: %3u\n", __max_threads_per_core); 511 512 firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC); 513 __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); 514 pr_info("Num. cores per package: %3u\n", __num_cores_per_package); 515 __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN); 516 pr_info("Num. threads per package: %3u\n", __num_threads_per_package); 517 518 pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); 519 if (topo_info.nr_rejected_cpus) 520 pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); 521 522 init_cpu_present(cpumask_of(0)); 523 init_cpu_possible(cpumask_of(0)); 524 525 /* Assign CPU numbers to non-present CPUs */ 526 for (apicid = 0; disabled; disabled--, apicid++) { 527 apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map, 528 MAX_LOCAL_APIC, apicid); 529 if (apicid >= MAX_LOCAL_APIC) 530 break; 531 cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid; 532 } 533 534 for (cpu = 0; cpu < allowed; cpu++) { 535 apicid = cpuid_to_apicid[cpu]; 536 537 set_cpu_possible(cpu, true); 538 539 if (apicid == BAD_APICID) 540 continue; 541 542 cpu_mark_primary_thread(cpu, apicid); 543 set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map)); 544 } 545 } 546 547 /* 548 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed. 549 */ 550 void __init topology_reset_possible_cpus_up(void) 551 { 552 init_cpu_present(cpumask_of(0)); 553 init_cpu_possible(cpumask_of(0)); 554 555 bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); 556 if (topo_info.boot_cpu_apic_id != BAD_APICID) 557 set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map); 558 } 559 560 static int __init setup_possible_cpus(char *str) 561 { 562 get_option(&str, &max_possible_cpus); 563 return 0; 564 } 565 early_param("possible_cpus", setup_possible_cpus); 566 #endif 567