1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * CPU/APIC topology 4 * 5 * The APIC IDs describe the system topology in multiple domain levels. 6 * The CPUID topology parser provides the information which part of the 7 * APIC ID is associated to the individual levels: 8 * 9 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD] 10 * 11 * The root space contains the package (socket) IDs. 12 * 13 * Not enumerated levels consume 0 bits space, but conceptually they are 14 * always represented. If e.g. only CORE and THREAD levels are enumerated 15 * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE. 16 * 17 * If SMT is not supported, then the THREAD domain is still used. It then 18 * has the same physical ID as the CORE domain and is the only child of 19 * the core domain. 20 * 21 * This allows a unified view on the system independent of the enumerated 22 * domain levels without requiring any conditionals in the code. 23 */ 24 #define pr_fmt(fmt) "CPU topo: " fmt 25 #include <linux/cpu.h> 26 27 #include <xen/xen.h> 28 29 #include <asm/apic.h> 30 #include <asm/hypervisor.h> 31 #include <asm/io_apic.h> 32 #include <asm/mpspec.h> 33 #include <asm/smp.h> 34 35 #include "cpu.h" 36 37 /* 38 * Map cpu index to physical APIC ID 39 */ 40 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); 41 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); 42 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 43 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); 44 45 /* Bitmap of physically present CPUs. */ 46 DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; 47 48 /* Used for CPU number allocation and parallel CPU bringup */ 49 u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, }; 50 51 /* Bitmaps to mark registered APICs at each topology domain */ 52 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init; 53 54 /* 55 * Keep track of assigned, disabled and rejected CPUs. Present assigned 56 * with 1 as CPU #0 is reserved for the boot CPU. 57 */ 58 static struct { 59 unsigned int nr_assigned_cpus; 60 unsigned int nr_disabled_cpus; 61 unsigned int nr_rejected_cpus; 62 u32 boot_cpu_apic_id; 63 u32 real_bsp_apic_id; 64 } topo_info __ro_after_init = { 65 .nr_assigned_cpus = 1, 66 .boot_cpu_apic_id = BAD_APICID, 67 .real_bsp_apic_id = BAD_APICID, 68 }; 69 70 #define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC) 71 72 bool arch_match_cpu_phys_id(int cpu, u64 phys_id) 73 { 74 return phys_id == (u64)cpuid_to_apicid[cpu]; 75 } 76 77 #ifdef CONFIG_SMP 78 static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) 79 { 80 if (!(apicid & (__max_threads_per_core - 1))) 81 cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); 82 } 83 #else 84 static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } 85 #endif 86 87 /* 88 * Convert the APIC ID to a domain level ID by masking out the low bits 89 * below the domain level @dom. 90 */ 91 static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom) 92 { 93 if (dom == TOPO_SMT_DOMAIN) 94 return apicid; 95 return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]); 96 } 97 98 static int topo_lookup_cpuid(u32 apic_id) 99 { 100 int i; 101 102 /* CPU# to APICID mapping is persistent once it is established */ 103 for (i = 0; i < topo_info.nr_assigned_cpus; i++) { 104 if (cpuid_to_apicid[i] == apic_id) 105 return i; 106 } 107 return -ENODEV; 108 } 109 110 static __init int topo_get_cpunr(u32 apic_id) 111 { 112 int cpu = topo_lookup_cpuid(apic_id); 113 114 if (cpu >= 0) 115 return cpu; 116 117 return topo_info.nr_assigned_cpus++; 118 } 119 120 static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) 121 { 122 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 123 early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; 124 early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; 125 #endif 126 set_cpu_possible(cpu, true); 127 set_cpu_present(cpu, true); 128 } 129 130 static __init bool check_for_real_bsp(u32 apic_id) 131 { 132 /* 133 * There is no real good way to detect whether this a kdump() 134 * kernel, but except on the Voyager SMP monstrosity which is not 135 * longer supported, the real BSP APIC ID is the first one which is 136 * enumerated by firmware. That allows to detect whether the boot 137 * CPU is the real BSP. If it is not, then do not register the APIC 138 * because sending INIT to the real BSP would reset the whole 139 * system. 140 * 141 * The first APIC ID which is enumerated by firmware is detectable 142 * because the boot CPU APIC ID is registered before that without 143 * invoking this code. 144 */ 145 if (topo_info.real_bsp_apic_id != BAD_APICID) 146 return false; 147 148 if (apic_id == topo_info.boot_cpu_apic_id) { 149 topo_info.real_bsp_apic_id = apic_id; 150 return false; 151 } 152 153 pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", 154 topo_info.boot_cpu_apic_id, apic_id); 155 pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); 156 157 topo_info.real_bsp_apic_id = apic_id; 158 return true; 159 } 160 161 static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, 162 unsigned long *map) 163 { 164 unsigned int id, end, cnt = 0; 165 166 /* Calculate the exclusive end */ 167 end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); 168 169 /* Unfortunately there is no bitmap_weight_range() */ 170 for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id)) 171 cnt++; 172 return cnt; 173 } 174 175 static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) 176 { 177 int cpu, dom; 178 179 if (present) { 180 set_bit(apic_id, phys_cpu_present_map); 181 182 /* 183 * Double registration is valid in case of the boot CPU 184 * APIC because that is registered before the enumeration 185 * of the APICs via firmware parsers or VM guest 186 * mechanisms. 187 */ 188 if (apic_id == topo_info.boot_cpu_apic_id) 189 cpu = 0; 190 else 191 cpu = topo_get_cpunr(apic_id); 192 193 cpuid_to_apicid[cpu] = apic_id; 194 topo_set_cpuids(cpu, apic_id, acpi_id); 195 } else { 196 u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN); 197 198 /* 199 * Check for present APICs in the same package when running 200 * on bare metal. Allow the bogosity in a guest. 201 */ 202 if (hypervisor_is_type(X86_HYPER_NATIVE) && 203 topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) { 204 pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n", 205 apic_id); 206 topo_info.nr_rejected_cpus++; 207 return; 208 } 209 210 topo_info.nr_disabled_cpus++; 211 } 212 213 /* Register present and possible CPUs in the domain maps */ 214 for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) 215 set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); 216 } 217 218 /** 219 * topology_register_apic - Register an APIC in early topology maps 220 * @apic_id: The APIC ID to set up 221 * @acpi_id: The ACPI ID associated to the APIC 222 * @present: True if the corresponding CPU is present 223 */ 224 void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) 225 { 226 if (apic_id >= MAX_LOCAL_APIC) { 227 pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); 228 topo_info.nr_rejected_cpus++; 229 return; 230 } 231 232 if (check_for_real_bsp(apic_id)) { 233 topo_info.nr_rejected_cpus++; 234 return; 235 } 236 237 /* CPU numbers exhausted? */ 238 if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) { 239 pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); 240 topo_info.nr_rejected_cpus++; 241 return; 242 } 243 244 topo_register_apic(apic_id, acpi_id, present); 245 } 246 247 /** 248 * topology_register_boot_apic - Register the boot CPU APIC 249 * @apic_id: The APIC ID to set up 250 * 251 * Separate so CPU #0 can be assigned 252 */ 253 void __init topology_register_boot_apic(u32 apic_id) 254 { 255 WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID); 256 257 topo_info.boot_cpu_apic_id = apic_id; 258 topo_register_apic(apic_id, CPU_ACPIID_INVALID, true); 259 } 260 261 /** 262 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level 263 * @apicid: The APIC ID for which to lookup the logical ID 264 * @at_level: The topology domain level to use 265 * 266 * @apicid must be a full APIC ID, not the normalized variant. It's valid to have 267 * all bits below the domain level specified by @at_level to be clear. So both 268 * real APIC IDs and backshifted normalized APIC IDs work correctly. 269 * 270 * Returns: 271 * - >= 0: The requested logical ID 272 * - -ERANGE: @apicid is out of range 273 * - -ENODEV: @apicid is not registered 274 */ 275 int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) 276 { 277 /* Remove the bits below @at_level to get the proper level ID of @apicid */ 278 unsigned int lvlid = topo_apicid(apicid, at_level); 279 280 if (lvlid >= MAX_LOCAL_APIC) 281 return -ERANGE; 282 if (!test_bit(lvlid, apic_maps[at_level].map)) 283 return -ENODEV; 284 /* Get the number of set bits before @lvlid. */ 285 return bitmap_weight(apic_maps[at_level].map, lvlid); 286 } 287 EXPORT_SYMBOL_GPL(topology_get_logical_id); 288 289 /** 290 * topology_unit_count - Retrieve the count of specified units at a given topology domain level 291 * @apicid: The APIC ID which specifies the search range 292 * @which_units: The domain level specifying the units to count 293 * @at_level: The domain level at which @which_units have to be counted 294 * 295 * This returns the number of possible units according to the enumerated 296 * information. 297 * 298 * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN) 299 * counts the number of possible cores in the package to which @apicid 300 * belongs. 301 * 302 * @at_level must obviously be greater than @which_level to produce useful 303 * results. If @at_level is equal to @which_units the result is 304 * unsurprisingly 1. If @at_level is less than @which_units the results 305 * is by definition undefined and the function returns 0. 306 */ 307 unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, 308 enum x86_topology_domains at_level) 309 { 310 /* Remove the bits below @at_level to get the proper level ID of @apicid */ 311 unsigned int lvlid = topo_apicid(apicid, at_level); 312 313 if (lvlid >= MAX_LOCAL_APIC) 314 return 0; 315 if (!test_bit(lvlid, apic_maps[at_level].map)) 316 return 0; 317 if (which_units > at_level) 318 return 0; 319 if (which_units == at_level) 320 return 1; 321 return topo_unit_count(lvlid, at_level, apic_maps[which_units].map); 322 } 323 324 #ifdef CONFIG_ACPI_HOTPLUG_CPU 325 /** 326 * topology_hotplug_apic - Handle a physical hotplugged APIC after boot 327 * @apic_id: The APIC ID to set up 328 * @acpi_id: The ACPI ID associated to the APIC 329 */ 330 int topology_hotplug_apic(u32 apic_id, u32 acpi_id) 331 { 332 int cpu; 333 334 if (apic_id >= MAX_LOCAL_APIC) 335 return -EINVAL; 336 337 /* Reject if the APIC ID was not registered during enumeration. */ 338 if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map)) 339 return -ENODEV; 340 341 cpu = topo_lookup_cpuid(apic_id); 342 if (cpu < 0) 343 return -ENOSPC; 344 345 set_bit(apic_id, phys_cpu_present_map); 346 topo_set_cpuids(cpu, apic_id, acpi_id); 347 cpu_mark_primary_thread(cpu, apic_id); 348 return cpu; 349 } 350 351 /** 352 * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot 353 * @cpu: The CPU number for which the APIC ID is removed 354 */ 355 void topology_hotunplug_apic(unsigned int cpu) 356 { 357 u32 apic_id = cpuid_to_apicid[cpu]; 358 359 if (apic_id == BAD_APICID) 360 return; 361 362 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; 363 clear_bit(apic_id, phys_cpu_present_map); 364 set_cpu_present(cpu, false); 365 } 366 #endif 367 368 #ifdef CONFIG_X86_LOCAL_APIC 369 static unsigned int max_possible_cpus __initdata = NR_CPUS; 370 371 /** 372 * topology_apply_cmdline_limits_early - Apply topology command line limits early 373 * 374 * Ensure that command line limits are in effect before firmware parsing 375 * takes place. 376 */ 377 void __init topology_apply_cmdline_limits_early(void) 378 { 379 unsigned int possible = nr_cpu_ids; 380 381 /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ 382 if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) 383 possible = 1; 384 385 /* 'possible_cpus=N' */ 386 possible = min_t(unsigned int, max_possible_cpus, possible); 387 388 if (possible < nr_cpu_ids) { 389 pr_info("Limiting to %u possible CPUs\n", possible); 390 set_nr_cpu_ids(possible); 391 } 392 } 393 394 static __init bool restrict_to_up(void) 395 { 396 if (!smp_found_config || ioapic_is_disabled) 397 return true; 398 /* 399 * XEN PV is special as it does not advertise the local APIC 400 * properly, but provides a fake topology for it so that the 401 * infrastructure works. So don't apply the restrictions vs. APIC 402 * here. 403 */ 404 if (xen_pv_domain()) 405 return false; 406 407 return apic_is_disabled; 408 } 409 410 void __init topology_init_possible_cpus(void) 411 { 412 unsigned int assigned = topo_info.nr_assigned_cpus; 413 unsigned int disabled = topo_info.nr_disabled_cpus; 414 unsigned int cnta, cntb, cpu, allowed = 1; 415 unsigned int total = assigned + disabled; 416 u32 apicid, firstid; 417 418 if (!restrict_to_up()) { 419 if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { 420 disabled += assigned - nr_cpu_ids; 421 assigned = nr_cpu_ids; 422 } 423 allowed = min_t(unsigned int, total, nr_cpu_ids); 424 } 425 426 if (total > allowed) 427 pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed); 428 429 assigned = min_t(unsigned int, allowed, assigned); 430 disabled = allowed - assigned; 431 432 topo_info.nr_assigned_cpus = assigned; 433 topo_info.nr_disabled_cpus = disabled; 434 435 total_cpus = allowed; 436 set_nr_cpu_ids(allowed); 437 438 cnta = domain_weight(TOPO_PKG_DOMAIN); 439 cntb = domain_weight(TOPO_DIE_DOMAIN); 440 __max_logical_packages = cnta; 441 __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); 442 443 pr_info("Max. logical packages: %3u\n", cnta); 444 pr_info("Max. logical dies: %3u\n", cntb); 445 pr_info("Max. dies per package: %3u\n", __max_dies_per_package); 446 447 cnta = domain_weight(TOPO_CORE_DOMAIN); 448 cntb = domain_weight(TOPO_SMT_DOMAIN); 449 /* 450 * Can't use order delta here as order(cnta) can be equal 451 * order(cntb) even if cnta != cntb. 452 */ 453 __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); 454 pr_info("Max. threads per core: %3u\n", __max_threads_per_core); 455 456 firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC); 457 __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); 458 pr_info("Num. cores per package: %3u\n", __num_cores_per_package); 459 __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN); 460 pr_info("Num. threads per package: %3u\n", __num_threads_per_package); 461 462 pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); 463 if (topo_info.nr_rejected_cpus) 464 pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); 465 466 init_cpu_present(cpumask_of(0)); 467 init_cpu_possible(cpumask_of(0)); 468 469 /* Assign CPU numbers to non-present CPUs */ 470 for (apicid = 0; disabled; disabled--, apicid++) { 471 apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map, 472 MAX_LOCAL_APIC, apicid); 473 if (apicid >= MAX_LOCAL_APIC) 474 break; 475 cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid; 476 } 477 478 for (cpu = 0; cpu < allowed; cpu++) { 479 apicid = cpuid_to_apicid[cpu]; 480 481 set_cpu_possible(cpu, true); 482 483 if (apicid == BAD_APICID) 484 continue; 485 486 cpu_mark_primary_thread(cpu, apicid); 487 set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map)); 488 } 489 } 490 491 /* 492 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed. 493 */ 494 void __init topology_reset_possible_cpus_up(void) 495 { 496 init_cpu_present(cpumask_of(0)); 497 init_cpu_possible(cpumask_of(0)); 498 499 bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); 500 if (topo_info.boot_cpu_apic_id != BAD_APICID) 501 set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map); 502 } 503 504 static int __init setup_possible_cpus(char *str) 505 { 506 get_option(&str, &max_possible_cpus); 507 return 0; 508 } 509 early_param("possible_cpus", setup_possible_cpus); 510 #endif 511