1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * CPU subsystem support 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/module.h> 8 #include <linux/init.h> 9 #include <linux/sched.h> 10 #include <linux/cpu.h> 11 #include <linux/topology.h> 12 #include <linux/device.h> 13 #include <linux/node.h> 14 #include <linux/gfp.h> 15 #include <linux/slab.h> 16 #include <linux/percpu.h> 17 #include <linux/acpi.h> 18 #include <linux/of.h> 19 #include <linux/cpufeature.h> 20 #include <linux/tick.h> 21 #include <linux/pm_qos.h> 22 #include <linux/delay.h> 23 #include <linux/sched/isolation.h> 24 25 #include "base.h" 26 27 static DEFINE_PER_CPU(struct device *, cpu_sys_devices); 28 29 static int cpu_subsys_match(struct device *dev, const struct device_driver *drv) 30 { 31 /* ACPI style match is the only one that may succeed. */ 32 if (acpi_driver_match_device(dev, drv)) 33 return 1; 34 35 return 0; 36 } 37 38 #ifdef CONFIG_HOTPLUG_CPU 39 static void change_cpu_under_node(struct cpu *cpu, 40 unsigned int from_nid, unsigned int to_nid) 41 { 42 int cpuid = cpu->dev.id; 43 unregister_cpu_under_node(cpuid, from_nid); 44 register_cpu_under_node(cpuid, to_nid); 45 cpu->node_id = to_nid; 46 } 47 48 static int cpu_subsys_online(struct device *dev) 49 { 50 struct cpu *cpu = container_of(dev, struct cpu, dev); 51 int cpuid = dev->id; 52 int from_nid, to_nid; 53 int ret; 54 int retries = 0; 55 56 from_nid = cpu_to_node(cpuid); 57 if (from_nid == NUMA_NO_NODE) 58 return -ENODEV; 59 60 retry: 61 ret = cpu_device_up(dev); 62 63 /* 64 * If -EBUSY is returned, it is likely that hotplug is temporarily 65 * disabled when cpu_hotplug_disable() was called. This condition is 66 * transient. So we retry after waiting for an exponentially 67 * increasing delay up to a total of at least 620ms as some PCI 68 * device initialization can take quite a while. 69 */ 70 if (ret == -EBUSY) { 71 retries++; 72 if (retries > 5) 73 return ret; 74 msleep(10 * (1 << retries)); 75 goto retry; 76 } 77 78 /* 79 * When hot adding memory to memoryless node and enabling a cpu 80 * on the node, node number of the cpu may internally change. 81 */ 82 to_nid = cpu_to_node(cpuid); 83 if (from_nid != to_nid) 84 change_cpu_under_node(cpu, from_nid, to_nid); 85 86 return ret; 87 } 88 89 static int cpu_subsys_offline(struct device *dev) 90 { 91 return cpu_device_down(dev); 92 } 93 94 void unregister_cpu(struct cpu *cpu) 95 { 96 int logical_cpu = cpu->dev.id; 97 98 set_cpu_enabled(logical_cpu, false); 99 unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); 100 101 device_unregister(&cpu->dev); 102 per_cpu(cpu_sys_devices, logical_cpu) = NULL; 103 return; 104 } 105 106 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE 107 static ssize_t cpu_probe_store(struct device *dev, 108 struct device_attribute *attr, 109 const char *buf, 110 size_t count) 111 { 112 ssize_t cnt; 113 int ret; 114 115 ret = lock_device_hotplug_sysfs(); 116 if (ret) 117 return ret; 118 119 cnt = arch_cpu_probe(buf, count); 120 121 unlock_device_hotplug(); 122 return cnt; 123 } 124 125 static ssize_t cpu_release_store(struct device *dev, 126 struct device_attribute *attr, 127 const char *buf, 128 size_t count) 129 { 130 ssize_t cnt; 131 int ret; 132 133 ret = lock_device_hotplug_sysfs(); 134 if (ret) 135 return ret; 136 137 cnt = arch_cpu_release(buf, count); 138 139 unlock_device_hotplug(); 140 return cnt; 141 } 142 143 static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store); 144 static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); 145 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ 146 #endif /* CONFIG_HOTPLUG_CPU */ 147 148 #ifdef CONFIG_CRASH_DUMP 149 #include <linux/kexec.h> 150 151 static ssize_t crash_notes_show(struct device *dev, 152 struct device_attribute *attr, 153 char *buf) 154 { 155 struct cpu *cpu = container_of(dev, struct cpu, dev); 156 unsigned long long addr; 157 int cpunum; 158 159 cpunum = cpu->dev.id; 160 161 /* 162 * Might be reading other cpu's data based on which cpu read thread 163 * has been scheduled. But cpu data (memory) is allocated once during 164 * boot up and this data does not change there after. Hence this 165 * operation should be safe. No locking required. 166 */ 167 addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum)); 168 169 return sysfs_emit(buf, "%llx\n", addr); 170 } 171 static DEVICE_ATTR_ADMIN_RO(crash_notes); 172 173 static ssize_t crash_notes_size_show(struct device *dev, 174 struct device_attribute *attr, 175 char *buf) 176 { 177 return sysfs_emit(buf, "%zu\n", sizeof(note_buf_t)); 178 } 179 static DEVICE_ATTR_ADMIN_RO(crash_notes_size); 180 181 static struct attribute *crash_note_cpu_attrs[] = { 182 &dev_attr_crash_notes.attr, 183 &dev_attr_crash_notes_size.attr, 184 NULL 185 }; 186 187 static const struct attribute_group crash_note_cpu_attr_group = { 188 .attrs = crash_note_cpu_attrs, 189 }; 190 #endif 191 192 static const struct attribute_group *common_cpu_attr_groups[] = { 193 #ifdef CONFIG_CRASH_DUMP 194 &crash_note_cpu_attr_group, 195 #endif 196 NULL 197 }; 198 199 static const struct attribute_group *hotplugable_cpu_attr_groups[] = { 200 #ifdef CONFIG_CRASH_DUMP 201 &crash_note_cpu_attr_group, 202 #endif 203 NULL 204 }; 205 206 /* 207 * Print cpu online, possible, present, and system maps 208 */ 209 210 struct cpu_attr { 211 struct device_attribute attr; 212 const struct cpumask *const map; 213 }; 214 215 static ssize_t show_cpus_attr(struct device *dev, 216 struct device_attribute *attr, 217 char *buf) 218 { 219 struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr); 220 221 return cpumap_print_to_pagebuf(true, buf, ca->map); 222 } 223 224 #define _CPU_ATTR(name, map) \ 225 { __ATTR(name, 0444, show_cpus_attr, NULL), map } 226 227 /* Keep in sync with cpu_subsys_attrs */ 228 static struct cpu_attr cpu_attrs[] = { 229 _CPU_ATTR(online, &__cpu_online_mask), 230 _CPU_ATTR(possible, &__cpu_possible_mask), 231 _CPU_ATTR(present, &__cpu_present_mask), 232 }; 233 234 /* 235 * Print values for NR_CPUS and offlined cpus 236 */ 237 static ssize_t print_cpus_kernel_max(struct device *dev, 238 struct device_attribute *attr, char *buf) 239 { 240 return sysfs_emit(buf, "%d\n", NR_CPUS - 1); 241 } 242 static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL); 243 244 /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */ 245 unsigned int total_cpus; 246 247 static ssize_t print_cpus_offline(struct device *dev, 248 struct device_attribute *attr, char *buf) 249 { 250 int len = 0; 251 cpumask_var_t offline; 252 253 /* display offline cpus < nr_cpu_ids */ 254 if (!alloc_cpumask_var(&offline, GFP_KERNEL)) 255 return -ENOMEM; 256 cpumask_andnot(offline, cpu_possible_mask, cpu_online_mask); 257 len += sysfs_emit_at(buf, len, "%*pbl", cpumask_pr_args(offline)); 258 free_cpumask_var(offline); 259 260 /* display offline cpus >= nr_cpu_ids */ 261 if (total_cpus && nr_cpu_ids < total_cpus) { 262 len += sysfs_emit_at(buf, len, ","); 263 264 if (nr_cpu_ids == total_cpus-1) 265 len += sysfs_emit_at(buf, len, "%u", nr_cpu_ids); 266 else 267 len += sysfs_emit_at(buf, len, "%u-%d", 268 nr_cpu_ids, total_cpus - 1); 269 } 270 271 len += sysfs_emit_at(buf, len, "\n"); 272 273 return len; 274 } 275 static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); 276 277 static ssize_t print_cpus_enabled(struct device *dev, 278 struct device_attribute *attr, char *buf) 279 { 280 return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask)); 281 } 282 static DEVICE_ATTR(enabled, 0444, print_cpus_enabled, NULL); 283 284 static ssize_t print_cpus_isolated(struct device *dev, 285 struct device_attribute *attr, char *buf) 286 { 287 int len; 288 cpumask_var_t isolated; 289 290 if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) 291 return -ENOMEM; 292 293 cpumask_andnot(isolated, cpu_possible_mask, 294 housekeeping_cpumask(HK_TYPE_DOMAIN)); 295 len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated)); 296 297 free_cpumask_var(isolated); 298 299 return len; 300 } 301 static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); 302 303 #ifdef CONFIG_NO_HZ_FULL 304 static ssize_t print_cpus_nohz_full(struct device *dev, 305 struct device_attribute *attr, char *buf) 306 { 307 return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); 308 } 309 static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); 310 #endif 311 312 #ifdef CONFIG_CRASH_HOTPLUG 313 static ssize_t crash_hotplug_show(struct device *dev, 314 struct device_attribute *attr, 315 char *buf) 316 { 317 return sysfs_emit(buf, "%d\n", crash_check_hotplug_support()); 318 } 319 static DEVICE_ATTR_RO(crash_hotplug); 320 #endif 321 322 static void cpu_device_release(struct device *dev) 323 { 324 /* 325 * This is an empty function to prevent the driver core from spitting a 326 * warning at us. Yes, I know this is directly opposite of what the 327 * documentation for the driver core and kobjects say, and the author 328 * of this code has already been publically ridiculed for doing 329 * something as foolish as this. However, at this point in time, it is 330 * the only way to handle the issue of statically allocated cpu 331 * devices. The different architectures will have their cpu device 332 * code reworked to properly handle this in the near future, so this 333 * function will then be changed to correctly free up the memory held 334 * by the cpu device. 335 * 336 * Never copy this way of doing things, or you too will be made fun of 337 * on the linux-kernel list, you have been warned. 338 */ 339 } 340 341 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE 342 static ssize_t print_cpu_modalias(struct device *dev, 343 struct device_attribute *attr, 344 char *buf) 345 { 346 int len = 0; 347 u32 i; 348 349 len += sysfs_emit_at(buf, len, 350 "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", 351 CPU_FEATURE_TYPEVAL); 352 353 for (i = 0; i < MAX_CPU_FEATURES; i++) 354 if (cpu_have_feature(i)) { 355 if (len + sizeof(",XXXX\n") >= PAGE_SIZE) { 356 WARN(1, "CPU features overflow page\n"); 357 break; 358 } 359 len += sysfs_emit_at(buf, len, ",%04X", i); 360 } 361 len += sysfs_emit_at(buf, len, "\n"); 362 return len; 363 } 364 365 static int cpu_uevent(const struct device *dev, struct kobj_uevent_env *env) 366 { 367 char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); 368 if (buf) { 369 print_cpu_modalias(NULL, NULL, buf); 370 add_uevent_var(env, "MODALIAS=%s", buf); 371 kfree(buf); 372 } 373 return 0; 374 } 375 #endif 376 377 const struct bus_type cpu_subsys = { 378 .name = "cpu", 379 .dev_name = "cpu", 380 .match = cpu_subsys_match, 381 #ifdef CONFIG_HOTPLUG_CPU 382 .online = cpu_subsys_online, 383 .offline = cpu_subsys_offline, 384 #endif 385 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE 386 .uevent = cpu_uevent, 387 #endif 388 }; 389 EXPORT_SYMBOL_GPL(cpu_subsys); 390 391 /* 392 * register_cpu - Setup a sysfs device for a CPU. 393 * @cpu - cpu->hotpluggable field set to 1 will generate a control file in 394 * sysfs for this CPU. 395 * @num - CPU number to use when creating the device. 396 * 397 * Initialize and register the CPU device. 398 */ 399 int register_cpu(struct cpu *cpu, int num) 400 { 401 int error; 402 403 cpu->node_id = cpu_to_node(num); 404 memset(&cpu->dev, 0x00, sizeof(struct device)); 405 cpu->dev.id = num; 406 cpu->dev.bus = &cpu_subsys; 407 cpu->dev.release = cpu_device_release; 408 cpu->dev.offline_disabled = !cpu->hotpluggable; 409 cpu->dev.offline = !cpu_online(num); 410 cpu->dev.of_node = of_get_cpu_node(num, NULL); 411 cpu->dev.groups = common_cpu_attr_groups; 412 if (cpu->hotpluggable) 413 cpu->dev.groups = hotplugable_cpu_attr_groups; 414 error = device_register(&cpu->dev); 415 if (error) { 416 put_device(&cpu->dev); 417 return error; 418 } 419 420 per_cpu(cpu_sys_devices, num) = &cpu->dev; 421 register_cpu_under_node(num, cpu_to_node(num)); 422 dev_pm_qos_expose_latency_limit(&cpu->dev, 423 PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); 424 set_cpu_enabled(num, true); 425 426 return 0; 427 } 428 429 struct device *get_cpu_device(unsigned int cpu) 430 { 431 if (cpu < nr_cpu_ids && cpu_possible(cpu)) 432 return per_cpu(cpu_sys_devices, cpu); 433 else 434 return NULL; 435 } 436 EXPORT_SYMBOL_GPL(get_cpu_device); 437 438 static void device_create_release(struct device *dev) 439 { 440 kfree(dev); 441 } 442 443 __printf(4, 0) 444 static struct device * 445 __cpu_device_create(struct device *parent, void *drvdata, 446 const struct attribute_group **groups, 447 const char *fmt, va_list args) 448 { 449 struct device *dev = NULL; 450 int retval = -ENOMEM; 451 452 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 453 if (!dev) 454 goto error; 455 456 device_initialize(dev); 457 dev->parent = parent; 458 dev->groups = groups; 459 dev->release = device_create_release; 460 device_set_pm_not_required(dev); 461 dev_set_drvdata(dev, drvdata); 462 463 retval = kobject_set_name_vargs(&dev->kobj, fmt, args); 464 if (retval) 465 goto error; 466 467 retval = device_add(dev); 468 if (retval) 469 goto error; 470 471 return dev; 472 473 error: 474 put_device(dev); 475 return ERR_PTR(retval); 476 } 477 478 struct device *cpu_device_create(struct device *parent, void *drvdata, 479 const struct attribute_group **groups, 480 const char *fmt, ...) 481 { 482 va_list vargs; 483 struct device *dev; 484 485 va_start(vargs, fmt); 486 dev = __cpu_device_create(parent, drvdata, groups, fmt, vargs); 487 va_end(vargs); 488 return dev; 489 } 490 EXPORT_SYMBOL_GPL(cpu_device_create); 491 492 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE 493 static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); 494 #endif 495 496 static struct attribute *cpu_root_attrs[] = { 497 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE 498 &dev_attr_probe.attr, 499 &dev_attr_release.attr, 500 #endif 501 &cpu_attrs[0].attr.attr, 502 &cpu_attrs[1].attr.attr, 503 &cpu_attrs[2].attr.attr, 504 &dev_attr_kernel_max.attr, 505 &dev_attr_offline.attr, 506 &dev_attr_enabled.attr, 507 &dev_attr_isolated.attr, 508 #ifdef CONFIG_NO_HZ_FULL 509 &dev_attr_nohz_full.attr, 510 #endif 511 #ifdef CONFIG_CRASH_HOTPLUG 512 &dev_attr_crash_hotplug.attr, 513 #endif 514 #ifdef CONFIG_GENERIC_CPU_AUTOPROBE 515 &dev_attr_modalias.attr, 516 #endif 517 NULL 518 }; 519 520 static const struct attribute_group cpu_root_attr_group = { 521 .attrs = cpu_root_attrs, 522 }; 523 524 static const struct attribute_group *cpu_root_attr_groups[] = { 525 &cpu_root_attr_group, 526 NULL, 527 }; 528 529 bool cpu_is_hotpluggable(unsigned int cpu) 530 { 531 struct device *dev = get_cpu_device(cpu); 532 return dev && container_of(dev, struct cpu, dev)->hotpluggable 533 && tick_nohz_cpu_hotpluggable(cpu); 534 } 535 EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); 536 537 #ifdef CONFIG_GENERIC_CPU_DEVICES 538 DEFINE_PER_CPU(struct cpu, cpu_devices); 539 540 bool __weak arch_cpu_is_hotpluggable(int cpu) 541 { 542 return false; 543 } 544 545 int __weak arch_register_cpu(int cpu) 546 { 547 struct cpu *c = &per_cpu(cpu_devices, cpu); 548 549 c->hotpluggable = arch_cpu_is_hotpluggable(cpu); 550 551 return register_cpu(c, cpu); 552 } 553 554 #ifdef CONFIG_HOTPLUG_CPU 555 void __weak arch_unregister_cpu(int num) 556 { 557 unregister_cpu(&per_cpu(cpu_devices, num)); 558 } 559 #endif /* CONFIG_HOTPLUG_CPU */ 560 #endif /* CONFIG_GENERIC_CPU_DEVICES */ 561 562 static void __init cpu_dev_register_generic(void) 563 { 564 int i, ret; 565 566 if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES)) 567 return; 568 569 for_each_present_cpu(i) { 570 ret = arch_register_cpu(i); 571 if (ret && ret != -EPROBE_DEFER) 572 pr_warn("register_cpu %d failed (%d)\n", i, ret); 573 } 574 } 575 576 #ifdef CONFIG_GENERIC_CPU_VULNERABILITIES 577 static ssize_t cpu_show_not_affected(struct device *dev, 578 struct device_attribute *attr, char *buf) 579 { 580 return sysfs_emit(buf, "Not affected\n"); 581 } 582 583 #define CPU_SHOW_VULN_FALLBACK(func) \ 584 ssize_t cpu_show_##func(struct device *, \ 585 struct device_attribute *, char *) \ 586 __attribute__((weak, alias("cpu_show_not_affected"))) 587 588 CPU_SHOW_VULN_FALLBACK(meltdown); 589 CPU_SHOW_VULN_FALLBACK(spectre_v1); 590 CPU_SHOW_VULN_FALLBACK(spectre_v2); 591 CPU_SHOW_VULN_FALLBACK(spec_store_bypass); 592 CPU_SHOW_VULN_FALLBACK(l1tf); 593 CPU_SHOW_VULN_FALLBACK(mds); 594 CPU_SHOW_VULN_FALLBACK(tsx_async_abort); 595 CPU_SHOW_VULN_FALLBACK(itlb_multihit); 596 CPU_SHOW_VULN_FALLBACK(srbds); 597 CPU_SHOW_VULN_FALLBACK(mmio_stale_data); 598 CPU_SHOW_VULN_FALLBACK(retbleed); 599 CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); 600 CPU_SHOW_VULN_FALLBACK(gds); 601 CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); 602 603 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); 604 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); 605 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); 606 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); 607 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); 608 static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); 609 static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); 610 static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); 611 static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); 612 static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); 613 static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); 614 static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); 615 static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); 616 static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); 617 618 static struct attribute *cpu_root_vulnerabilities_attrs[] = { 619 &dev_attr_meltdown.attr, 620 &dev_attr_spectre_v1.attr, 621 &dev_attr_spectre_v2.attr, 622 &dev_attr_spec_store_bypass.attr, 623 &dev_attr_l1tf.attr, 624 &dev_attr_mds.attr, 625 &dev_attr_tsx_async_abort.attr, 626 &dev_attr_itlb_multihit.attr, 627 &dev_attr_srbds.attr, 628 &dev_attr_mmio_stale_data.attr, 629 &dev_attr_retbleed.attr, 630 &dev_attr_spec_rstack_overflow.attr, 631 &dev_attr_gather_data_sampling.attr, 632 &dev_attr_reg_file_data_sampling.attr, 633 NULL 634 }; 635 636 static const struct attribute_group cpu_root_vulnerabilities_group = { 637 .name = "vulnerabilities", 638 .attrs = cpu_root_vulnerabilities_attrs, 639 }; 640 641 static void __init cpu_register_vulnerabilities(void) 642 { 643 struct device *dev = bus_get_dev_root(&cpu_subsys); 644 645 if (dev) { 646 if (sysfs_create_group(&dev->kobj, &cpu_root_vulnerabilities_group)) 647 pr_err("Unable to register CPU vulnerabilities\n"); 648 put_device(dev); 649 } 650 } 651 652 #else 653 static inline void cpu_register_vulnerabilities(void) { } 654 #endif 655 656 void __init cpu_dev_init(void) 657 { 658 if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) 659 panic("Failed to register CPU subsystem"); 660 661 cpu_dev_register_generic(); 662 cpu_register_vulnerabilities(); 663 } 664