1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2007, 2011 4 */ 5 6 #define KMSG_COMPONENT "cpu" 7 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 8 9 #include <linux/workqueue.h> 10 #include <linux/memblock.h> 11 #include <linux/uaccess.h> 12 #include <linux/sysctl.h> 13 #include <linux/cpuset.h> 14 #include <linux/device.h> 15 #include <linux/export.h> 16 #include <linux/kernel.h> 17 #include <linux/sched.h> 18 #include <linux/sched/topology.h> 19 #include <linux/delay.h> 20 #include <linux/init.h> 21 #include <linux/slab.h> 22 #include <linux/cpu.h> 23 #include <linux/smp.h> 24 #include <linux/mm.h> 25 #include <linux/nodemask.h> 26 #include <linux/node.h> 27 #include <asm/hiperdispatch.h> 28 #include <asm/sysinfo.h> 29 30 #define PTF_HORIZONTAL (0UL) 31 #define PTF_VERTICAL (1UL) 32 #define PTF_CHECK (2UL) 33 34 enum { 35 TOPOLOGY_MODE_HW, 36 TOPOLOGY_MODE_SINGLE, 37 TOPOLOGY_MODE_PACKAGE, 38 TOPOLOGY_MODE_UNINITIALIZED 39 }; 40 41 struct mask_info { 42 struct mask_info *next; 43 unsigned char id; 44 cpumask_t mask; 45 }; 46 47 static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; 48 static void set_topology_timer(void); 49 static void topology_work_fn(struct work_struct *work); 50 static struct sysinfo_15_1_x *tl_info; 51 static int cpu_management; 52 53 static DECLARE_WORK(topology_work, topology_work_fn); 54 55 /* 56 * Socket/Book linked lists and cpu_topology updates are 57 * protected by "sched_domains_mutex". 58 */ 59 static struct mask_info socket_info; 60 static struct mask_info book_info; 61 static struct mask_info drawer_info; 62 63 struct cpu_topology_s390 cpu_topology[NR_CPUS]; 64 EXPORT_SYMBOL_GPL(cpu_topology); 65 66 static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu) 67 { 68 static cpumask_t mask; 69 70 cpumask_clear(&mask); 71 if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) 72 goto out; 73 cpumask_set_cpu(cpu, &mask); 74 switch (topology_mode) { 75 case TOPOLOGY_MODE_HW: 76 while (info) { 77 if (cpumask_test_cpu(cpu, &info->mask)) { 78 cpumask_copy(&mask, &info->mask); 79 break; 80 } 81 info = info->next; 82 } 83 break; 84 case TOPOLOGY_MODE_PACKAGE: 85 cpumask_copy(&mask, cpu_present_mask); 86 break; 87 default: 88 fallthrough; 89 case TOPOLOGY_MODE_SINGLE: 90 break; 91 } 92 cpumask_and(&mask, &mask, &cpu_setup_mask); 93 out: 94 cpumask_copy(dst, &mask); 95 } 96 97 static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) 98 { 99 static cpumask_t mask; 100 unsigned int max_cpu; 101 102 cpumask_clear(&mask); 103 if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) 104 goto out; 105 cpumask_set_cpu(cpu, &mask); 106 if (topology_mode != TOPOLOGY_MODE_HW) 107 goto out; 108 cpu -= cpu % (smp_cpu_mtid + 1); 109 max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); 110 for (; cpu <= max_cpu; cpu++) { 111 if (cpumask_test_cpu(cpu, &cpu_setup_mask)) 112 cpumask_set_cpu(cpu, &mask); 113 } 114 out: 115 cpumask_copy(dst, &mask); 116 } 117 118 #define TOPOLOGY_CORE_BITS 64 119 120 static void add_cpus_to_mask(struct topology_core *tl_core, 121 struct mask_info *drawer, 122 struct mask_info *book, 123 struct mask_info *socket) 124 { 125 struct cpu_topology_s390 *topo; 126 unsigned int core; 127 128 for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { 129 unsigned int max_cpu, rcore; 130 int cpu; 131 132 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 133 cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 134 if (cpu < 0) 135 continue; 136 max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); 137 for (; cpu <= max_cpu; cpu++) { 138 topo = &cpu_topology[cpu]; 139 topo->drawer_id = drawer->id; 140 topo->book_id = book->id; 141 topo->socket_id = socket->id; 142 topo->core_id = rcore; 143 topo->thread_id = cpu; 144 topo->dedicated = tl_core->d; 145 cpumask_set_cpu(cpu, &drawer->mask); 146 cpumask_set_cpu(cpu, &book->mask); 147 cpumask_set_cpu(cpu, &socket->mask); 148 smp_cpu_set_polarization(cpu, tl_core->pp); 149 smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH); 150 } 151 } 152 } 153 154 static void clear_masks(void) 155 { 156 struct mask_info *info; 157 158 info = &socket_info; 159 while (info) { 160 cpumask_clear(&info->mask); 161 info = info->next; 162 } 163 info = &book_info; 164 while (info) { 165 cpumask_clear(&info->mask); 166 info = info->next; 167 } 168 info = &drawer_info; 169 while (info) { 170 cpumask_clear(&info->mask); 171 info = info->next; 172 } 173 } 174 175 static union topology_entry *next_tle(union topology_entry *tle) 176 { 177 if (!tle->nl) 178 return (union topology_entry *)((struct topology_core *)tle + 1); 179 return (union topology_entry *)((struct topology_container *)tle + 1); 180 } 181 182 static void tl_to_masks(struct sysinfo_15_1_x *info) 183 { 184 struct mask_info *socket = &socket_info; 185 struct mask_info *book = &book_info; 186 struct mask_info *drawer = &drawer_info; 187 union topology_entry *tle, *end; 188 189 clear_masks(); 190 tle = info->tle; 191 end = (union topology_entry *)((unsigned long)info + info->length); 192 while (tle < end) { 193 switch (tle->nl) { 194 case 3: 195 drawer = drawer->next; 196 drawer->id = tle->container.id; 197 break; 198 case 2: 199 book = book->next; 200 book->id = tle->container.id; 201 break; 202 case 1: 203 socket = socket->next; 204 socket->id = tle->container.id; 205 break; 206 case 0: 207 add_cpus_to_mask(&tle->cpu, drawer, book, socket); 208 break; 209 default: 210 clear_masks(); 211 return; 212 } 213 tle = next_tle(tle); 214 } 215 } 216 217 static void topology_update_polarization_simple(void) 218 { 219 int cpu; 220 221 for_each_possible_cpu(cpu) 222 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 223 } 224 225 static int ptf(unsigned long fc) 226 { 227 int rc; 228 229 asm volatile( 230 " .insn rre,0xb9a20000,%1,%1\n" 231 " ipm %0\n" 232 " srl %0,28\n" 233 : "=d" (rc) 234 : "d" (fc) : "cc"); 235 return rc; 236 } 237 238 int topology_set_cpu_management(int fc) 239 { 240 int cpu, rc; 241 242 if (!MACHINE_HAS_TOPOLOGY) 243 return -EOPNOTSUPP; 244 if (fc) 245 rc = ptf(PTF_VERTICAL); 246 else 247 rc = ptf(PTF_HORIZONTAL); 248 if (rc) 249 return -EBUSY; 250 for_each_possible_cpu(cpu) 251 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 252 return rc; 253 } 254 255 void update_cpu_masks(void) 256 { 257 struct cpu_topology_s390 *topo, *topo_package, *topo_sibling; 258 int cpu, sibling, pkg_first, smt_first, id; 259 260 for_each_possible_cpu(cpu) { 261 topo = &cpu_topology[cpu]; 262 cpu_thread_map(&topo->thread_mask, cpu); 263 cpu_group_map(&topo->core_mask, &socket_info, cpu); 264 cpu_group_map(&topo->book_mask, &book_info, cpu); 265 cpu_group_map(&topo->drawer_mask, &drawer_info, cpu); 266 topo->booted_cores = 0; 267 if (topology_mode != TOPOLOGY_MODE_HW) { 268 id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; 269 topo->thread_id = cpu; 270 topo->core_id = cpu; 271 topo->socket_id = id; 272 topo->book_id = id; 273 topo->drawer_id = id; 274 } 275 } 276 hd_reset_state(); 277 for_each_online_cpu(cpu) { 278 topo = &cpu_topology[cpu]; 279 pkg_first = cpumask_first(&topo->core_mask); 280 topo_package = &cpu_topology[pkg_first]; 281 if (cpu == pkg_first) { 282 for_each_cpu(sibling, &topo->core_mask) { 283 topo_sibling = &cpu_topology[sibling]; 284 smt_first = cpumask_first(&topo_sibling->thread_mask); 285 if (sibling == smt_first) { 286 topo_package->booted_cores++; 287 hd_add_core(sibling); 288 } 289 } 290 } else { 291 topo->booted_cores = topo_package->booted_cores; 292 } 293 } 294 } 295 296 void store_topology(struct sysinfo_15_1_x *info) 297 { 298 stsi(info, 15, 1, topology_mnest_limit()); 299 } 300 301 static void __arch_update_dedicated_flag(void *arg) 302 { 303 if (topology_cpu_dedicated(smp_processor_id())) 304 set_cpu_flag(CIF_DEDICATED_CPU); 305 else 306 clear_cpu_flag(CIF_DEDICATED_CPU); 307 } 308 309 static int __arch_update_cpu_topology(void) 310 { 311 struct sysinfo_15_1_x *info = tl_info; 312 int rc, hd_status; 313 314 hd_status = 0; 315 rc = 0; 316 mutex_lock(&smp_cpu_state_mutex); 317 if (MACHINE_HAS_TOPOLOGY) { 318 rc = 1; 319 store_topology(info); 320 tl_to_masks(info); 321 } 322 update_cpu_masks(); 323 if (!MACHINE_HAS_TOPOLOGY) 324 topology_update_polarization_simple(); 325 if (cpu_management == 1) 326 hd_status = hd_enable_hiperdispatch(); 327 mutex_unlock(&smp_cpu_state_mutex); 328 if (hd_status == 0) 329 hd_disable_hiperdispatch(); 330 return rc; 331 } 332 333 int arch_update_cpu_topology(void) 334 { 335 int rc; 336 337 rc = __arch_update_cpu_topology(); 338 on_each_cpu(__arch_update_dedicated_flag, NULL, 0); 339 return rc; 340 } 341 342 static void topology_work_fn(struct work_struct *work) 343 { 344 rebuild_sched_domains(); 345 } 346 347 void topology_schedule_update(void) 348 { 349 schedule_work(&topology_work); 350 } 351 352 static void topology_flush_work(void) 353 { 354 flush_work(&topology_work); 355 } 356 357 static void topology_timer_fn(struct timer_list *unused) 358 { 359 if (ptf(PTF_CHECK)) 360 topology_schedule_update(); 361 set_topology_timer(); 362 } 363 364 static struct timer_list topology_timer; 365 366 static atomic_t topology_poll = ATOMIC_INIT(0); 367 368 static void set_topology_timer(void) 369 { 370 if (atomic_add_unless(&topology_poll, -1, 0)) 371 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); 372 else 373 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); 374 } 375 376 void topology_expect_change(void) 377 { 378 if (!MACHINE_HAS_TOPOLOGY) 379 return; 380 /* This is racy, but it doesn't matter since it is just a heuristic. 381 * Worst case is that we poll in a higher frequency for a bit longer. 382 */ 383 if (atomic_read(&topology_poll) > 60) 384 return; 385 atomic_add(60, &topology_poll); 386 set_topology_timer(); 387 } 388 389 static int set_polarization(int polarization) 390 { 391 int rc = 0; 392 393 cpus_read_lock(); 394 mutex_lock(&smp_cpu_state_mutex); 395 if (cpu_management == polarization) 396 goto out; 397 rc = topology_set_cpu_management(polarization); 398 if (rc) 399 goto out; 400 cpu_management = polarization; 401 topology_expect_change(); 402 out: 403 mutex_unlock(&smp_cpu_state_mutex); 404 cpus_read_unlock(); 405 return rc; 406 } 407 408 static ssize_t dispatching_show(struct device *dev, 409 struct device_attribute *attr, 410 char *buf) 411 { 412 ssize_t count; 413 414 mutex_lock(&smp_cpu_state_mutex); 415 count = sprintf(buf, "%d\n", cpu_management); 416 mutex_unlock(&smp_cpu_state_mutex); 417 return count; 418 } 419 420 static ssize_t dispatching_store(struct device *dev, 421 struct device_attribute *attr, 422 const char *buf, 423 size_t count) 424 { 425 int val, rc; 426 char delim; 427 428 if (sscanf(buf, "%d %c", &val, &delim) != 1) 429 return -EINVAL; 430 if (val != 0 && val != 1) 431 return -EINVAL; 432 rc = set_polarization(val); 433 return rc ? rc : count; 434 } 435 static DEVICE_ATTR_RW(dispatching); 436 437 static ssize_t cpu_polarization_show(struct device *dev, 438 struct device_attribute *attr, char *buf) 439 { 440 int cpu = dev->id; 441 ssize_t count; 442 443 mutex_lock(&smp_cpu_state_mutex); 444 switch (smp_cpu_get_polarization(cpu)) { 445 case POLARIZATION_HRZ: 446 count = sprintf(buf, "horizontal\n"); 447 break; 448 case POLARIZATION_VL: 449 count = sprintf(buf, "vertical:low\n"); 450 break; 451 case POLARIZATION_VM: 452 count = sprintf(buf, "vertical:medium\n"); 453 break; 454 case POLARIZATION_VH: 455 count = sprintf(buf, "vertical:high\n"); 456 break; 457 default: 458 count = sprintf(buf, "unknown\n"); 459 break; 460 } 461 mutex_unlock(&smp_cpu_state_mutex); 462 return count; 463 } 464 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 465 466 static struct attribute *topology_cpu_attrs[] = { 467 &dev_attr_polarization.attr, 468 NULL, 469 }; 470 471 static struct attribute_group topology_cpu_attr_group = { 472 .attrs = topology_cpu_attrs, 473 }; 474 475 static ssize_t cpu_dedicated_show(struct device *dev, 476 struct device_attribute *attr, char *buf) 477 { 478 int cpu = dev->id; 479 ssize_t count; 480 481 mutex_lock(&smp_cpu_state_mutex); 482 count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); 483 mutex_unlock(&smp_cpu_state_mutex); 484 return count; 485 } 486 static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); 487 488 static struct attribute *topology_extra_cpu_attrs[] = { 489 &dev_attr_dedicated.attr, 490 NULL, 491 }; 492 493 static struct attribute_group topology_extra_cpu_attr_group = { 494 .attrs = topology_extra_cpu_attrs, 495 }; 496 497 int topology_cpu_init(struct cpu *cpu) 498 { 499 int rc; 500 501 rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 502 if (rc || !MACHINE_HAS_TOPOLOGY) 503 return rc; 504 rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); 505 if (rc) 506 sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); 507 return rc; 508 } 509 510 static const struct cpumask *cpu_thread_mask(int cpu) 511 { 512 return &cpu_topology[cpu].thread_mask; 513 } 514 515 516 const struct cpumask *cpu_coregroup_mask(int cpu) 517 { 518 return &cpu_topology[cpu].core_mask; 519 } 520 521 static const struct cpumask *cpu_book_mask(int cpu) 522 { 523 return &cpu_topology[cpu].book_mask; 524 } 525 526 static const struct cpumask *cpu_drawer_mask(int cpu) 527 { 528 return &cpu_topology[cpu].drawer_mask; 529 } 530 531 static struct sched_domain_topology_level s390_topology[] = { 532 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 533 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 534 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 535 { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, 536 { cpu_cpu_mask, SD_INIT_NAME(PKG) }, 537 { NULL, }, 538 }; 539 540 static void __init alloc_masks(struct sysinfo_15_1_x *info, 541 struct mask_info *mask, int offset) 542 { 543 int i, nr_masks; 544 545 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 546 for (i = 0; i < info->mnest - offset; i++) 547 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 548 nr_masks = max(nr_masks, 1); 549 for (i = 0; i < nr_masks; i++) { 550 mask->next = memblock_alloc(sizeof(*mask->next), 8); 551 if (!mask->next) 552 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 553 __func__, sizeof(*mask->next), 8); 554 mask = mask->next; 555 } 556 } 557 558 void __init topology_init_early(void) 559 { 560 struct sysinfo_15_1_x *info; 561 562 set_sched_topology(s390_topology); 563 if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { 564 if (MACHINE_HAS_TOPOLOGY) 565 topology_mode = TOPOLOGY_MODE_HW; 566 else 567 topology_mode = TOPOLOGY_MODE_SINGLE; 568 } 569 if (!MACHINE_HAS_TOPOLOGY) 570 goto out; 571 tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 572 if (!tl_info) 573 panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 574 __func__, PAGE_SIZE, PAGE_SIZE); 575 info = tl_info; 576 store_topology(info); 577 pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", 578 info->mag[0], info->mag[1], info->mag[2], info->mag[3], 579 info->mag[4], info->mag[5], info->mnest); 580 alloc_masks(info, &socket_info, 1); 581 alloc_masks(info, &book_info, 2); 582 alloc_masks(info, &drawer_info, 3); 583 out: 584 cpumask_set_cpu(0, &cpu_setup_mask); 585 __arch_update_cpu_topology(); 586 __arch_update_dedicated_flag(NULL); 587 } 588 589 static inline int topology_get_mode(int enabled) 590 { 591 if (!enabled) 592 return TOPOLOGY_MODE_SINGLE; 593 return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; 594 } 595 596 static inline int topology_is_enabled(void) 597 { 598 return topology_mode != TOPOLOGY_MODE_SINGLE; 599 } 600 601 static int __init topology_setup(char *str) 602 { 603 bool enabled; 604 int rc; 605 606 rc = kstrtobool(str, &enabled); 607 if (rc) 608 return rc; 609 topology_mode = topology_get_mode(enabled); 610 return 0; 611 } 612 early_param("topology", topology_setup); 613 614 static int topology_ctl_handler(const struct ctl_table *ctl, int write, 615 void *buffer, size_t *lenp, loff_t *ppos) 616 { 617 int enabled = topology_is_enabled(); 618 int new_mode; 619 int rc; 620 struct ctl_table ctl_entry = { 621 .procname = ctl->procname, 622 .data = &enabled, 623 .maxlen = sizeof(int), 624 .extra1 = SYSCTL_ZERO, 625 .extra2 = SYSCTL_ONE, 626 }; 627 628 rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); 629 if (rc < 0 || !write) 630 return rc; 631 632 mutex_lock(&smp_cpu_state_mutex); 633 new_mode = topology_get_mode(enabled); 634 if (topology_mode != new_mode) { 635 topology_mode = new_mode; 636 topology_schedule_update(); 637 } 638 mutex_unlock(&smp_cpu_state_mutex); 639 topology_flush_work(); 640 641 return rc; 642 } 643 644 static int polarization_ctl_handler(const struct ctl_table *ctl, int write, 645 void *buffer, size_t *lenp, loff_t *ppos) 646 { 647 int polarization; 648 int rc; 649 struct ctl_table ctl_entry = { 650 .procname = ctl->procname, 651 .data = &polarization, 652 .maxlen = sizeof(int), 653 .extra1 = SYSCTL_ZERO, 654 .extra2 = SYSCTL_ONE, 655 }; 656 657 polarization = cpu_management; 658 rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); 659 if (rc < 0 || !write) 660 return rc; 661 return set_polarization(polarization); 662 } 663 664 static struct ctl_table topology_ctl_table[] = { 665 { 666 .procname = "topology", 667 .mode = 0644, 668 .proc_handler = topology_ctl_handler, 669 }, 670 { 671 .procname = "polarization", 672 .mode = 0644, 673 .proc_handler = polarization_ctl_handler, 674 }, 675 }; 676 677 static int __init topology_init(void) 678 { 679 struct device *dev_root; 680 int rc = 0; 681 682 timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); 683 if (MACHINE_HAS_TOPOLOGY) 684 set_topology_timer(); 685 else 686 topology_update_polarization_simple(); 687 if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL)) 688 set_polarization(1); 689 register_sysctl("s390", topology_ctl_table); 690 691 dev_root = bus_get_dev_root(&cpu_subsys); 692 if (dev_root) { 693 rc = device_create_file(dev_root, &dev_attr_dispatching); 694 put_device(dev_root); 695 } 696 return rc; 697 } 698 device_initcall(topology_init); 699