// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 */

#define pr_fmt(fmt) "cpu: " fmt

#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
#include <asm/asm.h>

/* Function codes handed to ptf() below. */
#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

/*
 * Possible values of topology_mode. The mode selects how cpu_group_map()
 * and cpu_thread_map() build their masks and how update_cpu_masks()
 * assigns ids.
 */
enum {
	TOPOLOGY_MODE_HW,
	TOPOLOGY_MODE_SINGLE,
	TOPOLOGY_MODE_PACKAGE,
	TOPOLOGY_MODE_UNINITIALIZED
};

/*
 * Node of a singly linked list describing one container (socket, book or
 * drawer): its id and the mask of CPUs that were added to it.
 */
struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
/* Buffer that store_topology() fills; allocated in topology_init_early(). */
static struct sysinfo_15_1_x *tl_info;
/*
 * Current dispatching mode: 0 selects PTF_HORIZONTAL, non-zero selects
 * PTF_VERTICAL (see topology_set_cpu_management()).
 */
static int cpu_management;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 */
/* List heads; the head nodes themselves are never assigned an id here. */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

/*
 * Compute the group mask of @cpu for the container level whose list starts
 * at @info and copy it to @dst.
 *
 * A CPU that is not part of cpu_setup_mask gets an empty mask. Otherwise
 * the mask starts out as just @cpu and is then replaced depending on
 * topology_mode:
 *   TOPOLOGY_MODE_HW      - mask of the first list entry containing @cpu,
 *                           if there is one
 *   TOPOLOGY_MODE_PACKAGE - cpu_present_mask
 *   any other mode        - left as just @cpu
 * The result is finally restricted to cpu_setup_mask.
 */
static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
	/*
	 * Scratch mask with static storage duration.
	 * NOTE(review): this presumably relies on callers being serialized;
	 * confirm against the locking of update_cpu_masks() callers.
	 */
	static cpumask_t mask;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	switch (topology_mode) {
	case TOPOLOGY_MODE_HW:
		while (info) {
			if (cpumask_test_cpu(cpu, &info->mask)) {
				cpumask_copy(&mask, &info->mask);
				break;
			}
			info = info->next;
		}
		break;
	case TOPOLOGY_MODE_PACKAGE:
		cpumask_copy(&mask, cpu_present_mask);
		break;
	default:
		fallthrough;
	case TOPOLOGY_MODE_SINGLE:
		break;
	}
	cpumask_and(&mask, &mask, &cpu_setup_mask);
out:
	cpumask_copy(dst, &mask);
}

/*
 * Compute the mask of CPUs that share a core with @cpu and copy it to @dst.
 *
 * The mask is empty if @cpu is not in cpu_setup_mask, and contains only
 * @cpu unless topology_mode is TOPOLOGY_MODE_HW. In hardware mode CPU
 * numbers are treated as blocks of (smp_cpu_mtid + 1) consecutive ids;
 * every member of @cpu's block that is in cpu_setup_mask is included.
 */
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
	/* Static scratch mask, see the note in cpu_group_map(). */
	static cpumask_t mask;
	unsigned int max_cpu;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	if (topology_mode != TOPOLOGY_MODE_HW)
		goto out;
	/* Round down to the first CPU number of the block. */
	cpu -= cpu % (smp_cpu_mtid + 1);
	/* Last CPU of the block, capped at the highest valid CPU number. */
	max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
	for (; cpu <= max_cpu; cpu++) {
		if (cpumask_test_cpu(cpu, &cpu_setup_mask))
			cpumask_set_cpu(cpu, &mask);
	}
out:
	cpumask_copy(dst, &mask);
}

/* Number of bits scanned in the mask of a struct topology_core. */
#define TOPOLOGY_CORE_BITS	64

/*
 * Process one core entry of the topology information.
 *
 * For every bit set in @tl_core->mask the matching logical CPU is looked
 * up. That CPU and the following smp_cpu_mtid CPUs (capped at
 * nr_cpu_ids - 1) get their cpu_topology[] ids, dedicated flag,
 * polarization and capacity set, and are added to the masks of @drawer,
 * @book and @socket.
 */
static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int max_cpu, rcore;
		int cpu;

		/*
		 * The bit index is mirrored within the 64 bit mask and
		 * offset by the entry's origin to get the core number.
		 */
		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		/* No logical CPU found for this core: skip it. */
		if (cpu < 0)
			continue;
		max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
		for (; cpu <= max_cpu; cpu++) {
			topo = &cpu_topology[cpu];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = cpu;
			topo->dedicated = tl_core->d;
			cpumask_set_cpu(cpu, &drawer->mask);
			cpumask_set_cpu(cpu, &book->mask);
			cpumask_set_cpu(cpu, &socket->mask);
			smp_cpu_set_polarization(cpu, tl_core->pp);
			smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
		}
	}
}

/*
 * Clear the CPU mask of every node (list heads included) of the socket,
 * book and drawer lists. The ids and the list structure stay untouched.
 */
static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

/*
 * Return the entry that follows @tle. An entry with nesting level zero
 * has the size of a struct topology_core, every other entry the size of
 * a struct topology_container.
 */
static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}

/*
 * Rebuild the socket, book and drawer masks from the topology entries
 * found in @info.
 *
 * All masks are cleared first. The entries from info->tle up to
 * info + info->length are then walked: a container entry (nesting level
 * 3, 2 or 1) advances the corresponding list by one node and stores its
 * id there, a core entry (nesting level 0) adds its CPUs to the current
 * drawer, book and socket. Any other nesting level clears all masks again
 * and ends the walk.
 *
 * NOTE(review): the list nodes are advanced without a NULL check, so this
 * assumes alloc_masks() provided enough nodes for every container entry
 * reported - confirm.
 */
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		/*
		 * Adjust drawer_id, book_id, and socket_id so they match the
		 * numbering scheme of e.g. the hardware management console.
		 */
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id - 1;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id - 1;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id - 1;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

/* Mark every possible CPU as horizontally polarized. */
static void topology_update_polarization_simple(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}

/*
 * Execute the instruction encoded as ".insn rre,0xb9a20000" with function
 * code @fc (one of the PTF_* values) and return its condition code as
 * produced by CC_TRANSFORM().
 * NOTE(review): presumably this is the PERFORM TOPOLOGY FUNCTION
 * instruction - confirm against the architecture documentation.
 */
static int ptf(unsigned long fc)
{
	int cc;

	asm volatile(
		" .insn rre,0xb9a20000,%[fc],%[fc]\n"
		CC_IPM(cc)
		: CC_OUT(cc, cc)
		: [fc] "d" (fc)
		: CC_CLOBBER);
	return CC_TRANSFORM(cc);
}

/*
 * Request vertical (@fc non-zero) or horizontal (@fc zero) CPU management.
 *
 * Returns -EOPNOTSUPP if the machine has no topology support, -EBUSY if
 * ptf() reports a non-zero condition code, and 0 on success. On success
 * the polarization of every possible CPU is set to POLARIZATION_UNKNOWN.
 */
int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!cpu_has_topology())
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

/*
 * Recompute the per CPU topology masks and the booted core counts.
 *
 * First pass, over all possible CPUs: refresh thread, core, book and
 * drawer masks and reset booted_cores. If topology_mode is not
 * TOPOLOGY_MODE_HW the ids are synthesized: thread and core id are the
 * CPU number; socket, book and drawer id are 0 in package mode and the
 * CPU number otherwise.
 *
 * Second pass, over all online CPUs, after hd_reset_state(): the first
 * CPU of each core_mask counts the cores of that mask - one for every
 * member that is the first CPU of its own thread_mask - and reports each
 * of them with hd_add_core(). All other CPUs copy the count stored for
 * the first CPU of their core_mask.
 */
void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
	int cpu, sibling, pkg_first, smt_first, id;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		cpu_thread_map(&topo->thread_mask, cpu);
		cpu_group_map(&topo->core_mask, &socket_info, cpu);
		cpu_group_map(&topo->book_mask, &book_info, cpu);
		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
		topo->booted_cores = 0;
		if (topology_mode != TOPOLOGY_MODE_HW) {
			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = id;
			topo->book_id = id;
			topo->drawer_id = id;
		}
	}
	hd_reset_state();
	for_each_online_cpu(cpu) {
		topo = &cpu_topology[cpu];
		pkg_first = cpumask_first(&topo->core_mask);
		topo_package = &cpu_topology[pkg_first];
		if (cpu == pkg_first) {
			for_each_cpu(sibling, &topo->core_mask) {
				topo_sibling = &cpu_topology[sibling];
				smt_first = cpumask_first(&topo_sibling->thread_mask);
				/* Count each core once, via its first thread. */
				if (sibling == smt_first) {
					topo_package->booted_cores++;
					hd_add_core(sibling);
				}
			}
		} else {
			topo->booted_cores = topo_package->booted_cores;
		}
	}
}

/*
 * Fill @info by calling stsi() with the arguments 15, 1 and
 * topology_mnest_limit().
 */
void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

/*
 * Set or clear CIF_DEDICATED_CPU for the CPU this runs on, depending on
 * topology_cpu_dedicated(). @arg is unused; the signature allows the
 * function to be passed to on_each_cpu().
 */
static void __arch_update_dedicated_flag(void *arg)
{
	if (topology_cpu_dedicated(smp_processor_id()))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
}

/*
 * Refresh the topology data while holding smp_cpu_state_mutex.
 *
 * With topology support the information is stored again into tl_info and
 * converted to masks; without it all CPUs are marked horizontally
 * polarized. The per CPU masks are updated in both cases.
 *
 * hd_enable_hiperdispatch() is only called when cpu_management is 1.
 * hd_disable_hiperdispatch() is called after the mutex was released
 * whenever hd_status is 0, which includes the case that enabling was not
 * attempted at all.
 *
 * Returns 1 if the machine has topology support, otherwise 0.
 */
static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc, hd_status;

	hd_status = 0;
	rc = 0;
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_has_topology()) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!cpu_has_topology())
		topology_update_polarization_simple();
	if (cpu_management == 1)
		hd_status = hd_enable_hiperdispatch();
	mutex_unlock(&smp_cpu_state_mutex);
	if (hd_status == 0)
		hd_disable_hiperdispatch();
	return rc;
}

/*
 * Refresh the topology data and then have __arch_update_dedicated_flag()
 * run via on_each_cpu(). Returns the result of
 * __arch_update_cpu_topology().
 */
int arch_update_cpu_topology(void)
{
	int rc;

	rc = __arch_update_cpu_topology();
	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
	return rc;
}

/* Work function of topology_work: rebuild the scheduling domains. */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

/* Queue topology_work. */
void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

/* Flush topology_work. */
static void topology_flush_work(void)
{
	flush_work(&topology_work);
}

/*
 * Timer callback: schedule a topology update if ptf(PTF_CHECK) returns a
 * non-zero value, then re-arm the timer.
 */
static void topology_timer_fn(struct timer_list *unused)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer;

/* Number of remaining timer periods that use the short interval. */
static atomic_t topology_poll = ATOMIC_INIT(0);

/*
 * (Re-)arm topology_timer. As long as topology_poll is non-zero it is
 * decremented and the timer expires after 100 milliseconds; once it has
 * reached zero the timer expires after 60 seconds.
 */
static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
	else
		mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}

/*
 * Switch to the short polling interval for another 60 timer periods and
 * re-arm the timer. Does nothing without topology support or if
 * topology_poll is already above 60.
 */
void topology_expect_change(void)
{
	if (!cpu_has_topology())
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

/*
 * Change cpu_management to @polarization, holding cpus_read_lock() and
 * smp_cpu_state_mutex.
 *
 * Nothing is done if the value is unchanged. Otherwise the change is
 * requested with topology_set_cpu_management(); only on success is
 * cpu_management updated and faster polling requested.
 *
 * Returns 0 on success or if nothing had to be done, otherwise the error
 * of topology_set_cpu_management().
 */
static int set_polarization(int polarization)
{
	int rc = 0;

	cpus_read_lock();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == polarization)
		goto out;
	rc = topology_set_cpu_management(polarization);
	if (rc)
		goto out;
	cpu_management = polarization;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	cpus_read_unlock();
	return rc;
}

/* sysfs show handler of "dispatching": emit cpu_management as decimal. */
static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sysfs_emit(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

/*
 * sysfs store handler of "dispatching": accept 0 or 1 and apply it with
 * set_polarization().
 *
 * Returns @count on success, -EINVAL for malformed or out of range input,
 * otherwise the error of set_polarization().
 */
static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	/*
	 * Exactly one conversion must succeed: a second one means that a
	 * non-whitespace character follows the number.
	 */
	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = set_polarization(val);
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

/*
 * sysfs show handler of the per CPU "polarization" attribute: emit the
 * polarization of CPU dev->id as text; values without a dedicated case
 * are reported as "unknown".
 */
static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sysfs_emit(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sysfs_emit(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sysfs_emit(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sysfs_emit(buf, "vertical:high\n");
		break;
	default:
		count = sysfs_emit(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

/* Attributes that topology_cpu_init() creates for every CPU. */
static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

/*
 * sysfs show handler of the per CPU "dedicated" attribute: emit
 * topology_cpu_dedicated() of CPU dev->id as decimal.
 */
static ssize_t cpu_dedicated_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

/* Attributes that are only created if the machine has topology support. */
static struct attribute *topology_extra_cpu_attrs[] = {
	&dev_attr_dedicated.attr,
	NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
	.attrs = topology_extra_cpu_attrs,
};

/*
 * Create the topology sysfs attribute groups for @cpu.
 *
 * The "polarization" group is always created, the "dedicated" group only
 * with topology support. If creating the second group fails the first
 * one is removed again.
 *
 * Returns 0 on success, otherwise the error of sysfs_create_group().
 */
int topology_cpu_init(struct cpu *cpu)
{
	int rc;

	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	if (rc || !cpu_has_topology())
		return rc;
	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
	if (rc)
		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	return rc;
}

/* Return the core_mask stored in cpu_topology[] for @cpu. */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

/* Mask callback of the BOOK level: book_mask of @cpu; @tl is unused. */
static const struct cpumask *tl_book_mask(struct sched_domain_topology_level *tl, int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

/* Mask callback of the DRAWER level: drawer_mask of @cpu; @tl is unused. */
static const struct cpumask *tl_drawer_mask(struct sched_domain_topology_level *tl, int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

/*
 * Scheduling domain levels, registered with set_sched_topology() in
 * topology_init_early(). The table is terminated by an empty entry.
 */
static struct sched_domain_topology_level s390_topology[] = {
	SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
	SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
	SDTL_INIT(tl_book_mask, NULL, BOOK),
	SDTL_INIT(tl_drawer_mask, NULL, DRAWER),
	SDTL_INIT(tl_pkg_mask, NULL, PKG),
	{ NULL, },
};

/*
 * Allocate the list nodes for one container level and chain them behind
 * @mask.
 *
 * The number of nodes is the product of info->mag[TOPOLOGY_NR_MAG - @offset]
 * and the (info->mnest - @offset) mag entries preceding it, but at least
 * one. @offset is 1 for sockets, 2 for books and 3 for drawers (see
 * topology_init_early()).
 *
 * NOTE(review): the next pointer of the last node is never written here,
 * so list termination presumably relies on memblock_alloc_or_panic()
 * returning zeroed memory - confirm.
 */
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

/*
 * Skip forward from @tle to the first entry with nesting level zero and
 * return 1 if its polarization differs from POLARIZATION_HRZ, else 0.
 *
 * NOTE(review): the walk has no end check; it assumes that the data
 * contains at least one core entry - confirm.
 */
static int __init detect_polarization(union topology_entry *tle)
{
	struct topology_core *tl_core;

	while (tle->nl)
		tle = next_tle(tle);
	tl_core = (struct topology_core *)tle;
	return tl_core->pp != POLARIZATION_HRZ;
}

/*
 * Early topology setup.
 *
 * Registers s390_topology and, unless the mode was already chosen (see
 * topology_setup()), selects TOPOLOGY_MODE_HW or TOPOLOGY_MODE_SINGLE
 * depending on topology support. With topology support the tl_info
 * buffer is allocated and filled, cpu_management is derived from the
 * first core entry, the configuration is logged and the socket, book and
 * drawer lists are allocated.
 *
 * In all cases CPU 0 is added to cpu_setup_mask before the first
 * topology update and the dedicated flag update for the current CPU.
 */
void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
		if (cpu_has_topology())
			topology_mode = TOPOLOGY_MODE_HW;
		else
			topology_mode = TOPOLOGY_MODE_SINGLE;
	}
	if (!cpu_has_topology())
		goto out;
	tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	cpu_management = detect_polarization(info->tle);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	cpumask_set_cpu(0, &cpu_setup_mask);
	__arch_update_cpu_topology();
	__arch_update_dedicated_flag(NULL);
}

/*
 * Map the on/off setting @enabled to a topology mode: off gives
 * TOPOLOGY_MODE_SINGLE, on gives TOPOLOGY_MODE_HW with topology support
 * and TOPOLOGY_MODE_PACKAGE without.
 */
static inline int topology_get_mode(int enabled)
{
	if (!enabled)
		return TOPOLOGY_MODE_SINGLE;
	return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

/* Return non-zero unless topology_mode is TOPOLOGY_MODE_SINGLE. */
static inline int topology_is_enabled(void)
{
	return topology_mode != TOPOLOGY_MODE_SINGLE;
}

/*
 * Handler of the "topology" early parameter: parse @str as a boolean and
 * set topology_mode accordingly. Returns 0 on success, otherwise the
 * error of kstrtobool().
 */
static int __init topology_setup(char *str)
{
	bool enabled;
	int rc;

	rc = kstrtobool(str, &enabled);
	if (rc)
		return rc;
	topology_mode = topology_get_mode(enabled);
	return 0;
}
early_param("topology", topology_setup);

/*
 * Handler of the "topology" sysctl.
 *
 * The value is read from or written to the local variable "enabled"
 * through a temporary table entry limited to the range 0..1. Reads and
 * failed writes return right away. After a successful write the new mode
 * is computed under smp_cpu_state_mutex and, if it differs from the
 * current one, stored and a topology update is scheduled. The work is
 * flushed after the mutex was released.
 */
static int topology_ctl_handler(const struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int enabled = topology_is_enabled();
	int new_mode;
	int rc;
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &enabled,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;

	mutex_lock(&smp_cpu_state_mutex);
	new_mode = topology_get_mode(enabled);
	if (topology_mode != new_mode) {
		topology_mode = new_mode;
		topology_schedule_update();
	}
	mutex_unlock(&smp_cpu_state_mutex);
	topology_flush_work();

	return rc;
}

/*
 * Handler of the "polarization" sysctl.
 *
 * Reads report cpu_management; it is sampled here without taking
 * smp_cpu_state_mutex. A successful write of 0 or 1 is applied with
 * set_polarization() and that function's result is returned.
 */
static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	int polarization;
	int rc;
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &polarization,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	polarization = cpu_management;
	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;
	return set_polarization(polarization);
}

/* sysctl entries registered below the "s390" directory in topology_init(). */
static const struct ctl_table topology_ctl_table[] = {
	{
		.procname	= "topology",
		.mode		= 0644,
		.proc_handler	= topology_ctl_handler,
	},
	{
		.procname	= "polarization",
		.mode		= 0644,
		.proc_handler	= polarization_ctl_handler,
	},
};

/*
 * Late topology setup, run as a device initcall.
 *
 * Sets up the deferrable polling timer and starts it if the machine has
 * topology support; otherwise all CPUs are marked horizontally
 * polarized. With CONFIG_SCHED_TOPOLOGY_VERTICAL vertical polarization
 * is requested; the result of that request is not checked. Finally the
 * sysctl table and the "dispatching" attribute are registered.
 *
 * Returns the result of device_create_file(), or 0 if the cpu subsystem
 * has no root device.
 */
static int __init topology_init(void)
{
	struct device *dev_root;
	int rc = 0;

	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
	if (cpu_has_topology())
		set_topology_timer();
	else
		topology_update_polarization_simple();
	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
		set_polarization(1);
	register_sysctl("s390", topology_ctl_table);

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		rc = device_create_file(dev_root, &dev_attr_dispatching);
		put_device(dev_root);
	}
	return rc;
}
device_initcall(topology_init);