// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>

/* Function codes for the PTF (perform topology function) instruction. */
#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

/*
 * How the scheduler topology is derived:
 * HW      - from the machine provided topology information (STSI 15.1.x)
 * SINGLE  - topology disabled; each cpu gets its own unique ids
 * PACKAGE - no hardware info; all present cpus are treated as one package
 */
enum {
	TOPOLOGY_MODE_HW,
	TOPOLOGY_MODE_SINGLE,
	TOPOLOGY_MODE_PACKAGE,
	TOPOLOGY_MODE_UNINITIALIZED
};

/* One node per topology container (socket/book/drawer), singly linked. */
struct mask_info {
	struct mask_info *next;
	unsigned char id;	/* container id from the topology list */
	cpumask_t mask;		/* cpus contained in this container */
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;	/* buffer for machine topology info */

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

/*
 * Store into @dst the cpumask of the container (from list @info) that
 * contains @cpu, restricted to cpus present in cpu_setup_mask.
 * Uses a static scratch mask; callers are serialized (see the
 * "sched_domains_mutex" note above).
 */
static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
	static cpumask_t mask;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	switch (topology_mode) {
	case TOPOLOGY_MODE_HW:
		/* Find the container that includes @cpu and use its mask. */
		while (info) {
			if (cpumask_test_cpu(cpu, &info->mask)) {
				cpumask_copy(&mask, &info->mask);
				break;
			}
			info = info->next;
		}
		break;
	case TOPOLOGY_MODE_PACKAGE:
		/* All present cpus form one single package. */
		cpumask_copy(&mask, cpu_present_mask);
		break;
	default:
		fallthrough;
	case TOPOLOGY_MODE_SINGLE:
		/* Only @cpu itself remains in the mask. */
		break;
	}
	cpumask_and(&mask, &mask, &cpu_setup_mask);
out:
	cpumask_copy(dst, &mask);
}

/*
 * Store into @dst the mask of hardware threads (SMT siblings) that share
 * @cpu's core. Assumes the logical cpu numbers of one core's threads are
 * consecutive and aligned to (smp_cpu_mtid + 1) -- presumably guaranteed
 * by the cpu enumeration code; TODO confirm.
 */
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
	static cpumask_t mask;
	unsigned int max_cpu;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	if (topology_mode != TOPOLOGY_MODE_HW)
		goto out;
	/* Round down to the first thread of the core ... */
	cpu -= cpu % (smp_cpu_mtid + 1);
	/* ... then add every set-up thread of that core. */
	max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
	for (; cpu <= max_cpu; cpu++) {
		if (cpumask_test_cpu(cpu, &cpu_setup_mask))
			cpumask_set_cpu(cpu, &mask);
	}
out:
	cpumask_copy(dst, &mask);
}

/* Width of the core bit mask within one topology CPU entry. */
#define TOPOLOGY_CORE_BITS	64

/*
 * Fill in ids, masks and polarization for all cpus covered by one CPU
 * topology-list entry @tl_core located in @drawer/@book/@socket.
 */
static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int max_cpu, rcore;
		int cpu;

		/*
		 * Bits in tl_core->mask count from the most significant
		 * bit, hence the reversal before adding the origin.
		 */
		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (cpu < 0)
			continue;	/* core not known to Linux */
		/* Apply the entry to all threads of the core. */
		max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
		for (; cpu <= max_cpu; cpu++) {
			topo = &cpu_topology[cpu];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = cpu;
			topo->dedicated = tl_core->d;
			cpumask_set_cpu(cpu, &drawer->mask);
			cpumask_set_cpu(cpu, &book->mask);
			cpumask_set_cpu(cpu, &socket->mask);
			smp_cpu_set_polarization(cpu, tl_core->pp);
		}
	}
}

/* Clear the cpumasks of every node in all three container lists. */
static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

/*
 * Advance to the next topology-list entry. CPU entries (nesting level 0)
 * and container entries have different sizes.
 */
static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}

/*
 * Walk the topology list in @info and rebuild the drawer/book/socket
 * container masks and the per-cpu topology ids from it. On an unknown
 * nesting level everything is cleared again and the walk aborted.
 */
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:	/* drawer container */
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:	/* book container */
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:	/* socket container */
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:	/* CPU entry */
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

/*
 * Without machine topology information every cpu is reported as
 * horizontally polarized.
 */
static void topology_update_polarization_simple(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}

/*
 * Execute the PTF instruction (opcode 0xb9a2) with function code @fc
 * and return its condition code; callers treat 0 as success.
 */
static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		" .insn rre,0xb9a20000,%1,%1\n"
		" ipm %0\n"
		" srl %0,28\n"
		: "=d" (rc)
		: "d" (fc) : "cc");
	return rc;
}

/*
 * Switch cpu polarization: @fc != 0 selects vertical, @fc == 0 selects
 * horizontal polarization. Returns 0 on success, -EOPNOTSUPP when the
 * machine has no topology support, or -EBUSY when PTF failed. All cpu
 * polarizations are reset to "unknown" until the next topology update.
 */
int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

/*
 * Recompute all per-cpu scheduler masks and (for non-HW modes) the
 * synthetic topology ids, then derive booted_cores for each package.
 */
void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
	int cpu, sibling, pkg_first, smt_first, id;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		cpu_thread_map(&topo->thread_mask, cpu);
		cpu_group_map(&topo->core_mask, &socket_info, cpu);
		cpu_group_map(&topo->book_mask, &book_info, cpu);
		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
		topo->booted_cores = 0;
		if (topology_mode != TOPOLOGY_MODE_HW) {
			/* PACKAGE: all cpus share id 0; SINGLE: unique ids. */
			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = id;
			topo->book_id = id;
			topo->drawer_id = id;
		}
	}
	for_each_online_cpu(cpu) {
		topo = &cpu_topology[cpu];
		pkg_first = cpumask_first(&topo->core_mask);
		topo_package = &cpu_topology[pkg_first];
		if (cpu == pkg_first) {
			/* Count one core per leading sibling thread. */
			for_each_cpu(sibling, &topo->core_mask) {
				topo_sibling = &cpu_topology[sibling];
				smt_first = cpumask_first(&topo_sibling->thread_mask);
				if (sibling == smt_first)
					topo_package->booted_cores++;
			}
		} else {
			/* Copy the count from the package's first cpu. */
			topo->booted_cores = topo_package->booted_cores;
		}
	}
}

/* Fetch the machine topology information (STSI 15.1.x) into @info. */
void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

/* Refresh the CIF_DEDICATED_CPU flag of the cpu this runs on. */
static void __arch_update_dedicated_flag(void *arg)
{
	if (topology_cpu_dedicated(smp_processor_id()))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
}

/*
 * Re-read the machine topology (if available) and rebuild all masks.
 * Returns 1 if machine topology information was (re-)read, 0 otherwise.
 */
static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc = 0;

	mutex_lock(&smp_cpu_state_mutex);
	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	mutex_unlock(&smp_cpu_state_mutex);
	return rc;
}

/*
 * Arch hook invoked when the scheduler rebuilds the sched domains.
 * Also refreshes the per-cpu dedicated flags and notifies user space
 * with a change uevent on every online cpu device.
 */
int arch_update_cpu_topology(void)
{
	struct device *dev;
	int cpu, rc;

	rc = __arch_update_cpu_topology();
	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		if (dev)
			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}

/* Deferred work: rebuild the scheduler domains. */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

/* Schedule an asynchronous topology / sched-domain update. */
void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

/* Wait for a previously scheduled topology update to complete. */
static void topology_flush_work(void)
{
	flush_work(&topology_work);
}
/*
 * Timer callback: ask via PTF (check function code) whether the topology
 * changed and, if so, schedule an update. Always re-arms itself.
 */
static void topology_timer_fn(struct timer_list *unused)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer;

/* Remaining number of fast (100ms) polls; 0 means slow (60s) polling. */
static atomic_t topology_poll = ATOMIC_INIT(0);

/*
 * (Re-)arm the topology check timer: poll every 100ms while a change is
 * expected (topology_poll counts down per shot), once a minute otherwise.
 */
static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
	else
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}

/*
 * A topology change is likely (e.g. after a polarization switch):
 * credit up to 60 fast polls and re-arm the timer.
 */
void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

/* Non-zero while vertical cpu polarization (cpu management) is enabled. */
static int cpu_management;

/* Show the current "dispatching" (cpu management) setting. */
static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

/*
 * Store 0 (horizontal) or 1 (vertical) to switch the polarization mode;
 * any other input is rejected with -EINVAL. Takes cpus_read_lock()
 * before smp_cpu_state_mutex to keep the cpu set stable.
 */
static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	cpus_read_lock();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;	/* nothing to do */
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	cpus_read_unlock();
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

/* Show the polarization of one cpu (per-cpu "polarization" attribute). */
static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

/* Show whether the cpu is dedicated (per-cpu "dedicated" attribute). */
static ssize_t cpu_dedicated_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

static struct attribute *topology_extra_cpu_attrs[] = {
	&dev_attr_dedicated.attr,
	NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
	.attrs = topology_extra_cpu_attrs,
};

/*
 * Create the per-cpu topology sysfs attributes. The "dedicated"
 * attribute is only added when the machine provides topology info;
 * on failure the already created group is removed again.
 */
int topology_cpu_init(struct cpu *cpu)
{
	int rc;

	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	if (rc || !MACHINE_HAS_TOPOLOGY)
		return rc;
	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
	if (rc)
		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	return rc;
}

/* Accessors handing the precomputed per-cpu masks to the scheduler. */
static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &cpu_topology[cpu].thread_mask;
}


const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

/* Scheduler domain hierarchy: SMT -> MC -> BOOK -> DRAWER -> PKG. */
static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(PKG) },
	{ NULL, },
};

/*
 * Allocate the mask_info list for one container level. The number of
 * entries is the product of the topology magnitudes above the level
 * selected by @offset, with a minimum of one. Panics on allocation
 * failure (early boot, no recovery possible).
 */
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_alloc(sizeof(*mask->next), 8);
		if (!mask->next)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

/*
 * Early topology setup: register the sched-domain table, pick the
 * topology mode (unless "topology=" already chose one), allocate the
 * info buffer and container lists, and run the first topology update.
 */
void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
		if (MACHINE_HAS_TOPOLOGY)
			topology_mode = TOPOLOGY_MODE_HW;
		else
			topology_mode = TOPOLOGY_MODE_SINGLE;
	}
	if (!MACHINE_HAS_TOPOLOGY)
		goto out;
	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!tl_info)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	cpumask_set_cpu(0, &cpu_setup_mask);
	__arch_update_cpu_topology();
	__arch_update_dedicated_flag(NULL);
}

/* Map the "topology=" on/off setting to a topology mode. */
static inline int topology_get_mode(int enabled)
{
	if (!enabled)
		return TOPOLOGY_MODE_SINGLE;
	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

/* Topology counts as enabled in every mode except SINGLE. */
static inline int topology_is_enabled(void)
{
	return topology_mode != TOPOLOGY_MODE_SINGLE;
}

/* Parse the early "topology=" kernel command line parameter. */
static int __init topology_setup(char *str)
{
	bool enabled;
	int rc;

	rc = kstrtobool(str, &enabled);
	if (rc)
		return rc;
	topology_mode = topology_get_mode(enabled);
	return 0;
}
early_param("topology", topology_setup);

/*
 * sysctl handler for s390.topology: reads/writes the enabled state
 * (0/1) and, on a mode change, schedules a topology update which is
 * then waited for so the write returns only after it took effect.
 */
static int topology_ctl_handler(struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int enabled = topology_is_enabled();
	int new_mode;
	int rc;
	/* Proxy table entry so the 0..1 range check applies to @enabled. */
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &enabled,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;

	mutex_lock(&smp_cpu_state_mutex);
	new_mode = topology_get_mode(enabled);
	if (topology_mode != new_mode) {
		topology_mode = new_mode;
		topology_schedule_update();
	}
	mutex_unlock(&smp_cpu_state_mutex);
	topology_flush_work();

	return rc;
}

static struct ctl_table topology_ctl_table[] = {
	{
		.procname	= "topology",
		.mode		= 0644,
		.proc_handler	= topology_ctl_handler,
	},
};

/*
 * Late init: start the deferrable topology check timer (or fall back
 * to simple polarization without machine topology), register the
 * sysctl and the global "dispatching" sysfs attribute.
 */
static int __init topology_init(void)
{
	struct device *dev_root;
	int rc = 0;

	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	register_sysctl("s390", topology_ctl_table);

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		rc = device_create_file(dev_root, &dev_attr_dispatching);
		put_device(dev_root);
	}
	return rc;
}
device_initcall(topology_init);