// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>

/* Function codes handed to ptf(). */
#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

/*
 * How CPU masks and ids are derived (see cpu_group_map() and
 * update_cpu_masks()):
 *   HW            - from the topology information stored by the machine
 *   SINGLE        - every CPU forms its own group
 *   PACKAGE       - all present CPUs form one group
 *   UNINITIALIZED - no mode chosen yet; topology_init_early() picks one
 */
enum {
	TOPOLOGY_MODE_HW,
	TOPOLOGY_MODE_SINGLE,
	TOPOLOGY_MODE_PACKAGE,
	TOPOLOGY_MODE_UNINITIALIZED
};

/*
 * One element of a singly linked list of containers (sockets, books or
 * drawers). The statically defined list heads below are never filled by
 * tl_to_masks(), which always advances to ->next before storing an id.
 */
struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
/* Buffer that store_topology() fills; allocated in topology_init_early(). */
static struct sysinfo_15_1_x *tl_info;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 *
 * NOTE(review): within this file the updates are performed with
 * smp_cpu_state_mutex held (see __arch_update_cpu_topology()); confirm
 * which lock the external callers actually hold.
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

/*
 * Compute the group mask of @cpu for one container level and store it
 * in @dst.
 *
 * @dst:  receives the resulting mask
 * @info: head of the container list to search (only used in HW mode)
 * @cpu:  logical CPU number
 *
 * HW mode uses the mask of the first list element that contains @cpu,
 * PACKAGE mode uses cpu_present_mask, every other mode uses just @cpu.
 * The result is always restricted to cpu_online_mask.
 *
 * The scratch mask is function-static, so this function is not
 * reentrant; callers have to be serialized.
 */
static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
	static cpumask_t mask;

	cpumask_copy(&mask, cpumask_of(cpu));
	switch (topology_mode) {
	case TOPOLOGY_MODE_HW:
		while (info) {
			if (cpumask_test_cpu(cpu, &info->mask)) {
				cpumask_copy(&mask, &info->mask);
				break;
			}
			info = info->next;
		}
		/*
		 * NOTE(review): mask starts out as cpumask_of(cpu) and is
		 * only replaced by a mask that contains cpu, so this check
		 * looks like it can never trigger.
		 */
		if (cpumask_empty(&mask))
			cpumask_copy(&mask, cpumask_of(cpu));
		break;
	case TOPOLOGY_MODE_PACKAGE:
		cpumask_copy(&mask, cpu_present_mask);
		break;
	default:
		fallthrough;
	case TOPOLOGY_MODE_SINGLE:
		cpumask_copy(&mask, cpumask_of(cpu));
		break;
	}
	cpumask_and(&mask, &mask, cpu_online_mask);
	cpumask_copy(dst, &mask);
}

/*
 * Compute the thread sibling mask of @cpu and store it in @dst.
 *
 * Outside of HW mode the result is just @cpu, without any online
 * filtering. In HW mode @cpu is rounded down to a multiple of
 * (smp_cpu_mtid + 1) and every present CPU in the following
 * smp_cpu_mtid + 1 slots is added; the result is then restricted to
 * cpu_online_mask.
 *
 * Uses a function-static scratch mask, so callers have to be serialized.
 */
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
	static cpumask_t mask;
	int i;

	cpumask_copy(&mask, cpumask_of(cpu));
	if (topology_mode != TOPOLOGY_MODE_HW)
		goto out;
	/* Step back to the first thread slot of the group @cpu belongs to. */
	cpu -= cpu % (smp_cpu_mtid + 1);
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_present(cpu + i))
			cpumask_set_cpu(cpu + i, &mask);
	cpumask_and(&mask, &mask, cpu_online_mask);
out:
	cpumask_copy(dst, &mask);
}

/* Number of bits scanned in the core mask of a topology core entry. */
#define TOPOLOGY_CORE_BITS	64

/*
 * Record the CPUs described by one core entry.
 *
 * @tl_core: core entry taken from the stored topology information
 * @drawer:  drawer list element the cores belong to
 * @book:    book list element the cores belong to
 * @socket:  socket list element the cores belong to
 *
 * For every bit set in tl_core->mask the matching logical CPU is looked
 * up; cores without a logical CPU are skipped. Each of the
 * smp_cpu_mtid + 1 threads of a core gets its cpu_topology[] entry
 * filled, is added to the three container masks and has its
 * polarization set from tl_core->pp.
 */
static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int rcore;
		int lcpu, i;

		/*
		 * The bit index is mirrored before the origin is added -
		 * presumably because the machine numbers the mask bits
		 * starting at the most significant bit (TODO confirm).
		 */
		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (lcpu < 0)
			continue;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			topo = &cpu_topology[lcpu + i];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = lcpu + i;
			topo->dedicated = tl_core->d;
			cpumask_set_cpu(lcpu + i, &drawer->mask);
			cpumask_set_cpu(lcpu + i, &book->mask);
			cpumask_set_cpu(lcpu + i, &socket->mask);
			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
		}
	}
}

/*
 * Clear the CPU mask of every element (list heads included) of the
 * socket, book and drawer lists. Ids and list links are left untouched.
 */
static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

/*
 * Return the entry following @tle. Entries with a nesting level (nl) of
 * zero have the size of a struct topology_core, all others the size of
 * a struct topology_container.
 */
static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}

/*
 * Translate the stored topology information @info into the socket, book
 * and drawer lists and into cpu_topology[].
 *
 * All masks are cleared first. The entries between info->tle and
 * info + info->length are then walked: a container entry (nl 3, 2 or 1)
 * advances to the next element of the drawer, book or socket list and
 * records the container id there; a core entry (nl 0) is added to the
 * current drawer, book and socket. An entry with any other nesting level
 * clears all masks again and stops the walk.
 *
 * NOTE(review): ->next is dereferenced without a NULL check, so this
 * relies on the lists set up by alloc_masks() being long enough for the
 * data the machine reports.
 */
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

/* Mark every possible CPU as horizontally polarized. */
static void topology_update_polarization_simple(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}

/*
 * Execute the instruction encoded as 0xb9a2 with function code @fc (one
 * of the PTF_* values) and return what "ipm" followed by a right shift
 * of 28 bits leaves in the result register - presumably the condition
 * code set by the instruction (TODO confirm against the architecture
 * documentation).
 */
static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		" .insn rre,0xb9a20000,%1,%1\n"
		" ipm %0\n"
		" srl %0,28\n"
		: "=d" (rc)
		: "d" (fc)  : "cc");
	return rc;
}

/*
 * Request vertical (@fc != 0) or horizontal (@fc == 0) CPU polarization.
 *
 * Returns -EOPNOTSUPP if the machine has no topology support and -EBUSY
 * if ptf() reports a non-zero result. On success all possible CPUs are
 * marked POLARIZATION_UNKNOWN and 0 is returned.
 */
int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}

/*
 * Recompute the thread, core, book and drawer masks of every possible
 * CPU and the booted_cores count of every online CPU.
 *
 * Outside of HW mode the topology ids are synthesized as well: thread
 * and core id are the CPU number, the container ids are 0 in PACKAGE
 * mode and the CPU number otherwise.
 */
void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
	int cpu, sibling, pkg_first, smt_first, id;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		cpu_thread_map(&topo->thread_mask, cpu);
		cpu_group_map(&topo->core_mask, &socket_info, cpu);
		cpu_group_map(&topo->book_mask, &book_info, cpu);
		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
		topo->booted_cores = 0;
		if (topology_mode != TOPOLOGY_MODE_HW) {
			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = id;
			topo->book_id = id;
			topo->drawer_id = id;
		}
	}
	/*
	 * The first CPU of each core_mask counts the cores of its package:
	 * one for every sibling that is the first CPU of its own thread
	 * mask. All other CPUs copy the count from that first CPU, which
	 * presumably has been visited already because the iteration runs
	 * in ascending CPU order (TODO confirm).
	 */
	for_each_online_cpu(cpu) {
		topo = &cpu_topology[cpu];
		pkg_first = cpumask_first(&topo->core_mask);
		topo_package = &cpu_topology[pkg_first];
		if (cpu == pkg_first) {
			for_each_cpu(sibling, &topo->core_mask) {
				topo_sibling = &cpu_topology[sibling];
				smt_first = cpumask_first(&topo_sibling->thread_mask);
				if (sibling == smt_first)
					topo_package->booted_cores++;
			}
		} else {
			topo->booted_cores = topo_package->booted_cores;
		}
	}
}

/*
 * Fill @info by calling stsi() with the arguments 15, 1 and the nesting
 * limit returned by topology_mnest_limit().
 */
void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

/*
 * Set or clear CIF_DEDICATED_CPU for the CPU this runs on, depending on
 * topology_cpu_dedicated(). @arg is unused; the signature fits
 * on_each_cpu().
 */
static void __arch_update_dedicated_flag(void *arg)
{
	if (topology_cpu_dedicated(smp_processor_id()))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
}

/*
 * Refresh the topology data with smp_cpu_state_mutex held.
 *
 * With topology support the information is stored again into tl_info
 * and translated into masks before the per-CPU masks are recomputed;
 * without it the per-CPU masks are recomputed and all CPUs are marked
 * horizontally polarized.
 *
 * Returns 1 if the machine has topology support, 0 otherwise.
 */
static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc = 0;

	mutex_lock(&smp_cpu_state_mutex);
	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	mutex_unlock(&smp_cpu_state_mutex);
	return rc;
}

/*
 * Refresh the topology data, update the dedicated flag on all CPUs and
 * send a KOBJ_CHANGE uevent for the device of every online CPU.
 *
 * Returns the result of __arch_update_cpu_topology().
 */
int arch_update_cpu_topology(void)
{
	struct device *dev;
	int cpu, rc;

	rc = __arch_update_cpu_topology();
	/* Last argument 0: presumably "do not wait for completion". */
	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		if (dev)
			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}

/* Work handler of topology_work: rebuild the scheduling domains. */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

/* Queue topology_work, i.e. a deferred rebuild of the scheduling domains. */
void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

/* Wait until a queued or running topology_work has finished. */
static void topology_flush_work(void)
{
	flush_work(&topology_work);
}

/*
 * Timer callback: schedule a topology update if ptf(PTF_CHECK) returns a
 * non-zero value, then re-arm the timer.
 */
static void topology_timer_fn(struct timer_list *unused)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer;

/* Number of fast (100 ms) polls still to be done, see set_topology_timer(). */
static atomic_t topology_poll = ATOMIC_INIT(0);

/*
 * (Re-)arm topology_timer. As long as topology_poll is non-zero it is
 * decremented and the timer expires after 100 ms; once it has reached
 * zero the timer expires after 60 seconds.
 */
static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
	else
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}

/*
 * Announce that a topology change is expected: add 60 fast polls (unless
 * more than 60 are still pending) and re-arm the timer. Does nothing
 * without topology support.
 */
void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

/* Value last applied successfully through the "dispatching" attribute. */
static int cpu_management;

/* sysfs show handler of "dispatching": print cpu_management. */
static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

/*
 * sysfs store handler of "dispatching".
 *
 * Accepts exactly "0" or "1", optionally surrounded by whitespace; any
 * other input yields -EINVAL. A value differing from cpu_management is
 * applied with topology_set_cpu_management(); on success it is
 * remembered and a topology change is announced.
 *
 * Returns @count on success (including the "nothing to change" case),
 * otherwise the error of topology_set_cpu_management().
 */
static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	/* A result other than 1 means: no number, or trailing garbage. */
	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

/*
 * sysfs show handler of the per-CPU "polarization" attribute: print the
 * polarization of CPU dev->id as text, "unknown" for any value that is
 * not one of the four known polarizations.
 */
static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

/* Attributes created for every CPU, see topology_cpu_init(). */
static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

/*
 * sysfs show handler of the per-CPU "dedicated" attribute: print the
 * value of topology_cpu_dedicated() for CPU dev->id.
 */
static ssize_t cpu_dedicated_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

/* Attributes created only with topology support, see topology_cpu_init(). */
static struct attribute *topology_extra_cpu_attrs[] = {
	&dev_attr_dedicated.attr,
	NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
	.attrs = topology_extra_cpu_attrs,
};

/*
 * Create the topology sysfs attributes of @cpu: always the polarization
 * group and, with topology support, the "dedicated" group as well. If
 * the second group cannot be created the first one is removed again.
 *
 * Returns 0 on success or the error of sysfs_create_group().
 */
int topology_cpu_init(struct cpu *cpu)
{
	int rc;

	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	if (rc || !MACHINE_HAS_TOPOLOGY)
		return rc;
	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
	if (rc)
		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	return rc;
}

/* Mask accessors used by the s390_topology table below. */
static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &cpu_topology[cpu].thread_mask;
}


const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

/*
 * Scheduling domain levels handed to set_sched_topology(), from threads
 * up to the whole machine; the table ends with an entry that has no mask
 * function.
 */
static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/*
 * Append list elements to @mask for one container level.
 *
 * @info:   stored topology information
 * @mask:   head of the list to extend
 * @offset: container level (callers in this file pass 1 for sockets,
 *          2 for books and 3 for drawers)
 *
 * The number of elements is the product of
 * info->mag[TOPOLOGY_NR_MAG - offset] and the following
 * (info->mnest - offset) lower-indexed mag[] values, but at least one.
 * Panics if an allocation fails.
 *
 * NOTE(review): the ->next pointer of the last element is never written
 * here; the list walkers rely on it being NULL, i.e. presumably on
 * memblock_alloc() returning zeroed memory (TODO confirm).
 */
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_alloc(sizeof(*mask->next), 8);
		if (!mask->next)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

/*
 * Early topology setup.
 *
 * Registers s390_topology with the scheduler and, unless a mode has been
 * chosen already (see topology_setup()), selects HW mode with topology
 * support and SINGLE mode without. With topology support the buffer for
 * the topology information is allocated, filled once, logged and used to
 * size the socket, book and drawer lists. Finally the topology data and
 * the dedicated flag of the current CPU are brought up to date.
 */
void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
		if (MACHINE_HAS_TOPOLOGY)
			topology_mode = TOPOLOGY_MODE_HW;
		else
			topology_mode = TOPOLOGY_MODE_SINGLE;
	}
	if (!MACHINE_HAS_TOPOLOGY)
		goto out;
	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!tl_info)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	__arch_update_cpu_topology();
	__arch_update_dedicated_flag(NULL);
}

/*
 * Map the "enabled" switch to a topology mode: SINGLE when disabled,
 * otherwise HW with topology support and PACKAGE without.
 */
static inline int topology_get_mode(int enabled)
{
	if (!enabled)
		return TOPOLOGY_MODE_SINGLE;
	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

/* Non-zero for every mode except TOPOLOGY_MODE_SINGLE. */
static inline int topology_is_enabled(void)
{
	return topology_mode != TOPOLOGY_MODE_SINGLE;
}

/*
 * Handler of the "topology" early parameter: parse @str as a boolean and
 * set topology_mode accordingly. Returns 0 on success or the error of
 * kstrtobool().
 */
static int __init topology_setup(char *str)
{
	bool enabled;
	int rc;

	rc = kstrtobool(str, &enabled);
	if (rc)
		return rc;
	topology_mode = topology_get_mode(enabled);
	return 0;
}
early_param("topology", topology_setup);

/*
 * Handler of the "topology" sysctl.
 *
 * A local table entry pointing at a local copy of the current state is
 * used for the actual parsing and formatting, with values limited to
 * the range 0..1. After a successful write the new mode is applied under
 * smp_cpu_state_mutex; if it differs from the current one an update is
 * scheduled, and the function waits for the update work to finish
 * before it returns.
 *
 * NOTE(review): "enabled" is a signed int that is handed to
 * proc_douintvec_minmax(); harmless for the values 0 and 1, but the
 * types do not match.
 */
static int topology_ctl_handler(struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int enabled = topology_is_enabled();
	int new_mode;
	int rc;
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &enabled,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;

	mutex_lock(&smp_cpu_state_mutex);
	new_mode = topology_get_mode(enabled);
	if (topology_mode != new_mode) {
		topology_mode = new_mode;
		topology_schedule_update();
	}
	mutex_unlock(&smp_cpu_state_mutex);
	topology_flush_work();

	return rc;
}

/* The "topology" entry, placed below the "s390" directory defined next. */
static struct ctl_table topology_ctl_table[] = {
	{
		.procname	= "topology",
		.mode		= 0644,
		.proc_handler	= topology_ctl_handler,
	},
	{ },
};

static struct ctl_table topology_dir_table[] = {
	{
		.procname	= "s390",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= topology_ctl_table,
	},
	{ },
};

/*
 * Device initcall: set up the polling timer (armed only with topology
 * support; without it all CPUs are marked horizontally polarized),
 * register the sysctl table and create the "dispatching" attribute
 * below the cpu subsystem root device.
 *
 * Returns the result of device_create_file().
 *
 * NOTE(review): the return value of register_sysctl_table() is not
 * checked.
 */
static int __init topology_init(void)
{
	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	register_sysctl_table(topology_dir_table);
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);