1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2007, 2011 4 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 5 */ 6 7 #define KMSG_COMPONENT "cpu" 8 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 9 10 #include <linux/workqueue.h> 11 #include <linux/memblock.h> 12 #include <linux/uaccess.h> 13 #include <linux/sysctl.h> 14 #include <linux/cpuset.h> 15 #include <linux/device.h> 16 #include <linux/export.h> 17 #include <linux/kernel.h> 18 #include <linux/sched.h> 19 #include <linux/sched/topology.h> 20 #include <linux/delay.h> 21 #include <linux/init.h> 22 #include <linux/slab.h> 23 #include <linux/cpu.h> 24 #include <linux/smp.h> 25 #include <linux/mm.h> 26 #include <linux/nodemask.h> 27 #include <linux/node.h> 28 #include <asm/sysinfo.h> 29 30 #define PTF_HORIZONTAL (0UL) 31 #define PTF_VERTICAL (1UL) 32 #define PTF_CHECK (2UL) 33 34 enum { 35 TOPOLOGY_MODE_HW, 36 TOPOLOGY_MODE_SINGLE, 37 TOPOLOGY_MODE_PACKAGE, 38 TOPOLOGY_MODE_UNINITIALIZED 39 }; 40 41 struct mask_info { 42 struct mask_info *next; 43 unsigned char id; 44 cpumask_t mask; 45 }; 46 47 static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; 48 static void set_topology_timer(void); 49 static void topology_work_fn(struct work_struct *work); 50 static struct sysinfo_15_1_x *tl_info; 51 52 static DECLARE_WORK(topology_work, topology_work_fn); 53 54 /* 55 * Socket/Book linked lists and cpu_topology updates are 56 * protected by "sched_domains_mutex". 57 */ 58 static struct mask_info socket_info; 59 static struct mask_info book_info; 60 static struct mask_info drawer_info; 61 62 struct cpu_topology_s390 cpu_topology[NR_CPUS]; 63 EXPORT_SYMBOL_GPL(cpu_topology); 64 65 static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu) 66 { 67 static cpumask_t mask; 68 69 cpumask_copy(&mask, cpumask_of(cpu)); 70 switch (topology_mode) { 71 case TOPOLOGY_MODE_HW: 72 while (info) { 73 if (cpumask_test_cpu(cpu, &info->mask)) { 74 cpumask_copy(&mask, &info->mask); 75 break; 76 } 77 info = info->next; 78 } 79 break; 80 case TOPOLOGY_MODE_PACKAGE: 81 cpumask_copy(&mask, cpu_present_mask); 82 break; 83 default: 84 fallthrough; 85 case TOPOLOGY_MODE_SINGLE: 86 cpumask_copy(&mask, cpumask_of(cpu)); 87 break; 88 } 89 cpumask_and(&mask, &mask, cpu_online_mask); 90 cpumask_copy(dst, &mask); 91 } 92 93 static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) 94 { 95 static cpumask_t mask; 96 int i; 97 98 cpumask_copy(&mask, cpumask_of(cpu)); 99 if (topology_mode != TOPOLOGY_MODE_HW) 100 goto out; 101 cpu -= cpu % (smp_cpu_mtid + 1); 102 for (i = 0; i <= smp_cpu_mtid; i++) 103 if (cpu_present(cpu + i)) 104 cpumask_set_cpu(cpu + i, &mask); 105 cpumask_and(&mask, &mask, cpu_online_mask); 106 out: 107 cpumask_copy(dst, &mask); 108 } 109 110 #define TOPOLOGY_CORE_BITS 64 111 112 static void add_cpus_to_mask(struct topology_core *tl_core, 113 struct mask_info *drawer, 114 struct mask_info *book, 115 struct mask_info *socket) 116 { 117 struct cpu_topology_s390 *topo; 118 unsigned int core; 119 120 for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { 121 unsigned int rcore; 122 int lcpu, i; 123 124 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 125 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 126 if (lcpu < 0) 127 continue; 128 for (i = 0; i <= smp_cpu_mtid; i++) { 129 topo = &cpu_topology[lcpu + i]; 130 topo->drawer_id = drawer->id; 131 topo->book_id = book->id; 132 topo->socket_id = socket->id; 133 topo->core_id = rcore; 134 topo->thread_id = lcpu + i; 135 topo->dedicated = tl_core->d; 136 cpumask_set_cpu(lcpu + i, &drawer->mask); 137 cpumask_set_cpu(lcpu + i, &book->mask); 138 cpumask_set_cpu(lcpu + i, &socket->mask); 139 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 140 } 141 } 142 } 143 144 static void clear_masks(void) 145 { 146 struct mask_info *info; 147 148 info = &socket_info; 149 while (info) { 150 cpumask_clear(&info->mask); 151 info = info->next; 152 } 153 info = &book_info; 154 while (info) { 155 cpumask_clear(&info->mask); 156 info = info->next; 157 } 158 info = &drawer_info; 159 while (info) { 160 cpumask_clear(&info->mask); 161 info = info->next; 162 } 163 } 164 165 static union topology_entry *next_tle(union topology_entry *tle) 166 { 167 if (!tle->nl) 168 return (union topology_entry *)((struct topology_core *)tle + 1); 169 return (union topology_entry *)((struct topology_container *)tle + 1); 170 } 171 172 static void tl_to_masks(struct sysinfo_15_1_x *info) 173 { 174 struct mask_info *socket = &socket_info; 175 struct mask_info *book = &book_info; 176 struct mask_info *drawer = &drawer_info; 177 union topology_entry *tle, *end; 178 179 clear_masks(); 180 tle = info->tle; 181 end = (union topology_entry *)((unsigned long)info + info->length); 182 while (tle < end) { 183 switch (tle->nl) { 184 case 3: 185 drawer = drawer->next; 186 drawer->id = tle->container.id; 187 break; 188 case 2: 189 book = book->next; 190 book->id = tle->container.id; 191 break; 192 case 1: 193 socket = socket->next; 194 socket->id = tle->container.id; 195 break; 196 case 0: 197 add_cpus_to_mask(&tle->cpu, drawer, book, socket); 198 break; 199 default: 200 clear_masks(); 201 return; 202 } 203 tle = next_tle(tle); 204 } 205 } 206 207 static void topology_update_polarization_simple(void) 208 { 209 int cpu; 210 211 for_each_possible_cpu(cpu) 212 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 213 } 214 215 static int ptf(unsigned long fc) 216 { 217 int rc; 218 219 asm volatile( 220 " .insn rre,0xb9a20000,%1,%1\n" 221 " ipm %0\n" 222 " srl %0,28\n" 223 : "=d" (rc) 224 : "d" (fc) : "cc"); 225 return rc; 226 } 227 228 int topology_set_cpu_management(int fc) 229 { 230 int cpu, rc; 231 232 if (!MACHINE_HAS_TOPOLOGY) 233 return -EOPNOTSUPP; 234 if (fc) 235 rc = ptf(PTF_VERTICAL); 236 else 237 rc = ptf(PTF_HORIZONTAL); 238 if (rc) 239 return -EBUSY; 240 for_each_possible_cpu(cpu) 241 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 242 return rc; 243 } 244 245 void update_cpu_masks(void) 246 { 247 struct cpu_topology_s390 *topo, *topo_package, *topo_sibling; 248 int cpu, sibling, pkg_first, smt_first, id; 249 250 for_each_possible_cpu(cpu) { 251 topo = &cpu_topology[cpu]; 252 cpu_thread_map(&topo->thread_mask, cpu); 253 cpu_group_map(&topo->core_mask, &socket_info, cpu); 254 cpu_group_map(&topo->book_mask, &book_info, cpu); 255 cpu_group_map(&topo->drawer_mask, &drawer_info, cpu); 256 topo->booted_cores = 0; 257 if (topology_mode != TOPOLOGY_MODE_HW) { 258 id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; 259 topo->thread_id = cpu; 260 topo->core_id = cpu; 261 topo->socket_id = id; 262 topo->book_id = id; 263 topo->drawer_id = id; 264 } 265 } 266 for_each_online_cpu(cpu) { 267 topo = &cpu_topology[cpu]; 268 pkg_first = cpumask_first(&topo->core_mask); 269 topo_package = &cpu_topology[pkg_first]; 270 if (cpu == pkg_first) { 271 for_each_cpu(sibling, &topo->core_mask) { 272 topo_sibling = &cpu_topology[sibling]; 273 smt_first = cpumask_first(&topo_sibling->thread_mask); 274 if (sibling == smt_first) 275 topo_package->booted_cores++; 276 } 277 } else { 278 topo->booted_cores = topo_package->booted_cores; 279 } 280 } 281 } 282 283 void store_topology(struct sysinfo_15_1_x *info) 284 { 285 stsi(info, 15, 1, topology_mnest_limit()); 286 } 287 288 static void __arch_update_dedicated_flag(void *arg) 289 { 290 if (topology_cpu_dedicated(smp_processor_id())) 291 set_cpu_flag(CIF_DEDICATED_CPU); 292 else 293 clear_cpu_flag(CIF_DEDICATED_CPU); 294 } 295 296 static int __arch_update_cpu_topology(void) 297 { 298 struct sysinfo_15_1_x *info = tl_info; 299 int rc = 0; 300 301 mutex_lock(&smp_cpu_state_mutex); 302 if (MACHINE_HAS_TOPOLOGY) { 303 rc = 1; 304 store_topology(info); 305 tl_to_masks(info); 306 } 307 update_cpu_masks(); 308 if (!MACHINE_HAS_TOPOLOGY) 309 topology_update_polarization_simple(); 310 mutex_unlock(&smp_cpu_state_mutex); 311 return rc; 312 } 313 314 int arch_update_cpu_topology(void) 315 { 316 struct device *dev; 317 int cpu, rc; 318 319 rc = __arch_update_cpu_topology(); 320 on_each_cpu(__arch_update_dedicated_flag, NULL, 0); 321 for_each_online_cpu(cpu) { 322 dev = get_cpu_device(cpu); 323 if (dev) 324 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 325 } 326 return rc; 327 } 328 329 static void topology_work_fn(struct work_struct *work) 330 { 331 rebuild_sched_domains(); 332 } 333 334 void topology_schedule_update(void) 335 { 336 schedule_work(&topology_work); 337 } 338 339 static void topology_flush_work(void) 340 { 341 flush_work(&topology_work); 342 } 343 344 static void topology_timer_fn(struct timer_list *unused) 345 { 346 if (ptf(PTF_CHECK)) 347 topology_schedule_update(); 348 set_topology_timer(); 349 } 350 351 static struct timer_list topology_timer; 352 353 static atomic_t topology_poll = ATOMIC_INIT(0); 354 355 static void set_topology_timer(void) 356 { 357 if (atomic_add_unless(&topology_poll, -1, 0)) 358 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); 359 else 360 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); 361 } 362 363 void topology_expect_change(void) 364 { 365 if (!MACHINE_HAS_TOPOLOGY) 366 return; 367 /* This is racy, but it doesn't matter since it is just a heuristic. 368 * Worst case is that we poll in a higher frequency for a bit longer. 369 */ 370 if (atomic_read(&topology_poll) > 60) 371 return; 372 atomic_add(60, &topology_poll); 373 set_topology_timer(); 374 } 375 376 static int cpu_management; 377 378 static ssize_t dispatching_show(struct device *dev, 379 struct device_attribute *attr, 380 char *buf) 381 { 382 ssize_t count; 383 384 mutex_lock(&smp_cpu_state_mutex); 385 count = sprintf(buf, "%d\n", cpu_management); 386 mutex_unlock(&smp_cpu_state_mutex); 387 return count; 388 } 389 390 static ssize_t dispatching_store(struct device *dev, 391 struct device_attribute *attr, 392 const char *buf, 393 size_t count) 394 { 395 int val, rc; 396 char delim; 397 398 if (sscanf(buf, "%d %c", &val, &delim) != 1) 399 return -EINVAL; 400 if (val != 0 && val != 1) 401 return -EINVAL; 402 rc = 0; 403 get_online_cpus(); 404 mutex_lock(&smp_cpu_state_mutex); 405 if (cpu_management == val) 406 goto out; 407 rc = topology_set_cpu_management(val); 408 if (rc) 409 goto out; 410 cpu_management = val; 411 topology_expect_change(); 412 out: 413 mutex_unlock(&smp_cpu_state_mutex); 414 put_online_cpus(); 415 return rc ? rc : count; 416 } 417 static DEVICE_ATTR_RW(dispatching); 418 419 static ssize_t cpu_polarization_show(struct device *dev, 420 struct device_attribute *attr, char *buf) 421 { 422 int cpu = dev->id; 423 ssize_t count; 424 425 mutex_lock(&smp_cpu_state_mutex); 426 switch (smp_cpu_get_polarization(cpu)) { 427 case POLARIZATION_HRZ: 428 count = sprintf(buf, "horizontal\n"); 429 break; 430 case POLARIZATION_VL: 431 count = sprintf(buf, "vertical:low\n"); 432 break; 433 case POLARIZATION_VM: 434 count = sprintf(buf, "vertical:medium\n"); 435 break; 436 case POLARIZATION_VH: 437 count = sprintf(buf, "vertical:high\n"); 438 break; 439 default: 440 count = sprintf(buf, "unknown\n"); 441 break; 442 } 443 mutex_unlock(&smp_cpu_state_mutex); 444 return count; 445 } 446 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 447 448 static struct attribute *topology_cpu_attrs[] = { 449 &dev_attr_polarization.attr, 450 NULL, 451 }; 452 453 static struct attribute_group topology_cpu_attr_group = { 454 .attrs = topology_cpu_attrs, 455 }; 456 457 static ssize_t cpu_dedicated_show(struct device *dev, 458 struct device_attribute *attr, char *buf) 459 { 460 int cpu = dev->id; 461 ssize_t count; 462 463 mutex_lock(&smp_cpu_state_mutex); 464 count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); 465 mutex_unlock(&smp_cpu_state_mutex); 466 return count; 467 } 468 static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); 469 470 static struct attribute *topology_extra_cpu_attrs[] = { 471 &dev_attr_dedicated.attr, 472 NULL, 473 }; 474 475 static struct attribute_group topology_extra_cpu_attr_group = { 476 .attrs = topology_extra_cpu_attrs, 477 }; 478 479 int topology_cpu_init(struct cpu *cpu) 480 { 481 int rc; 482 483 rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 484 if (rc || !MACHINE_HAS_TOPOLOGY) 485 return rc; 486 rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); 487 if (rc) 488 sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); 489 return rc; 490 } 491 492 static const struct cpumask *cpu_thread_mask(int cpu) 493 { 494 return &cpu_topology[cpu].thread_mask; 495 } 496 497 498 const struct cpumask *cpu_coregroup_mask(int cpu) 499 { 500 return &cpu_topology[cpu].core_mask; 501 } 502 503 static const struct cpumask *cpu_book_mask(int cpu) 504 { 505 return &cpu_topology[cpu].book_mask; 506 } 507 508 static const struct cpumask *cpu_drawer_mask(int cpu) 509 { 510 return &cpu_topology[cpu].drawer_mask; 511 } 512 513 static struct sched_domain_topology_level s390_topology[] = { 514 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 515 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 516 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 517 { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, 518 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 519 { NULL, }, 520 }; 521 522 static void __init alloc_masks(struct sysinfo_15_1_x *info, 523 struct mask_info *mask, int offset) 524 { 525 int i, nr_masks; 526 527 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 528 for (i = 0; i < info->mnest - offset; i++) 529 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 530 nr_masks = max(nr_masks, 1); 531 for (i = 0; i < nr_masks; i++) { 532 mask->next = memblock_alloc(sizeof(*mask->next), 8); 533 if (!mask->next) 534 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 535 __func__, sizeof(*mask->next), 8); 536 mask = mask->next; 537 } 538 } 539 540 void __init topology_init_early(void) 541 { 542 struct sysinfo_15_1_x *info; 543 544 set_sched_topology(s390_topology); 545 if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { 546 if (MACHINE_HAS_TOPOLOGY) 547 topology_mode = TOPOLOGY_MODE_HW; 548 else 549 topology_mode = TOPOLOGY_MODE_SINGLE; 550 } 551 if (!MACHINE_HAS_TOPOLOGY) 552 goto out; 553 tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 554 if (!tl_info) 555 panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 556 __func__, PAGE_SIZE, PAGE_SIZE); 557 info = tl_info; 558 store_topology(info); 559 pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", 560 info->mag[0], info->mag[1], info->mag[2], info->mag[3], 561 info->mag[4], info->mag[5], info->mnest); 562 alloc_masks(info, &socket_info, 1); 563 alloc_masks(info, &book_info, 2); 564 alloc_masks(info, &drawer_info, 3); 565 out: 566 __arch_update_cpu_topology(); 567 __arch_update_dedicated_flag(NULL); 568 } 569 570 static inline int topology_get_mode(int enabled) 571 { 572 if (!enabled) 573 return TOPOLOGY_MODE_SINGLE; 574 return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; 575 } 576 577 static inline int topology_is_enabled(void) 578 { 579 return topology_mode != TOPOLOGY_MODE_SINGLE; 580 } 581 582 static int __init topology_setup(char *str) 583 { 584 bool enabled; 585 int rc; 586 587 rc = kstrtobool(str, &enabled); 588 if (rc) 589 return rc; 590 topology_mode = topology_get_mode(enabled); 591 return 0; 592 } 593 early_param("topology", topology_setup); 594 595 static int topology_ctl_handler(struct ctl_table *ctl, int write, 596 void *buffer, size_t *lenp, loff_t *ppos) 597 { 598 int enabled = topology_is_enabled(); 599 int new_mode; 600 int rc; 601 struct ctl_table ctl_entry = { 602 .procname = ctl->procname, 603 .data = &enabled, 604 .maxlen = sizeof(int), 605 .extra1 = SYSCTL_ZERO, 606 .extra2 = SYSCTL_ONE, 607 }; 608 609 rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); 610 if (rc < 0 || !write) 611 return rc; 612 613 mutex_lock(&smp_cpu_state_mutex); 614 new_mode = topology_get_mode(enabled); 615 if (topology_mode != new_mode) { 616 topology_mode = new_mode; 617 topology_schedule_update(); 618 } 619 mutex_unlock(&smp_cpu_state_mutex); 620 topology_flush_work(); 621 622 return rc; 623 } 624 625 static struct ctl_table topology_ctl_table[] = { 626 { 627 .procname = "topology", 628 .mode = 0644, 629 .proc_handler = topology_ctl_handler, 630 }, 631 { }, 632 }; 633 634 static struct ctl_table topology_dir_table[] = { 635 { 636 .procname = "s390", 637 .maxlen = 0, 638 .mode = 0555, 639 .child = topology_ctl_table, 640 }, 641 { }, 642 }; 643 644 static int __init topology_init(void) 645 { 646 timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); 647 if (MACHINE_HAS_TOPOLOGY) 648 set_topology_timer(); 649 else 650 topology_update_polarization_simple(); 651 register_sysctl_table(topology_dir_table); 652 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 653 } 654 device_initcall(topology_init); 655