1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2007, 2011 4 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 5 */ 6 7 #define KMSG_COMPONENT "cpu" 8 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 9 10 #include <linux/workqueue.h> 11 #include <linux/memblock.h> 12 #include <linux/uaccess.h> 13 #include <linux/sysctl.h> 14 #include <linux/cpuset.h> 15 #include <linux/device.h> 16 #include <linux/export.h> 17 #include <linux/kernel.h> 18 #include <linux/sched.h> 19 #include <linux/sched/topology.h> 20 #include <linux/delay.h> 21 #include <linux/init.h> 22 #include <linux/slab.h> 23 #include <linux/cpu.h> 24 #include <linux/smp.h> 25 #include <linux/mm.h> 26 #include <linux/nodemask.h> 27 #include <linux/node.h> 28 #include <asm/sysinfo.h> 29 30 #define PTF_HORIZONTAL (0UL) 31 #define PTF_VERTICAL (1UL) 32 #define PTF_CHECK (2UL) 33 34 enum { 35 TOPOLOGY_MODE_HW, 36 TOPOLOGY_MODE_SINGLE, 37 TOPOLOGY_MODE_PACKAGE, 38 TOPOLOGY_MODE_UNINITIALIZED 39 }; 40 41 struct mask_info { 42 struct mask_info *next; 43 unsigned char id; 44 cpumask_t mask; 45 }; 46 47 static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; 48 static void set_topology_timer(void); 49 static void topology_work_fn(struct work_struct *work); 50 static struct sysinfo_15_1_x *tl_info; 51 52 static DECLARE_WORK(topology_work, topology_work_fn); 53 54 /* 55 * Socket/Book linked lists and cpu_topology updates are 56 * protected by "sched_domains_mutex". 57 */ 58 static struct mask_info socket_info; 59 static struct mask_info book_info; 60 static struct mask_info drawer_info; 61 62 struct cpu_topology_s390 cpu_topology[NR_CPUS]; 63 EXPORT_SYMBOL_GPL(cpu_topology); 64 65 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) 66 { 67 cpumask_t mask; 68 69 cpumask_copy(&mask, cpumask_of(cpu)); 70 switch (topology_mode) { 71 case TOPOLOGY_MODE_HW: 72 while (info) { 73 if (cpumask_test_cpu(cpu, &info->mask)) { 74 mask = info->mask; 75 break; 76 } 77 info = info->next; 78 } 79 if (cpumask_empty(&mask)) 80 cpumask_copy(&mask, cpumask_of(cpu)); 81 break; 82 case TOPOLOGY_MODE_PACKAGE: 83 cpumask_copy(&mask, cpu_present_mask); 84 break; 85 default: 86 fallthrough; 87 case TOPOLOGY_MODE_SINGLE: 88 cpumask_copy(&mask, cpumask_of(cpu)); 89 break; 90 } 91 cpumask_and(&mask, &mask, cpu_online_mask); 92 return mask; 93 } 94 95 static cpumask_t cpu_thread_map(unsigned int cpu) 96 { 97 cpumask_t mask; 98 int i; 99 100 cpumask_copy(&mask, cpumask_of(cpu)); 101 if (topology_mode != TOPOLOGY_MODE_HW) 102 return mask; 103 cpu -= cpu % (smp_cpu_mtid + 1); 104 for (i = 0; i <= smp_cpu_mtid; i++) 105 if (cpu_present(cpu + i)) 106 cpumask_set_cpu(cpu + i, &mask); 107 cpumask_and(&mask, &mask, cpu_online_mask); 108 return mask; 109 } 110 111 #define TOPOLOGY_CORE_BITS 64 112 113 static void add_cpus_to_mask(struct topology_core *tl_core, 114 struct mask_info *drawer, 115 struct mask_info *book, 116 struct mask_info *socket) 117 { 118 struct cpu_topology_s390 *topo; 119 unsigned int core; 120 121 for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { 122 unsigned int rcore; 123 int lcpu, i; 124 125 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 126 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 127 if (lcpu < 0) 128 continue; 129 for (i = 0; i <= smp_cpu_mtid; i++) { 130 topo = &cpu_topology[lcpu + i]; 131 topo->drawer_id = drawer->id; 132 topo->book_id = book->id; 133 topo->socket_id = socket->id; 134 topo->core_id = rcore; 135 topo->thread_id = lcpu + i; 136 topo->dedicated = tl_core->d; 137 cpumask_set_cpu(lcpu + i, &drawer->mask); 138 cpumask_set_cpu(lcpu + i, &book->mask); 139 cpumask_set_cpu(lcpu + i, &socket->mask); 140 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 141 } 142 } 143 } 144 145 static void clear_masks(void) 146 { 147 struct mask_info *info; 148 149 info = &socket_info; 150 while (info) { 151 cpumask_clear(&info->mask); 152 info = info->next; 153 } 154 info = &book_info; 155 while (info) { 156 cpumask_clear(&info->mask); 157 info = info->next; 158 } 159 info = &drawer_info; 160 while (info) { 161 cpumask_clear(&info->mask); 162 info = info->next; 163 } 164 } 165 166 static union topology_entry *next_tle(union topology_entry *tle) 167 { 168 if (!tle->nl) 169 return (union topology_entry *)((struct topology_core *)tle + 1); 170 return (union topology_entry *)((struct topology_container *)tle + 1); 171 } 172 173 static void tl_to_masks(struct sysinfo_15_1_x *info) 174 { 175 struct mask_info *socket = &socket_info; 176 struct mask_info *book = &book_info; 177 struct mask_info *drawer = &drawer_info; 178 union topology_entry *tle, *end; 179 180 clear_masks(); 181 tle = info->tle; 182 end = (union topology_entry *)((unsigned long)info + info->length); 183 while (tle < end) { 184 switch (tle->nl) { 185 case 3: 186 drawer = drawer->next; 187 drawer->id = tle->container.id; 188 break; 189 case 2: 190 book = book->next; 191 book->id = tle->container.id; 192 break; 193 case 1: 194 socket = socket->next; 195 socket->id = tle->container.id; 196 break; 197 case 0: 198 add_cpus_to_mask(&tle->cpu, drawer, book, socket); 199 break; 200 default: 201 clear_masks(); 202 return; 203 } 204 tle = next_tle(tle); 205 } 206 } 207 208 static void topology_update_polarization_simple(void) 209 { 210 int cpu; 211 212 for_each_possible_cpu(cpu) 213 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 214 } 215 216 static int ptf(unsigned long fc) 217 { 218 int rc; 219 220 asm volatile( 221 " .insn rre,0xb9a20000,%1,%1\n" 222 " ipm %0\n" 223 " srl %0,28\n" 224 : "=d" (rc) 225 : "d" (fc) : "cc"); 226 return rc; 227 } 228 229 int topology_set_cpu_management(int fc) 230 { 231 int cpu, rc; 232 233 if (!MACHINE_HAS_TOPOLOGY) 234 return -EOPNOTSUPP; 235 if (fc) 236 rc = ptf(PTF_VERTICAL); 237 else 238 rc = ptf(PTF_HORIZONTAL); 239 if (rc) 240 return -EBUSY; 241 for_each_possible_cpu(cpu) 242 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 243 return rc; 244 } 245 246 void update_cpu_masks(void) 247 { 248 struct cpu_topology_s390 *topo, *topo_package, *topo_sibling; 249 int cpu, sibling, pkg_first, smt_first, id; 250 251 for_each_possible_cpu(cpu) { 252 topo = &cpu_topology[cpu]; 253 topo->thread_mask = cpu_thread_map(cpu); 254 topo->core_mask = cpu_group_map(&socket_info, cpu); 255 topo->book_mask = cpu_group_map(&book_info, cpu); 256 topo->drawer_mask = cpu_group_map(&drawer_info, cpu); 257 topo->booted_cores = 0; 258 if (topology_mode != TOPOLOGY_MODE_HW) { 259 id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; 260 topo->thread_id = cpu; 261 topo->core_id = cpu; 262 topo->socket_id = id; 263 topo->book_id = id; 264 topo->drawer_id = id; 265 } 266 } 267 for_each_online_cpu(cpu) { 268 topo = &cpu_topology[cpu]; 269 pkg_first = cpumask_first(&topo->core_mask); 270 topo_package = &cpu_topology[pkg_first]; 271 if (cpu == pkg_first) { 272 for_each_cpu(sibling, &topo->core_mask) { 273 topo_sibling = &cpu_topology[sibling]; 274 smt_first = cpumask_first(&topo_sibling->thread_mask); 275 if (sibling == smt_first) 276 topo_package->booted_cores++; 277 } 278 } else { 279 topo->booted_cores = topo_package->booted_cores; 280 } 281 } 282 } 283 284 void store_topology(struct sysinfo_15_1_x *info) 285 { 286 stsi(info, 15, 1, topology_mnest_limit()); 287 } 288 289 static void __arch_update_dedicated_flag(void *arg) 290 { 291 if (topology_cpu_dedicated(smp_processor_id())) 292 set_cpu_flag(CIF_DEDICATED_CPU); 293 else 294 clear_cpu_flag(CIF_DEDICATED_CPU); 295 } 296 297 static int __arch_update_cpu_topology(void) 298 { 299 struct sysinfo_15_1_x *info = tl_info; 300 int rc = 0; 301 302 mutex_lock(&smp_cpu_state_mutex); 303 if (MACHINE_HAS_TOPOLOGY) { 304 rc = 1; 305 store_topology(info); 306 tl_to_masks(info); 307 } 308 update_cpu_masks(); 309 if (!MACHINE_HAS_TOPOLOGY) 310 topology_update_polarization_simple(); 311 mutex_unlock(&smp_cpu_state_mutex); 312 return rc; 313 } 314 315 int arch_update_cpu_topology(void) 316 { 317 struct device *dev; 318 int cpu, rc; 319 320 rc = __arch_update_cpu_topology(); 321 on_each_cpu(__arch_update_dedicated_flag, NULL, 0); 322 for_each_online_cpu(cpu) { 323 dev = get_cpu_device(cpu); 324 if (dev) 325 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 326 } 327 return rc; 328 } 329 330 static void topology_work_fn(struct work_struct *work) 331 { 332 rebuild_sched_domains(); 333 } 334 335 void topology_schedule_update(void) 336 { 337 schedule_work(&topology_work); 338 } 339 340 static void topology_flush_work(void) 341 { 342 flush_work(&topology_work); 343 } 344 345 static void topology_timer_fn(struct timer_list *unused) 346 { 347 if (ptf(PTF_CHECK)) 348 topology_schedule_update(); 349 set_topology_timer(); 350 } 351 352 static struct timer_list topology_timer; 353 354 static atomic_t topology_poll = ATOMIC_INIT(0); 355 356 static void set_topology_timer(void) 357 { 358 if (atomic_add_unless(&topology_poll, -1, 0)) 359 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); 360 else 361 mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); 362 } 363 364 void topology_expect_change(void) 365 { 366 if (!MACHINE_HAS_TOPOLOGY) 367 return; 368 /* This is racy, but it doesn't matter since it is just a heuristic. 369 * Worst case is that we poll in a higher frequency for a bit longer. 370 */ 371 if (atomic_read(&topology_poll) > 60) 372 return; 373 atomic_add(60, &topology_poll); 374 set_topology_timer(); 375 } 376 377 static int cpu_management; 378 379 static ssize_t dispatching_show(struct device *dev, 380 struct device_attribute *attr, 381 char *buf) 382 { 383 ssize_t count; 384 385 mutex_lock(&smp_cpu_state_mutex); 386 count = sprintf(buf, "%d\n", cpu_management); 387 mutex_unlock(&smp_cpu_state_mutex); 388 return count; 389 } 390 391 static ssize_t dispatching_store(struct device *dev, 392 struct device_attribute *attr, 393 const char *buf, 394 size_t count) 395 { 396 int val, rc; 397 char delim; 398 399 if (sscanf(buf, "%d %c", &val, &delim) != 1) 400 return -EINVAL; 401 if (val != 0 && val != 1) 402 return -EINVAL; 403 rc = 0; 404 get_online_cpus(); 405 mutex_lock(&smp_cpu_state_mutex); 406 if (cpu_management == val) 407 goto out; 408 rc = topology_set_cpu_management(val); 409 if (rc) 410 goto out; 411 cpu_management = val; 412 topology_expect_change(); 413 out: 414 mutex_unlock(&smp_cpu_state_mutex); 415 put_online_cpus(); 416 return rc ? rc : count; 417 } 418 static DEVICE_ATTR_RW(dispatching); 419 420 static ssize_t cpu_polarization_show(struct device *dev, 421 struct device_attribute *attr, char *buf) 422 { 423 int cpu = dev->id; 424 ssize_t count; 425 426 mutex_lock(&smp_cpu_state_mutex); 427 switch (smp_cpu_get_polarization(cpu)) { 428 case POLARIZATION_HRZ: 429 count = sprintf(buf, "horizontal\n"); 430 break; 431 case POLARIZATION_VL: 432 count = sprintf(buf, "vertical:low\n"); 433 break; 434 case POLARIZATION_VM: 435 count = sprintf(buf, "vertical:medium\n"); 436 break; 437 case POLARIZATION_VH: 438 count = sprintf(buf, "vertical:high\n"); 439 break; 440 default: 441 count = sprintf(buf, "unknown\n"); 442 break; 443 } 444 mutex_unlock(&smp_cpu_state_mutex); 445 return count; 446 } 447 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 448 449 static struct attribute *topology_cpu_attrs[] = { 450 &dev_attr_polarization.attr, 451 NULL, 452 }; 453 454 static struct attribute_group topology_cpu_attr_group = { 455 .attrs = topology_cpu_attrs, 456 }; 457 458 static ssize_t cpu_dedicated_show(struct device *dev, 459 struct device_attribute *attr, char *buf) 460 { 461 int cpu = dev->id; 462 ssize_t count; 463 464 mutex_lock(&smp_cpu_state_mutex); 465 count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); 466 mutex_unlock(&smp_cpu_state_mutex); 467 return count; 468 } 469 static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); 470 471 static struct attribute *topology_extra_cpu_attrs[] = { 472 &dev_attr_dedicated.attr, 473 NULL, 474 }; 475 476 static struct attribute_group topology_extra_cpu_attr_group = { 477 .attrs = topology_extra_cpu_attrs, 478 }; 479 480 int topology_cpu_init(struct cpu *cpu) 481 { 482 int rc; 483 484 rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 485 if (rc || !MACHINE_HAS_TOPOLOGY) 486 return rc; 487 rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); 488 if (rc) 489 sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); 490 return rc; 491 } 492 493 static const struct cpumask *cpu_thread_mask(int cpu) 494 { 495 return &cpu_topology[cpu].thread_mask; 496 } 497 498 499 const struct cpumask *cpu_coregroup_mask(int cpu) 500 { 501 return &cpu_topology[cpu].core_mask; 502 } 503 504 static const struct cpumask *cpu_book_mask(int cpu) 505 { 506 return &cpu_topology[cpu].book_mask; 507 } 508 509 static const struct cpumask *cpu_drawer_mask(int cpu) 510 { 511 return &cpu_topology[cpu].drawer_mask; 512 } 513 514 static struct sched_domain_topology_level s390_topology[] = { 515 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 516 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 517 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 518 { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, 519 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 520 { NULL, }, 521 }; 522 523 static void __init alloc_masks(struct sysinfo_15_1_x *info, 524 struct mask_info *mask, int offset) 525 { 526 int i, nr_masks; 527 528 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 529 for (i = 0; i < info->mnest - offset; i++) 530 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 531 nr_masks = max(nr_masks, 1); 532 for (i = 0; i < nr_masks; i++) { 533 mask->next = memblock_alloc(sizeof(*mask->next), 8); 534 if (!mask->next) 535 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 536 __func__, sizeof(*mask->next), 8); 537 mask = mask->next; 538 } 539 } 540 541 void __init topology_init_early(void) 542 { 543 struct sysinfo_15_1_x *info; 544 545 set_sched_topology(s390_topology); 546 if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { 547 if (MACHINE_HAS_TOPOLOGY) 548 topology_mode = TOPOLOGY_MODE_HW; 549 else 550 topology_mode = TOPOLOGY_MODE_SINGLE; 551 } 552 if (!MACHINE_HAS_TOPOLOGY) 553 goto out; 554 tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 555 if (!tl_info) 556 panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 557 __func__, PAGE_SIZE, PAGE_SIZE); 558 info = tl_info; 559 store_topology(info); 560 pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", 561 info->mag[0], info->mag[1], info->mag[2], info->mag[3], 562 info->mag[4], info->mag[5], info->mnest); 563 alloc_masks(info, &socket_info, 1); 564 alloc_masks(info, &book_info, 2); 565 alloc_masks(info, &drawer_info, 3); 566 out: 567 __arch_update_cpu_topology(); 568 __arch_update_dedicated_flag(NULL); 569 } 570 571 static inline int topology_get_mode(int enabled) 572 { 573 if (!enabled) 574 return TOPOLOGY_MODE_SINGLE; 575 return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; 576 } 577 578 static inline int topology_is_enabled(void) 579 { 580 return topology_mode != TOPOLOGY_MODE_SINGLE; 581 } 582 583 static int __init topology_setup(char *str) 584 { 585 bool enabled; 586 int rc; 587 588 rc = kstrtobool(str, &enabled); 589 if (rc) 590 return rc; 591 topology_mode = topology_get_mode(enabled); 592 return 0; 593 } 594 early_param("topology", topology_setup); 595 596 static int topology_ctl_handler(struct ctl_table *ctl, int write, 597 void *buffer, size_t *lenp, loff_t *ppos) 598 { 599 int enabled = topology_is_enabled(); 600 int new_mode; 601 int rc; 602 struct ctl_table ctl_entry = { 603 .procname = ctl->procname, 604 .data = &enabled, 605 .maxlen = sizeof(int), 606 .extra1 = SYSCTL_ZERO, 607 .extra2 = SYSCTL_ONE, 608 }; 609 610 rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); 611 if (rc < 0 || !write) 612 return rc; 613 614 mutex_lock(&smp_cpu_state_mutex); 615 new_mode = topology_get_mode(enabled); 616 if (topology_mode != new_mode) { 617 topology_mode = new_mode; 618 topology_schedule_update(); 619 } 620 mutex_unlock(&smp_cpu_state_mutex); 621 topology_flush_work(); 622 623 return rc; 624 } 625 626 static struct ctl_table topology_ctl_table[] = { 627 { 628 .procname = "topology", 629 .mode = 0644, 630 .proc_handler = topology_ctl_handler, 631 }, 632 { }, 633 }; 634 635 static struct ctl_table topology_dir_table[] = { 636 { 637 .procname = "s390", 638 .maxlen = 0, 639 .mode = 0555, 640 .child = topology_ctl_table, 641 }, 642 { }, 643 }; 644 645 static int __init topology_init(void) 646 { 647 timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); 648 if (MACHINE_HAS_TOPOLOGY) 649 set_topology_timer(); 650 else 651 topology_update_polarization_simple(); 652 register_sysctl_table(topology_dir_table); 653 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 654 } 655 device_initcall(topology_init); 656