1 /* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 4 */ 5 6 #define KMSG_COMPONENT "cpu" 7 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 8 9 #include <linux/workqueue.h> 10 #include <linux/bootmem.h> 11 #include <linux/uaccess.h> 12 #include <linux/sysctl.h> 13 #include <linux/cpuset.h> 14 #include <linux/device.h> 15 #include <linux/export.h> 16 #include <linux/kernel.h> 17 #include <linux/sched.h> 18 #include <linux/sched/topology.h> 19 #include <linux/delay.h> 20 #include <linux/init.h> 21 #include <linux/slab.h> 22 #include <linux/cpu.h> 23 #include <linux/smp.h> 24 #include <linux/mm.h> 25 #include <linux/nodemask.h> 26 #include <linux/node.h> 27 #include <asm/sysinfo.h> 28 #include <asm/numa.h> 29 30 #define PTF_HORIZONTAL (0UL) 31 #define PTF_VERTICAL (1UL) 32 #define PTF_CHECK (2UL) 33 34 enum { 35 TOPOLOGY_MODE_HW, 36 TOPOLOGY_MODE_SINGLE, 37 TOPOLOGY_MODE_PACKAGE, 38 TOPOLOGY_MODE_UNINITIALIZED 39 }; 40 41 struct mask_info { 42 struct mask_info *next; 43 unsigned char id; 44 cpumask_t mask; 45 }; 46 47 static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; 48 static void set_topology_timer(void); 49 static void topology_work_fn(struct work_struct *work); 50 static struct sysinfo_15_1_x *tl_info; 51 52 static DECLARE_WORK(topology_work, topology_work_fn); 53 54 /* 55 * Socket/Book linked lists and cpu_topology updates are 56 * protected by "sched_domains_mutex". 57 */ 58 static struct mask_info socket_info; 59 static struct mask_info book_info; 60 static struct mask_info drawer_info; 61 62 struct cpu_topology_s390 cpu_topology[NR_CPUS]; 63 EXPORT_SYMBOL_GPL(cpu_topology); 64 65 cpumask_t cpus_with_topology; 66 67 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) 68 { 69 cpumask_t mask; 70 71 cpumask_copy(&mask, cpumask_of(cpu)); 72 switch (topology_mode) { 73 case TOPOLOGY_MODE_HW: 74 while (info) { 75 if (cpumask_test_cpu(cpu, &info->mask)) { 76 mask = info->mask; 77 break; 78 } 79 info = info->next; 80 } 81 if (cpumask_empty(&mask)) 82 cpumask_copy(&mask, cpumask_of(cpu)); 83 break; 84 case TOPOLOGY_MODE_PACKAGE: 85 cpumask_copy(&mask, cpu_present_mask); 86 break; 87 default: 88 /* fallthrough */ 89 case TOPOLOGY_MODE_SINGLE: 90 cpumask_copy(&mask, cpumask_of(cpu)); 91 break; 92 } 93 return mask; 94 } 95 96 static cpumask_t cpu_thread_map(unsigned int cpu) 97 { 98 cpumask_t mask; 99 int i; 100 101 cpumask_copy(&mask, cpumask_of(cpu)); 102 if (topology_mode != TOPOLOGY_MODE_HW) 103 return mask; 104 cpu -= cpu % (smp_cpu_mtid + 1); 105 for (i = 0; i <= smp_cpu_mtid; i++) 106 if (cpu_present(cpu + i)) 107 cpumask_set_cpu(cpu + i, &mask); 108 return mask; 109 } 110 111 #define TOPOLOGY_CORE_BITS 64 112 113 static void add_cpus_to_mask(struct topology_core *tl_core, 114 struct mask_info *drawer, 115 struct mask_info *book, 116 struct mask_info *socket) 117 { 118 struct cpu_topology_s390 *topo; 119 unsigned int core; 120 121 for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { 122 unsigned int rcore; 123 int lcpu, i; 124 125 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 126 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 127 if (lcpu < 0) 128 continue; 129 for (i = 0; i <= smp_cpu_mtid; i++) { 130 topo = &cpu_topology[lcpu + i]; 131 topo->drawer_id = drawer->id; 132 topo->book_id = book->id; 133 topo->socket_id = socket->id; 134 topo->core_id = rcore; 135 topo->thread_id = lcpu + i; 136 topo->dedicated = tl_core->d; 137 cpumask_set_cpu(lcpu + i, &drawer->mask); 138 cpumask_set_cpu(lcpu + i, &book->mask); 139 cpumask_set_cpu(lcpu + i, &socket->mask); 140 cpumask_set_cpu(lcpu + i, &cpus_with_topology); 141 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 142 } 143 } 144 } 145 146 static void clear_masks(void) 147 { 148 struct mask_info *info; 149 150 info = &socket_info; 151 while (info) { 152 cpumask_clear(&info->mask); 153 info = info->next; 154 } 155 info = &book_info; 156 while (info) { 157 cpumask_clear(&info->mask); 158 info = info->next; 159 } 160 info = &drawer_info; 161 while (info) { 162 cpumask_clear(&info->mask); 163 info = info->next; 164 } 165 } 166 167 static union topology_entry *next_tle(union topology_entry *tle) 168 { 169 if (!tle->nl) 170 return (union topology_entry *)((struct topology_core *)tle + 1); 171 return (union topology_entry *)((struct topology_container *)tle + 1); 172 } 173 174 static void tl_to_masks(struct sysinfo_15_1_x *info) 175 { 176 struct mask_info *socket = &socket_info; 177 struct mask_info *book = &book_info; 178 struct mask_info *drawer = &drawer_info; 179 union topology_entry *tle, *end; 180 181 clear_masks(); 182 tle = info->tle; 183 end = (union topology_entry *)((unsigned long)info + info->length); 184 while (tle < end) { 185 switch (tle->nl) { 186 case 3: 187 drawer = drawer->next; 188 drawer->id = tle->container.id; 189 break; 190 case 2: 191 book = book->next; 192 book->id = tle->container.id; 193 break; 194 case 1: 195 socket = socket->next; 196 socket->id = tle->container.id; 197 break; 198 case 0: 199 add_cpus_to_mask(&tle->cpu, drawer, book, socket); 200 break; 201 default: 202 clear_masks(); 203 return; 204 } 205 tle = next_tle(tle); 206 } 207 } 208 209 static void topology_update_polarization_simple(void) 210 { 211 int cpu; 212 213 for_each_possible_cpu(cpu) 214 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 215 } 216 217 static int ptf(unsigned long fc) 218 { 219 int rc; 220 221 asm volatile( 222 " .insn rre,0xb9a20000,%1,%1\n" 223 " ipm %0\n" 224 " srl %0,28\n" 225 : "=d" (rc) 226 : "d" (fc) : "cc"); 227 return rc; 228 } 229 230 int topology_set_cpu_management(int fc) 231 { 232 int cpu, rc; 233 234 if (!MACHINE_HAS_TOPOLOGY) 235 return -EOPNOTSUPP; 236 if (fc) 237 rc = ptf(PTF_VERTICAL); 238 else 239 rc = ptf(PTF_HORIZONTAL); 240 if (rc) 241 return -EBUSY; 242 for_each_possible_cpu(cpu) 243 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 244 return rc; 245 } 246 247 static void update_cpu_masks(void) 248 { 249 struct cpu_topology_s390 *topo; 250 int cpu, id; 251 252 for_each_possible_cpu(cpu) { 253 topo = &cpu_topology[cpu]; 254 topo->thread_mask = cpu_thread_map(cpu); 255 topo->core_mask = cpu_group_map(&socket_info, cpu); 256 topo->book_mask = cpu_group_map(&book_info, cpu); 257 topo->drawer_mask = cpu_group_map(&drawer_info, cpu); 258 if (topology_mode != TOPOLOGY_MODE_HW) { 259 id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; 260 topo->thread_id = cpu; 261 topo->core_id = cpu; 262 topo->socket_id = id; 263 topo->book_id = id; 264 topo->drawer_id = id; 265 if (cpu_present(cpu)) 266 cpumask_set_cpu(cpu, &cpus_with_topology); 267 } 268 } 269 numa_update_cpu_topology(); 270 } 271 272 void store_topology(struct sysinfo_15_1_x *info) 273 { 274 stsi(info, 15, 1, topology_mnest_limit()); 275 } 276 277 static void __arch_update_dedicated_flag(void *arg) 278 { 279 if (topology_cpu_dedicated(smp_processor_id())) 280 set_cpu_flag(CIF_DEDICATED_CPU); 281 else 282 clear_cpu_flag(CIF_DEDICATED_CPU); 283 } 284 285 static int __arch_update_cpu_topology(void) 286 { 287 struct sysinfo_15_1_x *info = tl_info; 288 int rc = 0; 289 290 mutex_lock(&smp_cpu_state_mutex); 291 cpumask_clear(&cpus_with_topology); 292 if (MACHINE_HAS_TOPOLOGY) { 293 rc = 1; 294 store_topology(info); 295 tl_to_masks(info); 296 } 297 update_cpu_masks(); 298 if (!MACHINE_HAS_TOPOLOGY) 299 topology_update_polarization_simple(); 300 mutex_unlock(&smp_cpu_state_mutex); 301 return rc; 302 } 303 304 int arch_update_cpu_topology(void) 305 { 306 struct device *dev; 307 int cpu, rc; 308 309 rc = __arch_update_cpu_topology(); 310 on_each_cpu(__arch_update_dedicated_flag, NULL, 0); 311 for_each_online_cpu(cpu) { 312 dev = get_cpu_device(cpu); 313 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 314 } 315 return rc; 316 } 317 318 static void topology_work_fn(struct work_struct *work) 319 { 320 rebuild_sched_domains(); 321 } 322 323 void topology_schedule_update(void) 324 { 325 schedule_work(&topology_work); 326 } 327 328 static void topology_flush_work(void) 329 { 330 flush_work(&topology_work); 331 } 332 333 static void topology_timer_fn(unsigned long ignored) 334 { 335 if (ptf(PTF_CHECK)) 336 topology_schedule_update(); 337 set_topology_timer(); 338 } 339 340 static struct timer_list topology_timer = 341 TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); 342 343 static atomic_t topology_poll = ATOMIC_INIT(0); 344 345 static void set_topology_timer(void) 346 { 347 if (atomic_add_unless(&topology_poll, -1, 0)) 348 mod_timer(&topology_timer, jiffies + HZ / 10); 349 else 350 mod_timer(&topology_timer, jiffies + HZ * 60); 351 } 352 353 void topology_expect_change(void) 354 { 355 if (!MACHINE_HAS_TOPOLOGY) 356 return; 357 /* This is racy, but it doesn't matter since it is just a heuristic. 358 * Worst case is that we poll in a higher frequency for a bit longer. 359 */ 360 if (atomic_read(&topology_poll) > 60) 361 return; 362 atomic_add(60, &topology_poll); 363 set_topology_timer(); 364 } 365 366 static int cpu_management; 367 368 static ssize_t dispatching_show(struct device *dev, 369 struct device_attribute *attr, 370 char *buf) 371 { 372 ssize_t count; 373 374 mutex_lock(&smp_cpu_state_mutex); 375 count = sprintf(buf, "%d\n", cpu_management); 376 mutex_unlock(&smp_cpu_state_mutex); 377 return count; 378 } 379 380 static ssize_t dispatching_store(struct device *dev, 381 struct device_attribute *attr, 382 const char *buf, 383 size_t count) 384 { 385 int val, rc; 386 char delim; 387 388 if (sscanf(buf, "%d %c", &val, &delim) != 1) 389 return -EINVAL; 390 if (val != 0 && val != 1) 391 return -EINVAL; 392 rc = 0; 393 get_online_cpus(); 394 mutex_lock(&smp_cpu_state_mutex); 395 if (cpu_management == val) 396 goto out; 397 rc = topology_set_cpu_management(val); 398 if (rc) 399 goto out; 400 cpu_management = val; 401 topology_expect_change(); 402 out: 403 mutex_unlock(&smp_cpu_state_mutex); 404 put_online_cpus(); 405 return rc ? rc : count; 406 } 407 static DEVICE_ATTR(dispatching, 0644, dispatching_show, 408 dispatching_store); 409 410 static ssize_t cpu_polarization_show(struct device *dev, 411 struct device_attribute *attr, char *buf) 412 { 413 int cpu = dev->id; 414 ssize_t count; 415 416 mutex_lock(&smp_cpu_state_mutex); 417 switch (smp_cpu_get_polarization(cpu)) { 418 case POLARIZATION_HRZ: 419 count = sprintf(buf, "horizontal\n"); 420 break; 421 case POLARIZATION_VL: 422 count = sprintf(buf, "vertical:low\n"); 423 break; 424 case POLARIZATION_VM: 425 count = sprintf(buf, "vertical:medium\n"); 426 break; 427 case POLARIZATION_VH: 428 count = sprintf(buf, "vertical:high\n"); 429 break; 430 default: 431 count = sprintf(buf, "unknown\n"); 432 break; 433 } 434 mutex_unlock(&smp_cpu_state_mutex); 435 return count; 436 } 437 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 438 439 static struct attribute *topology_cpu_attrs[] = { 440 &dev_attr_polarization.attr, 441 NULL, 442 }; 443 444 static struct attribute_group topology_cpu_attr_group = { 445 .attrs = topology_cpu_attrs, 446 }; 447 448 static ssize_t cpu_dedicated_show(struct device *dev, 449 struct device_attribute *attr, char *buf) 450 { 451 int cpu = dev->id; 452 ssize_t count; 453 454 mutex_lock(&smp_cpu_state_mutex); 455 count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); 456 mutex_unlock(&smp_cpu_state_mutex); 457 return count; 458 } 459 static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); 460 461 static struct attribute *topology_extra_cpu_attrs[] = { 462 &dev_attr_dedicated.attr, 463 NULL, 464 }; 465 466 static struct attribute_group topology_extra_cpu_attr_group = { 467 .attrs = topology_extra_cpu_attrs, 468 }; 469 470 int topology_cpu_init(struct cpu *cpu) 471 { 472 int rc; 473 474 rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 475 if (rc || !MACHINE_HAS_TOPOLOGY) 476 return rc; 477 rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); 478 if (rc) 479 sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); 480 return rc; 481 } 482 483 static const struct cpumask *cpu_thread_mask(int cpu) 484 { 485 return &cpu_topology[cpu].thread_mask; 486 } 487 488 489 const struct cpumask *cpu_coregroup_mask(int cpu) 490 { 491 return &cpu_topology[cpu].core_mask; 492 } 493 494 static const struct cpumask *cpu_book_mask(int cpu) 495 { 496 return &cpu_topology[cpu].book_mask; 497 } 498 499 static const struct cpumask *cpu_drawer_mask(int cpu) 500 { 501 return &cpu_topology[cpu].drawer_mask; 502 } 503 504 static struct sched_domain_topology_level s390_topology[] = { 505 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 506 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 507 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 508 { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, 509 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 510 { NULL, }, 511 }; 512 513 static void __init alloc_masks(struct sysinfo_15_1_x *info, 514 struct mask_info *mask, int offset) 515 { 516 int i, nr_masks; 517 518 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 519 for (i = 0; i < info->mnest - offset; i++) 520 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 521 nr_masks = max(nr_masks, 1); 522 for (i = 0; i < nr_masks; i++) { 523 mask->next = memblock_virt_alloc(sizeof(*mask->next), 8); 524 mask = mask->next; 525 } 526 } 527 528 void __init topology_init_early(void) 529 { 530 struct sysinfo_15_1_x *info; 531 532 set_sched_topology(s390_topology); 533 if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { 534 if (MACHINE_HAS_TOPOLOGY) 535 topology_mode = TOPOLOGY_MODE_HW; 536 else 537 topology_mode = TOPOLOGY_MODE_SINGLE; 538 } 539 if (!MACHINE_HAS_TOPOLOGY) 540 goto out; 541 tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE); 542 info = tl_info; 543 store_topology(info); 544 pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", 545 info->mag[0], info->mag[1], info->mag[2], info->mag[3], 546 info->mag[4], info->mag[5], info->mnest); 547 alloc_masks(info, &socket_info, 1); 548 alloc_masks(info, &book_info, 2); 549 alloc_masks(info, &drawer_info, 3); 550 out: 551 __arch_update_cpu_topology(); 552 __arch_update_dedicated_flag(NULL); 553 } 554 555 static inline int topology_get_mode(int enabled) 556 { 557 if (!enabled) 558 return TOPOLOGY_MODE_SINGLE; 559 return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; 560 } 561 562 static inline int topology_is_enabled(void) 563 { 564 return topology_mode != TOPOLOGY_MODE_SINGLE; 565 } 566 567 static int __init topology_setup(char *str) 568 { 569 bool enabled; 570 int rc; 571 572 rc = kstrtobool(str, &enabled); 573 if (rc) 574 return rc; 575 topology_mode = topology_get_mode(enabled); 576 return 0; 577 } 578 early_param("topology", topology_setup); 579 580 static int topology_ctl_handler(struct ctl_table *ctl, int write, 581 void __user *buffer, size_t *lenp, loff_t *ppos) 582 { 583 unsigned int len; 584 int new_mode; 585 char buf[2]; 586 587 if (!*lenp || *ppos) { 588 *lenp = 0; 589 return 0; 590 } 591 if (!write) { 592 strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", 593 ARRAY_SIZE(buf)); 594 len = strnlen(buf, ARRAY_SIZE(buf)); 595 if (len > *lenp) 596 len = *lenp; 597 if (copy_to_user(buffer, buf, len)) 598 return -EFAULT; 599 goto out; 600 } 601 len = *lenp; 602 if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) 603 return -EFAULT; 604 if (buf[0] != '0' && buf[0] != '1') 605 return -EINVAL; 606 mutex_lock(&smp_cpu_state_mutex); 607 new_mode = topology_get_mode(buf[0] == '1'); 608 if (topology_mode != new_mode) { 609 topology_mode = new_mode; 610 topology_schedule_update(); 611 } 612 mutex_unlock(&smp_cpu_state_mutex); 613 topology_flush_work(); 614 out: 615 *lenp = len; 616 *ppos += len; 617 return 0; 618 } 619 620 static struct ctl_table topology_ctl_table[] = { 621 { 622 .procname = "topology", 623 .mode = 0644, 624 .proc_handler = topology_ctl_handler, 625 }, 626 { }, 627 }; 628 629 static struct ctl_table topology_dir_table[] = { 630 { 631 .procname = "s390", 632 .maxlen = 0, 633 .mode = 0555, 634 .child = topology_ctl_table, 635 }, 636 { }, 637 }; 638 639 static int __init topology_init(void) 640 { 641 if (MACHINE_HAS_TOPOLOGY) 642 set_topology_timer(); 643 else 644 topology_update_polarization_simple(); 645 register_sysctl_table(topology_dir_table); 646 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 647 } 648 device_initcall(topology_init); 649