1 /* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 4 */ 5 6 #define KMSG_COMPONENT "cpu" 7 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 8 9 #include <linux/workqueue.h> 10 #include <linux/cpuset.h> 11 #include <linux/device.h> 12 #include <linux/export.h> 13 #include <linux/kernel.h> 14 #include <linux/sched.h> 15 #include <linux/delay.h> 16 #include <linux/init.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/smp.h> 20 #include <linux/mm.h> 21 #include <linux/nodemask.h> 22 #include <linux/node.h> 23 #include <asm/sysinfo.h> 24 #include <asm/numa.h> 25 26 #define PTF_HORIZONTAL (0UL) 27 #define PTF_VERTICAL (1UL) 28 #define PTF_CHECK (2UL) 29 30 struct mask_info { 31 struct mask_info *next; 32 unsigned char id; 33 cpumask_t mask; 34 }; 35 36 static void set_topology_timer(void); 37 static void topology_work_fn(struct work_struct *work); 38 static struct sysinfo_15_1_x *tl_info; 39 40 static int topology_enabled = 1; 41 static DECLARE_WORK(topology_work, topology_work_fn); 42 43 /* 44 * Socket/Book linked lists and per_cpu(cpu_topology) updates are 45 * protected by "sched_domains_mutex". 46 */ 47 static struct mask_info socket_info; 48 static struct mask_info book_info; 49 50 DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); 51 EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); 52 53 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) 54 { 55 cpumask_t mask; 56 57 cpumask_copy(&mask, cpumask_of(cpu)); 58 if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) 59 return mask; 60 for (; info; info = info->next) { 61 if (cpumask_test_cpu(cpu, &info->mask)) 62 return info->mask; 63 } 64 return mask; 65 } 66 67 static cpumask_t cpu_thread_map(unsigned int cpu) 68 { 69 cpumask_t mask; 70 int i; 71 72 cpumask_copy(&mask, cpumask_of(cpu)); 73 if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) 74 return mask; 75 cpu -= cpu % (smp_cpu_mtid + 1); 76 for (i = 0; i <= smp_cpu_mtid; i++) 77 if (cpu_present(cpu + i)) 78 cpumask_set_cpu(cpu + i, &mask); 79 return mask; 80 } 81 82 static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, 83 struct mask_info *book, 84 struct mask_info *socket, 85 int one_socket_per_cpu) 86 { 87 unsigned int core; 88 89 for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { 90 unsigned int rcore; 91 int lcpu, i; 92 93 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 94 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 95 if (lcpu < 0) 96 continue; 97 for (i = 0; i <= smp_cpu_mtid; i++) { 98 per_cpu(cpu_topology, lcpu + i).book_id = book->id; 99 per_cpu(cpu_topology, lcpu + i).core_id = rcore; 100 per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i; 101 cpumask_set_cpu(lcpu + i, &book->mask); 102 cpumask_set_cpu(lcpu + i, &socket->mask); 103 if (one_socket_per_cpu) 104 per_cpu(cpu_topology, lcpu + i).socket_id = rcore; 105 else 106 per_cpu(cpu_topology, lcpu + i).socket_id = socket->id; 107 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 108 } 109 if (one_socket_per_cpu) 110 socket = socket->next; 111 } 112 return socket; 113 } 114 115 static void clear_masks(void) 116 { 117 struct mask_info *info; 118 119 info = &socket_info; 120 while (info) { 121 cpumask_clear(&info->mask); 122 info = info->next; 123 } 124 info = &book_info; 125 while (info) { 126 cpumask_clear(&info->mask); 127 info = info->next; 128 } 129 } 130 131 static union topology_entry *next_tle(union topology_entry *tle) 132 { 133 if (!tle->nl) 134 return (union topology_entry *)((struct topology_core *)tle + 1); 135 return (union topology_entry *)((struct topology_container *)tle + 1); 136 } 137 138 static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) 139 { 140 struct mask_info *socket = &socket_info; 141 struct mask_info *book = &book_info; 142 union topology_entry *tle, *end; 143 144 tle = info->tle; 145 end = (union topology_entry *)((unsigned long)info + info->length); 146 while (tle < end) { 147 switch (tle->nl) { 148 case 2: 149 book = book->next; 150 book->id = tle->container.id; 151 break; 152 case 1: 153 socket = socket->next; 154 socket->id = tle->container.id; 155 break; 156 case 0: 157 add_cpus_to_mask(&tle->cpu, book, socket, 0); 158 break; 159 default: 160 clear_masks(); 161 return; 162 } 163 tle = next_tle(tle); 164 } 165 } 166 167 static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) 168 { 169 struct mask_info *socket = &socket_info; 170 struct mask_info *book = &book_info; 171 union topology_entry *tle, *end; 172 173 tle = info->tle; 174 end = (union topology_entry *)((unsigned long)info + info->length); 175 while (tle < end) { 176 switch (tle->nl) { 177 case 1: 178 book = book->next; 179 book->id = tle->container.id; 180 break; 181 case 0: 182 socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); 183 break; 184 default: 185 clear_masks(); 186 return; 187 } 188 tle = next_tle(tle); 189 } 190 } 191 192 static void tl_to_masks(struct sysinfo_15_1_x *info) 193 { 194 struct cpuid cpu_id; 195 196 get_cpu_id(&cpu_id); 197 clear_masks(); 198 switch (cpu_id.machine) { 199 case 0x2097: 200 case 0x2098: 201 __tl_to_masks_z10(info); 202 break; 203 default: 204 __tl_to_masks_generic(info); 205 } 206 } 207 208 static void topology_update_polarization_simple(void) 209 { 210 int cpu; 211 212 mutex_lock(&smp_cpu_state_mutex); 213 for_each_possible_cpu(cpu) 214 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 215 mutex_unlock(&smp_cpu_state_mutex); 216 } 217 218 static int ptf(unsigned long fc) 219 { 220 int rc; 221 222 asm volatile( 223 " .insn rre,0xb9a20000,%1,%1\n" 224 " ipm %0\n" 225 " srl %0,28\n" 226 : "=d" (rc) 227 : "d" (fc) : "cc"); 228 return rc; 229 } 230 231 int topology_set_cpu_management(int fc) 232 { 233 int cpu, rc; 234 235 if (!MACHINE_HAS_TOPOLOGY) 236 return -EOPNOTSUPP; 237 if (fc) 238 rc = ptf(PTF_VERTICAL); 239 else 240 rc = ptf(PTF_HORIZONTAL); 241 if (rc) 242 return -EBUSY; 243 for_each_possible_cpu(cpu) 244 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 245 return rc; 246 } 247 248 static void update_cpu_masks(void) 249 { 250 int cpu; 251 252 for_each_possible_cpu(cpu) { 253 per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu); 254 per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu); 255 per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu); 256 if (!MACHINE_HAS_TOPOLOGY) { 257 per_cpu(cpu_topology, cpu).thread_id = cpu; 258 per_cpu(cpu_topology, cpu).core_id = cpu; 259 per_cpu(cpu_topology, cpu).socket_id = cpu; 260 per_cpu(cpu_topology, cpu).book_id = cpu; 261 } 262 } 263 numa_update_cpu_topology(); 264 } 265 266 void store_topology(struct sysinfo_15_1_x *info) 267 { 268 if (topology_max_mnest >= 3) 269 stsi(info, 15, 1, 3); 270 else 271 stsi(info, 15, 1, 2); 272 } 273 274 int arch_update_cpu_topology(void) 275 { 276 struct sysinfo_15_1_x *info = tl_info; 277 struct device *dev; 278 int cpu, rc = 0; 279 280 if (MACHINE_HAS_TOPOLOGY) { 281 rc = 1; 282 store_topology(info); 283 tl_to_masks(info); 284 } 285 update_cpu_masks(); 286 if (!MACHINE_HAS_TOPOLOGY) 287 topology_update_polarization_simple(); 288 for_each_online_cpu(cpu) { 289 dev = get_cpu_device(cpu); 290 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 291 } 292 return rc; 293 } 294 295 static void topology_work_fn(struct work_struct *work) 296 { 297 rebuild_sched_domains(); 298 } 299 300 void topology_schedule_update(void) 301 { 302 schedule_work(&topology_work); 303 } 304 305 static void topology_timer_fn(unsigned long ignored) 306 { 307 if (ptf(PTF_CHECK)) 308 topology_schedule_update(); 309 set_topology_timer(); 310 } 311 312 static struct timer_list topology_timer = 313 TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); 314 315 static atomic_t topology_poll = ATOMIC_INIT(0); 316 317 static void set_topology_timer(void) 318 { 319 if (atomic_add_unless(&topology_poll, -1, 0)) 320 mod_timer(&topology_timer, jiffies + HZ / 10); 321 else 322 mod_timer(&topology_timer, jiffies + HZ * 60); 323 } 324 325 void topology_expect_change(void) 326 { 327 if (!MACHINE_HAS_TOPOLOGY) 328 return; 329 /* This is racy, but it doesn't matter since it is just a heuristic. 330 * Worst case is that we poll in a higher frequency for a bit longer. 331 */ 332 if (atomic_read(&topology_poll) > 60) 333 return; 334 atomic_add(60, &topology_poll); 335 set_topology_timer(); 336 } 337 338 static int cpu_management; 339 340 static ssize_t dispatching_show(struct device *dev, 341 struct device_attribute *attr, 342 char *buf) 343 { 344 ssize_t count; 345 346 mutex_lock(&smp_cpu_state_mutex); 347 count = sprintf(buf, "%d\n", cpu_management); 348 mutex_unlock(&smp_cpu_state_mutex); 349 return count; 350 } 351 352 static ssize_t dispatching_store(struct device *dev, 353 struct device_attribute *attr, 354 const char *buf, 355 size_t count) 356 { 357 int val, rc; 358 char delim; 359 360 if (sscanf(buf, "%d %c", &val, &delim) != 1) 361 return -EINVAL; 362 if (val != 0 && val != 1) 363 return -EINVAL; 364 rc = 0; 365 get_online_cpus(); 366 mutex_lock(&smp_cpu_state_mutex); 367 if (cpu_management == val) 368 goto out; 369 rc = topology_set_cpu_management(val); 370 if (rc) 371 goto out; 372 cpu_management = val; 373 topology_expect_change(); 374 out: 375 mutex_unlock(&smp_cpu_state_mutex); 376 put_online_cpus(); 377 return rc ? rc : count; 378 } 379 static DEVICE_ATTR(dispatching, 0644, dispatching_show, 380 dispatching_store); 381 382 static ssize_t cpu_polarization_show(struct device *dev, 383 struct device_attribute *attr, char *buf) 384 { 385 int cpu = dev->id; 386 ssize_t count; 387 388 mutex_lock(&smp_cpu_state_mutex); 389 switch (smp_cpu_get_polarization(cpu)) { 390 case POLARIZATION_HRZ: 391 count = sprintf(buf, "horizontal\n"); 392 break; 393 case POLARIZATION_VL: 394 count = sprintf(buf, "vertical:low\n"); 395 break; 396 case POLARIZATION_VM: 397 count = sprintf(buf, "vertical:medium\n"); 398 break; 399 case POLARIZATION_VH: 400 count = sprintf(buf, "vertical:high\n"); 401 break; 402 default: 403 count = sprintf(buf, "unknown\n"); 404 break; 405 } 406 mutex_unlock(&smp_cpu_state_mutex); 407 return count; 408 } 409 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 410 411 static struct attribute *topology_cpu_attrs[] = { 412 &dev_attr_polarization.attr, 413 NULL, 414 }; 415 416 static struct attribute_group topology_cpu_attr_group = { 417 .attrs = topology_cpu_attrs, 418 }; 419 420 int topology_cpu_init(struct cpu *cpu) 421 { 422 return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 423 } 424 425 static const struct cpumask *cpu_thread_mask(int cpu) 426 { 427 return &per_cpu(cpu_topology, cpu).thread_mask; 428 } 429 430 431 const struct cpumask *cpu_coregroup_mask(int cpu) 432 { 433 return &per_cpu(cpu_topology, cpu).core_mask; 434 } 435 436 static const struct cpumask *cpu_book_mask(int cpu) 437 { 438 return &per_cpu(cpu_topology, cpu).book_mask; 439 } 440 441 static int __init early_parse_topology(char *p) 442 { 443 if (strncmp(p, "off", 3)) 444 return 0; 445 topology_enabled = 0; 446 return 0; 447 } 448 early_param("topology", early_parse_topology); 449 450 static struct sched_domain_topology_level s390_topology[] = { 451 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 452 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 453 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 454 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 455 { NULL, }, 456 }; 457 458 static void __init alloc_masks(struct sysinfo_15_1_x *info, 459 struct mask_info *mask, int offset) 460 { 461 int i, nr_masks; 462 463 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 464 for (i = 0; i < info->mnest - offset; i++) 465 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 466 nr_masks = max(nr_masks, 1); 467 for (i = 0; i < nr_masks; i++) { 468 mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL); 469 mask = mask->next; 470 } 471 } 472 473 static int __init s390_topology_init(void) 474 { 475 struct sysinfo_15_1_x *info; 476 int i; 477 478 if (!MACHINE_HAS_TOPOLOGY) 479 return 0; 480 tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL); 481 info = tl_info; 482 store_topology(info); 483 pr_info("The CPU configuration topology of the machine is:"); 484 for (i = 0; i < TOPOLOGY_NR_MAG; i++) 485 printk(KERN_CONT " %d", info->mag[i]); 486 printk(KERN_CONT " / %d\n", info->mnest); 487 alloc_masks(info, &socket_info, 1); 488 alloc_masks(info, &book_info, 2); 489 set_sched_topology(s390_topology); 490 return 0; 491 } 492 early_initcall(s390_topology_init); 493 494 static int __init topology_init(void) 495 { 496 if (MACHINE_HAS_TOPOLOGY) 497 set_topology_timer(); 498 else 499 topology_update_polarization_simple(); 500 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 501 } 502 device_initcall(topology_init); 503