1 /* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 4 */ 5 6 #define KMSG_COMPONENT "cpu" 7 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 8 9 #include <linux/workqueue.h> 10 #include <linux/cpuset.h> 11 #include <linux/device.h> 12 #include <linux/export.h> 13 #include <linux/kernel.h> 14 #include <linux/sched.h> 15 #include <linux/delay.h> 16 #include <linux/init.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 19 #include <linux/smp.h> 20 #include <linux/mm.h> 21 #include <asm/sysinfo.h> 22 23 #define PTF_HORIZONTAL (0UL) 24 #define PTF_VERTICAL (1UL) 25 #define PTF_CHECK (2UL) 26 27 struct mask_info { 28 struct mask_info *next; 29 unsigned char id; 30 cpumask_t mask; 31 }; 32 33 static void set_topology_timer(void); 34 static void topology_work_fn(struct work_struct *work); 35 static struct sysinfo_15_1_x *tl_info; 36 37 static int topology_enabled = 1; 38 static DECLARE_WORK(topology_work, topology_work_fn); 39 40 /* topology_lock protects the socket and book linked lists */ 41 static DEFINE_SPINLOCK(topology_lock); 42 static struct mask_info socket_info; 43 static struct mask_info book_info; 44 45 DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); 46 EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); 47 48 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) 49 { 50 cpumask_t mask; 51 52 cpumask_copy(&mask, cpumask_of(cpu)); 53 if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) 54 return mask; 55 for (; info; info = info->next) { 56 if (cpumask_test_cpu(cpu, &info->mask)) 57 return info->mask; 58 } 59 return mask; 60 } 61 62 static cpumask_t cpu_thread_map(unsigned int cpu) 63 { 64 cpumask_t mask; 65 int i; 66 67 cpumask_copy(&mask, cpumask_of(cpu)); 68 if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) 69 return mask; 70 cpu -= cpu % (smp_cpu_mtid + 1); 71 for (i = 0; i <= smp_cpu_mtid; i++) 72 if (cpu_present(cpu + i)) 73 cpumask_set_cpu(cpu + i, &mask); 74 return mask; 75 } 76 77 static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, 78 struct mask_info *book, 79 struct mask_info *socket, 80 int one_socket_per_cpu) 81 { 82 unsigned int core; 83 84 for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { 85 unsigned int rcore; 86 int lcpu, i; 87 88 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 89 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 90 if (lcpu < 0) 91 continue; 92 for (i = 0; i <= smp_cpu_mtid; i++) { 93 per_cpu(cpu_topology, lcpu + i).book_id = book->id; 94 per_cpu(cpu_topology, lcpu + i).core_id = rcore; 95 per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i; 96 cpumask_set_cpu(lcpu + i, &book->mask); 97 cpumask_set_cpu(lcpu + i, &socket->mask); 98 if (one_socket_per_cpu) 99 per_cpu(cpu_topology, lcpu + i).socket_id = rcore; 100 else 101 per_cpu(cpu_topology, lcpu + i).socket_id = socket->id; 102 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 103 } 104 if (one_socket_per_cpu) 105 socket = socket->next; 106 } 107 return socket; 108 } 109 110 static void clear_masks(void) 111 { 112 struct mask_info *info; 113 114 info = &socket_info; 115 while (info) { 116 cpumask_clear(&info->mask); 117 info = info->next; 118 } 119 info = &book_info; 120 while (info) { 121 cpumask_clear(&info->mask); 122 info = info->next; 123 } 124 } 125 126 static union topology_entry *next_tle(union topology_entry *tle) 127 { 128 if (!tle->nl) 129 return (union topology_entry *)((struct topology_core *)tle + 1); 130 return (union topology_entry *)((struct topology_container *)tle + 1); 131 } 132 133 static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) 134 { 135 struct mask_info *socket = &socket_info; 136 struct mask_info *book = &book_info; 137 union topology_entry *tle, *end; 138 139 tle = info->tle; 140 end = (union topology_entry *)((unsigned long)info + info->length); 141 while (tle < end) { 142 switch (tle->nl) { 143 case 2: 144 book = book->next; 145 book->id = tle->container.id; 146 break; 147 case 1: 148 socket = socket->next; 149 socket->id = tle->container.id; 150 break; 151 case 0: 152 add_cpus_to_mask(&tle->cpu, book, socket, 0); 153 break; 154 default: 155 clear_masks(); 156 return; 157 } 158 tle = next_tle(tle); 159 } 160 } 161 162 static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) 163 { 164 struct mask_info *socket = &socket_info; 165 struct mask_info *book = &book_info; 166 union topology_entry *tle, *end; 167 168 tle = info->tle; 169 end = (union topology_entry *)((unsigned long)info + info->length); 170 while (tle < end) { 171 switch (tle->nl) { 172 case 1: 173 book = book->next; 174 book->id = tle->container.id; 175 break; 176 case 0: 177 socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); 178 break; 179 default: 180 clear_masks(); 181 return; 182 } 183 tle = next_tle(tle); 184 } 185 } 186 187 static void tl_to_masks(struct sysinfo_15_1_x *info) 188 { 189 struct cpuid cpu_id; 190 191 spin_lock_irq(&topology_lock); 192 get_cpu_id(&cpu_id); 193 clear_masks(); 194 switch (cpu_id.machine) { 195 case 0x2097: 196 case 0x2098: 197 __tl_to_masks_z10(info); 198 break; 199 default: 200 __tl_to_masks_generic(info); 201 } 202 spin_unlock_irq(&topology_lock); 203 } 204 205 static void topology_update_polarization_simple(void) 206 { 207 int cpu; 208 209 mutex_lock(&smp_cpu_state_mutex); 210 for_each_possible_cpu(cpu) 211 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 212 mutex_unlock(&smp_cpu_state_mutex); 213 } 214 215 static int ptf(unsigned long fc) 216 { 217 int rc; 218 219 asm volatile( 220 " .insn rre,0xb9a20000,%1,%1\n" 221 " ipm %0\n" 222 " srl %0,28\n" 223 : "=d" (rc) 224 : "d" (fc) : "cc"); 225 return rc; 226 } 227 228 int topology_set_cpu_management(int fc) 229 { 230 int cpu, rc; 231 232 if (!MACHINE_HAS_TOPOLOGY) 233 return -EOPNOTSUPP; 234 if (fc) 235 rc = ptf(PTF_VERTICAL); 236 else 237 rc = ptf(PTF_HORIZONTAL); 238 if (rc) 239 return -EBUSY; 240 for_each_possible_cpu(cpu) 241 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 242 return rc; 243 } 244 245 static void update_cpu_masks(void) 246 { 247 unsigned long flags; 248 int cpu; 249 250 spin_lock_irqsave(&topology_lock, flags); 251 for_each_possible_cpu(cpu) { 252 per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu); 253 per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu); 254 per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu); 255 if (!MACHINE_HAS_TOPOLOGY) { 256 per_cpu(cpu_topology, cpu).thread_id = cpu; 257 per_cpu(cpu_topology, cpu).core_id = cpu; 258 per_cpu(cpu_topology, cpu).socket_id = cpu; 259 per_cpu(cpu_topology, cpu).book_id = cpu; 260 } 261 } 262 spin_unlock_irqrestore(&topology_lock, flags); 263 } 264 265 void store_topology(struct sysinfo_15_1_x *info) 266 { 267 if (topology_max_mnest >= 3) 268 stsi(info, 15, 1, 3); 269 else 270 stsi(info, 15, 1, 2); 271 } 272 273 int arch_update_cpu_topology(void) 274 { 275 struct sysinfo_15_1_x *info = tl_info; 276 struct device *dev; 277 int cpu; 278 279 if (!MACHINE_HAS_TOPOLOGY) { 280 update_cpu_masks(); 281 topology_update_polarization_simple(); 282 return 0; 283 } 284 store_topology(info); 285 tl_to_masks(info); 286 update_cpu_masks(); 287 for_each_online_cpu(cpu) { 288 dev = get_cpu_device(cpu); 289 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 290 } 291 return 1; 292 } 293 294 static void topology_work_fn(struct work_struct *work) 295 { 296 rebuild_sched_domains(); 297 } 298 299 void topology_schedule_update(void) 300 { 301 schedule_work(&topology_work); 302 } 303 304 static void topology_timer_fn(unsigned long ignored) 305 { 306 if (ptf(PTF_CHECK)) 307 topology_schedule_update(); 308 set_topology_timer(); 309 } 310 311 static struct timer_list topology_timer = 312 TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); 313 314 static atomic_t topology_poll = ATOMIC_INIT(0); 315 316 static void set_topology_timer(void) 317 { 318 if (atomic_add_unless(&topology_poll, -1, 0)) 319 mod_timer(&topology_timer, jiffies + HZ / 10); 320 else 321 mod_timer(&topology_timer, jiffies + HZ * 60); 322 } 323 324 void topology_expect_change(void) 325 { 326 if (!MACHINE_HAS_TOPOLOGY) 327 return; 328 /* This is racy, but it doesn't matter since it is just a heuristic. 329 * Worst case is that we poll in a higher frequency for a bit longer. 330 */ 331 if (atomic_read(&topology_poll) > 60) 332 return; 333 atomic_add(60, &topology_poll); 334 set_topology_timer(); 335 } 336 337 static int cpu_management; 338 339 static ssize_t dispatching_show(struct device *dev, 340 struct device_attribute *attr, 341 char *buf) 342 { 343 ssize_t count; 344 345 mutex_lock(&smp_cpu_state_mutex); 346 count = sprintf(buf, "%d\n", cpu_management); 347 mutex_unlock(&smp_cpu_state_mutex); 348 return count; 349 } 350 351 static ssize_t dispatching_store(struct device *dev, 352 struct device_attribute *attr, 353 const char *buf, 354 size_t count) 355 { 356 int val, rc; 357 char delim; 358 359 if (sscanf(buf, "%d %c", &val, &delim) != 1) 360 return -EINVAL; 361 if (val != 0 && val != 1) 362 return -EINVAL; 363 rc = 0; 364 get_online_cpus(); 365 mutex_lock(&smp_cpu_state_mutex); 366 if (cpu_management == val) 367 goto out; 368 rc = topology_set_cpu_management(val); 369 if (rc) 370 goto out; 371 cpu_management = val; 372 topology_expect_change(); 373 out: 374 mutex_unlock(&smp_cpu_state_mutex); 375 put_online_cpus(); 376 return rc ? rc : count; 377 } 378 static DEVICE_ATTR(dispatching, 0644, dispatching_show, 379 dispatching_store); 380 381 static ssize_t cpu_polarization_show(struct device *dev, 382 struct device_attribute *attr, char *buf) 383 { 384 int cpu = dev->id; 385 ssize_t count; 386 387 mutex_lock(&smp_cpu_state_mutex); 388 switch (smp_cpu_get_polarization(cpu)) { 389 case POLARIZATION_HRZ: 390 count = sprintf(buf, "horizontal\n"); 391 break; 392 case POLARIZATION_VL: 393 count = sprintf(buf, "vertical:low\n"); 394 break; 395 case POLARIZATION_VM: 396 count = sprintf(buf, "vertical:medium\n"); 397 break; 398 case POLARIZATION_VH: 399 count = sprintf(buf, "vertical:high\n"); 400 break; 401 default: 402 count = sprintf(buf, "unknown\n"); 403 break; 404 } 405 mutex_unlock(&smp_cpu_state_mutex); 406 return count; 407 } 408 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 409 410 static struct attribute *topology_cpu_attrs[] = { 411 &dev_attr_polarization.attr, 412 NULL, 413 }; 414 415 static struct attribute_group topology_cpu_attr_group = { 416 .attrs = topology_cpu_attrs, 417 }; 418 419 int topology_cpu_init(struct cpu *cpu) 420 { 421 return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 422 } 423 424 const struct cpumask *cpu_thread_mask(int cpu) 425 { 426 return &per_cpu(cpu_topology, cpu).thread_mask; 427 } 428 429 430 const struct cpumask *cpu_coregroup_mask(int cpu) 431 { 432 return &per_cpu(cpu_topology, cpu).core_mask; 433 } 434 435 static const struct cpumask *cpu_book_mask(int cpu) 436 { 437 return &per_cpu(cpu_topology, cpu).book_mask; 438 } 439 440 static int __init early_parse_topology(char *p) 441 { 442 if (strncmp(p, "off", 3)) 443 return 0; 444 topology_enabled = 0; 445 return 0; 446 } 447 early_param("topology", early_parse_topology); 448 449 static struct sched_domain_topology_level s390_topology[] = { 450 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 451 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 452 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 453 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 454 { NULL, }, 455 }; 456 457 static void __init alloc_masks(struct sysinfo_15_1_x *info, 458 struct mask_info *mask, int offset) 459 { 460 int i, nr_masks; 461 462 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 463 for (i = 0; i < info->mnest - offset; i++) 464 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 465 nr_masks = max(nr_masks, 1); 466 for (i = 0; i < nr_masks; i++) { 467 mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL); 468 mask = mask->next; 469 } 470 } 471 472 static int __init s390_topology_init(void) 473 { 474 struct sysinfo_15_1_x *info; 475 int i; 476 477 if (!MACHINE_HAS_TOPOLOGY) 478 return 0; 479 tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL); 480 info = tl_info; 481 store_topology(info); 482 pr_info("The CPU configuration topology of the machine is:"); 483 for (i = 0; i < TOPOLOGY_NR_MAG; i++) 484 printk(KERN_CONT " %d", info->mag[i]); 485 printk(KERN_CONT " / %d\n", info->mnest); 486 alloc_masks(info, &socket_info, 1); 487 alloc_masks(info, &book_info, 2); 488 set_sched_topology(s390_topology); 489 return 0; 490 } 491 early_initcall(s390_topology_init); 492 493 static int __init topology_init(void) 494 { 495 if (MACHINE_HAS_TOPOLOGY) 496 set_topology_timer(); 497 else 498 topology_update_polarization_simple(); 499 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 500 } 501 device_initcall(topology_init); 502