1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Arch specific cpu topology information 4 * 5 * Copyright (C) 2016, ARM Ltd. 6 * Written by: Juri Lelli, ARM Ltd. 7 */ 8 9 #include <linux/acpi.h> 10 #include <linux/cpu.h> 11 #include <linux/cpufreq.h> 12 #include <linux/device.h> 13 #include <linux/of.h> 14 #include <linux/slab.h> 15 #include <linux/string.h> 16 #include <linux/sched/topology.h> 17 #include <linux/cpuset.h> 18 #include <linux/cpumask.h> 19 #include <linux/init.h> 20 #include <linux/percpu.h> 21 #include <linux/sched.h> 22 #include <linux/smp.h> 23 24 __weak bool arch_freq_counters_available(struct cpumask *cpus) 25 { 26 return false; 27 } 28 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; 29 30 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, 31 unsigned long max_freq) 32 { 33 unsigned long scale; 34 int i; 35 36 /* 37 * If the use of counters for FIE is enabled, just return as we don't 38 * want to update the scale factor with information from CPUFREQ. 39 * Instead the scale factor will be updated from arch_scale_freq_tick. 40 */ 41 if (arch_freq_counters_available(cpus)) 42 return; 43 44 scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq; 45 46 for_each_cpu(i, cpus) 47 per_cpu(freq_scale, i) = scale; 48 } 49 50 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; 51 52 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) 53 { 54 per_cpu(cpu_scale, cpu) = capacity; 55 } 56 57 static ssize_t cpu_capacity_show(struct device *dev, 58 struct device_attribute *attr, 59 char *buf) 60 { 61 struct cpu *cpu = container_of(dev, struct cpu, dev); 62 63 return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); 64 } 65 66 static void update_topology_flags_workfn(struct work_struct *work); 67 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); 68 69 static DEVICE_ATTR_RO(cpu_capacity); 70 71 static int register_cpu_capacity_sysctl(void) 72 { 73 int i; 74 struct device *cpu; 75 76 for_each_possible_cpu(i) { 77 cpu = get_cpu_device(i); 78 if (!cpu) { 79 pr_err("%s: too early to get CPU%d device!\n", 80 __func__, i); 81 continue; 82 } 83 device_create_file(cpu, &dev_attr_cpu_capacity); 84 } 85 86 return 0; 87 } 88 subsys_initcall(register_cpu_capacity_sysctl); 89 90 static int update_topology; 91 92 int topology_update_cpu_topology(void) 93 { 94 return update_topology; 95 } 96 97 /* 98 * Updating the sched_domains can't be done directly from cpufreq callbacks 99 * due to locking, so queue the work for later. 100 */ 101 static void update_topology_flags_workfn(struct work_struct *work) 102 { 103 update_topology = 1; 104 rebuild_sched_domains(); 105 pr_debug("sched_domain hierarchy rebuilt, flags updated\n"); 106 update_topology = 0; 107 } 108 109 static DEFINE_PER_CPU(u32, freq_factor) = 1; 110 static u32 *raw_capacity; 111 112 static int free_raw_capacity(void) 113 { 114 kfree(raw_capacity); 115 raw_capacity = NULL; 116 117 return 0; 118 } 119 120 void topology_normalize_cpu_scale(void) 121 { 122 u64 capacity; 123 u64 capacity_scale; 124 int cpu; 125 126 if (!raw_capacity) 127 return; 128 129 capacity_scale = 1; 130 for_each_possible_cpu(cpu) { 131 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu); 132 capacity_scale = max(capacity, capacity_scale); 133 } 134 135 pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale); 136 for_each_possible_cpu(cpu) { 137 capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu); 138 capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, 139 capacity_scale); 140 topology_set_cpu_scale(cpu, capacity); 141 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", 142 cpu, topology_get_cpu_scale(cpu)); 143 } 144 } 145 146 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) 147 { 148 struct clk *cpu_clk; 149 static bool cap_parsing_failed; 150 int ret; 151 u32 cpu_capacity; 152 153 if (cap_parsing_failed) 154 return false; 155 156 ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz", 157 &cpu_capacity); 158 if (!ret) { 159 if (!raw_capacity) { 160 raw_capacity = kcalloc(num_possible_cpus(), 161 sizeof(*raw_capacity), 162 GFP_KERNEL); 163 if (!raw_capacity) { 164 cap_parsing_failed = true; 165 return false; 166 } 167 } 168 raw_capacity[cpu] = cpu_capacity; 169 pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n", 170 cpu_node, raw_capacity[cpu]); 171 172 /* 173 * Update freq_factor for calculating early boot cpu capacities. 174 * For non-clk CPU DVFS mechanism, there's no way to get the 175 * frequency value now, assuming they are running at the same 176 * frequency (by keeping the initial freq_factor value). 177 */ 178 cpu_clk = of_clk_get(cpu_node, 0); 179 if (!PTR_ERR_OR_ZERO(cpu_clk)) { 180 per_cpu(freq_factor, cpu) = 181 clk_get_rate(cpu_clk) / 1000; 182 clk_put(cpu_clk); 183 } 184 } else { 185 if (raw_capacity) { 186 pr_err("cpu_capacity: missing %pOF raw capacity\n", 187 cpu_node); 188 pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n"); 189 } 190 cap_parsing_failed = true; 191 free_raw_capacity(); 192 } 193 194 return !ret; 195 } 196 197 #ifdef CONFIG_CPU_FREQ 198 static cpumask_var_t cpus_to_visit; 199 static void parsing_done_workfn(struct work_struct *work); 200 static DECLARE_WORK(parsing_done_work, parsing_done_workfn); 201 202 static int 203 init_cpu_capacity_callback(struct notifier_block *nb, 204 unsigned long val, 205 void *data) 206 { 207 struct cpufreq_policy *policy = data; 208 int cpu; 209 210 if (!raw_capacity) 211 return 0; 212 213 if (val != CPUFREQ_CREATE_POLICY) 214 return 0; 215 216 pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", 217 cpumask_pr_args(policy->related_cpus), 218 cpumask_pr_args(cpus_to_visit)); 219 220 cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); 221 222 for_each_cpu(cpu, policy->related_cpus) 223 per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000; 224 225 if (cpumask_empty(cpus_to_visit)) { 226 topology_normalize_cpu_scale(); 227 schedule_work(&update_topology_flags_work); 228 free_raw_capacity(); 229 pr_debug("cpu_capacity: parsing done\n"); 230 schedule_work(&parsing_done_work); 231 } 232 233 return 0; 234 } 235 236 static struct notifier_block init_cpu_capacity_notifier = { 237 .notifier_call = init_cpu_capacity_callback, 238 }; 239 240 static int __init register_cpufreq_notifier(void) 241 { 242 int ret; 243 244 /* 245 * on ACPI-based systems we need to use the default cpu capacity 246 * until we have the necessary code to parse the cpu capacity, so 247 * skip registering cpufreq notifier. 248 */ 249 if (!acpi_disabled || !raw_capacity) 250 return -EINVAL; 251 252 if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) 253 return -ENOMEM; 254 255 cpumask_copy(cpus_to_visit, cpu_possible_mask); 256 257 ret = cpufreq_register_notifier(&init_cpu_capacity_notifier, 258 CPUFREQ_POLICY_NOTIFIER); 259 260 if (ret) 261 free_cpumask_var(cpus_to_visit); 262 263 return ret; 264 } 265 core_initcall(register_cpufreq_notifier); 266 267 static void parsing_done_workfn(struct work_struct *work) 268 { 269 cpufreq_unregister_notifier(&init_cpu_capacity_notifier, 270 CPUFREQ_POLICY_NOTIFIER); 271 free_cpumask_var(cpus_to_visit); 272 } 273 274 #else 275 core_initcall(free_raw_capacity); 276 #endif 277 278 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) 279 /* 280 * This function returns the logic cpu number of the node. 281 * There are basically three kinds of return values: 282 * (1) logic cpu number which is > 0. 283 * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but 284 * there is no possible logical CPU in the kernel to match. This happens 285 * when CONFIG_NR_CPUS is configure to be smaller than the number of 286 * CPU nodes in DT. We need to just ignore this case. 287 * (3) -1 if the node does not exist in the device tree 288 */ 289 static int __init get_cpu_for_node(struct device_node *node) 290 { 291 struct device_node *cpu_node; 292 int cpu; 293 294 cpu_node = of_parse_phandle(node, "cpu", 0); 295 if (!cpu_node) 296 return -1; 297 298 cpu = of_cpu_node_to_id(cpu_node); 299 if (cpu >= 0) 300 topology_parse_cpu_capacity(cpu_node, cpu); 301 else 302 pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n", 303 cpu_node, cpumask_pr_args(cpu_possible_mask)); 304 305 of_node_put(cpu_node); 306 return cpu; 307 } 308 309 static int __init parse_core(struct device_node *core, int package_id, 310 int core_id) 311 { 312 char name[20]; 313 bool leaf = true; 314 int i = 0; 315 int cpu; 316 struct device_node *t; 317 318 do { 319 snprintf(name, sizeof(name), "thread%d", i); 320 t = of_get_child_by_name(core, name); 321 if (t) { 322 leaf = false; 323 cpu = get_cpu_for_node(t); 324 if (cpu >= 0) { 325 cpu_topology[cpu].package_id = package_id; 326 cpu_topology[cpu].core_id = core_id; 327 cpu_topology[cpu].thread_id = i; 328 } else if (cpu != -ENODEV) { 329 pr_err("%pOF: Can't get CPU for thread\n", t); 330 of_node_put(t); 331 return -EINVAL; 332 } 333 of_node_put(t); 334 } 335 i++; 336 } while (t); 337 338 cpu = get_cpu_for_node(core); 339 if (cpu >= 0) { 340 if (!leaf) { 341 pr_err("%pOF: Core has both threads and CPU\n", 342 core); 343 return -EINVAL; 344 } 345 346 cpu_topology[cpu].package_id = package_id; 347 cpu_topology[cpu].core_id = core_id; 348 } else if (leaf && cpu != -ENODEV) { 349 pr_err("%pOF: Can't get CPU for leaf core\n", core); 350 return -EINVAL; 351 } 352 353 return 0; 354 } 355 356 static int __init parse_cluster(struct device_node *cluster, int depth) 357 { 358 char name[20]; 359 bool leaf = true; 360 bool has_cores = false; 361 struct device_node *c; 362 static int package_id __initdata; 363 int core_id = 0; 364 int i, ret; 365 366 /* 367 * First check for child clusters; we currently ignore any 368 * information about the nesting of clusters and present the 369 * scheduler with a flat list of them. 370 */ 371 i = 0; 372 do { 373 snprintf(name, sizeof(name), "cluster%d", i); 374 c = of_get_child_by_name(cluster, name); 375 if (c) { 376 leaf = false; 377 ret = parse_cluster(c, depth + 1); 378 of_node_put(c); 379 if (ret != 0) 380 return ret; 381 } 382 i++; 383 } while (c); 384 385 /* Now check for cores */ 386 i = 0; 387 do { 388 snprintf(name, sizeof(name), "core%d", i); 389 c = of_get_child_by_name(cluster, name); 390 if (c) { 391 has_cores = true; 392 393 if (depth == 0) { 394 pr_err("%pOF: cpu-map children should be clusters\n", 395 c); 396 of_node_put(c); 397 return -EINVAL; 398 } 399 400 if (leaf) { 401 ret = parse_core(c, package_id, core_id++); 402 } else { 403 pr_err("%pOF: Non-leaf cluster with core %s\n", 404 cluster, name); 405 ret = -EINVAL; 406 } 407 408 of_node_put(c); 409 if (ret != 0) 410 return ret; 411 } 412 i++; 413 } while (c); 414 415 if (leaf && !has_cores) 416 pr_warn("%pOF: empty cluster\n", cluster); 417 418 if (leaf) 419 package_id++; 420 421 return 0; 422 } 423 424 static int __init parse_dt_topology(void) 425 { 426 struct device_node *cn, *map; 427 int ret = 0; 428 int cpu; 429 430 cn = of_find_node_by_path("/cpus"); 431 if (!cn) { 432 pr_err("No CPU information found in DT\n"); 433 return 0; 434 } 435 436 /* 437 * When topology is provided cpu-map is essentially a root 438 * cluster with restricted subnodes. 439 */ 440 map = of_get_child_by_name(cn, "cpu-map"); 441 if (!map) 442 goto out; 443 444 ret = parse_cluster(map, 0); 445 if (ret != 0) 446 goto out_map; 447 448 topology_normalize_cpu_scale(); 449 450 /* 451 * Check that all cores are in the topology; the SMP code will 452 * only mark cores described in the DT as possible. 453 */ 454 for_each_possible_cpu(cpu) 455 if (cpu_topology[cpu].package_id == -1) 456 ret = -EINVAL; 457 458 out_map: 459 of_node_put(map); 460 out: 461 of_node_put(cn); 462 return ret; 463 } 464 #endif 465 466 /* 467 * cpu topology table 468 */ 469 struct cpu_topology cpu_topology[NR_CPUS]; 470 EXPORT_SYMBOL_GPL(cpu_topology); 471 472 const struct cpumask *cpu_coregroup_mask(int cpu) 473 { 474 const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); 475 476 /* Find the smaller of NUMA, core or LLC siblings */ 477 if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { 478 /* not numa in package, lets use the package siblings */ 479 core_mask = &cpu_topology[cpu].core_sibling; 480 } 481 if (cpu_topology[cpu].llc_id != -1) { 482 if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) 483 core_mask = &cpu_topology[cpu].llc_sibling; 484 } 485 486 return core_mask; 487 } 488 489 void update_siblings_masks(unsigned int cpuid) 490 { 491 struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; 492 int cpu; 493 494 /* update core and thread sibling masks */ 495 for_each_online_cpu(cpu) { 496 cpu_topo = &cpu_topology[cpu]; 497 498 if (cpuid_topo->llc_id == cpu_topo->llc_id) { 499 cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); 500 cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); 501 } 502 503 if (cpuid_topo->package_id != cpu_topo->package_id) 504 continue; 505 506 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); 507 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); 508 509 if (cpuid_topo->core_id != cpu_topo->core_id) 510 continue; 511 512 cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); 513 cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); 514 } 515 } 516 517 static void clear_cpu_topology(int cpu) 518 { 519 struct cpu_topology *cpu_topo = &cpu_topology[cpu]; 520 521 cpumask_clear(&cpu_topo->llc_sibling); 522 cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); 523 524 cpumask_clear(&cpu_topo->core_sibling); 525 cpumask_set_cpu(cpu, &cpu_topo->core_sibling); 526 cpumask_clear(&cpu_topo->thread_sibling); 527 cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); 528 } 529 530 void __init reset_cpu_topology(void) 531 { 532 unsigned int cpu; 533 534 for_each_possible_cpu(cpu) { 535 struct cpu_topology *cpu_topo = &cpu_topology[cpu]; 536 537 cpu_topo->thread_id = -1; 538 cpu_topo->core_id = -1; 539 cpu_topo->package_id = -1; 540 cpu_topo->llc_id = -1; 541 542 clear_cpu_topology(cpu); 543 } 544 } 545 546 void remove_cpu_topology(unsigned int cpu) 547 { 548 int sibling; 549 550 for_each_cpu(sibling, topology_core_cpumask(cpu)) 551 cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); 552 for_each_cpu(sibling, topology_sibling_cpumask(cpu)) 553 cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); 554 for_each_cpu(sibling, topology_llc_cpumask(cpu)) 555 cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); 556 557 clear_cpu_topology(cpu); 558 } 559 560 __weak int __init parse_acpi_topology(void) 561 { 562 return 0; 563 } 564 565 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) 566 void __init init_cpu_topology(void) 567 { 568 reset_cpu_topology(); 569 570 /* 571 * Discard anything that was parsed if we hit an error so we 572 * don't use partial information. 573 */ 574 if (parse_acpi_topology()) 575 reset_cpu_topology(); 576 else if (of_have_populated_dt() && parse_dt_topology()) 577 reset_cpu_topology(); 578 } 579 #endif 580