/*
 * arch/arm/kernel/topology.c
 *
 * Copyright (C) 2011 Linaro Limited.
 * Written by: Vincent Guittot
 *
 * based on arch/sh/kernel/topology.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/node.h>
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>

#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

/*
 * cpu capacity scale management
 */

/*
 * cpu capacity table
 * This per cpu data structure describes the relative capacity of each core.
 * On a heterogeneous system, cores don't have the same computation capacity
 * and we reflect that difference in the cpu_capacity field so the scheduler
 * can take this difference into account during load balance. A per cpu
 * structure is preferred because each CPU updates its own cpu_capacity field
 * during the load balance except for idle cores. One idle core is selected
 * to run the rebalance_domains for all idle cores and the cpu_capacity can be
 * updated during this sequence.
 */
static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
static DEFINE_MUTEX(cpu_scale_mutex);

unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
	return per_cpu(cpu_scale, cpu);
}

static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}
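
/*
 * The scheduler consumes cpu_scale through arch_scale_cpu_capacity() when it
 * weighs how much work each CPU can absorb during load balancing. On a
 * symmetric system every CPU keeps the default SCHED_CAPACITY_SCALE (1024);
 * on a big.LITTLE system the code below lowers cpu_scale for the little
 * cores. An illustrative outcome (actual values depend on the platform and
 * clock frequencies): 1024 for Cortex-A15 cores, roughly 500-600 for
 * Cortex-A7 cores.
 */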

#ifdef CONFIG_PROC_SYSCTL
static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sprintf(buf, "%lu\n",
			arch_scale_cpu_capacity(NULL, cpu->dev.id));
}

static ssize_t cpu_capacity_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf,
				  size_t count)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);
	int this_cpu = cpu->dev.id, i;
	unsigned long new_capacity;
	ssize_t ret;

	if (count) {
		ret = kstrtoul(buf, 0, &new_capacity);
		if (ret)
			return ret;
		if (new_capacity > SCHED_CAPACITY_SCALE)
			return -EINVAL;

		mutex_lock(&cpu_scale_mutex);
		for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
			set_capacity_scale(i, new_capacity);
		mutex_unlock(&cpu_scale_mutex);
	}

	return count;
}
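
/*
 * The attribute below should appear as /sys/devices/system/cpu/cpuN/cpu_capacity.
 * A sketch of its use from user space (values are illustrative):
 *
 *	cat /sys/devices/system/cpu/cpu2/cpu_capacity	(e.g. prints 1024)
 *	echo 800 > /sys/devices/system/cpu/cpu2/cpu_capacity
 *
 * The write path applies the new value to cpu2 and to every CPU in its
 * core_sibling mask, so all cores of that cluster share the same capacity.
 */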

static DEVICE_ATTR_RW(cpu_capacity);

static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);
#endif

#ifdef CONFIG_OF
struct cpu_efficiency {
	const char *compatible;
	unsigned long efficiency;
};

/*
 * Table of relative efficiency of each processor
 * The efficiency value must fit in 20 bits and the final
 * cpu_scale value must be in the range
 *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
 * in order to return at most 1 when DIV_ROUND_CLOSEST
 * is used to compute the capacity of a CPU.
 * Processors that are not defined in the table use the
 * default SCHED_CAPACITY_SCALE value for cpu_scale.
 */
static const struct cpu_efficiency table_efficiency[] = {
	{"arm,cortex-a15", 3891},
	{"arm,cortex-a7",  2048},
	{NULL, },
};
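
/*
 * The efficiency values above are per-MHz weights: a Cortex-A15 is treated
 * as roughly 3891/2048 (about 1.9x) more capable than a Cortex-A7 at the
 * same clock frequency. parse_dt_topology() below turns them into raw
 * capacities as
 *
 *	capacity = (clock-frequency >> 20) * efficiency
 *
 * so, for a hypothetical 1 GHz part, an A15 yields (1000000000 >> 20) * 3891
 * = 3708123 while an A7 yields 953 * 2048 = 1951744, before normalization
 * by middle_capacity.
 */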

static unsigned long *__cpu_capacity;
#define cpu_capacity(cpu)	__cpu_capacity[cpu]

static unsigned long middle_capacity = 1;
static bool cap_from_dt = true;
static u32 *raw_capacity;
static bool cap_parsing_failed;
static u32 capacity_scale;

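/*
 * parse_cpu_capacity() looks for the "capacity-dmips-mhz" property in each
 * CPU node. An illustrative (hypothetical) device-tree fragment it would
 * accept:
 *
 *	cpu@0 {
 *		compatible = "arm,cortex-a15";
 *		capacity-dmips-mhz = <1024>;
 *	};
 *	cpu@100 {
 *		compatible = "arm,cortex-a7";
 *		capacity-dmips-mhz = <539>;
 *	};
 *
 * The largest value seen becomes capacity_scale, against which all raw
 * values are later normalized to SCHED_CAPACITY_SCALE.
 */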
static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	int ret = 1;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return !ret;

	ret = of_property_read_u32(cpu_node,
				   "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
				cap_parsing_failed = true;
				return !ret;
			}
		}
		capacity_scale = max(cpu_capacity, capacity_scale);
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
			cpu_node->full_name, raw_capacity[cpu]);
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %s raw capacity\n",
				cpu_node->full_name);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		kfree(raw_capacity);
	}

	return !ret;
}

static void normalize_cpu_capacity(void)
{
	u64 capacity;
	int cpu;

	if (!raw_capacity || cap_parsing_failed)
		return;

	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
	mutex_lock(&cpu_scale_mutex);
	for_each_possible_cpu(cpu) {
		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
			/ capacity_scale;
		set_capacity_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			cpu, arch_scale_cpu_capacity(NULL, cpu));
	}
	mutex_unlock(&cpu_scale_mutex);
}
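
/*
 * Worked example for the normalization above (illustrative numbers only):
 * with raw_capacity = {2048, 1024} and capacity_scale = 2048, the resulting
 * cpu_scale values are (2048 << 10) / 2048 = 1024 and (1024 << 10) / 2048 =
 * 512, i.e. the fastest CPU is pinned to SCHED_CAPACITY_SCALE and the others
 * are expressed relative to it.
 */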

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static bool cap_parsing_done;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (cap_parsing_failed || cap_parsing_done)
		return 0;

	switch (val) {
	case CPUFREQ_NOTIFY:
		pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
				cpumask_pr_args(policy->related_cpus),
				cpumask_pr_args(cpus_to_visit));
		cpumask_andnot(cpus_to_visit,
			       cpus_to_visit,
			       policy->related_cpus);
		for_each_cpu(cpu, policy->related_cpus) {
			raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
					    policy->cpuinfo.max_freq / 1000UL;
			capacity_scale = max(raw_capacity[cpu], capacity_scale);
		}
		if (cpumask_empty(cpus_to_visit)) {
			normalize_cpu_capacity();
			kfree(raw_capacity);
			pr_debug("cpu_capacity: parsing done\n");
			cap_parsing_done = true;
			schedule_work(&parsing_done_work);
		}
	}
	return 0;
}
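
/*
 * The callback above weights the current cpu_scale (the normalized DT
 * capacity) by each policy's maximum frequency: cpuinfo.max_freq is in kHz,
 * so dividing by 1000 yields MHz and the product is "dmips" rather than
 * "dmips per MHz". Illustrative (hypothetical) numbers: a cluster with
 * cpu_scale = 1024 and max_freq = 2000000 kHz gives raw = 1024 * 2000 =
 * 2048000, while a cluster with cpu_scale = 539 at 1000000 kHz gives 539000;
 * after normalize_cpu_capacity() these become 1024 and 269 respectively.
 */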

static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
	if (cap_parsing_failed)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
		pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
		return -ENOMEM;
	}
	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	return cpufreq_register_notifier(&init_cpu_capacity_notifier,
					 CPUFREQ_POLICY_NOTIFIER);
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
					 CPUFREQ_POLICY_NOTIFIER);
}
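
/*
 * Unregistering is deferred to a workqueue here: the policy notifier chain
 * is a blocking notifier, so calling cpufreq_unregister_notifier() from
 * within init_cpu_capacity_callback() itself would try to take the chain's
 * rwsem for writing while the notifier core holds it for reading.
 */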

#else
static int __init free_raw_capacity(void)
{
	kfree(raw_capacity);

	return 0;
}
core_initcall(free_raw_capacity);
#endif

/*
 * Iterate all CPUs' descriptors in the DT and compute the efficiency
 * (as per table_efficiency). Also calculate a middle efficiency
 * as close as possible to (max{eff_i} + min{eff_i}) / 2.
 * This is later used to scale the cpu_capacity field such that an
 * 'average' CPU is of middle capacity. Also see the comments near
 * table_efficiency[] and update_cpu_capacity().
 */
static void __init parse_dt_topology(void)
{
	const struct cpu_efficiency *cpu_eff;
	struct device_node *cn = NULL;
	unsigned long min_capacity = ULONG_MAX;
	unsigned long max_capacity = 0;
	unsigned long capacity = 0;
	int cpu = 0;

	__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
				 GFP_NOWAIT);

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return;
	}

	for_each_possible_cpu(cpu) {
		const u32 *rate;
		int len;

		/* too early to use cpu->of_node */
		cn = of_get_cpu_node(cpu, NULL);
		if (!cn) {
			pr_err("missing device node for CPU %d\n", cpu);
			continue;
		}

		if (parse_cpu_capacity(cn, cpu)) {
			of_node_put(cn);
			continue;
		}

		cap_from_dt = false;

		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
			if (of_device_is_compatible(cn, cpu_eff->compatible))
				break;

		if (cpu_eff->compatible == NULL)
			continue;

		rate = of_get_property(cn, "clock-frequency", &len);
		if (!rate || len != 4) {
			pr_err("%s missing clock-frequency property\n",
				cn->full_name);
			continue;
		}

		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;

		/* Save min capacity of the system */
		if (capacity < min_capacity)
			min_capacity = capacity;

		/* Save max capacity of the system */
		if (capacity > max_capacity)
			max_capacity = capacity;

		cpu_capacity(cpu) = capacity;
	}

	/* If min and max capacities are equal, we bypass the update of the
	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
	 * compute a middle_capacity factor that will ensure that the capacity
	 * of an 'average' CPU of the system will be as close as possible to
	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
	 * constraint explained near table_efficiency[].
	 */
	if (4*max_capacity < (3*(max_capacity + min_capacity)))
		middle_capacity = (min_capacity + max_capacity)
				>> (SCHED_CAPACITY_SHIFT+1);
	else
		middle_capacity = ((max_capacity / 3)
				>> (SCHED_CAPACITY_SHIFT-1)) + 1;

	if (cap_from_dt && !cap_parsing_failed)
		normalize_cpu_capacity();
}
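
/*
 * Continuing the illustrative numbers from the table_efficiency comment
 * (a hypothetical 1 GHz A15 + 1 GHz A7 system): max_capacity = 3708123 and
 * min_capacity = 1951744, so 4 * max (14832492) < 3 * (max + min) (16979601)
 * and middle_capacity = (3708123 + 1951744) >> 11 = 2763. update_cpu_capacity()
 * then yields cpu_scale = 3708123 / 2763 = 1342 for the A15 and
 * 1951744 / 2763 = 706 for the A7, both inside the documented
 * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 bound.
 */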

/*
 * Look for a custom capacity of a CPU in the cpu_capacity table during
 * boot. The update of all CPUs is in O(n^2) for heterogeneous systems but
 * the function returns directly for SMP systems.
 */
static void update_cpu_capacity(unsigned int cpu)
{
	if (!cpu_capacity(cpu) || cap_from_dt)
		return;

	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);

	pr_info("CPU%u: update cpu_capacity %lu\n",
		cpu, arch_scale_cpu_capacity(NULL, cpu));
}

#else
static inline void parse_dt_topology(void) {}
static inline void update_cpu_capacity(unsigned int cpuid) {}
#endif

/*
 * cpu topology table
 */
struct cputopo_arm cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_sibling;
}

/*
 * The current assumption is that we can power gate each core independently.
 * This will be superseded by a DT binding once available.
 */
const struct cpumask *cpu_corepower_mask(int cpu)
{
	return &cpu_topology[cpu].thread_sibling;
}

static void update_siblings_masks(unsigned int cpuid)
{
	struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu;

	/* update core and thread sibling masks */
	for_each_possible_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (cpuid_topo->socket_id != cpu_topo->socket_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		if (cpu != cpuid)
			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		if (cpu != cpuid)
			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
	smp_wmb();
}
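
/*
 * Illustrative result on a hypothetical dual-cluster part (cpu0/cpu1 with
 * socket_id 0, cpu2/cpu3 with socket_id 1, no SMT): core_sibling of cpu0
 * ends up as {0,1} and of cpu2 as {2,3}, while each thread_sibling mask
 * contains only the CPU itself since every core has a distinct core_id.
 */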

/*
 * store_cpu_topology is called at boot when only one CPU is running, and
 * with the cpu_hotplug.lock mutex held once several CPUs have booted,
 * which prevents simultaneous write access to the cpu_topology array.
 */
void store_cpu_topology(unsigned int cpuid)
{
	struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
	unsigned int mpidr;

	/* If the cpu topology has already been set, just return */
	if (cpuid_topo->core_id != -1)
		return;

	mpidr = read_cpuid_mpidr();

	/* create cpu topology mapping */
	if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
		/*
		 * This is a multiprocessor system:
		 * the multiprocessor format and multiprocessor mode fields are set.
		 */

		if (mpidr & MPIDR_MT_BITMASK) {
			/* core performance interdependency */
			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
		} else {
			/* largely independent cores */
			cpuid_topo->thread_id = -1;
			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
		}
	} else {
		/*
		 * This is a uniprocessor system:
		 * either we are in multiprocessor format but on a uniprocessor
		 * system, or in the old uniprocessor format.
		 */
		cpuid_topo->thread_id = -1;
		cpuid_topo->core_id = 0;
		cpuid_topo->socket_id = -1;
	}

	update_siblings_masks(cpuid);

	update_cpu_capacity(cpuid);

	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
		cpuid, cpu_topology[cpuid].thread_id,
		cpu_topology[cpuid].core_id,
		cpu_topology[cpuid].socket_id, mpidr);
}
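
/*
 * Illustrative MPIDR decoding (hypothetical value): on a non-MT system,
 * MPIDR = 0x80000100 gives Aff0 = 0x00 and Aff1 = 0x01, so the CPU is
 * recorded as thread_id = -1, core_id = 0, socket_id = 1, i.e. core 0 of
 * cluster 1 on a typical big.LITTLE layout.
 */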

static inline int cpu_corepower_flags(void)
{
	return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN;
}

static struct sched_domain_topology_level arm_topology[] = {
#ifdef CONFIG_SCHED_MC
	{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/*
 * init_cpu_topology is called at boot when only one cpu is running,
 * which prevents simultaneous write access to the cpu_topology array.
 */
void __init init_cpu_topology(void)
{
	unsigned int cpu;

	/* init core mask and capacity */
	for_each_possible_cpu(cpu) {
		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->socket_id = -1;
		cpumask_clear(&cpu_topo->core_sibling);
		cpumask_clear(&cpu_topo->thread_sibling);
	}
	smp_wmb();

	parse_dt_topology();

	/* Set scheduler topology descriptor */
	set_sched_topology(arm_topology);
}
535