// SPDX-License-Identifier: GPL-2.0
/*
 * Energy Model of CPUs
 *
 * Copyright (c) 2018, Arm ltd.
 * Written by: Quentin Perret, Arm ltd.
 */

#define pr_fmt(fmt) "energy_model: " fmt

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>

/* Mapping of each CPU to the performance domain to which it belongs. */
static DEFINE_PER_CPU(struct em_perf_domain *, em_data);

/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep.
 */
static DEFINE_MUTEX(em_pd_mutex);

#ifdef CONFIG_DEBUG_FS
static struct dentry *rootdir;

static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd)
{
	struct dentry *d;
	char name[24];

	snprintf(name, sizeof(name), "cs:%lu", cs->frequency);

	/* Create per-cs directory */
	d = debugfs_create_dir(name, pd);
	debugfs_create_ulong("frequency", 0444, d, &cs->frequency);
	debugfs_create_ulong("power", 0444, d, &cs->power);
	debugfs_create_ulong("cost", 0444, d, &cs->cost);
}

static int em_debug_cpus_show(struct seq_file *s, void *unused)
{
	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);

static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
{
	struct dentry *d;
	char name[8];
	int i;

	snprintf(name, sizeof(name), "pd%d", cpu);

	/* Create the directory of the performance domain */
	d = debugfs_create_dir(name, rootdir);

	debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops);

	/* Create a sub-directory for each capacity state */
	for (i = 0; i < pd->nr_cap_states; i++)
		em_debug_create_cs(&pd->table[i], d);
}

static int __init em_debug_init(void)
{
	/* Create /sys/kernel/debug/energy_model directory */
	rootdir = debugfs_create_dir("energy_model", NULL);

	return 0;
}
core_initcall(em_debug_init);
#else /* CONFIG_DEBUG_FS */
static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {}
#endif

static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
					   struct em_data_callback *cb)
{
	unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
	unsigned long power, freq, prev_freq = 0;
	int i, ret, cpu = cpumask_first(span);
	struct em_cap_state *table;
	struct em_perf_domain *pd;
	u64 fmax;

	if (!cb->active_power)
		return NULL;

	pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
	if (!pd)
		return NULL;

	table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
	if (!table)
		goto free_pd;

	/* Build the list of capacity states for this performance domain */
	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
		/*
		 * active_power() is a driver callback which ceils 'freq' to
		 * the lowest capacity state of 'cpu' above 'freq' and updates
		 * 'power' and 'freq' accordingly.
		 */
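		/*
		 * Illustrative example (made-up numbers): if the OPPs of
		 * 'cpu' are 600 MHz / 100 mW and 1 GHz / 300 mW, a call made
		 * with freq == 500000 (kHz) is expected to return with
		 * freq == 600000 and power == 100.
		 */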
		ret = cb->active_power(&power, &freq, cpu);
		if (ret) {
			pr_err("pd%d: invalid cap. state: %d\n", cpu, ret);
			goto free_cs_table;
		}

		/*
		 * We expect the driver callback to increase the frequency for
		 * higher capacity states.
		 */
		if (freq <= prev_freq) {
			pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq);
			goto free_cs_table;
		}

		/*
		 * The power returned by active_power() is expected to be
		 * positive, in milli-watts and to fit into 16 bits.
		 */
		if (!power || power > EM_CPU_MAX_POWER) {
			pr_err("pd%d: invalid power: %lu\n", cpu, power);
			goto free_cs_table;
		}

		table[i].power = power;
		table[i].frequency = prev_freq = freq;

		/*
		 * The hertz/watts efficiency ratio should decrease as the
		 * frequency grows on sane platforms. But this isn't always
		 * true in practice so warn the user if a higher OPP is more
		 * power efficient than a lower one.
		 */
		opp_eff = freq / power;
		if (opp_eff >= prev_opp_eff)
			pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state %d\n",
					cpu, i, i - 1);
		prev_opp_eff = opp_eff;
	}

	/* Compute the cost of each capacity state. */
	fmax = (u64) table[nr_states - 1].frequency;
	for (i = 0; i < nr_states; i++) {
		table[i].cost = div64_u64(fmax * table[i].power,
					  table[i].frequency);
	}
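	/*
	 * Worked example (hypothetical OPPs): with states at 500 MHz / 100 mW
	 * and 1 GHz / 400 mW, fmax = 1000000 and the costs come out as
	 * 1000000 * 100 / 500000 = 200 and 1000000 * 400 / 1000000 = 400.
	 * Pre-scaling each state's power to fmax this way lets later energy
	 * estimates reuse 'cost' without dividing by the frequency again.
	 */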
	pd->table = table;
	pd->nr_cap_states = nr_states;
	cpumask_copy(to_cpumask(pd->cpus), span);

	em_debug_create_pd(pd, cpu);

	return pd;

free_cs_table:
	kfree(table);
free_pd:
	kfree(pd);

	return NULL;
}

/**
 * em_cpu_get() - Return the performance domain for a CPU
 * @cpu : CPU to find the performance domain for
 *
 * Return: the performance domain to which 'cpu' belongs, or NULL if it
 * doesn't exist.
 */
struct em_perf_domain *em_cpu_get(int cpu)
{
	return READ_ONCE(per_cpu(em_data, cpu));
}
EXPORT_SYMBOL_GPL(em_cpu_get);

/**
 * em_register_perf_domain() - Register the Energy Model of a performance domain
 * @span : Mask of CPUs in the performance domain
 * @nr_states : Number of capacity states to register
 * @cb : Callback functions providing the data of the Energy Model
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in cb.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return: 0 on success
 */
int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
			    struct em_data_callback *cb)
{
	unsigned long cap, prev_cap = 0;
	struct em_perf_domain *pd;
	int cpu, ret = 0;

	if (!span || !nr_states || !cb)
		return -EINVAL;

	/*
	 * Use a mutex to serialize the registration of performance domains
	 * and let the driver-defined callback functions sleep.
	 */
	mutex_lock(&em_pd_mutex);

	for_each_cpu(cpu, span) {
		/* Make sure we don't register an existing domain again. */
		if (READ_ONCE(per_cpu(em_data, cpu))) {
			ret = -EEXIST;
			goto unlock;
		}

		/*
		 * All CPUs of a domain must have the same micro-architecture
		 * since they all share the same table.
		 */
		cap = arch_scale_cpu_capacity(cpu);
		if (prev_cap && prev_cap != cap) {
			pr_err("CPUs of %*pbl must have the same capacity\n",
			       cpumask_pr_args(span));
			ret = -EINVAL;
			goto unlock;
		}
		prev_cap = cap;
	}

	/* Create the performance domain and add it to the Energy Model. */
	pd = em_create_pd(span, nr_states, cb);
	if (!pd) {
		ret = -EINVAL;
		goto unlock;
	}

	for_each_cpu(cpu, span) {
		/*
		 * The per-cpu array can be read concurrently from
		 * em_cpu_get(). The barrier enforces the ordering needed to
		 * make sure readers can only access well-formed
		 * em_perf_domain structs.
		 */
		smp_store_release(per_cpu_ptr(&em_data, cpu), pd);
	}

	pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span));
unlock:
	mutex_unlock(&em_pd_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(em_register_perf_domain);
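
/*
 * Example registration, as an illustrative sketch: the foo_* names below
 * are hypothetical, but EM_DATA_CB() and em_register_perf_domain() are the
 * real entry points declared in <linux/energy_model.h>.
 *
 *	static int foo_active_power(unsigned long *power, unsigned long *freq,
 *				    int cpu)
 *	{
 *		// ceil *freq to the next OPP of 'cpu' and report that OPP's
 *		// frequency (kHz) and active power (mW)
 *		*freq = foo_ceil_freq_to_opp_khz(cpu, *freq);
 *		*power = foo_opp_power_mw(cpu, *freq);
 *		return 0;
 *	}
 *
 *	static struct em_data_callback foo_em_cb = EM_DATA_CB(foo_active_power);
 *
 *	// once per performance domain, e.g. from a cpufreq driver's init:
 *	ret = em_register_perf_domain(cpus, nr_opps, &foo_em_cb);
 */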