1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2024 Google LLC 4 */ 5 6 #include <linux/arch_topology.h> 7 #include <linux/cpufreq.h> 8 #include <linux/init.h> 9 #include <linux/sched.h> 10 #include <linux/kernel.h> 11 #include <linux/module.h> 12 #include <linux/of_address.h> 13 #include <linux/of_platform.h> 14 #include <linux/platform_device.h> 15 #include <linux/slab.h> 16 17 /* 18 * CPU0..CPUn 19 * +-------------+-------------------------------+--------+-------+ 20 * | Register | Description | Offset | Len | 21 * +-------------+-------------------------------+--------+-------+ 22 * | cur_perf | read this register to get | 0x0 | 0x4 | 23 * | | the current perf (integer val | | | 24 * | | representing perf relative to | | | 25 * | | max performance) | | | 26 * | | that vCPU is running at | | | 27 * +-------------+-------------------------------+--------+-------+ 28 * | set_perf | write to this register to set | 0x4 | 0x4 | 29 * | | perf value of the vCPU | | | 30 * +-------------+-------------------------------+--------+-------+ 31 * | perftbl_len | number of entries in perf | 0x8 | 0x4 | 32 * | | table. A single entry in the | | | 33 * | | perf table denotes no table | | | 34 * | | and the entry contains | | | 35 * | | the maximum perf value | | | 36 * | | that this vCPU supports. | | | 37 * | | The guest can request any | | | 38 * | | value between 1 and max perf | | | 39 * | | when perftbls are not used. | | | 40 * +---------------------------------------------+--------+-------+ 41 * | perftbl_sel | write to this register to | 0xc | 0x4 | 42 * | | select perf table entry to | | | 43 * | | read from | | | 44 * +---------------------------------------------+--------+-------+ 45 * | perftbl_rd | read this register to get | 0x10 | 0x4 | 46 * | | perf value of the selected | | | 47 * | | entry based on perftbl_sel | | | 48 * +---------------------------------------------+--------+-------+ 49 * | perf_domain | performance domain number | 0x14 | 0x4 | 50 * | | that this vCPU belongs to. | | | 51 * | | vCPUs sharing the same perf | | | 52 * | | domain number are part of the | | | 53 * | | same performance domain. | | | 54 * +-------------+-------------------------------+--------+-------+ 55 */ 56 57 #define REG_CUR_PERF_STATE_OFFSET 0x0 58 #define REG_SET_PERF_STATE_OFFSET 0x4 59 #define REG_PERFTBL_LEN_OFFSET 0x8 60 #define REG_PERFTBL_SEL_OFFSET 0xc 61 #define REG_PERFTBL_RD_OFFSET 0x10 62 #define REG_PERF_DOMAIN_OFFSET 0x14 63 #define PER_CPU_OFFSET 0x1000 64 65 #define PERFTBL_MAX_ENTRIES 64U 66 67 static void __iomem *base; 68 static DEFINE_PER_CPU(u32, perftbl_num_entries); 69 70 static void virt_scale_freq_tick(void) 71 { 72 int cpu = smp_processor_id(); 73 u32 max_freq = (u32)cpufreq_get_hw_max_freq(cpu); 74 u64 cur_freq; 75 unsigned long scale; 76 77 cur_freq = (u64)readl_relaxed(base + cpu * PER_CPU_OFFSET 78 + REG_CUR_PERF_STATE_OFFSET); 79 80 cur_freq <<= SCHED_CAPACITY_SHIFT; 81 scale = (unsigned long)div_u64(cur_freq, max_freq); 82 scale = min(scale, SCHED_CAPACITY_SCALE); 83 84 this_cpu_write(arch_freq_scale, scale); 85 } 86 87 static struct scale_freq_data virt_sfd = { 88 .source = SCALE_FREQ_SOURCE_VIRT, 89 .set_freq_scale = virt_scale_freq_tick, 90 }; 91 92 static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy, 93 unsigned int target_freq) 94 { 95 writel_relaxed(target_freq, 96 base + policy->cpu * PER_CPU_OFFSET + REG_SET_PERF_STATE_OFFSET); 97 return 0; 98 } 99 100 static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy, 101 unsigned int target_freq) 102 { 103 virt_cpufreq_set_perf(policy, target_freq); 104 return target_freq; 105 } 106 107 static u32 virt_cpufreq_get_perftbl_entry(int cpu, u32 idx) 108 { 109 writel_relaxed(idx, base + cpu * PER_CPU_OFFSET + 110 REG_PERFTBL_SEL_OFFSET); 111 return readl_relaxed(base + cpu * PER_CPU_OFFSET + 112 REG_PERFTBL_RD_OFFSET); 113 } 114 115 static int virt_cpufreq_target(struct cpufreq_policy *policy, 116 unsigned int target_freq, 117 unsigned int relation) 118 { 119 struct cpufreq_freqs freqs; 120 int ret = 0; 121 122 freqs.old = policy->cur; 123 freqs.new = target_freq; 124 125 cpufreq_freq_transition_begin(policy, &freqs); 126 ret = virt_cpufreq_set_perf(policy, target_freq); 127 cpufreq_freq_transition_end(policy, &freqs, ret != 0); 128 129 return ret; 130 } 131 132 static int virt_cpufreq_get_sharing_cpus(struct cpufreq_policy *policy) 133 { 134 u32 cur_perf_domain, perf_domain; 135 struct device *cpu_dev; 136 int cpu; 137 138 cur_perf_domain = readl_relaxed(base + policy->cpu * 139 PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET); 140 141 for_each_present_cpu(cpu) { 142 cpu_dev = get_cpu_device(cpu); 143 if (!cpu_dev) 144 continue; 145 146 perf_domain = readl_relaxed(base + cpu * 147 PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET); 148 149 if (perf_domain == cur_perf_domain) 150 cpumask_set_cpu(cpu, policy->cpus); 151 } 152 153 return 0; 154 } 155 156 static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy) 157 { 158 struct cpufreq_frequency_table *table; 159 u32 num_perftbl_entries, idx; 160 161 num_perftbl_entries = per_cpu(perftbl_num_entries, policy->cpu); 162 163 if (num_perftbl_entries == 1) { 164 policy->cpuinfo.min_freq = 1; 165 policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0); 166 167 policy->cur = policy->cpuinfo.max_freq; 168 return 0; 169 } 170 171 table = kzalloc_objs(*table, num_perftbl_entries + 1); 172 if (!table) 173 return -ENOMEM; 174 175 for (idx = 0; idx < num_perftbl_entries; idx++) 176 table[idx].frequency = virt_cpufreq_get_perftbl_entry(policy->cpu, idx); 177 178 table[idx].frequency = CPUFREQ_TABLE_END; 179 policy->freq_table = table; 180 181 return 0; 182 } 183 184 static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy) 185 { 186 struct device *cpu_dev; 187 int ret; 188 189 cpu_dev = get_cpu_device(policy->cpu); 190 if (!cpu_dev) 191 return -ENODEV; 192 193 ret = virt_cpufreq_get_freq_info(policy); 194 if (ret) { 195 dev_warn(cpu_dev, "failed to get cpufreq info\n"); 196 return ret; 197 } 198 199 ret = virt_cpufreq_get_sharing_cpus(policy); 200 if (ret) { 201 dev_warn(cpu_dev, "failed to get sharing cpumask\n"); 202 return ret; 203 } 204 205 /* 206 * To simplify and improve latency of handling frequency requests on 207 * the host side, this ensures that the vCPU thread triggering the MMIO 208 * abort is the same thread whose performance constraints (Ex. uclamp 209 * settings) need to be updated. This simplifies the VMM (Virtual 210 * Machine Manager) having to find the correct vCPU thread and/or 211 * facing permission issues when configuring other threads. 212 */ 213 policy->dvfs_possible_from_any_cpu = false; 214 policy->fast_switch_possible = true; 215 216 /* 217 * Using the default SCALE_FREQ_SOURCE_CPUFREQ is insufficient since 218 * the actual physical CPU frequency may not match requested frequency 219 * from the vCPU thread due to frequency update latencies or other 220 * inputs to the physical CPU frequency selection. This additional FIE 221 * source allows for more accurate freq_scale updates and only takes 222 * effect if another FIE source such as AMUs have not been registered. 223 */ 224 topology_set_scale_freq_source(&virt_sfd, policy->cpus); 225 226 return 0; 227 } 228 229 static void virt_cpufreq_cpu_exit(struct cpufreq_policy *policy) 230 { 231 topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus); 232 kfree(policy->freq_table); 233 } 234 235 static int virt_cpufreq_online(struct cpufreq_policy *policy) 236 { 237 /* Nothing to restore. */ 238 return 0; 239 } 240 241 static int virt_cpufreq_offline(struct cpufreq_policy *policy) 242 { 243 /* Dummy offline() to avoid exit() being called and freeing resources. */ 244 return 0; 245 } 246 247 static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy) 248 { 249 if (policy->freq_table) 250 return cpufreq_frequency_table_verify(policy); 251 252 cpufreq_verify_within_cpu_limits(policy); 253 return 0; 254 } 255 256 static struct cpufreq_driver cpufreq_virt_driver = { 257 .name = "virt-cpufreq", 258 .init = virt_cpufreq_cpu_init, 259 .exit = virt_cpufreq_cpu_exit, 260 .online = virt_cpufreq_online, 261 .offline = virt_cpufreq_offline, 262 .verify = virt_cpufreq_verify_policy, 263 .target = virt_cpufreq_target, 264 .fast_switch = virt_cpufreq_fast_switch, 265 }; 266 267 static int virt_cpufreq_driver_probe(struct platform_device *pdev) 268 { 269 u32 num_perftbl_entries; 270 int ret, cpu; 271 272 base = devm_platform_ioremap_resource(pdev, 0); 273 if (IS_ERR(base)) 274 return PTR_ERR(base); 275 276 for_each_possible_cpu(cpu) { 277 num_perftbl_entries = readl_relaxed(base + cpu * PER_CPU_OFFSET + 278 REG_PERFTBL_LEN_OFFSET); 279 280 if (!num_perftbl_entries || num_perftbl_entries > PERFTBL_MAX_ENTRIES) 281 return -ENODEV; 282 283 per_cpu(perftbl_num_entries, cpu) = num_perftbl_entries; 284 } 285 286 ret = cpufreq_register_driver(&cpufreq_virt_driver); 287 if (ret) { 288 dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret); 289 return ret; 290 } 291 292 dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n"); 293 return 0; 294 } 295 296 static void virt_cpufreq_driver_remove(struct platform_device *pdev) 297 { 298 cpufreq_unregister_driver(&cpufreq_virt_driver); 299 } 300 301 static const struct of_device_id virt_cpufreq_match[] = { 302 { .compatible = "qemu,virtual-cpufreq", .data = NULL}, 303 {} 304 }; 305 MODULE_DEVICE_TABLE(of, virt_cpufreq_match); 306 307 static struct platform_driver virt_cpufreq_driver = { 308 .probe = virt_cpufreq_driver_probe, 309 .remove = virt_cpufreq_driver_remove, 310 .driver = { 311 .name = "virt-cpufreq", 312 .of_match_table = virt_cpufreq_match, 313 }, 314 }; 315 316 static int __init virt_cpufreq_init(void) 317 { 318 return platform_driver_register(&virt_cpufreq_driver); 319 } 320 postcore_initcall(virt_cpufreq_init); 321 322 static void __exit virt_cpufreq_exit(void) 323 { 324 platform_driver_unregister(&virt_cpufreq_driver); 325 } 326 module_exit(virt_cpufreq_exit); 327 328 MODULE_DESCRIPTION("Virtual cpufreq driver"); 329 MODULE_LICENSE("GPL"); 330