xref: /linux/drivers/cpufreq/virtual-cpufreq.c (revision 25768de50b1f2dbb6ea44bd5148a87fe2c9c3688)
1*4fd06a53SDavid Dai // SPDX-License-Identifier: GPL-2.0-only
2*4fd06a53SDavid Dai /*
3*4fd06a53SDavid Dai  * Copyright (C) 2024 Google LLC
4*4fd06a53SDavid Dai  */
5*4fd06a53SDavid Dai 
6*4fd06a53SDavid Dai #include <linux/arch_topology.h>
7*4fd06a53SDavid Dai #include <linux/cpufreq.h>
8*4fd06a53SDavid Dai #include <linux/init.h>
9*4fd06a53SDavid Dai #include <linux/sched.h>
10*4fd06a53SDavid Dai #include <linux/kernel.h>
11*4fd06a53SDavid Dai #include <linux/module.h>
12*4fd06a53SDavid Dai #include <linux/of_address.h>
13*4fd06a53SDavid Dai #include <linux/of_platform.h>
14*4fd06a53SDavid Dai #include <linux/platform_device.h>
15*4fd06a53SDavid Dai #include <linux/slab.h>
16*4fd06a53SDavid Dai 
/*
 * CPU0..CPUn
 * +-------------+-------------------------------+--------+-------+
 * | Register    | Description                   | Offset |   Len |
 * +-------------+-------------------------------+--------+-------+
 * | cur_perf    | read this register to get     |    0x0 |   0x4 |
 * |             | the current perf (integer val |        |       |
 * |             | representing perf relative to |        |       |
 * |             | max performance)              |        |       |
 * |             | that vCPU is running at       |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | set_perf    | write to this register to set |    0x4 |   0x4 |
 * |             | perf value of the vCPU        |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perftbl_len | number of entries in perf     |    0x8 |   0x4 |
 * |             | table. A single entry in the  |        |       |
 * |             | perf table denotes no table   |        |       |
 * |             | and the entry contains        |        |       |
 * |             | the maximum perf value        |        |       |
 * |             | that this vCPU supports.      |        |       |
 * |             | The guest can request any     |        |       |
 * |             | value between 1 and max perf  |        |       |
 * |             | when perftbls are not used.   |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perftbl_sel | write to this register to     |    0xc |   0x4 |
 * |             | select perf table entry to    |        |       |
 * |             | read from                     |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perftbl_rd  | read this register to get     |   0x10 |   0x4 |
 * |             | perf value of the selected    |        |       |
 * |             | entry based on perftbl_sel    |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perf_domain | performance domain number     |   0x14 |   0x4 |
 * |             | that this vCPU belongs to.    |        |       |
 * |             | vCPUs sharing the same perf   |        |       |
 * |             | domain number are part of the |        |       |
 * |             | same performance domain.      |        |       |
 * +-------------+-------------------------------+--------+-------+
 */
56*4fd06a53SDavid Dai 
/* Register offsets inside one vCPU's register window. */
#define REG_CUR_PERF_STATE_OFFSET	0x0
#define REG_SET_PERF_STATE_OFFSET	0x4
#define REG_PERFTBL_LEN_OFFSET		0x8
#define REG_PERFTBL_SEL_OFFSET		0xc
#define REG_PERFTBL_RD_OFFSET		0x10
#define REG_PERF_DOMAIN_OFFSET		0x14

/* Distance between the register windows of consecutive vCPUs. */
#define PER_CPU_OFFSET			0x1000

/* Upper bound on the perftbl_len value accepted by the probe routine. */
#define PERFTBL_MAX_ENTRIES		64U
66*4fd06a53SDavid Dai 
67*4fd06a53SDavid Dai static void __iomem *base;
68*4fd06a53SDavid Dai static DEFINE_PER_CPU(u32, perftbl_num_entries);
69*4fd06a53SDavid Dai 
70*4fd06a53SDavid Dai static void virt_scale_freq_tick(void)
71*4fd06a53SDavid Dai {
72*4fd06a53SDavid Dai 	int cpu = smp_processor_id();
73*4fd06a53SDavid Dai 	u32 max_freq = (u32)cpufreq_get_hw_max_freq(cpu);
74*4fd06a53SDavid Dai 	u64 cur_freq;
75*4fd06a53SDavid Dai 	unsigned long scale;
76*4fd06a53SDavid Dai 
77*4fd06a53SDavid Dai 	cur_freq = (u64)readl_relaxed(base + cpu * PER_CPU_OFFSET
78*4fd06a53SDavid Dai 			+ REG_CUR_PERF_STATE_OFFSET);
79*4fd06a53SDavid Dai 
80*4fd06a53SDavid Dai 	cur_freq <<= SCHED_CAPACITY_SHIFT;
81*4fd06a53SDavid Dai 	scale = (unsigned long)div_u64(cur_freq, max_freq);
82*4fd06a53SDavid Dai 	scale = min(scale, SCHED_CAPACITY_SCALE);
83*4fd06a53SDavid Dai 
84*4fd06a53SDavid Dai 	this_cpu_write(arch_freq_scale, scale);
85*4fd06a53SDavid Dai }
86*4fd06a53SDavid Dai 
87*4fd06a53SDavid Dai static struct scale_freq_data virt_sfd = {
88*4fd06a53SDavid Dai 	.source = SCALE_FREQ_SOURCE_VIRT,
89*4fd06a53SDavid Dai 	.set_freq_scale = virt_scale_freq_tick,
90*4fd06a53SDavid Dai };
91*4fd06a53SDavid Dai 
92*4fd06a53SDavid Dai static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy,
93*4fd06a53SDavid Dai 					  unsigned int target_freq)
94*4fd06a53SDavid Dai {
95*4fd06a53SDavid Dai 	writel_relaxed(target_freq,
96*4fd06a53SDavid Dai 		       base + policy->cpu * PER_CPU_OFFSET + REG_SET_PERF_STATE_OFFSET);
97*4fd06a53SDavid Dai 	return 0;
98*4fd06a53SDavid Dai }
99*4fd06a53SDavid Dai 
/*
 * .fast_switch callback: forward @target_freq to the set_perf register of
 * @policy->cpu.
 *
 * Return: @target_freq, unchanged.
 */
static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy,
					     unsigned int target_freq)
{
	/* The helper's return value is always 0, so it is not inspected. */
	virt_cpufreq_set_perf(policy, target_freq);

	return target_freq;
}
106*4fd06a53SDavid Dai 
107*4fd06a53SDavid Dai static u32 virt_cpufreq_get_perftbl_entry(int cpu, u32 idx)
108*4fd06a53SDavid Dai {
109*4fd06a53SDavid Dai 	writel_relaxed(idx, base + cpu * PER_CPU_OFFSET +
110*4fd06a53SDavid Dai 		       REG_PERFTBL_SEL_OFFSET);
111*4fd06a53SDavid Dai 	return readl_relaxed(base + cpu * PER_CPU_OFFSET +
112*4fd06a53SDavid Dai 			     REG_PERFTBL_RD_OFFSET);
113*4fd06a53SDavid Dai }
114*4fd06a53SDavid Dai 
115*4fd06a53SDavid Dai static int virt_cpufreq_target(struct cpufreq_policy *policy,
116*4fd06a53SDavid Dai 			       unsigned int target_freq,
117*4fd06a53SDavid Dai 			       unsigned int relation)
118*4fd06a53SDavid Dai {
119*4fd06a53SDavid Dai 	struct cpufreq_freqs freqs;
120*4fd06a53SDavid Dai 	int ret = 0;
121*4fd06a53SDavid Dai 
122*4fd06a53SDavid Dai 	freqs.old = policy->cur;
123*4fd06a53SDavid Dai 	freqs.new = target_freq;
124*4fd06a53SDavid Dai 
125*4fd06a53SDavid Dai 	cpufreq_freq_transition_begin(policy, &freqs);
126*4fd06a53SDavid Dai 	ret = virt_cpufreq_set_perf(policy, target_freq);
127*4fd06a53SDavid Dai 	cpufreq_freq_transition_end(policy, &freqs, ret != 0);
128*4fd06a53SDavid Dai 
129*4fd06a53SDavid Dai 	return ret;
130*4fd06a53SDavid Dai }
131*4fd06a53SDavid Dai 
132*4fd06a53SDavid Dai static int virt_cpufreq_get_sharing_cpus(struct cpufreq_policy *policy)
133*4fd06a53SDavid Dai {
134*4fd06a53SDavid Dai 	u32 cur_perf_domain, perf_domain;
135*4fd06a53SDavid Dai 	struct device *cpu_dev;
136*4fd06a53SDavid Dai 	int cpu;
137*4fd06a53SDavid Dai 
138*4fd06a53SDavid Dai 	cur_perf_domain = readl_relaxed(base + policy->cpu *
139*4fd06a53SDavid Dai 					PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
140*4fd06a53SDavid Dai 
141*4fd06a53SDavid Dai 	for_each_possible_cpu(cpu) {
142*4fd06a53SDavid Dai 		cpu_dev = get_cpu_device(cpu);
143*4fd06a53SDavid Dai 		if (!cpu_dev)
144*4fd06a53SDavid Dai 			continue;
145*4fd06a53SDavid Dai 
146*4fd06a53SDavid Dai 		perf_domain = readl_relaxed(base + cpu *
147*4fd06a53SDavid Dai 					    PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
148*4fd06a53SDavid Dai 
149*4fd06a53SDavid Dai 		if (perf_domain == cur_perf_domain)
150*4fd06a53SDavid Dai 			cpumask_set_cpu(cpu, policy->cpus);
151*4fd06a53SDavid Dai 	}
152*4fd06a53SDavid Dai 
153*4fd06a53SDavid Dai 	return 0;
154*4fd06a53SDavid Dai }
155*4fd06a53SDavid Dai 
156*4fd06a53SDavid Dai static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy)
157*4fd06a53SDavid Dai {
158*4fd06a53SDavid Dai 	struct cpufreq_frequency_table *table;
159*4fd06a53SDavid Dai 	u32 num_perftbl_entries, idx;
160*4fd06a53SDavid Dai 
161*4fd06a53SDavid Dai 	num_perftbl_entries = per_cpu(perftbl_num_entries, policy->cpu);
162*4fd06a53SDavid Dai 
163*4fd06a53SDavid Dai 	if (num_perftbl_entries == 1) {
164*4fd06a53SDavid Dai 		policy->cpuinfo.min_freq = 1;
165*4fd06a53SDavid Dai 		policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0);
166*4fd06a53SDavid Dai 
167*4fd06a53SDavid Dai 		policy->min = policy->cpuinfo.min_freq;
168*4fd06a53SDavid Dai 		policy->max = policy->cpuinfo.max_freq;
169*4fd06a53SDavid Dai 
170*4fd06a53SDavid Dai 		policy->cur = policy->max;
171*4fd06a53SDavid Dai 		return 0;
172*4fd06a53SDavid Dai 	}
173*4fd06a53SDavid Dai 
174*4fd06a53SDavid Dai 	table = kcalloc(num_perftbl_entries + 1, sizeof(*table), GFP_KERNEL);
175*4fd06a53SDavid Dai 	if (!table)
176*4fd06a53SDavid Dai 		return -ENOMEM;
177*4fd06a53SDavid Dai 
178*4fd06a53SDavid Dai 	for (idx = 0; idx < num_perftbl_entries; idx++)
179*4fd06a53SDavid Dai 		table[idx].frequency = virt_cpufreq_get_perftbl_entry(policy->cpu, idx);
180*4fd06a53SDavid Dai 
181*4fd06a53SDavid Dai 	table[idx].frequency = CPUFREQ_TABLE_END;
182*4fd06a53SDavid Dai 	policy->freq_table = table;
183*4fd06a53SDavid Dai 
184*4fd06a53SDavid Dai 	return 0;
185*4fd06a53SDavid Dai }
186*4fd06a53SDavid Dai 
187*4fd06a53SDavid Dai static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy)
188*4fd06a53SDavid Dai {
189*4fd06a53SDavid Dai 	struct device *cpu_dev;
190*4fd06a53SDavid Dai 	int ret;
191*4fd06a53SDavid Dai 
192*4fd06a53SDavid Dai 	cpu_dev = get_cpu_device(policy->cpu);
193*4fd06a53SDavid Dai 	if (!cpu_dev)
194*4fd06a53SDavid Dai 		return -ENODEV;
195*4fd06a53SDavid Dai 
196*4fd06a53SDavid Dai 	ret = virt_cpufreq_get_freq_info(policy);
197*4fd06a53SDavid Dai 	if (ret) {
198*4fd06a53SDavid Dai 		dev_warn(cpu_dev, "failed to get cpufreq info\n");
199*4fd06a53SDavid Dai 		return ret;
200*4fd06a53SDavid Dai 	}
201*4fd06a53SDavid Dai 
202*4fd06a53SDavid Dai 	ret = virt_cpufreq_get_sharing_cpus(policy);
203*4fd06a53SDavid Dai 	if (ret) {
204*4fd06a53SDavid Dai 		dev_warn(cpu_dev, "failed to get sharing cpumask\n");
205*4fd06a53SDavid Dai 		return ret;
206*4fd06a53SDavid Dai 	}
207*4fd06a53SDavid Dai 
208*4fd06a53SDavid Dai 	/*
209*4fd06a53SDavid Dai 	 * To simplify and improve latency of handling frequency requests on
210*4fd06a53SDavid Dai 	 * the host side, this ensures that the vCPU thread triggering the MMIO
211*4fd06a53SDavid Dai 	 * abort is the same thread whose performance constraints (Ex. uclamp
212*4fd06a53SDavid Dai 	 * settings) need to be updated. This simplifies the VMM (Virtual
213*4fd06a53SDavid Dai 	 * Machine Manager) having to find the correct vCPU thread and/or
214*4fd06a53SDavid Dai 	 * facing permission issues when configuring other threads.
215*4fd06a53SDavid Dai 	 */
216*4fd06a53SDavid Dai 	policy->dvfs_possible_from_any_cpu = false;
217*4fd06a53SDavid Dai 	policy->fast_switch_possible = true;
218*4fd06a53SDavid Dai 
219*4fd06a53SDavid Dai 	/*
220*4fd06a53SDavid Dai 	 * Using the default SCALE_FREQ_SOURCE_CPUFREQ is insufficient since
221*4fd06a53SDavid Dai 	 * the actual physical CPU frequency may not match requested frequency
222*4fd06a53SDavid Dai 	 * from the vCPU thread due to frequency update latencies or other
223*4fd06a53SDavid Dai 	 * inputs to the physical CPU frequency selection. This additional FIE
224*4fd06a53SDavid Dai 	 * source allows for more accurate freq_scale updates and only takes
225*4fd06a53SDavid Dai 	 * effect if another FIE source such as AMUs have not been registered.
226*4fd06a53SDavid Dai 	 */
227*4fd06a53SDavid Dai 	topology_set_scale_freq_source(&virt_sfd, policy->cpus);
228*4fd06a53SDavid Dai 
229*4fd06a53SDavid Dai 	return 0;
230*4fd06a53SDavid Dai }
231*4fd06a53SDavid Dai 
232*4fd06a53SDavid Dai static void virt_cpufreq_cpu_exit(struct cpufreq_policy *policy)
233*4fd06a53SDavid Dai {
234*4fd06a53SDavid Dai 	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus);
235*4fd06a53SDavid Dai 	kfree(policy->freq_table);
236*4fd06a53SDavid Dai }
237*4fd06a53SDavid Dai 
/*
 * .online callback. This driver keeps no state that needs restoring.
 *
 * Return: always 0.
 */
static int virt_cpufreq_online(struct cpufreq_policy *policy)
{
	return 0;
}
243*4fd06a53SDavid Dai 
/*
 * .offline callback. Intentionally empty: it exists so that exit() is not
 * called, and resources are not freed, when the policy goes offline.
 *
 * Return: always 0.
 */
static int virt_cpufreq_offline(struct cpufreq_policy *policy)
{
	return 0;
}
249*4fd06a53SDavid Dai 
250*4fd06a53SDavid Dai static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
251*4fd06a53SDavid Dai {
252*4fd06a53SDavid Dai 	if (policy->freq_table)
253*4fd06a53SDavid Dai 		return cpufreq_frequency_table_verify(policy, policy->freq_table);
254*4fd06a53SDavid Dai 
255*4fd06a53SDavid Dai 	cpufreq_verify_within_cpu_limits(policy);
256*4fd06a53SDavid Dai 	return 0;
257*4fd06a53SDavid Dai }
258*4fd06a53SDavid Dai 
259*4fd06a53SDavid Dai static struct cpufreq_driver cpufreq_virt_driver = {
260*4fd06a53SDavid Dai 	.name		= "virt-cpufreq",
261*4fd06a53SDavid Dai 	.init		= virt_cpufreq_cpu_init,
262*4fd06a53SDavid Dai 	.exit		= virt_cpufreq_cpu_exit,
263*4fd06a53SDavid Dai 	.online         = virt_cpufreq_online,
264*4fd06a53SDavid Dai 	.offline        = virt_cpufreq_offline,
265*4fd06a53SDavid Dai 	.verify		= virt_cpufreq_verify_policy,
266*4fd06a53SDavid Dai 	.target		= virt_cpufreq_target,
267*4fd06a53SDavid Dai 	.fast_switch	= virt_cpufreq_fast_switch,
268*4fd06a53SDavid Dai 	.attr		= cpufreq_generic_attr,
269*4fd06a53SDavid Dai };
270*4fd06a53SDavid Dai 
271*4fd06a53SDavid Dai static int virt_cpufreq_driver_probe(struct platform_device *pdev)
272*4fd06a53SDavid Dai {
273*4fd06a53SDavid Dai 	u32 num_perftbl_entries;
274*4fd06a53SDavid Dai 	int ret, cpu;
275*4fd06a53SDavid Dai 
276*4fd06a53SDavid Dai 	base = devm_platform_ioremap_resource(pdev, 0);
277*4fd06a53SDavid Dai 	if (IS_ERR(base))
278*4fd06a53SDavid Dai 		return PTR_ERR(base);
279*4fd06a53SDavid Dai 
280*4fd06a53SDavid Dai 	for_each_possible_cpu(cpu) {
281*4fd06a53SDavid Dai 		num_perftbl_entries = readl_relaxed(base + cpu * PER_CPU_OFFSET +
282*4fd06a53SDavid Dai 						    REG_PERFTBL_LEN_OFFSET);
283*4fd06a53SDavid Dai 
284*4fd06a53SDavid Dai 		if (!num_perftbl_entries || num_perftbl_entries > PERFTBL_MAX_ENTRIES)
285*4fd06a53SDavid Dai 			return -ENODEV;
286*4fd06a53SDavid Dai 
287*4fd06a53SDavid Dai 		per_cpu(perftbl_num_entries, cpu) = num_perftbl_entries;
288*4fd06a53SDavid Dai 	}
289*4fd06a53SDavid Dai 
290*4fd06a53SDavid Dai 	ret = cpufreq_register_driver(&cpufreq_virt_driver);
291*4fd06a53SDavid Dai 	if (ret) {
292*4fd06a53SDavid Dai 		dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret);
293*4fd06a53SDavid Dai 		return ret;
294*4fd06a53SDavid Dai 	}
295*4fd06a53SDavid Dai 
296*4fd06a53SDavid Dai 	dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n");
297*4fd06a53SDavid Dai 	return 0;
298*4fd06a53SDavid Dai }
299*4fd06a53SDavid Dai 
300*4fd06a53SDavid Dai static void virt_cpufreq_driver_remove(struct platform_device *pdev)
301*4fd06a53SDavid Dai {
302*4fd06a53SDavid Dai 	cpufreq_unregister_driver(&cpufreq_virt_driver);
303*4fd06a53SDavid Dai }
304*4fd06a53SDavid Dai 
305*4fd06a53SDavid Dai static const struct of_device_id virt_cpufreq_match[] = {
306*4fd06a53SDavid Dai 	{ .compatible = "qemu,virtual-cpufreq", .data = NULL},
307*4fd06a53SDavid Dai 	{}
308*4fd06a53SDavid Dai };
309*4fd06a53SDavid Dai MODULE_DEVICE_TABLE(of, virt_cpufreq_match);
310*4fd06a53SDavid Dai 
311*4fd06a53SDavid Dai static struct platform_driver virt_cpufreq_driver = {
312*4fd06a53SDavid Dai 	.probe = virt_cpufreq_driver_probe,
313*4fd06a53SDavid Dai 	.remove = virt_cpufreq_driver_remove,
314*4fd06a53SDavid Dai 	.driver = {
315*4fd06a53SDavid Dai 		.name = "virt-cpufreq",
316*4fd06a53SDavid Dai 		.of_match_table = virt_cpufreq_match,
317*4fd06a53SDavid Dai 	},
318*4fd06a53SDavid Dai };
319*4fd06a53SDavid Dai 
320*4fd06a53SDavid Dai static int __init virt_cpufreq_init(void)
321*4fd06a53SDavid Dai {
322*4fd06a53SDavid Dai 	return platform_driver_register(&virt_cpufreq_driver);
323*4fd06a53SDavid Dai }
324*4fd06a53SDavid Dai postcore_initcall(virt_cpufreq_init);
325*4fd06a53SDavid Dai 
326*4fd06a53SDavid Dai static void __exit virt_cpufreq_exit(void)
327*4fd06a53SDavid Dai {
328*4fd06a53SDavid Dai 	platform_driver_unregister(&virt_cpufreq_driver);
329*4fd06a53SDavid Dai }
330*4fd06a53SDavid Dai module_exit(virt_cpufreq_exit);
331*4fd06a53SDavid Dai 
332*4fd06a53SDavid Dai MODULE_DESCRIPTION("Virtual cpufreq driver");
333*4fd06a53SDavid Dai MODULE_LICENSE("GPL");
334