xref: /linux/drivers/powercap/intel_rapl_msr.c (revision aec2f682d47c54ef434b2d440992626d80b1ebdc)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Intel Running Average Power Limit (RAPL) Driver via MSR interface
4  * Copyright (c) 2019, Intel Corporation.
5  */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/list.h>
11 #include <linux/types.h>
12 #include <linux/device.h>
13 #include <linux/slab.h>
14 #include <linux/log2.h>
15 #include <linux/bitmap.h>
16 #include <linux/delay.h>
17 #include <linux/sysfs.h>
18 #include <linux/cpu.h>
19 #include <linux/powercap.h>
20 #include <linux/suspend.h>
21 #include <linux/intel_rapl.h>
22 #include <linux/processor.h>
23 #include <linux/platform_device.h>
24 #include <linux/units.h>
25 #include <linux/bits.h>
26 
27 #include <asm/cpu_device_id.h>
28 #include <asm/intel-family.h>
29 #include <asm/iosf_mbi.h>
30 #include <asm/msr.h>
31 
/* MSR addresses defined locally by this driver */
#define MSR_PLATFORM_POWER_LIMIT	0x0000065C
#define MSR_VR_CURRENT_CONFIG		0x00000601

/* Scale from driver unit to powercap unit */
#define ENERGY_UNIT_SCALE		1000

/* Unit register fields: each mask is paired with the shift of its low bit */
#define POWER_UNIT_MASK			GENMASK(3, 0)
#define POWER_UNIT_OFFSET		0

#define ENERGY_UNIT_MASK		GENMASK(12, 8)
#define ENERGY_UNIT_OFFSET		8

#define TIME_UNIT_MASK			GENMASK(19, 16)
#define TIME_UNIT_OFFSET		16

/*
 * Bitmasks for RAPL MSRs, used by the primitive access functions.
 * They are grouped below by the register field they select.
 */

/* Energy status counter */
#define ENERGY_STATUS_MASK		GENMASK(31, 0)

/* Power limit register, bits 31:0 */
#define POWER_LIMIT1_MASK		GENMASK(14, 0)
#define POWER_LIMIT1_ENABLE		BIT(15)
#define POWER_LIMIT1_CLAMP		BIT(16)
#define TIME_WINDOW1_MASK		GENMASK_ULL(23, 17)
#define POWER_LOW_LOCK			BIT(31)

/* Power limit register, bits 63:32 */
#define POWER_LIMIT2_MASK		GENMASK_ULL(46, 32)
#define POWER_LIMIT2_ENABLE		BIT_ULL(47)
#define POWER_LIMIT2_CLAMP		BIT_ULL(48)
#define TIME_WINDOW2_MASK		GENMASK_ULL(55, 49)
#define POWER_HIGH_LOCK			BIT_ULL(63)

/* Power limit 4 value field */
#define POWER_LIMIT4_MASK		GENMASK(12, 0)

/* Power info register */
#define POWER_INFO_THERMAL_SPEC_MASK	GENMASK(14, 0)
#define POWER_INFO_MIN_MASK		GENMASK_ULL(30, 16)
#define POWER_INFO_MAX_MASK		GENMASK_ULL(46, 32)
#define POWER_INFO_MAX_TIME_WIN_MASK	GENMASK_ULL(53, 48)

/* Perf status and policy registers */
#define PERF_STATUS_THROTTLE_TIME_MASK	GENMASK(31, 0)
#define PP_POLICY_MASK			GENMASK(4, 0)

/*
 * SPR lays out the Psys domain PowerLimit register differently:
 * PL1 and PL2 are 17 bits wide instead of 15, which moves the
 * Enable bits and the TimeWindow fields up accordingly.
 */
#define PSYS_POWER_LIMIT1_MASK		GENMASK_ULL(16, 0)
#define PSYS_POWER_LIMIT1_ENABLE	BIT(17)
#define PSYS_TIME_WINDOW1_MASK		GENMASK_ULL(25, 19)

#define PSYS_POWER_LIMIT2_MASK		GENMASK_ULL(48, 32)
#define PSYS_POWER_LIMIT2_ENABLE	BIT_ULL(49)
#define PSYS_TIME_WINDOW2_MASK		GENMASK_ULL(57, 51)

/* Sideband MBI registers (floor frequency control on BYT and TNG) */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT	0x02
#define IOSF_CPU_POWER_BUDGET_CTL_TNG	0xDF
90 
91 /* private data for RAPL MSR Interface */
92 static struct rapl_if_priv *rapl_msr_priv;
93 
94 static bool rapl_msr_pmu __ro_after_init;
95 
96 static struct rapl_if_priv rapl_msr_priv_intel = {
97 	.type = RAPL_IF_MSR,
98 	.reg_unit.msr = MSR_RAPL_POWER_UNIT,
99 	.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_LIMIT].msr	= MSR_PKG_POWER_LIMIT,
100 	.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr	= MSR_PKG_ENERGY_STATUS,
101 	.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PERF].msr	= MSR_PKG_PERF_STATUS,
102 	.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_INFO].msr	= MSR_PKG_POWER_INFO,
103 	.regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_LIMIT].msr	= MSR_PP0_POWER_LIMIT,
104 	.regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr	= MSR_PP0_ENERGY_STATUS,
105 	.regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_POLICY].msr	= MSR_PP0_POLICY,
106 	.regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_LIMIT].msr	= MSR_PP1_POWER_LIMIT,
107 	.regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_STATUS].msr	= MSR_PP1_ENERGY_STATUS,
108 	.regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_POLICY].msr	= MSR_PP1_POLICY,
109 	.regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_LIMIT].msr	= MSR_DRAM_POWER_LIMIT,
110 	.regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_STATUS].msr	= MSR_DRAM_ENERGY_STATUS,
111 	.regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_PERF].msr	= MSR_DRAM_PERF_STATUS,
112 	.regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_INFO].msr	= MSR_DRAM_POWER_INFO,
113 	.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT].msr	= MSR_PLATFORM_POWER_LIMIT,
114 	.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS].msr	= MSR_PLATFORM_ENERGY_STATUS,
115 	.limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
116 	.limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2),
117 };
118 
119 static struct rapl_if_priv rapl_msr_priv_amd = {
120 	.type = RAPL_IF_MSR,
121 	.reg_unit.msr = MSR_AMD_RAPL_POWER_UNIT,
122 	.regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr	= MSR_AMD_PKG_ENERGY_STATUS,
123 	.regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr	= MSR_AMD_CORE_ENERGY_STATUS,
124 };
125 
126 /* Handles CPU hotplug on multi-socket systems.
127  * If a CPU goes online as the first CPU of the physical package
128  * we add the RAPL package to the system. Similarly, when the last
129  * CPU of the package is removed, we remove the RAPL package and its
130  * associated domains. Cooling devices are handled accordingly at
131  * per-domain level.
132  */
133 static int rapl_cpu_online(unsigned int cpu)
134 {
135 	struct rapl_package *rp;
136 
137 	rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
138 	if (!rp) {
139 		rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true);
140 		if (IS_ERR(rp))
141 			return PTR_ERR(rp);
142 		if (rapl_msr_pmu)
143 			rapl_package_add_pmu_locked(rp);
144 	}
145 	cpumask_set_cpu(cpu, &rp->cpumask);
146 	return 0;
147 }
148 
149 static int rapl_cpu_down_prep(unsigned int cpu)
150 {
151 	struct rapl_package *rp;
152 	int lead_cpu;
153 
154 	rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
155 	if (!rp)
156 		return 0;
157 
158 	cpumask_clear_cpu(cpu, &rp->cpumask);
159 	lead_cpu = cpumask_first(&rp->cpumask);
160 	if (lead_cpu >= nr_cpu_ids) {
161 		if (rapl_msr_pmu)
162 			rapl_package_remove_pmu_locked(rp);
163 		rapl_remove_package_cpuslocked(rp);
164 	} else if (rp->lead_cpu == cpu) {
165 		rp->lead_cpu = lead_cpu;
166 	}
167 
168 	return 0;
169 }
170 
171 static int rapl_msr_read_raw(int cpu, struct reg_action *ra, bool pmu_ctx)
172 {
173 	/*
174 	 * When called from PMU context, perform MSR read directly using
175 	 * rdmsrq() without IPI overhead. Package-scoped MSRs are readable
176 	 * from any CPU in the package.
177 	 */
178 	if (pmu_ctx) {
179 		rdmsrq(ra->reg.msr, ra->value);
180 		goto out;
181 	}
182 
183 	if (rdmsrq_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
184 		pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
185 		return -EIO;
186 	}
187 
188 out:
189 	ra->value &= ra->mask;
190 	return 0;
191 }
192 
193 static void rapl_msr_update_func(void *info)
194 {
195 	struct reg_action *ra = info;
196 	u64 val;
197 
198 	ra->err = rdmsrq_safe(ra->reg.msr, &val);
199 	if (ra->err)
200 		return;
201 
202 	val &= ~ra->mask;
203 	val |= ra->value;
204 
205 	ra->err = wrmsrq_safe(ra->reg.msr, val);
206 }
207 
208 static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
209 {
210 	int ret;
211 
212 	ret = smp_call_function_single(cpu, rapl_msr_update_func, ra, 1);
213 	if (WARN_ON_ONCE(ret))
214 		return ret;
215 
216 	return ra->err;
217 }
218 
219 static int rapl_check_unit_atom(struct rapl_domain *rd)
220 {
221 	struct reg_action ra;
222 	u32 value;
223 
224 	ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
225 	ra.mask = ~0;
226 	if (rapl_msr_read_raw(rd->rp->lead_cpu, &ra, false)) {
227 		pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
228 			ra.reg.val, rd->rp->name, rd->name);
229 		return -ENODEV;
230 	}
231 
232 	value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
233 	rd->energy_unit = ENERGY_UNIT_SCALE * (1ULL << value);
234 
235 	value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
236 	rd->power_unit = (1ULL << value) * MILLIWATT_PER_WATT;
237 
238 	value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
239 	rd->time_unit = USEC_PER_SEC >> value;
240 
241 	pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
242 		 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
243 
244 	return 0;
245 }
246 
247 static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
248 {
249 	static u32 power_ctrl_orig_val;
250 	const struct rapl_defaults *defaults = rd->rp->priv->defaults;
251 	u32 mdata;
252 
253 	if (!defaults->floor_freq_reg_addr) {
254 		pr_err("Invalid floor frequency config register\n");
255 		return;
256 	}
257 
258 	if (!power_ctrl_orig_val)
259 		iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
260 			      defaults->floor_freq_reg_addr,
261 			      &power_ctrl_orig_val);
262 	mdata = power_ctrl_orig_val;
263 	if (enable) {
264 		mdata &= ~GENMASK(14, 8);
265 		mdata |= BIT(8);
266 	}
267 	iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
268 		       defaults->floor_freq_reg_addr, mdata);
269 }
270 
271 static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
272 					 bool to_raw)
273 {
274 	if (to_raw)
275 		return div64_u64(value, rd->time_unit);
276 
277 	/*
278 	 * Atom time unit encoding is straight forward val * time_unit,
279 	 * where time_unit is default to 1 sec. Never 0.
280 	 */
281 	return value ? value * rd->time_unit : rd->time_unit;
282 }
283 
284 /* RAPL primitives for MSR I/F */
285 static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
286 	/* name, mask, shift, msr index, unit divisor */
287 	[POWER_LIMIT1]		= PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
288 						      RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
289 	[POWER_LIMIT2]		= PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
290 						      RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
291 	[POWER_LIMIT4]		= PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
292 						      RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
293 	[ENERGY_COUNTER]	= PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
294 						      RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
295 	[FW_LOCK]		= PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
296 						      RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
297 	[FW_HIGH_LOCK]		= PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
298 						      RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
299 	[PL1_ENABLE]		= PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
300 						      RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
301 	[PL1_CLAMP]		= PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
302 						      RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
303 	[PL2_ENABLE]		= PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
304 						      RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
305 	[PL2_CLAMP]		= PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
306 						      RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
307 	[TIME_WINDOW1]		= PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
308 						      RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
309 	[TIME_WINDOW2]		= PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
310 						      RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
311 	[THERMAL_SPEC_POWER]	= PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER,
312 						      POWER_INFO_THERMAL_SPEC_MASK, 0,
313 						      RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
314 	[MAX_POWER]		= PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
315 						      RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
316 	[MIN_POWER]		= PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
317 						      RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
318 	[MAX_TIME_WINDOW]	= PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW,
319 						      POWER_INFO_MAX_TIME_WIN_MASK, 48,
320 						      RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
321 	[THROTTLED_TIME]	= PRIMITIVE_INFO_INIT(THROTTLED_TIME,
322 						      PERF_STATUS_THROTTLE_TIME_MASK, 0,
323 						      RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
324 	[PRIORITY_LEVEL]	= PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
325 						      RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
326 	[PSYS_POWER_LIMIT1]	= PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
327 						      RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
328 	[PSYS_POWER_LIMIT2]	= PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK,
329 						      32, RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
330 	[PSYS_PL1_ENABLE]	= PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE,
331 						      17, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT,
332 						      0),
333 	[PSYS_PL2_ENABLE]	= PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE,
334 						      49, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT,
335 						      0),
336 	[PSYS_TIME_WINDOW1]	= PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK,
337 						      19, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
338 	[PSYS_TIME_WINDOW2]	= PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK,
339 						      51, RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
340 };
341 
342 static const struct rapl_defaults rapl_defaults_core = {
343 	.floor_freq_reg_addr = 0,
344 	.check_unit = rapl_default_check_unit,
345 	.set_floor_freq = rapl_default_set_floor_freq,
346 	.compute_time_window = rapl_default_compute_time_window,
347 };
348 
349 static const struct rapl_defaults rapl_defaults_hsw_server = {
350 	.check_unit = rapl_default_check_unit,
351 	.set_floor_freq = rapl_default_set_floor_freq,
352 	.compute_time_window = rapl_default_compute_time_window,
353 	.dram_domain_energy_unit = 15300,
354 };
355 
356 static const struct rapl_defaults rapl_defaults_spr_server = {
357 	.check_unit = rapl_default_check_unit,
358 	.set_floor_freq = rapl_default_set_floor_freq,
359 	.compute_time_window = rapl_default_compute_time_window,
360 	.psys_domain_energy_unit = NANOJOULE_PER_JOULE,
361 	.spr_psys_bits = true,
362 };
363 
364 static const struct rapl_defaults rapl_defaults_byt = {
365 	.floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
366 	.check_unit = rapl_check_unit_atom,
367 	.set_floor_freq = set_floor_freq_atom,
368 	.compute_time_window = rapl_compute_time_window_atom,
369 };
370 
371 static const struct rapl_defaults rapl_defaults_tng = {
372 	.floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
373 	.check_unit = rapl_check_unit_atom,
374 	.set_floor_freq = set_floor_freq_atom,
375 	.compute_time_window = rapl_compute_time_window_atom,
376 };
377 
378 static const struct rapl_defaults rapl_defaults_ann = {
379 	.floor_freq_reg_addr = 0,
380 	.check_unit = rapl_check_unit_atom,
381 	.set_floor_freq = NULL,
382 	.compute_time_window = rapl_compute_time_window_atom,
383 };
384 
385 static const struct rapl_defaults rapl_defaults_cht = {
386 	.floor_freq_reg_addr = 0,
387 	.check_unit = rapl_check_unit_atom,
388 	.set_floor_freq = NULL,
389 	.compute_time_window = rapl_compute_time_window_atom,
390 };
391 
392 static const struct rapl_defaults rapl_defaults_amd = {
393 	.check_unit = rapl_default_check_unit,
394 };
395 
396 static const struct rapl_defaults rapl_defaults_core_pl4 = {
397 	.floor_freq_reg_addr = 0,
398 	.check_unit = rapl_default_check_unit,
399 	.set_floor_freq = rapl_default_set_floor_freq,
400 	.compute_time_window = rapl_default_compute_time_window,
401 	.msr_pl4_support = 1,
402 };
403 
404 static const struct rapl_defaults rapl_defaults_core_pl4_pmu = {
405 	.floor_freq_reg_addr = 0,
406 	.check_unit = rapl_default_check_unit,
407 	.set_floor_freq = rapl_default_set_floor_freq,
408 	.compute_time_window = rapl_default_compute_time_window,
409 	.msr_pl4_support = 1,
410 	.msr_pmu_support = 1,
411 };
412 
413 static const struct x86_cpu_id rapl_ids[]  = {
414 	X86_MATCH_VFM(INTEL_SANDYBRIDGE,		&rapl_defaults_core),
415 	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,		&rapl_defaults_core),
416 
417 	X86_MATCH_VFM(INTEL_IVYBRIDGE,			&rapl_defaults_core),
418 	X86_MATCH_VFM(INTEL_IVYBRIDGE_X,		&rapl_defaults_core),
419 
420 	X86_MATCH_VFM(INTEL_HASWELL,			&rapl_defaults_core),
421 	X86_MATCH_VFM(INTEL_HASWELL_L,			&rapl_defaults_core),
422 	X86_MATCH_VFM(INTEL_HASWELL_G,			&rapl_defaults_core),
423 	X86_MATCH_VFM(INTEL_HASWELL_X,			&rapl_defaults_hsw_server),
424 
425 	X86_MATCH_VFM(INTEL_BROADWELL,			&rapl_defaults_core),
426 	X86_MATCH_VFM(INTEL_BROADWELL_G,		&rapl_defaults_core),
427 	X86_MATCH_VFM(INTEL_BROADWELL_D,		&rapl_defaults_core),
428 	X86_MATCH_VFM(INTEL_BROADWELL_X,		&rapl_defaults_hsw_server),
429 
430 	X86_MATCH_VFM(INTEL_SKYLAKE,			&rapl_defaults_core),
431 	X86_MATCH_VFM(INTEL_SKYLAKE_L,			&rapl_defaults_core),
432 	X86_MATCH_VFM(INTEL_SKYLAKE_X,			&rapl_defaults_hsw_server),
433 	X86_MATCH_VFM(INTEL_KABYLAKE_L,			&rapl_defaults_core),
434 	X86_MATCH_VFM(INTEL_KABYLAKE,			&rapl_defaults_core),
435 	X86_MATCH_VFM(INTEL_CANNONLAKE_L,		&rapl_defaults_core),
436 	X86_MATCH_VFM(INTEL_ICELAKE_L,			&rapl_defaults_core_pl4),
437 	X86_MATCH_VFM(INTEL_ICELAKE,			&rapl_defaults_core),
438 	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,		&rapl_defaults_core),
439 	X86_MATCH_VFM(INTEL_ICELAKE_X,			&rapl_defaults_hsw_server),
440 	X86_MATCH_VFM(INTEL_ICELAKE_D,			&rapl_defaults_hsw_server),
441 	X86_MATCH_VFM(INTEL_COMETLAKE_L,		&rapl_defaults_core),
442 	X86_MATCH_VFM(INTEL_COMETLAKE,			&rapl_defaults_core),
443 	X86_MATCH_VFM(INTEL_TIGERLAKE_L,		&rapl_defaults_core_pl4),
444 	X86_MATCH_VFM(INTEL_TIGERLAKE,			&rapl_defaults_core),
445 	X86_MATCH_VFM(INTEL_ROCKETLAKE,			&rapl_defaults_core),
446 	X86_MATCH_VFM(INTEL_ALDERLAKE,			&rapl_defaults_core_pl4),
447 	X86_MATCH_VFM(INTEL_ALDERLAKE_L,		&rapl_defaults_core_pl4),
448 	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,		&rapl_defaults_core_pl4),
449 	X86_MATCH_VFM(INTEL_RAPTORLAKE,			&rapl_defaults_core_pl4),
450 	X86_MATCH_VFM(INTEL_RAPTORLAKE_P,		&rapl_defaults_core_pl4),
451 	X86_MATCH_VFM(INTEL_RAPTORLAKE_S,		&rapl_defaults_core),
452 	X86_MATCH_VFM(INTEL_BARTLETTLAKE,		&rapl_defaults_core),
453 	X86_MATCH_VFM(INTEL_METEORLAKE,			&rapl_defaults_core_pl4),
454 	X86_MATCH_VFM(INTEL_METEORLAKE_L,		&rapl_defaults_core_pl4),
455 	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,		&rapl_defaults_spr_server),
456 	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,		&rapl_defaults_spr_server),
457 	X86_MATCH_VFM(INTEL_LUNARLAKE_M,		&rapl_defaults_core),
458 	X86_MATCH_VFM(INTEL_PANTHERLAKE_L,		&rapl_defaults_core_pl4_pmu),
459 	X86_MATCH_VFM(INTEL_WILDCATLAKE_L,		&rapl_defaults_core_pl4_pmu),
460 	X86_MATCH_VFM(INTEL_NOVALAKE,			&rapl_defaults_core_pl4),
461 	X86_MATCH_VFM(INTEL_NOVALAKE_L,			&rapl_defaults_core_pl4),
462 	X86_MATCH_VFM(INTEL_ARROWLAKE_H,		&rapl_defaults_core_pl4),
463 	X86_MATCH_VFM(INTEL_ARROWLAKE,			&rapl_defaults_core),
464 	X86_MATCH_VFM(INTEL_ARROWLAKE_U,		&rapl_defaults_core_pl4),
465 	X86_MATCH_VFM(INTEL_LAKEFIELD,			&rapl_defaults_core),
466 
467 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,		&rapl_defaults_byt),
468 	X86_MATCH_VFM(INTEL_ATOM_AIRMONT,		&rapl_defaults_cht),
469 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID,	&rapl_defaults_tng),
470 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2,	&rapl_defaults_ann),
471 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT,		&rapl_defaults_core),
472 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS,		&rapl_defaults_core),
473 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D,		&rapl_defaults_core),
474 	X86_MATCH_VFM(INTEL_ATOM_TREMONT,		&rapl_defaults_core),
475 	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D,		&rapl_defaults_core),
476 	X86_MATCH_VFM(INTEL_ATOM_TREMONT_L,		&rapl_defaults_core),
477 
478 	X86_MATCH_VFM(INTEL_XEON_PHI_KNL,		&rapl_defaults_hsw_server),
479 	X86_MATCH_VFM(INTEL_XEON_PHI_KNM,		&rapl_defaults_hsw_server),
480 
481 	X86_MATCH_VENDOR_FAM(AMD, 0x17,			&rapl_defaults_amd),
482 	X86_MATCH_VENDOR_FAM(AMD, 0x19,			&rapl_defaults_amd),
483 	X86_MATCH_VENDOR_FAM(AMD, 0x1A,			&rapl_defaults_amd),
484 	X86_MATCH_VENDOR_FAM(HYGON, 0x18,		&rapl_defaults_amd),
485 	{}
486 };
487 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
488 
489 static int rapl_msr_probe(struct platform_device *pdev)
490 {
491 	int ret;
492 
493 	switch (boot_cpu_data.x86_vendor) {
494 	case X86_VENDOR_INTEL:
495 		rapl_msr_priv = &rapl_msr_priv_intel;
496 		break;
497 	case X86_VENDOR_HYGON:
498 	case X86_VENDOR_AMD:
499 		rapl_msr_priv = &rapl_msr_priv_amd;
500 		break;
501 	default:
502 		pr_err("intel-rapl does not support CPU vendor %d\n", boot_cpu_data.x86_vendor);
503 		return -ENODEV;
504 	}
505 	rapl_msr_priv->read_raw = rapl_msr_read_raw;
506 	rapl_msr_priv->write_raw = rapl_msr_write_raw;
507 	rapl_msr_priv->defaults = (const struct rapl_defaults *)pdev->dev.platform_data;
508 	rapl_msr_priv->rpi = rpi_msr;
509 
510 	if (rapl_msr_priv->defaults->msr_pl4_support) {
511 		rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
512 		rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4].msr =
513 			MSR_VR_CURRENT_CONFIG;
514 		pr_info("PL4 support detected (updated).\n");
515 	}
516 
517 	if (rapl_msr_priv->defaults->msr_pmu_support) {
518 		rapl_msr_pmu = true;
519 		pr_info("MSR-based RAPL PMU support enabled (updated)\n");
520 	}
521 
522 	rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
523 	if (IS_ERR(rapl_msr_priv->control_type)) {
524 		pr_debug("failed to register powercap control_type.\n");
525 		return PTR_ERR(rapl_msr_priv->control_type);
526 	}
527 
528 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online",
529 				rapl_cpu_online, rapl_cpu_down_prep);
530 	if (ret < 0)
531 		goto out;
532 	rapl_msr_priv->pcap_rapl_online = ret;
533 
534 	return 0;
535 
536 out:
537 	if (ret)
538 		powercap_unregister_control_type(rapl_msr_priv->control_type);
539 	return ret;
540 }
541 
542 static void rapl_msr_remove(struct platform_device *pdev)
543 {
544 	cpuhp_remove_state(rapl_msr_priv->pcap_rapl_online);
545 	powercap_unregister_control_type(rapl_msr_priv->control_type);
546 }
547 
548 static const struct platform_device_id rapl_msr_ids[] = {
549 	{ .name = "intel_rapl_msr", },
550 	{}
551 };
552 MODULE_DEVICE_TABLE(platform, rapl_msr_ids);
553 
554 static struct platform_driver intel_rapl_msr_driver = {
555 	.probe = rapl_msr_probe,
556 	.remove = rapl_msr_remove,
557 	.id_table = rapl_msr_ids,
558 	.driver = {
559 		.name = "intel_rapl_msr",
560 	},
561 };
562 
563 static struct platform_device *rapl_msr_platdev;
564 
565 static int intel_rapl_msr_init(void)
566 {
567 	const struct rapl_defaults *def;
568 	const struct x86_cpu_id *id;
569 	int ret;
570 
571 	ret = platform_driver_register(&intel_rapl_msr_driver);
572 	if (ret)
573 		return ret;
574 
575 	/* Create the MSR RAPL platform device for supported platforms */
576 	id = x86_match_cpu(rapl_ids);
577 	if (!id)
578 		return 0;
579 
580 	def = (const struct rapl_defaults *)id->driver_data;
581 
582 	rapl_msr_platdev = platform_device_register_data(NULL, "intel_rapl_msr", 0, def,
583 							 sizeof(*def));
584 	if (IS_ERR(rapl_msr_platdev))
585 		pr_debug("intel_rapl_msr device register failed, ret:%ld\n",
586 			 PTR_ERR(rapl_msr_platdev));
587 
588 	return 0;
589 }
590 module_init(intel_rapl_msr_init);
591 
592 static void intel_rapl_msr_exit(void)
593 {
594 	platform_device_unregister(rapl_msr_platdev);
595 	platform_driver_unregister(&intel_rapl_msr_driver);
596 }
597 module_exit(intel_rapl_msr_exit);
598 
599 MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit) control via MSR interface");
600 MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
601 MODULE_LICENSE("GPL v2");
602 MODULE_IMPORT_NS("INTEL_RAPL");
603