xref: /linux/drivers/cpufreq/amd-pstate.c (revision a3a02a52bcfcbcc4a637d4b68bf1bc391c9fad02)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * amd-pstate.c - AMD Processor P-state Frequency Driver
4  *
5  * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
6  *
7  * Author: Huang Rui <ray.huang@amd.com>
8  *
9  * AMD P-State introduces a new CPU performance scaling design for AMD
10  * processors using the ACPI Collaborative Performance and Power Control (CPPC)
11  * feature, which works with the AMD SMU firmware to provide a finer-grained
12  * frequency control range. It replaces the legacy ACPI P-States control and
13  * offers a flexible, low-latency interface for the Linux kernel to
14  * communicate performance hints directly to the hardware.
15  *
16  * AMD P-State is supported on recent AMD Zen-based CPU series, including some
17  * Zen2 and Zen3 processors. _CPC must be present in the ACPI tables of an AMD
18  * P-State supported system. There are two types of hardware implementations
19  * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
20  * The X86_FEATURE_CPPC CPU feature flag is used to distinguish the two types.
21  */
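
/*
 * Note added for clarity (a summary of behaviour implemented later in this
 * file, not upstream documentation): on parts with X86_FEATURE_CPPC the
 * pstate_*() MSR helpers below are used directly, while on shared memory
 * designs amd_pstate_init() rewires the amd_pstate_enable/init_perf/
 * update_perf static calls to the cppc_*() helpers that go through the
 * ACPI CPPC library instead.
 */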
22 
23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24 
25 #include <linux/kernel.h>
26 #include <linux/module.h>
27 #include <linux/init.h>
28 #include <linux/smp.h>
29 #include <linux/sched.h>
30 #include <linux/cpufreq.h>
31 #include <linux/compiler.h>
32 #include <linux/dmi.h>
33 #include <linux/slab.h>
34 #include <linux/acpi.h>
35 #include <linux/io.h>
36 #include <linux/delay.h>
37 #include <linux/uaccess.h>
38 #include <linux/static_call.h>
39 #include <linux/topology.h>
40 
41 #include <acpi/processor.h>
42 #include <acpi/cppc_acpi.h>
43 
44 #include <asm/msr.h>
45 #include <asm/processor.h>
46 #include <asm/cpufeature.h>
47 #include <asm/cpu_device_id.h>
48 
49 #include "amd-pstate.h"
50 #include "amd-pstate-trace.h"
51 
52 #define AMD_PSTATE_TRANSITION_LATENCY	20000
53 #define AMD_PSTATE_TRANSITION_DELAY	1000
54 #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
55 #define CPPC_HIGHEST_PERF_PERFORMANCE	196
56 #define CPPC_HIGHEST_PERF_DEFAULT	166
57 
58 #define AMD_CPPC_EPP_PERFORMANCE		0x00
59 #define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
60 #define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
61 #define AMD_CPPC_EPP_POWERSAVE			0xFF
62 
63 /*
64  * enum amd_pstate_mode - driver working mode of amd pstate
65  */
66 enum amd_pstate_mode {
67 	AMD_PSTATE_UNDEFINED = 0,
68 	AMD_PSTATE_DISABLE,
69 	AMD_PSTATE_PASSIVE,
70 	AMD_PSTATE_ACTIVE,
71 	AMD_PSTATE_GUIDED,
72 	AMD_PSTATE_MAX,
73 };
74 
75 static const char * const amd_pstate_mode_string[] = {
76 	[AMD_PSTATE_UNDEFINED]   = "undefined",
77 	[AMD_PSTATE_DISABLE]     = "disable",
78 	[AMD_PSTATE_PASSIVE]     = "passive",
79 	[AMD_PSTATE_ACTIVE]      = "active",
80 	[AMD_PSTATE_GUIDED]      = "guided",
81 	NULL,
82 };
83 
84 struct quirk_entry {
85 	u32 nominal_freq;
86 	u32 lowest_freq;
87 };
88 
89 static struct cpufreq_driver *current_pstate_driver;
90 static struct cpufreq_driver amd_pstate_driver;
91 static struct cpufreq_driver amd_pstate_epp_driver;
92 static int cppc_state = AMD_PSTATE_UNDEFINED;
93 static bool cppc_enabled;
94 static bool amd_pstate_prefcore = true;
95 static struct quirk_entry *quirks;
96 
97 /*
98  * AMD Energy Performance Preference (EPP)
99  * The EPP is used in the CCLK DPM controller to drive
100  * the frequency that a core is going to operate at during
101  * short periods of activity. EPP values are used for the
102  * different OS profiles (balanced, performance, power savings).
103  * The display strings corresponding to each EPP index are listed
104  * in energy_perf_strings[]:
105  *	index		String
106  *-------------------------------------
107  *	0		default
108  *	1		performance
109  *	2		balance_performance
110  *	3		balance_power
111  *	4		power
112  */
113 enum energy_perf_value_index {
114 	EPP_INDEX_DEFAULT = 0,
115 	EPP_INDEX_PERFORMANCE,
116 	EPP_INDEX_BALANCE_PERFORMANCE,
117 	EPP_INDEX_BALANCE_POWERSAVE,
118 	EPP_INDEX_POWERSAVE,
119 };
120 
121 static const char * const energy_perf_strings[] = {
122 	[EPP_INDEX_DEFAULT] = "default",
123 	[EPP_INDEX_PERFORMANCE] = "performance",
124 	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
125 	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
126 	[EPP_INDEX_POWERSAVE] = "power",
127 	NULL
128 };
129 
130 static unsigned int epp_values[] = {
131 	[EPP_INDEX_DEFAULT] = 0,
132 	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
133 	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
134 	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
135 	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
136 };
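
/*
 * Example of the mapping above (illustrative, derived from the tables in
 * this file): writing "balance_power" to the energy_performance_preference
 * sysfs attribute selects epp_values[EPP_INDEX_BALANCE_POWERSAVE], i.e. a
 * raw EPP value of 0xBF, which amd_pstate_set_epp() programs into bits 31:24
 * of MSR_AMD_CPPC_REQ (or passes via cppc_set_epp_perf() on shared memory
 * systems).
 */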
137 
138 typedef int (*cppc_mode_transition_fn)(int);
139 
140 static struct quirk_entry quirk_amd_7k62 = {
141 	.nominal_freq = 2600,
142 	.lowest_freq = 550,
143 };
144 
145 static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
146 {
147 	/*
148 	 * Match the broken BIOS for Family 17h processors supporting CPPC V2;
149 	 * such a broken BIOS lacks the nominal_freq and lowest_freq capability
150 	 * definitions in the ACPI tables.
151 	 */
152 	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
153 		quirks = dmi->driver_data;
154 		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
155 		return 1;
156 	}
157 
158 	return 0;
159 }
160 
161 static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
162 	{
163 		.callback = dmi_matched_7k62_bios_bug,
164 		.ident = "AMD EPYC 7K62",
165 		.matches = {
166 			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
167 			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
168 		},
169 		.driver_data = &quirk_amd_7k62,
170 	},
171 	{}
172 };
173 MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
174 
175 static inline int get_mode_idx_from_str(const char *str, size_t size)
176 {
177 	int i;
178 
179 	for (i = 0; i < AMD_PSTATE_MAX; i++) {
180 		if (!strncmp(str, amd_pstate_mode_string[i], size))
181 			return i;
182 	}
183 	return -EINVAL;
184 }
185 
186 static DEFINE_MUTEX(amd_pstate_limits_lock);
187 static DEFINE_MUTEX(amd_pstate_driver_lock);
188 
189 static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
190 {
191 	u64 epp;
192 	int ret;
193 
194 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
195 		if (!cppc_req_cached) {
196 			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
197 					&cppc_req_cached);
198 			if (epp)
199 				return epp;
200 		}
201 		epp = (cppc_req_cached >> 24) & 0xFF;
202 	} else {
203 		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
204 		if (ret < 0) {
205 			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
206 			return -EIO;
207 		}
208 	}
209 
210 	return (s16)(epp & 0xff);
211 }
212 
213 static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
214 {
215 	s16 epp;
216 	int index = -EINVAL;
217 
218 	epp = amd_pstate_get_epp(cpudata, 0);
219 	if (epp < 0)
220 		return epp;
221 
222 	switch (epp) {
223 	case AMD_CPPC_EPP_PERFORMANCE:
224 		index = EPP_INDEX_PERFORMANCE;
225 		break;
226 	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
227 		index = EPP_INDEX_BALANCE_PERFORMANCE;
228 		break;
229 	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
230 		index = EPP_INDEX_BALANCE_POWERSAVE;
231 		break;
232 	case AMD_CPPC_EPP_POWERSAVE:
233 		index = EPP_INDEX_POWERSAVE;
234 		break;
235 	default:
236 		break;
237 	}
238 
239 	return index;
240 }
241 
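/*
 * MSR path of the update_perf static call: the full request word has already
 * been assembled in cppc_req_cached, so simply write it to MSR_AMD_CPPC_REQ,
 * on the local CPU for fast switching or via wrmsrl_on_cpu() on the target
 * CPU otherwise.
 */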
242 static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
243 			       u32 des_perf, u32 max_perf, bool fast_switch)
244 {
245 	if (fast_switch)
246 		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
247 	else
248 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
249 			      READ_ONCE(cpudata->cppc_req_cached));
250 }
251 
252 DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
253 
254 static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
255 					  u32 min_perf, u32 des_perf,
256 					  u32 max_perf, bool fast_switch)
257 {
258 	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
259 					    max_perf, fast_switch);
260 }
261 
262 static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
263 {
264 	int ret;
265 	struct cppc_perf_ctrls perf_ctrls;
266 
267 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
268 		u64 value = READ_ONCE(cpudata->cppc_req_cached);
269 
270 		value &= ~GENMASK_ULL(31, 24);
271 		value |= (u64)epp << 24;
272 		WRITE_ONCE(cpudata->cppc_req_cached, value);
273 
274 		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
275 		if (!ret)
276 			cpudata->epp_cached = epp;
277 	} else {
278 		amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
279 					     cpudata->max_limit_perf, false);
280 
281 		perf_ctrls.energy_perf = epp;
282 		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
283 		if (ret) {
284 			pr_debug("failed to set energy perf value (%d)\n", ret);
285 			return ret;
286 		}
287 		cpudata->epp_cached = epp;
288 	}
289 
290 	return ret;
291 }
292 
293 static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
294 		int pref_index)
295 {
296 	int epp = -EINVAL;
297 	int ret;
298 
299 	if (!pref_index)
300 		epp = cpudata->epp_default;
301 
302 	if (epp == -EINVAL)
303 		epp = epp_values[pref_index];
304 
305 	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
306 		pr_debug("EPP cannot be set under performance policy\n");
307 		return -EBUSY;
308 	}
309 
310 	ret = amd_pstate_set_epp(cpudata, epp);
311 
312 	return ret;
313 }
314 
315 static inline int pstate_enable(bool enable)
316 {
317 	int ret, cpu;
318 	unsigned long logical_proc_id_mask = 0;
319 
320 	if (enable == cppc_enabled)
321 		return 0;
322 
323 	for_each_present_cpu(cpu) {
324 		unsigned long logical_id = topology_logical_die_id(cpu);
325 
326 		if (test_bit(logical_id, &logical_proc_id_mask))
327 			continue;
328 
329 		set_bit(logical_id, &logical_proc_id_mask);
330 
331 		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
332 				enable);
333 		if (ret)
334 			return ret;
335 	}
336 
337 	cppc_enabled = enable;
338 	return 0;
339 }
340 
341 static int cppc_enable(bool enable)
342 {
343 	int cpu, ret = 0;
344 	struct cppc_perf_ctrls perf_ctrls;
345 
346 	if (enable == cppc_enabled)
347 		return 0;
348 
349 	for_each_present_cpu(cpu) {
350 		ret = cppc_set_enable(cpu, enable);
351 		if (ret)
352 			return ret;
353 
354 		/* Enable autonomous mode for EPP */
355 		if (cppc_state == AMD_PSTATE_ACTIVE) {
356 			/* Set desired perf as zero to allow EPP firmware control */
357 			perf_ctrls.desired_perf = 0;
358 			ret = cppc_set_perf(cpu, &perf_ctrls);
359 			if (ret)
360 				return ret;
361 		}
362 	}
363 
364 	cppc_enabled = enable;
365 	return ret;
366 }
367 
368 DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
369 
370 static inline int amd_pstate_enable(bool enable)
371 {
372 	return static_call(amd_pstate_enable)(enable);
373 }
374 
375 static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
376 {
377 	struct cpuinfo_x86 *c = &cpu_data(0);
378 
379 	/*
380 	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
381 	 * the highest performance level is set to 196.
382 	 * https://bugzilla.kernel.org/show_bug.cgi?id=218759
383 	 */
384 	if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
385 		return CPPC_HIGHEST_PERF_PERFORMANCE;
386 
387 	return CPPC_HIGHEST_PERF_DEFAULT;
388 }
389 
390 static int pstate_init_perf(struct amd_cpudata *cpudata)
391 {
392 	u64 cap1;
393 	u32 highest_perf;
394 
395 	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
396 				     &cap1);
397 	if (ret)
398 		return ret;
399 
400 	/* For platforms that do not support the preferred core feature,
401 	 * highest_perf may be configured as 166 or 255. To avoid the max
402 	 * frequency being calculated wrongly, take the AMD_CPPC_HIGHEST_PERF(cap1)
403 	 * value as the default max perf.
404 	 */
405 	if (cpudata->hw_prefcore)
406 		highest_perf = amd_pstate_highest_perf_set(cpudata);
407 	else
408 		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
409 
410 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
411 	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
412 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
413 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
414 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
415 	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
416 	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
417 	return 0;
418 }
419 
420 static int cppc_init_perf(struct amd_cpudata *cpudata)
421 {
422 	struct cppc_perf_caps cppc_perf;
423 	u32 highest_perf;
424 
425 	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
426 	if (ret)
427 		return ret;
428 
429 	if (cpudata->hw_prefcore)
430 		highest_perf = amd_pstate_highest_perf_set(cpudata);
431 	else
432 		highest_perf = cppc_perf.highest_perf;
433 
434 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
435 	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
436 	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
437 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
438 		   cppc_perf.lowest_nonlinear_perf);
439 	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
440 	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
441 	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
442 
443 	if (cppc_state == AMD_PSTATE_ACTIVE)
444 		return 0;
445 
446 	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
447 	if (ret) {
448 		pr_warn("failed to get auto_sel, ret: %d\n", ret);
449 		return 0;
450 	}
451 
452 	ret = cppc_set_auto_sel(cpudata->cpu,
453 			(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
454 
455 	if (ret)
456 		pr_warn("failed to set auto_sel, ret: %d\n", ret);
457 
458 	return ret;
459 }
460 
461 DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
462 
463 static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
464 {
465 	return static_call(amd_pstate_init_perf)(cpudata);
466 }
467 
468 static void cppc_update_perf(struct amd_cpudata *cpudata,
469 			     u32 min_perf, u32 des_perf,
470 			     u32 max_perf, bool fast_switch)
471 {
472 	struct cppc_perf_ctrls perf_ctrls;
473 
474 	perf_ctrls.max_perf = max_perf;
475 	perf_ctrls.min_perf = min_perf;
476 	perf_ctrls.desired_perf = des_perf;
477 
478 	cppc_set_perf(cpudata->cpu, &perf_ctrls);
479 }
480 
481 static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
482 {
483 	u64 aperf, mperf, tsc;
484 	unsigned long flags;
485 
486 	local_irq_save(flags);
487 	rdmsrl(MSR_IA32_APERF, aperf);
488 	rdmsrl(MSR_IA32_MPERF, mperf);
489 	tsc = rdtsc();
490 
491 	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
492 		local_irq_restore(flags);
493 		return false;
494 	}
495 
496 	local_irq_restore(flags);
497 
498 	cpudata->cur.aperf = aperf;
499 	cpudata->cur.mperf = mperf;
500 	cpudata->cur.tsc =  tsc;
501 	cpudata->cur.aperf -= cpudata->prev.aperf;
502 	cpudata->cur.mperf -= cpudata->prev.mperf;
503 	cpudata->cur.tsc -= cpudata->prev.tsc;
504 
505 	cpudata->prev.aperf = aperf;
506 	cpudata->prev.mperf = mperf;
507 	cpudata->prev.tsc = tsc;
508 
509 	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
510 
511 	return true;
512 }
513 
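/*
 * Layout of the CPPC request word cached in cppc_req_cached and written to
 * MSR_AMD_CPPC_REQ, summarized for readability: bits 7:0 max_perf and bits
 * 31:24 EPP are visible from the GENMASK usage in this file; min_perf and
 * desired_perf occupy the intermediate byte fields (15:8 and 23:16) per the
 * AMD_CPPC_*_PERF() helpers used below.
 */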
514 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
515 			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
516 {
517 	unsigned long max_freq;
518 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
519 	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
520 	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
521 	u64 value = prev;
522 
523 	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
524 			cpudata->max_limit_perf);
525 	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
526 			cpudata->max_limit_perf);
527 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
528 
529 	max_freq = READ_ONCE(cpudata->max_limit_freq);
530 	policy->cur = div_u64(des_perf * max_freq, max_perf);
531 
532 	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
533 		min_perf = des_perf;
534 		des_perf = 0;
535 	}
536 
537 	value &= ~AMD_CPPC_MIN_PERF(~0L);
538 	value |= AMD_CPPC_MIN_PERF(min_perf);
539 
540 	value &= ~AMD_CPPC_DES_PERF(~0L);
541 	value |= AMD_CPPC_DES_PERF(des_perf);
542 
543 	/* limit the max perf when core performance boost feature is disabled */
544 	if (!cpudata->boost_supported)
545 		max_perf = min_t(unsigned long, nominal_perf, max_perf);
546 
547 	value &= ~AMD_CPPC_MAX_PERF(~0L);
548 	value |= AMD_CPPC_MAX_PERF(max_perf);
549 
550 	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
551 		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
552 			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
553 				cpudata->cpu, (value != prev), fast_switch);
554 	}
555 
556 	if (value == prev)
557 		return;
558 
559 	WRITE_ONCE(cpudata->cppc_req_cached, value);
560 
561 	amd_pstate_update_perf(cpudata, min_perf, des_perf,
562 			       max_perf, fast_switch);
563 }
564 
565 static int amd_pstate_verify(struct cpufreq_policy_data *policy)
566 {
567 	cpufreq_verify_within_cpu_limits(policy);
568 
569 	return 0;
570 }
571 
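/*
 * Translate the policy's min/max frequency limits into CPPC performance
 * limits (scaled against highest_perf and max_freq) and cache both the perf
 * and frequency forms in cpudata for later requests.
 */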
572 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
573 {
574 	u32 max_limit_perf, min_limit_perf, lowest_perf;
575 	struct amd_cpudata *cpudata = policy->driver_data;
576 
577 	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
578 	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
579 
580 	lowest_perf = READ_ONCE(cpudata->lowest_perf);
581 	if (min_limit_perf < lowest_perf)
582 		min_limit_perf = lowest_perf;
583 
584 	if (max_limit_perf < min_limit_perf)
585 		max_limit_perf = min_limit_perf;
586 
587 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
588 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
589 	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
590 	WRITE_ONCE(cpudata->min_limit_freq, policy->min);
591 
592 	return 0;
593 }
594 
595 static int amd_pstate_update_freq(struct cpufreq_policy *policy,
596 				  unsigned int target_freq, bool fast_switch)
597 {
598 	struct cpufreq_freqs freqs;
599 	struct amd_cpudata *cpudata = policy->driver_data;
600 	unsigned long max_perf, min_perf, des_perf, cap_perf;
601 
602 	if (!cpudata->max_freq)
603 		return -ENODEV;
604 
605 	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
606 		amd_pstate_update_min_max_limit(policy);
607 
608 	cap_perf = READ_ONCE(cpudata->highest_perf);
609 	min_perf = READ_ONCE(cpudata->lowest_perf);
610 	max_perf = cap_perf;
611 
612 	freqs.old = policy->cur;
613 	freqs.new = target_freq;
614 
615 	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
616 				     cpudata->max_freq);
617 
618 	WARN_ON(fast_switch && !policy->fast_switch_enabled);
619 	/*
620 	 * If fast_switch is desired, then there aren't any registered
621 	 * transition notifiers. See comment for
622 	 * cpufreq_enable_fast_switch().
623 	 */
624 	if (!fast_switch)
625 		cpufreq_freq_transition_begin(policy, &freqs);
626 
627 	amd_pstate_update(cpudata, min_perf, des_perf,
628 			max_perf, fast_switch, policy->governor->flags);
629 
630 	if (!fast_switch)
631 		cpufreq_freq_transition_end(policy, &freqs, false);
632 
633 	return 0;
634 }
635 
636 static int amd_pstate_target(struct cpufreq_policy *policy,
637 			     unsigned int target_freq,
638 			     unsigned int relation)
639 {
640 	return amd_pstate_update_freq(policy, target_freq, false);
641 }
642 
643 static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
644 				  unsigned int target_freq)
645 {
646 	if (!amd_pstate_update_freq(policy, target_freq, true))
647 		return target_freq;
648 	return policy->cur;
649 }
650 
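/*
 * Fast "adjust_perf" path used by governors that pass direct performance
 * hints (e.g. schedutil): translate the scheduler's capacity-relative hints
 * straight into CPPC min/desired/max perf values, without going through a
 * frequency-based target() call.
 */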
651 static void amd_pstate_adjust_perf(unsigned int cpu,
652 				   unsigned long _min_perf,
653 				   unsigned long target_perf,
654 				   unsigned long capacity)
655 {
656 	unsigned long max_perf, min_perf, des_perf,
657 		      cap_perf, lowest_nonlinear_perf;
658 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
659 	struct amd_cpudata *cpudata = policy->driver_data;
660 
661 	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
662 		amd_pstate_update_min_max_limit(policy);
663 
664 
665 	cap_perf = READ_ONCE(cpudata->highest_perf);
666 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
667 
668 	des_perf = cap_perf;
669 	if (target_perf < capacity)
670 		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
671 
672 	min_perf = READ_ONCE(cpudata->lowest_perf);
673 	if (_min_perf < capacity)
674 		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
675 
676 	if (min_perf < lowest_nonlinear_perf)
677 		min_perf = lowest_nonlinear_perf;
678 
679 	max_perf = cap_perf;
680 	if (max_perf < min_perf)
681 		max_perf = min_perf;
682 
683 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
684 
685 	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
686 			policy->governor->flags);
687 	cpufreq_cpu_put(policy);
688 }
689 
690 static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
691 {
692 	struct amd_cpudata *cpudata = policy->driver_data;
693 	struct cppc_perf_ctrls perf_ctrls;
694 	u32 highest_perf, nominal_perf, nominal_freq, max_freq;
695 	int ret;
696 
697 	highest_perf = READ_ONCE(cpudata->highest_perf);
698 	nominal_perf = READ_ONCE(cpudata->nominal_perf);
699 	nominal_freq = READ_ONCE(cpudata->nominal_freq);
700 	max_freq = READ_ONCE(cpudata->max_freq);
701 
702 	if (boot_cpu_has(X86_FEATURE_CPPC)) {
703 		u64 value = READ_ONCE(cpudata->cppc_req_cached);
704 
705 		value &= ~GENMASK_ULL(7, 0);
706 		value |= on ? highest_perf : nominal_perf;
707 		WRITE_ONCE(cpudata->cppc_req_cached, value);
708 
709 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
710 	} else {
711 		perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
712 		ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
713 		if (ret) {
714 			cpufreq_cpu_release(policy);
715 			pr_debug("Failed to set max perf on CPU:%d. ret:%d\n",
716 				cpudata->cpu, ret);
717 			return ret;
718 		}
719 	}
720 
721 	if (on)
722 		policy->cpuinfo.max_freq = max_freq;
723 	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
724 		policy->cpuinfo.max_freq = nominal_freq * 1000;
725 
726 	policy->max = policy->cpuinfo.max_freq;
727 
728 	if (cppc_state == AMD_PSTATE_PASSIVE) {
729 		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
730 		if (ret < 0)
731 			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
732 	}
733 
734 	return ret < 0 ? ret : 0;
735 }
736 
737 static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
738 {
739 	struct amd_cpudata *cpudata = policy->driver_data;
740 	int ret;
741 
742 	if (!cpudata->boost_supported) {
743 		pr_err("Boost mode is not supported by this processor or SBIOS\n");
744 		return -EOPNOTSUPP;
745 	}
746 	mutex_lock(&amd_pstate_driver_lock);
747 	ret = amd_pstate_cpu_boost_update(policy, state);
748 	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
749 	policy->boost_enabled = !ret ? state : false;
750 	refresh_frequency_limits(policy);
751 	mutex_unlock(&amd_pstate_driver_lock);
752 
753 	return ret;
754 }
755 
756 static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
757 {
758 	u64 boost_val;
759 	int ret = -1;
760 
761 	/*
762 	 * If platform has no CPB support or disable it, initialize current driver
763 	 * boost_enabled state to be false, it is not an error for cpufreq core to handle.
764 	 */
765 	if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
766 		pr_debug_once("Boost CPB capabilities not present in the processor\n");
767 		ret = 0;
768 		goto exit_err;
769 	}
770 
771 	/* at least one CPU supports CPB, even if others fail later on to set up */
772 	current_pstate_driver->boost_enabled = true;
773 
774 	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
775 	if (ret) {
776 		pr_err_once("failed to read initial CPU boost state!\n");
777 		ret = -EIO;
778 		goto exit_err;
779 	}
780 
781 	if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
782 		cpudata->boost_supported = true;
783 
784 	return 0;
785 
786 exit_err:
787 	cpudata->boost_supported = false;
788 	return ret;
789 }
790 
791 static void amd_perf_ctl_reset(unsigned int cpu)
792 {
793 	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
794 }
795 
796 /*
797  * Enabling amd-pstate preferred core support can't be done directly from
798  * cpufreq callbacks due to locking, so queue the work for later.
799  */
800 static void amd_pstate_sched_prefcore_workfn(struct work_struct *work)
801 {
802 	sched_set_itmt_support();
803 }
804 static DECLARE_WORK(sched_prefcore_work, amd_pstate_sched_prefcore_workfn);
805 
806 /*
807  * Get the highest performance register value.
808  * @cpu: CPU from which to get highest performance.
809  * @highest_perf: Return address.
810  *
811  * Return: 0 for success, -EIO otherwise.
812  */
813 static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
814 {
815 	int ret;
816 
817 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
818 		u64 cap1;
819 
820 		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
821 		if (ret)
822 			return ret;
823 		WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
824 	} else {
825 		u64 cppc_highest_perf;
826 
827 		ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
828 		if (ret)
829 			return ret;
830 		WRITE_ONCE(*highest_perf, cppc_highest_perf);
831 	}
832 
833 	return ret;
834 }
835 
836 #define CPPC_MAX_PERF	U8_MAX
837 
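/*
 * Detect whether the platform reports per-core highest_perf rankings (the
 * preferred core feature) and, when the feature is in use, feed them to the
 * scheduler as ITMT core priorities.
 */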
838 static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
839 {
840 	int ret, prio;
841 	u32 highest_perf;
842 
843 	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
844 	if (ret)
845 		return;
846 
847 	cpudata->hw_prefcore = true;
848 	/* Check whether the CPPC preferred core feature is enabled */
849 	if (highest_perf < CPPC_MAX_PERF)
850 		prio = (int)highest_perf;
851 	else {
852 		pr_debug("AMD CPPC preferred core is unsupported!\n");
853 		cpudata->hw_prefcore = false;
854 		return;
855 	}
856 
857 	if (!amd_pstate_prefcore)
858 		return;
859 
860 	/*
861 	 * The priorities can be set regardless of whether or not
862 	 * sched_set_itmt_support(true) has been called and it is valid to
863 	 * update them at any time after it has been called.
864 	 */
865 	sched_set_itmt_core_prio(prio, cpudata->cpu);
866 
867 	schedule_work(&sched_prefcore_work);
868 }
869 
870 static void amd_pstate_update_limits(unsigned int cpu)
871 {
872 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
873 	struct amd_cpudata *cpudata = policy->driver_data;
874 	u32 prev_high = 0, cur_high = 0;
875 	int ret;
876 	bool highest_perf_changed = false;
877 
878 	mutex_lock(&amd_pstate_driver_lock);
879 	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
880 		goto free_cpufreq_put;
881 
882 	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
883 	if (ret)
884 		goto free_cpufreq_put;
885 
886 	prev_high = READ_ONCE(cpudata->prefcore_ranking);
887 	if (prev_high != cur_high) {
888 		highest_perf_changed = true;
889 		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
890 
891 		if (cur_high < CPPC_MAX_PERF)
892 			sched_set_itmt_core_prio((int)cur_high, cpu);
893 	}
894 
895 free_cpufreq_put:
896 	cpufreq_cpu_put(policy);
897 
898 	if (!highest_perf_changed)
899 		cpufreq_update_policy(cpu);
900 
901 	mutex_unlock(&amd_pstate_driver_lock);
902 }
903 
904 /*
905  * Get the P-state transition delay time from the ACPI tables set by the
906  * firmware instead of using a hardcoded value.
907  */
908 static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
909 {
910 	u32 transition_delay_ns;
911 
912 	transition_delay_ns = cppc_get_transition_latency(cpu);
913 	if (transition_delay_ns == CPUFREQ_ETERNAL) {
914 		if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC))
915 			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
916 		else
917 			return AMD_PSTATE_TRANSITION_DELAY;
918 	}
919 
920 	return transition_delay_ns / NSEC_PER_USEC;
921 }
922 
923 /*
924  * Get the P-state transition latency value from the ACPI tables set by the
925  * firmware instead of using a hardcoded value.
926  */
927 static u32 amd_pstate_get_transition_latency(unsigned int cpu)
928 {
929 	u32 transition_latency;
930 
931 	transition_latency = cppc_get_transition_latency(cpu);
932 	if (transition_latency == CPUFREQ_ETERNAL)
933 		return AMD_PSTATE_TRANSITION_LATENCY;
934 
935 	return transition_latency;
936 }
937 
938 /*
939  * amd_pstate_init_freq: Initialize the max_freq, min_freq,
940  *                       nominal_freq and lowest_nonlinear_freq for
941  *                       the @cpudata object.
942  *
943  *  Requires: highest_perf, lowest_perf, nominal_perf and
944  *            lowest_nonlinear_perf members of @cpudata to be
945  *            initialized.
946  *
947  *  Returns 0 on success, non-zero value on failure.
948  */
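/*
 * Worked example with hypothetical values (not taken from real hardware):
 * for nominal_freq = 2600 MHz, nominal_perf = 120 and highest_perf = 166,
 * boost_ratio = (166 << SCHED_CAPACITY_SHIFT) / 120 = 1416, so
 * max_freq = ((2600 * 1416) >> SCHED_CAPACITY_SHIFT) * 1000 = 3595000 kHz,
 * i.e. roughly 3.6 GHz.
 */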
949 static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
950 {
951 	int ret;
952 	u32 min_freq;
953 	u32 highest_perf, max_freq;
954 	u32 nominal_perf, nominal_freq;
955 	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
956 	u32 boost_ratio, lowest_nonlinear_ratio;
957 	struct cppc_perf_caps cppc_perf;
958 
959 	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
960 	if (ret)
961 		return ret;
962 
963 	if (quirks && quirks->lowest_freq)
964 		min_freq = quirks->lowest_freq * 1000;
965 	else
966 		min_freq = cppc_perf.lowest_freq * 1000;
967 
968 	if (quirks && quirks->nominal_freq)
969 		nominal_freq = quirks->nominal_freq;
970 	else
971 		nominal_freq = cppc_perf.nominal_freq;
972 
973 	nominal_perf = READ_ONCE(cpudata->nominal_perf);
974 
975 	highest_perf = READ_ONCE(cpudata->highest_perf);
976 	boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
977 	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
978 
979 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
980 	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
981 					 nominal_perf);
982 	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
983 
984 	WRITE_ONCE(cpudata->min_freq, min_freq);
985 	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
986 	WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
987 	WRITE_ONCE(cpudata->max_freq, max_freq);
988 
989 	/*
990 	 * The values below need to be initialized correctly, otherwise the driver will fail to load.
991 	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf.
992 	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq].
993 	 * Check the _CPC objects in the ACPI tables if any of these values are incorrect.
994 	 */
995 	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
996 		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
997 			min_freq, max_freq, nominal_freq * 1000);
998 		return -EINVAL;
999 	}
1000 
1001 	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) {
1002 		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
1003 			lowest_nonlinear_freq, min_freq, nominal_freq * 1000);
1004 		return -EINVAL;
1005 	}
1006 
1007 	return 0;
1008 }
1009 
1010 static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
1011 {
1012 	int min_freq, max_freq, ret;
1013 	struct device *dev;
1014 	struct amd_cpudata *cpudata;
1015 
1016 	/*
1017 	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
1018 	 * which is ideal for initialization process.
1019 	 */
1020 	amd_perf_ctl_reset(policy->cpu);
1021 	dev = get_cpu_device(policy->cpu);
1022 	if (!dev)
1023 		return -ENODEV;
1024 
1025 	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
1026 	if (!cpudata)
1027 		return -ENOMEM;
1028 
1029 	cpudata->cpu = policy->cpu;
1030 
1031 	amd_pstate_init_prefcore(cpudata);
1032 
1033 	ret = amd_pstate_init_perf(cpudata);
1034 	if (ret)
1035 		goto free_cpudata1;
1036 
1037 	ret = amd_pstate_init_freq(cpudata);
1038 	if (ret)
1039 		goto free_cpudata1;
1040 
1041 	ret = amd_pstate_init_boost_support(cpudata);
1042 	if (ret)
1043 		goto free_cpudata1;
1044 
1045 	min_freq = READ_ONCE(cpudata->min_freq);
1046 	max_freq = READ_ONCE(cpudata->max_freq);
1047 
1048 	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
1049 	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
1050 
1051 	policy->min = min_freq;
1052 	policy->max = max_freq;
1053 
1054 	policy->cpuinfo.min_freq = min_freq;
1055 	policy->cpuinfo.max_freq = max_freq;
1056 
1057 	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
1058 
1059 	/* It will be updated by governor */
1060 	policy->cur = policy->cpuinfo.min_freq;
1061 
1062 	if (cpu_feature_enabled(X86_FEATURE_CPPC))
1063 		policy->fast_switch_possible = true;
1064 
1065 	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
1066 				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
1067 	if (ret < 0) {
1068 		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
1069 		goto free_cpudata1;
1070 	}
1071 
1072 	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
1073 				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
1074 	if (ret < 0) {
1075 		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
1076 		goto free_cpudata2;
1077 	}
1078 
1079 	cpudata->max_limit_freq = max_freq;
1080 	cpudata->min_limit_freq = min_freq;
1081 
1082 	policy->driver_data = cpudata;
1083 
1084 	if (!current_pstate_driver->adjust_perf)
1085 		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
1086 
1087 	return 0;
1088 
1089 free_cpudata2:
1090 	freq_qos_remove_request(&cpudata->req[0]);
1091 free_cpudata1:
1092 	kfree(cpudata);
1093 	return ret;
1094 }
1095 
1096 static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
1097 {
1098 	struct amd_cpudata *cpudata = policy->driver_data;
1099 
1100 	freq_qos_remove_request(&cpudata->req[1]);
1101 	freq_qos_remove_request(&cpudata->req[0]);
1102 	policy->fast_switch_possible = false;
1103 	kfree(cpudata);
1104 }
1105 
1106 static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
1107 {
1108 	int ret;
1109 
1110 	ret = amd_pstate_enable(true);
1111 	if (ret)
1112 		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
1113 
1114 	return ret;
1115 }
1116 
1117 static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
1118 {
1119 	int ret;
1120 
1121 	ret = amd_pstate_enable(false);
1122 	if (ret)
1123 		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
1124 
1125 	return ret;
1126 }
1127 
1128 /* Sysfs attributes */
1129 
1130 /*
1131  * This frequency indicates the maximum hardware frequency.
1132  * If boost is supported but not active, this frequency will be larger than
1133  * the one in cpuinfo.
1134  */
1135 static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
1136 					char *buf)
1137 {
1138 	int max_freq;
1139 	struct amd_cpudata *cpudata = policy->driver_data;
1140 
1141 	max_freq = READ_ONCE(cpudata->max_freq);
1142 	if (max_freq < 0)
1143 		return max_freq;
1144 
1145 	return sysfs_emit(buf, "%u\n", max_freq);
1146 }
1147 
1148 static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
1149 						     char *buf)
1150 {
1151 	int freq;
1152 	struct amd_cpudata *cpudata = policy->driver_data;
1153 
1154 	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
1155 	if (freq < 0)
1156 		return freq;
1157 
1158 	return sysfs_emit(buf, "%u\n", freq);
1159 }
1160 
1161 /*
1162  * In some ASICs, the highest_perf is not the one in the _CPC table, so we
1163  * need to expose it to sysfs.
1164  */
1165 static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
1166 					    char *buf)
1167 {
1168 	u32 perf;
1169 	struct amd_cpudata *cpudata = policy->driver_data;
1170 
1171 	perf = READ_ONCE(cpudata->highest_perf);
1172 
1173 	return sysfs_emit(buf, "%u\n", perf);
1174 }
1175 
1176 static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
1177 						char *buf)
1178 {
1179 	u32 perf;
1180 	struct amd_cpudata *cpudata = policy->driver_data;
1181 
1182 	perf = READ_ONCE(cpudata->prefcore_ranking);
1183 
1184 	return sysfs_emit(buf, "%u\n", perf);
1185 }
1186 
1187 static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
1188 					   char *buf)
1189 {
1190 	bool hw_prefcore;
1191 	struct amd_cpudata *cpudata = policy->driver_data;
1192 
1193 	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
1194 
1195 	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
1196 }
1197 
1198 static ssize_t show_energy_performance_available_preferences(
1199 				struct cpufreq_policy *policy, char *buf)
1200 {
1201 	int i = 0;
1202 	int offset = 0;
1203 	struct amd_cpudata *cpudata = policy->driver_data;
1204 
1205 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1206 		return sysfs_emit_at(buf, offset, "%s\n",
1207 				energy_perf_strings[EPP_INDEX_PERFORMANCE]);
1208 
1209 	while (energy_perf_strings[i] != NULL)
1210 		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
1211 
1212 	offset += sysfs_emit_at(buf, offset, "\n");
1213 
1214 	return offset;
1215 }
1216 
1217 static ssize_t store_energy_performance_preference(
1218 		struct cpufreq_policy *policy, const char *buf, size_t count)
1219 {
1220 	struct amd_cpudata *cpudata = policy->driver_data;
1221 	char str_preference[21];
1222 	ssize_t ret;
1223 
1224 	ret = sscanf(buf, "%20s", str_preference);
1225 	if (ret != 1)
1226 		return -EINVAL;
1227 
1228 	ret = match_string(energy_perf_strings, -1, str_preference);
1229 	if (ret < 0)
1230 		return -EINVAL;
1231 
1232 	mutex_lock(&amd_pstate_limits_lock);
1233 	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
1234 	mutex_unlock(&amd_pstate_limits_lock);
1235 
1236 	return ret ?: count;
1237 }
1238 
1239 static ssize_t show_energy_performance_preference(
1240 				struct cpufreq_policy *policy, char *buf)
1241 {
1242 	struct amd_cpudata *cpudata = policy->driver_data;
1243 	int preference;
1244 
1245 	preference = amd_pstate_get_energy_pref_index(cpudata);
1246 	if (preference < 0)
1247 		return preference;
1248 
1249 	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
1250 }
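
/*
 * Illustrative usage (paths assumed from the standard cpufreq sysfs layout,
 * not spelled out elsewhere in this file): in active mode the preference is
 * selected per policy, e.g.
 *   echo balance_performance > \
 *     /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
 * which is handled by store_energy_performance_preference() above.
 */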
1251 
1252 static void amd_pstate_driver_cleanup(void)
1253 {
1254 	amd_pstate_enable(false);
1255 	cppc_state = AMD_PSTATE_DISABLE;
1256 	current_pstate_driver = NULL;
1257 }
1258 
1259 static int amd_pstate_register_driver(int mode)
1260 {
1261 	int ret;
1262 
1263 	if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
1264 		current_pstate_driver = &amd_pstate_driver;
1265 	else if (mode == AMD_PSTATE_ACTIVE)
1266 		current_pstate_driver = &amd_pstate_epp_driver;
1267 	else
1268 		return -EINVAL;
1269 
1270 	cppc_state = mode;
1271 	ret = cpufreq_register_driver(current_pstate_driver);
1272 	if (ret) {
1273 		amd_pstate_driver_cleanup();
1274 		return ret;
1275 	}
1276 	return 0;
1277 }
1278 
1279 static int amd_pstate_unregister_driver(int dummy)
1280 {
1281 	cpufreq_unregister_driver(current_pstate_driver);
1282 	amd_pstate_driver_cleanup();
1283 	return 0;
1284 }
1285 
1286 static int amd_pstate_change_mode_without_dvr_change(int mode)
1287 {
1288 	int cpu = 0;
1289 
1290 	cppc_state = mode;
1291 
1292 	if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
1293 		return 0;
1294 
1295 	for_each_present_cpu(cpu) {
1296 		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
1297 	}
1298 
1299 	return 0;
1300 }
1301 
1302 static int amd_pstate_change_driver_mode(int mode)
1303 {
1304 	int ret;
1305 
1306 	ret = amd_pstate_unregister_driver(0);
1307 	if (ret)
1308 		return ret;
1309 
1310 	ret = amd_pstate_register_driver(mode);
1311 	if (ret)
1312 		return ret;
1313 
1314 	return 0;
1315 }
1316 
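/*
 * Mode transition table used by amd_pstate_update_status():
 * mode_state_machine[current_mode][requested_mode] yields the handler to run,
 * or NULL when no action is required for that transition.
 */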
1317 static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
1318 	[AMD_PSTATE_DISABLE]         = {
1319 		[AMD_PSTATE_DISABLE]     = NULL,
1320 		[AMD_PSTATE_PASSIVE]     = amd_pstate_register_driver,
1321 		[AMD_PSTATE_ACTIVE]      = amd_pstate_register_driver,
1322 		[AMD_PSTATE_GUIDED]      = amd_pstate_register_driver,
1323 	},
1324 	[AMD_PSTATE_PASSIVE]         = {
1325 		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1326 		[AMD_PSTATE_PASSIVE]     = NULL,
1327 		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
1328 		[AMD_PSTATE_GUIDED]      = amd_pstate_change_mode_without_dvr_change,
1329 	},
1330 	[AMD_PSTATE_ACTIVE]          = {
1331 		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1332 		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_driver_mode,
1333 		[AMD_PSTATE_ACTIVE]      = NULL,
1334 		[AMD_PSTATE_GUIDED]      = amd_pstate_change_driver_mode,
1335 	},
1336 	[AMD_PSTATE_GUIDED]          = {
1337 		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1338 		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_mode_without_dvr_change,
1339 		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
1340 		[AMD_PSTATE_GUIDED]      = NULL,
1341 	},
1342 };
1343 
1344 static ssize_t amd_pstate_show_status(char *buf)
1345 {
1346 	if (!current_pstate_driver)
1347 		return sysfs_emit(buf, "disable\n");
1348 
1349 	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
1350 }
1351 
1352 static int amd_pstate_update_status(const char *buf, size_t size)
1353 {
1354 	int mode_idx;
1355 
1356 	if (size > strlen("passive") || size < strlen("active"))
1357 		return -EINVAL;
1358 
1359 	mode_idx = get_mode_idx_from_str(buf, size);
1360 
1361 	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
1362 		return -EINVAL;
1363 
1364 	if (mode_state_machine[cppc_state][mode_idx])
1365 		return mode_state_machine[cppc_state][mode_idx](mode_idx);
1366 
1367 	return 0;
1368 }
1369 
1370 static ssize_t status_show(struct device *dev,
1371 			   struct device_attribute *attr, char *buf)
1372 {
1373 	ssize_t ret;
1374 
1375 	mutex_lock(&amd_pstate_driver_lock);
1376 	ret = amd_pstate_show_status(buf);
1377 	mutex_unlock(&amd_pstate_driver_lock);
1378 
1379 	return ret;
1380 }
1381 
1382 static ssize_t status_store(struct device *a, struct device_attribute *b,
1383 			    const char *buf, size_t count)
1384 {
1385 	char *p = memchr(buf, '\n', count);
1386 	int ret;
1387 
1388 	mutex_lock(&amd_pstate_driver_lock);
1389 	ret = amd_pstate_update_status(buf, p ? p - buf : count);
1390 	mutex_unlock(&amd_pstate_driver_lock);
1391 
1392 	return ret < 0 ? ret : count;
1393 }
1394 
1395 static ssize_t prefcore_show(struct device *dev,
1396 			     struct device_attribute *attr, char *buf)
1397 {
1398 	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
1399 }
1400 
1401 cpufreq_freq_attr_ro(amd_pstate_max_freq);
1402 cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
1403 
1404 cpufreq_freq_attr_ro(amd_pstate_highest_perf);
1405 cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
1406 cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
1407 cpufreq_freq_attr_rw(energy_performance_preference);
1408 cpufreq_freq_attr_ro(energy_performance_available_preferences);
1409 static DEVICE_ATTR_RW(status);
1410 static DEVICE_ATTR_RO(prefcore);
1411 
1412 static struct freq_attr *amd_pstate_attr[] = {
1413 	&amd_pstate_max_freq,
1414 	&amd_pstate_lowest_nonlinear_freq,
1415 	&amd_pstate_highest_perf,
1416 	&amd_pstate_prefcore_ranking,
1417 	&amd_pstate_hw_prefcore,
1418 	NULL,
1419 };
1420 
1421 static struct freq_attr *amd_pstate_epp_attr[] = {
1422 	&amd_pstate_max_freq,
1423 	&amd_pstate_lowest_nonlinear_freq,
1424 	&amd_pstate_highest_perf,
1425 	&amd_pstate_prefcore_ranking,
1426 	&amd_pstate_hw_prefcore,
1427 	&energy_performance_preference,
1428 	&energy_performance_available_preferences,
1429 	NULL,
1430 };
1431 
1432 static struct attribute *pstate_global_attributes[] = {
1433 	&dev_attr_status.attr,
1434 	&dev_attr_prefcore.attr,
1435 	NULL
1436 };
1437 
1438 static const struct attribute_group amd_pstate_global_attr_group = {
1439 	.name = "amd_pstate",
1440 	.attrs = pstate_global_attributes,
1441 };
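
/*
 * Illustrative runtime usage (assuming the group above is registered under
 * the cpu subsystem root, as done in amd_pstate_init()): the current mode
 * can be read or changed with e.g.
 *   cat /sys/devices/system/cpu/amd_pstate/status
 *   echo guided > /sys/devices/system/cpu/amd_pstate/status
 * routed through status_show()/status_store() above.
 */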
1442 
1443 static bool amd_pstate_acpi_pm_profile_server(void)
1444 {
1445 	switch (acpi_gbl_FADT.preferred_profile) {
1446 	case PM_ENTERPRISE_SERVER:
1447 	case PM_SOHO_SERVER:
1448 	case PM_PERFORMANCE_SERVER:
1449 		return true;
1450 	}
1451 	return false;
1452 }
1453 
1454 static bool amd_pstate_acpi_pm_profile_undefined(void)
1455 {
1456 	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
1457 		return true;
1458 	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
1459 		return true;
1460 	return false;
1461 }
1462 
1463 static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
1464 {
1465 	int min_freq, max_freq, ret;
1466 	struct amd_cpudata *cpudata;
1467 	struct device *dev;
1468 	u64 value;
1469 
1470 	/*
1471 	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
1472 	 * which is ideal for initialization process.
1473 	 */
1474 	amd_perf_ctl_reset(policy->cpu);
1475 	dev = get_cpu_device(policy->cpu);
1476 	if (!dev)
1477 		return -ENODEV;
1478 
1479 	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
1480 	if (!cpudata)
1481 		return -ENOMEM;
1482 
1483 	cpudata->cpu = policy->cpu;
1484 	cpudata->epp_policy = 0;
1485 
1486 	amd_pstate_init_prefcore(cpudata);
1487 
1488 	ret = amd_pstate_init_perf(cpudata);
1489 	if (ret)
1490 		goto free_cpudata1;
1491 
1492 	ret = amd_pstate_init_freq(cpudata);
1493 	if (ret)
1494 		goto free_cpudata1;
1495 
1496 	ret = amd_pstate_init_boost_support(cpudata);
1497 	if (ret)
1498 		goto free_cpudata1;
1499 
1500 	min_freq = READ_ONCE(cpudata->min_freq);
1501 	max_freq = READ_ONCE(cpudata->max_freq);
1502 
1503 	policy->cpuinfo.min_freq = min_freq;
1504 	policy->cpuinfo.max_freq = max_freq;
1505 	/* It will be updated by governor */
1506 	policy->cur = policy->cpuinfo.min_freq;
1507 
1508 	policy->driver_data = cpudata;
1509 
1510 	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
1511 
1512 	policy->min = policy->cpuinfo.min_freq;
1513 	policy->max = policy->cpuinfo.max_freq;
1514 
1515 	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
1516 
1517 	/*
1518 	 * Set the policy to provide a valid fallback value in case
1519 	 * the default cpufreq governor is neither powersave nor performance.
1520 	 */
1521 	if (amd_pstate_acpi_pm_profile_server() ||
1522 	    amd_pstate_acpi_pm_profile_undefined())
1523 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1524 	else
1525 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
1526 
1527 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1528 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
1529 		if (ret)
1530 			return ret;
1531 		WRITE_ONCE(cpudata->cppc_req_cached, value);
1532 
1533 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
1534 		if (ret)
1535 			return ret;
1536 		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
1537 	}
1538 
1539 	return 0;
1540 
1541 free_cpudata1:
1542 	kfree(cpudata);
1543 	return ret;
1544 }
1545 
1546 static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
1547 {
1548 	struct amd_cpudata *cpudata = policy->driver_data;
1549 
1550 	if (cpudata) {
1551 		kfree(cpudata);
1552 		policy->driver_data = NULL;
1553 	}
1554 
1555 	pr_debug("CPU %d exiting\n", policy->cpu);
1556 }
1557 
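/*
 * Recompute the perf limits from the current policy, then program min/max
 * perf (with desired perf forced to zero) and the EPP value into the CPPC
 * request for the active (EPP) mode driver.
 */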
1558 static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
1559 {
1560 	struct amd_cpudata *cpudata = policy->driver_data;
1561 	u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
1562 	u64 value;
1563 	s16 epp;
1564 
1565 	max_perf = READ_ONCE(cpudata->highest_perf);
1566 	min_perf = READ_ONCE(cpudata->lowest_perf);
1567 	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
1568 	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
1569 
1570 	if (min_limit_perf < min_perf)
1571 		min_limit_perf = min_perf;
1572 
1573 	if (max_limit_perf < min_limit_perf)
1574 		max_limit_perf = min_limit_perf;
1575 
1576 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
1577 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
1578 
1579 	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
1580 			cpudata->max_limit_perf);
1581 	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
1582 			cpudata->max_limit_perf);
1583 	value = READ_ONCE(cpudata->cppc_req_cached);
1584 
1585 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1586 		min_perf = max_perf;
1587 
1588 	/* Initial min/max values for CPPC Performance Controls Register */
1589 	value &= ~AMD_CPPC_MIN_PERF(~0L);
1590 	value |= AMD_CPPC_MIN_PERF(min_perf);
1591 
1592 	value &= ~AMD_CPPC_MAX_PERF(~0L);
1593 	value |= AMD_CPPC_MAX_PERF(max_perf);
1594 
1595 	/* The CPPC EPP feature requires the desired perf field to be set to zero */
1596 	value &= ~AMD_CPPC_DES_PERF(~0L);
1597 	value |= AMD_CPPC_DES_PERF(0);
1598 
1599 	cpudata->epp_policy = cpudata->policy;
1600 
1601 	/* Get BIOS pre-defined epp value */
1602 	epp = amd_pstate_get_epp(cpudata, value);
1603 	if (epp < 0) {
1604 		/*
1605 		 * This return value can only be negative for shared memory
1606 		 * systems where EPP register reads/writes are not supported.
1607 		 */
1608 		return;
1609 	}
1610 
1611 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1612 		epp = 0;
1613 
1614 	/* Set initial EPP value */
1615 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1616 		value &= ~GENMASK_ULL(31, 24);
1617 		value |= (u64)epp << 24;
1618 	}
1619 
1620 	WRITE_ONCE(cpudata->cppc_req_cached, value);
1621 	amd_pstate_set_epp(cpudata, epp);
1622 }
1623 
1624 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
1625 {
1626 	struct amd_cpudata *cpudata = policy->driver_data;
1627 
1628 	if (!policy->cpuinfo.max_freq)
1629 		return -ENODEV;
1630 
1631 	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
1632 				policy->cpuinfo.max_freq, policy->max);
1633 
1634 	cpudata->policy = policy->policy;
1635 
1636 	amd_pstate_epp_update_limit(policy);
1637 
1638 	/*
1639 	 * policy->cur is never updated with the amd_pstate_epp driver, but it
1640 	 * is used as a stale frequency value. So, keep it within limits.
1641 	 */
1642 	policy->cur = policy->min;
1643 
1644 	return 0;
1645 }
1646 
1647 static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
1648 {
1649 	struct cppc_perf_ctrls perf_ctrls;
1650 	u64 value, max_perf;
1651 	int ret;
1652 
1653 	ret = amd_pstate_enable(true);
1654 	if (ret)
1655 		pr_err("failed to enable amd pstate during resume, return %d\n", ret);
1656 
1657 	value = READ_ONCE(cpudata->cppc_req_cached);
1658 	max_perf = READ_ONCE(cpudata->highest_perf);
1659 
1660 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1661 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1662 	} else {
1663 		perf_ctrls.max_perf = max_perf;
1664 		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
1665 		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1666 	}
1667 }
1668 
1669 static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
1670 {
1671 	struct amd_cpudata *cpudata = policy->driver_data;
1672 
1673 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
1674 
1675 	if (cppc_state == AMD_PSTATE_ACTIVE) {
1676 		amd_pstate_epp_reenable(cpudata);
1677 		cpudata->suspended = false;
1678 	}
1679 
1680 	return 0;
1681 }
1682 
1683 static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
1684 {
1685 	struct amd_cpudata *cpudata = policy->driver_data;
1686 	struct cppc_perf_ctrls perf_ctrls;
1687 	int min_perf;
1688 	u64 value;
1689 
1690 	min_perf = READ_ONCE(cpudata->lowest_perf);
1691 	value = READ_ONCE(cpudata->cppc_req_cached);
1692 
1693 	mutex_lock(&amd_pstate_limits_lock);
1694 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1695 		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
1696 
1697 		/* Set max perf same as min perf */
1698 		value &= ~AMD_CPPC_MAX_PERF(~0L);
1699 		value |= AMD_CPPC_MAX_PERF(min_perf);
1700 		value &= ~AMD_CPPC_MIN_PERF(~0L);
1701 		value |= AMD_CPPC_MIN_PERF(min_perf);
1702 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1703 	} else {
1704 		perf_ctrls.desired_perf = 0;
1705 		perf_ctrls.max_perf = min_perf;
1706 		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
1707 		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1708 	}
1709 	mutex_unlock(&amd_pstate_limits_lock);
1710 }
1711 
1712 static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
1713 {
1714 	struct amd_cpudata *cpudata = policy->driver_data;
1715 
1716 	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
1717 
1718 	if (cpudata->suspended)
1719 		return 0;
1720 
1721 	if (cppc_state == AMD_PSTATE_ACTIVE)
1722 		amd_pstate_epp_offline(policy);
1723 
1724 	return 0;
1725 }
1726 
1727 static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
1728 {
1729 	cpufreq_verify_within_cpu_limits(policy);
1730 	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
1731 	return 0;
1732 }
1733 
1734 static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
1735 {
1736 	struct amd_cpudata *cpudata = policy->driver_data;
1737 	int ret;
1738 
1739 	/* avoid suspending when EPP is not enabled */
1740 	if (cppc_state != AMD_PSTATE_ACTIVE)
1741 		return 0;
1742 
1743 	/* Set this flag to avoid taking the core offline */
1744 	cpudata->suspended = true;
1745 
1746 	/* disable CPPC in low-level firmware */
1747 	ret = amd_pstate_enable(false);
1748 	if (ret)
1749 		pr_err("failed to suspend, return %d\n", ret);
1750 
1751 	return 0;
1752 }
1753 
1754 static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
1755 {
1756 	struct amd_cpudata *cpudata = policy->driver_data;
1757 
1758 	if (cpudata->suspended) {
1759 		mutex_lock(&amd_pstate_limits_lock);
1760 
1761 		/* re-enable amd-pstate when returning from the suspend state */
1762 		amd_pstate_epp_reenable(cpudata);
1763 
1764 		mutex_unlock(&amd_pstate_limits_lock);
1765 
1766 		cpudata->suspended = false;
1767 	}
1768 
1769 	return 0;
1770 }
1771 
1772 static struct cpufreq_driver amd_pstate_driver = {
1773 	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
1774 	.verify		= amd_pstate_verify,
1775 	.target		= amd_pstate_target,
1776 	.fast_switch    = amd_pstate_fast_switch,
1777 	.init		= amd_pstate_cpu_init,
1778 	.exit		= amd_pstate_cpu_exit,
1779 	.suspend	= amd_pstate_cpu_suspend,
1780 	.resume		= amd_pstate_cpu_resume,
1781 	.set_boost	= amd_pstate_set_boost,
1782 	.update_limits	= amd_pstate_update_limits,
1783 	.name		= "amd-pstate",
1784 	.attr		= amd_pstate_attr,
1785 };
1786 
1787 static struct cpufreq_driver amd_pstate_epp_driver = {
1788 	.flags		= CPUFREQ_CONST_LOOPS,
1789 	.verify		= amd_pstate_epp_verify_policy,
1790 	.setpolicy	= amd_pstate_epp_set_policy,
1791 	.init		= amd_pstate_epp_cpu_init,
1792 	.exit		= amd_pstate_epp_cpu_exit,
1793 	.offline	= amd_pstate_epp_cpu_offline,
1794 	.online		= amd_pstate_epp_cpu_online,
1795 	.suspend	= amd_pstate_epp_suspend,
1796 	.resume		= amd_pstate_epp_resume,
1797 	.update_limits	= amd_pstate_update_limits,
1798 	.set_boost	= amd_pstate_set_boost,
1799 	.name		= "amd-pstate-epp",
1800 	.attr		= amd_pstate_epp_attr,
1801 };
1802 
1803 static int __init amd_pstate_set_driver(int mode_idx)
1804 {
1805 	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
1806 		cppc_state = mode_idx;
1807 		if (cppc_state == AMD_PSTATE_DISABLE)
1808 			pr_info("driver is explicitly disabled\n");
1809 
1810 		if (cppc_state == AMD_PSTATE_ACTIVE)
1811 			current_pstate_driver = &amd_pstate_epp_driver;
1812 
1813 		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
1814 			current_pstate_driver = &amd_pstate_driver;
1815 
1816 		return 0;
1817 	}
1818 
1819 	return -EINVAL;
1820 }
1821 
1822 /*
1823  * CPPC is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
1824  * Show a debug message that helps check whether the CPU has CPPC support when investigating loading issues.
1825  */
1826 static bool amd_cppc_supported(void)
1827 {
1828 	struct cpuinfo_x86 *c = &cpu_data(0);
1829 	bool warn = false;
1830 
1831 	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
1832 		pr_debug_once("CPPC feature is not supported by the processor\n");
1833 		return false;
1834 	}
1835 
1836 	/*
1837 	 * If the CPPC feature is disabled in the BIOS for processors that support MSR-based CPPC,
1838 	 * the amd-pstate driver may not function correctly.
1839 	 * Check the CPPC flag and display a warning message if the platform supports CPPC.
1840 	 * Note: the check below will not abort the driver registration process because
1841 	 * it is added only for debugging purposes.
1842 	 */
1843 	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
1844 		if (cpu_feature_enabled(X86_FEATURE_ZEN1) || cpu_feature_enabled(X86_FEATURE_ZEN2)) {
1845 			if (c->x86_model > 0x60 && c->x86_model < 0xaf)
1846 				warn = true;
1847 		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || cpu_feature_enabled(X86_FEATURE_ZEN4)) {
1848 			if ((c->x86_model > 0x10 && c->x86_model < 0x1F) ||
1849 					(c->x86_model > 0x40 && c->x86_model < 0xaf))
1850 				warn = true;
1851 		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
1852 			warn = true;
1853 		}
1854 	}
1855 
1856 	if (warn)
1857 		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
1858 					"Please enable it if your BIOS has the CPPC option.\n");
1859 	return true;
1860 }
1861 
1862 static int __init amd_pstate_init(void)
1863 {
1864 	struct device *dev_root;
1865 	int ret;
1866 
1867 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1868 		return -ENODEV;
1869 
1870 	/* show debug message only if CPPC is not supported */
1871 	if (!amd_cppc_supported())
1872 		return -EOPNOTSUPP;
1873 
1874 	/* Show a warning message when the BIOS is broken or ACPI is disabled */
1875 	if (!acpi_cpc_valid()) {
1876 		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
1877 		return -ENODEV;
1878 	}
1879 
1880 	/* don't keep reloading if cpufreq_driver exists */
1881 	if (cpufreq_get_current_driver())
1882 		return -EEXIST;
1883 
1884 	quirks = NULL;
1885 
1886 	/* check whether this machine needs CPPC quirks */
1887 	dmi_check_system(amd_pstate_quirks_table);
1888 
1889 	/*
1890 	 * Determine the driver mode from the command line or kernel config.
1891 	 * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
1892 	 * Command line options will override the kernel config settings.
1893 	 */
1894 
1895 	if (cppc_state == AMD_PSTATE_UNDEFINED) {
1896 		/* Disable on the following configs by default:
1897 		 * 1. Undefined platforms
1898 		 * 2. Server platforms
1899 		 */
1900 		if (amd_pstate_acpi_pm_profile_undefined() ||
1901 		    amd_pstate_acpi_pm_profile_server()) {
1902 			pr_info("driver load is disabled, boot with specific mode to enable this\n");
1903 			return -ENODEV;
1904 		}
1905 		/* get driver mode from kernel config option [1:4] */
1906 		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
1907 	}
1908 
1909 	switch (cppc_state) {
1910 	case AMD_PSTATE_DISABLE:
1911 		pr_info("driver load is disabled, boot with specific mode to enable this\n");
1912 		return -ENODEV;
1913 	case AMD_PSTATE_PASSIVE:
1914 	case AMD_PSTATE_ACTIVE:
1915 	case AMD_PSTATE_GUIDED:
1916 		ret = amd_pstate_set_driver(cppc_state);
1917 		if (ret)
1918 			return ret;
1919 		break;
1920 	default:
1921 		return -EINVAL;
1922 	}
1923 
1924 	/* capability check */
1925 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1926 		pr_debug("AMD CPPC MSR based functionality is supported\n");
1927 		if (cppc_state != AMD_PSTATE_ACTIVE)
1928 			current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
1929 	} else {
1930 		pr_debug("AMD CPPC shared memory based functionality is supported\n");
1931 		static_call_update(amd_pstate_enable, cppc_enable);
1932 		static_call_update(amd_pstate_init_perf, cppc_init_perf);
1933 		static_call_update(amd_pstate_update_perf, cppc_update_perf);
1934 	}
1935 
1936 	/* enable amd pstate feature */
1937 	ret = amd_pstate_enable(true);
1938 	if (ret) {
1939 		pr_err("failed to enable driver mode(%d)\n", cppc_state);
1940 		return ret;
1941 	}
1942 
1943 	ret = cpufreq_register_driver(current_pstate_driver);
1944 	if (ret) {
1945 		pr_err("failed to register with return %d\n", ret);
1946 		goto disable_driver;
1947 	}
1948 
1949 	dev_root = bus_get_dev_root(&cpu_subsys);
1950 	if (dev_root) {
1951 		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
1952 		put_device(dev_root);
1953 		if (ret) {
1954 			pr_err("sysfs attribute export failed with error %d.\n", ret);
1955 			goto global_attr_free;
1956 		}
1957 	}
1958 
1959 	return ret;
1960 
1961 global_attr_free:
1962 	cpufreq_unregister_driver(current_pstate_driver);
1963 disable_driver:
1964 	amd_pstate_enable(false);
1965 	return ret;
1966 }
1967 device_initcall(amd_pstate_init);
1968 
1969 static int __init amd_pstate_param(char *str)
1970 {
1971 	size_t size;
1972 	int mode_idx;
1973 
1974 	if (!str)
1975 		return -EINVAL;
1976 
1977 	size = strlen(str);
1978 	mode_idx = get_mode_idx_from_str(str, size);
1979 
1980 	return amd_pstate_set_driver(mode_idx);
1981 }
1982 
1983 static int __init amd_prefcore_param(char *str)
1984 {
1985 	if (!strcmp(str, "disable"))
1986 		amd_pstate_prefcore = false;
1987 
1988 	return 0;
1989 }
1990 
1991 early_param("amd_pstate", amd_pstate_param);
1992 early_param("amd_prefcore", amd_prefcore_param);
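
/*
 * Illustrative example (values chosen here, not mandated by the code): both
 * parameters are parsed early at boot, so a command line such as
 *   amd_pstate=guided amd_prefcore=disable
 * selects guided mode and disables preferred-core ranking before
 * amd_pstate_init() runs.
 */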
1993 
1994 MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
1995 MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
1996