xref: /linux/drivers/cpufreq/amd-pstate.c (revision f9aa1fb9f8c0542f5f6e6e620de320995d5622ad)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * amd-pstate.c - AMD Processor P-state Frequency Driver
4  *
5  * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
6  *
7  * Author: Huang Rui <ray.huang@amd.com>
8  *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * gives the Linux kernel a flexible, low-latency interface for communicating
 * performance hints directly to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementation
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution. The
 * X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
21  */
22 
23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24 
25 #include <linux/kernel.h>
26 #include <linux/module.h>
27 #include <linux/init.h>
28 #include <linux/smp.h>
29 #include <linux/sched.h>
30 #include <linux/cpufreq.h>
31 #include <linux/compiler.h>
32 #include <linux/dmi.h>
33 #include <linux/slab.h>
34 #include <linux/acpi.h>
35 #include <linux/io.h>
36 #include <linux/delay.h>
37 #include <linux/uaccess.h>
38 #include <linux/static_call.h>
39 #include <linux/topology.h>
40 
41 #include <acpi/processor.h>
42 #include <acpi/cppc_acpi.h>
43 
44 #include <asm/msr.h>
45 #include <asm/processor.h>
46 #include <asm/cpufeature.h>
47 #include <asm/cpu_device_id.h>
48 
49 #include "amd-pstate.h"
50 #include "amd-pstate-trace.h"
51 
52 #define AMD_PSTATE_TRANSITION_LATENCY	20000
53 #define AMD_PSTATE_TRANSITION_DELAY	1000
54 #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
55 
56 #define AMD_CPPC_EPP_PERFORMANCE		0x00
57 #define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
58 #define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
59 #define AMD_CPPC_EPP_POWERSAVE			0xFF
60 
61 static const char * const amd_pstate_mode_string[] = {
62 	[AMD_PSTATE_UNDEFINED]   = "undefined",
63 	[AMD_PSTATE_DISABLE]     = "disable",
64 	[AMD_PSTATE_PASSIVE]     = "passive",
65 	[AMD_PSTATE_ACTIVE]      = "active",
66 	[AMD_PSTATE_GUIDED]      = "guided",
67 	NULL,
68 };
69 
70 const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
71 {
72 	if (mode < 0 || mode >= AMD_PSTATE_MAX)
73 		return NULL;
74 	return amd_pstate_mode_string[mode];
75 }
76 EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);
77 
78 struct quirk_entry {
79 	u32 nominal_freq;
80 	u32 lowest_freq;
81 };
82 
83 static struct cpufreq_driver *current_pstate_driver;
84 static struct cpufreq_driver amd_pstate_driver;
85 static struct cpufreq_driver amd_pstate_epp_driver;
86 static int cppc_state = AMD_PSTATE_UNDEFINED;
87 static bool cppc_enabled;
88 static bool amd_pstate_prefcore = true;
89 static struct quirk_entry *quirks;
90 
/*
 * AMD Energy Preference Performance (EPP)
 *
 * The EPP is used in the CCLK DPM controller to drive the frequency that a
 * core is going to operate at during short periods of activity. EPP values
 * are used for the different OS profiles (balanced, performance, power
 * savings). The enumerators below index both energy_perf_strings[] (the
 * display strings) and epp_values[] (the raw EPP values):
 *
 *	index		string
 *	-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};
114 
115 static const char * const energy_perf_strings[] = {
116 	[EPP_INDEX_DEFAULT] = "default",
117 	[EPP_INDEX_PERFORMANCE] = "performance",
118 	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
119 	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
120 	[EPP_INDEX_POWERSAVE] = "power",
121 	NULL
122 };
123 
124 static unsigned int epp_values[] = {
125 	[EPP_INDEX_DEFAULT] = 0,
126 	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
127 	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
128 	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
129 	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
130  };
131 
/*
 * Handler signature taking one int and returning an int.
 * NOTE(review): presumably used for mode-switch handlers later in the file;
 * no user is visible in this part of the source.
 */
typedef int (*cppc_mode_transition_fn)(int);
133 
134 static struct quirk_entry quirk_amd_7k62 = {
135 	.nominal_freq = 2600,
136 	.lowest_freq = 550,
137 };
138 
139 static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
140 {
141 	/**
142 	 * match the broken bios for family 17h processor support CPPC V2
143 	 * broken BIOS lack of nominal_freq and lowest_freq capabilities
144 	 * definition in ACPI tables
145 	 */
146 	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
147 		quirks = dmi->driver_data;
148 		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
149 		return 1;
150 	}
151 
152 	return 0;
153 }
154 
155 static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
156 	{
157 		.callback = dmi_matched_7k62_bios_bug,
158 		.ident = "AMD EPYC 7K62",
159 		.matches = {
160 			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
161 			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
162 		},
163 		.driver_data = &quirk_amd_7k62,
164 	},
165 	{}
166 };
167 MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
168 
169 static inline int get_mode_idx_from_str(const char *str, size_t size)
170 {
171 	int i;
172 
173 	for (i=0; i < AMD_PSTATE_MAX; i++) {
174 		if (!strncmp(str, amd_pstate_mode_string[i], size))
175 			return i;
176 	}
177 	return -EINVAL;
178 }
179 
/* NOTE(review): no user of this lock is visible in this part of the file. */
static DEFINE_MUTEX(amd_pstate_limits_lock);
/* Held by amd_pstate_set_boost() and amd_pstate_update_limits(). */
static DEFINE_MUTEX(amd_pstate_driver_lock);
182 
183 static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
184 {
185 	u64 epp;
186 	int ret;
187 
188 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
189 		if (!cppc_req_cached) {
190 			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
191 					&cppc_req_cached);
192 			if (epp)
193 				return epp;
194 		}
195 		epp = (cppc_req_cached >> 24) & 0xFF;
196 	} else {
197 		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
198 		if (ret < 0) {
199 			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
200 			return -EIO;
201 		}
202 	}
203 
204 	return (s16)(epp & 0xff);
205 }
206 
207 static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
208 {
209 	s16 epp;
210 	int index = -EINVAL;
211 
212 	epp = amd_pstate_get_epp(cpudata, 0);
213 	if (epp < 0)
214 		return epp;
215 
216 	switch (epp) {
217 	case AMD_CPPC_EPP_PERFORMANCE:
218 		index = EPP_INDEX_PERFORMANCE;
219 		break;
220 	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
221 		index = EPP_INDEX_BALANCE_PERFORMANCE;
222 		break;
223 	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
224 		index = EPP_INDEX_BALANCE_POWERSAVE;
225 		break;
226 	case AMD_CPPC_EPP_POWERSAVE:
227 		index = EPP_INDEX_POWERSAVE;
228 		break;
229 	default:
230 		break;
231 	}
232 
233 	return index;
234 }
235 
236 static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
237 			       u32 des_perf, u32 max_perf, bool fast_switch)
238 {
239 	if (fast_switch)
240 		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
241 	else
242 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
243 			      READ_ONCE(cpudata->cppc_req_cached));
244 }
245 
246 DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
247 
248 static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
249 					  u32 min_perf, u32 des_perf,
250 					  u32 max_perf, bool fast_switch)
251 {
252 	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
253 					    max_perf, fast_switch);
254 }
255 
256 static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
257 {
258 	int ret;
259 	struct cppc_perf_ctrls perf_ctrls;
260 
261 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
262 		u64 value = READ_ONCE(cpudata->cppc_req_cached);
263 
264 		value &= ~GENMASK_ULL(31, 24);
265 		value |= (u64)epp << 24;
266 		WRITE_ONCE(cpudata->cppc_req_cached, value);
267 
268 		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
269 		if (!ret)
270 			cpudata->epp_cached = epp;
271 	} else {
272 		amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
273 					     cpudata->max_limit_perf, false);
274 
275 		perf_ctrls.energy_perf = epp;
276 		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
277 		if (ret) {
278 			pr_debug("failed to set energy perf value (%d)\n", ret);
279 			return ret;
280 		}
281 		cpudata->epp_cached = epp;
282 	}
283 
284 	return ret;
285 }
286 
287 static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
288 		int pref_index)
289 {
290 	int epp = -EINVAL;
291 	int ret;
292 
293 	if (!pref_index)
294 		epp = cpudata->epp_default;
295 
296 	if (epp == -EINVAL)
297 		epp = epp_values[pref_index];
298 
299 	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
300 		pr_debug("EPP cannot be set under performance policy\n");
301 		return -EBUSY;
302 	}
303 
304 	ret = amd_pstate_set_epp(cpudata, epp);
305 
306 	return ret;
307 }
308 
309 static inline int msr_cppc_enable(bool enable)
310 {
311 	int ret, cpu;
312 	unsigned long logical_proc_id_mask = 0;
313 
314        /*
315         * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
316         */
317 	if (!enable)
318 		return 0;
319 
320 	if (enable == cppc_enabled)
321 		return 0;
322 
323 	for_each_present_cpu(cpu) {
324 		unsigned long logical_id = topology_logical_package_id(cpu);
325 
326 		if (test_bit(logical_id, &logical_proc_id_mask))
327 			continue;
328 
329 		set_bit(logical_id, &logical_proc_id_mask);
330 
331 		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
332 				enable);
333 		if (ret)
334 			return ret;
335 	}
336 
337 	cppc_enabled = enable;
338 	return 0;
339 }
340 
341 static int shmem_cppc_enable(bool enable)
342 {
343 	int cpu, ret = 0;
344 	struct cppc_perf_ctrls perf_ctrls;
345 
346 	if (enable == cppc_enabled)
347 		return 0;
348 
349 	for_each_present_cpu(cpu) {
350 		ret = cppc_set_enable(cpu, enable);
351 		if (ret)
352 			return ret;
353 
354 		/* Enable autonomous mode for EPP */
355 		if (cppc_state == AMD_PSTATE_ACTIVE) {
356 			/* Set desired perf as zero to allow EPP firmware control */
357 			perf_ctrls.desired_perf = 0;
358 			ret = cppc_set_perf(cpu, &perf_ctrls);
359 			if (ret)
360 				return ret;
361 		}
362 	}
363 
364 	cppc_enabled = enable;
365 	return ret;
366 }
367 
368 DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);
369 
370 static inline int amd_pstate_cppc_enable(bool enable)
371 {
372 	return static_call(amd_pstate_cppc_enable)(enable);
373 }
374 
375 static int msr_init_perf(struct amd_cpudata *cpudata)
376 {
377 	u64 cap1, numerator;
378 
379 	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
380 				     &cap1);
381 	if (ret)
382 		return ret;
383 
384 	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
385 	if (ret)
386 		return ret;
387 
388 	WRITE_ONCE(cpudata->highest_perf, numerator);
389 	WRITE_ONCE(cpudata->max_limit_perf, numerator);
390 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
391 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
392 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
393 	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
394 	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
395 	return 0;
396 }
397 
398 static int shmem_init_perf(struct amd_cpudata *cpudata)
399 {
400 	struct cppc_perf_caps cppc_perf;
401 	u64 numerator;
402 
403 	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
404 	if (ret)
405 		return ret;
406 
407 	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
408 	if (ret)
409 		return ret;
410 
411 	WRITE_ONCE(cpudata->highest_perf, numerator);
412 	WRITE_ONCE(cpudata->max_limit_perf, numerator);
413 	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
414 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
415 		   cppc_perf.lowest_nonlinear_perf);
416 	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
417 	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
418 	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
419 
420 	if (cppc_state == AMD_PSTATE_ACTIVE)
421 		return 0;
422 
423 	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
424 	if (ret) {
425 		pr_warn("failed to get auto_sel, ret: %d\n", ret);
426 		return 0;
427 	}
428 
429 	ret = cppc_set_auto_sel(cpudata->cpu,
430 			(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
431 
432 	if (ret)
433 		pr_warn("failed to set auto_sel, ret: %d\n", ret);
434 
435 	return ret;
436 }
437 
438 DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);
439 
440 static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
441 {
442 	return static_call(amd_pstate_init_perf)(cpudata);
443 }
444 
445 static void shmem_update_perf(struct amd_cpudata *cpudata,
446 			     u32 min_perf, u32 des_perf,
447 			     u32 max_perf, bool fast_switch)
448 {
449 	struct cppc_perf_ctrls perf_ctrls;
450 
451 	perf_ctrls.max_perf = max_perf;
452 	perf_ctrls.min_perf = min_perf;
453 	perf_ctrls.desired_perf = des_perf;
454 
455 	cppc_set_perf(cpudata->cpu, &perf_ctrls);
456 }
457 
458 static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
459 {
460 	u64 aperf, mperf, tsc;
461 	unsigned long flags;
462 
463 	local_irq_save(flags);
464 	rdmsrl(MSR_IA32_APERF, aperf);
465 	rdmsrl(MSR_IA32_MPERF, mperf);
466 	tsc = rdtsc();
467 
468 	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
469 		local_irq_restore(flags);
470 		return false;
471 	}
472 
473 	local_irq_restore(flags);
474 
475 	cpudata->cur.aperf = aperf;
476 	cpudata->cur.mperf = mperf;
477 	cpudata->cur.tsc =  tsc;
478 	cpudata->cur.aperf -= cpudata->prev.aperf;
479 	cpudata->cur.mperf -= cpudata->prev.mperf;
480 	cpudata->cur.tsc -= cpudata->prev.tsc;
481 
482 	cpudata->prev.aperf = aperf;
483 	cpudata->prev.mperf = mperf;
484 	cpudata->prev.tsc = tsc;
485 
486 	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
487 
488 	return true;
489 }
490 
491 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
492 			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
493 {
494 	unsigned long max_freq;
495 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
496 	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
497 	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
498 	u64 value = prev;
499 
500 	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
501 			cpudata->max_limit_perf);
502 	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
503 			cpudata->max_limit_perf);
504 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
505 
506 	max_freq = READ_ONCE(cpudata->max_limit_freq);
507 	policy->cur = div_u64(des_perf * max_freq, max_perf);
508 
509 	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
510 		min_perf = des_perf;
511 		des_perf = 0;
512 	}
513 
514 	value &= ~AMD_CPPC_MIN_PERF(~0L);
515 	value |= AMD_CPPC_MIN_PERF(min_perf);
516 
517 	value &= ~AMD_CPPC_DES_PERF(~0L);
518 	value |= AMD_CPPC_DES_PERF(des_perf);
519 
520 	/* limit the max perf when core performance boost feature is disabled */
521 	if (!cpudata->boost_supported)
522 		max_perf = min_t(unsigned long, nominal_perf, max_perf);
523 
524 	value &= ~AMD_CPPC_MAX_PERF(~0L);
525 	value |= AMD_CPPC_MAX_PERF(max_perf);
526 
527 	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
528 		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
529 			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
530 				cpudata->cpu, (value != prev), fast_switch);
531 	}
532 
533 	if (value == prev)
534 		goto cpufreq_policy_put;
535 
536 	WRITE_ONCE(cpudata->cppc_req_cached, value);
537 
538 	amd_pstate_update_perf(cpudata, min_perf, des_perf,
539 			       max_perf, fast_switch);
540 
541 cpufreq_policy_put:
542 	cpufreq_cpu_put(policy);
543 }
544 
545 static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
546 {
547 	/*
548 	 * Initialize lower frequency limit (i.e.policy->min) with
549 	 * lowest_nonlinear_frequency which is the most energy efficient
550 	 * frequency. Override the initial value set by cpufreq core and
551 	 * amd-pstate qos_requests.
552 	 */
553 	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
554 		struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
555 		struct amd_cpudata *cpudata;
556 
557 		if (!policy)
558 			return -EINVAL;
559 
560 		cpudata = policy->driver_data;
561 		policy_data->min = cpudata->lowest_nonlinear_freq;
562 		cpufreq_cpu_put(policy);
563 	}
564 
565 	cpufreq_verify_within_cpu_limits(policy_data);
566 	pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);
567 
568 	return 0;
569 }
570 
571 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
572 {
573 	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
574 	struct amd_cpudata *cpudata = policy->driver_data;
575 
576 	max_perf = READ_ONCE(cpudata->highest_perf);
577 	max_freq = READ_ONCE(cpudata->max_freq);
578 	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
579 	min_limit_perf = div_u64(policy->min * max_perf, max_freq);
580 
581 	lowest_perf = READ_ONCE(cpudata->lowest_perf);
582 	if (min_limit_perf < lowest_perf)
583 		min_limit_perf = lowest_perf;
584 
585 	if (max_limit_perf < min_limit_perf)
586 		max_limit_perf = min_limit_perf;
587 
588 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
589 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
590 	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
591 	WRITE_ONCE(cpudata->min_limit_freq, policy->min);
592 
593 	return 0;
594 }
595 
596 static int amd_pstate_update_freq(struct cpufreq_policy *policy,
597 				  unsigned int target_freq, bool fast_switch)
598 {
599 	struct cpufreq_freqs freqs;
600 	struct amd_cpudata *cpudata = policy->driver_data;
601 	unsigned long max_perf, min_perf, des_perf, cap_perf;
602 
603 	if (!cpudata->max_freq)
604 		return -ENODEV;
605 
606 	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
607 		amd_pstate_update_min_max_limit(policy);
608 
609 	cap_perf = READ_ONCE(cpudata->highest_perf);
610 	min_perf = READ_ONCE(cpudata->lowest_perf);
611 	max_perf = cap_perf;
612 
613 	freqs.old = policy->cur;
614 	freqs.new = target_freq;
615 
616 	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
617 				     cpudata->max_freq);
618 
619 	WARN_ON(fast_switch && !policy->fast_switch_enabled);
620 	/*
621 	 * If fast_switch is desired, then there aren't any registered
622 	 * transition notifiers. See comment for
623 	 * cpufreq_enable_fast_switch().
624 	 */
625 	if (!fast_switch)
626 		cpufreq_freq_transition_begin(policy, &freqs);
627 
628 	amd_pstate_update(cpudata, min_perf, des_perf,
629 			max_perf, fast_switch, policy->governor->flags);
630 
631 	if (!fast_switch)
632 		cpufreq_freq_transition_end(policy, &freqs, false);
633 
634 	return 0;
635 }
636 
637 static int amd_pstate_target(struct cpufreq_policy *policy,
638 			     unsigned int target_freq,
639 			     unsigned int relation)
640 {
641 	return amd_pstate_update_freq(policy, target_freq, false);
642 }
643 
644 static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
645 				  unsigned int target_freq)
646 {
647 	if (!amd_pstate_update_freq(policy, target_freq, true))
648 		return target_freq;
649 	return policy->cur;
650 }
651 
652 static void amd_pstate_adjust_perf(unsigned int cpu,
653 				   unsigned long _min_perf,
654 				   unsigned long target_perf,
655 				   unsigned long capacity)
656 {
657 	unsigned long max_perf, min_perf, des_perf,
658 		      cap_perf, lowest_nonlinear_perf;
659 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
660 	struct amd_cpudata *cpudata;
661 
662 	if (!policy)
663 		return;
664 
665 	cpudata = policy->driver_data;
666 
667 	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
668 		amd_pstate_update_min_max_limit(policy);
669 
670 
671 	cap_perf = READ_ONCE(cpudata->highest_perf);
672 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
673 
674 	des_perf = cap_perf;
675 	if (target_perf < capacity)
676 		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
677 
678 	min_perf = READ_ONCE(cpudata->lowest_perf);
679 	if (_min_perf < capacity)
680 		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
681 
682 	if (min_perf < lowest_nonlinear_perf)
683 		min_perf = lowest_nonlinear_perf;
684 
685 	max_perf = cap_perf;
686 	if (max_perf < min_perf)
687 		max_perf = min_perf;
688 
689 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
690 
691 	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
692 			policy->governor->flags);
693 	cpufreq_cpu_put(policy);
694 }
695 
696 static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
697 {
698 	struct amd_cpudata *cpudata = policy->driver_data;
699 	u32 nominal_freq, max_freq;
700 	int ret = 0;
701 
702 	nominal_freq = READ_ONCE(cpudata->nominal_freq);
703 	max_freq = READ_ONCE(cpudata->max_freq);
704 
705 	if (on)
706 		policy->cpuinfo.max_freq = max_freq;
707 	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
708 		policy->cpuinfo.max_freq = nominal_freq * 1000;
709 
710 	policy->max = policy->cpuinfo.max_freq;
711 
712 	if (cppc_state == AMD_PSTATE_PASSIVE) {
713 		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
714 		if (ret < 0)
715 			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
716 	}
717 
718 	return ret < 0 ? ret : 0;
719 }
720 
721 static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
722 {
723 	struct amd_cpudata *cpudata = policy->driver_data;
724 	int ret;
725 
726 	if (!cpudata->boost_supported) {
727 		pr_err("Boost mode is not supported by this processor or SBIOS\n");
728 		return -EOPNOTSUPP;
729 	}
730 	mutex_lock(&amd_pstate_driver_lock);
731 	ret = amd_pstate_cpu_boost_update(policy, state);
732 	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
733 	policy->boost_enabled = !ret ? state : false;
734 	refresh_frequency_limits(policy);
735 	mutex_unlock(&amd_pstate_driver_lock);
736 
737 	return ret;
738 }
739 
740 static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
741 {
742 	u64 boost_val;
743 	int ret = -1;
744 
745 	/*
746 	 * If platform has no CPB support or disable it, initialize current driver
747 	 * boost_enabled state to be false, it is not an error for cpufreq core to handle.
748 	 */
749 	if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
750 		pr_debug_once("Boost CPB capabilities not present in the processor\n");
751 		ret = 0;
752 		goto exit_err;
753 	}
754 
755 	/* at least one CPU supports CPB, even if others fail later on to set up */
756 	current_pstate_driver->boost_enabled = true;
757 
758 	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
759 	if (ret) {
760 		pr_err_once("failed to read initial CPU boost state!\n");
761 		ret = -EIO;
762 		goto exit_err;
763 	}
764 
765 	if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
766 		cpudata->boost_supported = true;
767 
768 	return 0;
769 
770 exit_err:
771 	cpudata->boost_supported = false;
772 	return ret;
773 }
774 
775 static void amd_perf_ctl_reset(unsigned int cpu)
776 {
777 	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
778 }
779 
/*
 * Enabling preferred-core support in the scheduler cannot be done directly
 * from the cpufreq callbacks because of locking, so it is deferred to a work
 * item that calls sched_set_itmt_support().
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
789 
790 #define CPPC_MAX_PERF	U8_MAX
791 
792 static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
793 {
794 	/* user disabled or not detected */
795 	if (!amd_pstate_prefcore)
796 		return;
797 
798 	cpudata->hw_prefcore = true;
799 
800 	/*
801 	 * The priorities can be set regardless of whether or not
802 	 * sched_set_itmt_support(true) has been called and it is valid to
803 	 * update them at any time after it has been called.
804 	 */
805 	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);
806 
807 	schedule_work(&sched_prefcore_work);
808 }
809 
810 static void amd_pstate_update_limits(unsigned int cpu)
811 {
812 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
813 	struct amd_cpudata *cpudata;
814 	u32 prev_high = 0, cur_high = 0;
815 	int ret;
816 	bool highest_perf_changed = false;
817 
818 	if (!policy)
819 		return;
820 
821 	cpudata = policy->driver_data;
822 
823 	if (!amd_pstate_prefcore)
824 		return;
825 
826 	mutex_lock(&amd_pstate_driver_lock);
827 	ret = amd_get_highest_perf(cpu, &cur_high);
828 	if (ret)
829 		goto free_cpufreq_put;
830 
831 	prev_high = READ_ONCE(cpudata->prefcore_ranking);
832 	highest_perf_changed = (prev_high != cur_high);
833 	if (highest_perf_changed) {
834 		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
835 
836 		if (cur_high < CPPC_MAX_PERF)
837 			sched_set_itmt_core_prio((int)cur_high, cpu);
838 	}
839 
840 free_cpufreq_put:
841 	cpufreq_cpu_put(policy);
842 
843 	if (!highest_perf_changed)
844 		cpufreq_update_policy(cpu);
845 
846 	mutex_unlock(&amd_pstate_driver_lock);
847 }
848 
849 /*
850  * Get pstate transition delay time from ACPI tables that firmware set
851  * instead of using hardcode value directly.
852  */
853 static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
854 {
855 	u32 transition_delay_ns;
856 
857 	transition_delay_ns = cppc_get_transition_latency(cpu);
858 	if (transition_delay_ns == CPUFREQ_ETERNAL) {
859 		if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
860 			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
861 		else
862 			return AMD_PSTATE_TRANSITION_DELAY;
863 	}
864 
865 	return transition_delay_ns / NSEC_PER_USEC;
866 }
867 
868 /*
869  * Get pstate transition latency value from ACPI tables that firmware
870  * set instead of using hardcode value directly.
871  */
872 static u32 amd_pstate_get_transition_latency(unsigned int cpu)
873 {
874 	u32 transition_latency;
875 
876 	transition_latency = cppc_get_transition_latency(cpu);
877 	if (transition_latency  == CPUFREQ_ETERNAL)
878 		return AMD_PSTATE_TRANSITION_LATENCY;
879 
880 	return transition_latency;
881 }
882 
883 /*
884  * amd_pstate_init_freq: Initialize the max_freq, min_freq,
885  *                       nominal_freq and lowest_nonlinear_freq for
886  *                       the @cpudata object.
887  *
888  *  Requires: highest_perf, lowest_perf, nominal_perf and
889  *            lowest_nonlinear_perf members of @cpudata to be
890  *            initialized.
891  *
892  *  Returns 0 on success, non-zero value on failure.
893  */
894 static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
895 {
896 	int ret;
897 	u32 min_freq, max_freq;
898 	u32 nominal_perf, nominal_freq;
899 	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
900 	u32 boost_ratio, lowest_nonlinear_ratio;
901 	struct cppc_perf_caps cppc_perf;
902 
903 	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
904 	if (ret)
905 		return ret;
906 
907 	if (quirks && quirks->lowest_freq)
908 		min_freq = quirks->lowest_freq * 1000;
909 	else
910 		min_freq = cppc_perf.lowest_freq * 1000;
911 
912 	if (quirks && quirks->nominal_freq)
913 		nominal_freq = quirks->nominal_freq ;
914 	else
915 		nominal_freq = cppc_perf.nominal_freq;
916 
917 	nominal_perf = READ_ONCE(cpudata->nominal_perf);
918 
919 	boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
920 	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
921 
922 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
923 	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
924 					 nominal_perf);
925 	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
926 
927 	WRITE_ONCE(cpudata->min_freq, min_freq);
928 	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
929 	WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
930 	WRITE_ONCE(cpudata->max_freq, max_freq);
931 
932 	/**
933 	 * Below values need to be initialized correctly, otherwise driver will fail to load
934 	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf
935 	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq]
936 	 * Check _CPC in ACPI table objects if any values are incorrect
937 	 */
938 	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
939 		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
940 			min_freq, max_freq, nominal_freq * 1000);
941 		return -EINVAL;
942 	}
943 
944 	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) {
945 		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
946 			lowest_nonlinear_freq, min_freq, nominal_freq * 1000);
947 		return -EINVAL;
948 	}
949 
950 	return 0;
951 }
952 
953 static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
954 {
955 	int min_freq, max_freq, ret;
956 	struct device *dev;
957 	struct amd_cpudata *cpudata;
958 
959 	/*
960 	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
961 	 * which is ideal for initialization process.
962 	 */
963 	amd_perf_ctl_reset(policy->cpu);
964 	dev = get_cpu_device(policy->cpu);
965 	if (!dev)
966 		return -ENODEV;
967 
968 	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
969 	if (!cpudata)
970 		return -ENOMEM;
971 
972 	cpudata->cpu = policy->cpu;
973 
974 	ret = amd_pstate_init_perf(cpudata);
975 	if (ret)
976 		goto free_cpudata1;
977 
978 	amd_pstate_init_prefcore(cpudata);
979 
980 	ret = amd_pstate_init_freq(cpudata);
981 	if (ret)
982 		goto free_cpudata1;
983 
984 	ret = amd_pstate_init_boost_support(cpudata);
985 	if (ret)
986 		goto free_cpudata1;
987 
988 	min_freq = READ_ONCE(cpudata->min_freq);
989 	max_freq = READ_ONCE(cpudata->max_freq);
990 
991 	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
992 	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
993 
994 	policy->min = min_freq;
995 	policy->max = max_freq;
996 
997 	policy->cpuinfo.min_freq = min_freq;
998 	policy->cpuinfo.max_freq = max_freq;
999 
1000 	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
1001 
1002 	/* It will be updated by governor */
1003 	policy->cur = policy->cpuinfo.min_freq;
1004 
1005 	if (cpu_feature_enabled(X86_FEATURE_CPPC))
1006 		policy->fast_switch_possible = true;
1007 
1008 	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
1009 				   FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
1010 	if (ret < 0) {
1011 		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
1012 		goto free_cpudata1;
1013 	}
1014 
1015 	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
1016 				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
1017 	if (ret < 0) {
1018 		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
1019 		goto free_cpudata2;
1020 	}
1021 
1022 	cpudata->max_limit_freq = max_freq;
1023 	cpudata->min_limit_freq = min_freq;
1024 
1025 	policy->driver_data = cpudata;
1026 
1027 	if (!current_pstate_driver->adjust_perf)
1028 		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
1029 
1030 	return 0;
1031 
1032 free_cpudata2:
1033 	freq_qos_remove_request(&cpudata->req[0]);
1034 free_cpudata1:
1035 	kfree(cpudata);
1036 	return ret;
1037 }
1038 
1039 static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
1040 {
1041 	struct amd_cpudata *cpudata = policy->driver_data;
1042 
1043 	freq_qos_remove_request(&cpudata->req[1]);
1044 	freq_qos_remove_request(&cpudata->req[0]);
1045 	policy->fast_switch_possible = false;
1046 	kfree(cpudata);
1047 }
1048 
1049 static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
1050 {
1051 	int ret;
1052 
1053 	ret = amd_pstate_cppc_enable(true);
1054 	if (ret)
1055 		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
1056 
1057 	return ret;
1058 }
1059 
1060 static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
1061 {
1062 	int ret;
1063 
1064 	ret = amd_pstate_cppc_enable(false);
1065 	if (ret)
1066 		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
1067 
1068 	return ret;
1069 }
1070 
1071 /* Sysfs attributes */
1072 
1073 /*
1074  * This frequency is to indicate the maximum hardware frequency.
1075  * If boost is not active but supported, the frequency will be larger than the
1076  * one in cpuinfo.
1077  */
1078 static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
1079 					char *buf)
1080 {
1081 	int max_freq;
1082 	struct amd_cpudata *cpudata = policy->driver_data;
1083 
1084 	max_freq = READ_ONCE(cpudata->max_freq);
1085 	if (max_freq < 0)
1086 		return max_freq;
1087 
1088 	return sysfs_emit(buf, "%u\n", max_freq);
1089 }
1090 
1091 static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
1092 						     char *buf)
1093 {
1094 	int freq;
1095 	struct amd_cpudata *cpudata = policy->driver_data;
1096 
1097 	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
1098 	if (freq < 0)
1099 		return freq;
1100 
1101 	return sysfs_emit(buf, "%u\n", freq);
1102 }
1103 
1104 /*
1105  * In some of ASICs, the highest_perf is not the one in the _CPC table, so we
1106  * need to expose it to sysfs.
1107  */
1108 static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
1109 					    char *buf)
1110 {
1111 	u32 perf;
1112 	struct amd_cpudata *cpudata = policy->driver_data;
1113 
1114 	perf = READ_ONCE(cpudata->highest_perf);
1115 
1116 	return sysfs_emit(buf, "%u\n", perf);
1117 }
1118 
1119 static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
1120 						char *buf)
1121 {
1122 	u32 perf;
1123 	struct amd_cpudata *cpudata = policy->driver_data;
1124 
1125 	perf = READ_ONCE(cpudata->prefcore_ranking);
1126 
1127 	return sysfs_emit(buf, "%u\n", perf);
1128 }
1129 
1130 static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
1131 					   char *buf)
1132 {
1133 	bool hw_prefcore;
1134 	struct amd_cpudata *cpudata = policy->driver_data;
1135 
1136 	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
1137 
1138 	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
1139 }
1140 
1141 static ssize_t show_energy_performance_available_preferences(
1142 				struct cpufreq_policy *policy, char *buf)
1143 {
1144 	int i = 0;
1145 	int offset = 0;
1146 	struct amd_cpudata *cpudata = policy->driver_data;
1147 
1148 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1149 		return sysfs_emit_at(buf, offset, "%s\n",
1150 				energy_perf_strings[EPP_INDEX_PERFORMANCE]);
1151 
1152 	while (energy_perf_strings[i] != NULL)
1153 		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
1154 
1155 	offset += sysfs_emit_at(buf, offset, "\n");
1156 
1157 	return offset;
1158 }
1159 
1160 static ssize_t store_energy_performance_preference(
1161 		struct cpufreq_policy *policy, const char *buf, size_t count)
1162 {
1163 	struct amd_cpudata *cpudata = policy->driver_data;
1164 	char str_preference[21];
1165 	ssize_t ret;
1166 
1167 	ret = sscanf(buf, "%20s", str_preference);
1168 	if (ret != 1)
1169 		return -EINVAL;
1170 
1171 	ret = match_string(energy_perf_strings, -1, str_preference);
1172 	if (ret < 0)
1173 		return -EINVAL;
1174 
1175 	mutex_lock(&amd_pstate_limits_lock);
1176 	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
1177 	mutex_unlock(&amd_pstate_limits_lock);
1178 
1179 	return ret ?: count;
1180 }
1181 
1182 static ssize_t show_energy_performance_preference(
1183 				struct cpufreq_policy *policy, char *buf)
1184 {
1185 	struct amd_cpudata *cpudata = policy->driver_data;
1186 	int preference;
1187 
1188 	preference = amd_pstate_get_energy_pref_index(cpudata);
1189 	if (preference < 0)
1190 		return preference;
1191 
1192 	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
1193 }
1194 
1195 static void amd_pstate_driver_cleanup(void)
1196 {
1197 	amd_pstate_cppc_enable(false);
1198 	cppc_state = AMD_PSTATE_DISABLE;
1199 	current_pstate_driver = NULL;
1200 }
1201 
1202 static int amd_pstate_set_driver(int mode_idx)
1203 {
1204 	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
1205 		cppc_state = mode_idx;
1206 		if (cppc_state == AMD_PSTATE_DISABLE)
1207 			pr_info("driver is explicitly disabled\n");
1208 
1209 		if (cppc_state == AMD_PSTATE_ACTIVE)
1210 			current_pstate_driver = &amd_pstate_epp_driver;
1211 
1212 		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
1213 			current_pstate_driver = &amd_pstate_driver;
1214 
1215 		return 0;
1216 	}
1217 
1218 	return -EINVAL;
1219 }
1220 
1221 static int amd_pstate_register_driver(int mode)
1222 {
1223 	int ret;
1224 
1225 	ret = amd_pstate_set_driver(mode);
1226 	if (ret)
1227 		return ret;
1228 
1229 	cppc_state = mode;
1230 
1231 	ret = amd_pstate_cppc_enable(true);
1232 	if (ret) {
1233 		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
1234 		       ret);
1235 		amd_pstate_driver_cleanup();
1236 		return ret;
1237 	}
1238 
1239 	ret = cpufreq_register_driver(current_pstate_driver);
1240 	if (ret) {
1241 		amd_pstate_driver_cleanup();
1242 		return ret;
1243 	}
1244 
1245 	return 0;
1246 }
1247 
1248 static int amd_pstate_unregister_driver(int dummy)
1249 {
1250 	cpufreq_unregister_driver(current_pstate_driver);
1251 	amd_pstate_driver_cleanup();
1252 	return 0;
1253 }
1254 
1255 static int amd_pstate_change_mode_without_dvr_change(int mode)
1256 {
1257 	int cpu = 0;
1258 
1259 	cppc_state = mode;
1260 
1261 	if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
1262 		return 0;
1263 
1264 	for_each_present_cpu(cpu) {
1265 		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
1266 	}
1267 
1268 	return 0;
1269 }
1270 
/*
 * Switch to a mode that needs a different cpufreq driver: unregister the
 * current driver, then register the one that belongs to @mode.
 * Returns 0 on success or the error of the step that failed.
 */
static int amd_pstate_change_driver_mode(int mode)
{
	int err = amd_pstate_unregister_driver(0);

	if (err)
		return err;

	return amd_pstate_register_driver(mode);
}
1285 
1286 static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
1287 	[AMD_PSTATE_DISABLE]         = {
1288 		[AMD_PSTATE_DISABLE]     = NULL,
1289 		[AMD_PSTATE_PASSIVE]     = amd_pstate_register_driver,
1290 		[AMD_PSTATE_ACTIVE]      = amd_pstate_register_driver,
1291 		[AMD_PSTATE_GUIDED]      = amd_pstate_register_driver,
1292 	},
1293 	[AMD_PSTATE_PASSIVE]         = {
1294 		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1295 		[AMD_PSTATE_PASSIVE]     = NULL,
1296 		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
1297 		[AMD_PSTATE_GUIDED]      = amd_pstate_change_mode_without_dvr_change,
1298 	},
1299 	[AMD_PSTATE_ACTIVE]          = {
1300 		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1301 		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_driver_mode,
1302 		[AMD_PSTATE_ACTIVE]      = NULL,
1303 		[AMD_PSTATE_GUIDED]      = amd_pstate_change_driver_mode,
1304 	},
1305 	[AMD_PSTATE_GUIDED]          = {
1306 		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1307 		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_mode_without_dvr_change,
1308 		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
1309 		[AMD_PSTATE_GUIDED]      = NULL,
1310 	},
1311 };
1312 
1313 static ssize_t amd_pstate_show_status(char *buf)
1314 {
1315 	if (!current_pstate_driver)
1316 		return sysfs_emit(buf, "disable\n");
1317 
1318 	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
1319 }
1320 
1321 int amd_pstate_update_status(const char *buf, size_t size)
1322 {
1323 	int mode_idx;
1324 
1325 	if (size > strlen("passive") || size < strlen("active"))
1326 		return -EINVAL;
1327 
1328 	mode_idx = get_mode_idx_from_str(buf, size);
1329 
1330 	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
1331 		return -EINVAL;
1332 
1333 	if (mode_state_machine[cppc_state][mode_idx])
1334 		return mode_state_machine[cppc_state][mode_idx](mode_idx);
1335 
1336 	return 0;
1337 }
1338 EXPORT_SYMBOL_GPL(amd_pstate_update_status);
1339 
1340 static ssize_t status_show(struct device *dev,
1341 			   struct device_attribute *attr, char *buf)
1342 {
1343 	ssize_t ret;
1344 
1345 	mutex_lock(&amd_pstate_driver_lock);
1346 	ret = amd_pstate_show_status(buf);
1347 	mutex_unlock(&amd_pstate_driver_lock);
1348 
1349 	return ret;
1350 }
1351 
1352 static ssize_t status_store(struct device *a, struct device_attribute *b,
1353 			    const char *buf, size_t count)
1354 {
1355 	char *p = memchr(buf, '\n', count);
1356 	int ret;
1357 
1358 	mutex_lock(&amd_pstate_driver_lock);
1359 	ret = amd_pstate_update_status(buf, p ? p - buf : count);
1360 	mutex_unlock(&amd_pstate_driver_lock);
1361 
1362 	return ret < 0 ? ret : count;
1363 }
1364 
1365 static ssize_t prefcore_show(struct device *dev,
1366 			     struct device_attribute *attr, char *buf)
1367 {
1368 	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
1369 }
1370 
/* Per-policy cpufreq attributes */
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);

/* Driver-wide device attributes */
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);
1381 
1382 static struct freq_attr *amd_pstate_attr[] = {
1383 	&amd_pstate_max_freq,
1384 	&amd_pstate_lowest_nonlinear_freq,
1385 	&amd_pstate_highest_perf,
1386 	&amd_pstate_prefcore_ranking,
1387 	&amd_pstate_hw_prefcore,
1388 	NULL,
1389 };
1390 
1391 static struct freq_attr *amd_pstate_epp_attr[] = {
1392 	&amd_pstate_max_freq,
1393 	&amd_pstate_lowest_nonlinear_freq,
1394 	&amd_pstate_highest_perf,
1395 	&amd_pstate_prefcore_ranking,
1396 	&amd_pstate_hw_prefcore,
1397 	&energy_performance_preference,
1398 	&energy_performance_available_preferences,
1399 	NULL,
1400 };
1401 
1402 static struct attribute *pstate_global_attributes[] = {
1403 	&dev_attr_status.attr,
1404 	&dev_attr_prefcore.attr,
1405 	NULL
1406 };
1407 
1408 static const struct attribute_group amd_pstate_global_attr_group = {
1409 	.name = "amd_pstate",
1410 	.attrs = pstate_global_attributes,
1411 };
1412 
1413 static bool amd_pstate_acpi_pm_profile_server(void)
1414 {
1415 	switch (acpi_gbl_FADT.preferred_profile) {
1416 	case PM_ENTERPRISE_SERVER:
1417 	case PM_SOHO_SERVER:
1418 	case PM_PERFORMANCE_SERVER:
1419 		return true;
1420 	}
1421 	return false;
1422 }
1423 
1424 static bool amd_pstate_acpi_pm_profile_undefined(void)
1425 {
1426 	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
1427 		return true;
1428 	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
1429 		return true;
1430 	return false;
1431 }
1432 
1433 static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
1434 {
1435 	int min_freq, max_freq, ret;
1436 	struct amd_cpudata *cpudata;
1437 	struct device *dev;
1438 	u64 value;
1439 
1440 	/*
1441 	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
1442 	 * which is ideal for initialization process.
1443 	 */
1444 	amd_perf_ctl_reset(policy->cpu);
1445 	dev = get_cpu_device(policy->cpu);
1446 	if (!dev)
1447 		return -ENODEV;
1448 
1449 	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
1450 	if (!cpudata)
1451 		return -ENOMEM;
1452 
1453 	cpudata->cpu = policy->cpu;
1454 	cpudata->epp_policy = 0;
1455 
1456 	ret = amd_pstate_init_perf(cpudata);
1457 	if (ret)
1458 		goto free_cpudata1;
1459 
1460 	amd_pstate_init_prefcore(cpudata);
1461 
1462 	ret = amd_pstate_init_freq(cpudata);
1463 	if (ret)
1464 		goto free_cpudata1;
1465 
1466 	ret = amd_pstate_init_boost_support(cpudata);
1467 	if (ret)
1468 		goto free_cpudata1;
1469 
1470 	min_freq = READ_ONCE(cpudata->min_freq);
1471 	max_freq = READ_ONCE(cpudata->max_freq);
1472 
1473 	policy->cpuinfo.min_freq = min_freq;
1474 	policy->cpuinfo.max_freq = max_freq;
1475 	/* It will be updated by governor */
1476 	policy->cur = policy->cpuinfo.min_freq;
1477 
1478 	policy->driver_data = cpudata;
1479 
1480 	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
1481 
1482 	policy->min = policy->cpuinfo.min_freq;
1483 	policy->max = policy->cpuinfo.max_freq;
1484 
1485 	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
1486 
1487 	/*
1488 	 * Set the policy to provide a valid fallback value in case
1489 	 * the default cpufreq governor is neither powersave nor performance.
1490 	 */
1491 	if (amd_pstate_acpi_pm_profile_server() ||
1492 	    amd_pstate_acpi_pm_profile_undefined())
1493 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1494 	else
1495 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
1496 
1497 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1498 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
1499 		if (ret)
1500 			return ret;
1501 		WRITE_ONCE(cpudata->cppc_req_cached, value);
1502 
1503 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
1504 		if (ret)
1505 			return ret;
1506 		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
1507 	}
1508 
1509 	current_pstate_driver->adjust_perf = NULL;
1510 
1511 	return 0;
1512 
1513 free_cpudata1:
1514 	kfree(cpudata);
1515 	return ret;
1516 }
1517 
1518 static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
1519 {
1520 	struct amd_cpudata *cpudata = policy->driver_data;
1521 
1522 	if (cpudata) {
1523 		kfree(cpudata);
1524 		policy->driver_data = NULL;
1525 	}
1526 
1527 	pr_debug("CPU %d exiting\n", policy->cpu);
1528 }
1529 
1530 static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
1531 {
1532 	struct amd_cpudata *cpudata = policy->driver_data;
1533 	u32 max_perf, min_perf;
1534 	u64 value;
1535 	s16 epp;
1536 
1537 	max_perf = READ_ONCE(cpudata->highest_perf);
1538 	min_perf = READ_ONCE(cpudata->lowest_perf);
1539 	amd_pstate_update_min_max_limit(policy);
1540 
1541 	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
1542 			cpudata->max_limit_perf);
1543 	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
1544 			cpudata->max_limit_perf);
1545 	value = READ_ONCE(cpudata->cppc_req_cached);
1546 
1547 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1548 		min_perf = min(cpudata->nominal_perf, max_perf);
1549 
1550 	/* Initial min/max values for CPPC Performance Controls Register */
1551 	value &= ~AMD_CPPC_MIN_PERF(~0L);
1552 	value |= AMD_CPPC_MIN_PERF(min_perf);
1553 
1554 	value &= ~AMD_CPPC_MAX_PERF(~0L);
1555 	value |= AMD_CPPC_MAX_PERF(max_perf);
1556 
1557 	/* CPPC EPP feature require to set zero to the desire perf bit */
1558 	value &= ~AMD_CPPC_DES_PERF(~0L);
1559 	value |= AMD_CPPC_DES_PERF(0);
1560 
1561 	cpudata->epp_policy = cpudata->policy;
1562 
1563 	/* Get BIOS pre-defined epp value */
1564 	epp = amd_pstate_get_epp(cpudata, value);
1565 	if (epp < 0) {
1566 		/**
1567 		 * This return value can only be negative for shared_memory
1568 		 * systems where EPP register read/write not supported.
1569 		 */
1570 		return epp;
1571 	}
1572 
1573 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1574 		epp = 0;
1575 
1576 	WRITE_ONCE(cpudata->cppc_req_cached, value);
1577 	return amd_pstate_set_epp(cpudata, epp);
1578 }
1579 
1580 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
1581 {
1582 	struct amd_cpudata *cpudata = policy->driver_data;
1583 	int ret;
1584 
1585 	if (!policy->cpuinfo.max_freq)
1586 		return -ENODEV;
1587 
1588 	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
1589 				policy->cpuinfo.max_freq, policy->max);
1590 
1591 	cpudata->policy = policy->policy;
1592 
1593 	ret = amd_pstate_epp_update_limit(policy);
1594 	if (ret)
1595 		return ret;
1596 
1597 	/*
1598 	 * policy->cur is never updated with the amd_pstate_epp driver, but it
1599 	 * is used as a stale frequency value. So, keep it within limits.
1600 	 */
1601 	policy->cur = policy->min;
1602 
1603 	return 0;
1604 }
1605 
1606 static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
1607 {
1608 	struct cppc_perf_ctrls perf_ctrls;
1609 	u64 value, max_perf;
1610 	int ret;
1611 
1612 	ret = amd_pstate_cppc_enable(true);
1613 	if (ret)
1614 		pr_err("failed to enable amd pstate during resume, return %d\n", ret);
1615 
1616 	value = READ_ONCE(cpudata->cppc_req_cached);
1617 	max_perf = READ_ONCE(cpudata->highest_perf);
1618 
1619 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1620 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1621 	} else {
1622 		perf_ctrls.max_perf = max_perf;
1623 		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1624 		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
1625 		cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
1626 	}
1627 }
1628 
1629 static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
1630 {
1631 	struct amd_cpudata *cpudata = policy->driver_data;
1632 
1633 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
1634 
1635 	if (cppc_state == AMD_PSTATE_ACTIVE) {
1636 		amd_pstate_epp_reenable(cpudata);
1637 		cpudata->suspended = false;
1638 	}
1639 
1640 	return 0;
1641 }
1642 
1643 static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
1644 {
1645 	struct amd_cpudata *cpudata = policy->driver_data;
1646 	struct cppc_perf_ctrls perf_ctrls;
1647 	int min_perf;
1648 	u64 value;
1649 
1650 	min_perf = READ_ONCE(cpudata->lowest_perf);
1651 	value = READ_ONCE(cpudata->cppc_req_cached);
1652 
1653 	mutex_lock(&amd_pstate_limits_lock);
1654 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1655 		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
1656 
1657 		/* Set max perf same as min perf */
1658 		value &= ~AMD_CPPC_MAX_PERF(~0L);
1659 		value |= AMD_CPPC_MAX_PERF(min_perf);
1660 		value &= ~AMD_CPPC_MIN_PERF(~0L);
1661 		value |= AMD_CPPC_MIN_PERF(min_perf);
1662 		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1663 	} else {
1664 		perf_ctrls.desired_perf = 0;
1665 		perf_ctrls.min_perf = min_perf;
1666 		perf_ctrls.max_perf = min_perf;
1667 		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1668 		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
1669 		cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
1670 	}
1671 	mutex_unlock(&amd_pstate_limits_lock);
1672 }
1673 
1674 static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
1675 {
1676 	struct amd_cpudata *cpudata = policy->driver_data;
1677 
1678 	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
1679 
1680 	if (cpudata->suspended)
1681 		return 0;
1682 
1683 	if (cppc_state == AMD_PSTATE_ACTIVE)
1684 		amd_pstate_epp_offline(policy);
1685 
1686 	return 0;
1687 }
1688 
1689 static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
1690 {
1691 	struct amd_cpudata *cpudata = policy->driver_data;
1692 	int ret;
1693 
1694 	/* avoid suspending when EPP is not enabled */
1695 	if (cppc_state != AMD_PSTATE_ACTIVE)
1696 		return 0;
1697 
1698 	/* set this flag to avoid setting core offline*/
1699 	cpudata->suspended = true;
1700 
1701 	/* disable CPPC in lowlevel firmware */
1702 	ret = amd_pstate_cppc_enable(false);
1703 	if (ret)
1704 		pr_err("failed to suspend, return %d\n", ret);
1705 
1706 	return 0;
1707 }
1708 
1709 static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
1710 {
1711 	struct amd_cpudata *cpudata = policy->driver_data;
1712 
1713 	if (cpudata->suspended) {
1714 		mutex_lock(&amd_pstate_limits_lock);
1715 
1716 		/* enable amd pstate from suspend state*/
1717 		amd_pstate_epp_reenable(cpudata);
1718 
1719 		mutex_unlock(&amd_pstate_limits_lock);
1720 
1721 		cpudata->suspended = false;
1722 	}
1723 
1724 	return 0;
1725 }
1726 
1727 static struct cpufreq_driver amd_pstate_driver = {
1728 	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
1729 	.verify		= amd_pstate_verify,
1730 	.target		= amd_pstate_target,
1731 	.fast_switch    = amd_pstate_fast_switch,
1732 	.init		= amd_pstate_cpu_init,
1733 	.exit		= amd_pstate_cpu_exit,
1734 	.suspend	= amd_pstate_cpu_suspend,
1735 	.resume		= amd_pstate_cpu_resume,
1736 	.set_boost	= amd_pstate_set_boost,
1737 	.update_limits	= amd_pstate_update_limits,
1738 	.name		= "amd-pstate",
1739 	.attr		= amd_pstate_attr,
1740 };
1741 
1742 static struct cpufreq_driver amd_pstate_epp_driver = {
1743 	.flags		= CPUFREQ_CONST_LOOPS,
1744 	.verify		= amd_pstate_verify,
1745 	.setpolicy	= amd_pstate_epp_set_policy,
1746 	.init		= amd_pstate_epp_cpu_init,
1747 	.exit		= amd_pstate_epp_cpu_exit,
1748 	.offline	= amd_pstate_epp_cpu_offline,
1749 	.online		= amd_pstate_epp_cpu_online,
1750 	.suspend	= amd_pstate_epp_suspend,
1751 	.resume		= amd_pstate_epp_resume,
1752 	.update_limits	= amd_pstate_update_limits,
1753 	.set_boost	= amd_pstate_set_boost,
1754 	.name		= "amd-pstate-epp",
1755 	.attr		= amd_pstate_epp_attr,
1756 };
1757 
1758 /*
1759  * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
1760  * show the debug message that helps to check if the CPU has CPPC support for loading issue.
1761  */
1762 static bool amd_cppc_supported(void)
1763 {
1764 	struct cpuinfo_x86 *c = &cpu_data(0);
1765 	bool warn = false;
1766 
1767 	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
1768 		pr_debug_once("CPPC feature is not supported by the processor\n");
1769 		return false;
1770 	}
1771 
1772 	/*
1773 	 * If the CPPC feature is disabled in the BIOS for processors
1774 	 * that support MSR-based CPPC, the AMD Pstate driver may not
1775 	 * function correctly.
1776 	 *
1777 	 * For such processors, check the CPPC flag and display a
1778 	 * warning message if the platform supports CPPC.
1779 	 *
1780 	 * Note: The code check below will not abort the driver
1781 	 * registration process because of the code is added for
1782 	 * debugging purposes. Besides, it may still be possible for
1783 	 * the driver to work using the shared-memory mechanism.
1784 	 */
1785 	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
1786 		if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
1787 			switch (c->x86_model) {
1788 			case 0x60 ... 0x6F:
1789 			case 0x80 ... 0xAF:
1790 				warn = true;
1791 				break;
1792 			}
1793 		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
1794 			   cpu_feature_enabled(X86_FEATURE_ZEN4)) {
1795 			switch (c->x86_model) {
1796 			case 0x10 ... 0x1F:
1797 			case 0x40 ... 0xAF:
1798 				warn = true;
1799 				break;
1800 			}
1801 		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
1802 			warn = true;
1803 		}
1804 	}
1805 
1806 	if (warn)
1807 		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
1808 					"Please enable it if your BIOS has the CPPC option.\n");
1809 	return true;
1810 }
1811 
1812 static int __init amd_pstate_init(void)
1813 {
1814 	struct device *dev_root;
1815 	int ret;
1816 
1817 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1818 		return -ENODEV;
1819 
1820 	/* show debug message only if CPPC is not supported */
1821 	if (!amd_cppc_supported())
1822 		return -EOPNOTSUPP;
1823 
1824 	/* show warning message when BIOS broken or ACPI disabled */
1825 	if (!acpi_cpc_valid()) {
1826 		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
1827 		return -ENODEV;
1828 	}
1829 
1830 	/* don't keep reloading if cpufreq_driver exists */
1831 	if (cpufreq_get_current_driver())
1832 		return -EEXIST;
1833 
1834 	quirks = NULL;
1835 
1836 	/* check if this machine need CPPC quirks */
1837 	dmi_check_system(amd_pstate_quirks_table);
1838 
1839 	/*
1840 	* determine the driver mode from the command line or kernel config.
1841 	* If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
1842 	* command line options will override the kernel config settings.
1843 	*/
1844 
1845 	if (cppc_state == AMD_PSTATE_UNDEFINED) {
1846 		/* Disable on the following configs by default:
1847 		 * 1. Undefined platforms
1848 		 * 2. Server platforms with CPUs older than Family 0x1A.
1849 		 */
1850 		if (amd_pstate_acpi_pm_profile_undefined() ||
1851 		    (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
1852 			pr_info("driver load is disabled, boot with specific mode to enable this\n");
1853 			return -ENODEV;
1854 		}
1855 		/* get driver mode from kernel config option [1:4] */
1856 		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
1857 	}
1858 
1859 	if (cppc_state == AMD_PSTATE_DISABLE) {
1860 		pr_info("driver load is disabled, boot with specific mode to enable this\n");
1861 		return -ENODEV;
1862 	}
1863 
1864 	/* capability check */
1865 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
1866 		pr_debug("AMD CPPC MSR based functionality is supported\n");
1867 	} else {
1868 		pr_debug("AMD CPPC shared memory based functionality is supported\n");
1869 		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
1870 		static_call_update(amd_pstate_init_perf, shmem_init_perf);
1871 		static_call_update(amd_pstate_update_perf, shmem_update_perf);
1872 	}
1873 
1874 	if (amd_pstate_prefcore) {
1875 		ret = amd_detect_prefcore(&amd_pstate_prefcore);
1876 		if (ret)
1877 			return ret;
1878 	}
1879 
1880 	ret = amd_pstate_register_driver(cppc_state);
1881 	if (ret) {
1882 		pr_err("failed to register with return %d\n", ret);
1883 		return ret;
1884 	}
1885 
1886 	dev_root = bus_get_dev_root(&cpu_subsys);
1887 	if (dev_root) {
1888 		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
1889 		put_device(dev_root);
1890 		if (ret) {
1891 			pr_err("sysfs attribute export failed with error %d.\n", ret);
1892 			goto global_attr_free;
1893 		}
1894 	}
1895 
1896 	return ret;
1897 
1898 global_attr_free:
1899 	cpufreq_unregister_driver(current_pstate_driver);
1900 	amd_pstate_cppc_enable(false);
1901 	return ret;
1902 }
1903 device_initcall(amd_pstate_init);
1904 
1905 static int __init amd_pstate_param(char *str)
1906 {
1907 	size_t size;
1908 	int mode_idx;
1909 
1910 	if (!str)
1911 		return -EINVAL;
1912 
1913 	size = strlen(str);
1914 	mode_idx = get_mode_idx_from_str(str, size);
1915 
1916 	return amd_pstate_set_driver(mode_idx);
1917 }
1918 
1919 static int __init amd_prefcore_param(char *str)
1920 {
1921 	if (!strcmp(str, "disable"))
1922 		amd_pstate_prefcore = false;
1923 
1924 	return 0;
1925 }
1926 
1927 early_param("amd_pstate", amd_pstate_param);
1928 early_param("amd_prefcore", amd_prefcore_param);
1929 
1930 MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
1931 MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
1932