// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * provides a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) the Full MSR Solution and 2) the Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

#include "amd-pstate.h"
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY	20000
#define AMD_PSTATE_TRANSITION_DELAY	1000
#define CPPC_HIGHEST_PERF_PERFORMANCE	196
#define CPPC_HIGHEST_PERF_DEFAULT	166

#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF

/*
 * enum amd_pstate_mode - driver working mode of amd pstate
 */
enum amd_pstate_mode {
	AMD_PSTATE_UNDEFINED = 0,
	AMD_PSTATE_DISABLE,
	AMD_PSTATE_PASSIVE,
	AMD_PSTATE_ACTIVE,
	AMD_PSTATE_GUIDED,
	AMD_PSTATE_MAX,
};

static const char * const amd_pstate_mode_string[] = {
	[AMD_PSTATE_UNDEFINED]   = "undefined",
	[AMD_PSTATE_DISABLE]     = "disable",
	[AMD_PSTATE_PASSIVE]     = "passive",
	[AMD_PSTATE_ACTIVE]      = "active",
	[AMD_PSTATE_GUIDED]      = "guided",
	NULL,
};

struct quirk_entry {
	u32 nominal_freq;
	u32 lowest_freq;
};

/*
 * TODO: We need more time to fine tune processors with the shared memory
 * solution together with the community.
 *
 * There are some performance drops on CPU benchmarks reported by SUSE.
 * We are co-working with them to fine tune the shared memory solution, so
 * it is disabled by default and acpi-cpufreq is used on these processors;
 * a module parameter allows enabling it manually for debugging.
 */
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;

/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate during
 * short periods of activity. EPP values will be utilized for
 * different OS profiles (balanced, performance, power savings).
 * Display strings corresponding to an EPP index are listed in
 * energy_perf_strings[]:
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};

static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};

typedef int (*cppc_mode_transition_fn)(int);

static struct quirk_entry quirk_amd_7k62 = {
	.nominal_freq = 2600,
	.lowest_freq = 550,
};

static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * Match the broken BIOS for family 17h processors supporting CPPC V2:
	 * these BIOSes lack the nominal_freq and lowest_freq capability
	 * definitions in the ACPI tables.
	 */
	if (boot_cpu_has(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}

static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
	{
		.callback = dmi_matched_7k62_bios_bug,
		.ident = "AMD EPYC 7K62",
		.matches = {
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
		},
		.driver_data = &quirk_amd_7k62,
	},
	{}
};
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);

static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}
	return -EINVAL;
}

static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);

/* The EPP hint lives in bits 31:24 of the cached MSR_AMD_CPPC_REQ value. */
static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
{
	u64 epp;
	int ret;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		if (!cppc_req_cached) {
			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
					    &cppc_req_cached);
			if (epp)
				return epp;
		}
		epp = (cppc_req_cached >> 24) & 0xFF;
	} else {
		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
		if (ret < 0) {
			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
			return -EIO;
		}
	}

	return (s16)(epp & 0xff);
}

static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
{
	s16 epp;
	int index = -EINVAL;

	epp = amd_pstate_get_epp(cpudata, 0);
	if (epp < 0)
		return epp;

	switch (epp) {
	case AMD_CPPC_EPP_PERFORMANCE:
		index = EPP_INDEX_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
		index = EPP_INDEX_BALANCE_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
		index = EPP_INDEX_BALANCE_POWERSAVE;
		break;
	case AMD_CPPC_EPP_POWERSAVE:
		index = EPP_INDEX_POWERSAVE;
		break;
	default:
		break;
	}

	return index;
}

static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	int ret;
	struct cppc_perf_ctrls perf_ctrls;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		u64 value = READ_ONCE(cpudata->cppc_req_cached);

		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
		if (!ret)
			cpudata->epp_cached = epp;
	} else {
		perf_ctrls.energy_perf = epp;
		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
		if (ret) {
			pr_debug("failed to set energy perf value (%d)\n", ret);
			return ret;
		}
		cpudata->epp_cached = epp;
	}

	return ret;
}

static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
					    int pref_index)
{
	int epp = -EINVAL;
	int ret;

	if (!pref_index) {
		pr_debug("EPP pref_index is invalid\n");
		return -EINVAL;
	}

	if (epp == -EINVAL)
		epp = epp_values[pref_index];

	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		pr_debug("EPP cannot be set under performance policy\n");
		return -EBUSY;
	}

	ret = amd_pstate_set_epp(cpudata, epp);

	return ret;
}

static inline int pstate_enable(bool enable)
{
	int ret, cpu;
	unsigned long logical_proc_id_mask = 0;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		unsigned long logical_id = topology_logical_die_id(cpu);

		if (test_bit(logical_id, &logical_proc_id_mask))
			continue;

		set_bit(logical_id, &logical_proc_id_mask);

		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
					 enable);
		if (ret)
			return ret;
	}

	cppc_enabled = enable;
	return 0;
}

static int cppc_enable(bool enable)
{
	int cpu, ret = 0;
	struct cppc_perf_ctrls perf_ctrls;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;

		/* Enable autonomous mode for EPP */
		if (cppc_state == AMD_PSTATE_ACTIVE) {
			/* Set desired perf as zero to allow EPP firmware control */
			perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
			if (ret)
				return ret;
		}
	}

	cppc_enabled = enable;
	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);

static inline int amd_pstate_enable(bool enable)
{
	return static_call(amd_pstate_enable)(enable);
}

static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	/*
	 * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
	 * the highest performance level is set to 196.
	 * https://bugzilla.kernel.org/show_bug.cgi?id=218759
	 */
	if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
		return CPPC_HIGHEST_PERF_PERFORMANCE;

	return CPPC_HIGHEST_PERF_DEFAULT;
}

static int pstate_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1;
	u32 highest_perf;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	/*
	 * For platforms that do not support the preferred core feature,
	 * highest_perf may be configured as 166 or 255. To avoid the max
	 * frequency being calculated wrongly, take the AMD_CPPC_HIGHEST_PERF(cap1)
	 * value as the default max perf.
	 */
	if (cpudata->hw_prefcore)
		highest_perf = amd_pstate_highest_perf_set(cpudata);
	else
		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);

	WRITE_ONCE(cpudata->highest_perf, highest_perf);
	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
	return 0;
}

static int cppc_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 highest_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	if (cpudata->hw_prefcore)
		highest_perf = amd_pstate_highest_perf_set(cpudata);
	else
		highest_perf = cppc_perf.highest_perf;

	WRITE_ONCE(cpudata->highest_perf, highest_perf);
	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);

	if (cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
	if (ret) {
		pr_warn("failed to get auto_sel, ret: %d\n", ret);
		return 0;
	}

	ret = cppc_set_auto_sel(cpudata->cpu,
				(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);

	if (ret)
		pr_warn("failed to set auto_sel, ret: %d\n", ret);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}

static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			       u32 des_perf, u32 max_perf, bool fast_switch)
{
	if (fast_switch)
		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
	else
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
			      READ_ONCE(cpudata->cppc_req_cached));
}

static void cppc_update_perf(struct amd_cpudata *cpudata,
			     u32 min_perf, u32 des_perf,
			     u32 max_perf, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	cppc_set_perf(cpudata->cpu, &perf_ctrls);
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);

static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
					  u32 min_perf, u32 des_perf,
					  u32 max_perf, bool fast_switch)
{
	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
					    max_perf, fast_switch);
}

static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
{
	u64 aperf, mperf, tsc;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();

	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}

	local_irq_restore(flags);

	cpudata->cur.aperf = aperf;
	cpudata->cur.mperf = mperf;
	cpudata->cur.tsc = tsc;
	cpudata->cur.aperf -= cpudata->prev.aperf;
	cpudata->cur.mperf -= cpudata->prev.mperf;
	cpudata->cur.tsc -= cpudata->prev.tsc;

	cpudata->prev.aperf = aperf;
	cpudata->prev.mperf = mperf;
	cpudata->prev.tsc = tsc;

	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);

	return true;
}

static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
{
	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
	u64 value = prev;

	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
		min_perf = des_perf;
		des_perf = 0;
	}

	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(des_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
			cpudata->cpu, (value != prev), fast_switch);
	}

	if (value == prev)
		return;

	WRITE_ONCE(cpudata->cppc_req_cached, value);

	amd_pstate_update_perf(cpudata, min_perf, des_perf,
			       max_perf, fast_switch);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	return 0;
}

static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
	u32 max_limit_perf, min_limit_perf, lowest_perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);

	lowest_perf = READ_ONCE(cpudata->lowest_perf);
	if (min_limit_perf < lowest_perf)
		min_limit_perf = lowest_perf;

	if (max_limit_perf < min_limit_perf)
		max_limit_perf = min_limit_perf;

	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
	WRITE_ONCE(cpudata->min_limit_freq, policy->min);

	return 0;
}

static int amd_pstate_update_freq(struct cpufreq_policy *policy,
				  unsigned int target_freq, bool fast_switch)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	WARN_ON(fast_switch && !policy->fast_switch_enabled);
	/*
	 * If fast_switch is desired, then there aren't any registered
	 * transition notifiers. See comment for
	 * cpufreq_enable_fast_switch().
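	 * In that case it is safe to skip the cpufreq_freq_transition_begin()
	 * and cpufreq_freq_transition_end() calls below.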
	 */
	if (!fast_switch)
		cpufreq_freq_transition_begin(policy, &freqs);

	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, fast_switch, policy->governor->flags);

	if (!fast_switch)
		cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	return amd_pstate_update_freq(policy, target_freq, false);
}

static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
					   unsigned int target_freq)
{
	if (!amd_pstate_update_freq(policy, target_freq, true))
		return target_freq;
	return policy->cur;
}

static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf, max_freq;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned int target_freq;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	max_freq = READ_ONCE(cpudata->max_freq);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->lowest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cap_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
	target_freq = div_u64(des_perf * max_freq, max_perf);
	policy->cur = target_freq;

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
			  policy->governor->flags);
	cpufreq_cpu_put(policy);
}

static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EINVAL;
	}

	if (state)
		policy->cpuinfo.max_freq = cpudata->max_freq;
	else
		policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000;

	policy->max = policy->cpuinfo.max_freq;

	ret = freq_qos_update_request(&cpudata->req[1],
				      policy->cpuinfo.max_freq);
	if (ret < 0)
		return ret;

	return 0;
}

static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
{
	u32 highest_perf, nominal_perf;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	if (highest_perf <= nominal_perf)
		return;

	cpudata->boost_supported = true;
	current_pstate_driver->boost_enabled = true;
}

static void amd_perf_ctl_reset(unsigned int cpu)
{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}

/*
 * Enabling amd-pstate preferred core support can't be done directly from
 * cpufreq callbacks due to locking, so queue the work for later.
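 * The queued work calls sched_set_itmt_support() from process context.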
 */
static void amd_pstate_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstate_sched_prefcore_workfn);

/*
 * Get the highest performance register value.
 * @cpu: CPU from which to get highest performance.
 * @highest_perf: Return address.
 *
 * Return: 0 for success, -EIO otherwise.
 */
static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
{
	int ret;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		u64 cap1;

		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
		if (ret)
			return ret;
		WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
	} else {
		u64 cppc_highest_perf;

		ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
		if (ret)
			return ret;
		WRITE_ONCE(*highest_perf, cppc_highest_perf);
	}

	return (ret);
}

#define CPPC_MAX_PERF	U8_MAX

static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	int ret, prio;
	u32 highest_perf;

	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
	if (ret)
		return;

	cpudata->hw_prefcore = true;
	/* check if CPPC preferred core feature is enabled */
	if (highest_perf < CPPC_MAX_PERF)
		prio = (int)highest_perf;
	else {
		pr_debug("AMD CPPC preferred core is unsupported!\n");
		cpudata->hw_prefcore = false;
		return;
	}

	if (!amd_pstate_prefcore)
		return;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio(prio, cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}

static void amd_pstate_update_limits(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 prev_high = 0, cur_high = 0;
	int ret;
	bool highest_perf_changed = false;

	mutex_lock(&amd_pstate_driver_lock);
	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
		goto free_cpufreq_put;

	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
	if (ret)
		goto free_cpufreq_put;

	prev_high = READ_ONCE(cpudata->prefcore_ranking);
	if (prev_high != cur_high) {
		highest_perf_changed = true;
		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

		if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
	}

free_cpufreq_put:
	cpufreq_cpu_put(policy);

	if (!highest_perf_changed)
		cpufreq_update_policy(cpu);

	mutex_unlock(&amd_pstate_driver_lock);
}

/*
 * Get the p-state transition delay time from the ACPI tables set by firmware,
 * instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
{
	u32 transition_delay_ns;

	transition_delay_ns = cppc_get_transition_latency(cpu);
	if (transition_delay_ns == CPUFREQ_ETERNAL)
		return AMD_PSTATE_TRANSITION_DELAY;

	return transition_delay_ns / NSEC_PER_USEC;
}

/*
 * Get the p-state transition latency value from the ACPI tables set by
 * firmware, instead of using a hardcoded value directly.
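 * cppc_get_transition_latency() reports the latency in nanoseconds and
 * returns CPUFREQ_ETERNAL when firmware does not provide a value, in which
 * case the driver default is used instead.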
 */
static u32 amd_pstate_get_transition_latency(unsigned int cpu)
{
	u32 transition_latency;

	transition_latency = cppc_get_transition_latency(cpu);
	if (transition_latency == CPUFREQ_ETERNAL)
		return AMD_PSTATE_TRANSITION_LATENCY;

	return transition_latency;
}

/*
 * amd_pstate_init_freq: Initialize the max_freq, min_freq,
 *                       nominal_freq and lowest_nonlinear_freq for
 *                       the @cpudata object.
 *
 * Requires: highest_perf, lowest_perf, nominal_perf and
 *           lowest_nonlinear_perf members of @cpudata to be
 *           initialized.
 *
 * Returns 0 on success, non-zero value on failure.
 */
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
	int ret;
	u32 min_freq;
	u32 highest_perf, max_freq;
	u32 nominal_perf, nominal_freq;
	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
	u32 boost_ratio, lowest_nonlinear_ratio;
	struct cppc_perf_caps cppc_perf;

	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	if (quirks && quirks->lowest_freq)
		min_freq = quirks->lowest_freq * 1000;
	else
		min_freq = cppc_perf.lowest_freq * 1000;

	if (quirks && quirks->nominal_freq)
		nominal_freq = quirks->nominal_freq;
	else
		nominal_freq = cppc_perf.nominal_freq;

	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	/* max_freq (kHz) scales nominal_freq (MHz) by highest_perf / nominal_perf */
	highest_perf = READ_ONCE(cpudata->highest_perf);
	boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;

	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);
	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;

	WRITE_ONCE(cpudata->min_freq, min_freq);
	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
	WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
	WRITE_ONCE(cpudata->max_freq, max_freq);

	return 0;
}

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, nominal_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);
	nominal_freq = READ_ONCE(cpudata->nominal_freq);

	if (min_freq <= 0 || max_freq <= 0 ||
	    nominal_freq <= 0 || min_freq > max_freq) {
		dev_err(dev,
			"min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n",
			min_freq, max_freq, nominal_freq);
		ret = -EINVAL;
		goto free_cpudata1;
	}

	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (boot_cpu_has(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	cpudata->max_limit_freq = max_freq;
	cpudata->min_limit_freq = min_freq;

	policy->driver_data = cpudata;

	amd_pstate_boost_init(cpudata);
	if (!current_pstate_driver->adjust_perf)
		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}

static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	freq_qos_remove_request(&cpudata->req[1]);
	freq_qos_remove_request(&cpudata->req[0]);
	policy->fast_switch_possible = false;
	kfree(cpudata);

	return 0;
}

static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_enable(true);
	if (ret)
		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);

	return ret;
}

static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_enable(false);
	if (ret)
		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);

	return ret;
}

/* Sysfs attributes */

/*
 * This frequency is to indicate the maximum hardware frequency.
 * If boost is not active but supported, the frequency will be larger than the
 * one in cpuinfo.
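 * (When boost is disabled, amd_pstate_set_boost() lowers cpuinfo.max_freq to
 * nominal_freq * 1000.)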
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_freq = READ_ONCE(cpudata->max_freq);
	if (max_freq < 0)
		return max_freq;

	return sysfs_emit(buf, "%u\n", max_freq);
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
	if (freq < 0)
		return freq;

	return sysfs_emit(buf, "%u\n", freq);
}

/*
 * In some ASICs the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
						char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->prefcore_ranking);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
					   char *buf)
{
	bool hw_prefcore;
	struct amd_cpudata *cpudata = policy->driver_data;

	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);

	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
}

static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int offset = 0;
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sysfs_emit_at(buf, offset, "%s\n",
				     energy_perf_strings[EPP_INDEX_PERFORMANCE]);

	while (energy_perf_strings[i] != NULL)
		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);

	offset += sysfs_emit_at(buf, offset, "\n");

	return offset;
}

static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	char str_preference[21];
	ssize_t ret;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0)
		return -EINVAL;

	mutex_lock(&amd_pstate_limits_lock);
	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
	mutex_unlock(&amd_pstate_limits_lock);

	return ret ?: count;
}

static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int preference;

	preference = amd_pstate_get_energy_pref_index(cpudata);
	if (preference < 0)
		return preference;

	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}

static void amd_pstate_driver_cleanup(void)
{
	amd_pstate_enable(false);
	cppc_state = AMD_PSTATE_DISABLE;
	current_pstate_driver = NULL;
}

static int amd_pstate_register_driver(int mode)
{
	int ret;

	if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
		current_pstate_driver = &amd_pstate_driver;
	else if (mode == AMD_PSTATE_ACTIVE)
		current_pstate_driver = &amd_pstate_epp_driver;
	else
		return -EINVAL;

	cppc_state = mode;
	ret = cpufreq_register_driver(current_pstate_driver);
	if (ret) {
		amd_pstate_driver_cleanup();
		return ret;
	}
	return 0;
}

static int amd_pstate_unregister_driver(int dummy)
{
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_driver_cleanup();
	return 0;
}

static int amd_pstate_change_mode_without_dvr_change(int mode)
{
	int cpu = 0;

	cppc_state = mode;

	if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	for_each_present_cpu(cpu) {
		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
	}

	return 0;
}

static int amd_pstate_change_driver_mode(int mode)
{
	int ret;

	ret = amd_pstate_unregister_driver(0);
	if (ret)
		return ret;

	ret = amd_pstate_register_driver(mode);
	if (ret)
		return ret;

	return 0;
}

static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
	[AMD_PSTATE_DISABLE] = {
		[AMD_PSTATE_DISABLE]	= NULL,
		[AMD_PSTATE_PASSIVE]	= amd_pstate_register_driver,
		[AMD_PSTATE_ACTIVE]	= amd_pstate_register_driver,
		[AMD_PSTATE_GUIDED]	= amd_pstate_register_driver,
	},
	[AMD_PSTATE_PASSIVE] = {
		[AMD_PSTATE_DISABLE]	= amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]	= NULL,
		[AMD_PSTATE_ACTIVE]	= amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]	= amd_pstate_change_mode_without_dvr_change,
	},
	[AMD_PSTATE_ACTIVE] = {
		[AMD_PSTATE_DISABLE]	= amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]	= amd_pstate_change_driver_mode,
		[AMD_PSTATE_ACTIVE]	= NULL,
		[AMD_PSTATE_GUIDED]	= amd_pstate_change_driver_mode,
	},
	[AMD_PSTATE_GUIDED] = {
		[AMD_PSTATE_DISABLE]	= amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]	= amd_pstate_change_mode_without_dvr_change,
		[AMD_PSTATE_ACTIVE]	= amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]	= NULL,
	},
};

static ssize_t amd_pstate_show_status(char *buf)
{
	if (!current_pstate_driver)
		return sysfs_emit(buf, "disable\n");

	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}

static int amd_pstate_update_status(const char *buf, size_t size)
{
	int mode_idx;

	if (size > strlen("passive") || size < strlen("active"))
		return -EINVAL;

	mode_idx = get_mode_idx_from_str(buf, size);

	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
		return -EINVAL;

	if (mode_state_machine[cppc_state][mode_idx])
		return mode_state_machine[cppc_state][mode_idx](mode_idx);

	return 0;
}

static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&amd_pstate_driver_lock);
	ret = amd_pstate_show_status(buf);
	mutex_unlock(&amd_pstate_driver_lock);

	return ret;
}

static ssize_t status_store(struct device *a, struct device_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

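	/* strip an optional trailing newline before matching the mode string */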
	mutex_lock(&amd_pstate_driver_lock);
	ret = amd_pstate_update_status(buf, p ? p - buf : count);
	mutex_unlock(&amd_pstate_driver_lock);

	return ret < 0 ? ret : count;
}

static ssize_t prefcore_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
}

cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);

static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	NULL,
};

static struct freq_attr *amd_pstate_epp_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	&energy_performance_preference,
	&energy_performance_available_preferences,
	NULL,
};

static struct attribute *pstate_global_attributes[] = {
	&dev_attr_status.attr,
	&dev_attr_prefcore.attr,
	NULL
};

static const struct attribute_group amd_pstate_global_attr_group = {
	.name = "amd_pstate",
	.attrs = pstate_global_attributes,
};

static bool amd_pstate_acpi_pm_profile_server(void)
{
	switch (acpi_gbl_FADT.preferred_profile) {
	case PM_ENTERPRISE_SERVER:
	case PM_SOHO_SERVER:
	case PM_PERFORMANCE_SERVER:
		return true;
	}
	return false;
}

static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
		return true;
	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
		return true;
	return false;
}

static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, nominal_freq, ret;
	struct amd_cpudata *cpudata;
	struct device *dev;
	u64 value;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;
	cpudata->epp_policy = 0;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);
	nominal_freq = READ_ONCE(cpudata->nominal_freq);
	if (min_freq <= 0 || max_freq <= 0 ||
	    nominal_freq <= 0 || min_freq > max_freq) {
		dev_err(dev,
			"min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n",
			min_freq, max_freq, nominal_freq);
		ret = -EINVAL;
		goto free_cpudata1;
	}

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;
	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	policy->driver_data = cpudata;

	cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);

	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;

	/*
	 * Set the policy to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	if (amd_pstate_acpi_pm_profile_server() ||
	    amd_pstate_acpi_pm_profile_undefined())
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
	}
	amd_pstate_boost_init(cpudata);

	return 0;

free_cpudata1:
	kfree(cpudata);
	return ret;
}

static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata) {
		kfree(cpudata);
		policy->driver_data = NULL;
	}

	pr_debug("CPU %d exiting\n", policy->cpu);
	return 0;
}

static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
	u64 value;
	s16 epp;

	max_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);

	if (min_limit_perf < min_perf)
		min_limit_perf = min_perf;

	if (max_limit_perf < min_limit_perf)
		max_limit_perf = min_limit_perf;

	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);

	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	value = READ_ONCE(cpudata->cppc_req_cached);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		min_perf = max_perf;

	/* Initial min/max values for CPPC Performance Controls Register */
	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	/* The CPPC EPP feature requires the desired perf field to be set to zero */
	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(0);

	cpudata->epp_policy = cpudata->policy;

	/* Get BIOS pre-defined epp value */
	epp = amd_pstate_get_epp(cpudata, value);
	if (epp < 0) {
		/*
		 * This return value can only be negative for shared-memory
		 * systems where EPP register read/write is not supported.
		 */
		return;
	}

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		epp = 0;

	/* Set initial EPP value */
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
	}

	WRITE_ONCE(cpudata->cppc_req_cached, value);
	amd_pstate_set_epp(cpudata, epp);
}

static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpudata->policy = policy->policy;

	amd_pstate_epp_update_limit(policy);

	return 0;
}

static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
{
	struct cppc_perf_ctrls perf_ctrls;
	u64 value, max_perf;
	int ret;

	ret = amd_pstate_enable(true);
	if (ret)
		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

	value = READ_ONCE(cpudata->cppc_req_cached);
	max_perf = READ_ONCE(cpudata->highest_perf);

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	} else {
		perf_ctrls.max_perf = max_perf;
		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
	}
}

static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

	if (cppc_state == AMD_PSTATE_ACTIVE) {
		amd_pstate_epp_reenable(cpudata);
		cpudata->suspended = false;
	}

	return 0;
}

static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	struct cppc_perf_ctrls perf_ctrls;
	int min_perf;
	u64 value;

	min_perf = READ_ONCE(cpudata->lowest_perf);
	value = READ_ONCE(cpudata->cppc_req_cached);

	mutex_lock(&amd_pstate_limits_lock);
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;

		/* Set max perf same as min perf */
		value &= ~AMD_CPPC_MAX_PERF(~0L);
		value |= AMD_CPPC_MAX_PERF(min_perf);
		value &= ~AMD_CPPC_MIN_PERF(~0L);
		value |= AMD_CPPC_MIN_PERF(min_perf);
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	} else {
		perf_ctrls.desired_perf = 0;
		perf_ctrls.max_perf = min_perf;
		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
	}
	mutex_unlock(&amd_pstate_limits_lock);
}

static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);

	if (cpudata->suspended)
		return 0;

	if (cppc_state == AMD_PSTATE_ACTIVE)
		amd_pstate_epp_offline(policy);

	return 0;
}

static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);
	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
	return 0;
}

static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	/* avoid suspending when EPP is not enabled */
	if (cppc_state != AMD_PSTATE_ACTIVE)
		return 0;

	/* set this flag to avoid setting the core offline */
	cpudata->suspended = true;

	/* disable CPPC in low-level firmware */
	ret = amd_pstate_enable(false);
	if (ret)
		pr_err("failed to suspend, return %d\n", ret);

	return 0;
}

static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->suspended) {
		mutex_lock(&amd_pstate_limits_lock);

		/* enable amd pstate from suspend state */
		amd_pstate_epp_reenable(cpudata);

		mutex_unlock(&amd_pstate_limits_lock);

		cpudata->suspended = false;
	}

	return 0;
}

static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.fast_switch	= amd_pstate_fast_switch,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.suspend	= amd_pstate_cpu_suspend,
	.resume		= amd_pstate_cpu_resume,
	.set_boost	= amd_pstate_set_boost,
	.update_limits	= amd_pstate_update_limits,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};

static struct cpufreq_driver amd_pstate_epp_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= amd_pstate_epp_verify_policy,
	.setpolicy	= amd_pstate_epp_set_policy,
	.init		= amd_pstate_epp_cpu_init,
	.exit		= amd_pstate_epp_cpu_exit,
	.offline	= amd_pstate_epp_cpu_offline,
	.online		= amd_pstate_epp_cpu_online,
	.suspend	= amd_pstate_epp_suspend,
	.resume		= amd_pstate_epp_resume,
	.update_limits	= amd_pstate_update_limits,
	.name		= "amd-pstate-epp",
	.attr		= amd_pstate_epp_attr,
};

static int __init amd_pstate_set_driver(int mode_idx)
{
	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
		cppc_state = mode_idx;
		if (cppc_state == AMD_PSTATE_DISABLE)
			pr_info("driver is explicitly disabled\n");

		if (cppc_state == AMD_PSTATE_ACTIVE)
			current_pstate_driver = &amd_pstate_epp_driver;

		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
			current_pstate_driver = &amd_pstate_driver;

		return 0;
	}

	return -EINVAL;
}

static int __init amd_pstate_init(void)
{
	struct device *dev_root;
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	if (!acpi_cpc_valid()) {
		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	quirks = NULL;

	/* check if this machine needs CPPC quirks */
	dmi_check_system(amd_pstate_quirks_table);

	switch (cppc_state) {
	case AMD_PSTATE_UNDEFINED:
		/* Disable on the following configs by default:
		 * 1. Undefined platforms
		 * 2. Server platforms
		 * 3. Shared memory designs
		 */
		if (amd_pstate_acpi_pm_profile_undefined() ||
		    amd_pstate_acpi_pm_profile_server() ||
		    !boot_cpu_has(X86_FEATURE_CPPC)) {
			pr_info("driver load is disabled, boot with specific mode to enable this\n");
			return -ENODEV;
		}
		ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
		if (ret)
			return ret;
		break;
	case AMD_PSTATE_DISABLE:
		return -ENODEV;
	case AMD_PSTATE_PASSIVE:
	case AMD_PSTATE_ACTIVE:
	case AMD_PSTATE_GUIDED:
		break;
	default:
		return -EINVAL;
	}

	/* capability check */
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
		if (cppc_state != AMD_PSTATE_ACTIVE)
			current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
	} else {
		pr_debug("AMD CPPC shared memory based functionality is supported\n");
		static_call_update(amd_pstate_enable, cppc_enable);
		static_call_update(amd_pstate_init_perf, cppc_init_perf);
		static_call_update(amd_pstate_update_perf, cppc_update_perf);
	}

	/* enable amd pstate feature */
	ret = amd_pstate_enable(true);
	if (ret) {
		pr_err("failed to enable with return %d\n", ret);
		return ret;
	}

	ret = cpufreq_register_driver(current_pstate_driver);
	if (ret)
		pr_err("failed to register with return %d\n", ret);

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
		put_device(dev_root);
		if (ret) {
			pr_err("sysfs attribute export failed with error %d.\n", ret);
			goto global_attr_free;
		}
	}

	return ret;

global_attr_free:
	cpufreq_unregister_driver(current_pstate_driver);
	return ret;
}
device_initcall(amd_pstate_init);

static int __init amd_pstate_param(char *str)
{
	size_t size;
	int mode_idx;

	if (!str)
		return -EINVAL;

	size = strlen(str);
	mode_idx = get_mode_idx_from_str(str, size);

	return amd_pstate_set_driver(mode_idx);
}

static int __init amd_prefcore_param(char *str)
{
	if (!strcmp(str, "disable"))
		amd_pstate_prefcore = false;

	return 0;
}

early_param("amd_pstate", amd_pstate_param);
early_param("amd_prefcore", amd_prefcore_param);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");