// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature which works with the AMD SMU firmware providing a finer grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * allows a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

#include "amd-pstate.h"
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY		20000
#define AMD_PSTATE_TRANSITION_DELAY		1000
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY	600

#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF

static const char * const amd_pstate_mode_string[] = {
	[AMD_PSTATE_UNDEFINED]   = "undefined",
	[AMD_PSTATE_DISABLE]     = "disable",
	[AMD_PSTATE_PASSIVE]     = "passive",
	[AMD_PSTATE_ACTIVE]      = "active",
	[AMD_PSTATE_GUIDED]      = "guided",
	NULL,
};

const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
{
	if (mode < 0 || mode >= AMD_PSTATE_MAX)
		return NULL;
	return amd_pstate_mode_string[mode];
}
EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);

struct quirk_entry {
	u32 nominal_freq;
	u32 lowest_freq;
};

static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;

#define AMD_CPPC_MAX_PERF_MASK		GENMASK(7, 0)
#define AMD_CPPC_MIN_PERF_MASK		GENMASK(15, 8)
#define AMD_CPPC_DES_PERF_MASK		GENMASK(23, 16)
#define AMD_CPPC_EPP_PERF_MASK		GENMASK(31, 24)

/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate during
 * short periods of activity. EPP values will be utilized for
 * different OS profiles (balanced, performance, power savings)
 * display strings corresponding to EPP index in the
 * energy_perf_strings[]
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};

static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};

typedef int (*cppc_mode_transition_fn)(int);

static struct quirk_entry quirk_amd_7k62 = {
	.nominal_freq = 2600,
	.lowest_freq = 550,
};

static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * match the broken BIOS for family 17h processors supporting CPPC V2;
	 * these broken BIOSes lack the nominal_freq and lowest_freq capability
	 * definitions in the ACPI tables
	 */
	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}

static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
	{
		.callback = dmi_matched_7k62_bios_bug,
		.ident = "AMD EPYC 7K62",
		.matches = {
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
		},
		.driver_data = &quirk_amd_7k62,
	},
	{}
};
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);

static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}
	return -EINVAL;
}

static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);

static s16 msr_get_epp(struct amd_cpudata *cpudata)
{
	u64 value;
	int ret;

	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
	if (ret < 0) {
		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
		return ret;
	}

	return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value);
}

DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp);

static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_get_epp)(cpudata);
}

static s16 shmem_get_epp(struct amd_cpudata *cpudata)
{
	u64 epp;
	int ret;

	ret = cppc_get_epp_perf(cpudata->cpu, &epp);
	if (ret < 0) {
		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
		return ret;
	}

	return (s16)(epp & 0xff);
}

static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			   u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
	u64 value, prev;

	value = prev = READ_ONCE(cpudata->cppc_req_cached);

	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	if (value == prev)
		return 0;

	if (fast_switch) {
		wrmsrl(MSR_AMD_CPPC_REQ, value);
		return 0;
	} else {
		int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);

		if (ret)
			return ret;
	}

	WRITE_ONCE(cpudata->cppc_req_cached, value);
	WRITE_ONCE(cpudata->epp_cached, epp);

	return 0;
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);

static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
					 u32 min_perf, u32 des_perf,
					 u32 max_perf, u32 epp,
					 bool fast_switch)
{
	return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
						   max_perf, epp, fast_switch);
}

static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	u64 value, prev;
	int ret;

	value = prev = READ_ONCE(cpudata->cppc_req_cached);
	value &= ~AMD_CPPC_EPP_PERF_MASK;
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	if (value == prev)
		return 0;

	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	if (ret) {
		pr_err("failed to set energy perf value (%d)\n", ret);
		return ret;
	}

	/* update both so that msr_update_perf() can effectively check */
	WRITE_ONCE(cpudata->epp_cached, epp);
	WRITE_ONCE(cpudata->cppc_req_cached, value);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);

static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	return static_call(amd_pstate_set_epp)(cpudata, epp);
}

static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	int ret;
	struct cppc_perf_ctrls perf_ctrls;

	if (epp == cpudata->epp_cached)
		return 0;

	perf_ctrls.energy_perf = epp;
	ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
	if (ret) {
		pr_debug("failed to set energy perf value (%d)\n", ret);
		return ret;
	}
	WRITE_ONCE(cpudata->epp_cached, epp);

	return ret;
}

static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
					    int pref_index)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int epp;

	if (!pref_index)
		epp = cpudata->epp_default;
	else
		epp = epp_values[pref_index];

	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		pr_debug("EPP cannot be set under performance policy\n");
		return -EBUSY;
	}

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  epp,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
					  policy->boost_enabled);
	}

	return amd_pstate_set_epp(cpudata, epp);
}

static inline int msr_cppc_enable(bool enable)
{
	int ret, cpu;
	unsigned long logical_proc_id_mask = 0;

	/*
	 * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
	 */
	if (!enable)
		return 0;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		unsigned long logical_id = topology_logical_package_id(cpu);

		if (test_bit(logical_id, &logical_proc_id_mask))
			continue;

		set_bit(logical_id, &logical_proc_id_mask);

		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
					 enable);
		if (ret)
			return ret;
	}

	cppc_enabled = enable;
	return 0;
}

static int shmem_cppc_enable(bool enable)
{
	int cpu, ret = 0;
	struct cppc_perf_ctrls perf_ctrls;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;

		/* Enable autonomous mode for EPP */
		if (cppc_state == AMD_PSTATE_ACTIVE) {
			/* Set desired perf as zero to allow EPP firmware control */
			perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
			if (ret)
				return ret;
		}
	}

	cppc_enabled = enable;
	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);

static inline int amd_pstate_cppc_enable(bool enable)
{
	return static_call(amd_pstate_cppc_enable)(enable);
}

static int msr_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1, numerator;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, numerator);
	WRITE_ONCE(cpudata->max_limit_perf, numerator);
	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
	return 0;
}

static int shmem_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u64 numerator;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, numerator);
	WRITE_ONCE(cpudata->max_limit_perf, numerator);
	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);

	if (cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
	if (ret) {
		pr_warn("failed to get auto_sel, ret: %d\n", ret);
		return 0;
	}

	ret = cppc_set_auto_sel(cpudata->cpu,
				(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);

	if (ret)
		pr_warn("failed to set auto_sel, ret: %d\n", ret);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}

static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			     u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	if (cppc_state == AMD_PSTATE_ACTIVE) {
		int ret = shmem_set_epp(cpudata, epp);

		if (ret)
			return ret;
	}

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	return cppc_set_perf(cpudata->cpu, &perf_ctrls);
}

static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
{
	u64 aperf, mperf, tsc;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();

	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}

	local_irq_restore(flags);

	cpudata->cur.aperf = aperf;
	cpudata->cur.mperf = mperf;
	cpudata->cur.tsc = tsc;
	cpudata->cur.aperf -= cpudata->prev.aperf;
	cpudata->cur.mperf -= cpudata->prev.mperf;
	cpudata->cur.tsc -= cpudata->prev.tsc;

	cpudata->prev.aperf = aperf;
	cpudata->prev.mperf = mperf;
	cpudata->prev.tsc = tsc;

	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);

	return true;
}

static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
{
	unsigned long max_freq;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	max_freq = READ_ONCE(cpudata->max_limit_freq);
	policy->cur = div_u64(des_perf * max_freq, max_perf);

	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
		min_perf = des_perf;
		des_perf = 0;
	}

	/* limit the max perf when core performance boost feature is disabled */
	if (!cpudata->boost_supported)
		max_perf = min_t(unsigned long, nominal_perf, max_perf);

	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
				      cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
				      cpudata->cpu, fast_switch);
	}

	amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);

	cpufreq_cpu_put(policy);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{
	/*
	 * Initialize lower frequency limit (i.e. policy->min) with
	 * lowest_nonlinear_frequency which is the most energy efficient
	 * frequency. Override the initial value set by cpufreq core and
	 * amd-pstate qos_requests.
	 */
	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
		struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
		struct amd_cpudata *cpudata;

		if (!policy)
			return -EINVAL;

		cpudata = policy->driver_data;
		policy_data->min = cpudata->lowest_nonlinear_freq;
		cpufreq_cpu_put(policy);
	}

	cpufreq_verify_within_cpu_limits(policy_data);
	pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);

	return 0;
}

static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_perf = READ_ONCE(cpudata->highest_perf);
	max_freq = READ_ONCE(cpudata->max_freq);
	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
	min_limit_perf = div_u64(policy->min * max_perf, max_freq);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);

	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
	WRITE_ONCE(cpudata->min_limit_freq, policy->min);

	return 0;
}

static int amd_pstate_update_freq(struct cpufreq_policy *policy,
				  unsigned int target_freq, bool fast_switch)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	WARN_ON(fast_switch && !policy->fast_switch_enabled);
	/*
	 * If fast_switch is desired, then there aren't any registered
	 * transition notifiers. See comment for
	 * cpufreq_enable_fast_switch().
	 */
	if (!fast_switch)
		cpufreq_freq_transition_begin(policy, &freqs);

	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, fast_switch, policy->governor->flags);

	if (!fast_switch)
		cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	return amd_pstate_update_freq(policy, target_freq, false);
}

static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
					   unsigned int target_freq)
{
	if (!amd_pstate_update_freq(policy, target_freq, true))
		return target_freq;
	return policy->cur;
}

static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->lowest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cap_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
			  policy->governor->flags);
	cpufreq_cpu_put(policy);
}

static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 nominal_freq, max_freq;
	int ret = 0;

	nominal_freq = READ_ONCE(cpudata->nominal_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	if (on)
		policy->cpuinfo.max_freq = max_freq;
	else if (policy->cpuinfo.max_freq > nominal_freq)
		policy->cpuinfo.max_freq = nominal_freq;

	policy->max = policy->cpuinfo.max_freq;

	if (cppc_state == AMD_PSTATE_PASSIVE) {
		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
		if (ret < 0)
			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
	}

	return ret < 0 ? ret : 0;
}

static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EOPNOTSUPP;
	}
	guard(mutex)(&amd_pstate_driver_lock);

	ret = amd_pstate_cpu_boost_update(policy, state);
	policy->boost_enabled = !ret ? state : false;
	refresh_frequency_limits(policy);

	return ret;
}

static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
{
	u64 boost_val;
	int ret = -1;

	/*
	 * If the platform has no CPB support or disables it, initialize the current
	 * driver boost_enabled state to false; it is not an error for the cpufreq
	 * core to handle.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
		pr_debug_once("Boost CPB capabilities not present in the processor\n");
		ret = 0;
		goto exit_err;
	}

	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
	if (ret) {
		pr_err_once("failed to read initial CPU boost state!\n");
		ret = -EIO;
		goto exit_err;
	}

	if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
		cpudata->boost_supported = true;

	return 0;

exit_err:
	cpudata->boost_supported = false;
	return ret;
}

static void amd_perf_ctl_reset(unsigned int cpu)
{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}

/*
 * Setting the amd-pstate preferred core enable can't be done directly from cpufreq
 * callbacks due to locking, so queue the work for later.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);

#define CPPC_MAX_PERF	U8_MAX

static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	/* user disabled or not detected */
	if (!amd_pstate_prefcore)
		return;

	cpudata->hw_prefcore = true;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}

static void amd_pstate_update_limits(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;
	u32 prev_high = 0, cur_high = 0;
	int ret;
	bool highest_perf_changed = false;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (!amd_pstate_prefcore)
		return;

	guard(mutex)(&amd_pstate_driver_lock);

	ret = amd_get_highest_perf(cpu, &cur_high);
	if (ret)
		goto free_cpufreq_put;

	prev_high = READ_ONCE(cpudata->prefcore_ranking);
	highest_perf_changed = (prev_high != cur_high);
	if (highest_perf_changed) {
		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

		if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
	}

free_cpufreq_put:
	cpufreq_cpu_put(policy);

	if (!highest_perf_changed)
		cpufreq_update_policy(cpu);
}

/*
 * Get the pstate transition delay time from the ACPI tables that firmware
 * set instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
{
	u32 transition_delay_ns;

	transition_delay_ns = cppc_get_transition_latency(cpu);
	if (transition_delay_ns == CPUFREQ_ETERNAL) {
		if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
		else
			return AMD_PSTATE_TRANSITION_DELAY;
	}

	return transition_delay_ns / NSEC_PER_USEC;
}

/*
 * Get the pstate transition latency value from the ACPI tables that firmware
 * set instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_latency(unsigned int cpu)
{
	u32 transition_latency;

	transition_latency = cppc_get_transition_latency(cpu);
	if (transition_latency == CPUFREQ_ETERNAL)
		return AMD_PSTATE_TRANSITION_LATENCY;

	return transition_latency;
}

/*
 * amd_pstate_init_freq: Initialize the max_freq, min_freq,
 *                       nominal_freq and lowest_nonlinear_freq
 *                       for the @cpudata object.
 *
 * Requires: highest_perf, lowest_perf, nominal_perf and
 *           lowest_nonlinear_perf members of @cpudata to be
 *           initialized.
 *
 * Returns 0 on success, non-zero value on failure.
 */
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
	int ret;
	u32 min_freq, max_freq;
	u32 highest_perf, nominal_perf, nominal_freq;
	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
	struct cppc_perf_caps cppc_perf;

	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	if (quirks && quirks->lowest_freq)
		min_freq = quirks->lowest_freq;
	else
		min_freq = cppc_perf.lowest_freq;

	if (quirks && quirks->nominal_freq)
		nominal_freq = quirks->nominal_freq;
	else
		nominal_freq = cppc_perf.nominal_freq;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);
	max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);

	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
	WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
	WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
	WRITE_ONCE(cpudata->max_freq, max_freq * 1000);

	/*
	 * Below values need to be initialized correctly, otherwise driver will fail to load
	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf
	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq]
	 * Check _CPC in ACPI table objects if any values are incorrect
	 */
	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
		       min_freq, max_freq, nominal_freq);
		return -EINVAL;
	}

	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
		       lowest_nonlinear_freq, min_freq, nominal_freq);
		return -EINVAL;
	}

	return 0;
}

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (cpu_feature_enabled(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	cpudata->max_limit_freq = max_freq;
	cpudata->min_limit_freq = min_freq;

	policy->driver_data = cpudata;

	if (!current_pstate_driver->adjust_perf)
		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}

static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	freq_qos_remove_request(&cpudata->req[1]);
	freq_qos_remove_request(&cpudata->req[0]);
	policy->fast_switch_possible = false;
	kfree(cpudata);
}

static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);

	return ret;
}

static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);

	return ret;
}

/* Sysfs attributes */

/*
 * This frequency is to indicate the maximum hardware frequency.
 * If boost is not active but supported, the frequency will be larger than the
 * one in cpuinfo.
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_freq = READ_ONCE(cpudata->max_freq);
	if (max_freq < 0)
		return max_freq;

	return sysfs_emit(buf, "%u\n", max_freq);
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
	if (freq < 0)
		return freq;

	return sysfs_emit(buf, "%u\n", freq);
}

/*
 * In some ASICs the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
						char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->prefcore_ranking);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
					   char *buf)
{
	bool hw_prefcore;
	struct amd_cpudata *cpudata = policy->driver_data;

	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);

	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
}

static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int offset = 0;
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sysfs_emit_at(buf, offset, "%s\n",
				     energy_perf_strings[EPP_INDEX_PERFORMANCE]);

	while (energy_perf_strings[i] != NULL)
		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);

	offset += sysfs_emit_at(buf, offset, "\n");

	return offset;
}

static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	char str_preference[21];
	ssize_t ret;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0)
		return -EINVAL;

	guard(mutex)(&amd_pstate_limits_lock);

	ret = amd_pstate_set_energy_pref_index(policy, ret);

	return ret ? ret : count;
}

static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int preference;

	switch (cpudata->epp_cached) {
	case AMD_CPPC_EPP_PERFORMANCE:
		preference = EPP_INDEX_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
		preference = EPP_INDEX_BALANCE_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
		preference = EPP_INDEX_BALANCE_POWERSAVE;
		break;
	case AMD_CPPC_EPP_POWERSAVE:
		preference = EPP_INDEX_POWERSAVE;
		break;
	default:
		return -EINVAL;
	}

	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}

static void amd_pstate_driver_cleanup(void)
{
	amd_pstate_cppc_enable(false);
	cppc_state = AMD_PSTATE_DISABLE;
	current_pstate_driver = NULL;
}

static int amd_pstate_set_driver(int mode_idx)
{
	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
		cppc_state = mode_idx;
		if (cppc_state == AMD_PSTATE_DISABLE)
			pr_info("driver is explicitly disabled\n");

		if (cppc_state == AMD_PSTATE_ACTIVE)
			current_pstate_driver = &amd_pstate_epp_driver;

		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
			current_pstate_driver = &amd_pstate_driver;

		return 0;
	}

	return -EINVAL;
}

static int amd_pstate_register_driver(int mode)
{
	int ret;

	ret = amd_pstate_set_driver(mode);
	if (ret)
		return ret;

	cppc_state = mode;

	ret = amd_pstate_cppc_enable(true);
	if (ret) {
		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
		       ret);
		amd_pstate_driver_cleanup();
		return ret;
	}

	/* at least one CPU supports CPB */
	current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);

	ret = cpufreq_register_driver(current_pstate_driver);
	if (ret) {
		amd_pstate_driver_cleanup();
		return ret;
	}

	return 0;
}

static int amd_pstate_unregister_driver(int dummy)
{
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_driver_cleanup();
	return 0;
}

static int amd_pstate_change_mode_without_dvr_change(int mode)
{
	int cpu = 0;

	cppc_state = mode;

	if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	for_each_present_cpu(cpu) {
		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
	}

	return 0;
}

static int amd_pstate_change_driver_mode(int mode)
{
	int ret;

	ret = amd_pstate_unregister_driver(0);
	if (ret)
		return ret;

	ret = amd_pstate_register_driver(mode);
	if (ret)
		return ret;

	return 0;
}

static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
	[AMD_PSTATE_DISABLE]         = {
		[AMD_PSTATE_DISABLE]     = NULL,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_register_driver,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_register_driver,
		[AMD_PSTATE_GUIDED]      = amd_pstate_register_driver,
	},
	[AMD_PSTATE_PASSIVE]         = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = NULL,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]      = amd_pstate_change_mode_without_dvr_change,
	},
	[AMD_PSTATE_ACTIVE]          = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_driver_mode,
		[AMD_PSTATE_ACTIVE]      = NULL,
		[AMD_PSTATE_GUIDED]      = amd_pstate_change_driver_mode,
	},
	[AMD_PSTATE_GUIDED]          = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_mode_without_dvr_change,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]      = NULL,
	},
};

static ssize_t amd_pstate_show_status(char *buf)
{
	if (!current_pstate_driver)
		return sysfs_emit(buf, "disable\n");

	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}

int amd_pstate_update_status(const char *buf, size_t size)
{
	int mode_idx;

	if (size > strlen("passive") || size < strlen("active"))
		return -EINVAL;

	mode_idx = get_mode_idx_from_str(buf, size);

	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
		return -EINVAL;

	if (mode_state_machine[cppc_state][mode_idx])
		return mode_state_machine[cppc_state][mode_idx](mode_idx);

	return 0;
}
EXPORT_SYMBOL_GPL(amd_pstate_update_status);

static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	guard(mutex)(&amd_pstate_driver_lock);

	return amd_pstate_show_status(buf);
}

static ssize_t status_store(struct device *a, struct device_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

	guard(mutex)(&amd_pstate_driver_lock);
	ret = amd_pstate_update_status(buf, p ? p - buf : count);

	return ret < 0 ? ret : count;
}

static ssize_t prefcore_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
}

cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);

static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	NULL,
};

static struct freq_attr *amd_pstate_epp_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	&energy_performance_preference,
	&energy_performance_available_preferences,
	NULL,
};

static struct attribute *pstate_global_attributes[] = {
	&dev_attr_status.attr,
	&dev_attr_prefcore.attr,
	NULL
};

static const struct attribute_group amd_pstate_global_attr_group = {
	.name = "amd_pstate",
	.attrs = pstate_global_attributes,
};

static bool amd_pstate_acpi_pm_profile_server(void)
{
	switch (acpi_gbl_FADT.preferred_profile) {
	case PM_ENTERPRISE_SERVER:
	case PM_SOHO_SERVER:
	case PM_PERFORMANCE_SERVER:
		return true;
	}
	return false;
}

static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
		return true;
	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
		return true;
	return false;
}

static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct amd_cpudata *cpudata;
	struct device *dev;
	u64 value;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;
	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	policy->driver_data = cpudata;

	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/*
	 * Set the policy to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	if (amd_pstate_acpi_pm_profile_server() ||
	    amd_pstate_acpi_pm_profile_undefined()) {
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
		cpudata->epp_default = amd_pstate_get_epp(cpudata);
	} else {
		policy->policy = CPUFREQ_POLICY_POWERSAVE;
		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
	}

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
	}
	ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
	if (ret)
		return ret;

	current_pstate_driver->adjust_perf = NULL;

	return 0;

free_cpudata1:
	kfree(cpudata);
	return ret;
}

static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata) {
		kfree(cpudata);
		policy->driver_data = NULL;
	}

	pr_debug("CPU %d exiting\n", policy->cpu);
}

static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 epp;

	amd_pstate_update_min_max_limit(policy);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		epp = 0;
	else
		epp = READ_ONCE(cpudata->epp_cached);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
					  cpudata->min_limit_perf,
					  cpudata->max_limit_perf,
					  policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
				      cpudata->max_limit_perf, epp, false);
}

static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpudata->policy = policy->policy;

	ret = amd_pstate_epp_update_limit(policy);
	if (ret)
		return ret;

	/*
	 * policy->cur is never updated with the amd_pstate_epp driver, but it
	 * is used as a stale frequency value. So, keep it within limits.
	 */
	policy->cur = policy->min;

	return 0;
}

static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u64 max_perf;
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

	max_perf = READ_ONCE(cpudata->highest_perf);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  cpudata->epp_cached,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  max_perf, policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
}

static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

	ret = amd_pstate_epp_reenable(policy);
	if (ret)
		return ret;
	cpudata->suspended = false;

	return 0;
}

static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int min_perf;

	if (cpudata->suspended)
		return 0;

	min_perf = READ_ONCE(cpudata->lowest_perf);

	guard(mutex)(&amd_pstate_limits_lock);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  AMD_CPPC_EPP_BALANCE_POWERSAVE,
					  min_perf, min_perf, policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
				      AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}

static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	/* avoid suspending when EPP is not enabled */
	if (cppc_state != AMD_PSTATE_ACTIVE)
		return 0;

	/* set this flag to avoid setting core offline */
	cpudata->suspended = true;

	/* disable CPPC in lowlevel firmware */
	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to suspend, return %d\n", ret);

	return 0;
}

static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->suspended) {
		guard(mutex)(&amd_pstate_limits_lock);

		/* enable amd pstate from suspend state */
		amd_pstate_epp_reenable(policy);

		cpudata->suspended = false;
	}

	return 0;
}

static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.fast_switch	= amd_pstate_fast_switch,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.suspend	= amd_pstate_cpu_suspend,
	.resume		= amd_pstate_cpu_resume,
	.set_boost	= amd_pstate_set_boost,
	.update_limits	= amd_pstate_update_limits,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};

static struct cpufreq_driver amd_pstate_epp_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= amd_pstate_verify,
	.setpolicy	= amd_pstate_epp_set_policy,
	.init		= amd_pstate_epp_cpu_init,
	.exit		= amd_pstate_epp_cpu_exit,
	.offline	= amd_pstate_epp_cpu_offline,
	.online		= amd_pstate_epp_cpu_online,
	.suspend	= amd_pstate_epp_suspend,
	.resume		= amd_pstate_epp_resume,
	.update_limits	= amd_pstate_update_limits,
	.set_boost	= amd_pstate_set_boost,
	.name		= "amd-pstate-epp",
	.attr		= amd_pstate_epp_attr,
};

/*
 * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
 * Show a debug message that helps to check whether the CPU has CPPC support for load issues.
 */
static bool amd_cppc_supported(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);
	bool warn = false;

	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
		pr_debug_once("CPPC feature is not supported by the processor\n");
		return false;
	}

	/*
	 * If the CPPC feature is disabled in the BIOS for processors
	 * that support MSR-based CPPC, the AMD Pstate driver may not
	 * function correctly.
	 *
	 * For such processors, check the CPPC flag and display a
	 * warning message if the platform supports CPPC.
	 *
	 * Note: The code check below will not abort the driver
	 * registration process because it is added for debugging
	 * purposes. Besides, it may still be possible for the
	 * driver to work using the shared-memory mechanism.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
		if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
			switch (c->x86_model) {
			case 0x60 ... 0x6F:
			case 0x80 ... 0xAF:
				warn = true;
				break;
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
			   cpu_feature_enabled(X86_FEATURE_ZEN4)) {
			switch (c->x86_model) {
			case 0x10 ... 0x1F:
			case 0x40 ... 0xAF:
				warn = true;
				break;
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
			warn = true;
		}
	}

	if (warn)
		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
			     "Please enable it if your BIOS has the CPPC option.\n");
	return true;
}

static int __init amd_pstate_init(void)
{
	struct device *dev_root;
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	/* show debug message only if CPPC is not supported */
	if (!amd_cppc_supported())
		return -EOPNOTSUPP;

	/* show warning message when BIOS broken or ACPI disabled */
	if (!acpi_cpc_valid()) {
		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	quirks = NULL;

	/* check if this machine needs CPPC quirks */
	dmi_check_system(amd_pstate_quirks_table);

	/*
	 * Determine the driver mode from the command line or kernel config.
	 * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
	 * Command line options will override the kernel config settings.
	 */
	if (cppc_state == AMD_PSTATE_UNDEFINED) {
		/* Disable on the following configs by default:
		 * 1. Undefined platforms
		 * 2. Server platforms with CPUs older than Family 0x1A.
		 */
		if (amd_pstate_acpi_pm_profile_undefined() ||
		    (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
			pr_info("driver load is disabled, boot with specific mode to enable this\n");
			return -ENODEV;
		}
		/* get driver mode from kernel config option [1:4] */
		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
	}

	if (cppc_state == AMD_PSTATE_DISABLE) {
		pr_info("driver load is disabled, boot with specific mode to enable this\n");
		return -ENODEV;
	}

	/* capability check */
	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
	} else {
		pr_debug("AMD CPPC shared memory based functionality is supported\n");
		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
		static_call_update(amd_pstate_init_perf, shmem_init_perf);
		static_call_update(amd_pstate_update_perf, shmem_update_perf);
		static_call_update(amd_pstate_get_epp, shmem_get_epp);
		static_call_update(amd_pstate_set_epp, shmem_set_epp);
	}

	if (amd_pstate_prefcore) {
		ret = amd_detect_prefcore(&amd_pstate_prefcore);
		if (ret)
			return ret;
	}

	ret = amd_pstate_register_driver(cppc_state);
	if (ret) {
		pr_err("failed to register with return %d\n", ret);
		return ret;
	}

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
		put_device(dev_root);
		if (ret) {
			pr_err("sysfs attribute export failed with error %d.\n", ret);
			goto global_attr_free;
		}
	}

	return ret;

global_attr_free:
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_cppc_enable(false);
	return ret;
}
device_initcall(amd_pstate_init);

static int __init amd_pstate_param(char *str)
{
	size_t size;
	int mode_idx;

	if (!str)
		return -EINVAL;

	size = strlen(str);
	mode_idx = get_mode_idx_from_str(str, size);

	return amd_pstate_set_driver(mode_idx);
}

static int __init amd_prefcore_param(char *str)
{
	if (!strcmp(str, "disable"))
		amd_pstate_prefcore = false;

	return 0;
}

early_param("amd_pstate", amd_pstate_param);
early_param("amd_prefcore", amd_prefcore_param);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");