1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * amd-pstate.c - AMD Processor P-state Frequency Driver 4 * 5 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. 6 * 7 * Author: Huang Rui <ray.huang@amd.com> 8 * 9 * AMD P-State introduces a new CPU performance scaling design for AMD 10 * processors using the ACPI Collaborative Performance and Power Control (CPPC) 11 * feature which works with the AMD SMU firmware providing a finer grained 12 * frequency control range. It is to replace the legacy ACPI P-States control, 13 * allows a flexible, low-latency interface for the Linux kernel to directly 14 * communicate the performance hints to hardware. 15 * 16 * AMD P-State is supported on recent AMD Zen base CPU series include some of 17 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of AMD 18 * P-State supported system. And there are two types of hardware implementations 19 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution. 20 * X86_FEATURE_CPPC CPU feature flag is used to distinguish the different types. 21 */ 22 23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 24 25 #include <linux/bitfield.h> 26 #include <linux/kernel.h> 27 #include <linux/module.h> 28 #include <linux/init.h> 29 #include <linux/smp.h> 30 #include <linux/sched.h> 31 #include <linux/cpufreq.h> 32 #include <linux/compiler.h> 33 #include <linux/dmi.h> 34 #include <linux/slab.h> 35 #include <linux/acpi.h> 36 #include <linux/io.h> 37 #include <linux/delay.h> 38 #include <linux/uaccess.h> 39 #include <linux/power_supply.h> 40 #include <linux/static_call.h> 41 #include <linux/topology.h> 42 43 #include <acpi/processor.h> 44 #include <acpi/cppc_acpi.h> 45 46 #include <asm/msr.h> 47 #include <asm/processor.h> 48 #include <asm/cpufeature.h> 49 #include <asm/cpu_device_id.h> 50 51 #include "amd-pstate.h" 52 #include "amd-pstate-trace.h" 53 54 #define AMD_PSTATE_TRANSITION_LATENCY 20000 55 #define AMD_PSTATE_TRANSITION_DELAY 1000 56 #define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600 57 58 #define AMD_CPPC_EPP_PERFORMANCE 0x00 59 #define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 60 #define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF 61 #define AMD_CPPC_EPP_POWERSAVE 0xFF 62 63 static const char * const amd_pstate_mode_string[] = { 64 [AMD_PSTATE_UNDEFINED] = "undefined", 65 [AMD_PSTATE_DISABLE] = "disable", 66 [AMD_PSTATE_PASSIVE] = "passive", 67 [AMD_PSTATE_ACTIVE] = "active", 68 [AMD_PSTATE_GUIDED] = "guided", 69 }; 70 static_assert(ARRAY_SIZE(amd_pstate_mode_string) == AMD_PSTATE_MAX); 71 72 const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode) 73 { 74 if (mode < AMD_PSTATE_UNDEFINED || mode >= AMD_PSTATE_MAX) 75 mode = AMD_PSTATE_UNDEFINED; 76 return amd_pstate_mode_string[mode]; 77 } 78 EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string); 79 80 struct quirk_entry { 81 u32 nominal_freq; 82 u32 lowest_freq; 83 }; 84 85 static struct cpufreq_driver *current_pstate_driver; 86 static struct cpufreq_driver amd_pstate_driver; 87 static struct cpufreq_driver amd_pstate_epp_driver; 88 static int cppc_state = AMD_PSTATE_UNDEFINED; 89 static bool amd_pstate_prefcore = true; 90 static bool dynamic_epp; 91 static struct quirk_entry *quirks; 92 93 /* 94 * AMD Energy Preference Performance (EPP) 95 * The EPP is used in the CCLK DPM controller to drive 96 * the frequency that a core is going to operate during 97 * short periods of activity. EPP values will be utilized for 98 * different OS profiles (balanced, performance, power savings) 99 * display strings corresponding to EPP index in the 100 * energy_perf_strings[] 101 * index String 102 *------------------------------------- 103 * 0 default 104 * 1 performance 105 * 2 balance_performance 106 * 3 balance_power 107 * 4 power 108 * 5 custom (for raw EPP values) 109 */ 110 enum energy_perf_value_index { 111 EPP_INDEX_DEFAULT = 0, 112 EPP_INDEX_PERFORMANCE, 113 EPP_INDEX_BALANCE_PERFORMANCE, 114 EPP_INDEX_BALANCE_POWERSAVE, 115 EPP_INDEX_POWERSAVE, 116 EPP_INDEX_CUSTOM, 117 EPP_INDEX_MAX, 118 }; 119 120 static const char * const energy_perf_strings[] = { 121 [EPP_INDEX_DEFAULT] = "default", 122 [EPP_INDEX_PERFORMANCE] = "performance", 123 [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance", 124 [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power", 125 [EPP_INDEX_POWERSAVE] = "power", 126 [EPP_INDEX_CUSTOM] = "custom", 127 }; 128 static_assert(ARRAY_SIZE(energy_perf_strings) == EPP_INDEX_MAX); 129 130 static unsigned int epp_values[] = { 131 [EPP_INDEX_DEFAULT] = 0, 132 [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE, 133 [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE, 134 [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE, 135 [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE, 136 }; 137 static_assert(ARRAY_SIZE(epp_values) == EPP_INDEX_MAX - 1); 138 139 typedef int (*cppc_mode_transition_fn)(int); 140 141 static struct quirk_entry quirk_amd_7k62 = { 142 .nominal_freq = 2600, 143 .lowest_freq = 550, 144 }; 145 146 static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val) 147 { 148 u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq); 149 150 return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf); 151 } 152 153 static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val) 154 { 155 return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val, 156 perf.nominal_perf); 157 } 158 159 static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) 160 { 161 /** 162 * match the broken bios for family 17h processor support CPPC V2 163 * broken BIOS lack of nominal_freq and lowest_freq capabilities 164 * definition in ACPI tables 165 */ 166 if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { 167 quirks = dmi->driver_data; 168 pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); 169 return 1; 170 } 171 172 return 0; 173 } 174 175 static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { 176 { 177 .callback = dmi_matched_7k62_bios_bug, 178 .ident = "AMD EPYC 7K62", 179 .matches = { 180 DMI_MATCH(DMI_BIOS_VERSION, "5.14"), 181 DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"), 182 }, 183 .driver_data = &quirk_amd_7k62, 184 }, 185 {} 186 }; 187 MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); 188 189 static inline int get_mode_idx_from_str(const char *str, size_t size) 190 { 191 int i; 192 193 for (i = 0; i < AMD_PSTATE_MAX; i++) { 194 if (!strncmp(str, amd_pstate_mode_string[i], size)) 195 return i; 196 } 197 return -EINVAL; 198 } 199 200 static DEFINE_MUTEX(amd_pstate_driver_lock); 201 202 static u8 msr_get_epp(struct amd_cpudata *cpudata) 203 { 204 u64 value; 205 int ret; 206 207 ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); 208 if (ret < 0) { 209 pr_debug("Could not retrieve energy perf value (%d)\n", ret); 210 return ret; 211 } 212 213 return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); 214 } 215 216 DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); 217 218 static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) 219 { 220 return static_call(amd_pstate_get_epp)(cpudata); 221 } 222 223 static u8 shmem_get_epp(struct amd_cpudata *cpudata) 224 { 225 u64 epp; 226 int ret; 227 228 ret = cppc_get_epp_perf(cpudata->cpu, &epp); 229 if (ret < 0) { 230 pr_debug("Could not retrieve energy perf value (%d)\n", ret); 231 return ret; 232 } 233 234 return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); 235 } 236 237 static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, 238 u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) 239 { 240 struct amd_cpudata *cpudata = policy->driver_data; 241 u64 value, prev; 242 243 value = prev = READ_ONCE(cpudata->cppc_req_cached); 244 245 FIELD_MODIFY(AMD_CPPC_MAX_PERF_MASK, &value, max_perf); 246 FIELD_MODIFY(AMD_CPPC_DES_PERF_MASK, &value, des_perf); 247 FIELD_MODIFY(AMD_CPPC_MIN_PERF_MASK, &value, min_perf); 248 FIELD_MODIFY(AMD_CPPC_EPP_PERF_MASK, &value, epp); 249 250 if (trace_amd_pstate_epp_perf_enabled()) { 251 union perf_cached perf = READ_ONCE(cpudata->perf); 252 253 trace_call__amd_pstate_epp_perf(cpudata->cpu, 254 perf.highest_perf, 255 epp, 256 min_perf, 257 max_perf, 258 policy->boost_enabled, 259 value != prev); 260 } 261 262 if (value == prev) 263 return 0; 264 265 if (fast_switch) { 266 wrmsrq(MSR_AMD_CPPC_REQ, value); 267 } else { 268 int ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); 269 270 if (ret) 271 return ret; 272 } 273 274 WRITE_ONCE(cpudata->cppc_req_cached, value); 275 276 return 0; 277 } 278 279 DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); 280 281 static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, 282 u8 min_perf, u8 des_perf, 283 u8 max_perf, u8 epp, 284 bool fast_switch) 285 { 286 return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, 287 max_perf, epp, fast_switch); 288 } 289 290 static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) 291 { 292 struct amd_cpudata *cpudata = policy->driver_data; 293 u64 value, prev; 294 int ret; 295 296 value = prev = READ_ONCE(cpudata->cppc_req_cached); 297 FIELD_MODIFY(AMD_CPPC_EPP_PERF_MASK, &value, epp); 298 299 if (trace_amd_pstate_epp_perf_enabled()) { 300 union perf_cached perf = cpudata->perf; 301 302 trace_call__amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, 303 epp, 304 FIELD_GET(AMD_CPPC_MIN_PERF_MASK, 305 cpudata->cppc_req_cached), 306 FIELD_GET(AMD_CPPC_MAX_PERF_MASK, 307 cpudata->cppc_req_cached), 308 policy->boost_enabled, 309 value != prev); 310 } 311 312 if (value == prev) 313 return 0; 314 315 ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); 316 if (ret) { 317 pr_err("failed to set energy perf value (%d)\n", ret); 318 return ret; 319 } 320 321 /* update both so that msr_update_perf() can effectively check */ 322 WRITE_ONCE(cpudata->cppc_req_cached, value); 323 324 return ret; 325 } 326 327 DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); 328 329 static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) 330 { 331 return static_call(amd_pstate_set_epp)(policy, epp); 332 } 333 334 static int amd_pstate_set_floor_perf(struct cpufreq_policy *policy, u8 perf) 335 { 336 struct amd_cpudata *cpudata = policy->driver_data; 337 u64 value, prev; 338 bool changed; 339 int ret; 340 341 if (!cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO)) 342 return 0; 343 344 value = prev = READ_ONCE(cpudata->cppc_req2_cached); 345 FIELD_MODIFY(AMD_CPPC_FLOOR_PERF_MASK, &value, perf); 346 347 changed = value != prev; 348 if (!changed) { 349 ret = 0; 350 goto out_trace; 351 } 352 353 ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ2, value); 354 if (ret) { 355 changed = false; 356 pr_err("failed to set CPPC REQ2 value. Error (%d)\n", ret); 357 goto out_trace; 358 } 359 360 WRITE_ONCE(cpudata->cppc_req2_cached, value); 361 362 out_trace: 363 if (trace_amd_pstate_cppc_req2_enabled()) 364 trace_amd_pstate_cppc_req2(cpudata->cpu, perf, changed, ret); 365 return ret; 366 } 367 368 static int amd_pstate_init_floor_perf(struct cpufreq_policy *policy) 369 { 370 struct amd_cpudata *cpudata = policy->driver_data; 371 u8 floor_perf; 372 u64 value; 373 int ret; 374 375 if (!cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO)) 376 return 0; 377 378 ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ2, &value); 379 if (ret) { 380 pr_err("failed to read CPPC REQ2 value. Error (%d)\n", ret); 381 return ret; 382 } 383 384 WRITE_ONCE(cpudata->cppc_req2_cached, value); 385 floor_perf = FIELD_GET(AMD_CPPC_FLOOR_PERF_MASK, 386 cpudata->cppc_req2_cached); 387 388 /* Set a sane value for floor_perf if the default value is invalid */ 389 if (floor_perf < cpudata->perf.lowest_perf) { 390 floor_perf = cpudata->perf.nominal_perf; 391 ret = amd_pstate_set_floor_perf(policy, floor_perf); 392 if (ret) 393 return ret; 394 } 395 396 397 cpudata->bios_floor_perf = floor_perf; 398 cpudata->floor_freq = perf_to_freq(cpudata->perf, cpudata->nominal_freq, 399 floor_perf); 400 return 0; 401 } 402 403 static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) 404 { 405 struct amd_cpudata *cpudata = policy->driver_data; 406 struct cppc_perf_ctrls perf_ctrls; 407 u8 epp_cached; 408 u64 value; 409 int ret; 410 411 412 epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); 413 if (trace_amd_pstate_epp_perf_enabled()) { 414 union perf_cached perf = cpudata->perf; 415 416 trace_call__amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, 417 epp, 418 FIELD_GET(AMD_CPPC_MIN_PERF_MASK, 419 cpudata->cppc_req_cached), 420 FIELD_GET(AMD_CPPC_MAX_PERF_MASK, 421 cpudata->cppc_req_cached), 422 policy->boost_enabled, 423 epp != epp_cached); 424 } 425 426 if (epp == epp_cached) 427 return 0; 428 429 perf_ctrls.energy_perf = epp; 430 ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); 431 if (ret) { 432 pr_debug("failed to set energy perf value (%d)\n", ret); 433 return ret; 434 } 435 436 value = READ_ONCE(cpudata->cppc_req_cached); 437 FIELD_MODIFY(AMD_CPPC_EPP_PERF_MASK, &value, epp); 438 WRITE_ONCE(cpudata->cppc_req_cached, value); 439 440 return ret; 441 } 442 443 static inline int msr_cppc_enable(struct cpufreq_policy *policy) 444 { 445 return wrmsrq_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1); 446 } 447 448 static int shmem_cppc_enable(struct cpufreq_policy *policy) 449 { 450 return cppc_set_enable(policy->cpu, 1); 451 } 452 453 DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); 454 455 static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) 456 { 457 return static_call(amd_pstate_cppc_enable)(policy); 458 } 459 460 static int msr_init_perf(struct amd_cpudata *cpudata) 461 { 462 union perf_cached perf = READ_ONCE(cpudata->perf); 463 u64 cap1, numerator, cppc_req; 464 u8 min_perf; 465 466 int ret = rdmsrq_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, 467 &cap1); 468 if (ret) 469 return ret; 470 471 ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); 472 if (ret) 473 return ret; 474 475 ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req); 476 if (ret) 477 return ret; 478 479 WRITE_ONCE(cpudata->cppc_req_cached, cppc_req); 480 min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req); 481 482 /* 483 * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an 484 * indication that the min_perf value is the one specified through the BIOS option 485 */ 486 cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK); 487 488 if (!cppc_req) 489 perf.bios_min_perf = min_perf; 490 491 perf.highest_perf = numerator; 492 perf.max_limit_perf = numerator; 493 perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); 494 perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); 495 perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); 496 perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); 497 WRITE_ONCE(cpudata->perf, perf); 498 WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1)); 499 WRITE_ONCE(cpudata->floor_perf_cnt, FIELD_GET(AMD_CPPC_FLOOR_PERF_CNT_MASK, cap1)); 500 501 return 0; 502 } 503 504 static int shmem_init_perf(struct amd_cpudata *cpudata) 505 { 506 struct cppc_perf_caps cppc_perf; 507 union perf_cached perf = READ_ONCE(cpudata->perf); 508 u64 numerator; 509 bool auto_sel; 510 511 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 512 if (ret) 513 return ret; 514 515 ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); 516 if (ret) 517 return ret; 518 519 perf.highest_perf = numerator; 520 perf.max_limit_perf = numerator; 521 perf.min_limit_perf = cppc_perf.lowest_perf; 522 perf.nominal_perf = cppc_perf.nominal_perf; 523 perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; 524 perf.lowest_perf = cppc_perf.lowest_perf; 525 WRITE_ONCE(cpudata->perf, perf); 526 WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); 527 528 if (cppc_state == AMD_PSTATE_ACTIVE) 529 return 0; 530 531 ret = cppc_get_auto_sel(cpudata->cpu, &auto_sel); 532 if (ret) { 533 pr_warn("failed to get auto_sel, ret: %d\n", ret); 534 return 0; 535 } 536 537 ret = cppc_set_auto_sel(cpudata->cpu, 538 (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); 539 540 if (ret) 541 pr_warn("failed to set auto_sel, ret: %d\n", ret); 542 543 return ret; 544 } 545 546 DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf); 547 548 static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) 549 { 550 return static_call(amd_pstate_init_perf)(cpudata); 551 } 552 553 static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, 554 u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) 555 { 556 struct amd_cpudata *cpudata = policy->driver_data; 557 struct cppc_perf_ctrls perf_ctrls; 558 u64 value, prev; 559 int ret; 560 561 if (cppc_state == AMD_PSTATE_ACTIVE) { 562 int ret = shmem_set_epp(policy, epp); 563 564 if (ret) 565 return ret; 566 } 567 568 value = prev = READ_ONCE(cpudata->cppc_req_cached); 569 570 FIELD_MODIFY(AMD_CPPC_MAX_PERF_MASK, &value, max_perf); 571 FIELD_MODIFY(AMD_CPPC_DES_PERF_MASK, &value, des_perf); 572 FIELD_MODIFY(AMD_CPPC_MIN_PERF_MASK, &value, min_perf); 573 FIELD_MODIFY(AMD_CPPC_EPP_PERF_MASK, &value, epp); 574 575 if (trace_amd_pstate_epp_perf_enabled()) { 576 union perf_cached perf = READ_ONCE(cpudata->perf); 577 578 trace_call__amd_pstate_epp_perf(cpudata->cpu, 579 perf.highest_perf, 580 epp, 581 min_perf, 582 max_perf, 583 policy->boost_enabled, 584 value != prev); 585 } 586 587 if (value == prev) 588 return 0; 589 590 perf_ctrls.max_perf = max_perf; 591 perf_ctrls.min_perf = min_perf; 592 perf_ctrls.desired_perf = des_perf; 593 594 ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); 595 if (ret) 596 return ret; 597 598 WRITE_ONCE(cpudata->cppc_req_cached, value); 599 600 return 0; 601 } 602 603 static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) 604 { 605 u64 aperf, mperf, tsc; 606 unsigned long flags; 607 608 local_irq_save(flags); 609 rdmsrq(MSR_IA32_APERF, aperf); 610 rdmsrq(MSR_IA32_MPERF, mperf); 611 tsc = rdtsc(); 612 613 if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { 614 local_irq_restore(flags); 615 return false; 616 } 617 618 local_irq_restore(flags); 619 620 cpudata->cur.aperf = aperf; 621 cpudata->cur.mperf = mperf; 622 cpudata->cur.tsc = tsc; 623 cpudata->cur.aperf -= cpudata->prev.aperf; 624 cpudata->cur.mperf -= cpudata->prev.mperf; 625 cpudata->cur.tsc -= cpudata->prev.tsc; 626 627 cpudata->prev.aperf = aperf; 628 cpudata->prev.mperf = mperf; 629 cpudata->prev.tsc = tsc; 630 631 cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf); 632 633 return true; 634 } 635 636 static void amd_pstate_update(struct cpufreq_policy *policy, u8 min_perf, 637 u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) 638 { 639 struct amd_cpudata *cpudata = policy->driver_data; 640 union perf_cached perf = READ_ONCE(cpudata->perf); 641 642 /* limit the max perf when core performance boost feature is disabled */ 643 if (!cpudata->boost_supported) 644 max_perf = min_t(u8, perf.nominal_perf, max_perf); 645 646 des_perf = clamp_t(u8, des_perf, min_perf, max_perf); 647 648 policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf); 649 650 if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { 651 min_perf = des_perf; 652 des_perf = 0; 653 } 654 655 if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { 656 trace_call__amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, 657 cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, 658 cpudata->cpu, fast_switch); 659 } 660 661 amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch); 662 } 663 664 static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) 665 { 666 /* 667 * Initialize lower frequency limit (i.e.policy->min) with 668 * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS, 669 * Override the initial value set by cpufreq core and amd-pstate qos_requests. 670 */ 671 if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { 672 struct cpufreq_policy *policy __free(put_cpufreq_policy) = 673 cpufreq_cpu_get(policy_data->cpu); 674 struct amd_cpudata *cpudata; 675 union perf_cached perf; 676 677 if (!policy) 678 return -EINVAL; 679 680 cpudata = policy->driver_data; 681 perf = READ_ONCE(cpudata->perf); 682 683 if (perf.bios_min_perf) 684 policy_data->min = perf_to_freq(perf, cpudata->nominal_freq, 685 perf.bios_min_perf); 686 else 687 policy_data->min = cpudata->lowest_nonlinear_freq; 688 } 689 690 cpufreq_verify_within_cpu_limits(policy_data); 691 692 return 0; 693 } 694 695 static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) 696 { 697 struct amd_cpudata *cpudata = policy->driver_data; 698 union perf_cached perf = READ_ONCE(cpudata->perf); 699 700 perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); 701 WRITE_ONCE(cpudata->max_limit_freq, policy->max); 702 703 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { 704 /* 705 * For performance policy, set MinPerf to nominal_perf rather than 706 * highest_perf or lowest_nonlinear_perf. 707 * 708 * Per commit 0c411b39e4f4c, using highest_perf was observed 709 * to cause frequency throttling on power-limited platforms, leading to 710 * performance regressions. Using lowest_nonlinear_perf would limit 711 * performance too much for HPC workloads requiring high frequency 712 * operation and minimal wakeup latency from idle states. 713 * 714 * nominal_perf therefore provides a balance by avoiding throttling 715 * while still maintaining enough performance for HPC workloads. 716 */ 717 perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); 718 WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); 719 } else { 720 perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); 721 WRITE_ONCE(cpudata->min_limit_freq, policy->min); 722 } 723 724 WRITE_ONCE(cpudata->perf, perf); 725 } 726 727 static int amd_pstate_update_freq(struct cpufreq_policy *policy, 728 unsigned int target_freq, bool fast_switch) 729 { 730 struct cpufreq_freqs freqs; 731 struct amd_cpudata *cpudata; 732 union perf_cached perf; 733 u8 des_perf; 734 735 cpudata = policy->driver_data; 736 737 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) 738 amd_pstate_update_min_max_limit(policy); 739 740 perf = READ_ONCE(cpudata->perf); 741 742 freqs.old = policy->cur; 743 freqs.new = target_freq; 744 745 des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq); 746 747 WARN_ON(fast_switch && !policy->fast_switch_enabled); 748 /* 749 * If fast_switch is desired, then there aren't any registered 750 * transition notifiers. See comment for 751 * cpufreq_enable_fast_switch(). 752 */ 753 if (!fast_switch) 754 cpufreq_freq_transition_begin(policy, &freqs); 755 756 amd_pstate_update(policy, perf.min_limit_perf, des_perf, 757 perf.max_limit_perf, fast_switch, 758 policy->governor->flags); 759 760 if (!fast_switch) 761 cpufreq_freq_transition_end(policy, &freqs, false); 762 763 return 0; 764 } 765 766 static int amd_pstate_target(struct cpufreq_policy *policy, 767 unsigned int target_freq, 768 unsigned int relation) 769 { 770 return amd_pstate_update_freq(policy, target_freq, false); 771 } 772 773 static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy, 774 unsigned int target_freq) 775 { 776 if (!amd_pstate_update_freq(policy, target_freq, true)) 777 return target_freq; 778 return policy->cur; 779 } 780 781 static void amd_pstate_adjust_perf(struct cpufreq_policy *policy, 782 unsigned long _min_perf, 783 unsigned long target_perf, 784 unsigned long capacity) 785 { 786 u8 max_perf, min_perf, des_perf, cap_perf; 787 struct amd_cpudata *cpudata; 788 union perf_cached perf; 789 790 if (!policy) 791 return; 792 793 cpudata = policy->driver_data; 794 795 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) 796 amd_pstate_update_min_max_limit(policy); 797 798 perf = READ_ONCE(cpudata->perf); 799 cap_perf = perf.highest_perf; 800 801 des_perf = cap_perf; 802 if (target_perf < capacity) 803 des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); 804 805 if (_min_perf < capacity) 806 min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); 807 else 808 min_perf = cap_perf; 809 810 if (min_perf < perf.min_limit_perf) 811 min_perf = perf.min_limit_perf; 812 813 max_perf = perf.max_limit_perf; 814 if (max_perf < min_perf) 815 max_perf = min_perf; 816 817 amd_pstate_update(policy, min_perf, des_perf, max_perf, true, 818 policy->governor->flags); 819 } 820 821 static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) 822 { 823 struct amd_cpudata *cpudata = policy->driver_data; 824 u32 nominal_freq; 825 int ret = 0; 826 827 nominal_freq = READ_ONCE(cpudata->nominal_freq); 828 829 if (on) 830 policy->cpuinfo.max_freq = cpudata->max_freq; 831 else if (policy->cpuinfo.max_freq > nominal_freq) 832 policy->cpuinfo.max_freq = nominal_freq; 833 834 if (cppc_state == AMD_PSTATE_PASSIVE) { 835 ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); 836 if (ret < 0) 837 pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu); 838 } 839 840 return ret < 0 ? ret : 0; 841 } 842 843 static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) 844 { 845 struct amd_cpudata *cpudata = policy->driver_data; 846 int ret; 847 848 if (!cpudata->boost_supported) { 849 pr_err("Boost mode is not supported by this processor or SBIOS\n"); 850 return -EOPNOTSUPP; 851 } 852 853 ret = amd_pstate_cpu_boost_update(policy, state); 854 refresh_frequency_limits(policy); 855 856 return ret; 857 } 858 859 static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) 860 { 861 u64 boost_val; 862 int ret = -1; 863 864 /* 865 * If platform has no CPB support or disable it, initialize current driver 866 * boost_enabled state to be false, it is not an error for cpufreq core to handle. 867 */ 868 if (!cpu_feature_enabled(X86_FEATURE_CPB)) { 869 pr_debug_once("Boost CPB capabilities not present in the processor\n"); 870 ret = 0; 871 goto exit_err; 872 } 873 874 ret = rdmsrq_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); 875 if (ret) { 876 pr_err_once("failed to read initial CPU boost state!\n"); 877 ret = -EIO; 878 goto exit_err; 879 } 880 881 if (!(boost_val & MSR_K7_HWCR_CPB_DIS)) 882 cpudata->boost_supported = true; 883 884 return 0; 885 886 exit_err: 887 cpudata->boost_supported = false; 888 return ret; 889 } 890 891 static void amd_perf_ctl_reset(unsigned int cpu) 892 { 893 wrmsrq_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); 894 } 895 896 #define CPPC_MAX_PERF U8_MAX 897 898 static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) 899 { 900 /* user disabled or not detected */ 901 if (!amd_pstate_prefcore) 902 return; 903 904 /* should use amd-hfi instead */ 905 if (cpu_feature_enabled(X86_FEATURE_AMD_WORKLOAD_CLASS) && 906 IS_ENABLED(CONFIG_AMD_HFI)) { 907 amd_pstate_prefcore = false; 908 return; 909 } 910 911 cpudata->hw_prefcore = true; 912 913 /* Priorities must be initialized before ITMT support can be toggled on. */ 914 sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); 915 } 916 917 static void amd_pstate_update_limits(struct cpufreq_policy *policy) 918 { 919 struct amd_cpudata *cpudata; 920 u32 prev_high = 0, cur_high = 0; 921 bool highest_perf_changed = false; 922 unsigned int cpu = policy->cpu; 923 924 if (!amd_pstate_prefcore) 925 return; 926 927 if (amd_get_highest_perf(cpu, &cur_high)) 928 return; 929 930 cpudata = policy->driver_data; 931 932 prev_high = READ_ONCE(cpudata->prefcore_ranking); 933 highest_perf_changed = (prev_high != cur_high); 934 if (highest_perf_changed) { 935 WRITE_ONCE(cpudata->prefcore_ranking, cur_high); 936 937 if (cur_high < CPPC_MAX_PERF) { 938 sched_set_itmt_core_prio((int)cur_high, cpu); 939 sched_update_asym_prefer_cpu(cpu, prev_high, cur_high); 940 } 941 } 942 } 943 944 /* 945 * Get pstate transition delay time from ACPI tables that firmware set 946 * instead of using hardcode value directly. 947 */ 948 static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) 949 { 950 int transition_delay_ns; 951 952 transition_delay_ns = cppc_get_transition_latency(cpu); 953 if (transition_delay_ns < 0) { 954 if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC)) 955 return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; 956 else 957 return AMD_PSTATE_TRANSITION_DELAY; 958 } 959 960 return transition_delay_ns / NSEC_PER_USEC; 961 } 962 963 /* 964 * Get pstate transition latency value from ACPI tables that firmware 965 * set instead of using hardcode value directly. 966 */ 967 static u32 amd_pstate_get_transition_latency(unsigned int cpu) 968 { 969 int transition_latency; 970 971 transition_latency = cppc_get_transition_latency(cpu); 972 if (transition_latency < 0) 973 return AMD_PSTATE_TRANSITION_LATENCY; 974 975 return transition_latency; 976 } 977 978 /* 979 * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq 980 * for the @cpudata object. 981 * 982 * Requires: all perf members of @cpudata to be initialized. 983 * 984 * Returns 0 on success, non-zero value on failure. 985 */ 986 static int amd_pstate_init_freq(struct amd_cpudata *cpudata) 987 { 988 u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq; 989 struct cppc_perf_caps cppc_perf; 990 union perf_cached perf; 991 int ret; 992 993 ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 994 if (ret) 995 return ret; 996 perf = READ_ONCE(cpudata->perf); 997 998 if (quirks && quirks->nominal_freq) 999 nominal_freq = quirks->nominal_freq; 1000 else 1001 nominal_freq = cppc_perf.nominal_freq; 1002 nominal_freq *= 1000; 1003 1004 if (quirks && quirks->lowest_freq) { 1005 min_freq = quirks->lowest_freq; 1006 perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq); 1007 WRITE_ONCE(cpudata->perf, perf); 1008 } else 1009 min_freq = cppc_perf.lowest_freq; 1010 1011 min_freq *= 1000; 1012 1013 WRITE_ONCE(cpudata->nominal_freq, nominal_freq); 1014 1015 /* max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf */ 1016 max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf); 1017 WRITE_ONCE(cpudata->max_freq, max_freq); 1018 1019 lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf); 1020 WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); 1021 1022 /** 1023 * Below values need to be initialized correctly, otherwise driver will fail to load 1024 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] 1025 * Check _CPC in ACPI table objects if any values are incorrect 1026 */ 1027 if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { 1028 pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", 1029 min_freq, max_freq, nominal_freq); 1030 return -EINVAL; 1031 } 1032 1033 if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { 1034 pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", 1035 lowest_nonlinear_freq, min_freq, nominal_freq); 1036 return -EINVAL; 1037 } 1038 1039 return 0; 1040 } 1041 1042 static int amd_pstate_cpu_init(struct cpufreq_policy *policy) 1043 { 1044 struct amd_cpudata *cpudata; 1045 union perf_cached perf; 1046 struct device *dev; 1047 int ret; 1048 1049 /* 1050 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, 1051 * which is ideal for initialization process. 1052 */ 1053 amd_perf_ctl_reset(policy->cpu); 1054 dev = get_cpu_device(policy->cpu); 1055 if (!dev) 1056 return -ENODEV; 1057 1058 cpudata = kzalloc_obj(*cpudata); 1059 if (!cpudata) 1060 return -ENOMEM; 1061 1062 cpudata->cpu = policy->cpu; 1063 1064 ret = amd_pstate_init_perf(cpudata); 1065 if (ret) 1066 goto free_cpudata1; 1067 1068 amd_pstate_init_prefcore(cpudata); 1069 1070 ret = amd_pstate_init_freq(cpudata); 1071 if (ret) 1072 goto free_cpudata1; 1073 1074 ret = amd_pstate_init_boost_support(cpudata); 1075 if (ret) 1076 goto free_cpudata1; 1077 1078 policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); 1079 policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); 1080 1081 perf = READ_ONCE(cpudata->perf); 1082 1083 policy->cpuinfo.min_freq = perf_to_freq(perf, cpudata->nominal_freq, 1084 perf.lowest_perf); 1085 policy->cpuinfo.max_freq = cpudata->max_freq; 1086 1087 policy->driver_data = cpudata; 1088 ret = amd_pstate_cppc_enable(policy); 1089 if (ret) 1090 goto free_cpudata1; 1091 1092 policy->boost_supported = READ_ONCE(cpudata->boost_supported); 1093 1094 /* It will be updated by governor */ 1095 policy->cur = policy->cpuinfo.min_freq; 1096 1097 if (cpu_feature_enabled(X86_FEATURE_CPPC)) 1098 policy->fast_switch_possible = true; 1099 1100 ret = amd_pstate_init_floor_perf(policy); 1101 if (ret) { 1102 dev_err(dev, "Failed to initialize Floor Perf (%d)\n", ret); 1103 goto free_cpudata1; 1104 } 1105 1106 ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], 1107 FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); 1108 if (ret < 0) { 1109 dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); 1110 goto free_cpudata1; 1111 } 1112 1113 ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1], 1114 FREQ_QOS_MAX, policy->cpuinfo.max_freq); 1115 if (ret < 0) { 1116 dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); 1117 goto free_cpudata2; 1118 } 1119 1120 1121 if (!current_pstate_driver->adjust_perf) 1122 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; 1123 1124 return 0; 1125 1126 free_cpudata2: 1127 freq_qos_remove_request(&cpudata->req[0]); 1128 free_cpudata1: 1129 pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); 1130 kfree(cpudata); 1131 policy->driver_data = NULL; 1132 return ret; 1133 } 1134 1135 static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) 1136 { 1137 struct amd_cpudata *cpudata = policy->driver_data; 1138 union perf_cached perf = READ_ONCE(cpudata->perf); 1139 1140 /* Reset CPPC_REQ MSR to the BIOS value */ 1141 amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); 1142 amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf); 1143 1144 freq_qos_remove_request(&cpudata->req[1]); 1145 freq_qos_remove_request(&cpudata->req[0]); 1146 policy->fast_switch_possible = false; 1147 kfree(cpudata); 1148 } 1149 1150 static int amd_pstate_get_balanced_epp(struct cpufreq_policy *policy) 1151 { 1152 struct amd_cpudata *cpudata = policy->driver_data; 1153 1154 if (power_supply_is_system_supplied()) 1155 return cpudata->epp_default_ac; 1156 else 1157 return cpudata->epp_default_dc; 1158 } 1159 1160 static int amd_pstate_power_supply_notifier(struct notifier_block *nb, 1161 unsigned long event, void *data) 1162 { 1163 struct amd_cpudata *cpudata = container_of(nb, struct amd_cpudata, power_nb); 1164 struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); 1165 u8 epp; 1166 int ret; 1167 1168 if (event != PSY_EVENT_PROP_CHANGED) 1169 return NOTIFY_OK; 1170 1171 /* dynamic actions are only applied while platform profile is in balanced */ 1172 if (cpudata->current_profile != PLATFORM_PROFILE_BALANCED) 1173 return 0; 1174 1175 epp = amd_pstate_get_balanced_epp(policy); 1176 1177 ret = amd_pstate_set_epp(policy, epp); 1178 if (ret) 1179 pr_warn("Failed to set CPU %d EPP %u: %d\n", cpudata->cpu, epp, ret); 1180 1181 return NOTIFY_OK; 1182 } 1183 1184 static int amd_pstate_profile_probe(void *drvdata, unsigned long *choices) 1185 { 1186 set_bit(PLATFORM_PROFILE_LOW_POWER, choices); 1187 set_bit(PLATFORM_PROFILE_BALANCED, choices); 1188 set_bit(PLATFORM_PROFILE_PERFORMANCE, choices); 1189 1190 return 0; 1191 } 1192 1193 static int amd_pstate_profile_get(struct device *dev, 1194 enum platform_profile_option *profile) 1195 { 1196 struct amd_cpudata *cpudata = dev_get_drvdata(dev); 1197 1198 *profile = cpudata->current_profile; 1199 1200 return 0; 1201 } 1202 1203 static int amd_pstate_profile_set(struct device *dev, 1204 enum platform_profile_option profile) 1205 { 1206 struct amd_cpudata *cpudata = dev_get_drvdata(dev); 1207 struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); 1208 int ret; 1209 1210 switch (profile) { 1211 case PLATFORM_PROFILE_LOW_POWER: 1212 ret = amd_pstate_set_epp(policy, AMD_CPPC_EPP_POWERSAVE); 1213 if (ret) 1214 return ret; 1215 break; 1216 case PLATFORM_PROFILE_BALANCED: 1217 ret = amd_pstate_set_epp(policy, 1218 amd_pstate_get_balanced_epp(policy)); 1219 if (ret) 1220 return ret; 1221 break; 1222 case PLATFORM_PROFILE_PERFORMANCE: 1223 ret = amd_pstate_set_epp(policy, AMD_CPPC_EPP_PERFORMANCE); 1224 if (ret) 1225 return ret; 1226 break; 1227 default: 1228 pr_err("Unknown Platform Profile %d\n", profile); 1229 return -EOPNOTSUPP; 1230 } 1231 1232 cpudata->current_profile = profile; 1233 1234 return 0; 1235 } 1236 1237 static const struct platform_profile_ops amd_pstate_profile_ops = { 1238 .probe = amd_pstate_profile_probe, 1239 .profile_set = amd_pstate_profile_set, 1240 .profile_get = amd_pstate_profile_get, 1241 }; 1242 1243 void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy) 1244 { 1245 struct amd_cpudata *cpudata = policy->driver_data; 1246 1247 if (cpudata->power_nb.notifier_call) 1248 power_supply_unreg_notifier(&cpudata->power_nb); 1249 if (cpudata->ppdev) { 1250 platform_profile_remove(cpudata->ppdev); 1251 cpudata->ppdev = NULL; 1252 } 1253 kfree(cpudata->profile_name); 1254 cpudata->dynamic_epp = false; 1255 } 1256 EXPORT_SYMBOL_GPL(amd_pstate_clear_dynamic_epp); 1257 1258 static int amd_pstate_set_dynamic_epp(struct cpufreq_policy *policy) 1259 { 1260 struct amd_cpudata *cpudata = policy->driver_data; 1261 int ret; 1262 u8 epp; 1263 1264 switch (cpudata->current_profile) { 1265 case PLATFORM_PROFILE_PERFORMANCE: 1266 epp = AMD_CPPC_EPP_PERFORMANCE; 1267 break; 1268 case PLATFORM_PROFILE_LOW_POWER: 1269 epp = AMD_CPPC_EPP_POWERSAVE; 1270 break; 1271 case PLATFORM_PROFILE_BALANCED: 1272 epp = amd_pstate_get_balanced_epp(policy); 1273 break; 1274 default: 1275 pr_err("Unknown Platform Profile %d\n", cpudata->current_profile); 1276 return -EOPNOTSUPP; 1277 } 1278 ret = amd_pstate_set_epp(policy, epp); 1279 if (ret) 1280 return ret; 1281 1282 cpudata->profile_name = kasprintf(GFP_KERNEL, "amd-pstate-epp-cpu%d", cpudata->cpu); 1283 if (!cpudata->profile_name) 1284 return -ENOMEM; 1285 1286 cpudata->ppdev = platform_profile_register(get_cpu_device(policy->cpu), 1287 cpudata->profile_name, 1288 policy->driver_data, 1289 &amd_pstate_profile_ops); 1290 if (IS_ERR(cpudata->ppdev)) { 1291 ret = PTR_ERR(cpudata->ppdev); 1292 goto cleanup; 1293 } 1294 1295 /* only enable notifier if things will actually change */ 1296 if (cpudata->epp_default_ac != cpudata->epp_default_dc) { 1297 cpudata->power_nb.notifier_call = amd_pstate_power_supply_notifier; 1298 ret = power_supply_reg_notifier(&cpudata->power_nb); 1299 if (ret) 1300 goto cleanup; 1301 } 1302 1303 cpudata->dynamic_epp = true; 1304 1305 return 0; 1306 1307 cleanup: 1308 amd_pstate_clear_dynamic_epp(policy); 1309 1310 return ret; 1311 } 1312 1313 /* Sysfs attributes */ 1314 1315 /* 1316 * This frequency is to indicate the maximum hardware frequency. 1317 * If boost is not active but supported, the frequency will be larger than the 1318 * one in cpuinfo. 1319 */ 1320 static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, 1321 char *buf) 1322 { 1323 struct amd_cpudata *cpudata = policy->driver_data; 1324 1325 return sysfs_emit(buf, "%u\n", cpudata->max_freq); 1326 } 1327 1328 static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, 1329 char *buf) 1330 { 1331 struct amd_cpudata *cpudata; 1332 union perf_cached perf; 1333 1334 cpudata = policy->driver_data; 1335 perf = READ_ONCE(cpudata->perf); 1336 1337 return sysfs_emit(buf, "%u\n", 1338 perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf)); 1339 } 1340 1341 /* 1342 * In some of ASICs, the highest_perf is not the one in the _CPC table, so we 1343 * need to expose it to sysfs. 1344 */ 1345 static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, 1346 char *buf) 1347 { 1348 struct amd_cpudata *cpudata; 1349 1350 cpudata = policy->driver_data; 1351 1352 return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf); 1353 } 1354 1355 static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, 1356 char *buf) 1357 { 1358 u8 perf; 1359 struct amd_cpudata *cpudata = policy->driver_data; 1360 1361 perf = READ_ONCE(cpudata->prefcore_ranking); 1362 1363 return sysfs_emit(buf, "%u\n", perf); 1364 } 1365 1366 static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, 1367 char *buf) 1368 { 1369 bool hw_prefcore; 1370 struct amd_cpudata *cpudata = policy->driver_data; 1371 1372 hw_prefcore = READ_ONCE(cpudata->hw_prefcore); 1373 1374 return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); 1375 } 1376 1377 static ssize_t show_energy_performance_available_preferences( 1378 struct cpufreq_policy *policy, char *buf) 1379 { 1380 int offset = 0, i; 1381 struct amd_cpudata *cpudata = policy->driver_data; 1382 1383 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) 1384 return sysfs_emit_at(buf, offset, "%s\n", 1385 energy_perf_strings[EPP_INDEX_PERFORMANCE]); 1386 1387 for (i = 0; i < ARRAY_SIZE(energy_perf_strings); i++) 1388 offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i]); 1389 1390 offset += sysfs_emit_at(buf, offset, "\n"); 1391 1392 return offset; 1393 } 1394 1395 ssize_t store_energy_performance_preference(struct cpufreq_policy *policy, 1396 const char *buf, size_t count) 1397 { 1398 struct amd_cpudata *cpudata = policy->driver_data; 1399 ssize_t ret; 1400 bool raw_epp = false; 1401 u8 epp; 1402 1403 if (cpudata->dynamic_epp) { 1404 pr_debug("EPP cannot be set when dynamic EPP is enabled\n"); 1405 return -EBUSY; 1406 } 1407 1408 /* 1409 * if the value matches a number, use that, otherwise see if 1410 * matches an index in the energy_perf_strings array 1411 */ 1412 ret = kstrtou8(buf, 0, &epp); 1413 raw_epp = !ret; 1414 if (ret) { 1415 ret = sysfs_match_string(energy_perf_strings, buf); 1416 if (ret < 0 || ret == EPP_INDEX_CUSTOM) 1417 return -EINVAL; 1418 if (ret) 1419 epp = epp_values[ret]; 1420 else 1421 epp = cpudata->epp_default_dc; 1422 } 1423 1424 if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { 1425 pr_debug("EPP cannot be set under performance policy\n"); 1426 return -EBUSY; 1427 } 1428 1429 ret = amd_pstate_set_epp(policy, epp); 1430 if (ret) 1431 return ret; 1432 1433 cpudata->raw_epp = raw_epp; 1434 1435 return count; 1436 } 1437 EXPORT_SYMBOL_GPL(store_energy_performance_preference); 1438 1439 ssize_t show_energy_performance_preference(struct cpufreq_policy *policy, char *buf) 1440 { 1441 struct amd_cpudata *cpudata = policy->driver_data; 1442 u8 preference, epp; 1443 1444 epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); 1445 1446 if (cpudata->raw_epp) 1447 return sysfs_emit(buf, "%u\n", epp); 1448 1449 switch (epp) { 1450 case AMD_CPPC_EPP_PERFORMANCE: 1451 preference = EPP_INDEX_PERFORMANCE; 1452 break; 1453 case AMD_CPPC_EPP_BALANCE_PERFORMANCE: 1454 preference = EPP_INDEX_BALANCE_PERFORMANCE; 1455 break; 1456 case AMD_CPPC_EPP_BALANCE_POWERSAVE: 1457 preference = EPP_INDEX_BALANCE_POWERSAVE; 1458 break; 1459 case AMD_CPPC_EPP_POWERSAVE: 1460 preference = EPP_INDEX_POWERSAVE; 1461 break; 1462 default: 1463 return -EINVAL; 1464 } 1465 1466 return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); 1467 } 1468 EXPORT_SYMBOL_GPL(show_energy_performance_preference); 1469 1470 static ssize_t store_amd_pstate_floor_freq(struct cpufreq_policy *policy, 1471 const char *buf, size_t count) 1472 { 1473 struct amd_cpudata *cpudata = policy->driver_data; 1474 union perf_cached perf = READ_ONCE(cpudata->perf); 1475 unsigned int freq; 1476 u8 floor_perf; 1477 int ret; 1478 1479 ret = kstrtouint(buf, 0, &freq); 1480 if (ret) 1481 return ret; 1482 1483 if (freq < policy->cpuinfo.min_freq || freq > policy->max) 1484 return -EINVAL; 1485 1486 floor_perf = freq_to_perf(perf, cpudata->nominal_freq, freq); 1487 ret = amd_pstate_set_floor_perf(policy, floor_perf); 1488 1489 if (!ret) 1490 cpudata->floor_freq = freq; 1491 1492 return ret ?: count; 1493 } 1494 1495 static ssize_t show_amd_pstate_floor_freq(struct cpufreq_policy *policy, char *buf) 1496 { 1497 struct amd_cpudata *cpudata = policy->driver_data; 1498 1499 return sysfs_emit(buf, "%u\n", cpudata->floor_freq); 1500 } 1501 1502 static ssize_t show_amd_pstate_floor_count(struct cpufreq_policy *policy, char *buf) 1503 { 1504 struct amd_cpudata *cpudata = policy->driver_data; 1505 u8 count = cpudata->floor_perf_cnt; 1506 1507 return sysfs_emit(buf, "%u\n", count); 1508 } 1509 1510 cpufreq_freq_attr_ro(amd_pstate_max_freq); 1511 cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); 1512 1513 cpufreq_freq_attr_ro(amd_pstate_highest_perf); 1514 cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); 1515 cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); 1516 cpufreq_freq_attr_rw(energy_performance_preference); 1517 cpufreq_freq_attr_ro(energy_performance_available_preferences); 1518 cpufreq_freq_attr_rw(amd_pstate_floor_freq); 1519 cpufreq_freq_attr_ro(amd_pstate_floor_count); 1520 1521 struct freq_attr_visibility { 1522 struct freq_attr *attr; 1523 bool (*visibility_fn)(void); 1524 }; 1525 1526 /* For attributes which are always visible */ 1527 static bool always_visible(void) 1528 { 1529 return true; 1530 } 1531 1532 /* Determines whether prefcore related attributes should be visible */ 1533 static bool prefcore_visibility(void) 1534 { 1535 return amd_pstate_prefcore; 1536 } 1537 1538 /* Determines whether energy performance preference should be visible */ 1539 static bool epp_visibility(void) 1540 { 1541 return cppc_state == AMD_PSTATE_ACTIVE; 1542 } 1543 1544 /* Determines whether amd_pstate_floor_freq related attributes should be visible */ 1545 static bool floor_freq_visibility(void) 1546 { 1547 return cpu_feature_enabled(X86_FEATURE_CPPC_PERF_PRIO); 1548 } 1549 1550 static struct freq_attr_visibility amd_pstate_attr_visibility[] = { 1551 {&amd_pstate_max_freq, always_visible}, 1552 {&amd_pstate_lowest_nonlinear_freq, always_visible}, 1553 {&amd_pstate_highest_perf, always_visible}, 1554 {&amd_pstate_prefcore_ranking, prefcore_visibility}, 1555 {&amd_pstate_hw_prefcore, prefcore_visibility}, 1556 {&energy_performance_preference, epp_visibility}, 1557 {&energy_performance_available_preferences, epp_visibility}, 1558 {&amd_pstate_floor_freq, floor_freq_visibility}, 1559 {&amd_pstate_floor_count, floor_freq_visibility}, 1560 }; 1561 1562 struct freq_attr **amd_pstate_get_current_attrs(void) 1563 { 1564 if (!current_pstate_driver) 1565 return NULL; 1566 return current_pstate_driver->attr; 1567 } 1568 EXPORT_SYMBOL_GPL(amd_pstate_get_current_attrs); 1569 1570 static struct freq_attr **get_freq_attrs(void) 1571 { 1572 bool attr_visible[ARRAY_SIZE(amd_pstate_attr_visibility)]; 1573 struct freq_attr **attrs; 1574 int i, j, count; 1575 1576 for (i = 0, count = 0; i < ARRAY_SIZE(amd_pstate_attr_visibility); i++) { 1577 struct freq_attr_visibility *v = &amd_pstate_attr_visibility[i]; 1578 1579 attr_visible[i] = v->visibility_fn(); 1580 if (attr_visible[i]) 1581 count++; 1582 } 1583 1584 /* amd_pstate_{max_freq, lowest_nonlinear_freq, highest_perf} should always be visible */ 1585 BUG_ON(!count); 1586 1587 attrs = kcalloc(count + 1, sizeof(struct freq_attr *), GFP_KERNEL); 1588 if (!attrs) 1589 return ERR_PTR(-ENOMEM); 1590 1591 for (i = 0, j = 0; i < ARRAY_SIZE(amd_pstate_attr_visibility); i++) { 1592 if (!attr_visible[i]) 1593 continue; 1594 1595 attrs[j++] = amd_pstate_attr_visibility[i].attr; 1596 } 1597 1598 return attrs; 1599 } 1600 1601 static void amd_pstate_driver_cleanup(void) 1602 { 1603 if (amd_pstate_prefcore) 1604 sched_clear_itmt_support(); 1605 1606 cppc_state = AMD_PSTATE_DISABLE; 1607 kfree(current_pstate_driver->attr); 1608 current_pstate_driver->attr = NULL; 1609 current_pstate_driver = NULL; 1610 } 1611 1612 static int amd_pstate_set_driver(int mode_idx) 1613 { 1614 if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { 1615 cppc_state = mode_idx; 1616 if (cppc_state == AMD_PSTATE_DISABLE) 1617 pr_info("driver is explicitly disabled\n"); 1618 1619 if (cppc_state == AMD_PSTATE_ACTIVE) 1620 current_pstate_driver = &amd_pstate_epp_driver; 1621 1622 if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) 1623 current_pstate_driver = &amd_pstate_driver; 1624 1625 return 0; 1626 } 1627 1628 return -EINVAL; 1629 } 1630 1631 static int amd_pstate_register_driver(int mode) 1632 { 1633 struct freq_attr **attr = NULL; 1634 int ret; 1635 1636 ret = amd_pstate_set_driver(mode); 1637 if (ret) 1638 return ret; 1639 1640 cppc_state = mode; 1641 1642 /* 1643 * Note: It is important to compute the attrs _after_ 1644 * re-initializing the cppc_state. Some attributes become 1645 * visible only when cppc_state is AMD_PSTATE_ACTIVE. 1646 */ 1647 attr = get_freq_attrs(); 1648 if (IS_ERR(attr)) { 1649 ret = (int) PTR_ERR(attr); 1650 pr_err("Couldn't compute freq_attrs for current mode %s [%d]\n", 1651 amd_pstate_get_mode_string(cppc_state), ret); 1652 amd_pstate_driver_cleanup(); 1653 return ret; 1654 } 1655 1656 current_pstate_driver->attr = attr; 1657 1658 /* at least one CPU supports CPB */ 1659 current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); 1660 1661 ret = cpufreq_register_driver(current_pstate_driver); 1662 if (ret) { 1663 amd_pstate_driver_cleanup(); 1664 return ret; 1665 } 1666 1667 /* Enable ITMT support once all CPUs have initialized their asym priorities. */ 1668 if (amd_pstate_prefcore) 1669 sched_set_itmt_support(); 1670 1671 return 0; 1672 } 1673 1674 static int amd_pstate_unregister_driver(int dummy) 1675 { 1676 cpufreq_unregister_driver(current_pstate_driver); 1677 amd_pstate_driver_cleanup(); 1678 return 0; 1679 } 1680 1681 static int amd_pstate_change_mode_without_dvr_change(int mode) 1682 { 1683 int cpu = 0; 1684 1685 cppc_state = mode; 1686 1687 if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) 1688 return 0; 1689 1690 for_each_online_cpu(cpu) { 1691 cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); 1692 } 1693 1694 return 0; 1695 } 1696 1697 static int amd_pstate_change_driver_mode(int mode) 1698 { 1699 int ret; 1700 1701 lockdep_assert_held(&amd_pstate_driver_lock); 1702 1703 ret = amd_pstate_unregister_driver(0); 1704 if (ret) 1705 return ret; 1706 1707 ret = amd_pstate_register_driver(mode); 1708 if (ret) 1709 return ret; 1710 1711 return 0; 1712 } 1713 1714 static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { 1715 [AMD_PSTATE_DISABLE] = { 1716 [AMD_PSTATE_DISABLE] = NULL, 1717 [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, 1718 [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver, 1719 [AMD_PSTATE_GUIDED] = amd_pstate_register_driver, 1720 }, 1721 [AMD_PSTATE_PASSIVE] = { 1722 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, 1723 [AMD_PSTATE_PASSIVE] = NULL, 1724 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, 1725 [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change, 1726 }, 1727 [AMD_PSTATE_ACTIVE] = { 1728 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, 1729 [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode, 1730 [AMD_PSTATE_ACTIVE] = NULL, 1731 [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode, 1732 }, 1733 [AMD_PSTATE_GUIDED] = { 1734 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, 1735 [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change, 1736 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, 1737 [AMD_PSTATE_GUIDED] = NULL, 1738 }, 1739 }; 1740 1741 static ssize_t amd_pstate_show_status(char *buf) 1742 { 1743 if (!current_pstate_driver) 1744 return sysfs_emit(buf, "disable\n"); 1745 1746 return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); 1747 } 1748 1749 int amd_pstate_get_status(void) 1750 { 1751 return cppc_state; 1752 } 1753 EXPORT_SYMBOL_GPL(amd_pstate_get_status); 1754 1755 int amd_pstate_update_status(const char *buf, size_t size) 1756 { 1757 int mode_idx; 1758 1759 if (size > strlen("passive") || size < strlen("active")) 1760 return -EINVAL; 1761 1762 mode_idx = get_mode_idx_from_str(buf, size); 1763 if (mode_idx < 0) 1764 return mode_idx; 1765 1766 if (mode_state_machine[cppc_state][mode_idx]) { 1767 guard(mutex)(&amd_pstate_driver_lock); 1768 return mode_state_machine[cppc_state][mode_idx](mode_idx); 1769 } 1770 1771 return 0; 1772 } 1773 EXPORT_SYMBOL_GPL(amd_pstate_update_status); 1774 1775 static ssize_t status_show(struct device *dev, 1776 struct device_attribute *attr, char *buf) 1777 { 1778 1779 guard(mutex)(&amd_pstate_driver_lock); 1780 1781 return amd_pstate_show_status(buf); 1782 } 1783 1784 static ssize_t status_store(struct device *a, struct device_attribute *b, 1785 const char *buf, size_t count) 1786 { 1787 char *p = memchr(buf, '\n', count); 1788 int ret; 1789 1790 ret = amd_pstate_update_status(buf, p ? p - buf : count); 1791 1792 return ret < 0 ? ret : count; 1793 } 1794 1795 static ssize_t prefcore_show(struct device *dev, 1796 struct device_attribute *attr, char *buf) 1797 { 1798 return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); 1799 } 1800 1801 static ssize_t dynamic_epp_show(struct device *dev, 1802 struct device_attribute *attr, char *buf) 1803 { 1804 return sysfs_emit(buf, "%s\n", str_enabled_disabled(dynamic_epp)); 1805 } 1806 1807 static ssize_t dynamic_epp_store(struct device *a, struct device_attribute *b, 1808 const char *buf, size_t count) 1809 { 1810 bool enabled; 1811 int ret; 1812 1813 ret = kstrtobool(buf, &enabled); 1814 if (ret) 1815 return ret; 1816 1817 guard(mutex)(&amd_pstate_driver_lock); 1818 1819 if (cppc_state != AMD_PSTATE_ACTIVE) { 1820 pr_debug("dynamic_epp can only be toggled in active mode\n"); 1821 return -EINVAL; 1822 } 1823 1824 /* Nothing to do */ 1825 if (dynamic_epp == enabled) 1826 return count; 1827 1828 /* reinitialize with desired dynamic EPP value */ 1829 dynamic_epp = enabled; 1830 ret = amd_pstate_change_driver_mode(cppc_state); 1831 if (ret) 1832 dynamic_epp = false; 1833 1834 return ret ? ret : count; 1835 } 1836 1837 static DEVICE_ATTR_RW(status); 1838 static DEVICE_ATTR_RO(prefcore); 1839 static DEVICE_ATTR_RW(dynamic_epp); 1840 1841 static struct attribute *pstate_global_attributes[] = { 1842 &dev_attr_status.attr, 1843 &dev_attr_prefcore.attr, 1844 &dev_attr_dynamic_epp.attr, 1845 NULL 1846 }; 1847 1848 static const struct attribute_group amd_pstate_global_attr_group = { 1849 .name = "amd_pstate", 1850 .attrs = pstate_global_attributes, 1851 }; 1852 1853 static bool amd_pstate_acpi_pm_profile_server(void) 1854 { 1855 switch (acpi_gbl_FADT.preferred_profile) { 1856 case PM_ENTERPRISE_SERVER: 1857 case PM_SOHO_SERVER: 1858 case PM_PERFORMANCE_SERVER: 1859 return true; 1860 } 1861 return false; 1862 } 1863 1864 static bool amd_pstate_acpi_pm_profile_undefined(void) 1865 { 1866 if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED) 1867 return true; 1868 if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES) 1869 return true; 1870 return false; 1871 } 1872 1873 static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) 1874 { 1875 struct amd_cpudata *cpudata; 1876 union perf_cached perf; 1877 struct device *dev; 1878 int ret; 1879 1880 /* 1881 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, 1882 * which is ideal for initialization process. 1883 */ 1884 amd_perf_ctl_reset(policy->cpu); 1885 dev = get_cpu_device(policy->cpu); 1886 if (!dev) 1887 return -ENODEV; 1888 1889 cpudata = kzalloc_obj(*cpudata); 1890 if (!cpudata) 1891 return -ENOMEM; 1892 1893 cpudata->cpu = policy->cpu; 1894 1895 ret = amd_pstate_init_perf(cpudata); 1896 if (ret) 1897 goto free_cpudata1; 1898 1899 amd_pstate_init_prefcore(cpudata); 1900 1901 ret = amd_pstate_init_freq(cpudata); 1902 if (ret) 1903 goto free_cpudata1; 1904 1905 ret = amd_pstate_init_boost_support(cpudata); 1906 if (ret) 1907 goto free_cpudata1; 1908 1909 perf = READ_ONCE(cpudata->perf); 1910 1911 policy->cpuinfo.min_freq = perf_to_freq(perf, cpudata->nominal_freq, 1912 perf.lowest_perf); 1913 policy->cpuinfo.max_freq = cpudata->max_freq; 1914 policy->driver_data = cpudata; 1915 1916 ret = amd_pstate_cppc_enable(policy); 1917 if (ret) 1918 goto free_cpudata1; 1919 1920 /* It will be updated by governor */ 1921 policy->cur = policy->cpuinfo.min_freq; 1922 1923 1924 policy->boost_supported = READ_ONCE(cpudata->boost_supported); 1925 1926 /* 1927 * Set the policy to provide a valid fallback value in case 1928 * the default cpufreq governor is neither powersave nor performance. 1929 */ 1930 if (amd_pstate_acpi_pm_profile_server() || 1931 amd_pstate_acpi_pm_profile_undefined()) { 1932 policy->policy = CPUFREQ_POLICY_PERFORMANCE; 1933 cpudata->epp_default_ac = cpudata->epp_default_dc = amd_pstate_get_epp(cpudata); 1934 cpudata->current_profile = PLATFORM_PROFILE_PERFORMANCE; 1935 } else { 1936 policy->policy = CPUFREQ_POLICY_POWERSAVE; 1937 cpudata->epp_default_ac = AMD_CPPC_EPP_PERFORMANCE; 1938 cpudata->epp_default_dc = AMD_CPPC_EPP_BALANCE_PERFORMANCE; 1939 cpudata->current_profile = PLATFORM_PROFILE_BALANCED; 1940 } 1941 1942 if (dynamic_epp) 1943 ret = amd_pstate_set_dynamic_epp(policy); 1944 else 1945 ret = amd_pstate_set_epp(policy, cpudata->epp_default_dc); 1946 if (ret) 1947 goto free_cpudata1; 1948 1949 ret = amd_pstate_init_floor_perf(policy); 1950 if (ret) { 1951 dev_err(dev, "Failed to initialize Floor Perf (%d)\n", ret); 1952 goto free_cpudata1; 1953 } 1954 1955 current_pstate_driver->adjust_perf = NULL; 1956 1957 return 0; 1958 1959 free_cpudata1: 1960 pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); 1961 kfree(cpudata); 1962 policy->driver_data = NULL; 1963 return ret; 1964 } 1965 1966 static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) 1967 { 1968 struct amd_cpudata *cpudata = policy->driver_data; 1969 1970 if (cpudata) { 1971 union perf_cached perf = READ_ONCE(cpudata->perf); 1972 1973 if (cpudata->dynamic_epp) 1974 amd_pstate_clear_dynamic_epp(policy); 1975 1976 /* Reset CPPC_REQ MSR to the BIOS value */ 1977 amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); 1978 amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf); 1979 1980 kfree(cpudata); 1981 policy->driver_data = NULL; 1982 } 1983 1984 pr_debug("CPU %d exiting\n", policy->cpu); 1985 } 1986 1987 static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy, bool policy_change) 1988 { 1989 struct amd_cpudata *cpudata = policy->driver_data; 1990 union perf_cached perf; 1991 u8 epp; 1992 1993 if (policy_change || 1994 policy->min != cpudata->min_limit_freq || 1995 policy->max != cpudata->max_limit_freq) 1996 amd_pstate_update_min_max_limit(policy); 1997 1998 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) 1999 epp = 0; 2000 else 2001 epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); 2002 2003 perf = READ_ONCE(cpudata->perf); 2004 2005 return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U, 2006 perf.max_limit_perf, epp, false); 2007 } 2008 2009 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) 2010 { 2011 struct amd_cpudata *cpudata = policy->driver_data; 2012 int ret; 2013 2014 if (!policy->cpuinfo.max_freq) 2015 return -ENODEV; 2016 2017 cpudata->policy = policy->policy; 2018 2019 ret = amd_pstate_epp_update_limit(policy, true); 2020 if (ret) 2021 return ret; 2022 2023 /* 2024 * policy->cur is never updated with the amd_pstate_epp driver, but it 2025 * is used as a stale frequency value. So, keep it within limits. 2026 */ 2027 policy->cur = policy->min; 2028 2029 return 0; 2030 } 2031 2032 static int amd_pstate_cpu_online(struct cpufreq_policy *policy) 2033 { 2034 struct amd_cpudata *cpudata = policy->driver_data; 2035 union perf_cached perf = READ_ONCE(cpudata->perf); 2036 u8 cached_floor_perf; 2037 int ret; 2038 2039 ret = amd_pstate_cppc_enable(policy); 2040 if (ret) 2041 return ret; 2042 2043 cached_floor_perf = freq_to_perf(perf, cpudata->nominal_freq, cpudata->floor_freq); 2044 return amd_pstate_set_floor_perf(policy, cached_floor_perf); 2045 } 2046 2047 static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) 2048 { 2049 struct amd_cpudata *cpudata = policy->driver_data; 2050 union perf_cached perf = READ_ONCE(cpudata->perf); 2051 int ret; 2052 2053 /* 2054 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified 2055 * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the 2056 * limits, epp and desired perf will get reset to the cached values in cpudata struct 2057 */ 2058 ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 2059 FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), 2060 FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), 2061 FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), 2062 false); 2063 if (ret) 2064 return ret; 2065 2066 return amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf); 2067 } 2068 2069 static int amd_pstate_suspend(struct cpufreq_policy *policy) 2070 { 2071 struct amd_cpudata *cpudata = policy->driver_data; 2072 union perf_cached perf = READ_ONCE(cpudata->perf); 2073 int ret; 2074 2075 /* 2076 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified 2077 * min_perf value across kexec reboots. If this CPU is just resumed back without kexec, 2078 * the limits, epp and desired perf will get reset to the cached values in cpudata struct 2079 */ 2080 ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 2081 FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), 2082 FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), 2083 FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), 2084 false); 2085 if (ret) 2086 return ret; 2087 2088 ret = amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf); 2089 if (ret) 2090 return ret; 2091 2092 /* set this flag to avoid setting core offline*/ 2093 cpudata->suspended = true; 2094 2095 return 0; 2096 } 2097 2098 static int amd_pstate_resume(struct cpufreq_policy *policy) 2099 { 2100 struct amd_cpudata *cpudata = policy->driver_data; 2101 union perf_cached perf = READ_ONCE(cpudata->perf); 2102 int cur_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->cur); 2103 u8 cached_floor_perf; 2104 int ret; 2105 2106 /* Set CPPC_REQ to last sane value until the governor updates it */ 2107 ret = amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf, 2108 0U, false); 2109 if (ret) 2110 return ret; 2111 2112 cached_floor_perf = freq_to_perf(perf, cpudata->nominal_freq, cpudata->floor_freq); 2113 return amd_pstate_set_floor_perf(policy, cached_floor_perf); 2114 } 2115 2116 static int amd_pstate_epp_resume(struct cpufreq_policy *policy) 2117 { 2118 struct amd_cpudata *cpudata = policy->driver_data; 2119 union perf_cached perf = READ_ONCE(cpudata->perf); 2120 u8 cached_floor_perf; 2121 2122 if (cpudata->suspended) { 2123 int ret; 2124 2125 /* enable amd pstate from suspend state*/ 2126 ret = amd_pstate_epp_update_limit(policy, false); 2127 if (ret) 2128 return ret; 2129 2130 cpudata->suspended = false; 2131 } 2132 2133 cached_floor_perf = freq_to_perf(perf, cpudata->nominal_freq, cpudata->floor_freq); 2134 return amd_pstate_set_floor_perf(policy, cached_floor_perf); 2135 } 2136 2137 static struct cpufreq_driver amd_pstate_driver = { 2138 .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, 2139 .verify = amd_pstate_verify, 2140 .target = amd_pstate_target, 2141 .fast_switch = amd_pstate_fast_switch, 2142 .init = amd_pstate_cpu_init, 2143 .exit = amd_pstate_cpu_exit, 2144 .online = amd_pstate_cpu_online, 2145 .offline = amd_pstate_cpu_offline, 2146 .suspend = amd_pstate_suspend, 2147 .resume = amd_pstate_resume, 2148 .set_boost = amd_pstate_set_boost, 2149 .update_limits = amd_pstate_update_limits, 2150 .name = "amd-pstate", 2151 }; 2152 2153 static struct cpufreq_driver amd_pstate_epp_driver = { 2154 .flags = CPUFREQ_CONST_LOOPS, 2155 .verify = amd_pstate_verify, 2156 .setpolicy = amd_pstate_epp_set_policy, 2157 .init = amd_pstate_epp_cpu_init, 2158 .exit = amd_pstate_epp_cpu_exit, 2159 .offline = amd_pstate_cpu_offline, 2160 .online = amd_pstate_cpu_online, 2161 .suspend = amd_pstate_suspend, 2162 .resume = amd_pstate_epp_resume, 2163 .update_limits = amd_pstate_update_limits, 2164 .set_boost = amd_pstate_set_boost, 2165 .name = "amd-pstate-epp", 2166 }; 2167 2168 /* 2169 * CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F. 2170 * show the debug message that helps to check if the CPU has CPPC support for loading issue. 2171 */ 2172 static bool amd_cppc_supported(void) 2173 { 2174 struct cpuinfo_x86 *c = &cpu_data(0); 2175 bool warn = false; 2176 2177 if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) { 2178 pr_debug_once("CPPC feature is not supported by the processor\n"); 2179 return false; 2180 } 2181 2182 /* 2183 * If the CPPC feature is disabled in the BIOS for processors 2184 * that support MSR-based CPPC, the AMD Pstate driver may not 2185 * function correctly. 2186 * 2187 * For such processors, check the CPPC flag and display a 2188 * warning message if the platform supports CPPC. 2189 * 2190 * Note: The code check below will not abort the driver 2191 * registration process because of the code is added for 2192 * debugging purposes. Besides, it may still be possible for 2193 * the driver to work using the shared-memory mechanism. 2194 */ 2195 if (!cpu_feature_enabled(X86_FEATURE_CPPC)) { 2196 if (cpu_feature_enabled(X86_FEATURE_ZEN2)) { 2197 switch (c->x86_model) { 2198 case 0x60 ... 0x6F: 2199 case 0x80 ... 0xAF: 2200 warn = true; 2201 break; 2202 } 2203 } else if (cpu_feature_enabled(X86_FEATURE_ZEN3) || 2204 cpu_feature_enabled(X86_FEATURE_ZEN4)) { 2205 switch (c->x86_model) { 2206 case 0x10 ... 0x1F: 2207 case 0x40 ... 0xAF: 2208 warn = true; 2209 break; 2210 } 2211 } else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) { 2212 warn = true; 2213 } 2214 } 2215 2216 if (warn) 2217 pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n" 2218 "Please enable it if your BIOS has the CPPC option.\n"); 2219 return true; 2220 } 2221 2222 static int __init amd_pstate_init(void) 2223 { 2224 struct device *dev_root; 2225 int ret; 2226 2227 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 2228 return -ENODEV; 2229 2230 /* show debug message only if CPPC is not supported */ 2231 if (!amd_cppc_supported()) 2232 return -EOPNOTSUPP; 2233 2234 /* show warning message when BIOS broken or ACPI disabled */ 2235 if (!acpi_cpc_valid()) { 2236 pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); 2237 return -ENODEV; 2238 } 2239 2240 /* don't keep reloading if cpufreq_driver exists */ 2241 if (cpufreq_get_current_driver()) 2242 return -EEXIST; 2243 2244 quirks = NULL; 2245 2246 /* check if this machine need CPPC quirks */ 2247 dmi_check_system(amd_pstate_quirks_table); 2248 2249 /* 2250 * determine the driver mode from the command line or kernel config. 2251 * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED. 2252 * command line options will override the kernel config settings. 2253 */ 2254 2255 if (cppc_state == AMD_PSTATE_UNDEFINED) { 2256 /* Disable on the following configs by default: 2257 * 1. Undefined platforms 2258 * 2. Server platforms with CPUs older than Family 0x1A. 2259 */ 2260 if (amd_pstate_acpi_pm_profile_undefined() || 2261 (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) { 2262 pr_info("driver load is disabled, boot with specific mode to enable this\n"); 2263 return -ENODEV; 2264 } 2265 /* get driver mode from kernel config option [1:4] */ 2266 cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE; 2267 } 2268 2269 if (cppc_state == AMD_PSTATE_DISABLE) { 2270 pr_info("driver load is disabled, boot with specific mode to enable this\n"); 2271 return -ENODEV; 2272 } 2273 2274 /* capability check */ 2275 if (cpu_feature_enabled(X86_FEATURE_CPPC)) { 2276 pr_debug("AMD CPPC MSR based functionality is supported\n"); 2277 } else { 2278 pr_debug("AMD CPPC shared memory based functionality is supported\n"); 2279 static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); 2280 static_call_update(amd_pstate_init_perf, shmem_init_perf); 2281 static_call_update(amd_pstate_update_perf, shmem_update_perf); 2282 static_call_update(amd_pstate_get_epp, shmem_get_epp); 2283 static_call_update(amd_pstate_set_epp, shmem_set_epp); 2284 } 2285 2286 if (amd_pstate_prefcore) { 2287 ret = amd_detect_prefcore(&amd_pstate_prefcore); 2288 if (ret) 2289 return ret; 2290 } 2291 2292 ret = amd_pstate_register_driver(cppc_state); 2293 if (ret) { 2294 pr_err("failed to register with return %d\n", ret); 2295 return ret; 2296 } 2297 2298 dev_root = bus_get_dev_root(&cpu_subsys); 2299 if (dev_root) { 2300 ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group); 2301 put_device(dev_root); 2302 if (ret) { 2303 pr_err("sysfs attribute export failed with error %d.\n", ret); 2304 goto global_attr_free; 2305 } 2306 } 2307 2308 return ret; 2309 2310 global_attr_free: 2311 amd_pstate_unregister_driver(0); 2312 return ret; 2313 } 2314 device_initcall(amd_pstate_init); 2315 2316 static int __init amd_pstate_param(char *str) 2317 { 2318 size_t size; 2319 int mode_idx; 2320 2321 if (!str) 2322 return -EINVAL; 2323 2324 size = strlen(str); 2325 mode_idx = get_mode_idx_from_str(str, size); 2326 2327 return amd_pstate_set_driver(mode_idx); 2328 } 2329 2330 static int __init amd_prefcore_param(char *str) 2331 { 2332 if (!strcmp(str, "disable")) 2333 amd_pstate_prefcore = false; 2334 2335 return 0; 2336 } 2337 2338 static int __init amd_dynamic_epp_param(char *str) 2339 { 2340 if (!strcmp(str, "disable")) 2341 dynamic_epp = false; 2342 if (!strcmp(str, "enable")) 2343 dynamic_epp = true; 2344 2345 return 0; 2346 } 2347 2348 early_param("amd_pstate", amd_pstate_param); 2349 early_param("amd_prefcore", amd_prefcore_param); 2350 early_param("amd_dynamic_epp", amd_dynamic_epp_param); 2351 2352 MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); 2353 MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); 2354