// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * allows a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution. The
 * X86_FEATURE_CPPC CPU feature flag is used to distinguish between the two.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/amd-pstate.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY	20000
#define AMD_PSTATE_TRANSITION_DELAY	1000
#define AMD_PSTATE_PREFCORE_THRESHOLD	166

/*
 * TODO: We need more time to fine tune processors with the shared memory
 * solution together with the community.
 *
 * There are some performance drops on CPU benchmarks reported by SUSE. We are
 * working with them to fine tune the shared memory solution, so it is disabled
 * by default and those processors fall back to acpi-cpufreq; a module parameter
 * is provided to enable it manually for debugging.
 */
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;

/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate during
 * short periods of activity.
 * EPP values are used for different OS profiles (balanced, performance,
 * power savings). The display strings corresponding to each EPP index are
 * listed in energy_perf_strings[]:
 *	index		String
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};

static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};

typedef int (*cppc_mode_transition_fn)(int);

static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}
	return -EINVAL;
}

static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);

static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
{
	u64 epp;
	int ret;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		if (!cppc_req_cached) {
			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
					    &cppc_req_cached);
			if (epp)
				return epp;
		}
		epp = (cppc_req_cached >> 24) & 0xFF;
	} else {
		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
		if (ret < 0) {
			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
			return -EIO;
		}
	}

	return (s16)(epp & 0xff);
}

static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
{
	s16 epp;
	int index = -EINVAL;

	epp = amd_pstate_get_epp(cpudata, 0);
	if (epp < 0)
		return epp;

	switch (epp) {
	case AMD_CPPC_EPP_PERFORMANCE:
		index = EPP_INDEX_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
		index = EPP_INDEX_BALANCE_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
		index = EPP_INDEX_BALANCE_POWERSAVE;
		break;
	case AMD_CPPC_EPP_POWERSAVE:
		index = EPP_INDEX_POWERSAVE;
		break;
	default:
		break;
	}

	return index;
}

static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	int ret;
	struct cppc_perf_ctrls perf_ctrls;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		u64 value = READ_ONCE(cpudata->cppc_req_cached);

		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
		if (!ret)
			cpudata->epp_cached = epp;
	} else {
		perf_ctrls.energy_perf = epp;
		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
		if (ret) {
			pr_debug("failed to set energy perf value (%d)\n", ret);
			return ret;
		}
		cpudata->epp_cached = epp;
	}

	return ret;
}

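/*
 * Translate a sysfs energy_performance_preference index (see
 * energy_perf_strings[]) into the raw EPP value from epp_values[] and program
 * it via amd_pstate_set_epp(). Index 0 ("default") is rejected, and a non-zero
 * EPP is refused while the performance policy is active.
 */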
static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
					    int pref_index)
{
	int epp = -EINVAL;
	int ret;

	if (!pref_index) {
		pr_debug("EPP pref_index is invalid\n");
		return -EINVAL;
	}

	if (epp == -EINVAL)
		epp = epp_values[pref_index];

	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		pr_debug("EPP cannot be set under performance policy\n");
		return -EBUSY;
	}

	ret = amd_pstate_set_epp(cpudata, epp);

	return ret;
}

static inline int pstate_enable(bool enable)
{
	int ret, cpu;
	unsigned long logical_proc_id_mask = 0;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		unsigned long logical_id = topology_logical_die_id(cpu);

		if (test_bit(logical_id, &logical_proc_id_mask))
			continue;

		set_bit(logical_id, &logical_proc_id_mask);

		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
					 enable);
		if (ret)
			return ret;
	}

	cppc_enabled = enable;
	return 0;
}

static int cppc_enable(bool enable)
{
	int cpu, ret = 0;
	struct cppc_perf_ctrls perf_ctrls;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;

		/* Enable autonomous mode for EPP */
		if (cppc_state == AMD_PSTATE_ACTIVE) {
			/* Set desired perf as zero to allow EPP firmware control */
			perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
			if (ret)
				return ret;
		}
	}

	cppc_enabled = enable;
	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);

static inline int amd_pstate_enable(bool enable)
{
	return static_call(amd_pstate_enable)(enable);
}

static int pstate_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1;
	u32 highest_perf;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	/* For platforms that do not support the preferred core feature, the
	 * highest_perf may be configured with 166 or 255. To avoid the max
	 * frequency being calculated wrongly, we take the
	 * AMD_CPPC_HIGHEST_PERF(cap1) value as the default max perf.
307 */ 308 if (cpudata->hw_prefcore) 309 highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; 310 else 311 highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); 312 313 WRITE_ONCE(cpudata->highest_perf, highest_perf); 314 WRITE_ONCE(cpudata->max_limit_perf, highest_perf); 315 WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); 316 WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); 317 WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); 318 WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); 319 WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); 320 return 0; 321 } 322 323 static int cppc_init_perf(struct amd_cpudata *cpudata) 324 { 325 struct cppc_perf_caps cppc_perf; 326 u32 highest_perf; 327 328 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 329 if (ret) 330 return ret; 331 332 if (cpudata->hw_prefcore) 333 highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; 334 else 335 highest_perf = cppc_perf.highest_perf; 336 337 WRITE_ONCE(cpudata->highest_perf, highest_perf); 338 WRITE_ONCE(cpudata->max_limit_perf, highest_perf); 339 WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); 340 WRITE_ONCE(cpudata->lowest_nonlinear_perf, 341 cppc_perf.lowest_nonlinear_perf); 342 WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); 343 WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); 344 WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); 345 346 if (cppc_state == AMD_PSTATE_ACTIVE) 347 return 0; 348 349 ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf); 350 if (ret) { 351 pr_warn("failed to get auto_sel, ret: %d\n", ret); 352 return 0; 353 } 354 355 ret = cppc_set_auto_sel(cpudata->cpu, 356 (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); 357 358 if (ret) 359 pr_warn("failed to set auto_sel, ret: %d\n", ret); 360 361 return ret; 362 } 363 364 DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); 365 366 static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) 367 { 368 return static_call(amd_pstate_init_perf)(cpudata); 369 } 370 371 static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, 372 u32 des_perf, u32 max_perf, bool fast_switch) 373 { 374 if (fast_switch) 375 wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); 376 else 377 wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, 378 READ_ONCE(cpudata->cppc_req_cached)); 379 } 380 381 static void cppc_update_perf(struct amd_cpudata *cpudata, 382 u32 min_perf, u32 des_perf, 383 u32 max_perf, bool fast_switch) 384 { 385 struct cppc_perf_ctrls perf_ctrls; 386 387 perf_ctrls.max_perf = max_perf; 388 perf_ctrls.min_perf = min_perf; 389 perf_ctrls.desired_perf = des_perf; 390 391 cppc_set_perf(cpudata->cpu, &perf_ctrls); 392 } 393 394 DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); 395 396 static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, 397 u32 min_perf, u32 des_perf, 398 u32 max_perf, bool fast_switch) 399 { 400 static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, 401 max_perf, fast_switch); 402 } 403 404 static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) 405 { 406 u64 aperf, mperf, tsc; 407 unsigned long flags; 408 409 local_irq_save(flags); 410 rdmsrl(MSR_IA32_APERF, aperf); 411 rdmsrl(MSR_IA32_MPERF, mperf); 412 tsc = rdtsc(); 413 414 if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { 415 local_irq_restore(flags); 416 return false; 417 } 418 419 local_irq_restore(flags); 420 421 cpudata->cur.aperf = aperf; 422 cpudata->cur.mperf = mperf; 423 
cpudata->cur.tsc = tsc; 424 cpudata->cur.aperf -= cpudata->prev.aperf; 425 cpudata->cur.mperf -= cpudata->prev.mperf; 426 cpudata->cur.tsc -= cpudata->prev.tsc; 427 428 cpudata->prev.aperf = aperf; 429 cpudata->prev.mperf = mperf; 430 cpudata->prev.tsc = tsc; 431 432 cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf); 433 434 return true; 435 } 436 437 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, 438 u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) 439 { 440 u64 prev = READ_ONCE(cpudata->cppc_req_cached); 441 u64 value = prev; 442 443 min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, 444 cpudata->max_limit_perf); 445 max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, 446 cpudata->max_limit_perf); 447 des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); 448 449 if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { 450 min_perf = des_perf; 451 des_perf = 0; 452 } 453 454 value &= ~AMD_CPPC_MIN_PERF(~0L); 455 value |= AMD_CPPC_MIN_PERF(min_perf); 456 457 value &= ~AMD_CPPC_DES_PERF(~0L); 458 value |= AMD_CPPC_DES_PERF(des_perf); 459 460 value &= ~AMD_CPPC_MAX_PERF(~0L); 461 value |= AMD_CPPC_MAX_PERF(max_perf); 462 463 if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { 464 trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, 465 cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, 466 cpudata->cpu, (value != prev), fast_switch); 467 } 468 469 if (value == prev) 470 return; 471 472 WRITE_ONCE(cpudata->cppc_req_cached, value); 473 474 amd_pstate_update_perf(cpudata, min_perf, des_perf, 475 max_perf, fast_switch); 476 } 477 478 static int amd_pstate_verify(struct cpufreq_policy_data *policy) 479 { 480 cpufreq_verify_within_cpu_limits(policy); 481 482 return 0; 483 } 484 485 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) 486 { 487 u32 max_limit_perf, min_limit_perf, lowest_perf; 488 struct amd_cpudata *cpudata = policy->driver_data; 489 490 max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); 491 min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); 492 493 lowest_perf = READ_ONCE(cpudata->lowest_perf); 494 if (min_limit_perf < lowest_perf) 495 min_limit_perf = lowest_perf; 496 497 if (max_limit_perf < min_limit_perf) 498 max_limit_perf = min_limit_perf; 499 500 WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); 501 WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); 502 WRITE_ONCE(cpudata->max_limit_freq, policy->max); 503 WRITE_ONCE(cpudata->min_limit_freq, policy->min); 504 505 return 0; 506 } 507 508 static int amd_pstate_update_freq(struct cpufreq_policy *policy, 509 unsigned int target_freq, bool fast_switch) 510 { 511 struct cpufreq_freqs freqs; 512 struct amd_cpudata *cpudata = policy->driver_data; 513 unsigned long max_perf, min_perf, des_perf, cap_perf; 514 515 if (!cpudata->max_freq) 516 return -ENODEV; 517 518 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) 519 amd_pstate_update_min_max_limit(policy); 520 521 cap_perf = READ_ONCE(cpudata->highest_perf); 522 min_perf = READ_ONCE(cpudata->lowest_perf); 523 max_perf = cap_perf; 524 525 freqs.old = policy->cur; 526 freqs.new = target_freq; 527 528 des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, 529 cpudata->max_freq); 530 531 WARN_ON(fast_switch && !policy->fast_switch_enabled); 532 /* 533 * If fast_switch is desired, then 
there aren't any registered 534 * transition notifiers. See comment for 535 * cpufreq_enable_fast_switch(). 536 */ 537 if (!fast_switch) 538 cpufreq_freq_transition_begin(policy, &freqs); 539 540 amd_pstate_update(cpudata, min_perf, des_perf, 541 max_perf, fast_switch, policy->governor->flags); 542 543 if (!fast_switch) 544 cpufreq_freq_transition_end(policy, &freqs, false); 545 546 return 0; 547 } 548 549 static int amd_pstate_target(struct cpufreq_policy *policy, 550 unsigned int target_freq, 551 unsigned int relation) 552 { 553 return amd_pstate_update_freq(policy, target_freq, false); 554 } 555 556 static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy, 557 unsigned int target_freq) 558 { 559 if (!amd_pstate_update_freq(policy, target_freq, true)) 560 return target_freq; 561 return policy->cur; 562 } 563 564 static void amd_pstate_adjust_perf(unsigned int cpu, 565 unsigned long _min_perf, 566 unsigned long target_perf, 567 unsigned long capacity) 568 { 569 unsigned long max_perf, min_perf, des_perf, 570 cap_perf, lowest_nonlinear_perf, max_freq; 571 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); 572 struct amd_cpudata *cpudata = policy->driver_data; 573 unsigned int target_freq; 574 575 if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) 576 amd_pstate_update_min_max_limit(policy); 577 578 579 cap_perf = READ_ONCE(cpudata->highest_perf); 580 lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); 581 max_freq = READ_ONCE(cpudata->max_freq); 582 583 des_perf = cap_perf; 584 if (target_perf < capacity) 585 des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); 586 587 min_perf = READ_ONCE(cpudata->lowest_perf); 588 if (_min_perf < capacity) 589 min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); 590 591 if (min_perf < lowest_nonlinear_perf) 592 min_perf = lowest_nonlinear_perf; 593 594 max_perf = cap_perf; 595 if (max_perf < min_perf) 596 max_perf = min_perf; 597 598 des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); 599 target_freq = div_u64(des_perf * max_freq, max_perf); 600 policy->cur = target_freq; 601 602 amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, 603 policy->governor->flags); 604 cpufreq_cpu_put(policy); 605 } 606 607 static int amd_get_min_freq(struct amd_cpudata *cpudata) 608 { 609 struct cppc_perf_caps cppc_perf; 610 611 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 612 if (ret) 613 return ret; 614 615 /* Switch to khz */ 616 return cppc_perf.lowest_freq * 1000; 617 } 618 619 static int amd_get_max_freq(struct amd_cpudata *cpudata) 620 { 621 struct cppc_perf_caps cppc_perf; 622 u32 max_perf, max_freq, nominal_freq, nominal_perf; 623 u64 boost_ratio; 624 625 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 626 if (ret) 627 return ret; 628 629 nominal_freq = cppc_perf.nominal_freq; 630 nominal_perf = READ_ONCE(cpudata->nominal_perf); 631 max_perf = READ_ONCE(cpudata->highest_perf); 632 633 boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, 634 nominal_perf); 635 636 max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; 637 638 /* Switch to khz */ 639 return max_freq * 1000; 640 } 641 642 static int amd_get_nominal_freq(struct amd_cpudata *cpudata) 643 { 644 struct cppc_perf_caps cppc_perf; 645 646 int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 647 if (ret) 648 return ret; 649 650 /* Switch to khz */ 651 return cppc_perf.nominal_freq * 1000; 652 } 653 654 static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) 655 { 
	struct cppc_perf_caps cppc_perf;
	u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
	    nominal_freq, nominal_perf;
	u64 lowest_nonlinear_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;

	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);

	lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to kHz */
	return lowest_nonlinear_freq * 1000;
}

static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EINVAL;
	}

	if (state)
		policy->cpuinfo.max_freq = cpudata->max_freq;
	else
		policy->cpuinfo.max_freq = cpudata->nominal_freq;

	policy->max = policy->cpuinfo.max_freq;

	ret = freq_qos_update_request(&cpudata->req[1],
				      policy->cpuinfo.max_freq);
	if (ret < 0)
		return ret;

	return 0;
}

static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
{
	u32 highest_perf, nominal_perf;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	if (highest_perf <= nominal_perf)
		return;

	cpudata->boost_supported = true;
	current_pstate_driver->boost_enabled = true;
}

static void amd_perf_ctl_reset(unsigned int cpu)
{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}

/*
 * Enabling amd-pstate preferred core support can't be done directly from
 * cpufreq callbacks due to locking, so queue the work for later.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);

/*
 * Get the highest performance register value.
 * @cpu: CPU from which to get highest performance.
 * @highest_perf: Return address.
 *
 * Return: 0 for success, -EIO otherwise.
 */
static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
{
	int ret;

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		u64 cap1;

		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
		if (ret)
			return ret;
		WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
	} else {
		u64 cppc_highest_perf;

		ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
		if (ret)
			return ret;
		WRITE_ONCE(*highest_perf, cppc_highest_perf);
	}

	return (ret);
}

#define CPPC_MAX_PERF	U8_MAX

static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	int ret, prio;
	u32 highest_perf;

	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
	if (ret)
		return;

	cpudata->hw_prefcore = true;
	/* Check if the CPPC preferred core feature is enabled */
	if (highest_perf < CPPC_MAX_PERF)
		prio = (int)highest_perf;
	else {
		pr_debug("AMD CPPC preferred core is unsupported!\n");
		cpudata->hw_prefcore = false;
		return;
	}

	if (!amd_pstate_prefcore)
		return;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio(prio, cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}

static void amd_pstate_update_limits(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 prev_high = 0, cur_high = 0;
	int ret;
	bool highest_perf_changed = false;

	mutex_lock(&amd_pstate_driver_lock);
	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
		goto free_cpufreq_put;

	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
	if (ret)
		goto free_cpufreq_put;

	prev_high = READ_ONCE(cpudata->prefcore_ranking);
	if (prev_high != cur_high) {
		highest_perf_changed = true;
		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

		if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
	}

free_cpufreq_put:
	cpufreq_cpu_put(policy);

	if (!highest_perf_changed)
		cpufreq_update_policy(cpu);

	mutex_unlock(&amd_pstate_driver_lock);
}

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for the initialization process.
840 */ 841 amd_perf_ctl_reset(policy->cpu); 842 dev = get_cpu_device(policy->cpu); 843 if (!dev) 844 return -ENODEV; 845 846 cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); 847 if (!cpudata) 848 return -ENOMEM; 849 850 cpudata->cpu = policy->cpu; 851 852 amd_pstate_init_prefcore(cpudata); 853 854 ret = amd_pstate_init_perf(cpudata); 855 if (ret) 856 goto free_cpudata1; 857 858 min_freq = amd_get_min_freq(cpudata); 859 max_freq = amd_get_max_freq(cpudata); 860 nominal_freq = amd_get_nominal_freq(cpudata); 861 lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); 862 863 if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { 864 dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", 865 min_freq, max_freq); 866 ret = -EINVAL; 867 goto free_cpudata1; 868 } 869 870 policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; 871 policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; 872 873 policy->min = min_freq; 874 policy->max = max_freq; 875 876 policy->cpuinfo.min_freq = min_freq; 877 policy->cpuinfo.max_freq = max_freq; 878 879 /* It will be updated by governor */ 880 policy->cur = policy->cpuinfo.min_freq; 881 882 if (boot_cpu_has(X86_FEATURE_CPPC)) 883 policy->fast_switch_possible = true; 884 885 ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], 886 FREQ_QOS_MIN, policy->cpuinfo.min_freq); 887 if (ret < 0) { 888 dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); 889 goto free_cpudata1; 890 } 891 892 ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1], 893 FREQ_QOS_MAX, policy->cpuinfo.max_freq); 894 if (ret < 0) { 895 dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); 896 goto free_cpudata2; 897 } 898 899 /* Initial processor data capability frequencies */ 900 cpudata->max_freq = max_freq; 901 cpudata->min_freq = min_freq; 902 cpudata->max_limit_freq = max_freq; 903 cpudata->min_limit_freq = min_freq; 904 cpudata->nominal_freq = nominal_freq; 905 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; 906 907 policy->driver_data = cpudata; 908 909 amd_pstate_boost_init(cpudata); 910 if (!current_pstate_driver->adjust_perf) 911 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; 912 913 return 0; 914 915 free_cpudata2: 916 freq_qos_remove_request(&cpudata->req[0]); 917 free_cpudata1: 918 kfree(cpudata); 919 return ret; 920 } 921 922 static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) 923 { 924 struct amd_cpudata *cpudata = policy->driver_data; 925 926 freq_qos_remove_request(&cpudata->req[1]); 927 freq_qos_remove_request(&cpudata->req[0]); 928 policy->fast_switch_possible = false; 929 kfree(cpudata); 930 931 return 0; 932 } 933 934 static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) 935 { 936 int ret; 937 938 ret = amd_pstate_enable(true); 939 if (ret) 940 pr_err("failed to enable amd-pstate during resume, return %d\n", ret); 941 942 return ret; 943 } 944 945 static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) 946 { 947 int ret; 948 949 ret = amd_pstate_enable(false); 950 if (ret) 951 pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); 952 953 return ret; 954 } 955 956 /* Sysfs attributes */ 957 958 /* 959 * This frequency is to indicate the maximum hardware frequency. 960 * If boost is not active but supported, the frequency will be larger than the 961 * one in cpuinfo. 
962 */ 963 static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, 964 char *buf) 965 { 966 int max_freq; 967 struct amd_cpudata *cpudata = policy->driver_data; 968 969 max_freq = amd_get_max_freq(cpudata); 970 if (max_freq < 0) 971 return max_freq; 972 973 return sysfs_emit(buf, "%u\n", max_freq); 974 } 975 976 static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, 977 char *buf) 978 { 979 int freq; 980 struct amd_cpudata *cpudata = policy->driver_data; 981 982 freq = amd_get_lowest_nonlinear_freq(cpudata); 983 if (freq < 0) 984 return freq; 985 986 return sysfs_emit(buf, "%u\n", freq); 987 } 988 989 /* 990 * In some of ASICs, the highest_perf is not the one in the _CPC table, so we 991 * need to expose it to sysfs. 992 */ 993 static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, 994 char *buf) 995 { 996 u32 perf; 997 struct amd_cpudata *cpudata = policy->driver_data; 998 999 perf = READ_ONCE(cpudata->highest_perf); 1000 1001 return sysfs_emit(buf, "%u\n", perf); 1002 } 1003 1004 static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, 1005 char *buf) 1006 { 1007 u32 perf; 1008 struct amd_cpudata *cpudata = policy->driver_data; 1009 1010 perf = READ_ONCE(cpudata->prefcore_ranking); 1011 1012 return sysfs_emit(buf, "%u\n", perf); 1013 } 1014 1015 static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, 1016 char *buf) 1017 { 1018 bool hw_prefcore; 1019 struct amd_cpudata *cpudata = policy->driver_data; 1020 1021 hw_prefcore = READ_ONCE(cpudata->hw_prefcore); 1022 1023 return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); 1024 } 1025 1026 static ssize_t show_energy_performance_available_preferences( 1027 struct cpufreq_policy *policy, char *buf) 1028 { 1029 int i = 0; 1030 int offset = 0; 1031 struct amd_cpudata *cpudata = policy->driver_data; 1032 1033 if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) 1034 return sysfs_emit_at(buf, offset, "%s\n", 1035 energy_perf_strings[EPP_INDEX_PERFORMANCE]); 1036 1037 while (energy_perf_strings[i] != NULL) 1038 offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]); 1039 1040 offset += sysfs_emit_at(buf, offset, "\n"); 1041 1042 return offset; 1043 } 1044 1045 static ssize_t store_energy_performance_preference( 1046 struct cpufreq_policy *policy, const char *buf, size_t count) 1047 { 1048 struct amd_cpudata *cpudata = policy->driver_data; 1049 char str_preference[21]; 1050 ssize_t ret; 1051 1052 ret = sscanf(buf, "%20s", str_preference); 1053 if (ret != 1) 1054 return -EINVAL; 1055 1056 ret = match_string(energy_perf_strings, -1, str_preference); 1057 if (ret < 0) 1058 return -EINVAL; 1059 1060 mutex_lock(&amd_pstate_limits_lock); 1061 ret = amd_pstate_set_energy_pref_index(cpudata, ret); 1062 mutex_unlock(&amd_pstate_limits_lock); 1063 1064 return ret ?: count; 1065 } 1066 1067 static ssize_t show_energy_performance_preference( 1068 struct cpufreq_policy *policy, char *buf) 1069 { 1070 struct amd_cpudata *cpudata = policy->driver_data; 1071 int preference; 1072 1073 preference = amd_pstate_get_energy_pref_index(cpudata); 1074 if (preference < 0) 1075 return preference; 1076 1077 return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); 1078 } 1079 1080 static void amd_pstate_driver_cleanup(void) 1081 { 1082 amd_pstate_enable(false); 1083 cppc_state = AMD_PSTATE_DISABLE; 1084 current_pstate_driver = NULL; 1085 } 1086 1087 static int amd_pstate_register_driver(int mode) 1088 { 1089 int ret; 1090 1091 if (mode == 
AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED) 1092 current_pstate_driver = &amd_pstate_driver; 1093 else if (mode == AMD_PSTATE_ACTIVE) 1094 current_pstate_driver = &amd_pstate_epp_driver; 1095 else 1096 return -EINVAL; 1097 1098 cppc_state = mode; 1099 ret = cpufreq_register_driver(current_pstate_driver); 1100 if (ret) { 1101 amd_pstate_driver_cleanup(); 1102 return ret; 1103 } 1104 return 0; 1105 } 1106 1107 static int amd_pstate_unregister_driver(int dummy) 1108 { 1109 cpufreq_unregister_driver(current_pstate_driver); 1110 amd_pstate_driver_cleanup(); 1111 return 0; 1112 } 1113 1114 static int amd_pstate_change_mode_without_dvr_change(int mode) 1115 { 1116 int cpu = 0; 1117 1118 cppc_state = mode; 1119 1120 if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) 1121 return 0; 1122 1123 for_each_present_cpu(cpu) { 1124 cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); 1125 } 1126 1127 return 0; 1128 } 1129 1130 static int amd_pstate_change_driver_mode(int mode) 1131 { 1132 int ret; 1133 1134 ret = amd_pstate_unregister_driver(0); 1135 if (ret) 1136 return ret; 1137 1138 ret = amd_pstate_register_driver(mode); 1139 if (ret) 1140 return ret; 1141 1142 return 0; 1143 } 1144 1145 static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { 1146 [AMD_PSTATE_DISABLE] = { 1147 [AMD_PSTATE_DISABLE] = NULL, 1148 [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, 1149 [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver, 1150 [AMD_PSTATE_GUIDED] = amd_pstate_register_driver, 1151 }, 1152 [AMD_PSTATE_PASSIVE] = { 1153 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, 1154 [AMD_PSTATE_PASSIVE] = NULL, 1155 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, 1156 [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change, 1157 }, 1158 [AMD_PSTATE_ACTIVE] = { 1159 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, 1160 [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode, 1161 [AMD_PSTATE_ACTIVE] = NULL, 1162 [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode, 1163 }, 1164 [AMD_PSTATE_GUIDED] = { 1165 [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, 1166 [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change, 1167 [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, 1168 [AMD_PSTATE_GUIDED] = NULL, 1169 }, 1170 }; 1171 1172 static ssize_t amd_pstate_show_status(char *buf) 1173 { 1174 if (!current_pstate_driver) 1175 return sysfs_emit(buf, "disable\n"); 1176 1177 return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); 1178 } 1179 1180 static int amd_pstate_update_status(const char *buf, size_t size) 1181 { 1182 int mode_idx; 1183 1184 if (size > strlen("passive") || size < strlen("active")) 1185 return -EINVAL; 1186 1187 mode_idx = get_mode_idx_from_str(buf, size); 1188 1189 if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) 1190 return -EINVAL; 1191 1192 if (mode_state_machine[cppc_state][mode_idx]) 1193 return mode_state_machine[cppc_state][mode_idx](mode_idx); 1194 1195 return 0; 1196 } 1197 1198 static ssize_t status_show(struct device *dev, 1199 struct device_attribute *attr, char *buf) 1200 { 1201 ssize_t ret; 1202 1203 mutex_lock(&amd_pstate_driver_lock); 1204 ret = amd_pstate_show_status(buf); 1205 mutex_unlock(&amd_pstate_driver_lock); 1206 1207 return ret; 1208 } 1209 1210 static ssize_t status_store(struct device *a, struct device_attribute *b, 1211 const char *buf, size_t count) 1212 { 1213 char *p = memchr(buf, '\n', count); 1214 int ret; 1215 1216 mutex_lock(&amd_pstate_driver_lock); 1217 ret = 
amd_pstate_update_status(buf, p ? p - buf : count); 1218 mutex_unlock(&amd_pstate_driver_lock); 1219 1220 return ret < 0 ? ret : count; 1221 } 1222 1223 static ssize_t prefcore_show(struct device *dev, 1224 struct device_attribute *attr, char *buf) 1225 { 1226 return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); 1227 } 1228 1229 cpufreq_freq_attr_ro(amd_pstate_max_freq); 1230 cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); 1231 1232 cpufreq_freq_attr_ro(amd_pstate_highest_perf); 1233 cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); 1234 cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); 1235 cpufreq_freq_attr_rw(energy_performance_preference); 1236 cpufreq_freq_attr_ro(energy_performance_available_preferences); 1237 static DEVICE_ATTR_RW(status); 1238 static DEVICE_ATTR_RO(prefcore); 1239 1240 static struct freq_attr *amd_pstate_attr[] = { 1241 &amd_pstate_max_freq, 1242 &amd_pstate_lowest_nonlinear_freq, 1243 &amd_pstate_highest_perf, 1244 &amd_pstate_prefcore_ranking, 1245 &amd_pstate_hw_prefcore, 1246 NULL, 1247 }; 1248 1249 static struct freq_attr *amd_pstate_epp_attr[] = { 1250 &amd_pstate_max_freq, 1251 &amd_pstate_lowest_nonlinear_freq, 1252 &amd_pstate_highest_perf, 1253 &amd_pstate_prefcore_ranking, 1254 &amd_pstate_hw_prefcore, 1255 &energy_performance_preference, 1256 &energy_performance_available_preferences, 1257 NULL, 1258 }; 1259 1260 static struct attribute *pstate_global_attributes[] = { 1261 &dev_attr_status.attr, 1262 &dev_attr_prefcore.attr, 1263 NULL 1264 }; 1265 1266 static const struct attribute_group amd_pstate_global_attr_group = { 1267 .name = "amd_pstate", 1268 .attrs = pstate_global_attributes, 1269 }; 1270 1271 static bool amd_pstate_acpi_pm_profile_server(void) 1272 { 1273 switch (acpi_gbl_FADT.preferred_profile) { 1274 case PM_ENTERPRISE_SERVER: 1275 case PM_SOHO_SERVER: 1276 case PM_PERFORMANCE_SERVER: 1277 return true; 1278 } 1279 return false; 1280 } 1281 1282 static bool amd_pstate_acpi_pm_profile_undefined(void) 1283 { 1284 if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED) 1285 return true; 1286 if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES) 1287 return true; 1288 return false; 1289 } 1290 1291 static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) 1292 { 1293 int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; 1294 struct amd_cpudata *cpudata; 1295 struct device *dev; 1296 u64 value; 1297 1298 /* 1299 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, 1300 * which is ideal for initialization process. 
1301 */ 1302 amd_perf_ctl_reset(policy->cpu); 1303 dev = get_cpu_device(policy->cpu); 1304 if (!dev) 1305 return -ENODEV; 1306 1307 cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); 1308 if (!cpudata) 1309 return -ENOMEM; 1310 1311 cpudata->cpu = policy->cpu; 1312 cpudata->epp_policy = 0; 1313 1314 amd_pstate_init_prefcore(cpudata); 1315 1316 ret = amd_pstate_init_perf(cpudata); 1317 if (ret) 1318 goto free_cpudata1; 1319 1320 min_freq = amd_get_min_freq(cpudata); 1321 max_freq = amd_get_max_freq(cpudata); 1322 nominal_freq = amd_get_nominal_freq(cpudata); 1323 lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); 1324 if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { 1325 dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", 1326 min_freq, max_freq); 1327 ret = -EINVAL; 1328 goto free_cpudata1; 1329 } 1330 1331 policy->cpuinfo.min_freq = min_freq; 1332 policy->cpuinfo.max_freq = max_freq; 1333 /* It will be updated by governor */ 1334 policy->cur = policy->cpuinfo.min_freq; 1335 1336 /* Initial processor data capability frequencies */ 1337 cpudata->max_freq = max_freq; 1338 cpudata->min_freq = min_freq; 1339 cpudata->nominal_freq = nominal_freq; 1340 cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; 1341 1342 policy->driver_data = cpudata; 1343 1344 cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); 1345 1346 policy->min = policy->cpuinfo.min_freq; 1347 policy->max = policy->cpuinfo.max_freq; 1348 1349 /* 1350 * Set the policy to provide a valid fallback value in case 1351 * the default cpufreq governor is neither powersave nor performance. 1352 */ 1353 if (amd_pstate_acpi_pm_profile_server() || 1354 amd_pstate_acpi_pm_profile_undefined()) 1355 policy->policy = CPUFREQ_POLICY_PERFORMANCE; 1356 else 1357 policy->policy = CPUFREQ_POLICY_POWERSAVE; 1358 1359 if (boot_cpu_has(X86_FEATURE_CPPC)) { 1360 ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); 1361 if (ret) 1362 return ret; 1363 WRITE_ONCE(cpudata->cppc_req_cached, value); 1364 1365 ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); 1366 if (ret) 1367 return ret; 1368 WRITE_ONCE(cpudata->cppc_cap1_cached, value); 1369 } 1370 amd_pstate_boost_init(cpudata); 1371 1372 return 0; 1373 1374 free_cpudata1: 1375 kfree(cpudata); 1376 return ret; 1377 } 1378 1379 static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) 1380 { 1381 pr_debug("CPU %d exiting\n", policy->cpu); 1382 return 0; 1383 } 1384 1385 static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) 1386 { 1387 struct amd_cpudata *cpudata = policy->driver_data; 1388 u32 max_perf, min_perf, min_limit_perf, max_limit_perf; 1389 u64 value; 1390 s16 epp; 1391 1392 max_perf = READ_ONCE(cpudata->highest_perf); 1393 min_perf = READ_ONCE(cpudata->lowest_perf); 1394 max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); 1395 min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); 1396 1397 if (min_limit_perf < min_perf) 1398 min_limit_perf = min_perf; 1399 1400 if (max_limit_perf < min_limit_perf) 1401 max_limit_perf = min_limit_perf; 1402 1403 WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); 1404 WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); 1405 1406 max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, 1407 cpudata->max_limit_perf); 1408 min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, 1409 cpudata->max_limit_perf); 1410 value = READ_ONCE(cpudata->cppc_req_cached); 1411 1412 if (cpudata->policy == 
CPUFREQ_POLICY_PERFORMANCE)
		min_perf = max_perf;

	/* Initial min/max values for CPPC Performance Controls Register */
	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	/* The CPPC EPP feature requires the desired perf field to be set to zero */
	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(0);

	cpudata->epp_policy = cpudata->policy;

	/* Get BIOS pre-defined epp value */
	epp = amd_pstate_get_epp(cpudata, value);
	if (epp < 0) {
		/*
		 * This return value can only be negative for shared memory
		 * systems where EPP register read/write is not supported.
		 */
		return;
	}

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		epp = 0;

	/* Set initial EPP value */
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
	}

	WRITE_ONCE(cpudata->cppc_req_cached, value);
	amd_pstate_set_epp(cpudata, epp);
}

static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpudata->policy = policy->policy;

	amd_pstate_epp_update_limit(policy);

	return 0;
}

static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
{
	struct cppc_perf_ctrls perf_ctrls;
	u64 value, max_perf;
	int ret;

	ret = amd_pstate_enable(true);
	if (ret)
		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

	value = READ_ONCE(cpudata->cppc_req_cached);
	max_perf = READ_ONCE(cpudata->highest_perf);

	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	} else {
		perf_ctrls.max_perf = max_perf;
		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
	}
}

static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

	if (cppc_state == AMD_PSTATE_ACTIVE) {
		amd_pstate_epp_reenable(cpudata);
		cpudata->suspended = false;
	}

	return 0;
}

static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	struct cppc_perf_ctrls perf_ctrls;
	int min_perf;
	u64 value;

	min_perf = READ_ONCE(cpudata->lowest_perf);
	value = READ_ONCE(cpudata->cppc_req_cached);

	mutex_lock(&amd_pstate_limits_lock);
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;

		/* Set max perf same as min perf */
		value &= ~AMD_CPPC_MAX_PERF(~0L);
		value |= AMD_CPPC_MAX_PERF(min_perf);
		value &= ~AMD_CPPC_MIN_PERF(~0L);
		value |= AMD_CPPC_MIN_PERF(min_perf);
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	} else {
		perf_ctrls.desired_perf = 0;
		perf_ctrls.max_perf = min_perf;
		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
	}
mutex_unlock(&amd_pstate_limits_lock); 1531 } 1532 1533 static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) 1534 { 1535 struct amd_cpudata *cpudata = policy->driver_data; 1536 1537 pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); 1538 1539 if (cpudata->suspended) 1540 return 0; 1541 1542 if (cppc_state == AMD_PSTATE_ACTIVE) 1543 amd_pstate_epp_offline(policy); 1544 1545 return 0; 1546 } 1547 1548 static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) 1549 { 1550 cpufreq_verify_within_cpu_limits(policy); 1551 pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); 1552 return 0; 1553 } 1554 1555 static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) 1556 { 1557 struct amd_cpudata *cpudata = policy->driver_data; 1558 int ret; 1559 1560 /* avoid suspending when EPP is not enabled */ 1561 if (cppc_state != AMD_PSTATE_ACTIVE) 1562 return 0; 1563 1564 /* set this flag to avoid setting core offline*/ 1565 cpudata->suspended = true; 1566 1567 /* disable CPPC in lowlevel firmware */ 1568 ret = amd_pstate_enable(false); 1569 if (ret) 1570 pr_err("failed to suspend, return %d\n", ret); 1571 1572 return 0; 1573 } 1574 1575 static int amd_pstate_epp_resume(struct cpufreq_policy *policy) 1576 { 1577 struct amd_cpudata *cpudata = policy->driver_data; 1578 1579 if (cpudata->suspended) { 1580 mutex_lock(&amd_pstate_limits_lock); 1581 1582 /* enable amd pstate from suspend state*/ 1583 amd_pstate_epp_reenable(cpudata); 1584 1585 mutex_unlock(&amd_pstate_limits_lock); 1586 1587 cpudata->suspended = false; 1588 } 1589 1590 return 0; 1591 } 1592 1593 static struct cpufreq_driver amd_pstate_driver = { 1594 .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, 1595 .verify = amd_pstate_verify, 1596 .target = amd_pstate_target, 1597 .fast_switch = amd_pstate_fast_switch, 1598 .init = amd_pstate_cpu_init, 1599 .exit = amd_pstate_cpu_exit, 1600 .suspend = amd_pstate_cpu_suspend, 1601 .resume = amd_pstate_cpu_resume, 1602 .set_boost = amd_pstate_set_boost, 1603 .update_limits = amd_pstate_update_limits, 1604 .name = "amd-pstate", 1605 .attr = amd_pstate_attr, 1606 }; 1607 1608 static struct cpufreq_driver amd_pstate_epp_driver = { 1609 .flags = CPUFREQ_CONST_LOOPS, 1610 .verify = amd_pstate_epp_verify_policy, 1611 .setpolicy = amd_pstate_epp_set_policy, 1612 .init = amd_pstate_epp_cpu_init, 1613 .exit = amd_pstate_epp_cpu_exit, 1614 .offline = amd_pstate_epp_cpu_offline, 1615 .online = amd_pstate_epp_cpu_online, 1616 .suspend = amd_pstate_epp_suspend, 1617 .resume = amd_pstate_epp_resume, 1618 .update_limits = amd_pstate_update_limits, 1619 .name = "amd-pstate-epp", 1620 .attr = amd_pstate_epp_attr, 1621 }; 1622 1623 static int __init amd_pstate_set_driver(int mode_idx) 1624 { 1625 if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { 1626 cppc_state = mode_idx; 1627 if (cppc_state == AMD_PSTATE_DISABLE) 1628 pr_info("driver is explicitly disabled\n"); 1629 1630 if (cppc_state == AMD_PSTATE_ACTIVE) 1631 current_pstate_driver = &amd_pstate_epp_driver; 1632 1633 if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) 1634 current_pstate_driver = &amd_pstate_driver; 1635 1636 return 0; 1637 } 1638 1639 return -EINVAL; 1640 } 1641 1642 static int __init amd_pstate_init(void) 1643 { 1644 struct device *dev_root; 1645 int ret; 1646 1647 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 1648 return -ENODEV; 1649 1650 if (!acpi_cpc_valid()) { 1651 pr_warn_once("the _CPC object is not present in SBIOS or ACPI 
disabled\n"); 1652 return -ENODEV; 1653 } 1654 1655 /* don't keep reloading if cpufreq_driver exists */ 1656 if (cpufreq_get_current_driver()) 1657 return -EEXIST; 1658 1659 switch (cppc_state) { 1660 case AMD_PSTATE_UNDEFINED: 1661 /* Disable on the following configs by default: 1662 * 1. Undefined platforms 1663 * 2. Server platforms 1664 * 3. Shared memory designs 1665 */ 1666 if (amd_pstate_acpi_pm_profile_undefined() || 1667 amd_pstate_acpi_pm_profile_server() || 1668 !boot_cpu_has(X86_FEATURE_CPPC)) { 1669 pr_info("driver load is disabled, boot with specific mode to enable this\n"); 1670 return -ENODEV; 1671 } 1672 ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); 1673 if (ret) 1674 return ret; 1675 break; 1676 case AMD_PSTATE_DISABLE: 1677 return -ENODEV; 1678 case AMD_PSTATE_PASSIVE: 1679 case AMD_PSTATE_ACTIVE: 1680 case AMD_PSTATE_GUIDED: 1681 break; 1682 default: 1683 return -EINVAL; 1684 } 1685 1686 /* capability check */ 1687 if (boot_cpu_has(X86_FEATURE_CPPC)) { 1688 pr_debug("AMD CPPC MSR based functionality is supported\n"); 1689 if (cppc_state != AMD_PSTATE_ACTIVE) 1690 current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; 1691 } else { 1692 pr_debug("AMD CPPC shared memory based functionality is supported\n"); 1693 static_call_update(amd_pstate_enable, cppc_enable); 1694 static_call_update(amd_pstate_init_perf, cppc_init_perf); 1695 static_call_update(amd_pstate_update_perf, cppc_update_perf); 1696 } 1697 1698 /* enable amd pstate feature */ 1699 ret = amd_pstate_enable(true); 1700 if (ret) { 1701 pr_err("failed to enable with return %d\n", ret); 1702 return ret; 1703 } 1704 1705 ret = cpufreq_register_driver(current_pstate_driver); 1706 if (ret) 1707 pr_err("failed to register with return %d\n", ret); 1708 1709 dev_root = bus_get_dev_root(&cpu_subsys); 1710 if (dev_root) { 1711 ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group); 1712 put_device(dev_root); 1713 if (ret) { 1714 pr_err("sysfs attribute export failed with error %d.\n", ret); 1715 goto global_attr_free; 1716 } 1717 } 1718 1719 return ret; 1720 1721 global_attr_free: 1722 cpufreq_unregister_driver(current_pstate_driver); 1723 return ret; 1724 } 1725 device_initcall(amd_pstate_init); 1726 1727 static int __init amd_pstate_param(char *str) 1728 { 1729 size_t size; 1730 int mode_idx; 1731 1732 if (!str) 1733 return -EINVAL; 1734 1735 size = strlen(str); 1736 mode_idx = get_mode_idx_from_str(str, size); 1737 1738 return amd_pstate_set_driver(mode_idx); 1739 } 1740 1741 static int __init amd_prefcore_param(char *str) 1742 { 1743 if (!strcmp(str, "disable")) 1744 amd_pstate_prefcore = false; 1745 1746 return 0; 1747 } 1748 1749 early_param("amd_pstate", amd_pstate_param); 1750 early_param("amd_prefcore", amd_prefcore_param); 1751 1752 MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); 1753 MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); 1754