// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * provides a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware
 * implementations for AMD P-State: 1) Full MSR Solution and 2) Shared Memory
 * Solution. The X86_FEATURE_CPPC CPU feature flag is used to distinguish
 * between them.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/amd-pstate.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY	0x20000
#define AMD_PSTATE_TRANSITION_DELAY	500

/*
 * TODO: We need more time to fine tune processors with the shared memory
 * solution together with the community.
 *
 * There are some performance drops on CPU benchmarks reported by SUSE. We are
 * working with them to fine tune the shared memory solution, so it is
 * disabled by default (falling back to acpi-cpufreq on these processors),
 * with a module parameter to enable it manually for debugging.
 */
static bool shared_mem = false;
module_param(shared_mem, bool, 0444);
MODULE_PARM_DESC(shared_mem,
		 "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)");

static struct cpufreq_driver amd_pstate_driver;

static inline int pstate_enable(bool enable)
{
	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
}

static int cppc_enable(bool enable)
{
	int cpu, ret = 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;
	}

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);

static inline int amd_pstate_enable(bool enable)
{
	return static_call(amd_pstate_enable)(enable);
}

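/*
 * For reference only, a sketch of the MSR_AMD_CPPC_CAP1 layout consumed by
 * pstate_init_perf() below, as encoded by the AMD_CPPC_*_PERF() accessors
 * (see asm/msr-index.h for the authoritative definitions):
 *
 *	lowest_perf		= (cap1 >>  0) & 0xff;
 *	lowest_nonlinear_perf	= (cap1 >>  8) & 0xff;
 *	nominal_perf		= (cap1 >> 16) & 0xff;
 *	highest_perf		= (cap1 >> 24) & 0xff;
 *
 * Each field is a unitless 8-bit performance level; higher means faster.
 */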
static int pstate_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	/*
	 * TODO: Introduce AMD specific power feature.
	 *
	 * CPPC entry doesn't indicate the highest performance in some ASICs.
	 */
	WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());

	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));

	return 0;
}

static int cppc_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());

	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);

	return 0;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}

static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			       u32 des_perf, u32 max_perf, bool fast_switch)
{
	if (fast_switch)
		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
	else
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
			      READ_ONCE(cpudata->cppc_req_cached));
}

static void cppc_update_perf(struct amd_cpudata *cpudata,
			     u32 min_perf, u32 des_perf,
			     u32 max_perf, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	cppc_set_perf(cpudata->cpu, &perf_ctrls);
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);

static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
					  u32 min_perf, u32 des_perf,
					  u32 max_perf, bool fast_switch)
{
	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
					    max_perf, fast_switch);
}

static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
{
	u64 aperf, mperf, tsc;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();

	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}

	local_irq_restore(flags);

	cpudata->cur.aperf = aperf;
	cpudata->cur.mperf = mperf;
	cpudata->cur.tsc = tsc;
	cpudata->cur.aperf -= cpudata->prev.aperf;
	cpudata->cur.mperf -= cpudata->prev.mperf;
	cpudata->cur.tsc -= cpudata->prev.tsc;

	cpudata->prev.aperf = aperf;
	cpudata->prev.mperf = mperf;
	cpudata->prev.tsc = tsc;

	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);

	return true;
}

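/*
 * A worked example of the frequency derivation in amd_pstate_sample(), with
 * illustrative numbers only: MPERF ticks at the constant reference clock
 * (cpu_khz) while APERF ticks at the actual delivered clock, so
 *
 *	freq = delta_aperf * cpu_khz / delta_mperf
 *
 * e.g. cpu_khz = 2,800,000, delta_aperf = 3,000,000 and
 * delta_mperf = 2,000,000 give freq = 4,200,000 kHz: the core ran at 1.5x
 * its reference clock over the sampling window.
 */
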
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch)
{
	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
	u64 value = prev;

	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(des_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
				      cpudata->cur.mperf, cpudata->cur.aperf,
				      cpudata->cur.tsc, cpudata->cpu,
				      (value != prev), fast_switch);
	}

	if (value == prev)
		return;

	WRITE_ONCE(cpudata->cppc_req_cached, value);

	amd_pstate_update_perf(cpudata, min_perf, des_perf,
			       max_perf, fast_switch);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	return 0;
}

static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	cpufreq_freq_transition_begin(policy, &freqs);
	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, false);
	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata = policy->driver_data;

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->highest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cap_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);

	/* Drop the reference taken by cpufreq_cpu_get() above */
	cpufreq_cpu_put(policy);
}

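/*
 * A worked example for the scaling in amd_pstate_adjust_perf(), with
 * illustrative numbers only: schedutil passes utilization in units of
 * capacity, and the code rescales it into the CPPC performance range. With
 * capacity = SCHED_CAPACITY_SCALE = 1024, cap_perf = 228 and
 * target_perf = 512 (i.e. 50% utilization):
 *
 *	des_perf = DIV_ROUND_UP(228 * 512, 1024) = 114
 *
 * which is then clamped into [min_perf, max_perf] before being written out.
 */
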
static int amd_get_min_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	/* Switch to khz */
	return cppc_perf.lowest_freq * 1000;
}

static int amd_get_max_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 max_perf, max_freq, nominal_freq, nominal_perf;
	u64 boost_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);
	max_perf = READ_ONCE(cpudata->highest_perf);

	boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
			      nominal_perf);

	max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to khz */
	return max_freq * 1000;
}

static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	/* Switch to khz */
	return cppc_perf.nominal_freq * 1000;
}

static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
	    nominal_freq, nominal_perf;
	u64 lowest_nonlinear_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;

	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);

	lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to khz */
	return lowest_nonlinear_freq * 1000;
}

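/*
 * A worked example of the fixed-point math shared by amd_get_max_freq() and
 * amd_get_lowest_nonlinear_freq(), with illustrative numbers only.
 * SCHED_CAPACITY_SHIFT (10) is borrowed here purely as a convenient 10-bit
 * fixed-point shift; it has no scheduler meaning in this context. With
 * nominal_freq = 2800 MHz, nominal_perf = 120 and highest_perf = 196:
 *
 *	boost_ratio = (196 << 10) / 120	  = 1672   (~1.633 scaled by 1024)
 *	max_freq    = (2800 * 1672) >> 10 = 4571 MHz
 *
 * which is finally multiplied by 1000 to report kHz to cpufreq.
 */
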
static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EINVAL;
	}

	if (state)
		policy->cpuinfo.max_freq = cpudata->max_freq;
	else
		policy->cpuinfo.max_freq = cpudata->nominal_freq;

	policy->max = policy->cpuinfo.max_freq;

	ret = freq_qos_update_request(&cpudata->req[1],
				      policy->cpuinfo.max_freq);
	if (ret < 0)
		return ret;

	return 0;
}

static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
{
	u32 highest_perf, nominal_perf;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	if (highest_perf <= nominal_perf)
		return;

	cpudata->boost_supported = true;
	amd_pstate_driver.boost_enabled = true;
}

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = amd_get_min_freq(cpudata);
	max_freq = amd_get_max_freq(cpudata);
	nominal_freq = amd_get_nominal_freq(cpudata);
	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);

	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
			min_freq, max_freq);
		ret = -EINVAL;
		goto free_cpudata1;
	}

	policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
	policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (boot_cpu_has(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	/* Initial processor data capability frequencies */
	cpudata->max_freq = max_freq;
	cpudata->min_freq = min_freq;
	cpudata->nominal_freq = nominal_freq;
	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;

	policy->driver_data = cpudata;

	amd_pstate_boost_init(cpudata);

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}

static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	freq_qos_remove_request(&cpudata->req[1]);
	freq_qos_remove_request(&cpudata->req[0]);
	kfree(cpudata);

	return 0;
}

static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_enable(true);
	if (ret)
		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);

	return ret;
}

static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_enable(false);
	if (ret)
		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);

	return ret;
}

/* Sysfs attributes */

/*
 * This frequency is the maximum hardware frequency. If boost is supported
 * but not active, it will be larger than the one reported in cpuinfo.
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	max_freq = amd_get_max_freq(cpudata);
	if (max_freq < 0)
		return max_freq;

	return sprintf(&buf[0], "%u\n", max_freq);
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	freq = amd_get_lowest_nonlinear_freq(cpudata);
	if (freq < 0)
		return freq;

	return sprintf(&buf[0], "%u\n", freq);
}

/*
 * On some ASICs the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sprintf(&buf[0], "%u\n", perf);
}

cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);

static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	NULL,
};

static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.suspend	= amd_pstate_cpu_suspend,
	.resume		= amd_pstate_cpu_resume,
	.set_boost	= amd_pstate_set_boost,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};

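/*
 * Note on enabling: on processors without X86_FEATURE_CPPC,
 * amd_pstate_init() below refuses to load unless the shared memory solution
 * was explicitly requested, e.g. with "amd_pstate.shared_mem=1" on the
 * kernel command line, or "modprobe amd-pstate shared_mem=1" when the
 * driver is built as a module.
 */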
static int __init amd_pstate_init(void)
{
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	if (!acpi_cpc_valid()) {
		pr_debug("the _CPC object is not present in SBIOS\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	/* capability check */
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
	} else if (shared_mem) {
		static_call_update(amd_pstate_enable, cppc_enable);
		static_call_update(amd_pstate_init_perf, cppc_init_perf);
		static_call_update(amd_pstate_update_perf, cppc_update_perf);
	} else {
		pr_info("This processor supports the shared memory solution, you can enable it with amd_pstate.shared_mem=1\n");
		return -ENODEV;
	}

	/* enable amd pstate feature */
	ret = amd_pstate_enable(true);
	if (ret) {
		pr_err("failed to enable amd-pstate with return %d\n", ret);
		return ret;
	}

	ret = cpufreq_register_driver(&amd_pstate_driver);
	if (ret)
		pr_err("failed to register amd_pstate_driver with return %d\n",
		       ret);

	return ret;
}

static void __exit amd_pstate_exit(void)
{
	cpufreq_unregister_driver(&amd_pstate_driver);

	amd_pstate_enable(false);
}

module_init(amd_pstate_init);
module_exit(amd_pstate_exit);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
MODULE_LICENSE("GPL");