1 /* 2 * intel_pstate.c: Native P state management for Intel processors 3 * 4 * (C) Copyright 2012 Intel Corporation 5 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; version 2 10 * of the License. 11 */ 12 13 #include <linux/kernel.h> 14 #include <linux/kernel_stat.h> 15 #include <linux/module.h> 16 #include <linux/ktime.h> 17 #include <linux/hrtimer.h> 18 #include <linux/tick.h> 19 #include <linux/slab.h> 20 #include <linux/sched.h> 21 #include <linux/list.h> 22 #include <linux/cpu.h> 23 #include <linux/cpufreq.h> 24 #include <linux/sysfs.h> 25 #include <linux/types.h> 26 #include <linux/fs.h> 27 #include <linux/debugfs.h> 28 #include <linux/acpi.h> 29 #include <trace/events/power.h> 30 31 #include <asm/div64.h> 32 #include <asm/msr.h> 33 #include <asm/cpu_device_id.h> 34 35 #define BYT_RATIOS 0x66a 36 #define BYT_VIDS 0x66b 37 #define BYT_TURBO_RATIOS 0x66c 38 #define BYT_TURBO_VIDS 0x66d 39 40 #define FRAC_BITS 8 41 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) 42 #define fp_toint(X) ((X) >> FRAC_BITS) 43 44 45 static inline int32_t mul_fp(int32_t x, int32_t y) 46 { 47 return ((int64_t)x * (int64_t)y) >> FRAC_BITS; 48 } 49 50 static inline int32_t div_fp(int32_t x, int32_t y) 51 { 52 return div_s64((int64_t)x << FRAC_BITS, y); 53 } 54 55 static inline int ceiling_fp(int32_t x) 56 { 57 int mask, ret; 58 59 ret = fp_toint(x); 60 mask = (1 << FRAC_BITS) - 1; 61 if (x & mask) 62 ret += 1; 63 return ret; 64 } 65 66 struct sample { 67 int32_t core_pct_busy; 68 u64 aperf; 69 u64 mperf; 70 int freq; 71 ktime_t time; 72 }; 73 74 struct pstate_data { 75 int current_pstate; 76 int min_pstate; 77 int max_pstate; 78 int scaling; 79 int turbo_pstate; 80 }; 81 82 struct vid_data { 83 int min; 84 int max; 85 int turbo; 86 int32_t ratio; 87 }; 88 89 struct _pid { 90 int setpoint; 91 int32_t integral; 92 int32_t p_gain; 93 int32_t i_gain; 94 int32_t d_gain; 95 int deadband; 96 int32_t last_err; 97 }; 98 99 struct cpudata { 100 int cpu; 101 102 struct timer_list timer; 103 104 struct pstate_data pstate; 105 struct vid_data vid; 106 struct _pid pid; 107 108 ktime_t last_sample_time; 109 u64 prev_aperf; 110 u64 prev_mperf; 111 struct sample sample; 112 }; 113 114 static struct cpudata **all_cpu_data; 115 struct pstate_adjust_policy { 116 int sample_rate_ms; 117 int deadband; 118 int setpoint; 119 int p_gain_pct; 120 int d_gain_pct; 121 int i_gain_pct; 122 }; 123 124 struct pstate_funcs { 125 int (*get_max)(void); 126 int (*get_min)(void); 127 int (*get_turbo)(void); 128 int (*get_scaling)(void); 129 void (*set)(struct cpudata*, int pstate); 130 void (*get_vid)(struct cpudata *); 131 }; 132 133 struct cpu_defaults { 134 struct pstate_adjust_policy pid_policy; 135 struct pstate_funcs funcs; 136 }; 137 138 static struct pstate_adjust_policy pid_params; 139 static struct pstate_funcs pstate_funcs; 140 static int hwp_active; 141 142 struct perf_limits { 143 int no_turbo; 144 int turbo_disabled; 145 int max_perf_pct; 146 int min_perf_pct; 147 int32_t max_perf; 148 int32_t min_perf; 149 int max_policy_pct; 150 int max_sysfs_pct; 151 }; 152 153 static struct perf_limits limits = { 154 .no_turbo = 0, 155 .turbo_disabled = 0, 156 .max_perf_pct = 100, 157 .max_perf = int_tofp(1), 158 .min_perf_pct = 0, 159 .min_perf = 0, 160 .max_policy_pct = 100, 161 .max_sysfs_pct = 100, 162 }; 163 164 static inline void pid_reset(struct _pid *pid, int setpoint, int busy, 165 int deadband, int integral) { 166 pid->setpoint = setpoint; 167 pid->deadband = deadband; 168 pid->integral = int_tofp(integral); 169 pid->last_err = int_tofp(setpoint) - int_tofp(busy); 170 } 171 172 static inline void pid_p_gain_set(struct _pid *pid, int percent) 173 { 174 pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); 175 } 176 177 static inline void pid_i_gain_set(struct _pid *pid, int percent) 178 { 179 pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); 180 } 181 182 static inline void pid_d_gain_set(struct _pid *pid, int percent) 183 { 184 pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); 185 } 186 187 static signed int pid_calc(struct _pid *pid, int32_t busy) 188 { 189 signed int result; 190 int32_t pterm, dterm, fp_error; 191 int32_t integral_limit; 192 193 fp_error = int_tofp(pid->setpoint) - busy; 194 195 if (abs(fp_error) <= int_tofp(pid->deadband)) 196 return 0; 197 198 pterm = mul_fp(pid->p_gain, fp_error); 199 200 pid->integral += fp_error; 201 202 /* limit the integral term */ 203 integral_limit = int_tofp(30); 204 if (pid->integral > integral_limit) 205 pid->integral = integral_limit; 206 if (pid->integral < -integral_limit) 207 pid->integral = -integral_limit; 208 209 dterm = mul_fp(pid->d_gain, fp_error - pid->last_err); 210 pid->last_err = fp_error; 211 212 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; 213 result = result + (1 << (FRAC_BITS-1)); 214 return (signed int)fp_toint(result); 215 } 216 217 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) 218 { 219 pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct); 220 pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); 221 pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); 222 223 pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); 224 } 225 226 static inline void intel_pstate_reset_all_pid(void) 227 { 228 unsigned int cpu; 229 230 for_each_online_cpu(cpu) { 231 if (all_cpu_data[cpu]) 232 intel_pstate_busy_pid_reset(all_cpu_data[cpu]); 233 } 234 } 235 236 static inline void update_turbo_state(void) 237 { 238 u64 misc_en; 239 struct cpudata *cpu; 240 241 cpu = all_cpu_data[0]; 242 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); 243 limits.turbo_disabled = 244 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || 245 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); 246 } 247 248 #define PCT_TO_HWP(x) (x * 255 / 100) 249 static void intel_pstate_hwp_set(void) 250 { 251 int min, max, cpu; 252 u64 value, freq; 253 254 get_online_cpus(); 255 256 for_each_online_cpu(cpu) { 257 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); 258 min = PCT_TO_HWP(limits.min_perf_pct); 259 value &= ~HWP_MIN_PERF(~0L); 260 value |= HWP_MIN_PERF(min); 261 262 max = PCT_TO_HWP(limits.max_perf_pct); 263 if (limits.no_turbo) { 264 rdmsrl( MSR_HWP_CAPABILITIES, freq); 265 max = HWP_GUARANTEED_PERF(freq); 266 } 267 268 value &= ~HWP_MAX_PERF(~0L); 269 value |= HWP_MAX_PERF(max); 270 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); 271 } 272 273 put_online_cpus(); 274 } 275 276 /************************** debugfs begin ************************/ 277 static int pid_param_set(void *data, u64 val) 278 { 279 *(u32 *)data = val; 280 intel_pstate_reset_all_pid(); 281 return 0; 282 } 283 284 static int pid_param_get(void *data, u64 *val) 285 { 286 *val = *(u32 *)data; 287 return 0; 288 } 289 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n"); 290 291 struct pid_param { 292 char *name; 293 void *value; 294 }; 295 296 static struct pid_param pid_files[] = { 297 {"sample_rate_ms", &pid_params.sample_rate_ms}, 298 {"d_gain_pct", &pid_params.d_gain_pct}, 299 {"i_gain_pct", &pid_params.i_gain_pct}, 300 {"deadband", &pid_params.deadband}, 301 {"setpoint", &pid_params.setpoint}, 302 {"p_gain_pct", &pid_params.p_gain_pct}, 303 {NULL, NULL} 304 }; 305 306 static void __init intel_pstate_debug_expose_params(void) 307 { 308 struct dentry *debugfs_parent; 309 int i = 0; 310 311 if (hwp_active) 312 return; 313 debugfs_parent = debugfs_create_dir("pstate_snb", NULL); 314 if (IS_ERR_OR_NULL(debugfs_parent)) 315 return; 316 while (pid_files[i].name) { 317 debugfs_create_file(pid_files[i].name, 0660, 318 debugfs_parent, pid_files[i].value, 319 &fops_pid_param); 320 i++; 321 } 322 } 323 324 /************************** debugfs end ************************/ 325 326 /************************** sysfs begin ************************/ 327 #define show_one(file_name, object) \ 328 static ssize_t show_##file_name \ 329 (struct kobject *kobj, struct attribute *attr, char *buf) \ 330 { \ 331 return sprintf(buf, "%u\n", limits.object); \ 332 } 333 334 static ssize_t show_no_turbo(struct kobject *kobj, 335 struct attribute *attr, char *buf) 336 { 337 ssize_t ret; 338 339 update_turbo_state(); 340 if (limits.turbo_disabled) 341 ret = sprintf(buf, "%u\n", limits.turbo_disabled); 342 else 343 ret = sprintf(buf, "%u\n", limits.no_turbo); 344 345 return ret; 346 } 347 348 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, 349 const char *buf, size_t count) 350 { 351 unsigned int input; 352 int ret; 353 354 ret = sscanf(buf, "%u", &input); 355 if (ret != 1) 356 return -EINVAL; 357 358 update_turbo_state(); 359 if (limits.turbo_disabled) { 360 pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); 361 return -EPERM; 362 } 363 364 limits.no_turbo = clamp_t(int, input, 0, 1); 365 366 if (hwp_active) 367 intel_pstate_hwp_set(); 368 369 return count; 370 } 371 372 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, 373 const char *buf, size_t count) 374 { 375 unsigned int input; 376 int ret; 377 378 ret = sscanf(buf, "%u", &input); 379 if (ret != 1) 380 return -EINVAL; 381 382 limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); 383 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); 384 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); 385 386 if (hwp_active) 387 intel_pstate_hwp_set(); 388 return count; 389 } 390 391 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, 392 const char *buf, size_t count) 393 { 394 unsigned int input; 395 int ret; 396 397 ret = sscanf(buf, "%u", &input); 398 if (ret != 1) 399 return -EINVAL; 400 limits.min_perf_pct = clamp_t(int, input, 0 , 100); 401 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); 402 403 if (hwp_active) 404 intel_pstate_hwp_set(); 405 return count; 406 } 407 408 show_one(max_perf_pct, max_perf_pct); 409 show_one(min_perf_pct, min_perf_pct); 410 411 define_one_global_rw(no_turbo); 412 define_one_global_rw(max_perf_pct); 413 define_one_global_rw(min_perf_pct); 414 415 static struct attribute *intel_pstate_attributes[] = { 416 &no_turbo.attr, 417 &max_perf_pct.attr, 418 &min_perf_pct.attr, 419 NULL 420 }; 421 422 static struct attribute_group intel_pstate_attr_group = { 423 .attrs = intel_pstate_attributes, 424 }; 425 426 static void __init intel_pstate_sysfs_expose_params(void) 427 { 428 struct kobject *intel_pstate_kobject; 429 int rc; 430 431 intel_pstate_kobject = kobject_create_and_add("intel_pstate", 432 &cpu_subsys.dev_root->kobj); 433 BUG_ON(!intel_pstate_kobject); 434 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); 435 BUG_ON(rc); 436 } 437 /************************** sysfs end ************************/ 438 439 static void intel_pstate_hwp_enable(void) 440 { 441 hwp_active++; 442 pr_info("intel_pstate HWP enabled\n"); 443 444 wrmsrl( MSR_PM_ENABLE, 0x1); 445 } 446 447 static int byt_get_min_pstate(void) 448 { 449 u64 value; 450 451 rdmsrl(BYT_RATIOS, value); 452 return (value >> 8) & 0x7F; 453 } 454 455 static int byt_get_max_pstate(void) 456 { 457 u64 value; 458 459 rdmsrl(BYT_RATIOS, value); 460 return (value >> 16) & 0x7F; 461 } 462 463 static int byt_get_turbo_pstate(void) 464 { 465 u64 value; 466 467 rdmsrl(BYT_TURBO_RATIOS, value); 468 return value & 0x7F; 469 } 470 471 static void byt_set_pstate(struct cpudata *cpudata, int pstate) 472 { 473 u64 val; 474 int32_t vid_fp; 475 u32 vid; 476 477 val = pstate << 8; 478 if (limits.no_turbo && !limits.turbo_disabled) 479 val |= (u64)1 << 32; 480 481 vid_fp = cpudata->vid.min + mul_fp( 482 int_tofp(pstate - cpudata->pstate.min_pstate), 483 cpudata->vid.ratio); 484 485 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max); 486 vid = ceiling_fp(vid_fp); 487 488 if (pstate > cpudata->pstate.max_pstate) 489 vid = cpudata->vid.turbo; 490 491 val |= vid; 492 493 wrmsrl(MSR_IA32_PERF_CTL, val); 494 } 495 496 #define BYT_BCLK_FREQS 5 497 static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800}; 498 499 static int byt_get_scaling(void) 500 { 501 u64 value; 502 int i; 503 504 rdmsrl(MSR_FSB_FREQ, value); 505 i = value & 0x3; 506 507 BUG_ON(i > BYT_BCLK_FREQS); 508 509 return byt_freq_table[i] * 100; 510 } 511 512 static void byt_get_vid(struct cpudata *cpudata) 513 { 514 u64 value; 515 516 rdmsrl(BYT_VIDS, value); 517 cpudata->vid.min = int_tofp((value >> 8) & 0x7f); 518 cpudata->vid.max = int_tofp((value >> 16) & 0x7f); 519 cpudata->vid.ratio = div_fp( 520 cpudata->vid.max - cpudata->vid.min, 521 int_tofp(cpudata->pstate.max_pstate - 522 cpudata->pstate.min_pstate)); 523 524 rdmsrl(BYT_TURBO_VIDS, value); 525 cpudata->vid.turbo = value & 0x7f; 526 } 527 528 static int core_get_min_pstate(void) 529 { 530 u64 value; 531 532 rdmsrl(MSR_PLATFORM_INFO, value); 533 return (value >> 40) & 0xFF; 534 } 535 536 static int core_get_max_pstate(void) 537 { 538 u64 value; 539 540 rdmsrl(MSR_PLATFORM_INFO, value); 541 return (value >> 8) & 0xFF; 542 } 543 544 static int core_get_turbo_pstate(void) 545 { 546 u64 value; 547 int nont, ret; 548 549 rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); 550 nont = core_get_max_pstate(); 551 ret = (value) & 255; 552 if (ret <= nont) 553 ret = nont; 554 return ret; 555 } 556 557 static inline int core_get_scaling(void) 558 { 559 return 100000; 560 } 561 562 static void core_set_pstate(struct cpudata *cpudata, int pstate) 563 { 564 u64 val; 565 566 val = pstate << 8; 567 if (limits.no_turbo && !limits.turbo_disabled) 568 val |= (u64)1 << 32; 569 570 wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); 571 } 572 573 static struct cpu_defaults core_params = { 574 .pid_policy = { 575 .sample_rate_ms = 10, 576 .deadband = 0, 577 .setpoint = 97, 578 .p_gain_pct = 20, 579 .d_gain_pct = 0, 580 .i_gain_pct = 0, 581 }, 582 .funcs = { 583 .get_max = core_get_max_pstate, 584 .get_min = core_get_min_pstate, 585 .get_turbo = core_get_turbo_pstate, 586 .get_scaling = core_get_scaling, 587 .set = core_set_pstate, 588 }, 589 }; 590 591 static struct cpu_defaults byt_params = { 592 .pid_policy = { 593 .sample_rate_ms = 10, 594 .deadband = 0, 595 .setpoint = 97, 596 .p_gain_pct = 14, 597 .d_gain_pct = 0, 598 .i_gain_pct = 4, 599 }, 600 .funcs = { 601 .get_max = byt_get_max_pstate, 602 .get_min = byt_get_min_pstate, 603 .get_turbo = byt_get_turbo_pstate, 604 .set = byt_set_pstate, 605 .get_scaling = byt_get_scaling, 606 .get_vid = byt_get_vid, 607 }, 608 }; 609 610 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) 611 { 612 int max_perf = cpu->pstate.turbo_pstate; 613 int max_perf_adj; 614 int min_perf; 615 616 if (limits.no_turbo || limits.turbo_disabled) 617 max_perf = cpu->pstate.max_pstate; 618 619 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); 620 *max = clamp_t(int, max_perf_adj, 621 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); 622 623 min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); 624 *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); 625 } 626 627 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) 628 { 629 int max_perf, min_perf; 630 631 update_turbo_state(); 632 633 intel_pstate_get_min_max(cpu, &min_perf, &max_perf); 634 635 pstate = clamp_t(int, pstate, min_perf, max_perf); 636 637 if (pstate == cpu->pstate.current_pstate) 638 return; 639 640 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); 641 642 cpu->pstate.current_pstate = pstate; 643 644 pstate_funcs.set(cpu, pstate); 645 } 646 647 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) 648 { 649 cpu->pstate.min_pstate = pstate_funcs.get_min(); 650 cpu->pstate.max_pstate = pstate_funcs.get_max(); 651 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); 652 cpu->pstate.scaling = pstate_funcs.get_scaling(); 653 654 if (pstate_funcs.get_vid) 655 pstate_funcs.get_vid(cpu); 656 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); 657 } 658 659 static inline void intel_pstate_calc_busy(struct cpudata *cpu) 660 { 661 struct sample *sample = &cpu->sample; 662 int64_t core_pct; 663 664 core_pct = int_tofp(sample->aperf) * int_tofp(100); 665 core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); 666 667 sample->freq = fp_toint( 668 mul_fp(int_tofp( 669 cpu->pstate.max_pstate * cpu->pstate.scaling / 100), 670 core_pct)); 671 672 sample->core_pct_busy = (int32_t)core_pct; 673 } 674 675 static inline void intel_pstate_sample(struct cpudata *cpu) 676 { 677 u64 aperf, mperf; 678 unsigned long flags; 679 680 local_irq_save(flags); 681 rdmsrl(MSR_IA32_APERF, aperf); 682 rdmsrl(MSR_IA32_MPERF, mperf); 683 local_irq_restore(flags); 684 685 cpu->last_sample_time = cpu->sample.time; 686 cpu->sample.time = ktime_get(); 687 cpu->sample.aperf = aperf; 688 cpu->sample.mperf = mperf; 689 cpu->sample.aperf -= cpu->prev_aperf; 690 cpu->sample.mperf -= cpu->prev_mperf; 691 692 intel_pstate_calc_busy(cpu); 693 694 cpu->prev_aperf = aperf; 695 cpu->prev_mperf = mperf; 696 } 697 698 static inline void intel_hwp_set_sample_time(struct cpudata *cpu) 699 { 700 int delay; 701 702 delay = msecs_to_jiffies(50); 703 mod_timer_pinned(&cpu->timer, jiffies + delay); 704 } 705 706 static inline void intel_pstate_set_sample_time(struct cpudata *cpu) 707 { 708 int delay; 709 710 delay = msecs_to_jiffies(pid_params.sample_rate_ms); 711 mod_timer_pinned(&cpu->timer, jiffies + delay); 712 } 713 714 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) 715 { 716 int32_t core_busy, max_pstate, current_pstate, sample_ratio; 717 u32 duration_us; 718 u32 sample_time; 719 720 core_busy = cpu->sample.core_pct_busy; 721 max_pstate = int_tofp(cpu->pstate.max_pstate); 722 current_pstate = int_tofp(cpu->pstate.current_pstate); 723 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); 724 725 sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; 726 duration_us = (u32) ktime_us_delta(cpu->sample.time, 727 cpu->last_sample_time); 728 if (duration_us > sample_time * 3) { 729 sample_ratio = div_fp(int_tofp(sample_time), 730 int_tofp(duration_us)); 731 core_busy = mul_fp(core_busy, sample_ratio); 732 } 733 734 return core_busy; 735 } 736 737 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) 738 { 739 int32_t busy_scaled; 740 struct _pid *pid; 741 signed int ctl; 742 743 pid = &cpu->pid; 744 busy_scaled = intel_pstate_get_scaled_busy(cpu); 745 746 ctl = pid_calc(pid, busy_scaled); 747 748 /* Negative values of ctl increase the pstate and vice versa */ 749 intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl); 750 } 751 752 static void intel_hwp_timer_func(unsigned long __data) 753 { 754 struct cpudata *cpu = (struct cpudata *) __data; 755 756 intel_pstate_sample(cpu); 757 intel_hwp_set_sample_time(cpu); 758 } 759 760 static void intel_pstate_timer_func(unsigned long __data) 761 { 762 struct cpudata *cpu = (struct cpudata *) __data; 763 struct sample *sample; 764 765 intel_pstate_sample(cpu); 766 767 sample = &cpu->sample; 768 769 intel_pstate_adjust_busy_pstate(cpu); 770 771 trace_pstate_sample(fp_toint(sample->core_pct_busy), 772 fp_toint(intel_pstate_get_scaled_busy(cpu)), 773 cpu->pstate.current_pstate, 774 sample->mperf, 775 sample->aperf, 776 sample->freq); 777 778 intel_pstate_set_sample_time(cpu); 779 } 780 781 #define ICPU(model, policy) \ 782 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ 783 (unsigned long)&policy } 784 785 static const struct x86_cpu_id intel_pstate_cpu_ids[] = { 786 ICPU(0x2a, core_params), 787 ICPU(0x2d, core_params), 788 ICPU(0x37, byt_params), 789 ICPU(0x3a, core_params), 790 ICPU(0x3c, core_params), 791 ICPU(0x3d, core_params), 792 ICPU(0x3e, core_params), 793 ICPU(0x3f, core_params), 794 ICPU(0x45, core_params), 795 ICPU(0x46, core_params), 796 ICPU(0x47, core_params), 797 ICPU(0x4c, byt_params), 798 ICPU(0x4f, core_params), 799 ICPU(0x56, core_params), 800 {} 801 }; 802 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); 803 804 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = { 805 ICPU(0x56, core_params), 806 {} 807 }; 808 809 static int intel_pstate_init_cpu(unsigned int cpunum) 810 { 811 struct cpudata *cpu; 812 813 if (!all_cpu_data[cpunum]) 814 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), 815 GFP_KERNEL); 816 if (!all_cpu_data[cpunum]) 817 return -ENOMEM; 818 819 cpu = all_cpu_data[cpunum]; 820 821 cpu->cpu = cpunum; 822 intel_pstate_get_cpu_pstates(cpu); 823 824 init_timer_deferrable(&cpu->timer); 825 cpu->timer.data = (unsigned long)cpu; 826 cpu->timer.expires = jiffies + HZ/100; 827 828 if (!hwp_active) 829 cpu->timer.function = intel_pstate_timer_func; 830 else 831 cpu->timer.function = intel_hwp_timer_func; 832 833 intel_pstate_busy_pid_reset(cpu); 834 intel_pstate_sample(cpu); 835 836 add_timer_on(&cpu->timer, cpunum); 837 838 pr_debug("Intel pstate controlling: cpu %d\n", cpunum); 839 840 return 0; 841 } 842 843 static unsigned int intel_pstate_get(unsigned int cpu_num) 844 { 845 struct sample *sample; 846 struct cpudata *cpu; 847 848 cpu = all_cpu_data[cpu_num]; 849 if (!cpu) 850 return 0; 851 sample = &cpu->sample; 852 return sample->freq; 853 } 854 855 static int intel_pstate_set_policy(struct cpufreq_policy *policy) 856 { 857 if (!policy->cpuinfo.max_freq) 858 return -ENODEV; 859 860 if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { 861 limits.min_perf_pct = 100; 862 limits.min_perf = int_tofp(1); 863 limits.max_policy_pct = 100; 864 limits.max_perf_pct = 100; 865 limits.max_perf = int_tofp(1); 866 limits.no_turbo = 0; 867 return 0; 868 } 869 870 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; 871 limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); 872 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); 873 874 limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; 875 limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); 876 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); 877 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); 878 879 if (hwp_active) 880 intel_pstate_hwp_set(); 881 882 return 0; 883 } 884 885 static int intel_pstate_verify_policy(struct cpufreq_policy *policy) 886 { 887 cpufreq_verify_within_cpu_limits(policy); 888 889 if (policy->policy != CPUFREQ_POLICY_POWERSAVE && 890 policy->policy != CPUFREQ_POLICY_PERFORMANCE) 891 return -EINVAL; 892 893 return 0; 894 } 895 896 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) 897 { 898 int cpu_num = policy->cpu; 899 struct cpudata *cpu = all_cpu_data[cpu_num]; 900 901 pr_info("intel_pstate CPU %d exiting\n", cpu_num); 902 903 del_timer_sync(&all_cpu_data[cpu_num]->timer); 904 if (hwp_active) 905 return; 906 907 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); 908 } 909 910 static int intel_pstate_cpu_init(struct cpufreq_policy *policy) 911 { 912 struct cpudata *cpu; 913 int rc; 914 915 rc = intel_pstate_init_cpu(policy->cpu); 916 if (rc) 917 return rc; 918 919 cpu = all_cpu_data[policy->cpu]; 920 921 if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100) 922 policy->policy = CPUFREQ_POLICY_PERFORMANCE; 923 else 924 policy->policy = CPUFREQ_POLICY_POWERSAVE; 925 926 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; 927 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; 928 929 /* cpuinfo and default policy values */ 930 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; 931 policy->cpuinfo.max_freq = 932 cpu->pstate.turbo_pstate * cpu->pstate.scaling; 933 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; 934 cpumask_set_cpu(policy->cpu, policy->cpus); 935 936 return 0; 937 } 938 939 static struct cpufreq_driver intel_pstate_driver = { 940 .flags = CPUFREQ_CONST_LOOPS, 941 .verify = intel_pstate_verify_policy, 942 .setpolicy = intel_pstate_set_policy, 943 .get = intel_pstate_get, 944 .init = intel_pstate_cpu_init, 945 .stop_cpu = intel_pstate_stop_cpu, 946 .name = "intel_pstate", 947 }; 948 949 static int __initdata no_load; 950 static int __initdata no_hwp; 951 952 static int intel_pstate_msrs_not_valid(void) 953 { 954 /* Check that all the msr's we are using are valid. */ 955 u64 aperf, mperf, tmp; 956 957 rdmsrl(MSR_IA32_APERF, aperf); 958 rdmsrl(MSR_IA32_MPERF, mperf); 959 960 if (!pstate_funcs.get_max() || 961 !pstate_funcs.get_min() || 962 !pstate_funcs.get_turbo()) 963 return -ENODEV; 964 965 rdmsrl(MSR_IA32_APERF, tmp); 966 if (!(tmp - aperf)) 967 return -ENODEV; 968 969 rdmsrl(MSR_IA32_MPERF, tmp); 970 if (!(tmp - mperf)) 971 return -ENODEV; 972 973 return 0; 974 } 975 976 static void copy_pid_params(struct pstate_adjust_policy *policy) 977 { 978 pid_params.sample_rate_ms = policy->sample_rate_ms; 979 pid_params.p_gain_pct = policy->p_gain_pct; 980 pid_params.i_gain_pct = policy->i_gain_pct; 981 pid_params.d_gain_pct = policy->d_gain_pct; 982 pid_params.deadband = policy->deadband; 983 pid_params.setpoint = policy->setpoint; 984 } 985 986 static void copy_cpu_funcs(struct pstate_funcs *funcs) 987 { 988 pstate_funcs.get_max = funcs->get_max; 989 pstate_funcs.get_min = funcs->get_min; 990 pstate_funcs.get_turbo = funcs->get_turbo; 991 pstate_funcs.get_scaling = funcs->get_scaling; 992 pstate_funcs.set = funcs->set; 993 pstate_funcs.get_vid = funcs->get_vid; 994 } 995 996 #if IS_ENABLED(CONFIG_ACPI) 997 #include <acpi/processor.h> 998 999 static bool intel_pstate_no_acpi_pss(void) 1000 { 1001 int i; 1002 1003 for_each_possible_cpu(i) { 1004 acpi_status status; 1005 union acpi_object *pss; 1006 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 1007 struct acpi_processor *pr = per_cpu(processors, i); 1008 1009 if (!pr) 1010 continue; 1011 1012 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer); 1013 if (ACPI_FAILURE(status)) 1014 continue; 1015 1016 pss = buffer.pointer; 1017 if (pss && pss->type == ACPI_TYPE_PACKAGE) { 1018 kfree(pss); 1019 return false; 1020 } 1021 1022 kfree(pss); 1023 } 1024 1025 return true; 1026 } 1027 1028 static bool intel_pstate_has_acpi_ppc(void) 1029 { 1030 int i; 1031 1032 for_each_possible_cpu(i) { 1033 struct acpi_processor *pr = per_cpu(processors, i); 1034 1035 if (!pr) 1036 continue; 1037 if (acpi_has_method(pr->handle, "_PPC")) 1038 return true; 1039 } 1040 return false; 1041 } 1042 1043 enum { 1044 PSS, 1045 PPC, 1046 }; 1047 1048 struct hw_vendor_info { 1049 u16 valid; 1050 char oem_id[ACPI_OEM_ID_SIZE]; 1051 char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; 1052 int oem_pwr_table; 1053 }; 1054 1055 /* Hardware vendor-specific info that has its own power management modes */ 1056 static struct hw_vendor_info vendor_info[] = { 1057 {1, "HP ", "ProLiant", PSS}, 1058 {1, "ORACLE", "X4-2 ", PPC}, 1059 {1, "ORACLE", "X4-2L ", PPC}, 1060 {1, "ORACLE", "X4-2B ", PPC}, 1061 {1, "ORACLE", "X3-2 ", PPC}, 1062 {1, "ORACLE", "X3-2L ", PPC}, 1063 {1, "ORACLE", "X3-2B ", PPC}, 1064 {1, "ORACLE", "X4470M2 ", PPC}, 1065 {1, "ORACLE", "X4270M3 ", PPC}, 1066 {1, "ORACLE", "X4270M2 ", PPC}, 1067 {1, "ORACLE", "X4170M2 ", PPC}, 1068 {0, "", ""}, 1069 }; 1070 1071 static bool intel_pstate_platform_pwr_mgmt_exists(void) 1072 { 1073 struct acpi_table_header hdr; 1074 struct hw_vendor_info *v_info; 1075 const struct x86_cpu_id *id; 1076 u64 misc_pwr; 1077 1078 id = x86_match_cpu(intel_pstate_cpu_oob_ids); 1079 if (id) { 1080 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); 1081 if ( misc_pwr & (1 << 8)) 1082 return true; 1083 } 1084 1085 if (acpi_disabled || 1086 ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) 1087 return false; 1088 1089 for (v_info = vendor_info; v_info->valid; v_info++) { 1090 if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) && 1091 !strncmp(hdr.oem_table_id, v_info->oem_table_id, 1092 ACPI_OEM_TABLE_ID_SIZE)) 1093 switch (v_info->oem_pwr_table) { 1094 case PSS: 1095 return intel_pstate_no_acpi_pss(); 1096 case PPC: 1097 return intel_pstate_has_acpi_ppc(); 1098 } 1099 } 1100 1101 return false; 1102 } 1103 #else /* CONFIG_ACPI not enabled */ 1104 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } 1105 static inline bool intel_pstate_has_acpi_ppc(void) { return false; } 1106 #endif /* CONFIG_ACPI */ 1107 1108 static int __init intel_pstate_init(void) 1109 { 1110 int cpu, rc = 0; 1111 const struct x86_cpu_id *id; 1112 struct cpu_defaults *cpu_info; 1113 struct cpuinfo_x86 *c = &boot_cpu_data; 1114 1115 if (no_load) 1116 return -ENODEV; 1117 1118 id = x86_match_cpu(intel_pstate_cpu_ids); 1119 if (!id) 1120 return -ENODEV; 1121 1122 /* 1123 * The Intel pstate driver will be ignored if the platform 1124 * firmware has its own power management modes. 1125 */ 1126 if (intel_pstate_platform_pwr_mgmt_exists()) 1127 return -ENODEV; 1128 1129 cpu_info = (struct cpu_defaults *)id->driver_data; 1130 1131 copy_pid_params(&cpu_info->pid_policy); 1132 copy_cpu_funcs(&cpu_info->funcs); 1133 1134 if (intel_pstate_msrs_not_valid()) 1135 return -ENODEV; 1136 1137 pr_info("Intel P-state driver initializing.\n"); 1138 1139 all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); 1140 if (!all_cpu_data) 1141 return -ENOMEM; 1142 1143 if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp) 1144 intel_pstate_hwp_enable(); 1145 1146 rc = cpufreq_register_driver(&intel_pstate_driver); 1147 if (rc) 1148 goto out; 1149 1150 intel_pstate_debug_expose_params(); 1151 intel_pstate_sysfs_expose_params(); 1152 1153 return rc; 1154 out: 1155 get_online_cpus(); 1156 for_each_online_cpu(cpu) { 1157 if (all_cpu_data[cpu]) { 1158 del_timer_sync(&all_cpu_data[cpu]->timer); 1159 kfree(all_cpu_data[cpu]); 1160 } 1161 } 1162 1163 put_online_cpus(); 1164 vfree(all_cpu_data); 1165 return -ENODEV; 1166 } 1167 device_initcall(intel_pstate_init); 1168 1169 static int __init intel_pstate_setup(char *str) 1170 { 1171 if (!str) 1172 return -EINVAL; 1173 1174 if (!strcmp(str, "disable")) 1175 no_load = 1; 1176 if (!strcmp(str, "no_hwp")) 1177 no_hwp = 1; 1178 return 0; 1179 } 1180 early_param("intel_pstate", intel_pstate_setup); 1181 1182 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>"); 1183 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); 1184 MODULE_LICENSE("GPL"); 1185