// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2009-2010 Intel Corporation
 *
 * Authors:
 *	Jesse Barnes <jbarnes@virtuousgeek.org>
 */

/*
 * Some Intel Ibex Peak based platforms support so-called "intelligent
 * power sharing", which allows the CPU and GPU to cooperate to maximize
 * performance within a given TDP (thermal design point). This driver
 * performs the coordination between the CPU and GPU, monitors thermal and
 * power statistics in the platform, and initializes power monitoring
 * hardware. It also provides a few tunables to control behavior. Its
 * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
 * by tracking power and thermal budget; secondarily it can boost turbo
 * performance by allocating more power or thermal budget to the CPU or GPU
 * based on available headroom and activity.
 *
 * The basic algorithm is driven by a 5s moving average of temperature. If
 * thermal headroom is available, the CPU and/or GPU power clamps may be
 * adjusted upwards. If we hit the thermal ceiling or a thermal trigger,
 * we scale back the clamp. Aside from trigger events (when we're critically
 * close or over our TDP) we don't adjust the clamps more than once every
 * five seconds.
 *
 * The thermal device (device 31, function 6) has a set of registers that
 * are updated by the ME firmware. The ME should also take the clamp values
 * written to those registers and write them to the CPU, but we currently
 * bypass that functionality and write the CPU MSR directly.
 *
 * UNSUPPORTED:
 *   - dual MCP configs
 *
 * TODO:
 *   - handle CPU hotplug
 *   - provide turbo enable/disable api
 *
 * Related documents:
 *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
 *   - CDI 401376 - Ibex Peak EDS
 *   - ref 26037, 26641 - IPS BIOS spec
 *   - ref 26489 - Nehalem BIOS writer's guide
 *   - ref 26921 - Ibex Peak BIOS Specification
 */

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/dmi.h>
#include <drm/intel/i915_drm.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
#include "intel_ips.h"

#include <linux/io-64-nonatomic-lo-hi.h>

#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32

/*
 * Package level MSRs for monitor/control
 */
#define PLATFORM_INFO	0xce
#define   PLATFORM_TDP		(1<<29)
#define   PLATFORM_RATIO	(1<<28)

#define IA32_MISC_ENABLE	0x1a0
#define   IA32_MISC_TURBO_EN	(1ULL<<38)

#define TURBO_POWER_CURRENT_LIMIT	0x1ac
#define   TURBO_TDC_OVR_EN	(1UL<<31)
#define   TURBO_TDC_MASK	(0x000000007fff0000UL)
#define   TURBO_TDC_SHIFT	(16)
#define   TURBO_TDP_OVR_EN	(1UL<<15)
#define   TURBO_TDP_MASK	(0x0000000000003fffUL)

/*
 * Core/thread MSRs for monitoring
 */
#define IA32_PERF_CTL		0x199
#define   IA32_PERF_TURBO_DIS	(1ULL<<32)

/*
 * Thermal PCI device regs
 *
 * Unindented names are register offsets; the indented names that follow a
 * register are bit/field definitions for it.
 */
#define THM_CFG_TBAR	0x10
#define THM_CFG_TBAR_HI	0x14

#define THM_TSIU	0x00
#define THM_TSE		0x01
#define   TSE_EN	0xb8
#define THM_TSS		0x02
#define THM_TSTR	0x03
#define THM_TSTTP	0x04
#define THM_TSCO	0x08
#define THM_TSES	0x0c
#define THM_TSGPEN	0x0d
#define   TSGPEN_HOT_LOHI	(1<<1)
#define   TSGPEN_CRIT_LOHI	(1<<2)
#define THM_TSPC	0x0e
#define THM_PPEC	0x10
#define THM_CTA		0x12
#define THM_PTA		0x14
#define   PTA_SLOPE_MASK	(0xff00)
#define   PTA_SLOPE_SHIFT	8
#define   PTA_OFFSET_MASK	(0x00ff)
#define THM_MGTA	0x16
#define   MGTA_SLOPE_MASK	(0xff00)
#define   MGTA_SLOPE_SHIFT	8
#define   MGTA_OFFSET_MASK	(0x00ff)
#define THM_TRC		0x1a
#define   TRC_CORE2_EN	(1<<15)
#define   TRC_THM_EN	(1<<12)
#define   TRC_C6_WAR	(1<<8)
#define   TRC_CORE1_EN	(1<<7)
#define   TRC_CORE_PWR	(1<<6)
#define   TRC_PCH_EN	(1<<5)
#define   TRC_MCH_EN	(1<<4)
#define   TRC_DIMM4	(1<<3)
#define   TRC_DIMM3	(1<<2)
#define   TRC_DIMM2	(1<<1)
#define   TRC_DIMM1	(1<<0)
#define THM_TES		0x20
#define THM_TEN		0x21
#define   TEN_UPDATE_EN	1
#define THM_PSC		0x24
#define   PSC_NTG	(1<<0) /* No GFX turbo support */
#define   PSC_NTPC	(1<<1) /* No CPU turbo support */
#define   PSC_PP_DEF	(0<<2) /* Perf policy up to driver */
#define   PSP_PP_PC	(1<<2) /* BIOS prefers CPU perf */
#define   PSP_PP_BAL	(2<<2) /* BIOS wants balanced perf */
#define   PSP_PP_GFX	(3<<2) /* BIOS prefers GFX perf */
#define   PSP_PBRT	(1<<4) /* BIOS run time support */
#define THM_CTV1	0x30
#define   CTV_TEMP_ERROR (1<<15)
#define   CTV_TEMP_MASK	0x3f
/* NOTE(review): empty placeholder macro; no user is visible in this file */
#define   CTV_
#define THM_CTV2	0x32
#define THM_CEC		0x34 /* undocumented power accumulator in joules */
#define THM_AE		0x3f
#define THM_HTS		0x50 /* 32 bits */
#define   HTS_PCPL_MASK	(0x7fe00000)
#define   HTS_PCPL_SHIFT 21
#define   HTS_GPL_MASK	(0x001ff000)
#define   HTS_GPL_SHIFT	12
#define   HTS_PP_MASK	(0x00000c00)
#define   HTS_PP_SHIFT	10
#define   HTS_PP_DEF	0
#define   HTS_PP_PROC	1
#define   HTS_PP_BAL	2
#define   HTS_PP_GFX	3
#define   HTS_PCTD_DIS	(1<<9)
#define   HTS_GTD_DIS	(1<<8)
#define   HTS_PTL_MASK	(0x000000fe)
#define   HTS_PTL_SHIFT	1
#define   HTS_NVV	(1<<0)
#define THM_HTSHI	0x54 /* 16 bits */
#define   HTS2_PPL_MASK		(0x03ff)
#define   HTS2_PRST_MASK	(0x3c00)
#define   HTS2_PRST_SHIFT	10
#define   HTS2_PRST_UNLOADED	0
#define   HTS2_PRST_RUNNING	1
#define   HTS2_PRST_TDISOP	2 /* turbo disabled due to power */
#define   HTS2_PRST_TDISHT	3 /* turbo disabled due to high temp */
#define   HTS2_PRST_TDISUSR	4 /* user disabled turbo */
#define   HTS2_PRST_TDISPLAT	5 /* platform disabled turbo */
#define   HTS2_PRST_TDISPM	6 /* power management disabled turbo */
#define   HTS2_PRST_TDISERR	7 /* some kind of error disabled turbo */
#define THM_PTL		0x56
#define THM_MGTV	0x58
#define   TV_MASK	0x000000000000ff00
#define   TV_SHIFT	8
#define THM_PTV		0x60
#define   PTV_MASK	0x00ff
#define THM_MMGPC	0x64
#define THM_MPPC	0x66
#define THM_MPCPC	0x68
#define THM_TSPIEN	0x82
#define   TSPIEN_AUX_LOHI	(1<<0)
#define   TSPIEN_HOT_LOHI	(1<<1)
#define   TSPIEN_CRIT_LOHI	(1<<2)
#define   TSPIEN_AUX2_LOHI	(1<<3)
#define THM_TSLOCK	0x83
#define THM_ATR		0x84
#define THM_TOF		0x87
#define THM_STS		0x98
#define   STS_PCPL_MASK	(0x7fe00000)
#define   STS_PCPL_SHIFT 21
#define   STS_GPL_MASK	(0x001ff000)
#define   STS_GPL_SHIFT	12
#define   STS_PP_MASK	(0x00000c00)
#define   STS_PP_SHIFT	10
#define   STS_PP_DEF	0
#define   STS_PP_PROC	1
#define   STS_PP_BAL	2
#define   STS_PP_GFX	3
#define   STS_PCTD_DIS	(1<<9)
#define   STS_GTD_DIS	(1<<8)
#define   STS_PTL_MASK	(0x000000fe)
#define   STS_PTL_SHIFT	1
#define   STS_NVV	(1<<0)
#define THM_SEC		0x9c
#define   SEC_ACK	(1<<0)
#define THM_TC3		0xa4
#define THM_TC1		0xa8
/*
 * NOTE(review): the mask below covers bits 8-17 while the shift is 16, so
 * (val & STS_PPL_MASK) >> STS_PPL_SHIFT keeps only bits 16-17 - confirm
 * the intended field position against the hardware spec.
 */
#define   STS_PPL_MASK	(0x0003ff00)
#define   STS_PPL_SHIFT	16
#define THM_TC2		0xac
#define THM_DTV		0xb0
#define THM_ITV		0xd8
#define   ITV_ME_SEQNO_MASK 0x00ff0000 /* ME should update every ~200ms */
#define   ITV_ME_SEQNO_SHIFT (16)
#define   ITV_MCH_TEMP_MASK 0x0000ff00
#define   ITV_MCH_TEMP_SHIFT (8)
#define   ITV_PCH_TEMP_MASK 0x000000ff

/*
 * MMIO accessors for the thermal device register block. They expand to an
 * access relative to ips->regmap, so a variable named "ips" must be in scope
 * at the call site.
 */
#define thm_readb(off) readb(ips->regmap + (off))
#define thm_readw(off) readw(ips->regmap + (off))
#define thm_readl(off) readl(ips->regmap + (off))
#define thm_readq(off) readq(ips->regmap + (off))

#define thm_writeb(off, val) writeb((val), ips->regmap + (off))
#define thm_writew(off, val) writew((val), ips->regmap + (off))
#define thm_writel(off, val) writel((val), ips->regmap + (off))

/* Interval between clamp adjustments made by the ips-adjust thread */
static const int IPS_ADJUST_PERIOD = 5000; /* ms */
static bool late_i915_load = false;

/* For initial average collection */
static const int IPS_SAMPLE_PERIOD = 200; /* ms */
static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)

/* Per-SKU limits */
struct ips_mcp_limits {
	int mcp_power_limit; /* mW units */
	int core_power_limit;
	int mch_power_limit;
	int core_temp_limit; /* degrees C */
	int mch_temp_limit;
};

/* Max temps are -10 degrees C to avoid PROCHOT# */

static struct ips_mcp_limits ips_sv_limits = {
	.mcp_power_limit = 35000,
	.core_power_limit = 29000,
	.mch_power_limit = 20000,
	.core_temp_limit = 95,
	.mch_temp_limit = 90
};

static struct ips_mcp_limits ips_lv_limits = {
	.mcp_power_limit = 25000,
	.core_power_limit = 21000,
	.mch_power_limit = 13000,
	.core_temp_limit = 95,
	.mch_temp_limit = 90
};

static struct ips_mcp_limits ips_ulv_limits = {
	.mcp_power_limit = 18000,
	.core_power_limit = 14000,
	.mch_power_limit = 11000,
	.core_temp_limit = 95,
	.mch_temp_limit = 90
};

/* Per-device driver state shared by the monitor/adjust threads and the IRQ */
struct ips_driver {
	struct device *dev;
	void __iomem *regmap;	/* base used by the thm_read*/thm_write* macros */
	int irq;

	struct task_struct *monitor;
	struct task_struct *adjust;
	struct dentry *debug_root;
	struct timer_list timer;

	/* Average CPU core temps (all averages in .01 degrees C for precision) */
	u16 ctv1_avg_temp;
	u16 ctv2_avg_temp;
	/* GMCH average */
	u16 mch_avg_temp;
	/* Average for the CPU (both cores?) */
	u16 mcp_avg_temp;
	/* Average power consumption (in mW) */
	u32 cpu_avg_power;
	u32 mch_avg_power;

	/* Offset values */
	u16 cta_val;
	u16 pta_val;
	u16 mgta_val;

	/* Maximums & prefs, protected by turbo status lock */
	spinlock_t turbo_status_lock;
	u16 mcp_temp_limit;
	u16 mcp_power_limit;
	u16 core_power_limit;
	u16 mch_power_limit;
	bool cpu_turbo_enabled;
	bool __cpu_turbo_on;
	bool gpu_turbo_enabled;
	bool __gpu_turbo_on;
	bool gpu_preferred;
	bool poll_turbo_status;
	bool second_cpu;
	bool turbo_toggle_allowed;
	struct ips_mcp_limits *limits;

	/* Optional MCH interfaces for if i915 is in use */
	unsigned long (*read_mch_val)(void);
	bool (*gpu_raise)(void);
	bool (*gpu_lower)(void);
	bool (*gpu_busy)(void);
	bool (*gpu_turbo_disable)(void);

	/* For restoration at unload */
	u64 orig_turbo_limit;
	u64 orig_turbo_ratios;
};

static bool
ips_gpu_turbo_enabled(struct ips_driver *ips);

/**
 * ips_cpu_busy - is CPU busy?
 * @ips: IPS driver struct
 *
 * Check CPU for load to see whether we should increase its thermal budget.
 * The CPU counts as busy when avenrun[0], shifted down by FSHIFT, is
 * greater than 1.
 *
 * RETURNS:
 * True if the CPU could use more power, false otherwise.
 */
static bool ips_cpu_busy(struct ips_driver *ips)
{
	if ((avenrun[0] >> FSHIFT) > 1)
		return true;

	return false;
}

/**
 * ips_cpu_raise - raise CPU power clamp
 * @ips: IPS driver struct
 *
 * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
 * this platform.
 *
 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
 * long as we haven't hit the TDP limit for the SKU).
364 */ 365 static void ips_cpu_raise(struct ips_driver *ips) 366 { 367 u64 turbo_override; 368 u16 cur_tdp_limit, new_tdp_limit; 369 370 if (!ips->cpu_turbo_enabled) 371 return; 372 373 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 374 375 cur_tdp_limit = turbo_override & TURBO_TDP_MASK; 376 new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */ 377 378 /* Clamp to SKU TDP limit */ 379 if (((new_tdp_limit * 10) / 8) > ips->core_power_limit) 380 new_tdp_limit = cur_tdp_limit; 381 382 thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8); 383 384 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN; 385 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 386 387 turbo_override &= ~TURBO_TDP_MASK; 388 turbo_override |= new_tdp_limit; 389 390 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 391 } 392 393 /** 394 * ips_cpu_lower - lower CPU power clamp 395 * @ips: IPS driver struct 396 * 397 * Lower CPU power clamp b %IPS_CPU_STEP if possible. 398 * 399 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going 400 * as low as the platform limits will allow (though we could go lower there 401 * wouldn't be much point). 
402 */ 403 static void ips_cpu_lower(struct ips_driver *ips) 404 { 405 u64 turbo_override; 406 u16 cur_limit, new_limit; 407 408 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 409 410 cur_limit = turbo_override & TURBO_TDP_MASK; 411 new_limit = cur_limit - 8; /* 1W decrease */ 412 413 /* Clamp to SKU TDP limit */ 414 if (new_limit < (ips->orig_turbo_limit & TURBO_TDP_MASK)) 415 new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK; 416 417 thm_writew(THM_MPCPC, (new_limit * 10) / 8); 418 419 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN; 420 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 421 422 turbo_override &= ~TURBO_TDP_MASK; 423 turbo_override |= new_limit; 424 425 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 426 } 427 428 /** 429 * do_enable_cpu_turbo - internal turbo enable function 430 * @data: unused 431 * 432 * Internal function for actually updating MSRs. When we enable/disable 433 * turbo, we need to do it on each CPU; this function is the one called 434 * by on_each_cpu() when needed. 435 */ 436 static void do_enable_cpu_turbo(void *data) 437 { 438 u64 perf_ctl; 439 440 rdmsrl(IA32_PERF_CTL, perf_ctl); 441 if (perf_ctl & IA32_PERF_TURBO_DIS) { 442 perf_ctl &= ~IA32_PERF_TURBO_DIS; 443 wrmsrl(IA32_PERF_CTL, perf_ctl); 444 } 445 } 446 447 /** 448 * ips_enable_cpu_turbo - enable turbo mode on all CPUs 449 * @ips: IPS driver struct 450 * 451 * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on 452 * all logical threads. 453 */ 454 static void ips_enable_cpu_turbo(struct ips_driver *ips) 455 { 456 /* Already on, no need to mess with MSRs */ 457 if (ips->__cpu_turbo_on) 458 return; 459 460 if (ips->turbo_toggle_allowed) 461 on_each_cpu(do_enable_cpu_turbo, ips, 1); 462 463 ips->__cpu_turbo_on = true; 464 } 465 466 /** 467 * do_disable_cpu_turbo - internal turbo disable function 468 * @data: unused 469 * 470 * Internal function for actually updating MSRs. 
When we enable/disable 471 * turbo, we need to do it on each CPU; this function is the one called 472 * by on_each_cpu() when needed. 473 */ 474 static void do_disable_cpu_turbo(void *data) 475 { 476 u64 perf_ctl; 477 478 rdmsrl(IA32_PERF_CTL, perf_ctl); 479 if (!(perf_ctl & IA32_PERF_TURBO_DIS)) { 480 perf_ctl |= IA32_PERF_TURBO_DIS; 481 wrmsrl(IA32_PERF_CTL, perf_ctl); 482 } 483 } 484 485 /** 486 * ips_disable_cpu_turbo - disable turbo mode on all CPUs 487 * @ips: IPS driver struct 488 * 489 * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on 490 * all logical threads. 491 */ 492 static void ips_disable_cpu_turbo(struct ips_driver *ips) 493 { 494 /* Already off, leave it */ 495 if (!ips->__cpu_turbo_on) 496 return; 497 498 if (ips->turbo_toggle_allowed) 499 on_each_cpu(do_disable_cpu_turbo, ips, 1); 500 501 ips->__cpu_turbo_on = false; 502 } 503 504 /** 505 * ips_gpu_busy - is GPU busy? 506 * @ips: IPS driver struct 507 * 508 * Check GPU for load to see whether we should increase its thermal budget. 509 * We need to call into the i915 driver in this case. 510 * 511 * RETURNS: 512 * True if the GPU could use more power, false otherwise. 513 */ 514 static bool ips_gpu_busy(struct ips_driver *ips) 515 { 516 if (!ips_gpu_turbo_enabled(ips)) 517 return false; 518 519 return ips->gpu_busy(); 520 } 521 522 /** 523 * ips_gpu_raise - raise GPU power clamp 524 * @ips: IPS driver struct 525 * 526 * Raise the GPU frequency/power if possible. We need to call into the 527 * i915 driver in this case. 528 */ 529 static void ips_gpu_raise(struct ips_driver *ips) 530 { 531 if (!ips_gpu_turbo_enabled(ips)) 532 return; 533 534 if (!ips->gpu_raise()) 535 ips->gpu_turbo_enabled = false; 536 537 return; 538 } 539 540 /** 541 * ips_gpu_lower - lower GPU power clamp 542 * @ips: IPS driver struct 543 * 544 * Lower GPU frequency/power if possible. Need to call i915. 
545 */ 546 static void ips_gpu_lower(struct ips_driver *ips) 547 { 548 if (!ips_gpu_turbo_enabled(ips)) 549 return; 550 551 if (!ips->gpu_lower()) 552 ips->gpu_turbo_enabled = false; 553 554 return; 555 } 556 557 /** 558 * ips_enable_gpu_turbo - notify the gfx driver turbo is available 559 * @ips: IPS driver struct 560 * 561 * Call into the graphics driver indicating that it can safely use 562 * turbo mode. 563 */ 564 static void ips_enable_gpu_turbo(struct ips_driver *ips) 565 { 566 if (ips->__gpu_turbo_on) 567 return; 568 ips->__gpu_turbo_on = true; 569 } 570 571 /** 572 * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode 573 * @ips: IPS driver struct 574 * 575 * Request that the graphics driver disable turbo mode. 576 */ 577 static void ips_disable_gpu_turbo(struct ips_driver *ips) 578 { 579 /* Avoid calling i915 if turbo is already disabled */ 580 if (!ips->__gpu_turbo_on) 581 return; 582 583 if (!ips->gpu_turbo_disable()) 584 dev_err(ips->dev, "failed to disable graphics turbo\n"); 585 else 586 ips->__gpu_turbo_on = false; 587 } 588 589 /** 590 * mcp_exceeded - check whether we're outside our thermal & power limits 591 * @ips: IPS driver struct 592 * 593 * Check whether the MCP is over its thermal or power budget. 
594 * 595 * Returns: %true if the temp or power has exceeded its maximum, else %false 596 */ 597 static bool mcp_exceeded(struct ips_driver *ips) 598 { 599 unsigned long flags; 600 bool ret = false; 601 u32 temp_limit; 602 u32 avg_power; 603 604 spin_lock_irqsave(&ips->turbo_status_lock, flags); 605 606 temp_limit = ips->mcp_temp_limit * 100; 607 if (ips->mcp_avg_temp > temp_limit) 608 ret = true; 609 610 avg_power = ips->cpu_avg_power + ips->mch_avg_power; 611 if (avg_power > ips->mcp_power_limit) 612 ret = true; 613 614 spin_unlock_irqrestore(&ips->turbo_status_lock, flags); 615 616 return ret; 617 } 618 619 /** 620 * cpu_exceeded - check whether a CPU core is outside its limits 621 * @ips: IPS driver struct 622 * @cpu: CPU number to check 623 * 624 * Check a given CPU's average temp or power is over its limit. 625 * 626 * Returns: %true if the temp or power has exceeded its maximum, else %false 627 */ 628 static bool cpu_exceeded(struct ips_driver *ips, int cpu) 629 { 630 unsigned long flags; 631 int avg; 632 bool ret = false; 633 634 spin_lock_irqsave(&ips->turbo_status_lock, flags); 635 avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp; 636 if (avg > (ips->limits->core_temp_limit * 100)) 637 ret = true; 638 if (ips->cpu_avg_power > ips->core_power_limit * 100) 639 ret = true; 640 spin_unlock_irqrestore(&ips->turbo_status_lock, flags); 641 642 if (ret) 643 dev_info(ips->dev, "CPU power or thermal limit exceeded\n"); 644 645 return ret; 646 } 647 648 /** 649 * mch_exceeded - check whether the GPU is over budget 650 * @ips: IPS driver struct 651 * 652 * Check the MCH temp & power against their maximums. 
653 * 654 * Returns: %true if the temp or power has exceeded its maximum, else %false 655 */ 656 static bool mch_exceeded(struct ips_driver *ips) 657 { 658 unsigned long flags; 659 bool ret = false; 660 661 spin_lock_irqsave(&ips->turbo_status_lock, flags); 662 if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100)) 663 ret = true; 664 if (ips->mch_avg_power > ips->mch_power_limit) 665 ret = true; 666 spin_unlock_irqrestore(&ips->turbo_status_lock, flags); 667 668 return ret; 669 } 670 671 /** 672 * verify_limits - verify BIOS provided limits 673 * @ips: IPS structure 674 * 675 * BIOS can optionally provide non-default limits for power and temp. Check 676 * them here and use the defaults if the BIOS values are not provided or 677 * are otherwise unusable. 678 */ 679 static void verify_limits(struct ips_driver *ips) 680 { 681 if (ips->mcp_power_limit < ips->limits->mcp_power_limit || 682 ips->mcp_power_limit > 35000) 683 ips->mcp_power_limit = ips->limits->mcp_power_limit; 684 685 if (ips->mcp_temp_limit < ips->limits->core_temp_limit || 686 ips->mcp_temp_limit < ips->limits->mch_temp_limit || 687 ips->mcp_temp_limit > 150) 688 ips->mcp_temp_limit = min(ips->limits->core_temp_limit, 689 ips->limits->mch_temp_limit); 690 } 691 692 /** 693 * update_turbo_limits - get various limits & settings from regs 694 * @ips: IPS driver struct 695 * 696 * Update the IPS power & temp limits, along with turbo enable flags, 697 * based on latest register contents. 698 * 699 * Used at init time and for runtime BIOS support, which requires polling 700 * the regs for updates (as a result of AC->DC transition for example). 
701 * 702 * LOCKING: 703 * Caller must hold turbo_status_lock (outside of init) 704 */ 705 static void update_turbo_limits(struct ips_driver *ips) 706 { 707 u32 hts = thm_readl(THM_HTS); 708 709 ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS); 710 /* 711 * Disable turbo for now, until we can figure out why the power figures 712 * are wrong 713 */ 714 ips->cpu_turbo_enabled = false; 715 716 if (ips->gpu_busy) 717 ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS); 718 719 ips->core_power_limit = thm_readw(THM_MPCPC); 720 ips->mch_power_limit = thm_readw(THM_MMGPC); 721 ips->mcp_temp_limit = thm_readw(THM_PTL); 722 ips->mcp_power_limit = thm_readw(THM_MPPC); 723 724 verify_limits(ips); 725 /* Ignore BIOS CPU vs GPU pref */ 726 } 727 728 /** 729 * ips_adjust - adjust power clamp based on thermal state 730 * @data: ips driver structure 731 * 732 * Wake up every 5s or so and check whether we should adjust the power clamp. 733 * Check CPU and GPU load to determine which needs adjustment. There are 734 * several things to consider here: 735 * - do we need to adjust up or down? 736 * - is CPU busy? 737 * - is GPU busy? 738 * - is CPU in turbo? 739 * - is GPU in turbo? 740 * - is CPU or GPU preferred? 
(CPU is default) 741 * 742 * So, given the above, we do the following: 743 * - up (TDP available) 744 * - CPU not busy, GPU not busy - nothing 745 * - CPU busy, GPU not busy - adjust CPU up 746 * - CPU not busy, GPU busy - adjust GPU up 747 * - CPU busy, GPU busy - adjust preferred unit up, taking headroom from 748 * non-preferred unit if necessary 749 * - down (at TDP limit) 750 * - adjust both CPU and GPU down if possible 751 * 752 * |cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- 753 * cpu < gpu < |cpu+gpu+ cpu+ gpu+ nothing 754 * cpu < gpu >= |cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- 755 * cpu >= gpu < |cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- 756 * cpu >= gpu >=|cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- 757 * 758 * Returns: %0 759 */ 760 static int ips_adjust(void *data) 761 { 762 struct ips_driver *ips = data; 763 unsigned long flags; 764 765 dev_dbg(ips->dev, "starting ips-adjust thread\n"); 766 767 /* 768 * Adjust CPU and GPU clamps every 5s if needed. Doing it more 769 * often isn't recommended due to ME interaction. 
770 */ 771 do { 772 bool cpu_busy = ips_cpu_busy(ips); 773 bool gpu_busy = ips_gpu_busy(ips); 774 775 spin_lock_irqsave(&ips->turbo_status_lock, flags); 776 if (ips->poll_turbo_status) 777 update_turbo_limits(ips); 778 spin_unlock_irqrestore(&ips->turbo_status_lock, flags); 779 780 /* Update turbo status if necessary */ 781 if (ips->cpu_turbo_enabled) 782 ips_enable_cpu_turbo(ips); 783 else 784 ips_disable_cpu_turbo(ips); 785 786 if (ips->gpu_turbo_enabled) 787 ips_enable_gpu_turbo(ips); 788 else 789 ips_disable_gpu_turbo(ips); 790 791 /* We're outside our comfort zone, crank them down */ 792 if (mcp_exceeded(ips)) { 793 ips_cpu_lower(ips); 794 ips_gpu_lower(ips); 795 goto sleep; 796 } 797 798 if (!cpu_exceeded(ips, 0) && cpu_busy) 799 ips_cpu_raise(ips); 800 else 801 ips_cpu_lower(ips); 802 803 if (!mch_exceeded(ips) && gpu_busy) 804 ips_gpu_raise(ips); 805 else 806 ips_gpu_lower(ips); 807 808 sleep: 809 schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD)); 810 } while (!kthread_should_stop()); 811 812 dev_dbg(ips->dev, "ips-adjust thread stopped\n"); 813 814 return 0; 815 } 816 817 /* 818 * Helpers for reading out temp/power values and calculating their 819 * averages for the decision making and monitoring functions. 
820 */ 821 822 static u16 calc_avg_temp(struct ips_driver *ips, u16 *array) 823 { 824 u64 total = 0; 825 int i; 826 u16 avg; 827 828 for (i = 0; i < IPS_SAMPLE_COUNT; i++) 829 total += (u64)(array[i] * 100); 830 831 do_div(total, IPS_SAMPLE_COUNT); 832 833 avg = (u16)total; 834 835 return avg; 836 } 837 838 static u16 read_mgtv(struct ips_driver *ips) 839 { 840 u16 __maybe_unused ret; 841 u64 slope, offset; 842 u64 val; 843 844 val = thm_readq(THM_MGTV); 845 val = (val & TV_MASK) >> TV_SHIFT; 846 847 slope = offset = thm_readw(THM_MGTA); 848 slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT; 849 offset = offset & MGTA_OFFSET_MASK; 850 851 ret = ((val * slope + 0x40) >> 7) + offset; 852 853 return 0; /* MCH temp reporting buggy */ 854 } 855 856 static u16 read_ptv(struct ips_driver *ips) 857 { 858 u16 val; 859 860 val = thm_readw(THM_PTV) & PTV_MASK; 861 862 return val; 863 } 864 865 static u16 read_ctv(struct ips_driver *ips, int cpu) 866 { 867 int reg = cpu ? THM_CTV2 : THM_CTV1; 868 u16 val; 869 870 val = thm_readw(reg); 871 if (!(val & CTV_TEMP_ERROR)) 872 val = (val) >> 6; /* discard fractional component */ 873 else 874 val = 0; 875 876 return val; 877 } 878 879 static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period) 880 { 881 u32 val; 882 u32 ret; 883 884 /* 885 * CEC is in joules/65535. Take difference over time to 886 * get watts. 
887 */ 888 val = thm_readl(THM_CEC); 889 890 /* period is in ms and we want mW */ 891 ret = (((val - *last) * 1000) / period); 892 ret = (ret * 1000) / 65535; 893 *last = val; 894 895 return 0; 896 } 897 898 static const u16 temp_decay_factor = 2; 899 static u16 update_average_temp(u16 avg, u16 val) 900 { 901 u16 ret; 902 903 /* Multiply by 100 for extra precision */ 904 ret = (val * 100 / temp_decay_factor) + 905 (((temp_decay_factor - 1) * avg) / temp_decay_factor); 906 return ret; 907 } 908 909 static const u16 power_decay_factor = 2; 910 static u16 update_average_power(u32 avg, u32 val) 911 { 912 u32 ret; 913 914 ret = (val / power_decay_factor) + 915 (((power_decay_factor - 1) * avg) / power_decay_factor); 916 917 return ret; 918 } 919 920 static u32 calc_avg_power(struct ips_driver *ips, u32 *array) 921 { 922 u64 total = 0; 923 u32 avg; 924 int i; 925 926 for (i = 0; i < IPS_SAMPLE_COUNT; i++) 927 total += array[i]; 928 929 do_div(total, IPS_SAMPLE_COUNT); 930 avg = (u32)total; 931 932 return avg; 933 } 934 935 static void monitor_timeout(struct timer_list *t) 936 { 937 struct ips_driver *ips = from_timer(ips, t, timer); 938 wake_up_process(ips->monitor); 939 } 940 941 /** 942 * ips_monitor - temp/power monitoring thread 943 * @data: ips driver structure 944 * 945 * This is the main function for the IPS driver. It monitors power and 946 * temperature in the MCP and adjusts CPU and GPU power clamps accordingly. 947 * 948 * We keep a 5s moving average of power consumption and temperature. Using 949 * that data, along with CPU vs GPU preference, we adjust the power clamps 950 * up or down. 
951 * 952 * Returns: %0 on success or -errno on error 953 */ 954 static int ips_monitor(void *data) 955 { 956 struct ips_driver *ips = data; 957 unsigned long seqno_timestamp, expire, last_msecs, last_sample_period; 958 int i; 959 u32 *cpu_samples, *mchp_samples, old_cpu_power; 960 u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples; 961 u8 cur_seqno, last_seqno; 962 963 mcp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL); 964 ctv1_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL); 965 ctv2_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL); 966 mch_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL); 967 cpu_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL); 968 mchp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL); 969 if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples || 970 !cpu_samples || !mchp_samples) { 971 dev_err(ips->dev, 972 "failed to allocate sample array, ips disabled\n"); 973 kfree(mcp_samples); 974 kfree(ctv1_samples); 975 kfree(ctv2_samples); 976 kfree(mch_samples); 977 kfree(cpu_samples); 978 kfree(mchp_samples); 979 return -ENOMEM; 980 } 981 982 last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >> 983 ITV_ME_SEQNO_SHIFT; 984 seqno_timestamp = get_jiffies_64(); 985 986 old_cpu_power = thm_readl(THM_CEC); 987 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); 988 989 /* Collect an initial average */ 990 for (i = 0; i < IPS_SAMPLE_COUNT; i++) { 991 u32 mchp, cpu_power; 992 u16 val; 993 994 mcp_samples[i] = read_ptv(ips); 995 996 val = read_ctv(ips, 0); 997 ctv1_samples[i] = val; 998 999 val = read_ctv(ips, 1); 1000 ctv2_samples[i] = val; 1001 1002 val = read_mgtv(ips); 1003 mch_samples[i] = val; 1004 1005 cpu_power = get_cpu_power(ips, &old_cpu_power, 1006 IPS_SAMPLE_PERIOD); 1007 cpu_samples[i] = cpu_power; 1008 1009 if (ips->read_mch_val) { 1010 mchp = ips->read_mch_val(); 1011 mchp_samples[i] = mchp; 1012 } 1013 1014 
schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); 1015 if (kthread_should_stop()) 1016 break; 1017 } 1018 1019 ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples); 1020 ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples); 1021 ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples); 1022 ips->mch_avg_temp = calc_avg_temp(ips, mch_samples); 1023 ips->cpu_avg_power = calc_avg_power(ips, cpu_samples); 1024 ips->mch_avg_power = calc_avg_power(ips, mchp_samples); 1025 kfree(mcp_samples); 1026 kfree(ctv1_samples); 1027 kfree(ctv2_samples); 1028 kfree(mch_samples); 1029 kfree(cpu_samples); 1030 kfree(mchp_samples); 1031 1032 /* Start the adjustment thread now that we have data */ 1033 wake_up_process(ips->adjust); 1034 1035 /* 1036 * Ok, now we have an initial avg. From here on out, we track the 1037 * running avg using a decaying average calculation. This allows 1038 * us to reduce the sample frequency if the CPU and GPU are idle. 1039 */ 1040 old_cpu_power = thm_readl(THM_CEC); 1041 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD)); 1042 last_sample_period = IPS_SAMPLE_PERIOD; 1043 1044 timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE); 1045 do { 1046 u32 cpu_val, mch_val; 1047 u16 val; 1048 1049 /* MCP itself */ 1050 val = read_ptv(ips); 1051 ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val); 1052 1053 /* Processor 0 */ 1054 val = read_ctv(ips, 0); 1055 ips->ctv1_avg_temp = 1056 update_average_temp(ips->ctv1_avg_temp, val); 1057 /* Power */ 1058 cpu_val = get_cpu_power(ips, &old_cpu_power, 1059 last_sample_period); 1060 ips->cpu_avg_power = 1061 update_average_power(ips->cpu_avg_power, cpu_val); 1062 1063 if (ips->second_cpu) { 1064 /* Processor 1 */ 1065 val = read_ctv(ips, 1); 1066 ips->ctv2_avg_temp = 1067 update_average_temp(ips->ctv2_avg_temp, val); 1068 } 1069 1070 /* MCH */ 1071 val = read_mgtv(ips); 1072 ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val); 1073 /* Power */ 1074 if 
(ips->read_mch_val) { 1075 mch_val = ips->read_mch_val(); 1076 ips->mch_avg_power = 1077 update_average_power(ips->mch_avg_power, 1078 mch_val); 1079 } 1080 1081 /* 1082 * Make sure ME is updating thermal regs. 1083 * Note: 1084 * If it's been more than a second since the last update, 1085 * the ME is probably hung. 1086 */ 1087 cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >> 1088 ITV_ME_SEQNO_SHIFT; 1089 if (cur_seqno == last_seqno && 1090 time_after(jiffies, seqno_timestamp + HZ)) { 1091 dev_warn(ips->dev, 1092 "ME failed to update for more than 1s, likely hung\n"); 1093 } else { 1094 seqno_timestamp = get_jiffies_64(); 1095 last_seqno = cur_seqno; 1096 } 1097 1098 last_msecs = jiffies_to_msecs(jiffies); 1099 expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD); 1100 1101 __set_current_state(TASK_INTERRUPTIBLE); 1102 mod_timer(&ips->timer, expire); 1103 schedule(); 1104 1105 /* Calculate actual sample period for power averaging */ 1106 last_sample_period = jiffies_to_msecs(jiffies) - last_msecs; 1107 if (!last_sample_period) 1108 last_sample_period = 1; 1109 } while (!kthread_should_stop()); 1110 1111 del_timer_sync(&ips->timer); 1112 1113 dev_dbg(ips->dev, "ips-monitor thread stopped\n"); 1114 1115 return 0; 1116 } 1117 1118 /** 1119 * ips_irq_handler - handle temperature triggers and other IPS events 1120 * @irq: irq number 1121 * @arg: unused 1122 * 1123 * Handle temperature limit trigger events, generally by lowering the clamps. 1124 * If we're at a critical limit, we clamp back to the lowest possible value 1125 * to prevent emergency shutdown. 1126 * 1127 * Returns: IRQ_NONE or IRQ_HANDLED 1128 */ 1129 static irqreturn_t ips_irq_handler(int irq, void *arg) 1130 { 1131 struct ips_driver *ips = arg; 1132 u8 tses = thm_readb(THM_TSES); 1133 u8 tes = thm_readb(THM_TES); 1134 1135 if (!tses && !tes) 1136 return IRQ_NONE; 1137 1138 dev_info(ips->dev, "TSES: 0x%02x\n", tses); 1139 dev_info(ips->dev, "TES: 0x%02x\n", tes); 1140 1141 /* STS update from EC? 
*/ 1142 if (tes & 1) { 1143 u32 sts, tc1; 1144 1145 sts = thm_readl(THM_STS); 1146 tc1 = thm_readl(THM_TC1); 1147 1148 if (sts & STS_NVV) { 1149 spin_lock(&ips->turbo_status_lock); 1150 ips->core_power_limit = (sts & STS_PCPL_MASK) >> 1151 STS_PCPL_SHIFT; 1152 ips->mch_power_limit = (sts & STS_GPL_MASK) >> 1153 STS_GPL_SHIFT; 1154 /* ignore EC CPU vs GPU pref */ 1155 ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS); 1156 /* 1157 * Disable turbo for now, until we can figure 1158 * out why the power figures are wrong 1159 */ 1160 ips->cpu_turbo_enabled = false; 1161 if (ips->gpu_busy) 1162 ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS); 1163 ips->mcp_temp_limit = (sts & STS_PTL_MASK) >> 1164 STS_PTL_SHIFT; 1165 ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >> 1166 STS_PPL_SHIFT; 1167 verify_limits(ips); 1168 spin_unlock(&ips->turbo_status_lock); 1169 1170 thm_writeb(THM_SEC, SEC_ACK); 1171 } 1172 thm_writeb(THM_TES, tes); 1173 } 1174 1175 /* Thermal trip */ 1176 if (tses) { 1177 dev_warn(ips->dev, "thermal trip occurred, tses: 0x%04x\n", 1178 tses); 1179 thm_writeb(THM_TSES, tses); 1180 } 1181 1182 return IRQ_HANDLED; 1183 } 1184 1185 #ifndef CONFIG_DEBUG_FS 1186 static void ips_debugfs_init(struct ips_driver *ips) { return; } 1187 static void ips_debugfs_cleanup(struct ips_driver *ips) { return; } 1188 #else 1189 1190 /* Expose current state and limits in debugfs if possible */ 1191 1192 static int cpu_temp_show(struct seq_file *m, void *data) 1193 { 1194 struct ips_driver *ips = m->private; 1195 1196 seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100, 1197 ips->ctv1_avg_temp % 100); 1198 1199 return 0; 1200 } 1201 DEFINE_SHOW_ATTRIBUTE(cpu_temp); 1202 1203 static int cpu_power_show(struct seq_file *m, void *data) 1204 { 1205 struct ips_driver *ips = m->private; 1206 1207 seq_printf(m, "%dmW\n", ips->cpu_avg_power); 1208 1209 return 0; 1210 } 1211 DEFINE_SHOW_ATTRIBUTE(cpu_power); 1212 1213 static int cpu_clamp_show(struct seq_file *m, void *data) 1214 { 1215 u64 
turbo_override; 1216 int tdp, tdc; 1217 1218 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 1219 1220 tdp = (int)(turbo_override & TURBO_TDP_MASK); 1221 tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT); 1222 1223 /* Convert to .1W/A units */ 1224 tdp = tdp * 10 / 8; 1225 tdc = tdc * 10 / 8; 1226 1227 /* Watts Amperes */ 1228 seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10, 1229 tdc / 10, tdc % 10); 1230 1231 return 0; 1232 } 1233 DEFINE_SHOW_ATTRIBUTE(cpu_clamp); 1234 1235 static int mch_temp_show(struct seq_file *m, void *data) 1236 { 1237 struct ips_driver *ips = m->private; 1238 1239 seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100, 1240 ips->mch_avg_temp % 100); 1241 1242 return 0; 1243 } 1244 DEFINE_SHOW_ATTRIBUTE(mch_temp); 1245 1246 static int mch_power_show(struct seq_file *m, void *data) 1247 { 1248 struct ips_driver *ips = m->private; 1249 1250 seq_printf(m, "%dmW\n", ips->mch_avg_power); 1251 1252 return 0; 1253 } 1254 DEFINE_SHOW_ATTRIBUTE(mch_power); 1255 1256 static void ips_debugfs_cleanup(struct ips_driver *ips) 1257 { 1258 debugfs_remove_recursive(ips->debug_root); 1259 } 1260 1261 static void ips_debugfs_init(struct ips_driver *ips) 1262 { 1263 ips->debug_root = debugfs_create_dir("ips", NULL); 1264 1265 debugfs_create_file("cpu_temp", 0444, ips->debug_root, ips, &cpu_temp_fops); 1266 debugfs_create_file("cpu_power", 0444, ips->debug_root, ips, &cpu_power_fops); 1267 debugfs_create_file("cpu_clamp", 0444, ips->debug_root, ips, &cpu_clamp_fops); 1268 debugfs_create_file("mch_temp", 0444, ips->debug_root, ips, &mch_temp_fops); 1269 debugfs_create_file("mch_power", 0444, ips->debug_root, ips, &mch_power_fops); 1270 } 1271 #endif /* CONFIG_DEBUG_FS */ 1272 1273 /** 1274 * ips_detect_cpu - detect whether CPU supports IPS 1275 * @ips: IPS driver struct 1276 * 1277 * Walk our list and see if we're on a supported CPU. If we find one, 1278 * return the limits for it. 
1279 * 1280 * Returns: the &ips_mcp_limits struct that matches the boot CPU or %NULL 1281 */ 1282 static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) 1283 { 1284 u64 turbo_power, misc_en; 1285 struct ips_mcp_limits *limits = NULL; 1286 u16 tdp; 1287 1288 if (!(boot_cpu_data.x86_vfm == INTEL_WESTMERE)) { 1289 dev_info(ips->dev, "Non-IPS CPU detected.\n"); 1290 return NULL; 1291 } 1292 1293 rdmsrl(IA32_MISC_ENABLE, misc_en); 1294 /* 1295 * If the turbo enable bit isn't set, we shouldn't try to enable/disable 1296 * turbo manually or we'll get an illegal MSR access, even though 1297 * turbo will still be available. 1298 */ 1299 if (misc_en & IA32_MISC_TURBO_EN) 1300 ips->turbo_toggle_allowed = true; 1301 else 1302 ips->turbo_toggle_allowed = false; 1303 1304 if (strstr(boot_cpu_data.x86_model_id, "CPU M")) 1305 limits = &ips_sv_limits; 1306 else if (strstr(boot_cpu_data.x86_model_id, "CPU L")) 1307 limits = &ips_lv_limits; 1308 else if (strstr(boot_cpu_data.x86_model_id, "CPU U")) 1309 limits = &ips_ulv_limits; 1310 else { 1311 dev_info(ips->dev, "No CPUID match found.\n"); 1312 return NULL; 1313 } 1314 1315 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power); 1316 tdp = turbo_power & TURBO_TDP_MASK; 1317 1318 /* Sanity check TDP against CPU */ 1319 if (limits->core_power_limit != (tdp / 8) * 1000) { 1320 dev_info(ips->dev, 1321 "CPU TDP doesn't match expected value (found %d, expected %d)\n", 1322 tdp / 8, limits->core_power_limit / 1000); 1323 limits->core_power_limit = (tdp / 8) * 1000; 1324 } 1325 1326 return limits; 1327 } 1328 1329 /** 1330 * ips_get_i915_syms - try to get GPU control methods from i915 driver 1331 * @ips: IPS driver 1332 * 1333 * The i915 driver exports several interfaces to allow the IPS driver to 1334 * monitor and control graphics turbo mode. If we can find them, we can 1335 * enable graphics turbo, otherwise we must disable it to avoid exceeding 1336 * thermal and power limits in the MCP. 
1337 * 1338 * Returns: %true if the required symbols are found, else %false 1339 */ 1340 static bool ips_get_i915_syms(struct ips_driver *ips) 1341 { 1342 ips->read_mch_val = symbol_get(i915_read_mch_val); 1343 if (!ips->read_mch_val) 1344 goto out_err; 1345 ips->gpu_raise = symbol_get(i915_gpu_raise); 1346 if (!ips->gpu_raise) 1347 goto out_put_mch; 1348 ips->gpu_lower = symbol_get(i915_gpu_lower); 1349 if (!ips->gpu_lower) 1350 goto out_put_raise; 1351 ips->gpu_busy = symbol_get(i915_gpu_busy); 1352 if (!ips->gpu_busy) 1353 goto out_put_lower; 1354 ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable); 1355 if (!ips->gpu_turbo_disable) 1356 goto out_put_busy; 1357 1358 return true; 1359 1360 out_put_busy: 1361 symbol_put(i915_gpu_busy); 1362 out_put_lower: 1363 symbol_put(i915_gpu_lower); 1364 out_put_raise: 1365 symbol_put(i915_gpu_raise); 1366 out_put_mch: 1367 symbol_put(i915_read_mch_val); 1368 out_err: 1369 return false; 1370 } 1371 1372 static bool 1373 ips_gpu_turbo_enabled(struct ips_driver *ips) 1374 { 1375 if (!ips->gpu_busy && late_i915_load) { 1376 if (ips_get_i915_syms(ips)) { 1377 dev_info(ips->dev, 1378 "i915 driver attached, reenabling gpu turbo\n"); 1379 ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS); 1380 } 1381 } 1382 1383 return ips->gpu_turbo_enabled; 1384 } 1385 1386 void 1387 ips_link_to_i915_driver(void) 1388 { 1389 /* We can't cleanly get at the various ips_driver structs from 1390 * this caller (the i915 driver), so just set a flag saying 1391 * that it's time to try getting the symbols again. 
1392 */ 1393 late_i915_load = true; 1394 } 1395 EXPORT_SYMBOL_GPL(ips_link_to_i915_driver); 1396 1397 static const struct pci_device_id ips_id_table[] = { 1398 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), }, 1399 { 0, } 1400 }; 1401 1402 MODULE_DEVICE_TABLE(pci, ips_id_table); 1403 1404 static int ips_blacklist_callback(const struct dmi_system_id *id) 1405 { 1406 pr_info("Blacklisted intel_ips for %s\n", id->ident); 1407 return 1; 1408 } 1409 1410 static const struct dmi_system_id ips_blacklist[] = { 1411 { 1412 .callback = ips_blacklist_callback, 1413 .ident = "HP ProBook", 1414 .matches = { 1415 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), 1416 DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"), 1417 }, 1418 }, 1419 { } /* terminating entry */ 1420 }; 1421 1422 static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) 1423 { 1424 u64 platform_info; 1425 struct ips_driver *ips; 1426 u32 hts; 1427 int ret = 0; 1428 u16 htshi, trc, trc_required_mask; 1429 u8 tse; 1430 1431 if (dmi_check_system(ips_blacklist)) 1432 return -ENODEV; 1433 1434 ips = devm_kzalloc(&dev->dev, sizeof(*ips), GFP_KERNEL); 1435 if (!ips) 1436 return -ENOMEM; 1437 1438 spin_lock_init(&ips->turbo_status_lock); 1439 ips->dev = &dev->dev; 1440 1441 ips->limits = ips_detect_cpu(ips); 1442 if (!ips->limits) { 1443 dev_info(&dev->dev, "IPS not supported on this CPU\n"); 1444 return -ENXIO; 1445 } 1446 1447 ret = pcim_enable_device(dev); 1448 if (ret) { 1449 dev_err(&dev->dev, "can't enable PCI device, aborting\n"); 1450 return ret; 1451 } 1452 1453 ret = pcim_iomap_regions(dev, 1 << 0, pci_name(dev)); 1454 if (ret) { 1455 dev_err(&dev->dev, "failed to map thermal regs, aborting\n"); 1456 return ret; 1457 } 1458 ips->regmap = pcim_iomap_table(dev)[0]; 1459 1460 pci_set_drvdata(dev, ips); 1461 1462 tse = thm_readb(THM_TSE); 1463 if (tse != TSE_EN) { 1464 dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse); 1465 return -ENXIO; 1466 } 1467 1468 trc = 
thm_readw(THM_TRC); 1469 trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN; 1470 if ((trc & trc_required_mask) != trc_required_mask) { 1471 dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n"); 1472 return -ENXIO; 1473 } 1474 1475 if (trc & TRC_CORE2_EN) 1476 ips->second_cpu = true; 1477 1478 update_turbo_limits(ips); 1479 dev_dbg(&dev->dev, "max cpu power clamp: %dW\n", 1480 ips->mcp_power_limit / 10); 1481 dev_dbg(&dev->dev, "max core power clamp: %dW\n", 1482 ips->core_power_limit / 10); 1483 /* BIOS may update limits at runtime */ 1484 if (thm_readl(THM_PSC) & PSP_PBRT) 1485 ips->poll_turbo_status = true; 1486 1487 if (!ips_get_i915_syms(ips)) { 1488 dev_info(&dev->dev, "failed to get i915 symbols, graphics turbo disabled until i915 loads\n"); 1489 ips->gpu_turbo_enabled = false; 1490 } else { 1491 dev_dbg(&dev->dev, "graphics turbo enabled\n"); 1492 ips->gpu_turbo_enabled = true; 1493 } 1494 1495 /* 1496 * Check PLATFORM_INFO MSR to make sure this chip is 1497 * turbo capable. 1498 */ 1499 rdmsrl(PLATFORM_INFO, platform_info); 1500 if (!(platform_info & PLATFORM_TDP)) { 1501 dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n"); 1502 return -ENODEV; 1503 } 1504 1505 /* 1506 * IRQ handler for ME interaction 1507 * Note: don't use MSI here as the PCH has bugs. 
1508 */ 1509 ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_INTX); 1510 if (ret < 0) 1511 return ret; 1512 1513 ips->irq = pci_irq_vector(dev, 0); 1514 1515 ret = request_irq(ips->irq, ips_irq_handler, IRQF_SHARED, "ips", ips); 1516 if (ret) { 1517 dev_err(&dev->dev, "request irq failed, aborting\n"); 1518 return ret; 1519 } 1520 1521 /* Enable aux, hot & critical interrupts */ 1522 thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI | 1523 TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI); 1524 thm_writeb(THM_TEN, TEN_UPDATE_EN); 1525 1526 /* Collect adjustment values */ 1527 ips->cta_val = thm_readw(THM_CTA); 1528 ips->pta_val = thm_readw(THM_PTA); 1529 ips->mgta_val = thm_readw(THM_MGTA); 1530 1531 /* Save turbo limits & ratios */ 1532 rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit); 1533 1534 ips_disable_cpu_turbo(ips); 1535 ips->cpu_turbo_enabled = false; 1536 1537 /* Create thermal adjust thread */ 1538 ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust"); 1539 if (IS_ERR(ips->adjust)) { 1540 dev_err(&dev->dev, 1541 "failed to create thermal adjust thread, aborting\n"); 1542 ret = -ENOMEM; 1543 goto error_free_irq; 1544 1545 } 1546 1547 /* 1548 * Set up the work queue and monitor thread. The monitor thread 1549 * will wake up ips_adjust thread. 
1550 */ 1551 ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor"); 1552 if (IS_ERR(ips->monitor)) { 1553 dev_err(&dev->dev, 1554 "failed to create thermal monitor thread, aborting\n"); 1555 ret = -ENOMEM; 1556 goto error_thread_cleanup; 1557 } 1558 1559 hts = (ips->core_power_limit << HTS_PCPL_SHIFT) | 1560 (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV; 1561 htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT; 1562 1563 thm_writew(THM_HTSHI, htshi); 1564 thm_writel(THM_HTS, hts); 1565 1566 ips_debugfs_init(ips); 1567 1568 dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n", 1569 ips->mcp_temp_limit); 1570 return ret; 1571 1572 error_thread_cleanup: 1573 kthread_stop(ips->adjust); 1574 error_free_irq: 1575 free_irq(ips->irq, ips); 1576 pci_free_irq_vectors(dev); 1577 return ret; 1578 } 1579 1580 static void ips_remove(struct pci_dev *dev) 1581 { 1582 struct ips_driver *ips = pci_get_drvdata(dev); 1583 u64 turbo_override; 1584 1585 ips_debugfs_cleanup(ips); 1586 1587 /* Release i915 driver */ 1588 if (ips->read_mch_val) 1589 symbol_put(i915_read_mch_val); 1590 if (ips->gpu_raise) 1591 symbol_put(i915_gpu_raise); 1592 if (ips->gpu_lower) 1593 symbol_put(i915_gpu_lower); 1594 if (ips->gpu_busy) 1595 symbol_put(i915_gpu_busy); 1596 if (ips->gpu_turbo_disable) 1597 symbol_put(i915_gpu_turbo_disable); 1598 1599 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 1600 turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN); 1601 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 1602 wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit); 1603 1604 free_irq(ips->irq, ips); 1605 pci_free_irq_vectors(dev); 1606 if (ips->adjust) 1607 kthread_stop(ips->adjust); 1608 if (ips->monitor) 1609 kthread_stop(ips->monitor); 1610 dev_dbg(&dev->dev, "IPS driver removed\n"); 1611 } 1612 1613 static struct pci_driver ips_pci_driver = { 1614 .name = "intel ips", 1615 .id_table = ips_id_table, 1616 .probe = ips_probe, 1617 .remove = ips_remove, 1618 }; 1619 1620 
module_pci_driver(ips_pci_driver); 1621 1622 MODULE_LICENSE("GPL v2"); 1623 MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>"); 1624 MODULE_DESCRIPTION("Intelligent Power Sharing Driver"); 1625