1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include "i915_drv.h" 8 #include "intel_gt.h" 9 #include "intel_gt_irq.h" 10 #include "intel_gt_pm_irq.h" 11 #include "intel_rps.h" 12 #include "intel_sideband.h" 13 #include "../../../platform/x86/intel_ips.h" 14 15 /* 16 * Lock protecting IPS related data structures 17 */ 18 static DEFINE_SPINLOCK(mchdev_lock); 19 20 static struct intel_gt *rps_to_gt(struct intel_rps *rps) 21 { 22 return container_of(rps, struct intel_gt, rps); 23 } 24 25 static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) 26 { 27 return rps_to_gt(rps)->i915; 28 } 29 30 static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) 31 { 32 return rps_to_gt(rps)->uncore; 33 } 34 35 static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) 36 { 37 return mask & ~rps->pm_intrmsk_mbz; 38 } 39 40 static u32 rps_pm_mask(struct intel_rps *rps, u8 val) 41 { 42 u32 mask = 0; 43 44 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ 45 if (val > rps->min_freq_softlimit) 46 mask |= (GEN6_PM_RP_UP_EI_EXPIRED | 47 GEN6_PM_RP_DOWN_THRESHOLD | 48 GEN6_PM_RP_DOWN_TIMEOUT); 49 50 if (val < rps->max_freq_softlimit) 51 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 52 53 mask &= rps->pm_events; 54 55 return rps_pm_sanitize_mask(rps, ~mask); 56 } 57 58 static void rps_reset_ei(struct intel_rps *rps) 59 { 60 memset(&rps->ei, 0, sizeof(rps->ei)); 61 } 62 63 static void rps_enable_interrupts(struct intel_rps *rps) 64 { 65 struct intel_gt *gt = rps_to_gt(rps); 66 67 rps_reset_ei(rps); 68 69 if (IS_VALLEYVIEW(gt->i915)) 70 /* WaGsvRC0ResidencyMethod:vlv */ 71 rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; 72 else 73 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 74 GEN6_PM_RP_DOWN_THRESHOLD | 75 GEN6_PM_RP_DOWN_TIMEOUT); 76 77 spin_lock_irq(>->irq_lock); 78 gen6_gt_pm_enable_irq(gt, rps->pm_events); 79 spin_unlock_irq(>->irq_lock); 80 81 intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, 82 rps_pm_mask(rps, rps->cur_freq)); 83 } 84 85 static void gen6_rps_reset_interrupts(struct intel_rps *rps) 86 { 87 gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); 88 } 89 90 static void gen11_rps_reset_interrupts(struct intel_rps *rps) 91 { 92 while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) 93 ; 94 } 95 96 static void rps_reset_interrupts(struct intel_rps *rps) 97 { 98 struct intel_gt *gt = rps_to_gt(rps); 99 100 spin_lock_irq(>->irq_lock); 101 if (INTEL_GEN(gt->i915) >= 11) 102 gen11_rps_reset_interrupts(rps); 103 else 104 gen6_rps_reset_interrupts(rps); 105 106 rps->pm_iir = 0; 107 spin_unlock_irq(>->irq_lock); 108 } 109 110 static void rps_disable_interrupts(struct intel_rps *rps) 111 { 112 struct intel_gt *gt = rps_to_gt(rps); 113 114 rps->pm_events = 0; 115 116 intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, 117 rps_pm_sanitize_mask(rps, ~0u)); 118 119 spin_lock_irq(>->irq_lock); 120 gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); 121 spin_unlock_irq(>->irq_lock); 122 123 intel_synchronize_irq(gt->i915); 124 125 /* 126 * Now that we will not be generating any more work, flush any 127 * outstanding tasks. As we are called on the RPS idle path, 128 * we will reset the GPU to minimum frequencies, so the current 129 * state of the worker can be discarded. 130 */ 131 cancel_work_sync(&rps->work); 132 133 rps_reset_interrupts(rps); 134 } 135 136 static const struct cparams { 137 u16 i; 138 u16 t; 139 u16 m; 140 u16 c; 141 } cparams[] = { 142 { 1, 1333, 301, 28664 }, 143 { 1, 1066, 294, 24460 }, 144 { 1, 800, 294, 25192 }, 145 { 0, 1333, 276, 27605 }, 146 { 0, 1066, 276, 27605 }, 147 { 0, 800, 231, 23784 }, 148 }; 149 150 static void gen5_rps_init(struct intel_rps *rps) 151 { 152 struct drm_i915_private *i915 = rps_to_i915(rps); 153 struct intel_uncore *uncore = rps_to_uncore(rps); 154 u8 fmax, fmin, fstart; 155 u32 rgvmodectl; 156 int c_m, i; 157 158 if (i915->fsb_freq <= 3200) 159 c_m = 0; 160 else if (i915->fsb_freq <= 4800) 161 c_m = 1; 162 else 163 c_m = 2; 164 165 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 166 if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { 167 rps->ips.m = cparams[i].m; 168 rps->ips.c = cparams[i].c; 169 break; 170 } 171 } 172 173 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 174 175 /* Set up min, max, and cur for interrupt handling */ 176 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 177 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 178 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 179 MEMMODE_FSTART_SHIFT; 180 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 181 fmax, fmin, fstart); 182 183 rps->min_freq = fmax; 184 rps->max_freq = fmin; 185 186 rps->idle_freq = rps->min_freq; 187 rps->cur_freq = rps->idle_freq; 188 } 189 190 static unsigned long 191 __ips_chipset_val(struct intel_ips *ips) 192 { 193 struct intel_uncore *uncore = 194 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 195 unsigned long now = jiffies_to_msecs(jiffies), dt; 196 unsigned long result; 197 u64 total, delta; 198 199 lockdep_assert_held(&mchdev_lock); 200 201 /* 202 * Prevent division-by-zero if we are asking too fast. 203 * Also, we don't get interesting results if we are polling 204 * faster than once in 10ms, so just return the saved value 205 * in such cases. 206 */ 207 dt = now - ips->last_time1; 208 if (dt <= 10) 209 return ips->chipset_power; 210 211 /* FIXME: handle per-counter overflow */ 212 total = intel_uncore_read(uncore, DMIEC); 213 total += intel_uncore_read(uncore, DDREC); 214 total += intel_uncore_read(uncore, CSIEC); 215 216 delta = total - ips->last_count1; 217 218 result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); 219 220 ips->last_count1 = total; 221 ips->last_time1 = now; 222 223 ips->chipset_power = result; 224 225 return result; 226 } 227 228 static unsigned long ips_mch_val(struct intel_uncore *uncore) 229 { 230 unsigned int m, x, b; 231 u32 tsfs; 232 233 tsfs = intel_uncore_read(uncore, TSFS); 234 x = intel_uncore_read8(uncore, TR1); 235 236 b = tsfs & TSFS_INTR_MASK; 237 m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; 238 239 return m * x / 127 - b; 240 } 241 242 static int _pxvid_to_vd(u8 pxvid) 243 { 244 if (pxvid == 0) 245 return 0; 246 247 if (pxvid >= 8 && pxvid < 31) 248 pxvid = 31; 249 250 return (pxvid + 2) * 125; 251 } 252 253 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) 254 { 255 const int vd = _pxvid_to_vd(pxvid); 256 257 if (INTEL_INFO(i915)->is_mobile) 258 return max(vd - 1125, 0); 259 260 return vd; 261 } 262 263 static void __gen5_ips_update(struct intel_ips *ips) 264 { 265 struct intel_uncore *uncore = 266 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 267 u64 now, delta, dt; 268 u32 count; 269 270 lockdep_assert_held(&mchdev_lock); 271 272 now = ktime_get_raw_ns(); 273 dt = now - ips->last_time2; 274 do_div(dt, NSEC_PER_MSEC); 275 276 /* Don't divide by 0 */ 277 if (dt <= 10) 278 return; 279 280 count = intel_uncore_read(uncore, GFXEC); 281 delta = count - ips->last_count2; 282 283 ips->last_count2 = count; 284 ips->last_time2 = now; 285 286 /* More magic constants... */ 287 ips->gfx_power = div_u64(delta * 1181, dt * 10); 288 } 289 290 static void gen5_rps_update(struct intel_rps *rps) 291 { 292 spin_lock_irq(&mchdev_lock); 293 __gen5_ips_update(&rps->ips); 294 spin_unlock_irq(&mchdev_lock); 295 } 296 297 static bool gen5_rps_set(struct intel_rps *rps, u8 val) 298 { 299 struct intel_uncore *uncore = rps_to_uncore(rps); 300 u16 rgvswctl; 301 302 lockdep_assert_held(&mchdev_lock); 303 304 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 305 if (rgvswctl & MEMCTL_CMD_STS) { 306 DRM_DEBUG("gpu busy, RCS change rejected\n"); 307 return false; /* still busy with another command */ 308 } 309 310 /* Invert the frequency bin into an ips delay */ 311 val = rps->max_freq - val; 312 val = rps->min_freq + val; 313 314 rgvswctl = 315 (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 316 (val << MEMCTL_FREQ_SHIFT) | 317 MEMCTL_SFCAVM; 318 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 319 intel_uncore_posting_read16(uncore, MEMSWCTL); 320 321 rgvswctl |= MEMCTL_CMD_STS; 322 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 323 324 return true; 325 } 326 327 static unsigned long intel_pxfreq(u32 vidfreq) 328 { 329 int div = (vidfreq & 0x3f0000) >> 16; 330 int post = (vidfreq & 0x3000) >> 12; 331 int pre = (vidfreq & 0x7); 332 333 if (!pre) 334 return 0; 335 336 return div * 133333 / (pre << post); 337 } 338 339 static unsigned int init_emon(struct intel_uncore *uncore) 340 { 341 u8 pxw[16]; 342 int i; 343 344 /* Disable to program */ 345 intel_uncore_write(uncore, ECR, 0); 346 intel_uncore_posting_read(uncore, ECR); 347 348 /* Program energy weights for various events */ 349 intel_uncore_write(uncore, SDEW, 0x15040d00); 350 intel_uncore_write(uncore, CSIEW0, 0x007f0000); 351 intel_uncore_write(uncore, CSIEW1, 0x1e220004); 352 intel_uncore_write(uncore, CSIEW2, 0x04000004); 353 354 for (i = 0; i < 5; i++) 355 intel_uncore_write(uncore, PEW(i), 0); 356 for (i = 0; i < 3; i++) 357 intel_uncore_write(uncore, DEW(i), 0); 358 359 /* Program P-state weights to account for frequency power adjustment */ 360 for (i = 0; i < 16; i++) { 361 u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); 362 unsigned int freq = intel_pxfreq(pxvidfreq); 363 unsigned int vid = 364 (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 365 unsigned int val; 366 367 val = vid * vid * freq / 1000 * 255; 368 val /= 127 * 127 * 900; 369 370 pxw[i] = val; 371 } 372 /* Render standby states get 0 weight */ 373 pxw[14] = 0; 374 pxw[15] = 0; 375 376 for (i = 0; i < 4; i++) { 377 intel_uncore_write(uncore, PXW(i), 378 pxw[i * 4 + 0] << 24 | 379 pxw[i * 4 + 1] << 16 | 380 pxw[i * 4 + 2] << 8 | 381 pxw[i * 4 + 3] << 0); 382 } 383 384 /* Adjust magic regs to magic values (more experimental results) */ 385 intel_uncore_write(uncore, OGW0, 0); 386 intel_uncore_write(uncore, OGW1, 0); 387 intel_uncore_write(uncore, EG0, 0x00007f00); 388 intel_uncore_write(uncore, EG1, 0x0000000e); 389 intel_uncore_write(uncore, EG2, 0x000e0000); 390 intel_uncore_write(uncore, EG3, 0x68000300); 391 intel_uncore_write(uncore, EG4, 0x42000000); 392 intel_uncore_write(uncore, EG5, 0x00140031); 393 intel_uncore_write(uncore, EG6, 0); 394 intel_uncore_write(uncore, EG7, 0); 395 396 for (i = 0; i < 8; i++) 397 intel_uncore_write(uncore, PXWL(i), 0); 398 399 /* Enable PMON + select events */ 400 intel_uncore_write(uncore, ECR, 0x80000019); 401 402 return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; 403 } 404 405 static bool gen5_rps_enable(struct intel_rps *rps) 406 { 407 struct intel_uncore *uncore = rps_to_uncore(rps); 408 u8 fstart, vstart; 409 u32 rgvmodectl; 410 411 spin_lock_irq(&mchdev_lock); 412 413 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 414 415 /* Enable temp reporting */ 416 intel_uncore_write16(uncore, PMMISC, 417 intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); 418 intel_uncore_write16(uncore, TSC1, 419 intel_uncore_read16(uncore, TSC1) | TSE); 420 421 /* 100ms RC evaluation intervals */ 422 intel_uncore_write(uncore, RCUPEI, 100000); 423 intel_uncore_write(uncore, RCDNEI, 100000); 424 425 /* Set max/min thresholds to 90ms and 80ms respectively */ 426 intel_uncore_write(uncore, RCBMAXAVG, 90000); 427 intel_uncore_write(uncore, RCBMINAVG, 80000); 428 429 intel_uncore_write(uncore, MEMIHYST, 1); 430 431 /* Set up min, max, and cur for interrupt handling */ 432 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 433 MEMMODE_FSTART_SHIFT; 434 435 vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & 436 PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 437 438 intel_uncore_write(uncore, 439 MEMINTREN, 440 MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 441 442 intel_uncore_write(uncore, VIDSTART, vstart); 443 intel_uncore_posting_read(uncore, VIDSTART); 444 445 rgvmodectl |= MEMMODE_SWMODE_EN; 446 intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); 447 448 if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & 449 MEMCTL_CMD_STS) == 0, 10)) 450 DRM_ERROR("stuck trying to change perf mode\n"); 451 mdelay(1); 452 453 gen5_rps_set(rps, rps->cur_freq); 454 455 rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); 456 rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); 457 rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); 458 rps->ips.last_time1 = jiffies_to_msecs(jiffies); 459 460 rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); 461 rps->ips.last_time2 = ktime_get_raw_ns(); 462 463 spin_unlock_irq(&mchdev_lock); 464 465 rps->ips.corr = init_emon(uncore); 466 467 return true; 468 } 469 470 static void gen5_rps_disable(struct intel_rps *rps) 471 { 472 struct intel_uncore *uncore = rps_to_uncore(rps); 473 u16 rgvswctl; 474 475 spin_lock_irq(&mchdev_lock); 476 477 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 478 479 /* Ack interrupts, disable EFC interrupt */ 480 intel_uncore_write(uncore, MEMINTREN, 481 intel_uncore_read(uncore, MEMINTREN) & 482 ~MEMINT_EVAL_CHG_EN); 483 intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 484 intel_uncore_write(uncore, DEIER, 485 intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); 486 intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); 487 intel_uncore_write(uncore, DEIMR, 488 intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); 489 490 /* Go back to the starting frequency */ 491 gen5_rps_set(rps, rps->idle_freq); 492 mdelay(1); 493 rgvswctl |= MEMCTL_CMD_STS; 494 intel_uncore_write(uncore, MEMSWCTL, rgvswctl); 495 mdelay(1); 496 497 spin_unlock_irq(&mchdev_lock); 498 } 499 500 static u32 rps_limits(struct intel_rps *rps, u8 val) 501 { 502 u32 limits; 503 504 /* 505 * Only set the down limit when we've reached the lowest level to avoid 506 * getting more interrupts, otherwise leave this clear. This prevents a 507 * race in the hw when coming out of rc6: There's a tiny window where 508 * the hw runs at the minimal clock before selecting the desired 509 * frequency, if the down threshold expires in that window we will not 510 * receive a down interrupt. 511 */ 512 if (INTEL_GEN(rps_to_i915(rps)) >= 9) { 513 limits = rps->max_freq_softlimit << 23; 514 if (val <= rps->min_freq_softlimit) 515 limits |= rps->min_freq_softlimit << 14; 516 } else { 517 limits = rps->max_freq_softlimit << 24; 518 if (val <= rps->min_freq_softlimit) 519 limits |= rps->min_freq_softlimit << 16; 520 } 521 522 return limits; 523 } 524 525 static void rps_set_power(struct intel_rps *rps, int new_power) 526 { 527 struct intel_uncore *uncore = rps_to_uncore(rps); 528 struct drm_i915_private *i915 = rps_to_i915(rps); 529 u32 threshold_up = 0, threshold_down = 0; /* in % */ 530 u32 ei_up = 0, ei_down = 0; 531 532 lockdep_assert_held(&rps->power.mutex); 533 534 if (new_power == rps->power.mode) 535 return; 536 537 /* Note the units here are not exactly 1us, but 1280ns. */ 538 switch (new_power) { 539 case LOW_POWER: 540 /* Upclock if more than 95% busy over 16ms */ 541 ei_up = 16000; 542 threshold_up = 95; 543 544 /* Downclock if less than 85% busy over 32ms */ 545 ei_down = 32000; 546 threshold_down = 85; 547 break; 548 549 case BETWEEN: 550 /* Upclock if more than 90% busy over 13ms */ 551 ei_up = 13000; 552 threshold_up = 90; 553 554 /* Downclock if less than 75% busy over 32ms */ 555 ei_down = 32000; 556 threshold_down = 75; 557 break; 558 559 case HIGH_POWER: 560 /* Upclock if more than 85% busy over 10ms */ 561 ei_up = 10000; 562 threshold_up = 85; 563 564 /* Downclock if less than 60% busy over 32ms */ 565 ei_down = 32000; 566 threshold_down = 60; 567 break; 568 } 569 570 /* When byt can survive without system hang with dynamic 571 * sw freq adjustments, this restriction can be lifted. 572 */ 573 if (IS_VALLEYVIEW(i915)) 574 goto skip_hw_write; 575 576 intel_uncore_write(uncore, GEN6_RP_UP_EI, 577 GT_INTERVAL_FROM_US(i915, ei_up)); 578 intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD, 579 GT_INTERVAL_FROM_US(i915, 580 ei_up * threshold_up / 100)); 581 582 intel_uncore_write(uncore, GEN6_RP_DOWN_EI, 583 GT_INTERVAL_FROM_US(i915, ei_down)); 584 intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD, 585 GT_INTERVAL_FROM_US(i915, 586 ei_down * threshold_down / 100)); 587 588 intel_uncore_write(uncore, GEN6_RP_CONTROL, 589 (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | 590 GEN6_RP_MEDIA_HW_NORMAL_MODE | 591 GEN6_RP_MEDIA_IS_GFX | 592 GEN6_RP_ENABLE | 593 GEN6_RP_UP_BUSY_AVG | 594 GEN6_RP_DOWN_IDLE_AVG); 595 596 skip_hw_write: 597 rps->power.mode = new_power; 598 rps->power.up_threshold = threshold_up; 599 rps->power.down_threshold = threshold_down; 600 } 601 602 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) 603 { 604 int new_power; 605 606 new_power = rps->power.mode; 607 switch (rps->power.mode) { 608 case LOW_POWER: 609 if (val > rps->efficient_freq + 1 && 610 val > rps->cur_freq) 611 new_power = BETWEEN; 612 break; 613 614 case BETWEEN: 615 if (val <= rps->efficient_freq && 616 val < rps->cur_freq) 617 new_power = LOW_POWER; 618 else if (val >= rps->rp0_freq && 619 val > rps->cur_freq) 620 new_power = HIGH_POWER; 621 break; 622 623 case HIGH_POWER: 624 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 625 val < rps->cur_freq) 626 new_power = BETWEEN; 627 break; 628 } 629 /* Max/min bins are special */ 630 if (val <= rps->min_freq_softlimit) 631 new_power = LOW_POWER; 632 if (val >= rps->max_freq_softlimit) 633 new_power = HIGH_POWER; 634 635 mutex_lock(&rps->power.mutex); 636 if (rps->power.interactive) 637 new_power = HIGH_POWER; 638 rps_set_power(rps, new_power); 639 mutex_unlock(&rps->power.mutex); 640 } 641 642 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) 643 { 644 mutex_lock(&rps->power.mutex); 645 if (interactive) { 646 if (!rps->power.interactive++ && rps->active) 647 rps_set_power(rps, HIGH_POWER); 648 } else { 649 GEM_BUG_ON(!rps->power.interactive); 650 rps->power.interactive--; 651 } 652 mutex_unlock(&rps->power.mutex); 653 } 654 655 static int gen6_rps_set(struct intel_rps *rps, u8 val) 656 { 657 struct intel_uncore *uncore = rps_to_uncore(rps); 658 struct drm_i915_private *i915 = rps_to_i915(rps); 659 u32 swreq; 660 661 if (INTEL_GEN(i915) >= 9) 662 swreq = GEN9_FREQUENCY(val); 663 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 664 swreq = HSW_FREQUENCY(val); 665 else 666 swreq = (GEN6_FREQUENCY(val) | 667 GEN6_OFFSET(0) | 668 GEN6_AGGRESSIVE_TURBO); 669 intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq); 670 671 return 0; 672 } 673 674 static int vlv_rps_set(struct intel_rps *rps, u8 val) 675 { 676 struct drm_i915_private *i915 = rps_to_i915(rps); 677 int err; 678 679 vlv_punit_get(i915); 680 err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); 681 vlv_punit_put(i915); 682 683 return err; 684 } 685 686 static int rps_set(struct intel_rps *rps, u8 val) 687 { 688 struct drm_i915_private *i915 = rps_to_i915(rps); 689 int err; 690 691 if (INTEL_GEN(i915) < 6) 692 return 0; 693 694 if (val == rps->last_freq) 695 return 0; 696 697 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 698 err = vlv_rps_set(rps, val); 699 else 700 err = gen6_rps_set(rps, val); 701 if (err) 702 return err; 703 704 gen6_rps_set_thresholds(rps, val); 705 rps->last_freq = val; 706 707 return 0; 708 } 709 710 void intel_rps_unpark(struct intel_rps *rps) 711 { 712 u8 freq; 713 714 if (!rps->enabled) 715 return; 716 717 /* 718 * Use the user's desired frequency as a guide, but for better 719 * performance, jump directly to RPe as our starting frequency. 720 */ 721 mutex_lock(&rps->lock); 722 rps->active = true; 723 freq = max(rps->cur_freq, rps->efficient_freq), 724 freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); 725 intel_rps_set(rps, freq); 726 rps->last_adj = 0; 727 mutex_unlock(&rps->lock); 728 729 if (INTEL_GEN(rps_to_i915(rps)) >= 6) 730 rps_enable_interrupts(rps); 731 732 if (IS_GEN(rps_to_i915(rps), 5)) 733 gen5_rps_update(rps); 734 } 735 736 void intel_rps_park(struct intel_rps *rps) 737 { 738 struct drm_i915_private *i915 = rps_to_i915(rps); 739 740 if (!rps->enabled) 741 return; 742 743 if (INTEL_GEN(i915) >= 6) 744 rps_disable_interrupts(rps); 745 746 rps->active = false; 747 if (rps->last_freq <= rps->idle_freq) 748 return; 749 750 /* 751 * The punit delays the write of the frequency and voltage until it 752 * determines the GPU is awake. During normal usage we don't want to 753 * waste power changing the frequency if the GPU is sleeping (rc6). 754 * However, the GPU and driver is now idle and we do not want to delay 755 * switching to minimum voltage (reducing power whilst idle) as we do 756 * not expect to be woken in the near future and so must flush the 757 * change by waking the device. 758 * 759 * We choose to take the media powerwell (either would do to trick the 760 * punit into committing the voltage change) as that takes a lot less 761 * power than the render powerwell. 762 */ 763 intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); 764 rps_set(rps, rps->idle_freq); 765 intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); 766 } 767 768 void intel_rps_boost(struct i915_request *rq) 769 { 770 struct intel_rps *rps = &rq->engine->gt->rps; 771 unsigned long flags; 772 773 if (i915_request_signaled(rq) || !rps->active) 774 return; 775 776 /* Serializes with i915_request_retire() */ 777 spin_lock_irqsave(&rq->lock, flags); 778 if (!i915_request_has_waitboost(rq) && 779 !dma_fence_is_signaled_locked(&rq->fence)) { 780 rq->flags |= I915_REQUEST_WAITBOOST; 781 782 if (!atomic_fetch_inc(&rps->num_waiters) && 783 READ_ONCE(rps->cur_freq) < rps->boost_freq) 784 schedule_work(&rps->work); 785 786 atomic_inc(&rps->boosts); 787 } 788 spin_unlock_irqrestore(&rq->lock, flags); 789 } 790 791 int intel_rps_set(struct intel_rps *rps, u8 val) 792 { 793 int err = 0; 794 795 lockdep_assert_held(&rps->lock); 796 GEM_BUG_ON(val > rps->max_freq); 797 GEM_BUG_ON(val < rps->min_freq); 798 799 if (rps->active) { 800 err = rps_set(rps, val); 801 802 /* 803 * Make sure we continue to get interrupts 804 * until we hit the minimum or maximum frequencies. 805 */ 806 if (INTEL_GEN(rps_to_i915(rps)) >= 6) { 807 struct intel_uncore *uncore = rps_to_uncore(rps); 808 809 intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS, 810 rps_limits(rps, val)); 811 812 intel_uncore_write(uncore, GEN6_PMINTRMSK, 813 rps_pm_mask(rps, val)); 814 } 815 } 816 817 if (err == 0) 818 rps->cur_freq = val; 819 820 return err; 821 } 822 823 static void gen6_rps_init(struct intel_rps *rps) 824 { 825 struct drm_i915_private *i915 = rps_to_i915(rps); 826 struct intel_uncore *uncore = rps_to_uncore(rps); 827 828 /* All of these values are in units of 50MHz */ 829 830 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 831 if (IS_GEN9_LP(i915)) { 832 u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); 833 834 rps->rp0_freq = (rp_state_cap >> 16) & 0xff; 835 rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 836 rps->min_freq = (rp_state_cap >> 0) & 0xff; 837 } else { 838 u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); 839 840 rps->rp0_freq = (rp_state_cap >> 0) & 0xff; 841 rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 842 rps->min_freq = (rp_state_cap >> 16) & 0xff; 843 } 844 845 /* hw_max = RP0 until we check for overclocking */ 846 rps->max_freq = rps->rp0_freq; 847 848 rps->efficient_freq = rps->rp1_freq; 849 if (IS_HASWELL(i915) || IS_BROADWELL(i915) || 850 IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { 851 u32 ddcc_status = 0; 852 853 if (sandybridge_pcode_read(i915, 854 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 855 &ddcc_status, NULL) == 0) 856 rps->efficient_freq = 857 clamp_t(u8, 858 (ddcc_status >> 8) & 0xff, 859 rps->min_freq, 860 rps->max_freq); 861 } 862 863 if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { 864 /* Store the frequency values in 16.66 MHZ units, which is 865 * the natural hardware unit for SKL 866 */ 867 rps->rp0_freq *= GEN9_FREQ_SCALER; 868 rps->rp1_freq *= GEN9_FREQ_SCALER; 869 rps->min_freq *= GEN9_FREQ_SCALER; 870 rps->max_freq *= GEN9_FREQ_SCALER; 871 rps->efficient_freq *= GEN9_FREQ_SCALER; 872 } 873 } 874 875 static bool rps_reset(struct intel_rps *rps) 876 { 877 /* force a reset */ 878 rps->power.mode = -1; 879 rps->last_freq = -1; 880 881 if (rps_set(rps, rps->min_freq)) { 882 DRM_ERROR("Failed to reset RPS to initial values\n"); 883 return false; 884 } 885 886 rps->cur_freq = rps->min_freq; 887 return true; 888 } 889 890 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 891 static bool gen9_rps_enable(struct intel_rps *rps) 892 { 893 struct drm_i915_private *i915 = rps_to_i915(rps); 894 struct intel_uncore *uncore = rps_to_uncore(rps); 895 896 /* Program defaults and thresholds for RPS */ 897 if (IS_GEN(i915, 9)) 898 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 899 GEN9_FREQUENCY(rps->rp1_freq)); 900 901 /* 1 second timeout */ 902 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 903 GT_INTERVAL_FROM_US(i915, 1000000)); 904 905 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); 906 907 return rps_reset(rps); 908 } 909 910 static bool gen8_rps_enable(struct intel_rps *rps) 911 { 912 struct intel_uncore *uncore = rps_to_uncore(rps); 913 914 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 915 HSW_FREQUENCY(rps->rp1_freq)); 916 917 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 918 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 919 100000000 / 128); /* 1 second timeout */ 920 921 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 922 923 return rps_reset(rps); 924 } 925 926 static bool gen6_rps_enable(struct intel_rps *rps) 927 { 928 struct intel_uncore *uncore = rps_to_uncore(rps); 929 930 /* Power down if completely idle for over 50ms */ 931 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); 932 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 933 934 return rps_reset(rps); 935 } 936 937 static int chv_rps_max_freq(struct intel_rps *rps) 938 { 939 struct drm_i915_private *i915 = rps_to_i915(rps); 940 u32 val; 941 942 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 943 944 switch (RUNTIME_INFO(i915)->sseu.eu_total) { 945 case 8: 946 /* (2 * 4) config */ 947 val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; 948 break; 949 case 12: 950 /* (2 * 6) config */ 951 val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; 952 break; 953 case 16: 954 /* (2 * 8) config */ 955 default: 956 /* Setting (2 * 8) Min RP0 for any other combination */ 957 val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; 958 break; 959 } 960 961 return val & FB_GFX_FREQ_FUSE_MASK; 962 } 963 964 static int chv_rps_rpe_freq(struct intel_rps *rps) 965 { 966 struct drm_i915_private *i915 = rps_to_i915(rps); 967 u32 val; 968 969 val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); 970 val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; 971 972 return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 973 } 974 975 static int chv_rps_guar_freq(struct intel_rps *rps) 976 { 977 struct drm_i915_private *i915 = rps_to_i915(rps); 978 u32 val; 979 980 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 981 982 return val & FB_GFX_FREQ_FUSE_MASK; 983 } 984 985 static u32 chv_rps_min_freq(struct intel_rps *rps) 986 { 987 struct drm_i915_private *i915 = rps_to_i915(rps); 988 u32 val; 989 990 val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); 991 val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; 992 993 return val & FB_GFX_FREQ_FUSE_MASK; 994 } 995 996 static bool chv_rps_enable(struct intel_rps *rps) 997 { 998 struct intel_uncore *uncore = rps_to_uncore(rps); 999 struct drm_i915_private *i915 = rps_to_i915(rps); 1000 u32 val; 1001 1002 /* 1: Program defaults and thresholds for RPS*/ 1003 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1004 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1005 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1006 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1007 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1008 1009 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1010 1011 /* 2: Enable RPS */ 1012 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1013 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1014 GEN6_RP_MEDIA_IS_GFX | 1015 GEN6_RP_ENABLE | 1016 GEN6_RP_UP_BUSY_AVG | 1017 GEN6_RP_DOWN_IDLE_AVG); 1018 1019 /* Setting Fixed Bias */ 1020 vlv_punit_get(i915); 1021 1022 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; 1023 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1024 1025 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1026 1027 vlv_punit_put(i915); 1028 1029 /* RPS code assumes GPLL is used */ 1030 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 1031 1032 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 1033 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 1034 1035 return rps_reset(rps); 1036 } 1037 1038 static int vlv_rps_guar_freq(struct intel_rps *rps) 1039 { 1040 struct drm_i915_private *i915 = rps_to_i915(rps); 1041 u32 val, rp1; 1042 1043 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1044 1045 rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; 1046 rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 1047 1048 return rp1; 1049 } 1050 1051 static int vlv_rps_max_freq(struct intel_rps *rps) 1052 { 1053 struct drm_i915_private *i915 = rps_to_i915(rps); 1054 u32 val, rp0; 1055 1056 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1057 1058 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 1059 /* Clamp to max */ 1060 rp0 = min_t(u32, rp0, 0xea); 1061 1062 return rp0; 1063 } 1064 1065 static int vlv_rps_rpe_freq(struct intel_rps *rps) 1066 { 1067 struct drm_i915_private *i915 = rps_to_i915(rps); 1068 u32 val, rpe; 1069 1070 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 1071 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 1072 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 1073 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 1074 1075 return rpe; 1076 } 1077 1078 static int vlv_rps_min_freq(struct intel_rps *rps) 1079 { 1080 struct drm_i915_private *i915 = rps_to_i915(rps); 1081 u32 val; 1082 1083 val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; 1084 /* 1085 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 1086 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 1087 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 1088 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 1089 * to make sure it matches what Punit accepts. 1090 */ 1091 return max_t(u32, val, 0xc0); 1092 } 1093 1094 static bool vlv_rps_enable(struct intel_rps *rps) 1095 { 1096 struct intel_uncore *uncore = rps_to_uncore(rps); 1097 struct drm_i915_private *i915 = rps_to_i915(rps); 1098 u32 val; 1099 1100 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1101 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1102 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1103 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1104 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1105 1106 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1107 1108 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1109 GEN6_RP_MEDIA_TURBO | 1110 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1111 GEN6_RP_MEDIA_IS_GFX | 1112 GEN6_RP_ENABLE | 1113 GEN6_RP_UP_BUSY_AVG | 1114 GEN6_RP_DOWN_IDLE_CONT); 1115 1116 vlv_punit_get(i915); 1117 1118 /* Setting Fixed Bias */ 1119 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; 1120 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1121 1122 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1123 1124 vlv_punit_put(i915); 1125 1126 /* RPS code assumes GPLL is used */ 1127 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 1128 1129 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 1130 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 1131 1132 return rps_reset(rps); 1133 } 1134 1135 static unsigned long __ips_gfx_val(struct intel_ips *ips) 1136 { 1137 struct intel_rps *rps = container_of(ips, typeof(*rps), ips); 1138 struct intel_uncore *uncore = rps_to_uncore(rps); 1139 unsigned long t, corr, state1, corr2, state2; 1140 u32 pxvid, ext_v; 1141 1142 lockdep_assert_held(&mchdev_lock); 1143 1144 pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); 1145 pxvid = (pxvid >> 24) & 0x7f; 1146 ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); 1147 1148 state1 = ext_v; 1149 1150 /* Revel in the empirically derived constants */ 1151 1152 /* Correction factor in 1/100000 units */ 1153 t = ips_mch_val(uncore); 1154 if (t > 80) 1155 corr = t * 2349 + 135940; 1156 else if (t >= 50) 1157 corr = t * 964 + 29317; 1158 else /* < 50 */ 1159 corr = t * 301 + 1004; 1160 1161 corr = corr * 150142 * state1 / 10000 - 78642; 1162 corr /= 100000; 1163 corr2 = corr * ips->corr; 1164 1165 state2 = corr2 * state1 / 10000; 1166 state2 /= 100; /* convert to mW */ 1167 1168 __gen5_ips_update(ips); 1169 1170 return ips->gfx_power + state2; 1171 } 1172 1173 void intel_rps_enable(struct intel_rps *rps) 1174 { 1175 struct drm_i915_private *i915 = rps_to_i915(rps); 1176 struct intel_uncore *uncore = rps_to_uncore(rps); 1177 1178 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1179 if (IS_CHERRYVIEW(i915)) 1180 rps->enabled = chv_rps_enable(rps); 1181 else if (IS_VALLEYVIEW(i915)) 1182 rps->enabled = vlv_rps_enable(rps); 1183 else if (INTEL_GEN(i915) >= 9) 1184 rps->enabled = gen9_rps_enable(rps); 1185 else if (INTEL_GEN(i915) >= 8) 1186 rps->enabled = gen8_rps_enable(rps); 1187 else if (INTEL_GEN(i915) >= 6) 1188 rps->enabled = gen6_rps_enable(rps); 1189 else if (IS_IRONLAKE_M(i915)) 1190 rps->enabled = gen5_rps_enable(rps); 1191 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 1192 if (!rps->enabled) 1193 return; 1194 1195 WARN_ON(rps->max_freq < rps->min_freq); 1196 WARN_ON(rps->idle_freq > rps->max_freq); 1197 1198 WARN_ON(rps->efficient_freq < rps->min_freq); 1199 WARN_ON(rps->efficient_freq > rps->max_freq); 1200 } 1201 1202 static void gen6_rps_disable(struct intel_rps *rps) 1203 { 1204 intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); 1205 } 1206 1207 void intel_rps_disable(struct intel_rps *rps) 1208 { 1209 struct drm_i915_private *i915 = rps_to_i915(rps); 1210 1211 rps->enabled = false; 1212 1213 if (INTEL_GEN(i915) >= 6) 1214 gen6_rps_disable(rps); 1215 else if (IS_IRONLAKE_M(i915)) 1216 gen5_rps_disable(rps); 1217 } 1218 1219 static int byt_gpu_freq(struct intel_rps *rps, int val) 1220 { 1221 /* 1222 * N = val - 0xb7 1223 * Slow = Fast = GPLL ref * N 1224 */ 1225 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 1226 } 1227 1228 static int byt_freq_opcode(struct intel_rps *rps, int val) 1229 { 1230 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 1231 } 1232 1233 static int chv_gpu_freq(struct intel_rps *rps, int val) 1234 { 1235 /* 1236 * N = val / 2 1237 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 1238 */ 1239 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 1240 } 1241 1242 static int chv_freq_opcode(struct intel_rps *rps, int val) 1243 { 1244 /* CHV needs even values */ 1245 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 1246 } 1247 1248 int intel_gpu_freq(struct intel_rps *rps, int val) 1249 { 1250 struct drm_i915_private *i915 = rps_to_i915(rps); 1251 1252 if (INTEL_GEN(i915) >= 9) 1253 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 1254 GEN9_FREQ_SCALER); 1255 else if (IS_CHERRYVIEW(i915)) 1256 return chv_gpu_freq(rps, val); 1257 else if (IS_VALLEYVIEW(i915)) 1258 return byt_gpu_freq(rps, val); 1259 else 1260 return val * GT_FREQUENCY_MULTIPLIER; 1261 } 1262 1263 int intel_freq_opcode(struct intel_rps *rps, int val) 1264 { 1265 struct drm_i915_private *i915 = rps_to_i915(rps); 1266 1267 if (INTEL_GEN(i915) >= 9) 1268 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 1269 GT_FREQUENCY_MULTIPLIER); 1270 else if (IS_CHERRYVIEW(i915)) 1271 return chv_freq_opcode(rps, val); 1272 else if (IS_VALLEYVIEW(i915)) 1273 return byt_freq_opcode(rps, val); 1274 else 1275 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 1276 } 1277 1278 static void vlv_init_gpll_ref_freq(struct intel_rps *rps) 1279 { 1280 struct drm_i915_private *i915 = rps_to_i915(rps); 1281 1282 rps->gpll_ref_freq = 1283 vlv_get_cck_clock(i915, "GPLL ref", 1284 CCK_GPLL_CLOCK_CONTROL, 1285 i915->czclk_freq); 1286 1287 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); 1288 } 1289 1290 static void vlv_rps_init(struct intel_rps *rps) 1291 { 1292 struct drm_i915_private *i915 = rps_to_i915(rps); 1293 u32 val; 1294 1295 vlv_iosf_sb_get(i915, 1296 BIT(VLV_IOSF_SB_PUNIT) | 1297 BIT(VLV_IOSF_SB_NC) | 1298 BIT(VLV_IOSF_SB_CCK)); 1299 1300 vlv_init_gpll_ref_freq(rps); 1301 1302 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1303 switch ((val >> 6) & 3) { 1304 case 0: 1305 case 1: 1306 i915->mem_freq = 800; 1307 break; 1308 case 2: 1309 i915->mem_freq = 1066; 1310 break; 1311 case 3: 1312 i915->mem_freq = 1333; 1313 break; 1314 } 1315 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); 1316 1317 rps->max_freq = vlv_rps_max_freq(rps); 1318 rps->rp0_freq = rps->max_freq; 1319 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 1320 intel_gpu_freq(rps, rps->max_freq), 1321 rps->max_freq); 1322 1323 rps->efficient_freq = vlv_rps_rpe_freq(rps); 1324 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 1325 intel_gpu_freq(rps, rps->efficient_freq), 1326 rps->efficient_freq); 1327 1328 rps->rp1_freq = vlv_rps_guar_freq(rps); 1329 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 1330 intel_gpu_freq(rps, rps->rp1_freq), 1331 rps->rp1_freq); 1332 1333 rps->min_freq = vlv_rps_min_freq(rps); 1334 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 1335 intel_gpu_freq(rps, rps->min_freq), 1336 rps->min_freq); 1337 1338 vlv_iosf_sb_put(i915, 1339 BIT(VLV_IOSF_SB_PUNIT) | 1340 BIT(VLV_IOSF_SB_NC) | 1341 BIT(VLV_IOSF_SB_CCK)); 1342 } 1343 1344 static void chv_rps_init(struct intel_rps *rps) 1345 { 1346 struct drm_i915_private *i915 = rps_to_i915(rps); 1347 u32 val; 1348 1349 vlv_iosf_sb_get(i915, 1350 BIT(VLV_IOSF_SB_PUNIT) | 1351 BIT(VLV_IOSF_SB_NC) | 1352 BIT(VLV_IOSF_SB_CCK)); 1353 1354 vlv_init_gpll_ref_freq(rps); 1355 1356 val = vlv_cck_read(i915, CCK_FUSE_REG); 1357 1358 switch ((val >> 2) & 0x7) { 1359 case 3: 1360 i915->mem_freq = 2000; 1361 break; 1362 default: 1363 i915->mem_freq = 1600; 1364 break; 1365 } 1366 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); 1367 1368 rps->max_freq = chv_rps_max_freq(rps); 1369 rps->rp0_freq = rps->max_freq; 1370 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 1371 intel_gpu_freq(rps, rps->max_freq), 1372 rps->max_freq); 1373 1374 rps->efficient_freq = chv_rps_rpe_freq(rps); 1375 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 1376 intel_gpu_freq(rps, rps->efficient_freq), 1377 rps->efficient_freq); 1378 1379 rps->rp1_freq = chv_rps_guar_freq(rps); 1380 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 1381 intel_gpu_freq(rps, rps->rp1_freq), 1382 rps->rp1_freq); 1383 1384 rps->min_freq = chv_rps_min_freq(rps); 1385 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 1386 intel_gpu_freq(rps, rps->min_freq), 1387 rps->min_freq); 1388 1389 vlv_iosf_sb_put(i915, 1390 BIT(VLV_IOSF_SB_PUNIT) | 1391 BIT(VLV_IOSF_SB_NC) | 1392 BIT(VLV_IOSF_SB_CCK)); 1393 1394 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | 1395 rps->min_freq) & 1, 1396 "Odd GPU freq values\n"); 1397 } 1398 1399 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) 1400 { 1401 ei->ktime = ktime_get_raw(); 1402 ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); 1403 ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); 1404 } 1405 1406 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) 1407 { 1408 struct intel_uncore *uncore = rps_to_uncore(rps); 1409 const struct intel_rps_ei *prev = &rps->ei; 1410 struct intel_rps_ei now; 1411 u32 events = 0; 1412 1413 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) 1414 return 0; 1415 1416 vlv_c0_read(uncore, &now); 1417 1418 if (prev->ktime) { 1419 u64 time, c0; 1420 u32 render, media; 1421 1422 time = ktime_us_delta(now.ktime, prev->ktime); 1423 1424 time *= rps_to_i915(rps)->czclk_freq; 1425 1426 /* Workload can be split between render + media, 1427 * e.g. SwapBuffers being blitted in X after being rendered in 1428 * mesa. To account for this we need to combine both engines 1429 * into our activity counter. 1430 */ 1431 render = now.render_c0 - prev->render_c0; 1432 media = now.media_c0 - prev->media_c0; 1433 c0 = max(render, media); 1434 c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ 1435 1436 if (c0 > time * rps->power.up_threshold) 1437 events = GEN6_PM_RP_UP_THRESHOLD; 1438 else if (c0 < time * rps->power.down_threshold) 1439 events = GEN6_PM_RP_DOWN_THRESHOLD; 1440 } 1441 1442 rps->ei = now; 1443 return events; 1444 } 1445 1446 static void rps_work(struct work_struct *work) 1447 { 1448 struct intel_rps *rps = container_of(work, typeof(*rps), work); 1449 struct intel_gt *gt = rps_to_gt(rps); 1450 bool client_boost = false; 1451 int new_freq, adj, min, max; 1452 u32 pm_iir = 0; 1453 1454 spin_lock_irq(>->irq_lock); 1455 pm_iir = fetch_and_zero(&rps->pm_iir); 1456 client_boost = atomic_read(&rps->num_waiters); 1457 spin_unlock_irq(>->irq_lock); 1458 1459 /* Make sure we didn't queue anything we're not going to process. */ 1460 if ((pm_iir & rps->pm_events) == 0 && !client_boost) 1461 goto out; 1462 1463 mutex_lock(&rps->lock); 1464 1465 pm_iir |= vlv_wa_c0_ei(rps, pm_iir); 1466 1467 adj = rps->last_adj; 1468 new_freq = rps->cur_freq; 1469 min = rps->min_freq_softlimit; 1470 max = rps->max_freq_softlimit; 1471 if (client_boost) 1472 max = rps->max_freq; 1473 if (client_boost && new_freq < rps->boost_freq) { 1474 new_freq = rps->boost_freq; 1475 adj = 0; 1476 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1477 if (adj > 0) 1478 adj *= 2; 1479 else /* CHV needs even encode values */ 1480 adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; 1481 1482 if (new_freq >= rps->max_freq_softlimit) 1483 adj = 0; 1484 } else if (client_boost) { 1485 adj = 0; 1486 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1487 if (rps->cur_freq > rps->efficient_freq) 1488 new_freq = rps->efficient_freq; 1489 else if (rps->cur_freq > rps->min_freq_softlimit) 1490 new_freq = rps->min_freq_softlimit; 1491 adj = 0; 1492 } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { 1493 if (adj < 0) 1494 adj *= 2; 1495 else /* CHV needs even encode values */ 1496 adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; 1497 1498 if (new_freq <= rps->min_freq_softlimit) 1499 adj = 0; 1500 } else { /* unknown event */ 1501 adj = 0; 1502 } 1503 1504 rps->last_adj = adj; 1505 1506 /* 1507 * Limit deboosting and boosting to keep ourselves at the extremes 1508 * when in the respective power modes (i.e. slowly decrease frequencies 1509 * while in the HIGH_POWER zone and slowly increase frequencies while 1510 * in the LOW_POWER zone). On idle, we will hit the timeout and drop 1511 * to the next level quickly, and conversely if busy we expect to 1512 * hit a waitboost and rapidly switch into max power. 1513 */ 1514 if ((adj < 0 && rps->power.mode == HIGH_POWER) || 1515 (adj > 0 && rps->power.mode == LOW_POWER)) 1516 rps->last_adj = 0; 1517 1518 /* sysfs frequency interfaces may have snuck in while servicing the 1519 * interrupt 1520 */ 1521 new_freq += adj; 1522 new_freq = clamp_t(int, new_freq, min, max); 1523 1524 if (intel_rps_set(rps, new_freq)) { 1525 DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); 1526 rps->last_adj = 0; 1527 } 1528 1529 mutex_unlock(&rps->lock); 1530 1531 out: 1532 spin_lock_irq(>->irq_lock); 1533 gen6_gt_pm_unmask_irq(gt, rps->pm_events); 1534 spin_unlock_irq(>->irq_lock); 1535 } 1536 1537 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1538 { 1539 struct intel_gt *gt = rps_to_gt(rps); 1540 const u32 events = rps->pm_events & pm_iir; 1541 1542 lockdep_assert_held(>->irq_lock); 1543 1544 if (unlikely(!events)) 1545 return; 1546 1547 gen6_gt_pm_mask_irq(gt, events); 1548 1549 rps->pm_iir |= events; 1550 schedule_work(&rps->work); 1551 } 1552 1553 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1554 { 1555 struct intel_gt *gt = rps_to_gt(rps); 1556 1557 if (pm_iir & rps->pm_events) { 1558 spin_lock(>->irq_lock); 1559 gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); 1560 rps->pm_iir |= pm_iir & rps->pm_events; 1561 schedule_work(&rps->work); 1562 spin_unlock(>->irq_lock); 1563 } 1564 1565 if (INTEL_GEN(gt->i915) >= 8) 1566 return; 1567 1568 if (pm_iir & PM_VEBOX_USER_INTERRUPT) 1569 intel_engine_breadcrumbs_irq(gt->engine[VECS0]); 1570 1571 if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) 1572 DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); 1573 } 1574 1575 void gen5_rps_irq_handler(struct intel_rps *rps) 1576 { 1577 struct intel_uncore *uncore = rps_to_uncore(rps); 1578 u32 busy_up, busy_down, max_avg, min_avg; 1579 u8 new_freq; 1580 1581 spin_lock(&mchdev_lock); 1582 1583 intel_uncore_write16(uncore, 1584 MEMINTRSTS, 1585 intel_uncore_read(uncore, MEMINTRSTS)); 1586 1587 intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 1588 busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); 1589 busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); 1590 max_avg = intel_uncore_read(uncore, RCBMAXAVG); 1591 min_avg = intel_uncore_read(uncore, RCBMINAVG); 1592 1593 /* Handle RCS change request from hw */ 1594 new_freq = rps->cur_freq; 1595 if (busy_up > max_avg) 1596 new_freq++; 1597 else if (busy_down < min_avg) 1598 new_freq--; 1599 new_freq = clamp(new_freq, 1600 rps->min_freq_softlimit, 1601 rps->max_freq_softlimit); 1602 1603 if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) 1604 rps->cur_freq = new_freq; 1605 1606 spin_unlock(&mchdev_lock); 1607 } 1608 1609 void intel_rps_init_early(struct intel_rps *rps) 1610 { 1611 mutex_init(&rps->lock); 1612 mutex_init(&rps->power.mutex); 1613 1614 INIT_WORK(&rps->work, rps_work); 1615 1616 atomic_set(&rps->num_waiters, 0); 1617 } 1618 1619 void intel_rps_init(struct intel_rps *rps) 1620 { 1621 struct drm_i915_private *i915 = rps_to_i915(rps); 1622 1623 if (IS_CHERRYVIEW(i915)) 1624 chv_rps_init(rps); 1625 else if (IS_VALLEYVIEW(i915)) 1626 vlv_rps_init(rps); 1627 else if (INTEL_GEN(i915) >= 6) 1628 gen6_rps_init(rps); 1629 else if (IS_IRONLAKE_M(i915)) 1630 gen5_rps_init(rps); 1631 1632 /* Derive initial user preferences/limits from the hardware limits */ 1633 rps->max_freq_softlimit = rps->max_freq; 1634 rps->min_freq_softlimit = rps->min_freq; 1635 1636 /* After setting max-softlimit, find the overclock max freq */ 1637 if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { 1638 u32 params = 0; 1639 1640 sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, 1641 ¶ms, NULL); 1642 if (params & BIT(31)) { /* OC supported */ 1643 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", 1644 (rps->max_freq & 0xff) * 50, 1645 (params & 0xff) * 50); 1646 rps->max_freq = params & 0xff; 1647 } 1648 } 1649 1650 /* Finally allow us to boost to max by default */ 1651 rps->boost_freq = rps->max_freq; 1652 rps->idle_freq = rps->min_freq; 1653 rps->cur_freq = rps->idle_freq; 1654 1655 rps->pm_intrmsk_mbz = 0; 1656 1657 /* 1658 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer 1659 * if GEN6_PM_UP_EI_EXPIRED is masked. 1660 * 1661 * TODO: verify if this can be reproduced on VLV,CHV. 1662 */ 1663 if (INTEL_GEN(i915) <= 7) 1664 rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; 1665 1666 if (INTEL_GEN(i915) >= 8) 1667 rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1668 } 1669 1670 u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat) 1671 { 1672 struct drm_i915_private *i915 = rps_to_i915(rps); 1673 u32 cagf; 1674 1675 if (INTEL_GEN(i915) >= 9) 1676 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; 1677 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 1678 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; 1679 else 1680 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; 1681 1682 return cagf; 1683 } 1684 1685 /* External interface for intel_ips.ko */ 1686 1687 static struct drm_i915_private __rcu *ips_mchdev; 1688 1689 /** 1690 * Tells the intel_ips driver that the i915 driver is now loaded, if 1691 * IPS got loaded first. 1692 * 1693 * This awkward dance is so that neither module has to depend on the 1694 * other in order for IPS to do the appropriate communication of 1695 * GPU turbo limits to i915. 1696 */ 1697 static void 1698 ips_ping_for_i915_load(void) 1699 { 1700 void (*link)(void); 1701 1702 link = symbol_get(ips_link_to_i915_driver); 1703 if (link) { 1704 link(); 1705 symbol_put(ips_link_to_i915_driver); 1706 } 1707 } 1708 1709 void intel_rps_driver_register(struct intel_rps *rps) 1710 { 1711 struct intel_gt *gt = rps_to_gt(rps); 1712 1713 /* 1714 * We only register the i915 ips part with intel-ips once everything is 1715 * set up, to avoid intel-ips sneaking in and reading bogus values. 1716 */ 1717 if (IS_GEN(gt->i915, 5)) { 1718 rcu_assign_pointer(ips_mchdev, gt->i915); 1719 ips_ping_for_i915_load(); 1720 } 1721 } 1722 1723 void intel_rps_driver_unregister(struct intel_rps *rps) 1724 { 1725 rcu_assign_pointer(ips_mchdev, NULL); 1726 } 1727 1728 static struct drm_i915_private *mchdev_get(void) 1729 { 1730 struct drm_i915_private *i915; 1731 1732 rcu_read_lock(); 1733 i915 = rcu_dereference(ips_mchdev); 1734 if (!kref_get_unless_zero(&i915->drm.ref)) 1735 i915 = NULL; 1736 rcu_read_unlock(); 1737 1738 return i915; 1739 } 1740 1741 /** 1742 * i915_read_mch_val - return value for IPS use 1743 * 1744 * Calculate and return a value for the IPS driver to use when deciding whether 1745 * we have thermal and power headroom to increase CPU or GPU power budget. 1746 */ 1747 unsigned long i915_read_mch_val(void) 1748 { 1749 struct drm_i915_private *i915; 1750 unsigned long chipset_val = 0; 1751 unsigned long graphics_val = 0; 1752 intel_wakeref_t wakeref; 1753 1754 i915 = mchdev_get(); 1755 if (!i915) 1756 return 0; 1757 1758 with_intel_runtime_pm(&i915->runtime_pm, wakeref) { 1759 struct intel_ips *ips = &i915->gt.rps.ips; 1760 1761 spin_lock_irq(&mchdev_lock); 1762 chipset_val = __ips_chipset_val(ips); 1763 graphics_val = __ips_gfx_val(ips); 1764 spin_unlock_irq(&mchdev_lock); 1765 } 1766 1767 drm_dev_put(&i915->drm); 1768 return chipset_val + graphics_val; 1769 } 1770 EXPORT_SYMBOL_GPL(i915_read_mch_val); 1771 1772 /** 1773 * i915_gpu_raise - raise GPU frequency limit 1774 * 1775 * Raise the limit; IPS indicates we have thermal headroom. 1776 */ 1777 bool i915_gpu_raise(void) 1778 { 1779 struct drm_i915_private *i915; 1780 struct intel_rps *rps; 1781 1782 i915 = mchdev_get(); 1783 if (!i915) 1784 return false; 1785 1786 rps = &i915->gt.rps; 1787 1788 spin_lock_irq(&mchdev_lock); 1789 if (rps->max_freq_softlimit < rps->max_freq) 1790 rps->max_freq_softlimit++; 1791 spin_unlock_irq(&mchdev_lock); 1792 1793 drm_dev_put(&i915->drm); 1794 return true; 1795 } 1796 EXPORT_SYMBOL_GPL(i915_gpu_raise); 1797 1798 /** 1799 * i915_gpu_lower - lower GPU frequency limit 1800 * 1801 * IPS indicates we're close to a thermal limit, so throttle back the GPU 1802 * frequency maximum. 1803 */ 1804 bool i915_gpu_lower(void) 1805 { 1806 struct drm_i915_private *i915; 1807 struct intel_rps *rps; 1808 1809 i915 = mchdev_get(); 1810 if (!i915) 1811 return false; 1812 1813 rps = &i915->gt.rps; 1814 1815 spin_lock_irq(&mchdev_lock); 1816 if (rps->max_freq_softlimit > rps->min_freq) 1817 rps->max_freq_softlimit--; 1818 spin_unlock_irq(&mchdev_lock); 1819 1820 drm_dev_put(&i915->drm); 1821 return true; 1822 } 1823 EXPORT_SYMBOL_GPL(i915_gpu_lower); 1824 1825 /** 1826 * i915_gpu_busy - indicate GPU business to IPS 1827 * 1828 * Tell the IPS driver whether or not the GPU is busy. 1829 */ 1830 bool i915_gpu_busy(void) 1831 { 1832 struct drm_i915_private *i915; 1833 bool ret; 1834 1835 i915 = mchdev_get(); 1836 if (!i915) 1837 return false; 1838 1839 ret = i915->gt.awake; 1840 1841 drm_dev_put(&i915->drm); 1842 return ret; 1843 } 1844 EXPORT_SYMBOL_GPL(i915_gpu_busy); 1845 1846 /** 1847 * i915_gpu_turbo_disable - disable graphics turbo 1848 * 1849 * Disable graphics turbo by resetting the max frequency and setting the 1850 * current frequency to the default. 1851 */ 1852 bool i915_gpu_turbo_disable(void) 1853 { 1854 struct drm_i915_private *i915; 1855 struct intel_rps *rps; 1856 bool ret; 1857 1858 i915 = mchdev_get(); 1859 if (!i915) 1860 return false; 1861 1862 rps = &i915->gt.rps; 1863 1864 spin_lock_irq(&mchdev_lock); 1865 rps->max_freq_softlimit = rps->min_freq; 1866 ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); 1867 spin_unlock_irq(&mchdev_lock); 1868 1869 drm_dev_put(&i915->drm); 1870 return ret; 1871 } 1872 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 1873