// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include <drm/intel/i915_drm.h>

#include "display/intel_display.h"
#include "display/intel_display_irq.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_reg.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm.h"
#include "intel_gt_pm_irq.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_rps.h"
#include "vlv_sideband.h"
#include "../../../platform/x86/intel_ips.h"

#define BUSY_MAX_EI	20u /* ms */

/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return &gt_to_guc(gt)->slpc;
}

static bool rps_uses_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return intel_uc_uses_guc_slpc(&gt->uc);
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}

static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

static void rps_timer(struct timer_list *t)
{
	struct intel_rps *rps = from_timer(rps, t, timer);
	struct intel_gt *gt = rps_to_gt(rps);
	struct intel_engine_cs *engine;
	ktime_t dt, last, timestamp;
	enum intel_engine_id id;
	s64 max_busy[3] = {};

	timestamp = 0;
	for_each_engine(engine, gt, id) {
		s64 busy;
		int i;

		dt = intel_engine_get_busy_time(engine, &timestamp);
		last = engine->stats.rps;
		engine->stats.rps = dt;

		busy = ktime_to_ns(ktime_sub(dt, last));
		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
			if (busy > max_busy[i])
				swap(busy, max_busy[i]);
		}
	}
	last = rps->pm_timestamp;
	rps->pm_timestamp = timestamp;

	if (intel_rps_is_active(rps)) {
		s64 busy;
		int i;

		dt = ktime_sub(timestamp, last);

		/*
		 * Our goal is to evaluate each engine independently, so we run
		 * at the lowest clocks required to sustain the heaviest
		 * workload. However, a task may be split into sequential
		 * dependent operations across a set of engines, such that
		 * the independent contributions do not account for high load,
		 * but overall the task is GPU bound. For example, consider
		 * video decode on vcs followed by colour post-processing
		 * on vecs, followed by general post-processing on rcs.
		 * Since multi-engines being active does imply a single
		 * continuous workload across all engines, we hedge our
		 * bets by only contributing a factor of the distributed
		 * load into our busyness calculation.
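		 *
		 * As a rough illustration (numbers purely hypothetical): with
		 * the three busiest engines reporting {8ms, 4ms, 2ms} of busy
		 * time over the interval, the loop below accumulates
		 * 8 + 4/2 + 2/4 = 10.5ms of "busy" time, rather than counting
		 * only the single heaviest engine.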
121 */ 122 busy = max_busy[0]; 123 for (i = 1; i < ARRAY_SIZE(max_busy); i++) { 124 if (!max_busy[i]) 125 break; 126 127 busy += div_u64(max_busy[i], 1 << i); 128 } 129 GT_TRACE(gt, 130 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", 131 busy, (int)div64_u64(100 * busy, dt), 132 max_busy[0], max_busy[1], max_busy[2], 133 rps->pm_interval); 134 135 if (100 * busy > rps->power.up_threshold * dt && 136 rps->cur_freq < rps->max_freq_softlimit) { 137 rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; 138 rps->pm_interval = 1; 139 queue_work(gt->i915->unordered_wq, &rps->work); 140 } else if (100 * busy < rps->power.down_threshold * dt && 141 rps->cur_freq > rps->min_freq_softlimit) { 142 rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; 143 rps->pm_interval = 1; 144 queue_work(gt->i915->unordered_wq, &rps->work); 145 } else { 146 rps->last_adj = 0; 147 } 148 149 mod_timer(&rps->timer, 150 jiffies + msecs_to_jiffies(rps->pm_interval)); 151 rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); 152 } 153 } 154 155 static void rps_start_timer(struct intel_rps *rps) 156 { 157 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 158 rps->pm_interval = 1; 159 mod_timer(&rps->timer, jiffies + 1); 160 } 161 162 static void rps_stop_timer(struct intel_rps *rps) 163 { 164 timer_delete_sync(&rps->timer); 165 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 166 cancel_work_sync(&rps->work); 167 } 168 169 static u32 rps_pm_mask(struct intel_rps *rps, u8 val) 170 { 171 u32 mask = 0; 172 173 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ 174 if (val > rps->min_freq_softlimit) 175 mask |= (GEN6_PM_RP_UP_EI_EXPIRED | 176 GEN6_PM_RP_DOWN_THRESHOLD | 177 GEN6_PM_RP_DOWN_TIMEOUT); 178 179 if (val < rps->max_freq_softlimit) 180 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 181 182 mask &= rps->pm_events; 183 184 return rps_pm_sanitize_mask(rps, ~mask); 185 } 186 187 static void rps_reset_ei(struct intel_rps *rps) 188 { 189 memset(&rps->ei, 0, sizeof(rps->ei)); 190 } 191 192 static void rps_enable_interrupts(struct intel_rps *rps) 193 { 194 struct intel_gt *gt = rps_to_gt(rps); 195 196 GEM_BUG_ON(rps_uses_slpc(rps)); 197 198 GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", 199 rps->pm_events, rps_pm_mask(rps, rps->last_freq)); 200 201 rps_reset_ei(rps); 202 203 spin_lock_irq(gt->irq_lock); 204 gen6_gt_pm_enable_irq(gt, rps->pm_events); 205 spin_unlock_irq(gt->irq_lock); 206 207 intel_uncore_write(gt->uncore, 208 GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq)); 209 } 210 211 static void gen6_rps_reset_interrupts(struct intel_rps *rps) 212 { 213 gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); 214 } 215 216 static void gen11_rps_reset_interrupts(struct intel_rps *rps) 217 { 218 while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) 219 ; 220 } 221 222 static void rps_reset_interrupts(struct intel_rps *rps) 223 { 224 struct intel_gt *gt = rps_to_gt(rps); 225 226 spin_lock_irq(gt->irq_lock); 227 if (GRAPHICS_VER(gt->i915) >= 11) 228 gen11_rps_reset_interrupts(rps); 229 else 230 gen6_rps_reset_interrupts(rps); 231 232 rps->pm_iir = 0; 233 spin_unlock_irq(gt->irq_lock); 234 } 235 236 static void rps_disable_interrupts(struct intel_rps *rps) 237 { 238 struct intel_gt *gt = rps_to_gt(rps); 239 240 intel_uncore_write(gt->uncore, 241 GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); 242 243 spin_lock_irq(gt->irq_lock); 244 gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); 245 spin_unlock_irq(gt->irq_lock); 246 247 
intel_synchronize_irq(gt->i915); 248 249 /* 250 * Now that we will not be generating any more work, flush any 251 * outstanding tasks. As we are called on the RPS idle path, 252 * we will reset the GPU to minimum frequencies, so the current 253 * state of the worker can be discarded. 254 */ 255 cancel_work_sync(&rps->work); 256 257 rps_reset_interrupts(rps); 258 GT_TRACE(gt, "interrupts:off\n"); 259 } 260 261 static const struct cparams { 262 u16 i; 263 u16 t; 264 u16 m; 265 u16 c; 266 } cparams[] = { 267 { 1, 1333, 301, 28664 }, 268 { 1, 1067, 294, 24460 }, 269 { 1, 800, 294, 25192 }, 270 { 0, 1333, 276, 27605 }, 271 { 0, 1067, 276, 27605 }, 272 { 0, 800, 231, 23784 }, 273 }; 274 275 static void gen5_rps_init(struct intel_rps *rps) 276 { 277 struct drm_i915_private *i915 = rps_to_i915(rps); 278 struct intel_uncore *uncore = rps_to_uncore(rps); 279 u8 fmax, fmin, fstart; 280 u32 rgvmodectl; 281 int c_m, i; 282 283 if (i915->fsb_freq <= 3200000) 284 c_m = 0; 285 else if (i915->fsb_freq <= 4800000) 286 c_m = 1; 287 else 288 c_m = 2; 289 290 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 291 if (cparams[i].i == c_m && 292 cparams[i].t == DIV_ROUND_CLOSEST(i915->mem_freq, 1000)) { 293 rps->ips.m = cparams[i].m; 294 rps->ips.c = cparams[i].c; 295 break; 296 } 297 } 298 299 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 300 301 /* Set up min, max, and cur for interrupt handling */ 302 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 303 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 304 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 305 MEMMODE_FSTART_SHIFT; 306 drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", 307 fmax, fmin, fstart); 308 309 rps->min_freq = fmax; 310 rps->efficient_freq = fstart; 311 rps->max_freq = fmin; 312 } 313 314 static unsigned long 315 __ips_chipset_val(struct intel_ips *ips) 316 { 317 struct intel_uncore *uncore = 318 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 319 unsigned long now = jiffies_to_msecs(jiffies), dt; 320 unsigned long result; 321 u64 total, delta; 322 323 lockdep_assert_held(&mchdev_lock); 324 325 /* 326 * Prevent division-by-zero if we are asking too fast. 327 * Also, we don't get interesting results if we are polling 328 * faster than once in 10ms, so just return the saved value 329 * in such cases. 
330 */ 331 dt = now - ips->last_time1; 332 if (dt <= 10) 333 return ips->chipset_power; 334 335 /* FIXME: handle per-counter overflow */ 336 total = intel_uncore_read(uncore, DMIEC); 337 total += intel_uncore_read(uncore, DDREC); 338 total += intel_uncore_read(uncore, CSIEC); 339 340 delta = total - ips->last_count1; 341 342 result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); 343 344 ips->last_count1 = total; 345 ips->last_time1 = now; 346 347 ips->chipset_power = result; 348 349 return result; 350 } 351 352 static unsigned long ips_mch_val(struct intel_uncore *uncore) 353 { 354 unsigned int m, x, b; 355 u32 tsfs; 356 357 tsfs = intel_uncore_read(uncore, TSFS); 358 x = intel_uncore_read8(uncore, TR1); 359 360 b = tsfs & TSFS_INTR_MASK; 361 m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; 362 363 return m * x / 127 - b; 364 } 365 366 static int _pxvid_to_vd(u8 pxvid) 367 { 368 if (pxvid == 0) 369 return 0; 370 371 if (pxvid >= 8 && pxvid < 31) 372 pxvid = 31; 373 374 return (pxvid + 2) * 125; 375 } 376 377 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) 378 { 379 const int vd = _pxvid_to_vd(pxvid); 380 381 if (INTEL_INFO(i915)->is_mobile) 382 return max(vd - 1125, 0); 383 384 return vd; 385 } 386 387 static void __gen5_ips_update(struct intel_ips *ips) 388 { 389 struct intel_uncore *uncore = 390 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 391 u64 now, delta, dt; 392 u32 count; 393 394 lockdep_assert_held(&mchdev_lock); 395 396 now = ktime_get_raw_ns(); 397 dt = now - ips->last_time2; 398 do_div(dt, NSEC_PER_MSEC); 399 400 /* Don't divide by 0 */ 401 if (dt <= 10) 402 return; 403 404 count = intel_uncore_read(uncore, GFXEC); 405 delta = count - ips->last_count2; 406 407 ips->last_count2 = count; 408 ips->last_time2 = now; 409 410 /* More magic constants... 
*/ 411 ips->gfx_power = div_u64(delta * 1181, dt * 10); 412 } 413 414 static void gen5_rps_update(struct intel_rps *rps) 415 { 416 spin_lock_irq(&mchdev_lock); 417 __gen5_ips_update(&rps->ips); 418 spin_unlock_irq(&mchdev_lock); 419 } 420 421 static unsigned int gen5_invert_freq(struct intel_rps *rps, 422 unsigned int val) 423 { 424 /* Invert the frequency bin into an ips delay */ 425 val = rps->max_freq - val; 426 val = rps->min_freq + val; 427 428 return val; 429 } 430 431 static int __gen5_rps_set(struct intel_rps *rps, u8 val) 432 { 433 struct intel_uncore *uncore = rps_to_uncore(rps); 434 u16 rgvswctl; 435 436 lockdep_assert_held(&mchdev_lock); 437 438 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 439 if (rgvswctl & MEMCTL_CMD_STS) { 440 drm_dbg(&rps_to_i915(rps)->drm, 441 "gpu busy, RCS change rejected\n"); 442 return -EBUSY; /* still busy with another command */ 443 } 444 445 /* Invert the frequency bin into an ips delay */ 446 val = gen5_invert_freq(rps, val); 447 448 rgvswctl = 449 (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 450 (val << MEMCTL_FREQ_SHIFT) | 451 MEMCTL_SFCAVM; 452 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 453 intel_uncore_posting_read16(uncore, MEMSWCTL); 454 455 rgvswctl |= MEMCTL_CMD_STS; 456 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 457 458 return 0; 459 } 460 461 static int gen5_rps_set(struct intel_rps *rps, u8 val) 462 { 463 int err; 464 465 spin_lock_irq(&mchdev_lock); 466 err = __gen5_rps_set(rps, val); 467 spin_unlock_irq(&mchdev_lock); 468 469 return err; 470 } 471 472 static unsigned long intel_pxfreq(u32 vidfreq) 473 { 474 int div = (vidfreq & 0x3f0000) >> 16; 475 int post = (vidfreq & 0x3000) >> 12; 476 int pre = (vidfreq & 0x7); 477 478 if (!pre) 479 return 0; 480 481 return div * 133333 / (pre << post); 482 } 483 484 static unsigned int init_emon(struct intel_uncore *uncore) 485 { 486 u8 pxw[16]; 487 int i; 488 489 /* Disable to program */ 490 intel_uncore_write(uncore, ECR, 0); 491 intel_uncore_posting_read(uncore, ECR); 492 493 /* Program energy weights for various events */ 494 intel_uncore_write(uncore, SDEW, 0x15040d00); 495 intel_uncore_write(uncore, CSIEW0, 0x007f0000); 496 intel_uncore_write(uncore, CSIEW1, 0x1e220004); 497 intel_uncore_write(uncore, CSIEW2, 0x04000004); 498 499 for (i = 0; i < 5; i++) 500 intel_uncore_write(uncore, PEW(i), 0); 501 for (i = 0; i < 3; i++) 502 intel_uncore_write(uncore, DEW(i), 0); 503 504 /* Program P-state weights to account for frequency power adjustment */ 505 for (i = 0; i < 16; i++) { 506 u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); 507 unsigned int freq = intel_pxfreq(pxvidfreq); 508 unsigned int vid = 509 (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 510 unsigned int val; 511 512 val = vid * vid * freq / 1000 * 255; 513 val /= 127 * 127 * 900; 514 515 pxw[i] = val; 516 } 517 /* Render standby states get 0 weight */ 518 pxw[14] = 0; 519 pxw[15] = 0; 520 521 for (i = 0; i < 4; i++) { 522 intel_uncore_write(uncore, PXW(i), 523 pxw[i * 4 + 0] << 24 | 524 pxw[i * 4 + 1] << 16 | 525 pxw[i * 4 + 2] << 8 | 526 pxw[i * 4 + 3] << 0); 527 } 528 529 /* Adjust magic regs to magic values (more experimental results) */ 530 intel_uncore_write(uncore, OGW0, 0); 531 intel_uncore_write(uncore, OGW1, 0); 532 intel_uncore_write(uncore, EG0, 0x00007f00); 533 intel_uncore_write(uncore, EG1, 0x0000000e); 534 intel_uncore_write(uncore, EG2, 0x000e0000); 535 intel_uncore_write(uncore, EG3, 0x68000300); 536 intel_uncore_write(uncore, EG4, 0x42000000); 537 intel_uncore_write(uncore, EG5, 
0x00140031); 538 intel_uncore_write(uncore, EG6, 0); 539 intel_uncore_write(uncore, EG7, 0); 540 541 for (i = 0; i < 8; i++) 542 intel_uncore_write(uncore, PXWL(i), 0); 543 544 /* Enable PMON + select events */ 545 intel_uncore_write(uncore, ECR, 0x80000019); 546 547 return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; 548 } 549 550 static bool gen5_rps_enable(struct intel_rps *rps) 551 { 552 struct drm_i915_private *i915 = rps_to_i915(rps); 553 struct intel_uncore *uncore = rps_to_uncore(rps); 554 u8 fstart, vstart; 555 u32 rgvmodectl; 556 557 spin_lock_irq(&mchdev_lock); 558 559 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 560 561 /* Enable temp reporting */ 562 intel_uncore_write16(uncore, PMMISC, 563 intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); 564 intel_uncore_write16(uncore, TSC1, 565 intel_uncore_read16(uncore, TSC1) | TSE); 566 567 /* 100ms RC evaluation intervals */ 568 intel_uncore_write(uncore, RCUPEI, 100000); 569 intel_uncore_write(uncore, RCDNEI, 100000); 570 571 /* Set max/min thresholds to 90ms and 80ms respectively */ 572 intel_uncore_write(uncore, RCBMAXAVG, 90000); 573 intel_uncore_write(uncore, RCBMINAVG, 80000); 574 575 intel_uncore_write(uncore, MEMIHYST, 1); 576 577 /* Set up min, max, and cur for interrupt handling */ 578 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 579 MEMMODE_FSTART_SHIFT; 580 581 vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & 582 PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 583 584 intel_uncore_write(uncore, 585 MEMINTREN, 586 MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 587 588 intel_uncore_write(uncore, VIDSTART, vstart); 589 intel_uncore_posting_read(uncore, VIDSTART); 590 591 rgvmodectl |= MEMMODE_SWMODE_EN; 592 intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); 593 594 if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & 595 MEMCTL_CMD_STS) == 0, 10)) 596 drm_err(&uncore->i915->drm, 597 "stuck trying to change perf mode\n"); 598 mdelay(1); 599 600 __gen5_rps_set(rps, rps->cur_freq); 601 602 rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); 603 rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); 604 rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); 605 rps->ips.last_time1 = jiffies_to_msecs(jiffies); 606 607 rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); 608 rps->ips.last_time2 = ktime_get_raw_ns(); 609 610 spin_lock(&i915->irq_lock); 611 ilk_enable_display_irq(i915, DE_PCU_EVENT); 612 spin_unlock(&i915->irq_lock); 613 614 spin_unlock_irq(&mchdev_lock); 615 616 rps->ips.corr = init_emon(uncore); 617 618 return true; 619 } 620 621 static void gen5_rps_disable(struct intel_rps *rps) 622 { 623 struct drm_i915_private *i915 = rps_to_i915(rps); 624 struct intel_uncore *uncore = rps_to_uncore(rps); 625 u16 rgvswctl; 626 627 spin_lock_irq(&mchdev_lock); 628 629 spin_lock(&i915->irq_lock); 630 ilk_disable_display_irq(i915, DE_PCU_EVENT); 631 spin_unlock(&i915->irq_lock); 632 633 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 634 635 /* Ack interrupts, disable EFC interrupt */ 636 intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0); 637 intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 638 639 /* Go back to the starting frequency */ 640 __gen5_rps_set(rps, rps->idle_freq); 641 mdelay(1); 642 rgvswctl |= MEMCTL_CMD_STS; 643 intel_uncore_write(uncore, MEMSWCTL, rgvswctl); 644 mdelay(1); 645 646 spin_unlock_irq(&mchdev_lock); 647 } 648 649 static u32 rps_limits(struct intel_rps *rps, u8 val) 650 { 651 u32 limits; 652 653 /* 654 * Only set the down limit when we've reached the 
lowest level to avoid 655 * getting more interrupts, otherwise leave this clear. This prevents a 656 * race in the hw when coming out of rc6: There's a tiny window where 657 * the hw runs at the minimal clock before selecting the desired 658 * frequency, if the down threshold expires in that window we will not 659 * receive a down interrupt. 660 */ 661 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 662 limits = rps->max_freq_softlimit << 23; 663 if (val <= rps->min_freq_softlimit) 664 limits |= rps->min_freq_softlimit << 14; 665 } else { 666 limits = rps->max_freq_softlimit << 24; 667 if (val <= rps->min_freq_softlimit) 668 limits |= rps->min_freq_softlimit << 16; 669 } 670 671 return limits; 672 } 673 674 static void rps_set_power(struct intel_rps *rps, int new_power) 675 { 676 struct intel_gt *gt = rps_to_gt(rps); 677 struct intel_uncore *uncore = gt->uncore; 678 u32 ei_up = 0, ei_down = 0; 679 680 lockdep_assert_held(&rps->power.mutex); 681 682 if (new_power == rps->power.mode) 683 return; 684 685 /* Note the units here are not exactly 1us, but 1280ns. */ 686 switch (new_power) { 687 case LOW_POWER: 688 ei_up = 16000; 689 ei_down = 32000; 690 break; 691 692 case BETWEEN: 693 ei_up = 13000; 694 ei_down = 32000; 695 break; 696 697 case HIGH_POWER: 698 ei_up = 10000; 699 ei_down = 32000; 700 break; 701 } 702 703 /* When byt can survive without system hang with dynamic 704 * sw freq adjustments, this restriction can be lifted. 705 */ 706 if (IS_VALLEYVIEW(gt->i915)) 707 goto skip_hw_write; 708 709 GT_TRACE(gt, 710 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", 711 new_power, 712 rps->power.up_threshold, ei_up, 713 rps->power.down_threshold, ei_down); 714 715 set(uncore, GEN6_RP_UP_EI, 716 intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); 717 set(uncore, GEN6_RP_UP_THRESHOLD, 718 intel_gt_ns_to_pm_interval(gt, 719 ei_up * rps->power.up_threshold * 10)); 720 721 set(uncore, GEN6_RP_DOWN_EI, 722 intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); 723 set(uncore, GEN6_RP_DOWN_THRESHOLD, 724 intel_gt_ns_to_pm_interval(gt, 725 ei_down * 726 rps->power.down_threshold * 10)); 727 728 set(uncore, GEN6_RP_CONTROL, 729 (GRAPHICS_VER(gt->i915) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | 730 GEN6_RP_MEDIA_HW_NORMAL_MODE | 731 GEN6_RP_MEDIA_IS_GFX | 732 GEN6_RP_ENABLE | 733 GEN6_RP_UP_BUSY_AVG | 734 GEN6_RP_DOWN_IDLE_AVG); 735 736 skip_hw_write: 737 rps->power.mode = new_power; 738 } 739 740 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) 741 { 742 int new_power; 743 744 new_power = rps->power.mode; 745 switch (rps->power.mode) { 746 case LOW_POWER: 747 if (val > rps->efficient_freq + 1 && 748 val > rps->cur_freq) 749 new_power = BETWEEN; 750 break; 751 752 case BETWEEN: 753 if (val <= rps->efficient_freq && 754 val < rps->cur_freq) 755 new_power = LOW_POWER; 756 else if (val >= rps->rp0_freq && 757 val > rps->cur_freq) 758 new_power = HIGH_POWER; 759 break; 760 761 case HIGH_POWER: 762 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 763 val < rps->cur_freq) 764 new_power = BETWEEN; 765 break; 766 } 767 /* Max/min bins are special */ 768 if (val <= rps->min_freq_softlimit) 769 new_power = LOW_POWER; 770 if (val >= rps->max_freq_softlimit) 771 new_power = HIGH_POWER; 772 773 mutex_lock(&rps->power.mutex); 774 if (rps->power.interactive) 775 new_power = HIGH_POWER; 776 rps_set_power(rps, new_power); 777 mutex_unlock(&rps->power.mutex); 778 } 779 780 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) 781 { 782 GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", 783 str_yes_no(interactive)); 784 785 mutex_lock(&rps->power.mutex); 786 if (interactive) { 787 if (!rps->power.interactive++ && intel_rps_is_active(rps)) 788 rps_set_power(rps, HIGH_POWER); 789 } else { 790 GEM_BUG_ON(!rps->power.interactive); 791 rps->power.interactive--; 792 } 793 mutex_unlock(&rps->power.mutex); 794 } 795 796 static int gen6_rps_set(struct intel_rps *rps, u8 val) 797 { 798 struct intel_uncore *uncore = rps_to_uncore(rps); 799 struct drm_i915_private *i915 = rps_to_i915(rps); 800 u32 swreq; 801 802 GEM_BUG_ON(rps_uses_slpc(rps)); 803 804 if (GRAPHICS_VER(i915) >= 9) 805 swreq = GEN9_FREQUENCY(val); 806 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 807 swreq = HSW_FREQUENCY(val); 808 else 809 swreq = (GEN6_FREQUENCY(val) | 810 GEN6_OFFSET(0) | 811 GEN6_AGGRESSIVE_TURBO); 812 set(uncore, GEN6_RPNSWREQ, swreq); 813 814 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", 815 val, intel_gpu_freq(rps, val), swreq); 816 817 return 0; 818 } 819 820 static int vlv_rps_set(struct intel_rps *rps, u8 val) 821 { 822 struct drm_i915_private *i915 = rps_to_i915(rps); 823 int err; 824 825 vlv_punit_get(i915); 826 err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); 827 vlv_punit_put(i915); 828 829 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", 830 val, intel_gpu_freq(rps, val)); 831 832 return err; 833 } 834 835 static int rps_set(struct intel_rps *rps, u8 val, bool update) 836 { 837 struct drm_i915_private *i915 = rps_to_i915(rps); 838 int err; 839 840 if (val == rps->last_freq) 841 return 0; 842 843 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 844 err = vlv_rps_set(rps, val); 845 else if (GRAPHICS_VER(i915) >= 6) 846 err = gen6_rps_set(rps, val); 847 else 848 err = gen5_rps_set(rps, val); 849 if (err) 850 return err; 851 852 if (update && GRAPHICS_VER(i915) >= 6) 853 gen6_rps_set_thresholds(rps, val); 854 rps->last_freq = val; 855 856 return 0; 857 } 858 859 void intel_rps_unpark(struct intel_rps *rps) 860 { 861 if (!intel_rps_is_enabled(rps)) 862 return; 863 864 GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); 865 866 /* 867 * Use the user's desired frequency as a guide, but for better 868 * performance, jump 
directly to RPe as our starting frequency. 869 */ 870 mutex_lock(&rps->lock); 871 872 intel_rps_set_active(rps); 873 intel_rps_set(rps, 874 clamp(rps->cur_freq, 875 rps->min_freq_softlimit, 876 rps->max_freq_softlimit)); 877 878 mutex_unlock(&rps->lock); 879 880 rps->pm_iir = 0; 881 if (intel_rps_has_interrupts(rps)) 882 rps_enable_interrupts(rps); 883 if (intel_rps_uses_timer(rps)) 884 rps_start_timer(rps); 885 886 if (GRAPHICS_VER(rps_to_i915(rps)) == 5) 887 gen5_rps_update(rps); 888 } 889 890 void intel_rps_park(struct intel_rps *rps) 891 { 892 int adj; 893 894 if (!intel_rps_is_enabled(rps)) 895 return; 896 897 if (!intel_rps_clear_active(rps)) 898 return; 899 900 if (intel_rps_uses_timer(rps)) 901 rps_stop_timer(rps); 902 if (intel_rps_has_interrupts(rps)) 903 rps_disable_interrupts(rps); 904 905 if (rps->last_freq <= rps->idle_freq) 906 return; 907 908 /* 909 * The punit delays the write of the frequency and voltage until it 910 * determines the GPU is awake. During normal usage we don't want to 911 * waste power changing the frequency if the GPU is sleeping (rc6). 912 * However, the GPU and driver is now idle and we do not want to delay 913 * switching to minimum voltage (reducing power whilst idle) as we do 914 * not expect to be woken in the near future and so must flush the 915 * change by waking the device. 916 * 917 * We choose to take the media powerwell (either would do to trick the 918 * punit into committing the voltage change) as that takes a lot less 919 * power than the render powerwell. 920 */ 921 intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); 922 rps_set(rps, rps->idle_freq, false); 923 intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); 924 925 /* 926 * Since we will try and restart from the previously requested 927 * frequency on unparking, treat this idle point as a downclock 928 * interrupt and reduce the frequency for resume. If we park/unpark 929 * more frequently than the rps worker can run, we will not respond 930 * to any EI and never see a change in frequency. 931 * 932 * (Note we accommodate Cherryview's limitation of only using an 933 * even bin by applying it to all.) 
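	 *
	 * Roughly: a previous negative adjustment is doubled on each park
	 * (-2, -4, -8, ...), so repeated park/unpark cycles step the resume
	 * frequency down until it is clamped at the efficient frequency.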
934 */ 935 adj = rps->last_adj; 936 if (adj < 0) 937 adj *= 2; 938 else /* CHV needs even encode values */ 939 adj = -2; 940 rps->last_adj = adj; 941 rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); 942 if (rps->cur_freq < rps->efficient_freq) { 943 rps->cur_freq = rps->efficient_freq; 944 rps->last_adj = 0; 945 } 946 947 GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); 948 } 949 950 u32 intel_rps_get_boost_frequency(struct intel_rps *rps) 951 { 952 struct intel_guc_slpc *slpc; 953 954 if (rps_uses_slpc(rps)) { 955 slpc = rps_to_slpc(rps); 956 957 return slpc->boost_freq; 958 } else { 959 return intel_gpu_freq(rps, rps->boost_freq); 960 } 961 } 962 963 static int rps_set_boost_freq(struct intel_rps *rps, u32 val) 964 { 965 bool boost = false; 966 967 /* Validate against (static) hardware limits */ 968 val = intel_freq_opcode(rps, val); 969 if (val < rps->min_freq || val > rps->max_freq) 970 return -EINVAL; 971 972 mutex_lock(&rps->lock); 973 if (val != rps->boost_freq) { 974 rps->boost_freq = val; 975 boost = atomic_read(&rps->num_waiters); 976 } 977 mutex_unlock(&rps->lock); 978 if (boost) 979 queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); 980 981 return 0; 982 } 983 984 int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq) 985 { 986 struct intel_guc_slpc *slpc; 987 988 if (rps_uses_slpc(rps)) { 989 slpc = rps_to_slpc(rps); 990 991 return intel_guc_slpc_set_boost_freq(slpc, freq); 992 } else { 993 return rps_set_boost_freq(rps, freq); 994 } 995 } 996 997 void intel_rps_dec_waiters(struct intel_rps *rps) 998 { 999 struct intel_guc_slpc *slpc; 1000 1001 if (rps_uses_slpc(rps)) { 1002 slpc = rps_to_slpc(rps); 1003 1004 /* Don't decrement num_waiters for req where increment was skipped */ 1005 if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) 1006 return; 1007 1008 intel_guc_slpc_dec_waiters(slpc); 1009 } else { 1010 atomic_dec(&rps->num_waiters); 1011 } 1012 } 1013 1014 void intel_rps_boost(struct i915_request *rq) 1015 { 1016 struct intel_guc_slpc *slpc; 1017 1018 if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) 1019 return; 1020 1021 /* Waitboost is not needed for contexts marked with a Freq hint */ 1022 if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags)) 1023 return; 1024 1025 /* Serializes with i915_request_retire() */ 1026 if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { 1027 struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; 1028 1029 if (rps_uses_slpc(rps)) { 1030 slpc = rps_to_slpc(rps); 1031 1032 /* Waitboost should not be done with power saving profile */ 1033 if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) 1034 return; 1035 1036 /* Return if old value is non zero */ 1037 if (!atomic_fetch_inc(&slpc->num_waiters)) { 1038 /* 1039 * Skip queuing boost work if frequency is already boosted, 1040 * but still increment num_waiters. 
1041 */ 1042 if (slpc->min_freq_softlimit >= slpc->boost_freq) 1043 return; 1044 1045 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1046 rq->fence.context, rq->fence.seqno); 1047 queue_work(rps_to_gt(rps)->i915->unordered_wq, 1048 &slpc->boost_work); 1049 } 1050 1051 return; 1052 } 1053 1054 if (atomic_fetch_inc(&rps->num_waiters)) 1055 return; 1056 1057 if (!intel_rps_is_active(rps)) 1058 return; 1059 1060 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1061 rq->fence.context, rq->fence.seqno); 1062 1063 if (READ_ONCE(rps->cur_freq) < rps->boost_freq) 1064 queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); 1065 1066 WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ 1067 } 1068 } 1069 1070 int intel_rps_set(struct intel_rps *rps, u8 val) 1071 { 1072 int err; 1073 1074 lockdep_assert_held(&rps->lock); 1075 GEM_BUG_ON(val > rps->max_freq); 1076 GEM_BUG_ON(val < rps->min_freq); 1077 1078 if (intel_rps_is_active(rps)) { 1079 err = rps_set(rps, val, true); 1080 if (err) 1081 return err; 1082 1083 /* 1084 * Make sure we continue to get interrupts 1085 * until we hit the minimum or maximum frequencies. 1086 */ 1087 if (intel_rps_has_interrupts(rps)) { 1088 struct intel_uncore *uncore = rps_to_uncore(rps); 1089 1090 set(uncore, 1091 GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); 1092 1093 set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); 1094 } 1095 } 1096 1097 rps->cur_freq = val; 1098 return 0; 1099 } 1100 1101 static u32 intel_rps_read_state_cap(struct intel_rps *rps) 1102 { 1103 struct drm_i915_private *i915 = rps_to_i915(rps); 1104 struct intel_uncore *uncore = rps_to_uncore(rps); 1105 1106 if (IS_GEN9_LP(i915)) 1107 return intel_uncore_read(uncore, BXT_RP_STATE_CAP); 1108 else 1109 return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); 1110 } 1111 1112 static void 1113 mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1114 { 1115 struct intel_uncore *uncore = rps_to_uncore(rps); 1116 u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ? 1117 intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) : 1118 intel_uncore_read(uncore, MTL_RP_STATE_CAP); 1119 u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ? 1120 intel_uncore_read(uncore, MTL_MPE_FREQUENCY) : 1121 intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY); 1122 1123 /* MTL values are in units of 16.67 MHz */ 1124 caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap); 1125 caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap); 1126 caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe); 1127 } 1128 1129 static void 1130 __gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1131 { 1132 struct drm_i915_private *i915 = rps_to_i915(rps); 1133 u32 rp_state_cap; 1134 1135 rp_state_cap = intel_rps_read_state_cap(rps); 1136 1137 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 1138 if (IS_GEN9_LP(i915)) { 1139 caps->rp0_freq = (rp_state_cap >> 16) & 0xff; 1140 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1141 caps->min_freq = (rp_state_cap >> 0) & 0xff; 1142 } else { 1143 caps->rp0_freq = (rp_state_cap >> 0) & 0xff; 1144 if (GRAPHICS_VER(i915) >= 10) 1145 caps->rp1_freq = REG_FIELD_GET(RPE_MASK, 1146 intel_uncore_read(to_gt(i915)->uncore, 1147 GEN10_FREQ_INFO_REC)); 1148 else 1149 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1150 caps->min_freq = (rp_state_cap >> 16) & 0xff; 1151 } 1152 1153 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1154 /* 1155 * In this case rp_state_cap register reports frequencies in 1156 * units of 50 MHz. 
Convert these to the actual "hw unit", i.e. 1157 * units of 16.67 MHz 1158 */ 1159 caps->rp0_freq *= GEN9_FREQ_SCALER; 1160 caps->rp1_freq *= GEN9_FREQ_SCALER; 1161 caps->min_freq *= GEN9_FREQ_SCALER; 1162 } 1163 } 1164 1165 /** 1166 * gen6_rps_get_freq_caps - Get freq caps exposed by HW 1167 * @rps: the intel_rps structure 1168 * @caps: returned freq caps 1169 * 1170 * Returned "caps" frequencies should be converted to MHz using 1171 * intel_gpu_freq() 1172 */ 1173 void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1174 { 1175 struct drm_i915_private *i915 = rps_to_i915(rps); 1176 1177 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) 1178 return mtl_get_freq_caps(rps, caps); 1179 else 1180 return __gen6_rps_get_freq_caps(rps, caps); 1181 } 1182 1183 static void gen6_rps_init(struct intel_rps *rps) 1184 { 1185 struct drm_i915_private *i915 = rps_to_i915(rps); 1186 struct intel_rps_freq_caps caps; 1187 1188 gen6_rps_get_freq_caps(rps, &caps); 1189 rps->rp0_freq = caps.rp0_freq; 1190 rps->rp1_freq = caps.rp1_freq; 1191 rps->min_freq = caps.min_freq; 1192 1193 /* hw_max = RP0 until we check for overclocking */ 1194 rps->max_freq = rps->rp0_freq; 1195 1196 rps->efficient_freq = rps->rp1_freq; 1197 if (IS_HASWELL(i915) || IS_BROADWELL(i915) || 1198 IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1199 u32 ddcc_status = 0; 1200 u32 mult = 1; 1201 1202 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) 1203 mult = GEN9_FREQ_SCALER; 1204 if (snb_pcode_read(rps_to_gt(rps)->uncore, 1205 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 1206 &ddcc_status, NULL) == 0) 1207 rps->efficient_freq = 1208 clamp_t(u32, 1209 ((ddcc_status >> 8) & 0xff) * mult, 1210 rps->min_freq, 1211 rps->max_freq); 1212 } 1213 } 1214 1215 static bool rps_reset(struct intel_rps *rps) 1216 { 1217 struct drm_i915_private *i915 = rps_to_i915(rps); 1218 1219 /* force a reset */ 1220 rps->power.mode = -1; 1221 rps->last_freq = -1; 1222 1223 if (rps_set(rps, rps->min_freq, true)) { 1224 drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); 1225 return false; 1226 } 1227 1228 rps->cur_freq = rps->min_freq; 1229 return true; 1230 } 1231 1232 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 1233 static bool gen9_rps_enable(struct intel_rps *rps) 1234 { 1235 struct intel_gt *gt = rps_to_gt(rps); 1236 struct intel_uncore *uncore = gt->uncore; 1237 1238 /* Program defaults and thresholds for RPS */ 1239 if (GRAPHICS_VER(gt->i915) == 9) 1240 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1241 GEN9_FREQUENCY(rps->rp1_freq)); 1242 1243 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); 1244 1245 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1246 1247 return rps_reset(rps); 1248 } 1249 1250 static bool gen8_rps_enable(struct intel_rps *rps) 1251 { 1252 struct intel_uncore *uncore = rps_to_uncore(rps); 1253 1254 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1255 HSW_FREQUENCY(rps->rp1_freq)); 1256 1257 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1258 1259 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1260 1261 return rps_reset(rps); 1262 } 1263 1264 static bool gen6_rps_enable(struct intel_rps *rps) 1265 { 1266 struct intel_uncore *uncore = rps_to_uncore(rps); 1267 1268 /* Power down if completely idle for over 50ms */ 1269 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); 1270 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1271 1272 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1273 
GEN6_PM_RP_DOWN_THRESHOLD | 1274 GEN6_PM_RP_DOWN_TIMEOUT); 1275 1276 return rps_reset(rps); 1277 } 1278 1279 static int chv_rps_max_freq(struct intel_rps *rps) 1280 { 1281 struct drm_i915_private *i915 = rps_to_i915(rps); 1282 struct intel_gt *gt = rps_to_gt(rps); 1283 u32 val; 1284 1285 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1286 1287 switch (gt->info.sseu.eu_total) { 1288 case 8: 1289 /* (2 * 4) config */ 1290 val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; 1291 break; 1292 case 12: 1293 /* (2 * 6) config */ 1294 val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; 1295 break; 1296 case 16: 1297 /* (2 * 8) config */ 1298 default: 1299 /* Setting (2 * 8) Min RP0 for any other combination */ 1300 val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; 1301 break; 1302 } 1303 1304 return val & FB_GFX_FREQ_FUSE_MASK; 1305 } 1306 1307 static int chv_rps_rpe_freq(struct intel_rps *rps) 1308 { 1309 struct drm_i915_private *i915 = rps_to_i915(rps); 1310 u32 val; 1311 1312 val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); 1313 val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; 1314 1315 return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 1316 } 1317 1318 static int chv_rps_guar_freq(struct intel_rps *rps) 1319 { 1320 struct drm_i915_private *i915 = rps_to_i915(rps); 1321 u32 val; 1322 1323 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1324 1325 return val & FB_GFX_FREQ_FUSE_MASK; 1326 } 1327 1328 static u32 chv_rps_min_freq(struct intel_rps *rps) 1329 { 1330 struct drm_i915_private *i915 = rps_to_i915(rps); 1331 u32 val; 1332 1333 val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); 1334 val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; 1335 1336 return val & FB_GFX_FREQ_FUSE_MASK; 1337 } 1338 1339 static bool chv_rps_enable(struct intel_rps *rps) 1340 { 1341 struct intel_uncore *uncore = rps_to_uncore(rps); 1342 struct drm_i915_private *i915 = rps_to_i915(rps); 1343 u32 val; 1344 1345 /* 1: Program defaults and thresholds for RPS*/ 1346 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1347 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1348 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1349 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1350 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1351 1352 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1353 1354 /* 2: Enable RPS */ 1355 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1356 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1357 GEN6_RP_MEDIA_IS_GFX | 1358 GEN6_RP_ENABLE | 1359 GEN6_RP_UP_BUSY_AVG | 1360 GEN6_RP_DOWN_IDLE_AVG); 1361 1362 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1363 GEN6_PM_RP_DOWN_THRESHOLD | 1364 GEN6_PM_RP_DOWN_TIMEOUT); 1365 1366 /* Setting Fixed Bias */ 1367 vlv_punit_get(i915); 1368 1369 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; 1370 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1371 1372 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1373 1374 vlv_punit_put(i915); 1375 1376 /* RPS code assumes GPLL is used */ 1377 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1378 "GPLL not enabled\n"); 1379 1380 drm_dbg(&i915->drm, "GPLL enabled? 
%s\n", 1381 str_yes_no(val & GPLLENABLE)); 1382 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1383 1384 return rps_reset(rps); 1385 } 1386 1387 static int vlv_rps_guar_freq(struct intel_rps *rps) 1388 { 1389 struct drm_i915_private *i915 = rps_to_i915(rps); 1390 u32 val, rp1; 1391 1392 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1393 1394 rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; 1395 rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 1396 1397 return rp1; 1398 } 1399 1400 static int vlv_rps_max_freq(struct intel_rps *rps) 1401 { 1402 struct drm_i915_private *i915 = rps_to_i915(rps); 1403 u32 val, rp0; 1404 1405 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1406 1407 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 1408 /* Clamp to max */ 1409 rp0 = min_t(u32, rp0, 0xea); 1410 1411 return rp0; 1412 } 1413 1414 static int vlv_rps_rpe_freq(struct intel_rps *rps) 1415 { 1416 struct drm_i915_private *i915 = rps_to_i915(rps); 1417 u32 val, rpe; 1418 1419 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 1420 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 1421 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 1422 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 1423 1424 return rpe; 1425 } 1426 1427 static int vlv_rps_min_freq(struct intel_rps *rps) 1428 { 1429 struct drm_i915_private *i915 = rps_to_i915(rps); 1430 u32 val; 1431 1432 val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; 1433 /* 1434 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 1435 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 1436 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 1437 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 1438 * to make sure it matches what Punit accepts. 1439 */ 1440 return max_t(u32, val, 0xc0); 1441 } 1442 1443 static bool vlv_rps_enable(struct intel_rps *rps) 1444 { 1445 struct intel_uncore *uncore = rps_to_uncore(rps); 1446 struct drm_i915_private *i915 = rps_to_i915(rps); 1447 u32 val; 1448 1449 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1450 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1451 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1452 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1453 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1454 1455 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1456 1457 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1458 GEN6_RP_MEDIA_TURBO | 1459 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1460 GEN6_RP_MEDIA_IS_GFX | 1461 GEN6_RP_ENABLE | 1462 GEN6_RP_UP_BUSY_AVG | 1463 GEN6_RP_DOWN_IDLE_CONT); 1464 1465 /* WaGsvRC0ResidencyMethod:vlv */ 1466 rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; 1467 1468 vlv_punit_get(i915); 1469 1470 /* Setting Fixed Bias */ 1471 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; 1472 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1473 1474 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1475 1476 vlv_punit_put(i915); 1477 1478 /* RPS code assumes GPLL is used */ 1479 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1480 "GPLL not enabled\n"); 1481 1482 drm_dbg(&i915->drm, "GPLL enabled? 
%s\n", 1483 str_yes_no(val & GPLLENABLE)); 1484 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1485 1486 return rps_reset(rps); 1487 } 1488 1489 static unsigned long __ips_gfx_val(struct intel_ips *ips) 1490 { 1491 struct intel_rps *rps = container_of(ips, typeof(*rps), ips); 1492 struct intel_uncore *uncore = rps_to_uncore(rps); 1493 unsigned int t, state1, state2; 1494 u32 pxvid, ext_v; 1495 u64 corr, corr2; 1496 1497 lockdep_assert_held(&mchdev_lock); 1498 1499 pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); 1500 pxvid = (pxvid >> 24) & 0x7f; 1501 ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); 1502 1503 state1 = ext_v; 1504 1505 /* Revel in the empirically derived constants */ 1506 1507 /* Correction factor in 1/100000 units */ 1508 t = ips_mch_val(uncore); 1509 if (t > 80) 1510 corr = t * 2349 + 135940; 1511 else if (t >= 50) 1512 corr = t * 964 + 29317; 1513 else /* < 50 */ 1514 corr = t * 301 + 1004; 1515 1516 corr = div_u64(corr * 150142 * state1, 10000) - 78642; 1517 corr2 = div_u64(corr, 100000) * ips->corr; 1518 1519 state2 = div_u64(corr2 * state1, 10000); 1520 state2 /= 100; /* convert to mW */ 1521 1522 __gen5_ips_update(ips); 1523 1524 return ips->gfx_power + state2; 1525 } 1526 1527 static bool has_busy_stats(struct intel_rps *rps) 1528 { 1529 struct intel_engine_cs *engine; 1530 enum intel_engine_id id; 1531 1532 for_each_engine(engine, rps_to_gt(rps), id) { 1533 if (!intel_engine_supports_stats(engine)) 1534 return false; 1535 } 1536 1537 return true; 1538 } 1539 1540 void intel_rps_enable(struct intel_rps *rps) 1541 { 1542 struct drm_i915_private *i915 = rps_to_i915(rps); 1543 struct intel_uncore *uncore = rps_to_uncore(rps); 1544 bool enabled = false; 1545 1546 if (!HAS_RPS(i915)) 1547 return; 1548 1549 if (rps_uses_slpc(rps)) 1550 return; 1551 1552 intel_gt_check_clock_frequency(rps_to_gt(rps)); 1553 1554 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1555 if (rps->max_freq <= rps->min_freq) 1556 /* leave disabled, no room for dynamic reclocking */; 1557 else if (IS_CHERRYVIEW(i915)) 1558 enabled = chv_rps_enable(rps); 1559 else if (IS_VALLEYVIEW(i915)) 1560 enabled = vlv_rps_enable(rps); 1561 else if (GRAPHICS_VER(i915) >= 9) 1562 enabled = gen9_rps_enable(rps); 1563 else if (GRAPHICS_VER(i915) >= 8) 1564 enabled = gen8_rps_enable(rps); 1565 else if (GRAPHICS_VER(i915) >= 6) 1566 enabled = gen6_rps_enable(rps); 1567 else if (IS_IRONLAKE_M(i915)) 1568 enabled = gen5_rps_enable(rps); 1569 else 1570 MISSING_CASE(GRAPHICS_VER(i915)); 1571 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 1572 if (!enabled) 1573 return; 1574 1575 GT_TRACE(rps_to_gt(rps), 1576 "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n", 1577 rps->min_freq, rps->max_freq, 1578 intel_gpu_freq(rps, rps->min_freq), 1579 intel_gpu_freq(rps, rps->max_freq), 1580 rps->power.up_threshold, 1581 rps->power.down_threshold); 1582 1583 GEM_BUG_ON(rps->max_freq < rps->min_freq); 1584 GEM_BUG_ON(rps->idle_freq > rps->max_freq); 1585 1586 GEM_BUG_ON(rps->efficient_freq < rps->min_freq); 1587 GEM_BUG_ON(rps->efficient_freq > rps->max_freq); 1588 1589 if (has_busy_stats(rps)) 1590 intel_rps_set_timer(rps); 1591 else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11) 1592 intel_rps_set_interrupts(rps); 1593 else 1594 /* Ironlake currently uses intel_ips.ko */ {} 1595 1596 intel_rps_set_enabled(rps); 1597 } 1598 1599 static void gen6_rps_disable(struct intel_rps *rps) 1600 { 1601 set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); 1602 } 1603 1604 void intel_rps_disable(struct intel_rps *rps) 
1605 { 1606 struct drm_i915_private *i915 = rps_to_i915(rps); 1607 1608 if (!intel_rps_is_enabled(rps)) 1609 return; 1610 1611 intel_rps_clear_enabled(rps); 1612 intel_rps_clear_interrupts(rps); 1613 intel_rps_clear_timer(rps); 1614 1615 if (GRAPHICS_VER(i915) >= 6) 1616 gen6_rps_disable(rps); 1617 else if (IS_IRONLAKE_M(i915)) 1618 gen5_rps_disable(rps); 1619 } 1620 1621 static int byt_gpu_freq(struct intel_rps *rps, int val) 1622 { 1623 /* 1624 * N = val - 0xb7 1625 * Slow = Fast = GPLL ref * N 1626 */ 1627 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 1628 } 1629 1630 static int byt_freq_opcode(struct intel_rps *rps, int val) 1631 { 1632 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 1633 } 1634 1635 static int chv_gpu_freq(struct intel_rps *rps, int val) 1636 { 1637 /* 1638 * N = val / 2 1639 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 1640 */ 1641 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 1642 } 1643 1644 static int chv_freq_opcode(struct intel_rps *rps, int val) 1645 { 1646 /* CHV needs even values */ 1647 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 1648 } 1649 1650 int intel_gpu_freq(struct intel_rps *rps, int val) 1651 { 1652 struct drm_i915_private *i915 = rps_to_i915(rps); 1653 1654 if (GRAPHICS_VER(i915) >= 9) 1655 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 1656 GEN9_FREQ_SCALER); 1657 else if (IS_CHERRYVIEW(i915)) 1658 return chv_gpu_freq(rps, val); 1659 else if (IS_VALLEYVIEW(i915)) 1660 return byt_gpu_freq(rps, val); 1661 else if (GRAPHICS_VER(i915) >= 6) 1662 return val * GT_FREQUENCY_MULTIPLIER; 1663 else 1664 return val; 1665 } 1666 1667 int intel_freq_opcode(struct intel_rps *rps, int val) 1668 { 1669 struct drm_i915_private *i915 = rps_to_i915(rps); 1670 1671 if (GRAPHICS_VER(i915) >= 9) 1672 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 1673 GT_FREQUENCY_MULTIPLIER); 1674 else if (IS_CHERRYVIEW(i915)) 1675 return chv_freq_opcode(rps, val); 1676 else if (IS_VALLEYVIEW(i915)) 1677 return byt_freq_opcode(rps, val); 1678 else if (GRAPHICS_VER(i915) >= 6) 1679 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 1680 else 1681 return val; 1682 } 1683 1684 static void vlv_init_gpll_ref_freq(struct intel_rps *rps) 1685 { 1686 struct drm_i915_private *i915 = rps_to_i915(rps); 1687 1688 rps->gpll_ref_freq = 1689 vlv_get_cck_clock(i915, "GPLL ref", 1690 CCK_GPLL_CLOCK_CONTROL, 1691 i915->czclk_freq); 1692 1693 drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", 1694 rps->gpll_ref_freq); 1695 } 1696 1697 static void vlv_rps_init(struct intel_rps *rps) 1698 { 1699 struct drm_i915_private *i915 = rps_to_i915(rps); 1700 1701 vlv_iosf_sb_get(i915, 1702 BIT(VLV_IOSF_SB_PUNIT) | 1703 BIT(VLV_IOSF_SB_NC) | 1704 BIT(VLV_IOSF_SB_CCK)); 1705 1706 vlv_init_gpll_ref_freq(rps); 1707 1708 rps->max_freq = vlv_rps_max_freq(rps); 1709 rps->rp0_freq = rps->max_freq; 1710 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1711 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1712 1713 rps->efficient_freq = vlv_rps_rpe_freq(rps); 1714 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1715 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1716 1717 rps->rp1_freq = vlv_rps_guar_freq(rps); 1718 drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 1719 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1720 1721 rps->min_freq = vlv_rps_min_freq(rps); 1722 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1723 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1724 
1725 vlv_iosf_sb_put(i915, 1726 BIT(VLV_IOSF_SB_PUNIT) | 1727 BIT(VLV_IOSF_SB_NC) | 1728 BIT(VLV_IOSF_SB_CCK)); 1729 } 1730 1731 static void chv_rps_init(struct intel_rps *rps) 1732 { 1733 struct drm_i915_private *i915 = rps_to_i915(rps); 1734 1735 vlv_iosf_sb_get(i915, 1736 BIT(VLV_IOSF_SB_PUNIT) | 1737 BIT(VLV_IOSF_SB_NC) | 1738 BIT(VLV_IOSF_SB_CCK)); 1739 1740 vlv_init_gpll_ref_freq(rps); 1741 1742 rps->max_freq = chv_rps_max_freq(rps); 1743 rps->rp0_freq = rps->max_freq; 1744 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1745 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1746 1747 rps->efficient_freq = chv_rps_rpe_freq(rps); 1748 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1749 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1750 1751 rps->rp1_freq = chv_rps_guar_freq(rps); 1752 drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", 1753 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1754 1755 rps->min_freq = chv_rps_min_freq(rps); 1756 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1757 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1758 1759 vlv_iosf_sb_put(i915, 1760 BIT(VLV_IOSF_SB_PUNIT) | 1761 BIT(VLV_IOSF_SB_NC) | 1762 BIT(VLV_IOSF_SB_CCK)); 1763 1764 drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | 1765 rps->rp1_freq | rps->min_freq) & 1, 1766 "Odd GPU freq values\n"); 1767 } 1768 1769 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) 1770 { 1771 ei->ktime = ktime_get_raw(); 1772 ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); 1773 ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); 1774 } 1775 1776 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) 1777 { 1778 struct intel_uncore *uncore = rps_to_uncore(rps); 1779 const struct intel_rps_ei *prev = &rps->ei; 1780 struct intel_rps_ei now; 1781 u32 events = 0; 1782 1783 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) 1784 return 0; 1785 1786 vlv_c0_read(uncore, &now); 1787 1788 if (prev->ktime) { 1789 u64 time, c0; 1790 u32 render, media; 1791 1792 time = ktime_us_delta(now.ktime, prev->ktime); 1793 1794 time *= rps_to_i915(rps)->czclk_freq; 1795 1796 /* Workload can be split between render + media, 1797 * e.g. SwapBuffers being blitted in X after being rendered in 1798 * mesa. To account for this we need to combine both engines 1799 * into our activity counter. 1800 */ 1801 render = now.render_c0 - prev->render_c0; 1802 media = now.media_c0 - prev->media_c0; 1803 c0 = max(render, media); 1804 c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ 1805 1806 if (c0 > time * rps->power.up_threshold) 1807 events = GEN6_PM_RP_UP_THRESHOLD; 1808 else if (c0 < time * rps->power.down_threshold) 1809 events = GEN6_PM_RP_DOWN_THRESHOLD; 1810 } 1811 1812 rps->ei = now; 1813 return events; 1814 } 1815 1816 static void rps_work(struct work_struct *work) 1817 { 1818 struct intel_rps *rps = container_of(work, typeof(*rps), work); 1819 struct intel_gt *gt = rps_to_gt(rps); 1820 struct drm_i915_private *i915 = rps_to_i915(rps); 1821 bool client_boost = false; 1822 int new_freq, adj, min, max; 1823 u32 pm_iir = 0; 1824 1825 spin_lock_irq(gt->irq_lock); 1826 pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; 1827 client_boost = atomic_read(&rps->num_waiters); 1828 spin_unlock_irq(gt->irq_lock); 1829 1830 /* Make sure we didn't queue anything we're not going to process. 
*/ 1831 if (!pm_iir && !client_boost) 1832 goto out; 1833 1834 mutex_lock(&rps->lock); 1835 if (!intel_rps_is_active(rps)) { 1836 mutex_unlock(&rps->lock); 1837 return; 1838 } 1839 1840 pm_iir |= vlv_wa_c0_ei(rps, pm_iir); 1841 1842 adj = rps->last_adj; 1843 new_freq = rps->cur_freq; 1844 min = rps->min_freq_softlimit; 1845 max = rps->max_freq_softlimit; 1846 if (client_boost) 1847 max = rps->max_freq; 1848 1849 GT_TRACE(gt, 1850 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n", 1851 pm_iir, str_yes_no(client_boost), 1852 adj, new_freq, min, max); 1853 1854 if (client_boost && new_freq < rps->boost_freq) { 1855 new_freq = rps->boost_freq; 1856 adj = 0; 1857 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1858 if (adj > 0) 1859 adj *= 2; 1860 else /* CHV needs even encode values */ 1861 adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; 1862 1863 if (new_freq >= rps->max_freq_softlimit) 1864 adj = 0; 1865 } else if (client_boost) { 1866 adj = 0; 1867 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1868 if (rps->cur_freq > rps->efficient_freq) 1869 new_freq = rps->efficient_freq; 1870 else if (rps->cur_freq > rps->min_freq_softlimit) 1871 new_freq = rps->min_freq_softlimit; 1872 adj = 0; 1873 } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { 1874 if (adj < 0) 1875 adj *= 2; 1876 else /* CHV needs even encode values */ 1877 adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; 1878 1879 if (new_freq <= rps->min_freq_softlimit) 1880 adj = 0; 1881 } else { /* unknown event */ 1882 adj = 0; 1883 } 1884 1885 /* 1886 * sysfs frequency limits may have snuck in while 1887 * servicing the interrupt 1888 */ 1889 new_freq += adj; 1890 new_freq = clamp_t(int, new_freq, min, max); 1891 1892 if (intel_rps_set(rps, new_freq)) { 1893 drm_dbg(&i915->drm, "Failed to set new GPU frequency\n"); 1894 adj = 0; 1895 } 1896 rps->last_adj = adj; 1897 1898 mutex_unlock(&rps->lock); 1899 1900 out: 1901 spin_lock_irq(gt->irq_lock); 1902 gen6_gt_pm_unmask_irq(gt, rps->pm_events); 1903 spin_unlock_irq(gt->irq_lock); 1904 } 1905 1906 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1907 { 1908 struct intel_gt *gt = rps_to_gt(rps); 1909 const u32 events = rps->pm_events & pm_iir; 1910 1911 lockdep_assert_held(gt->irq_lock); 1912 1913 if (unlikely(!events)) 1914 return; 1915 1916 GT_TRACE(gt, "irq events:%x\n", events); 1917 1918 gen6_gt_pm_mask_irq(gt, events); 1919 1920 rps->pm_iir |= events; 1921 queue_work(gt->i915->unordered_wq, &rps->work); 1922 } 1923 1924 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1925 { 1926 struct intel_gt *gt = rps_to_gt(rps); 1927 u32 events; 1928 1929 events = pm_iir & rps->pm_events; 1930 if (events) { 1931 spin_lock(gt->irq_lock); 1932 1933 GT_TRACE(gt, "irq events:%x\n", events); 1934 1935 gen6_gt_pm_mask_irq(gt, events); 1936 rps->pm_iir |= events; 1937 1938 queue_work(gt->i915->unordered_wq, &rps->work); 1939 spin_unlock(gt->irq_lock); 1940 } 1941 1942 if (GRAPHICS_VER(gt->i915) >= 8) 1943 return; 1944 1945 if (pm_iir & PM_VEBOX_USER_INTERRUPT) 1946 intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10); 1947 1948 if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) 1949 drm_dbg(&rps_to_i915(rps)->drm, 1950 "Command parser error, pm_iir 0x%08x\n", pm_iir); 1951 } 1952 1953 void gen5_rps_irq_handler(struct intel_rps *rps) 1954 { 1955 struct intel_uncore *uncore = rps_to_uncore(rps); 1956 u32 busy_up, busy_down, max_avg, min_avg; 1957 u8 new_freq; 1958 1959 spin_lock(&mchdev_lock); 1960 1961 intel_uncore_write16(uncore, 1962 MEMINTRSTS, 1963 intel_uncore_read(uncore, MEMINTRSTS)); 1964 
	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
	min_avg = intel_uncore_read(uncore, RCBMINAVG);

	/* Handle RCS change request from hw */
	new_freq = rps->cur_freq;
	if (busy_up > max_avg)
		new_freq++;
	else if (busy_down < min_avg)
		new_freq--;
	new_freq = clamp(new_freq,
			 rps->min_freq_softlimit,
			 rps->max_freq_softlimit);

	if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
		rps->cur_freq = new_freq;

	spin_unlock(&mchdev_lock);
}

void intel_rps_init_early(struct intel_rps *rps)
{
	mutex_init(&rps->lock);
	mutex_init(&rps->power.mutex);

	INIT_WORK(&rps->work, rps_work);
	timer_setup(&rps->timer, rps_timer, 0);

	atomic_set(&rps->num_waiters, 0);
}

void intel_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (rps_uses_slpc(rps))
		return;

	if (IS_CHERRYVIEW(i915))
		chv_rps_init(rps);
	else if (IS_VALLEYVIEW(i915))
		vlv_rps_init(rps);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_rps_init(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_init(rps);

	/* Derive initial user preferences/limits from the hardware limits */
	rps->max_freq_softlimit = rps->max_freq;
	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
	rps->min_freq_softlimit = rps->min_freq;
	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;

	/* After setting max-softlimit, find the overclock max freq */
	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
		u32 params = 0;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
		if (params & BIT(31)) { /* OC supported */
			drm_dbg(&i915->drm,
				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
				(rps->max_freq & 0xff) * 50,
				(params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}

	/* Set default thresholds in % */
	rps->power.up_threshold = 95;
	rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold;
	rps->power.down_threshold = 85;
	rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold;

	/* Finally allow us to boost to max by default */
	rps->boost_freq = rps->max_freq;
	rps->idle_freq = rps->min_freq;

	/* Start in the middle, from here we will autotune based on workload */
	rps->cur_freq = rps->efficient_freq;

	rps->pm_intrmsk_mbz = 0;

	/*
	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
	 * if GEN6_PM_UP_EI_EXPIRED is masked.
	 *
	 * TODO: verify if this can be reproduced on VLV,CHV.
2054 */ 2055 if (GRAPHICS_VER(i915) <= 7) 2056 rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; 2057 2058 if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) 2059 rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 2060 2061 /* GuC needs ARAT expired interrupt unmasked */ 2062 if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc)) 2063 rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; 2064 } 2065 2066 void intel_rps_sanitize(struct intel_rps *rps) 2067 { 2068 if (rps_uses_slpc(rps)) 2069 return; 2070 2071 if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) 2072 rps_disable_interrupts(rps); 2073 } 2074 2075 u32 intel_rps_read_rpstat(struct intel_rps *rps) 2076 { 2077 struct drm_i915_private *i915 = rps_to_i915(rps); 2078 i915_reg_t rpstat; 2079 2080 rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; 2081 2082 return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); 2083 } 2084 2085 static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) 2086 { 2087 struct drm_i915_private *i915 = rps_to_i915(rps); 2088 u32 cagf; 2089 2090 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) 2091 cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat); 2092 else if (GRAPHICS_VER(i915) >= 12) 2093 cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); 2094 else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 2095 cagf = REG_FIELD_GET(RPE_MASK, rpstat); 2096 else if (GRAPHICS_VER(i915) >= 9) 2097 cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); 2098 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 2099 cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat); 2100 else if (GRAPHICS_VER(i915) >= 6) 2101 cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat); 2102 else 2103 cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat)); 2104 2105 return cagf; 2106 } 2107 2108 static u32 __read_cagf(struct intel_rps *rps, bool take_fw) 2109 { 2110 struct drm_i915_private *i915 = rps_to_i915(rps); 2111 struct intel_uncore *uncore = rps_to_uncore(rps); 2112 i915_reg_t r = INVALID_MMIO_REG; 2113 u32 freq; 2114 2115 /* 2116 * For Gen12+ reading freq from HW does not need a forcewake and 2117 * registers will return 0 freq when GT is in RC6 2118 */ 2119 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { 2120 r = MTL_MIRROR_TARGET_WP1; 2121 } else if (GRAPHICS_VER(i915) >= 12) { 2122 r = GEN12_RPSTAT1; 2123 } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { 2124 vlv_punit_get(i915); 2125 freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 2126 vlv_punit_put(i915); 2127 } else if (GRAPHICS_VER(i915) >= 6) { 2128 r = GEN6_RPSTAT1; 2129 } else { 2130 r = MEMSTAT_ILK; 2131 } 2132 2133 if (i915_mmio_reg_valid(r)) 2134 freq = take_fw ? 
intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); 2135 2136 return intel_rps_get_cagf(rps, freq); 2137 } 2138 2139 static u32 read_cagf(struct intel_rps *rps) 2140 { 2141 return __read_cagf(rps, true); 2142 } 2143 2144 u32 intel_rps_read_actual_frequency(struct intel_rps *rps) 2145 { 2146 struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; 2147 intel_wakeref_t wakeref; 2148 u32 freq = 0; 2149 2150 with_intel_runtime_pm_if_in_use(rpm, wakeref) 2151 freq = intel_gpu_freq(rps, read_cagf(rps)); 2152 2153 return freq; 2154 } 2155 2156 u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) 2157 { 2158 return intel_gpu_freq(rps, __read_cagf(rps, false)); 2159 } 2160 2161 static u32 intel_rps_read_punit_req(struct intel_rps *rps) 2162 { 2163 struct intel_uncore *uncore = rps_to_uncore(rps); 2164 struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; 2165 intel_wakeref_t wakeref; 2166 u32 freq = 0; 2167 2168 with_intel_runtime_pm_if_in_use(rpm, wakeref) 2169 freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); 2170 2171 return freq; 2172 } 2173 2174 static u32 intel_rps_get_req(u32 pureq) 2175 { 2176 u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; 2177 2178 return req; 2179 } 2180 2181 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) 2182 { 2183 u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps)); 2184 2185 return intel_gpu_freq(rps, freq); 2186 } 2187 2188 u32 intel_rps_get_requested_frequency(struct intel_rps *rps) 2189 { 2190 if (rps_uses_slpc(rps)) 2191 return intel_rps_read_punit_req_frequency(rps); 2192 else 2193 return intel_gpu_freq(rps, rps->cur_freq); 2194 } 2195 2196 u32 intel_rps_get_max_frequency(struct intel_rps *rps) 2197 { 2198 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2199 2200 if (rps_uses_slpc(rps)) 2201 return slpc->max_freq_softlimit; 2202 else 2203 return intel_gpu_freq(rps, rps->max_freq_softlimit); 2204 } 2205 2206 /** 2207 * intel_rps_get_max_raw_freq - returns the max frequency in some raw format. 2208 * @rps: the intel_rps structure 2209 * 2210 * Returns the max frequency in a raw format. In newer platforms raw is in 2211 * units of 50 MHz. 
2212 */ 2213 u32 intel_rps_get_max_raw_freq(struct intel_rps *rps) 2214 { 2215 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2216 u32 freq; 2217 2218 if (rps_uses_slpc(rps)) { 2219 return DIV_ROUND_CLOSEST(slpc->rp0_freq, 2220 GT_FREQUENCY_MULTIPLIER); 2221 } else { 2222 freq = rps->max_freq; 2223 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 2224 /* Convert GT frequency to 50 MHz units */ 2225 freq /= GEN9_FREQ_SCALER; 2226 } 2227 return freq; 2228 } 2229 } 2230 2231 u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) 2232 { 2233 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2234 2235 if (rps_uses_slpc(rps)) 2236 return slpc->rp0_freq; 2237 else 2238 return intel_gpu_freq(rps, rps->rp0_freq); 2239 } 2240 2241 u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) 2242 { 2243 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2244 2245 if (rps_uses_slpc(rps)) 2246 return slpc->rp1_freq; 2247 else 2248 return intel_gpu_freq(rps, rps->rp1_freq); 2249 } 2250 2251 u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) 2252 { 2253 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2254 2255 if (rps_uses_slpc(rps)) 2256 return slpc->min_freq; 2257 else 2258 return intel_gpu_freq(rps, rps->min_freq); 2259 } 2260 2261 static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) 2262 { 2263 struct intel_gt *gt = rps_to_gt(rps); 2264 struct drm_i915_private *i915 = gt->i915; 2265 struct intel_uncore *uncore = gt->uncore; 2266 struct intel_rps_freq_caps caps; 2267 u32 rp_state_limits; 2268 u32 gt_perf_status; 2269 u32 rpmodectl, rpinclimit, rpdeclimit; 2270 u32 rpstat, cagf, reqf; 2271 u32 rpcurupei, rpcurup, rpprevup; 2272 u32 rpcurdownei, rpcurdown, rpprevdown; 2273 u32 rpupei, rpupt, rpdownei, rpdownt; 2274 u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; 2275 2276 rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); 2277 gen6_rps_get_freq_caps(rps, &caps); 2278 if (IS_GEN9_LP(i915)) 2279 gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); 2280 else 2281 gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); 2282 2283 /* RPSTAT1 is in the GT power well */ 2284 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 2285 2286 reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); 2287 if (GRAPHICS_VER(i915) >= 9) { 2288 reqf >>= 23; 2289 } else { 2290 reqf &= ~GEN6_TURBO_DISABLE; 2291 if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 2292 reqf >>= 24; 2293 else 2294 reqf >>= 25; 2295 } 2296 reqf = intel_gpu_freq(rps, reqf); 2297 2298 rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); 2299 rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); 2300 rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); 2301 2302 rpstat = intel_rps_read_rpstat(rps); 2303 rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; 2304 rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; 2305 rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; 2306 rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; 2307 rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; 2308 rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; 2309 2310 rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); 2311 rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); 2312 2313 rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); 2314 rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); 2315 2316 cagf = intel_rps_read_actual_frequency(rps); 
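	/*
	 * Unit conversion, worked through (illustrative only): the raw ratios
	 * read above are turned into MHz by intel_gpu_freq().  Assuming the
	 * driver's usual constants of GT_FREQUENCY_MULTIPLIER == 50 and
	 * GEN9_FREQ_SCALER == 3, a gen9+ ratio of 66 corresponds to
	 *	66 * 50 / 3 = 1100 MHz,
	 * while intel_rps_get_max_raw_freq()/intel_rps_get_min_raw_freq()
	 * report 66 / 3 = 22, i.e. 22 units of 50 MHz.  Pre-gen9 platforms
	 * already encode ratios directly in 50 MHz units, so no scaling by
	 * GEN9_FREQ_SCALER is applied there.
	 */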
2317 2318 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 2319 2320 if (GRAPHICS_VER(i915) >= 11) { 2321 pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); 2322 pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); 2323 /* 2324 * The equivalent to the PM ISR & IIR cannot be read 2325 * without affecting the current state of the system 2326 */ 2327 pm_isr = 0; 2328 pm_iir = 0; 2329 } else if (GRAPHICS_VER(i915) >= 8) { 2330 pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); 2331 pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); 2332 pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); 2333 pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); 2334 } else { 2335 pm_ier = intel_uncore_read(uncore, GEN6_PMIER); 2336 pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); 2337 pm_isr = intel_uncore_read(uncore, GEN6_PMISR); 2338 pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); 2339 } 2340 pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); 2341 2342 drm_printf(p, "Video Turbo Mode: %s\n", 2343 str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO)); 2344 drm_printf(p, "HW control enabled: %s\n", 2345 str_yes_no(rpmodectl & GEN6_RP_ENABLE)); 2346 drm_printf(p, "SW control enabled: %s\n", 2347 str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); 2348 2349 drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", 2350 pm_ier, pm_imr, pm_mask); 2351 if (GRAPHICS_VER(i915) <= 10) 2352 drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", 2353 pm_isr, pm_iir); 2354 drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", 2355 rps->pm_intrmsk_mbz); 2356 drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); 2357 drm_printf(p, "Render p-state ratio: %d\n", 2358 (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); 2359 drm_printf(p, "Render p-state VID: %d\n", 2360 gt_perf_status & 0xff); 2361 drm_printf(p, "Render p-state limit: %d\n", 2362 rp_state_limits & 0xff); 2363 drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); 2364 drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); 2365 drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); 2366 drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); 2367 drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); 2368 drm_printf(p, "CAGF: %dMHz\n", cagf); 2369 drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", 2370 rpcurupei, 2371 intel_gt_pm_interval_to_ns(gt, rpcurupei)); 2372 drm_printf(p, "RP CUR UP: %d (%lldns)\n", 2373 rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); 2374 drm_printf(p, "RP PREV UP: %d (%lldns)\n", 2375 rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); 2376 drm_printf(p, "Up threshold: %d%%\n", 2377 rps->power.up_threshold); 2378 drm_printf(p, "RP UP EI: %d (%lldns)\n", 2379 rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); 2380 drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", 2381 rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); 2382 2383 drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", 2384 rpcurdownei, 2385 intel_gt_pm_interval_to_ns(gt, rpcurdownei)); 2386 drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", 2387 rpcurdown, 2388 intel_gt_pm_interval_to_ns(gt, rpcurdown)); 2389 drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", 2390 rpprevdown, 2391 intel_gt_pm_interval_to_ns(gt, rpprevdown)); 2392 drm_printf(p, "Down threshold: %d%%\n", 2393 rps->power.down_threshold); 2394 drm_printf(p, "RP DOWN EI: %d (%lldns)\n", 2395 rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); 2396 drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", 2397 rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); 2398 2399 drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", 2400 
intel_gpu_freq(rps, caps.min_freq)); 2401 drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", 2402 intel_gpu_freq(rps, caps.rp1_freq)); 2403 drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", 2404 intel_gpu_freq(rps, caps.rp0_freq)); 2405 drm_printf(p, "Max overclocked frequency: %dMHz\n", 2406 intel_gpu_freq(rps, rps->max_freq)); 2407 2408 drm_printf(p, "Current freq: %d MHz\n", 2409 intel_gpu_freq(rps, rps->cur_freq)); 2410 drm_printf(p, "Actual freq: %d MHz\n", cagf); 2411 drm_printf(p, "Idle freq: %d MHz\n", 2412 intel_gpu_freq(rps, rps->idle_freq)); 2413 drm_printf(p, "Min freq: %d MHz\n", 2414 intel_gpu_freq(rps, rps->min_freq)); 2415 drm_printf(p, "Boost freq: %d MHz\n", 2416 intel_gpu_freq(rps, rps->boost_freq)); 2417 drm_printf(p, "Max freq: %d MHz\n", 2418 intel_gpu_freq(rps, rps->max_freq)); 2419 drm_printf(p, 2420 "efficient (RPe) frequency: %d MHz\n", 2421 intel_gpu_freq(rps, rps->efficient_freq)); 2422 } 2423 2424 static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) 2425 { 2426 struct intel_gt *gt = rps_to_gt(rps); 2427 struct intel_uncore *uncore = gt->uncore; 2428 struct intel_rps_freq_caps caps; 2429 u32 pm_mask; 2430 2431 gen6_rps_get_freq_caps(rps, &caps); 2432 pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); 2433 2434 drm_printf(p, "PM MASK=0x%08x\n", pm_mask); 2435 drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", 2436 rps->pm_intrmsk_mbz); 2437 drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps)); 2438 drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); 2439 drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", 2440 intel_gpu_freq(rps, caps.min_freq)); 2441 drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", 2442 intel_gpu_freq(rps, caps.rp1_freq)); 2443 drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", 2444 intel_gpu_freq(rps, caps.rp0_freq)); 2445 drm_printf(p, "Current freq: %d MHz\n", 2446 intel_rps_get_requested_frequency(rps)); 2447 drm_printf(p, "Actual freq: %d MHz\n", 2448 intel_rps_read_actual_frequency(rps)); 2449 drm_printf(p, "Min freq: %d MHz\n", 2450 intel_rps_get_min_frequency(rps)); 2451 drm_printf(p, "Boost freq: %d MHz\n", 2452 intel_rps_get_boost_frequency(rps)); 2453 drm_printf(p, "Max freq: %d MHz\n", 2454 intel_rps_get_max_frequency(rps)); 2455 drm_printf(p, 2456 "efficient (RPe) frequency: %d MHz\n", 2457 intel_gpu_freq(rps, caps.rp1_freq)); 2458 } 2459 2460 void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) 2461 { 2462 if (rps_uses_slpc(rps)) 2463 return slpc_frequency_dump(rps, p); 2464 else 2465 return rps_frequency_dump(rps, p); 2466 } 2467 2468 static int set_max_freq(struct intel_rps *rps, u32 val) 2469 { 2470 struct drm_i915_private *i915 = rps_to_i915(rps); 2471 int ret = 0; 2472 2473 mutex_lock(&rps->lock); 2474 2475 val = intel_freq_opcode(rps, val); 2476 if (val < rps->min_freq || 2477 val > rps->max_freq || 2478 val < rps->min_freq_softlimit) { 2479 ret = -EINVAL; 2480 goto unlock; 2481 } 2482 2483 if (val > rps->rp0_freq) 2484 drm_dbg(&i915->drm, "User requested overclocking to %d\n", 2485 intel_gpu_freq(rps, val)); 2486 2487 rps->max_freq_softlimit = val; 2488 2489 val = clamp_t(int, rps->cur_freq, 2490 rps->min_freq_softlimit, 2491 rps->max_freq_softlimit); 2492 2493 /* 2494 * We still need *_set_rps to process the new max_delay and 2495 * update the interrupt limits and PMINTRMSK even though 2496 * frequency request may be unchanged. 
2497 */ 2498 intel_rps_set(rps, val); 2499 2500 unlock: 2501 mutex_unlock(&rps->lock); 2502 2503 return ret; 2504 } 2505 2506 int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) 2507 { 2508 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2509 2510 if (rps_uses_slpc(rps)) 2511 return intel_guc_slpc_set_max_freq(slpc, val); 2512 else 2513 return set_max_freq(rps, val); 2514 } 2515 2516 u32 intel_rps_get_min_frequency(struct intel_rps *rps) 2517 { 2518 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2519 2520 if (rps_uses_slpc(rps)) 2521 return slpc->min_freq_softlimit; 2522 else 2523 return intel_gpu_freq(rps, rps->min_freq_softlimit); 2524 } 2525 2526 /** 2527 * intel_rps_get_min_raw_freq - returns the min frequency in some raw format. 2528 * @rps: the intel_rps structure 2529 * 2530 * Returns the min frequency in a raw format. In newer platforms raw is in 2531 * units of 50 MHz. 2532 */ 2533 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps) 2534 { 2535 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2536 u32 freq; 2537 2538 if (rps_uses_slpc(rps)) { 2539 return DIV_ROUND_CLOSEST(slpc->min_freq, 2540 GT_FREQUENCY_MULTIPLIER); 2541 } else { 2542 freq = rps->min_freq; 2543 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 2544 /* Convert GT frequency to 50 MHz units */ 2545 freq /= GEN9_FREQ_SCALER; 2546 } 2547 return freq; 2548 } 2549 } 2550 2551 static int set_min_freq(struct intel_rps *rps, u32 val) 2552 { 2553 int ret = 0; 2554 2555 mutex_lock(&rps->lock); 2556 2557 val = intel_freq_opcode(rps, val); 2558 if (val < rps->min_freq || 2559 val > rps->max_freq || 2560 val > rps->max_freq_softlimit) { 2561 ret = -EINVAL; 2562 goto unlock; 2563 } 2564 2565 rps->min_freq_softlimit = val; 2566 2567 val = clamp_t(int, rps->cur_freq, 2568 rps->min_freq_softlimit, 2569 rps->max_freq_softlimit); 2570 2571 /* 2572 * We still need *_set_rps to process the new min_delay and 2573 * update the interrupt limits and PMINTRMSK even though 2574 * frequency request may be unchanged. 2575 */ 2576 intel_rps_set(rps, val); 2577 2578 unlock: 2579 mutex_unlock(&rps->lock); 2580 2581 return ret; 2582 } 2583 2584 int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) 2585 { 2586 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2587 2588 if (rps_uses_slpc(rps)) 2589 return intel_guc_slpc_set_min_freq(slpc, val); 2590 else 2591 return set_min_freq(rps, val); 2592 } 2593 2594 u8 intel_rps_get_up_threshold(struct intel_rps *rps) 2595 { 2596 return rps->power.up_threshold; 2597 } 2598 2599 static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val) 2600 { 2601 int ret; 2602 2603 if (val > 100) 2604 return -EINVAL; 2605 2606 ret = mutex_lock_interruptible(&rps->lock); 2607 if (ret) 2608 return ret; 2609 2610 if (*threshold == val) 2611 goto out_unlock; 2612 2613 *threshold = val; 2614 2615 /* Force reset. 
*/ 2616 rps->last_freq = -1; 2617 mutex_lock(&rps->power.mutex); 2618 rps->power.mode = -1; 2619 mutex_unlock(&rps->power.mutex); 2620 2621 intel_rps_set(rps, clamp(rps->cur_freq, 2622 rps->min_freq_softlimit, 2623 rps->max_freq_softlimit)); 2624 2625 out_unlock: 2626 mutex_unlock(&rps->lock); 2627 2628 return ret; 2629 } 2630 2631 int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold) 2632 { 2633 return rps_set_threshold(rps, &rps->power.up_threshold, threshold); 2634 } 2635 2636 u8 intel_rps_get_down_threshold(struct intel_rps *rps) 2637 { 2638 return rps->power.down_threshold; 2639 } 2640 2641 int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold) 2642 { 2643 return rps_set_threshold(rps, &rps->power.down_threshold, threshold); 2644 } 2645 2646 static void intel_rps_set_manual(struct intel_rps *rps, bool enable) 2647 { 2648 struct intel_uncore *uncore = rps_to_uncore(rps); 2649 u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE; 2650 2651 /* Allow punit to process software requests */ 2652 intel_uncore_write(uncore, GEN6_RP_CONTROL, state); 2653 } 2654 2655 void intel_rps_raise_unslice(struct intel_rps *rps) 2656 { 2657 struct intel_uncore *uncore = rps_to_uncore(rps); 2658 2659 mutex_lock(&rps->lock); 2660 2661 if (rps_uses_slpc(rps)) { 2662 /* RP limits have not been initialized yet for SLPC path */ 2663 struct intel_rps_freq_caps caps; 2664 2665 gen6_rps_get_freq_caps(rps, &caps); 2666 2667 intel_rps_set_manual(rps, true); 2668 intel_uncore_write(uncore, GEN6_RPNSWREQ, 2669 ((caps.rp0_freq << 2670 GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | 2671 GEN9_IGNORE_SLICE_RATIO)); 2672 intel_rps_set_manual(rps, false); 2673 } else { 2674 intel_rps_set(rps, rps->rp0_freq); 2675 } 2676 2677 mutex_unlock(&rps->lock); 2678 } 2679 2680 void intel_rps_lower_unslice(struct intel_rps *rps) 2681 { 2682 struct intel_uncore *uncore = rps_to_uncore(rps); 2683 2684 mutex_lock(&rps->lock); 2685 2686 if (rps_uses_slpc(rps)) { 2687 /* RP limits have not been initialized yet for SLPC path */ 2688 struct intel_rps_freq_caps caps; 2689 2690 gen6_rps_get_freq_caps(rps, &caps); 2691 2692 intel_rps_set_manual(rps, true); 2693 intel_uncore_write(uncore, GEN6_RPNSWREQ, 2694 ((caps.min_freq << 2695 GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | 2696 GEN9_IGNORE_SLICE_RATIO)); 2697 intel_rps_set_manual(rps, false); 2698 } else { 2699 intel_rps_set(rps, rps->min_freq); 2700 } 2701 2702 mutex_unlock(&rps->lock); 2703 } 2704 2705 static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32) 2706 { 2707 struct intel_gt *gt = rps_to_gt(rps); 2708 intel_wakeref_t wakeref; 2709 u32 val; 2710 2711 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 2712 val = intel_uncore_read(gt->uncore, reg32); 2713 2714 return val; 2715 } 2716 2717 bool rps_read_mask_mmio(struct intel_rps *rps, 2718 i915_reg_t reg32, u32 mask) 2719 { 2720 return rps_read_mmio(rps, reg32) & mask; 2721 } 2722 2723 /* External interface for intel_ips.ko */ 2724 2725 static struct drm_i915_private __rcu *ips_mchdev; 2726 2727 /* 2728 * Tells the intel_ips driver that the i915 driver is now loaded, if 2729 * IPS got loaded first. 2730 * 2731 * This awkward dance is so that neither module has to depend on the 2732 * other in order for IPS to do the appropriate communication of 2733 * GPU turbo limits to i915. 
2734 */ 2735 static void 2736 ips_ping_for_i915_load(void) 2737 { 2738 void (*link)(void); 2739 2740 link = symbol_get(ips_link_to_i915_driver); 2741 if (link) { 2742 link(); 2743 symbol_put(ips_link_to_i915_driver); 2744 } 2745 } 2746 2747 void intel_rps_driver_register(struct intel_rps *rps) 2748 { 2749 struct intel_gt *gt = rps_to_gt(rps); 2750 2751 /* 2752 * We only register the i915 ips part with intel-ips once everything is 2753 * set up, to avoid intel-ips sneaking in and reading bogus values. 2754 */ 2755 if (GRAPHICS_VER(gt->i915) == 5) { 2756 GEM_BUG_ON(ips_mchdev); 2757 rcu_assign_pointer(ips_mchdev, gt->i915); 2758 ips_ping_for_i915_load(); 2759 } 2760 } 2761 2762 void intel_rps_driver_unregister(struct intel_rps *rps) 2763 { 2764 if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) 2765 rcu_assign_pointer(ips_mchdev, NULL); 2766 } 2767 2768 static struct drm_i915_private *mchdev_get(void) 2769 { 2770 struct drm_i915_private *i915; 2771 2772 rcu_read_lock(); 2773 i915 = rcu_dereference(ips_mchdev); 2774 if (i915 && !kref_get_unless_zero(&i915->drm.ref)) 2775 i915 = NULL; 2776 rcu_read_unlock(); 2777 2778 return i915; 2779 } 2780 2781 /** 2782 * i915_read_mch_val - return value for IPS use 2783 * 2784 * Calculate and return a value for the IPS driver to use when deciding whether 2785 * we have thermal and power headroom to increase CPU or GPU power budget. 2786 */ 2787 unsigned long i915_read_mch_val(void) 2788 { 2789 struct drm_i915_private *i915; 2790 unsigned long chipset_val = 0; 2791 unsigned long graphics_val = 0; 2792 intel_wakeref_t wakeref; 2793 2794 i915 = mchdev_get(); 2795 if (!i915) 2796 return 0; 2797 2798 with_intel_runtime_pm(&i915->runtime_pm, wakeref) { 2799 struct intel_ips *ips = &to_gt(i915)->rps.ips; 2800 2801 spin_lock_irq(&mchdev_lock); 2802 chipset_val = __ips_chipset_val(ips); 2803 graphics_val = __ips_gfx_val(ips); 2804 spin_unlock_irq(&mchdev_lock); 2805 } 2806 2807 drm_dev_put(&i915->drm); 2808 return chipset_val + graphics_val; 2809 } 2810 EXPORT_SYMBOL_GPL(i915_read_mch_val); 2811 2812 /** 2813 * i915_gpu_raise - raise GPU frequency limit 2814 * 2815 * Raise the limit; IPS indicates we have thermal headroom. 2816 */ 2817 bool i915_gpu_raise(void) 2818 { 2819 struct drm_i915_private *i915; 2820 struct intel_rps *rps; 2821 2822 i915 = mchdev_get(); 2823 if (!i915) 2824 return false; 2825 2826 rps = &to_gt(i915)->rps; 2827 2828 spin_lock_irq(&mchdev_lock); 2829 if (rps->max_freq_softlimit < rps->max_freq) 2830 rps->max_freq_softlimit++; 2831 spin_unlock_irq(&mchdev_lock); 2832 2833 drm_dev_put(&i915->drm); 2834 return true; 2835 } 2836 EXPORT_SYMBOL_GPL(i915_gpu_raise); 2837 2838 /** 2839 * i915_gpu_lower - lower GPU frequency limit 2840 * 2841 * IPS indicates we're close to a thermal limit, so throttle back the GPU 2842 * frequency maximum. 2843 */ 2844 bool i915_gpu_lower(void) 2845 { 2846 struct drm_i915_private *i915; 2847 struct intel_rps *rps; 2848 2849 i915 = mchdev_get(); 2850 if (!i915) 2851 return false; 2852 2853 rps = &to_gt(i915)->rps; 2854 2855 spin_lock_irq(&mchdev_lock); 2856 if (rps->max_freq_softlimit > rps->min_freq) 2857 rps->max_freq_softlimit--; 2858 spin_unlock_irq(&mchdev_lock); 2859 2860 drm_dev_put(&i915->drm); 2861 return true; 2862 } 2863 EXPORT_SYMBOL_GPL(i915_gpu_lower); 2864 2865 /** 2866 * i915_gpu_busy - indicate GPU business to IPS 2867 * 2868 * Tell the IPS driver whether or not the GPU is busy. 
2869 */ 2870 bool i915_gpu_busy(void) 2871 { 2872 struct drm_i915_private *i915; 2873 bool ret; 2874 2875 i915 = mchdev_get(); 2876 if (!i915) 2877 return false; 2878 2879 ret = to_gt(i915)->awake; 2880 2881 drm_dev_put(&i915->drm); 2882 return ret; 2883 } 2884 EXPORT_SYMBOL_GPL(i915_gpu_busy); 2885 2886 /** 2887 * i915_gpu_turbo_disable - disable graphics turbo 2888 * 2889 * Disable graphics turbo by resetting the max frequency and setting the 2890 * current frequency to the default. 2891 */ 2892 bool i915_gpu_turbo_disable(void) 2893 { 2894 struct drm_i915_private *i915; 2895 struct intel_rps *rps; 2896 bool ret; 2897 2898 i915 = mchdev_get(); 2899 if (!i915) 2900 return false; 2901 2902 rps = &to_gt(i915)->rps; 2903 2904 spin_lock_irq(&mchdev_lock); 2905 rps->max_freq_softlimit = rps->min_freq; 2906 ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq); 2907 spin_unlock_irq(&mchdev_lock); 2908 2909 drm_dev_put(&i915->drm); 2910 return ret; 2911 } 2912 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 2913 2914 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2915 #include "selftest_rps.c" 2916 #include "selftest_slpc.c" 2917 #endif 2918
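/*
 * Usage sketch for the exported IPS hooks above (illustrative only; the
 * ips_adjust_for_headroom() caller and have_thermal_headroom() helper are
 * hypothetical stand-ins for logic living in intel_ips.ko):
 *
 *	static void ips_adjust_for_headroom(void)
 *	{
 *		unsigned long mch = i915_read_mch_val();
 *
 *		if (!i915_gpu_busy())
 *			return;
 *
 *		if (have_thermal_headroom(mch))
 *			i915_gpu_raise();	// nudge max_freq_softlimit up
 *		else
 *			i915_gpu_lower();	// back off one step
 *	}
 *
 * On a hard thermal trip the IPS side can drop straight to the floor with
 * i915_gpu_turbo_disable(), which pins max_freq_softlimit to min_freq.
 */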