1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include <linux/pm_runtime.h> 7 #include <linux/string_helpers.h> 8 9 #include <drm/drm_print.h> 10 #include <drm/intel/intel_pcode_regs.h> 11 #include <drm/intel/intel_gmd_interrupt_regs.h> 12 13 #include "display/vlv_clock.h" 14 #include "gem/i915_gem_region.h" 15 #include "i915_drv.h" 16 #include "i915_reg.h" 17 #include "i915_vgpu.h" 18 #include "intel_engine_regs.h" 19 #include "intel_gt.h" 20 #include "intel_gt_pm.h" 21 #include "intel_gt_regs.h" 22 #include "intel_pcode.h" 23 #include "intel_rc6.h" 24 25 /** 26 * DOC: RC6 27 * 28 * RC6 is a special power stage which allows the GPU to enter an very 29 * low-voltage mode when idle, using down to 0V while at this stage. This 30 * stage is entered automatically when the GPU is idle when RC6 support is 31 * enabled, and as soon as new workload arises GPU wakes up automatically as 32 * well. 33 * 34 * There are different RC6 modes available in Intel GPU, which differentiate 35 * among each other with the latency required to enter and leave RC6 and 36 * voltage consumed by the GPU in different states. 37 * 38 * The combination of the following flags define which states GPU is allowed 39 * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and 40 * RC6pp is deepest RC6. Their support by hardware varies according to the 41 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one 42 * which brings the most power savings; deeper states save more power, but 43 * require higher latency to switch to and wake up. 
 */

/* Map an RC6 state struct back to the GT that embeds it. */
static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
	return container_of(rc6, struct intel_gt, rc6);
}

/* Convenience accessor for the GT's MMIO/forcewake handle. */
static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
	return rc6_to_gt(rc)->uncore;
}

/* Convenience accessor for the owning device. */
static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
	return rc6_to_gt(rc)->i915;
}

/*
 * Program RC6 thresholds and power-gating policy for gen11+, then record
 * the RC_CONTROL enabling bits in rc6->ctl_enable (written to HW on unpark).
 * When GuC-based RC6 (GuCRC) is in use, GuC owns the thresholds and the
 * HW ENABLE/EI bits, so only a subset is programmed here.
 */
static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_gt *gt = rc6_to_gt(rc6);
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 pg_enable;
	int i;

	/*
	 * With GuCRC, these parameters are set by GuC
	 */
	if (!intel_uc_uses_guc_rc(&gt->uc)) {
		/* 2b: Program RC6 thresholds.*/
		intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
		intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

		intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
		intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
		for_each_engine(engine, rc6_to_gt(rc6), id)
			intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

		intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);

		intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);

		intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
	}

	/*
	 * 2c: Program Coarse Power Gating Policies.
	 *
	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
	 * use instead is a more conservative estimate for the maximum time
	 * it takes us to service a CS interrupt and submit a new ELSP - that
	 * is the time which the GPU is idle waiting for the CPU to select the
	 * next request to execute. If the idle hysteresis is less than that
	 * interrupt service latency, the hardware will automatically gate
	 * the power well and we will then incur the wake up cost on top of
	 * the service latency. A similar guide from plane_state is that we
	 * do not want the enable hysteresis to less than the wakeup latency.
	 *
	 * igt/gem_exec_nop/sequential provides a rough estimate for the
	 * service latency, and puts it under 10us for Icelake, similar to
	 * Broadwell+, To be conservative, we want to factor in a context
	 * switch on top (due to ksoftirqd).
	 */
	intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
	intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);

	/* 3a: Enable RC6
	 *
	 * With GuCRC, we do not enable bit 31 of RC_CTL,
	 * thus allowing GuC to control RC6 entry/exit fully instead.
	 * We will not set the HW ENABLE and EI bits
	 */
	if (!intel_guc_rc_enable(gt_to_guc(gt)))
		rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
	else
		rc6->ctl_enable =
			GEN6_RC_CTL_HW_ENABLE |
			GEN6_RC_CTL_RC6_ENABLE |
			GEN6_RC_CTL_EI_MODE(1);

	/* Coarse power gating for render, media and the media sampler. */
	pg_enable =
		GEN9_RENDER_PG_ENABLE |
		GEN9_MEDIA_PG_ENABLE |
		GEN11_MEDIA_SAMPLER_PG_ENABLE;

	/* Gen12+ (except DG1): also power-gate each present video-decode engine. */
	if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) {
		for (i = 0; i < I915_MAX_VCS; i++)
			if (HAS_ENGINE(gt, _VCS(i)))
				pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
					      VDN_MFX_POWERGATE_ENABLE(i));
	}

	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
}

/*
 * Gen9/gen10 (and pre-GuCRC gen11) RC6 threshold + power-gating setup.
 * Sets rc6->ctl_enable; the actual RC_CONTROL write happens on unpark.
 */
static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* 2b: Program RC6 thresholds.*/
	if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) {
		intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
		intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
	} else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
		/*
		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
		 * when CPG is enabled
		 */
		intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
	} else {
		intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
	}

	intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
	for_each_engine(engine, rc6_to_gt(rc6), id)
		intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);

	intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);

	/*
	 * 2c: Program Coarse Power Gating Policies.
	 *
	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
	 * use instead is a more conservative estimate for the maximum time
	 * it takes us to service a CS interrupt and submit a new ELSP - that
	 * is the time which the GPU is idle waiting for the CPU to select the
	 * next request to execute. If the idle hysteresis is less than that
	 * interrupt service latency, the hardware will automatically gate
	 * the power well and we will then incur the wake up cost on top of
	 * the service latency. A similar guide from plane_state is that we
	 * do not want the enable hysteresis to less than the wakeup latency.
	 *
	 * igt/gem_exec_nop/sequential provides a rough estimate for the
	 * service latency, and puts it around 10us for Broadwell (and other
	 * big core) and around 40us for Broxton (and other low power cores).
	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
	 * However, the wakeup latency on Broxton is closer to 100us. To be
	 * conservative, we have to factor in a context switch on top (due
	 * to ksoftirqd).
	 */
	intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
	intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

	/* 3a: Enable RC6 */
	intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

	rc6->ctl_enable =
		GEN6_RC_CTL_HW_ENABLE |
		GEN6_RC_CTL_RC6_ENABLE |
		GEN6_RC_CTL_EI_MODE(1);

	/*
	 * WaRsDisableCoarsePowerGating:skl,cnl
	 * - Render/Media PG need to be disabled with RC6.
	 */
	if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
				      GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

/*
 * Broadwell RC6 setup: timeout-based entry (TO mode) rather than the
 * evaluation-interval mode used on gen9+.
 */
static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* 2b: Program RC6 thresholds.*/
	intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
	intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
	for_each_engine(engine, rc6_to_gt(rc6), id)
		intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
	intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
	intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

	/* 3: Enable RC6 */
	rc6->ctl_enable =
		GEN6_RC_CTL_HW_ENABLE |
		GEN7_RC_CTL_TO_MODE |
		GEN6_RC_CTL_RC6_ENABLE;
}

/*
 * Gen6/gen7 RC6 setup. Selects the deepest supported states (RC6/RC6p/RC6pp)
 * and, on gen6, checks for a BIOS that programmed too low an RC6 voltage,
 * correcting it via pcode if needed.
 */
static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 rc6vids, rc6_mask;
	int ret;

	intel_uncore_write_fw(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
	intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
	intel_uncore_write_fw(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
	intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
	intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

	for_each_engine(engine, rc6_to_gt(rc6), id)
		intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
	intel_uncore_write_fw(uncore, GEN6_RC1e_THRESHOLD, 1000);
	intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000);
	intel_uncore_write_fw(uncore, GEN6_RC6p_THRESHOLD, 150000);
	intel_uncore_write_fw(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

	/* We don't use those on Haswell */
	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
	if (HAS_RC6p(i915))
		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
	if (HAS_RC6pp(i915))
		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
	rc6->ctl_enable =
		rc6_mask |
		GEN6_RC_CTL_EI_MODE(1) |
		GEN6_RC_CTL_HW_ENABLE;

	/* Gen6 only: sanity-check the BIOS-programmed RC6 voltage (min 450mV). */
	rc6vids = 0;
	ret = snb_pcode_read(rc6_to_gt(rc6)->uncore, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL);
	if (GRAPHICS_VER(i915) == 6 && ret) {
		drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
	} else if (GRAPHICS_VER(i915) == 6 &&
		   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
		drm_dbg(&i915->drm,
			"You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
			GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
		rc6vids &= 0xffff00;
		rc6vids |= GEN6_ENCODE_RC6_VID(450);
		ret = snb_pcode_write(rc6_to_gt(rc6)->uncore, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
		if (ret)
			drm_err(&i915->drm,
				"Couldn't fix incorrect rc6 voltage\n");
	}
}

/* Check that the pcbr address is not empty.
 */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	resource_size_t pctx_paddr, paddr;
	resource_size_t pctx_size = 32 * SZ_1K;
	u32 pcbr;

	/*
	 * If the BIOS left PCBR unprogrammed, point it at the top of stolen
	 * memory ourselves (page-aligned, 32K reserved for the power context).
	 */
	pcbr = intel_uncore_read(uncore, VLV_PCBR);
	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
		drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
		paddr = i915->dsm.stolen.end + 1 - pctx_size;
		GEM_BUG_ON(paddr > U32_MAX);

		pctx_paddr = (paddr & ~4095);
		intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
	}

	return 0;
}

/*
 * Allocate (or adopt the BIOS-provided) power-context buffer in stolen
 * memory for Valleyview and program VLV_PCBR with its physical address.
 * The resulting object is kept in rc6->pctx and released in intel_rc6_fini().
 */
static int vlv_rc6_init(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_gem_object *pctx;
	resource_size_t pctx_paddr;
	resource_size_t pctx_size = 24 * SZ_1K;
	u32 pcbr;

	pcbr = intel_uncore_read(uncore, VLV_PCBR);
	if (pcbr) {
		/* BIOS set it up already, grab the pre-alloc'd space */
		resource_size_t pcbr_offset;

		pcbr_offset = (pcbr & ~4095) - i915->dsm.stolen.start;
		pctx = i915_gem_object_create_region_at(i915->mm.stolen_region,
							pcbr_offset,
							pctx_size,
							0);
		if (IS_ERR(pctx))
			return PTR_ERR(pctx);

		goto out;
	}

	drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");

	/*
	 * From the Gunit register HAS:
	 * The Gfx driver is expected to program this register and ensure
	 * proper allocation within Gfx stolen memory. For example, this
	 * register should be programmed such than the PCBR range does not
	 * overlap with other ranges, such as the frame buffer, protected
	 * memory, or any other relevant ranges.
	 */
	pctx = i915_gem_object_create_stolen(i915, pctx_size);
	if (IS_ERR(pctx)) {
		drm_dbg(&i915->drm,
			"not enough stolen space for PCTX, disabling\n");
		return PTR_ERR(pctx);
	}

	/* PCBR is a 32bit register; the physical address must fit. */
	GEM_BUG_ON(range_end_overflows_t(u64,
					 i915->dsm.stolen.start,
					 pctx->stolen->start,
					 U32_MAX));
	pctx_paddr = i915->dsm.stolen.start + pctx->stolen->start;
	intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
	rc6->pctx = pctx;
	return 0;
}

/* Cherryview RC6 setup: timeout-mode entry, residency counters enabled. */
static void chv_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* 2a: Program RC6 thresholds.*/
	intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
	intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
	intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

	for_each_engine(engine, rc6_to_gt(rc6), id)
		intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
	intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);

	/* TO threshold set to 500 us (0x186 * 1.28 us) */
	intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x186);

	/* Allows RC6 residency counter to work */
	intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
			      REG_MASKED_FIELD_ENABLE(VLV_COUNT_RANGE_HIGH |
						      VLV_MEDIA_RC6_COUNT_EN |
						      VLV_RENDER_RC6_COUNT_EN));

	/* 3: Enable RC6 */
	rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}

/* Valleyview RC6 setup: like CHV but also requests context reset in parallel. */
static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
	intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
	intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

	for_each_engine(engine, rc6_to_gt(rc6), id)
		intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

	intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x557);

	/* Allows RC6 residency counter to work */
	intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
			      REG_MASKED_FIELD_ENABLE(VLV_COUNT_RANGE_HIGH |
						      VLV_MEDIA_RC0_COUNT_EN |
						      VLV_RENDER_RC0_COUNT_EN |
						      VLV_MEDIA_RC6_COUNT_EN |
						      VLV_RENDER_RC6_COUNT_EN));

	rc6->ctl_enable =
	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}

/*
 * Capture (once) and test the BIOS-programmed software target C-state.
 * Returns true if the BIOS requested any C6 target state; the captured
 * value is restored on driver unload for MTL (see intel_rc6_fini()).
 */
bool intel_check_bios_c6_setup(struct intel_rc6 *rc6)
{
	if (!rc6->bios_state_captured) {
		struct intel_uncore *uncore = rc6_to_uncore(rc6);
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(uncore->rpm, wakeref)
			rc6->bios_rc_state = intel_uncore_read(uncore, GEN6_RC_STATE);

		rc6->bios_state_captured = true;
	}

	return rc6->bios_rc_state & RC_SW_TARGET_STATE_MASK;
}

/*
 * On Broxton-class (gen9 LP) parts RC6 setup is owned by the BIOS; verify
 * every piece of that setup (context location/base, engine idle timers,
 * pushbus, GFX pause, GPM control) and return false if any is missing.
 */
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	u32 rc6_ctx_base, rc_ctl, rc_sw_target;
	bool enable_rc6 = true;

	rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
	rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
	rc_sw_target &= RC_SW_TARGET_STATE_MASK;
	rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
	drm_dbg(&i915->drm, "BIOS enabled RC states: "
			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
			 str_on_off(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
			 str_on_off(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
			 rc_sw_target);

	if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
		drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
		enable_rc6 = false;
	}

	/*
	 * The exact context size is not known for BXT, so assume a page size
	 * for this check.
	 */
	rc6_ctx_base =
		intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
	if (!(rc6_ctx_base >= i915->dsm.reserved.start &&
	      rc6_ctx_base + PAGE_SIZE < i915->dsm.reserved.end)) {
		drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
		enable_rc6 = false;
	}

	if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT(RENDER_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
	      (intel_uncore_read(uncore, PWRCTX_MAXCNT(GEN6_BSD_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
	      (intel_uncore_read(uncore, PWRCTX_MAXCNT(BLT_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
	      (intel_uncore_read(uncore, PWRCTX_MAXCNT(VEBOX_RING_BASE)) & IDLE_TIME_MASK) > 1)) {
		drm_dbg(&i915->drm,
			"Engine Idle wait time not set properly.\n");
		enable_rc6 = false;
	}

	if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
	    !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
	    !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
		drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
		enable_rc6 = false;
	}

	if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
		drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
		enable_rc6 = false;
	}

	if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
		drm_dbg(&i915->drm, "GPM control not setup properly.\n");
		enable_rc6 = false;
	}

	return enable_rc6;
}

/*
 * Decide whether RC6 may be used at all on this GT: requires HW support,
 * no virtualized GPU, a real (non-mock) GT, and a sane BIOS setup on the
 * platforms where the BIOS owns part of the configuration.
 */
static bool rc6_supported(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_gt *gt = rc6_to_gt(rc6);

	if (!HAS_RC6(i915))
		return false;

	if (intel_vgpu_active(i915))
		return false;

	if (is_mock_gt(rc6_to_gt(rc6)))
		return false;

	if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
		drm_notice(&i915->drm,
			   "RC6 and powersaving disabled by BIOS\n");
		return false;
	}

	if (IS_METEORLAKE(gt->i915) &&
	    !intel_check_bios_c6_setup(rc6)) {
		drm_notice(&i915->drm,
			   "C6 disabled by BIOS\n");
		return false;
	}

	if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) {
		drm_notice(&i915->drm,
			   "Media RC6 disabled on A step\n");
		return false;
	}

	return true;
}

/* Hold a runtime-pm reference on behalf of rc6 (tracked in rc6->wakeref). */
static void rpm_get(struct intel_rc6 *rc6)
{
	GEM_BUG_ON(rc6->wakeref);
	pm_runtime_get_sync(rc6_to_i915(rc6)->drm.dev);
	rc6->wakeref = true;
}

/* Release the runtime-pm reference taken by rpm_get(). */
static void rpm_put(struct intel_rc6 *rc6)
{
	GEM_BUG_ON(!rc6->wakeref);
	pm_runtime_put(rc6_to_i915(rc6)->drm.dev);
	rc6->wakeref = false;
}

/*
 * On platforms needing the RC6 context-corruption workaround, report whether
 * the hardware has lost the RC6 context (GEN8_RC6_CTX_INFO reads as zero).
 */
static bool pctx_corrupted(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);

	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
		return false;

	if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
		return false;

	drm_notice(&i915->drm,
		   "RC6 context corruption, disabling runtime power management\n");
	return true;
}

/*
 * Turn RC6 fully off in hardware: reclaim control from GuC, then clear
 * power gating, RC_CONTROL and the software target state under forcewake.
 */
static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	struct intel_gt *gt = rc6_to_gt(rc6);

	/* Take control of RC6 back from GuC */
	intel_guc_rc_disable(gt_to_guc(gt));

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
	if (GRAPHICS_VER(i915) >= 9)
		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

/*
 * Populate rc6->res_reg[] with the residency counter registers for this GT
 * type; entries with no counter remain INVALID_MMIO_REG.
 */
static void rc6_res_reg_init(struct intel_rc6 *rc6)
{
	i915_reg_t res_reg[INTEL_RC6_RES_MAX] = {
		[0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG,
	};

	switch (rc6_to_gt(rc6)->type) {
	case GT_MEDIA:
		res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6;
		break;
	default:
		res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED;
		res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6;
		res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p;
		res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp;
		break;
	}

	memcpy(rc6->res_reg, res_reg, sizeof(res_reg));
}

/*
 * One-time RC6 init: pin runtime-pm until RC6 is ready, probe support,
 * set up residency registers and (VLV/CHV) the power context, and leave
 * RC6 disabled until intel_rc6_enable().
 */
void intel_rc6_init(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	int err;

	/* Disable runtime-pm until we can save the GPU state with rc6 pctx */
	rpm_get(rc6);

	if (!rc6_supported(rc6))
		return;

	rc6_res_reg_init(rc6);

	if (IS_CHERRYVIEW(i915))
		err = chv_rc6_init(rc6);
	else if (IS_VALLEYVIEW(i915))
		err = vlv_rc6_init(rc6);
	else
		err = 0;

	/* Sanitize rc6, ensure it is disabled before we are ready. */
	__intel_rc6_disable(rc6);

	rc6->supported = err == 0;
}

/*
 * Reset software residency tracking and force RC6 off in hardware,
 * e.g. across suspend/resume or reset.
 */
void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
	memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));

	if (rc6->enabled) { /* unbalanced suspend/resume */
		rpm_get(rc6);
		rc6->enabled = false;
	}

	if (rc6->supported)
		__intel_rc6_disable(rc6);
}

/*
 * Program the platform-appropriate RC6 configuration and, once the power
 * context is known good, release the init-time runtime-pm reference so
 * runtime power management can kick in.
 */
void intel_rc6_enable(struct intel_rc6 *rc6)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);

	if (!rc6->supported)
		return;

	GEM_BUG_ON(rc6->enabled);

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (IS_CHERRYVIEW(i915))
		chv_rc6_enable(rc6);
	else if (IS_VALLEYVIEW(i915))
		vlv_rc6_enable(rc6);
	else if (GRAPHICS_VER(i915) >= 11)
		gen11_rc6_enable(rc6);
	else if (GRAPHICS_VER(i915) >= 9)
		gen9_rc6_enable(rc6);
	else if (IS_BROADWELL(i915))
		gen8_rc6_enable(rc6);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_rc6_enable(rc6);

	/* Manual entry on park is only possible if we control the RC6 bit. */
	rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
	if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
		rc6->ctl_enable = 0;

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

	if (unlikely(pctx_corrupted(rc6)))
		return;

	/* rc6 is ready, runtime-pm is go! */
	rpm_put(rc6);
	rc6->enabled = true;
}

/* Re-arm automatic RC6 entry when the GT becomes busy again. */
void intel_rc6_unpark(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);

	if (!rc6->enabled)
		return;

	/* Restore HW timers for automatic RC6 entry while busy */
	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}

/*
 * On GT idle: either hand over to the HW timers, or - when we manually
 * control RC6 - request immediate entry into the deepest supported state.
 */
void intel_rc6_park(struct intel_rc6 *rc6)
{
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	unsigned int target;

	if (!rc6->enabled)
		return;

	if (unlikely(pctx_corrupted(rc6))) {
		intel_rc6_disable(rc6);
		return;
	}

	if (!rc6->manual)
		return;

	/* Turn off the HW timers and go directly to rc6 */
	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);

	if (HAS_RC6pp(rc6_to_i915(rc6)))
		target = 0x6; /* deepest rc6 */
	else if (HAS_RC6p(rc6_to_i915(rc6)))
		target = 0x5; /* deep rc6 */
	else
		target = 0x4; /* normal rc6 */
	intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}

/* Disable RC6, re-taking the runtime-pm reference released by enable. */
void intel_rc6_disable(struct intel_rc6 *rc6)
{
	if (!rc6->enabled)
		return;

	rpm_get(rc6);
	rc6->enabled = false;

	__intel_rc6_disable(rc6);
}

/* Teardown: disable RC6, restore BIOS state on MTL, release resources. */
void intel_rc6_fini(struct intel_rc6 *rc6)
{
	struct drm_i915_gem_object *pctx;
	struct intel_uncore *uncore = rc6_to_uncore(rc6);

	intel_rc6_disable(rc6);

	/* We want the BIOS C6 state preserved across loads for MTL */
	if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured)
		intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state);

	pctx = fetch_and_zero(&rc6->pctx);
	if (pctx)
		i915_gem_object_put(pctx);

	if (rc6->wakeref)
		rpm_put(rc6);
}

/*
 * Read a 40-bit VLV/CHV residency counter via its 32-bit window, retrying
 * until the upper bits are stable across the lower-window read.
 */
static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
	u32 lower, upper, tmp;
	int loop = 2;

	/*
	 * The register accessed do not need forcewake. We borrow
	 * uncore lock to prevent concurrent access to range reg.
	 */
	lockdep_assert_held(&uncore->lock);

	/*
	 * vlv and chv residency counters are 40 bits in width.
	 * With a control bit, we can choose between upper or lower
	 * 32bit window into this counter.
	 *
	 * Although we always use the counter in high-range mode elsewhere,
	 * userspace may attempt to read the value before rc6 is initialised,
	 * before we have set the default VLV_COUNTER_CONTROL value. So always
	 * set the high bit to be safe.
	 */
	intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
			      REG_MASKED_FIELD_ENABLE(VLV_COUNT_RANGE_HIGH));
	upper = intel_uncore_read_fw(uncore, reg);
	do {
		tmp = upper;

		intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
				      REG_MASKED_FIELD_DISABLE(VLV_COUNT_RANGE_HIGH));
		lower = intel_uncore_read_fw(uncore, reg);

		intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
				      REG_MASKED_FIELD_ENABLE(VLV_COUNT_RANGE_HIGH));
		upper = intel_uncore_read_fw(uncore, reg);
	} while (upper != tmp && --loop);

	/*
	 * Everywhere else we always use VLV_COUNTER_CONTROL with the
	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
	 * now.
	 */

	return lower | (u64)upper << 8;
}

/*
 * Return the accumulated residency for the given counter in nanoseconds,
 * extending the hardware counter in software to survive wrap-around.
 * Relies on being sampled often enough that at most one wrap occurs
 * between queries.
 */
u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id)
{
	struct drm_i915_private *i915 = rc6_to_i915(rc6);
	struct intel_uncore *uncore = rc6_to_uncore(rc6);
	u64 time_hw, prev_hw, overflow_hw;
	i915_reg_t reg = rc6->res_reg[id];
	unsigned int fw_domains;
	unsigned long flags;
	u32 mul, div;

	if (!rc6->supported)
		return 0;

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		mul = 1000000;
		div = vlv_clock_get_czclk(&i915->drm);
		overflow_hw = BIT_ULL(40);
		time_hw = vlv_residency_raw(uncore, reg);
	} else {
		/* 833.33ns units on Gen9LP, 1.28us elsewhere. */
		if (IS_GEN9_LP(i915)) {
			mul = 10000;
			div = 12;
		} else {
			mul = 1280;
			div = 1;
		}

		overflow_hw = BIT_ULL(32);
		time_hw = intel_uncore_read_fw(uncore, reg);
	}

	/*
	 * Counter wrap handling.
	 *
	 * Store previous hw counter values for counter wrap-around handling. But
	 * relying on a sufficient frequency of queries otherwise counters can still wrap.
	 */
	prev_hw = rc6->prev_hw_residency[id];
	rc6->prev_hw_residency[id] = time_hw;

	/* RC6 delta from last sample. */
	if (time_hw >= prev_hw)
		time_hw -= prev_hw;
	else
		time_hw += overflow_hw - prev_hw;

	/* Add delta to RC6 extended raw driver copy. */
	time_hw += rc6->cur_residency[id];
	rc6->cur_residency[id] = time_hw;

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irqrestore(&uncore->lock, flags);

	return mul_u64_u32_div(time_hw, mul, div);
}

/* Residency in microseconds, rounded up from the ns accumulator. */
u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id)
{
	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, id), 1000);
}

/* debugfs helper: print raw counter value and accumulated residency. */
void intel_rc6_print_residency(struct seq_file *m, const char *title,
			       enum intel_rc6_res_type id)
{
	struct intel_gt *gt = m->private;
	i915_reg_t reg = gt->rc6.res_reg[id];
	intel_wakeref_t wakeref;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		seq_printf(m, "%s %u (%llu us)\n", title,
			   intel_uncore_read(gt->uncore, reg),
			   intel_rc6_residency_us(&gt->rc6, id));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_rc6.c"
#endif