1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include <linux/pm_runtime.h> 7 #include <linux/string_helpers.h> 8 9 #include <drm/drm_print.h> 10 11 #include "gem/i915_gem_region.h" 12 #include "i915_drv.h" 13 #include "i915_reg.h" 14 #include "i915_vgpu.h" 15 #include "intel_engine_regs.h" 16 #include "intel_gt.h" 17 #include "intel_gt_pm.h" 18 #include "intel_gt_regs.h" 19 #include "intel_pcode.h" 20 #include "intel_rc6.h" 21 22 /** 23 * DOC: RC6 24 * 25 * RC6 is a special power stage which allows the GPU to enter an very 26 * low-voltage mode when idle, using down to 0V while at this stage. This 27 * stage is entered automatically when the GPU is idle when RC6 support is 28 * enabled, and as soon as new workload arises GPU wakes up automatically as 29 * well. 30 * 31 * There are different RC6 modes available in Intel GPU, which differentiate 32 * among each other with the latency required to enter and leave RC6 and 33 * voltage consumed by the GPU in different states. 34 * 35 * The combination of the following flags define which states GPU is allowed 36 * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and 37 * RC6pp is deepest RC6. Their support by hardware varies according to the 38 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one 39 * which brings the most power savings; deeper states save more power, but 40 * require higher latency to switch to and wake up. 41 */ 42 43 static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6) 44 { 45 return container_of(rc6, struct intel_gt, rc6); 46 } 47 48 static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc) 49 { 50 return rc6_to_gt(rc)->uncore; 51 } 52 53 static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) 54 { 55 return rc6_to_gt(rc)->i915; 56 } 57 58 static void gen11_rc6_enable(struct intel_rc6 *rc6) 59 { 60 struct intel_gt *gt = rc6_to_gt(rc6); 61 struct intel_uncore *uncore = gt->uncore; 62 struct intel_engine_cs *engine; 63 enum intel_engine_id id; 64 u32 pg_enable; 65 int i; 66 67 /* 68 * With GuCRC, these parameters are set by GuC 69 */ 70 if (!intel_uc_uses_guc_rc(>->uc)) { 71 /* 2b: Program RC6 thresholds.*/ 72 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); 73 intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); 74 75 intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 76 intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 77 for_each_engine(engine, rc6_to_gt(rc6), id) 78 intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); 79 80 intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA); 81 82 intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); 83 84 intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 85 } 86 87 /* 88 * 2c: Program Coarse Power Gating Policies. 89 * 90 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we 91 * use instead is a more conservative estimate for the maximum time 92 * it takes us to service a CS interrupt and submit a new ELSP - that 93 * is the time which the GPU is idle waiting for the CPU to select the 94 * next request to execute. If the idle hysteresis is less than that 95 * interrupt service latency, the hardware will automatically gate 96 * the power well and we will then incur the wake up cost on top of 97 * the service latency. A similar guide from plane_state is that we 98 * do not want the enable hysteresis to less than the wakeup latency. 99 * 100 * igt/gem_exec_nop/sequential provides a rough estimate for the 101 * service latency, and puts it under 10us for Icelake, similar to 102 * Broadwell+, To be conservative, we want to factor in a context 103 * switch on top (due to ksoftirqd). 104 */ 105 intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); 106 intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); 107 108 /* 3a: Enable RC6 109 * 110 * With GuCRC, we do not enable bit 31 of RC_CTL, 111 * thus allowing GuC to control RC6 entry/exit fully instead. 112 * We will not set the HW ENABLE and EI bits 113 */ 114 if (!intel_guc_rc_enable(gt_to_guc(gt))) 115 rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; 116 else 117 rc6->ctl_enable = 118 GEN6_RC_CTL_HW_ENABLE | 119 GEN6_RC_CTL_RC6_ENABLE | 120 GEN6_RC_CTL_EI_MODE(1); 121 122 pg_enable = 123 GEN9_RENDER_PG_ENABLE | 124 GEN9_MEDIA_PG_ENABLE | 125 GEN11_MEDIA_SAMPLER_PG_ENABLE; 126 127 if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { 128 for (i = 0; i < I915_MAX_VCS; i++) 129 if (HAS_ENGINE(gt, _VCS(i))) 130 pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | 131 VDN_MFX_POWERGATE_ENABLE(i)); 132 } 133 134 intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable); 135 } 136 137 static void gen9_rc6_enable(struct intel_rc6 *rc6) 138 { 139 struct intel_uncore *uncore = rc6_to_uncore(rc6); 140 struct intel_engine_cs *engine; 141 enum intel_engine_id id; 142 143 /* 2b: Program RC6 thresholds.*/ 144 if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) { 145 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); 146 intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); 147 } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { 148 /* 149 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only 150 * when CPG is enabled 151 */ 152 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 153 } else { 154 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 155 } 156 157 intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 158 intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 159 for_each_engine(engine, rc6_to_gt(rc6), id) 160 intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); 161 162 intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA); 163 164 intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); 165 166 /* 167 * 2c: Program Coarse Power Gating Policies. 168 * 169 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we 170 * use instead is a more conservative estimate for the maximum time 171 * it takes us to service a CS interrupt and submit a new ELSP - that 172 * is the time which the GPU is idle waiting for the CPU to select the 173 * next request to execute. If the idle hysteresis is less than that 174 * interrupt service latency, the hardware will automatically gate 175 * the power well and we will then incur the wake up cost on top of 176 * the service latency. A similar guide from plane_state is that we 177 * do not want the enable hysteresis to less than the wakeup latency. 178 * 179 * igt/gem_exec_nop/sequential provides a rough estimate for the 180 * service latency, and puts it around 10us for Broadwell (and other 181 * big core) and around 40us for Broxton (and other low power cores). 182 * [Note that for legacy ringbuffer submission, this is less than 1us!] 183 * However, the wakeup latency on Broxton is closer to 100us. To be 184 * conservative, we have to factor in a context switch on top (due 185 * to ksoftirqd). 186 */ 187 intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); 188 intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); 189 190 /* 3a: Enable RC6 */ 191 intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 192 193 rc6->ctl_enable = 194 GEN6_RC_CTL_HW_ENABLE | 195 GEN6_RC_CTL_RC6_ENABLE | 196 GEN6_RC_CTL_EI_MODE(1); 197 198 /* 199 * WaRsDisableCoarsePowerGating:skl,cnl 200 * - Render/Media PG need to be disabled with RC6. 201 */ 202 if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) 203 intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 204 GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); 205 } 206 207 static void gen8_rc6_enable(struct intel_rc6 *rc6) 208 { 209 struct intel_uncore *uncore = rc6_to_uncore(rc6); 210 struct intel_engine_cs *engine; 211 enum intel_engine_id id; 212 213 /* 2b: Program RC6 thresholds.*/ 214 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 215 intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 216 intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 217 for_each_engine(engine, rc6_to_gt(rc6), id) 218 intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); 219 intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); 220 intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 221 222 /* 3: Enable RC6 */ 223 rc6->ctl_enable = 224 GEN6_RC_CTL_HW_ENABLE | 225 GEN7_RC_CTL_TO_MODE | 226 GEN6_RC_CTL_RC6_ENABLE; 227 } 228 229 static void gen6_rc6_enable(struct intel_rc6 *rc6) 230 { 231 struct intel_uncore *uncore = rc6_to_uncore(rc6); 232 struct drm_i915_private *i915 = rc6_to_i915(rc6); 233 struct intel_engine_cs *engine; 234 enum intel_engine_id id; 235 u32 rc6vids, rc6_mask; 236 int ret; 237 238 intel_uncore_write_fw(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 239 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 240 intel_uncore_write_fw(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 241 intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); 242 intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); 243 244 for_each_engine(engine, rc6_to_gt(rc6), id) 245 intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); 246 247 intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); 248 intel_uncore_write_fw(uncore, GEN6_RC1e_THRESHOLD, 1000); 249 intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); 250 intel_uncore_write_fw(uncore, GEN6_RC6p_THRESHOLD, 150000); 251 intel_uncore_write_fw(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 252 253 /* We don't use those on Haswell */ 254 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 255 if (HAS_RC6p(i915)) 256 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 257 if (HAS_RC6pp(i915)) 258 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 259 rc6->ctl_enable = 260 rc6_mask | 261 GEN6_RC_CTL_EI_MODE(1) | 262 GEN6_RC_CTL_HW_ENABLE; 263 264 rc6vids = 0; 265 ret = snb_pcode_read(rc6_to_gt(rc6)->uncore, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL); 266 if (GRAPHICS_VER(i915) == 6 && ret) { 267 drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n"); 268 } else if (GRAPHICS_VER(i915) == 6 && 269 (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 270 drm_dbg(&i915->drm, 271 "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", 272 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 273 rc6vids &= 0xffff00; 274 rc6vids |= GEN6_ENCODE_RC6_VID(450); 275 ret = snb_pcode_write(rc6_to_gt(rc6)->uncore, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 276 if (ret) 277 drm_err(&i915->drm, 278 "Couldn't fix incorrect rc6 voltage\n"); 279 } 280 } 281 282 /* Check that the pcbr address is not empty. */ 283 static int chv_rc6_init(struct intel_rc6 *rc6) 284 { 285 struct intel_uncore *uncore = rc6_to_uncore(rc6); 286 struct drm_i915_private *i915 = rc6_to_i915(rc6); 287 resource_size_t pctx_paddr, paddr; 288 resource_size_t pctx_size = 32 * SZ_1K; 289 u32 pcbr; 290 291 pcbr = intel_uncore_read(uncore, VLV_PCBR); 292 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { 293 drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n"); 294 paddr = i915->dsm.stolen.end + 1 - pctx_size; 295 GEM_BUG_ON(paddr > U32_MAX); 296 297 pctx_paddr = (paddr & ~4095); 298 intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); 299 } 300 301 return 0; 302 } 303 304 static int vlv_rc6_init(struct intel_rc6 *rc6) 305 { 306 struct drm_i915_private *i915 = rc6_to_i915(rc6); 307 struct intel_uncore *uncore = rc6_to_uncore(rc6); 308 struct drm_i915_gem_object *pctx; 309 resource_size_t pctx_paddr; 310 resource_size_t pctx_size = 24 * SZ_1K; 311 u32 pcbr; 312 313 pcbr = intel_uncore_read(uncore, VLV_PCBR); 314 if (pcbr) { 315 /* BIOS set it up already, grab the pre-alloc'd space */ 316 resource_size_t pcbr_offset; 317 318 pcbr_offset = (pcbr & ~4095) - i915->dsm.stolen.start; 319 pctx = i915_gem_object_create_region_at(i915->mm.stolen_region, 320 pcbr_offset, 321 pctx_size, 322 0); 323 if (IS_ERR(pctx)) 324 return PTR_ERR(pctx); 325 326 goto out; 327 } 328 329 drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n"); 330 331 /* 332 * From the Gunit register HAS: 333 * The Gfx driver is expected to program this register and ensure 334 * proper allocation within Gfx stolen memory. For example, this 335 * register should be programmed such than the PCBR range does not 336 * overlap with other ranges, such as the frame buffer, protected 337 * memory, or any other relevant ranges. 338 */ 339 pctx = i915_gem_object_create_stolen(i915, pctx_size); 340 if (IS_ERR(pctx)) { 341 drm_dbg(&i915->drm, 342 "not enough stolen space for PCTX, disabling\n"); 343 return PTR_ERR(pctx); 344 } 345 346 GEM_BUG_ON(range_end_overflows_t(u64, 347 i915->dsm.stolen.start, 348 pctx->stolen->start, 349 U32_MAX)); 350 pctx_paddr = i915->dsm.stolen.start + pctx->stolen->start; 351 intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); 352 353 out: 354 rc6->pctx = pctx; 355 return 0; 356 } 357 358 static void chv_rc6_enable(struct intel_rc6 *rc6) 359 { 360 struct intel_uncore *uncore = rc6_to_uncore(rc6); 361 struct intel_engine_cs *engine; 362 enum intel_engine_id id; 363 364 /* 2a: Program RC6 thresholds.*/ 365 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 366 intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 367 intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 368 369 for_each_engine(engine, rc6_to_gt(rc6), id) 370 intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); 371 intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); 372 373 /* TO threshold set to 500 us (0x186 * 1.28 us) */ 374 intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x186); 375 376 /* Allows RC6 residency counter to work */ 377 intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, 378 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 379 VLV_MEDIA_RC6_COUNT_EN | 380 VLV_RENDER_RC6_COUNT_EN)); 381 382 /* 3: Enable RC6 */ 383 rc6->ctl_enable = GEN7_RC_CTL_TO_MODE; 384 } 385 386 static void vlv_rc6_enable(struct intel_rc6 *rc6) 387 { 388 struct intel_uncore *uncore = rc6_to_uncore(rc6); 389 struct intel_engine_cs *engine; 390 enum intel_engine_id id; 391 392 intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 393 intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); 394 intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); 395 396 for_each_engine(engine, rc6_to_gt(rc6), id) 397 intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); 398 399 intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x557); 400 401 /* Allows RC6 residency counter to work */ 402 intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, 403 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 404 VLV_MEDIA_RC0_COUNT_EN | 405 VLV_RENDER_RC0_COUNT_EN | 406 VLV_MEDIA_RC6_COUNT_EN | 407 VLV_RENDER_RC6_COUNT_EN)); 408 409 rc6->ctl_enable = 410 GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 411 } 412 413 bool intel_check_bios_c6_setup(struct intel_rc6 *rc6) 414 { 415 if (!rc6->bios_state_captured) { 416 struct intel_uncore *uncore = rc6_to_uncore(rc6); 417 intel_wakeref_t wakeref; 418 419 with_intel_runtime_pm(uncore->rpm, wakeref) 420 rc6->bios_rc_state = intel_uncore_read(uncore, GEN6_RC_STATE); 421 422 rc6->bios_state_captured = true; 423 } 424 425 return rc6->bios_rc_state & RC_SW_TARGET_STATE_MASK; 426 } 427 428 static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) 429 { 430 struct intel_uncore *uncore = rc6_to_uncore(rc6); 431 struct drm_i915_private *i915 = rc6_to_i915(rc6); 432 u32 rc6_ctx_base, rc_ctl, rc_sw_target; 433 bool enable_rc6 = true; 434 435 rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL); 436 rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); 437 rc_sw_target &= RC_SW_TARGET_STATE_MASK; 438 rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; 439 drm_dbg(&i915->drm, "BIOS enabled RC states: " 440 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", 441 str_on_off(rc_ctl & GEN6_RC_CTL_HW_ENABLE), 442 str_on_off(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), 443 rc_sw_target); 444 445 if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) { 446 drm_dbg(&i915->drm, "RC6 Base location not set properly.\n"); 447 enable_rc6 = false; 448 } 449 450 /* 451 * The exact context size is not known for BXT, so assume a page size 452 * for this check. 453 */ 454 rc6_ctx_base = 455 intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; 456 if (!(rc6_ctx_base >= i915->dsm.reserved.start && 457 rc6_ctx_base + PAGE_SIZE < i915->dsm.reserved.end)) { 458 drm_dbg(&i915->drm, "RC6 Base address not as expected.\n"); 459 enable_rc6 = false; 460 } 461 462 if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT(RENDER_RING_BASE)) & IDLE_TIME_MASK) > 1 && 463 (intel_uncore_read(uncore, PWRCTX_MAXCNT(GEN6_BSD_RING_BASE)) & IDLE_TIME_MASK) > 1 && 464 (intel_uncore_read(uncore, PWRCTX_MAXCNT(BLT_RING_BASE)) & IDLE_TIME_MASK) > 1 && 465 (intel_uncore_read(uncore, PWRCTX_MAXCNT(VEBOX_RING_BASE)) & IDLE_TIME_MASK) > 1)) { 466 drm_dbg(&i915->drm, 467 "Engine Idle wait time not set properly.\n"); 468 enable_rc6 = false; 469 } 470 471 if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || 472 !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || 473 !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { 474 drm_dbg(&i915->drm, "Pushbus not setup properly.\n"); 475 enable_rc6 = false; 476 } 477 478 if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { 479 drm_dbg(&i915->drm, "GFX pause not setup properly.\n"); 480 enable_rc6 = false; 481 } 482 483 if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { 484 drm_dbg(&i915->drm, "GPM control not setup properly.\n"); 485 enable_rc6 = false; 486 } 487 488 return enable_rc6; 489 } 490 491 static bool rc6_supported(struct intel_rc6 *rc6) 492 { 493 struct drm_i915_private *i915 = rc6_to_i915(rc6); 494 struct intel_gt *gt = rc6_to_gt(rc6); 495 496 if (!HAS_RC6(i915)) 497 return false; 498 499 if (intel_vgpu_active(i915)) 500 return false; 501 502 if (is_mock_gt(rc6_to_gt(rc6))) 503 return false; 504 505 if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { 506 drm_notice(&i915->drm, 507 "RC6 and powersaving disabled by BIOS\n"); 508 return false; 509 } 510 511 if (IS_METEORLAKE(gt->i915) && 512 !intel_check_bios_c6_setup(rc6)) { 513 drm_notice(&i915->drm, 514 "C6 disabled by BIOS\n"); 515 return false; 516 } 517 518 if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) { 519 drm_notice(&i915->drm, 520 "Media RC6 disabled on A step\n"); 521 return false; 522 } 523 524 return true; 525 } 526 527 static void rpm_get(struct intel_rc6 *rc6) 528 { 529 GEM_BUG_ON(rc6->wakeref); 530 pm_runtime_get_sync(rc6_to_i915(rc6)->drm.dev); 531 rc6->wakeref = true; 532 } 533 534 static void rpm_put(struct intel_rc6 *rc6) 535 { 536 GEM_BUG_ON(!rc6->wakeref); 537 pm_runtime_put(rc6_to_i915(rc6)->drm.dev); 538 rc6->wakeref = false; 539 } 540 541 static bool pctx_corrupted(struct intel_rc6 *rc6) 542 { 543 struct drm_i915_private *i915 = rc6_to_i915(rc6); 544 545 if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) 546 return false; 547 548 if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO)) 549 return false; 550 551 drm_notice(&i915->drm, 552 "RC6 context corruption, disabling runtime power management\n"); 553 return true; 554 } 555 556 static void __intel_rc6_disable(struct intel_rc6 *rc6) 557 { 558 struct drm_i915_private *i915 = rc6_to_i915(rc6); 559 struct intel_uncore *uncore = rc6_to_uncore(rc6); 560 struct intel_gt *gt = rc6_to_gt(rc6); 561 562 /* Take control of RC6 back from GuC */ 563 intel_guc_rc_disable(gt_to_guc(gt)); 564 565 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 566 if (GRAPHICS_VER(i915) >= 9) 567 intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0); 568 intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0); 569 intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0); 570 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 571 } 572 573 static void rc6_res_reg_init(struct intel_rc6 *rc6) 574 { 575 i915_reg_t res_reg[INTEL_RC6_RES_MAX] = { 576 [0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG, 577 }; 578 579 switch (rc6_to_gt(rc6)->type) { 580 case GT_MEDIA: 581 res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; 582 break; 583 default: 584 res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; 585 res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; 586 res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; 587 res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; 588 break; 589 } 590 591 memcpy(rc6->res_reg, res_reg, sizeof(res_reg)); 592 } 593 594 void intel_rc6_init(struct intel_rc6 *rc6) 595 { 596 struct drm_i915_private *i915 = rc6_to_i915(rc6); 597 int err; 598 599 /* Disable runtime-pm until we can save the GPU state with rc6 pctx */ 600 rpm_get(rc6); 601 602 if (!rc6_supported(rc6)) 603 return; 604 605 rc6_res_reg_init(rc6); 606 607 if (IS_CHERRYVIEW(i915)) 608 err = chv_rc6_init(rc6); 609 else if (IS_VALLEYVIEW(i915)) 610 err = vlv_rc6_init(rc6); 611 else 612 err = 0; 613 614 /* Sanitize rc6, ensure it is disabled before we are ready. */ 615 __intel_rc6_disable(rc6); 616 617 rc6->supported = err == 0; 618 } 619 620 void intel_rc6_sanitize(struct intel_rc6 *rc6) 621 { 622 memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency)); 623 624 if (rc6->enabled) { /* unbalanced suspend/resume */ 625 rpm_get(rc6); 626 rc6->enabled = false; 627 } 628 629 if (rc6->supported) 630 __intel_rc6_disable(rc6); 631 } 632 633 void intel_rc6_enable(struct intel_rc6 *rc6) 634 { 635 struct drm_i915_private *i915 = rc6_to_i915(rc6); 636 struct intel_uncore *uncore = rc6_to_uncore(rc6); 637 638 if (!rc6->supported) 639 return; 640 641 GEM_BUG_ON(rc6->enabled); 642 643 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 644 645 if (IS_CHERRYVIEW(i915)) 646 chv_rc6_enable(rc6); 647 else if (IS_VALLEYVIEW(i915)) 648 vlv_rc6_enable(rc6); 649 else if (GRAPHICS_VER(i915) >= 11) 650 gen11_rc6_enable(rc6); 651 else if (GRAPHICS_VER(i915) >= 9) 652 gen9_rc6_enable(rc6); 653 else if (IS_BROADWELL(i915)) 654 gen8_rc6_enable(rc6); 655 else if (GRAPHICS_VER(i915) >= 6) 656 gen6_rc6_enable(rc6); 657 658 rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE; 659 if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) 660 rc6->ctl_enable = 0; 661 662 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 663 664 if (unlikely(pctx_corrupted(rc6))) 665 return; 666 667 /* rc6 is ready, runtime-pm is go! */ 668 rpm_put(rc6); 669 rc6->enabled = true; 670 } 671 672 void intel_rc6_unpark(struct intel_rc6 *rc6) 673 { 674 struct intel_uncore *uncore = rc6_to_uncore(rc6); 675 676 if (!rc6->enabled) 677 return; 678 679 /* Restore HW timers for automatic RC6 entry while busy */ 680 intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); 681 } 682 683 void intel_rc6_park(struct intel_rc6 *rc6) 684 { 685 struct intel_uncore *uncore = rc6_to_uncore(rc6); 686 unsigned int target; 687 688 if (!rc6->enabled) 689 return; 690 691 if (unlikely(pctx_corrupted(rc6))) { 692 intel_rc6_disable(rc6); 693 return; 694 } 695 696 if (!rc6->manual) 697 return; 698 699 /* Turn off the HW timers and go directly to rc6 */ 700 intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); 701 702 if (HAS_RC6pp(rc6_to_i915(rc6))) 703 target = 0x6; /* deepest rc6 */ 704 else if (HAS_RC6p(rc6_to_i915(rc6))) 705 target = 0x5; /* deep rc6 */ 706 else 707 target = 0x4; /* normal rc6 */ 708 intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT); 709 } 710 711 void intel_rc6_disable(struct intel_rc6 *rc6) 712 { 713 if (!rc6->enabled) 714 return; 715 716 rpm_get(rc6); 717 rc6->enabled = false; 718 719 __intel_rc6_disable(rc6); 720 } 721 722 void intel_rc6_fini(struct intel_rc6 *rc6) 723 { 724 struct drm_i915_gem_object *pctx; 725 struct intel_uncore *uncore = rc6_to_uncore(rc6); 726 727 intel_rc6_disable(rc6); 728 729 /* We want the BIOS C6 state preserved across loads for MTL */ 730 if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured) 731 intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state); 732 733 pctx = fetch_and_zero(&rc6->pctx); 734 if (pctx) 735 i915_gem_object_put(pctx); 736 737 if (rc6->wakeref) 738 rpm_put(rc6); 739 } 740 741 static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) 742 { 743 u32 lower, upper, tmp; 744 int loop = 2; 745 746 /* 747 * The register accessed do not need forcewake. We borrow 748 * uncore lock to prevent concurrent access to range reg. 749 */ 750 lockdep_assert_held(&uncore->lock); 751 752 /* 753 * vlv and chv residency counters are 40 bits in width. 754 * With a control bit, we can choose between upper or lower 755 * 32bit window into this counter. 756 * 757 * Although we always use the counter in high-range mode elsewhere, 758 * userspace may attempt to read the value before rc6 is initialised, 759 * before we have set the default VLV_COUNTER_CONTROL value. So always 760 * set the high bit to be safe. 761 */ 762 intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, 763 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); 764 upper = intel_uncore_read_fw(uncore, reg); 765 do { 766 tmp = upper; 767 768 intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, 769 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); 770 lower = intel_uncore_read_fw(uncore, reg); 771 772 intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, 773 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); 774 upper = intel_uncore_read_fw(uncore, reg); 775 } while (upper != tmp && --loop); 776 777 /* 778 * Everywhere else we always use VLV_COUNTER_CONTROL with the 779 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set 780 * now. 781 */ 782 783 return lower | (u64)upper << 8; 784 } 785 786 u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id) 787 { 788 struct drm_i915_private *i915 = rc6_to_i915(rc6); 789 struct intel_uncore *uncore = rc6_to_uncore(rc6); 790 u64 time_hw, prev_hw, overflow_hw; 791 i915_reg_t reg = rc6->res_reg[id]; 792 unsigned int fw_domains; 793 unsigned long flags; 794 u32 mul, div; 795 796 if (!rc6->supported) 797 return 0; 798 799 fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); 800 801 spin_lock_irqsave(&uncore->lock, flags); 802 intel_uncore_forcewake_get__locked(uncore, fw_domains); 803 804 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ 805 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { 806 mul = 1000000; 807 div = i915->czclk_freq; 808 overflow_hw = BIT_ULL(40); 809 time_hw = vlv_residency_raw(uncore, reg); 810 } else { 811 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ 812 if (IS_GEN9_LP(i915)) { 813 mul = 10000; 814 div = 12; 815 } else { 816 mul = 1280; 817 div = 1; 818 } 819 820 overflow_hw = BIT_ULL(32); 821 time_hw = intel_uncore_read_fw(uncore, reg); 822 } 823 824 /* 825 * Counter wrap handling. 826 * 827 * Store previous hw counter values for counter wrap-around handling. But 828 * relying on a sufficient frequency of queries otherwise counters can still wrap. 829 */ 830 prev_hw = rc6->prev_hw_residency[id]; 831 rc6->prev_hw_residency[id] = time_hw; 832 833 /* RC6 delta from last sample. */ 834 if (time_hw >= prev_hw) 835 time_hw -= prev_hw; 836 else 837 time_hw += overflow_hw - prev_hw; 838 839 /* Add delta to RC6 extended raw driver copy. */ 840 time_hw += rc6->cur_residency[id]; 841 rc6->cur_residency[id] = time_hw; 842 843 intel_uncore_forcewake_put__locked(uncore, fw_domains); 844 spin_unlock_irqrestore(&uncore->lock, flags); 845 846 return mul_u64_u32_div(time_hw, mul, div); 847 } 848 849 u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id) 850 { 851 return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, id), 1000); 852 } 853 854 void intel_rc6_print_residency(struct seq_file *m, const char *title, 855 enum intel_rc6_res_type id) 856 { 857 struct intel_gt *gt = m->private; 858 i915_reg_t reg = gt->rc6.res_reg[id]; 859 intel_wakeref_t wakeref; 860 861 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 862 seq_printf(m, "%s %u (%llu us)\n", title, 863 intel_uncore_read(gt->uncore, reg), 864 intel_rc6_residency_us(>->rc6, id)); 865 } 866 867 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 868 #include "selftest_rc6.c" 869 #endif 870