// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "intel_rps.h"
#include "selftest_rc6.h"

#include "selftests/i915_random.h"
#include "selftests/librapl.h"

/*
 * Total RC6 residency for the GT, in nanoseconds: the base RC6 counter
 * plus the deeper RC6p/RC6pp counters on the platforms that have them.
 */
static u64 rc6_residency(struct intel_rc6 *rc6)
{
	u64 result;

	/* XXX VLV_GT_MEDIA_RC6? */

	result = intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6);
	if (HAS_RC6p(rc6_to_i915(rc6)))
		result += intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6p);
	if (HAS_RC6pp(rc6_to_i915(rc6)))
		result += intel_rc6_residency_ns(rc6, INTEL_RC6_RES_RC6pp);

	return result;
}

/*
 * live_rc6_manual - check we can steer the GT in and out of RC6 by hand
 *
 * With RC6 forced off, the residency counters must not advance over a
 * 1s sample window (and, when RAPL energy readings are available, the
 * GPU must draw measurable power). After manually parking into RC6,
 * the residency counters must advance, and the measured power draw is
 * expected to fall to less than half of the RC0 level.
 *
 * Returns 0 on success or when the test is not applicable (RC6
 * disabled, or byt/bsw where the PCU owns RC6), -EINVAL on any
 * mismatch between expected and observed behaviour.
 */
int live_rc6_manual(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rc6 *rc6 = &gt->rc6;
	struct intel_rps *rps = &gt->rps;
	intel_wakeref_t wakeref;
	u64 rc0_sample_energy[2]; /* [0] = start reading, [1] = delta consumed */
	u64 rc6_sample_energy[2];
	u64 sleep_time = 1000; /* sample window per state, in milliseconds */
	u32 rc0_freq = 0;
	u32 rc6_freq = 0;
	u64 rc0_power;
	u64 rc6_power;
	bool has_power;
	u64 threshold;
	ktime_t dt;
	u64 res[2];
	int err = 0;
	u64 diff;


	/*
	 * Our claim is that we can "encourage" the GPU to enter rc6 at will.
	 * Let's try it!
	 */

	if (!rc6->enabled)
		return 0;

	/* bsw/byt use a PCU and decouple RC6 from our manual control */
	if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
		return 0;

	has_power = librapl_supported(gt->i915);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	/* Force RC6 off for starters */
	__intel_rc6_disable(rc6);
	/* wakeup is not immediate, takes about 100us on icl */
	usleep_range(1000, 2000);

	res[0] = rc6_residency(rc6);

	dt = ktime_get();
	rc0_sample_energy[0] = librapl_energy_uJ();
	msleep(sleep_time);
	/* [1] is overwritten with the energy consumed across the sleep */
	rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0];
	dt = ktime_sub(ktime_get(), dt);
	res[1] = rc6_residency(rc6);
	rc0_freq = intel_rps_read_actual_frequency_fw(rps);
	/* Tolerate ~1us of residency counter noise while RC6 is disabled */
	if ((res[1] - res[0]) >> 10) {
		pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
		       (res[1] - res[0]) >> 10);
		err = -EINVAL;
		goto out_unlock;
	}

	if (has_power) {
		/* Average power in uW: uJ scaled by NSEC_PER_SEC / elapsed ns */
		rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1],
				      ktime_to_ns(dt));

		if (!rc0_power) {
			if (rc0_freq)
				pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n",
					 rc0_freq);
			else
				pr_err("No power and freq measured while in RC0\n");
			err = -EINVAL;
			goto out_unlock;
		}
	}

	/* Manually enter RC6 */
	intel_rc6_park(rc6);

	res[0] = rc6_residency(rc6);
	/* Release any forcewake so the GT is free to powergate */
	intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
	dt = ktime_get();
	rc6_sample_energy[0] = librapl_energy_uJ();
	msleep(sleep_time);
	rc6_freq = intel_rps_read_actual_frequency_fw(rps);
	rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0];
	dt = ktime_sub(ktime_get(), dt);
	res[1] = rc6_residency(rc6);
	if (res[1] == res[0]) {
		pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
		       intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
		       intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL),
		       res[0]);
		err = -EINVAL;
	}

	if (has_power) {
		rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1],
				      ktime_to_ns(dt));
		pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n",
			rc0_power, rc6_power);

		/* Expect RC6 to draw less than half the RC0 power */
		if (2 * rc6_power > rc0_power) {
			pr_err("GPU leaked energy while in RC6!\n"
			       "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n"
			       "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n"
			       "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n",
			       rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1],
			       rc6_sample_energy[0], rc6_sample_energy[1]);

			/*
			 * Distinguish a real leak from simply failing to
			 * stay in RC6: require at least 90% of the sleep
			 * window to have been spent in RC6.
			 */
			diff = res[1] - res[0];
			threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10;
			if (diff < threshold)
				pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n",
				       res[0], res[1]);
			err = -EINVAL;
			goto out_unlock;
		}
	}

	/* Restore what should have been the original state! */
	intel_rc6_unpark(rc6);

out_unlock:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	return err;
}

/*
 * Submit a request on @ce that stores GEN8_RC6_CTX_INFO into the
 * context timeline's HWSP, 8 bytes past the seqno, and return a CPU
 * pointer to where the value will land (or an ERR_PTR on failure).
 * The caller must wait for the request to complete before reading.
 */
static const u32 *__live_rc6_ctx(struct intel_context *ce)
{
	struct i915_request *rq;
	const u32 *result;
	u32 cmd;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return ERR_CAST(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return cs;
	}

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (GRAPHICS_VER(rq->i915) >= 8)
		cmd++; /* gen8+ SRM carries an extra address dword */

	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
	*cs++ = ce->timeline->hwsp_offset + 8;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	/* hwsp_seqno + 2 u32s matches the +8 byte offset written above */
	result = rq->hwsp_seqno + 2;
	i915_request_add(rq);

	return result;
}

/*
 * Return a randomly shuffled, kmalloc'ed array of every engine on @gt,
 * with the element count stored in *@count. Returns NULL when there
 * are no engines or on allocation failure; the caller owns the array
 * and must kfree() it.
 */
static struct intel_engine_cs **
randomised_engines(struct intel_gt *gt,
		   struct rnd_state *prng,
		   unsigned int *count)
{
	struct intel_engine_cs *engine, **engines;
	enum intel_engine_id id;
	int n;

	n = 0;
	for_each_engine(engine, gt, id)
		n++;
	if (!n)
		return NULL;

	engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return NULL;

	n = 0;
	for_each_engine(engine, gt, id)
		engines[n++] = engine;

	i915_prandom_shuffle(engines, sizeof(*engines), n, prng);

	*count = n;
	return engines;
}

/*
 * live_rc6_ctx_wa - poke the RC6 context-info register from every engine
 *
 * Reading GEN8_RC6_CTX_INFO from within a request is known to upset
 * RC6. Issue such a read on each engine (in random order, two passes
 * each, from a sacrificial context), then check that the GT still
 * idles and that no engine reset was required. Returns 0 on success,
 * -ETIME (and wedges the GT) if it fails to idle, -EIO if a reset
 * fired, or a negative error from request construction.
 */
int live_rc6_ctx_wa(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs **engines;
	unsigned int n, count;
	I915_RND_STATE(prng);
	int err = 0;

	/* A read of CTX_INFO upsets rc6. Poke the bear! */
	if (GRAPHICS_VER(gt->i915) < 8)
		return 0;

	engines = randomised_engines(gt, &prng, &count);
	if (!engines)
		return 0;

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *engine = engines[n];
		int pass;

		for (pass = 0; pass < 2; pass++) {
			struct i915_gpu_error *error = &gt->i915->gpu_error;
			struct intel_context *ce;
			unsigned int resets =
				i915_reset_engine_count(error, engine);
			const u32 *res;

			/* Use a sacrificial context */
			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			intel_engine_pm_get(engine);
			res = __live_rc6_ctx(ce);
			intel_engine_pm_put(engine);
			intel_context_put(ce);
			if (IS_ERR(res)) {
				err = PTR_ERR(res);
				goto out;
			}

			if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
				intel_gt_set_wedged(gt);
				err = -ETIME;
				goto out;
			}

			/* Let the GT park (re-entering rc6) before reading back */
			intel_gt_pm_wait_for_idle(gt);
			pr_debug("%s: CTX_INFO=%0x\n",
				 engine->name, READ_ONCE(*res));

			if (resets !=
			    i915_reset_engine_count(error, engine)) {
				pr_err("%s: GPU reset required\n",
				       engine->name);
				add_taint_for_CI(gt->i915, TAINT_WARN);
				err = -EIO;
				goto out;
			}
		}
	}

out:
	kfree(engines);
	return err;
}