// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

/*
 * Submit the request, wait for it to complete and retire it, dropping the
 * timeline lock taken at request creation.
 */
static int request_sync(struct i915_request *rq)
{
	struct intel_timeline *tl = i915_request_timeline(rq);
	long timeout;
	int err = 0;

	intel_timeline_get(tl);
	i915_request_get(rq);

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	__i915_request_queue_bh(rq);

	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0)
		err = timeout;
	else
		i915_request_retire_upto(rq);

	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	mutex_unlock(&tl->mutex);

	i915_request_put(rq);
	intel_timeline_put(tl);

	return err;
}

/* Wait for every request on the context's timeline to complete and retire. */
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		if (list_empty(&tl->requests))
			break;

		rq = list_last_entry(&tl->requests, typeof(*rq), link);
		i915_request_get(rq);

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	/* Wait for all barriers to complete (remote CPU) before we check */
	i915_active_unlock_wait(&ce->active);
	return err;
}

static int __live_context_size(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj,
						 intel_gt_coherent_map_type(engine->gt,
									    ce->state->obj,
									    false));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TLDR; this overlaps with the execlists redzone.
	 */
	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	for_each_engine(engine, gt, id) {
		struct file *saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient, we are only checking that we
		 * don't use more than we planned.
		 */
		saved = fetch_and_zero(&engine->default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->default_state = saved;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	return err;
}

static int __live_active_context(struct intel_engine_cs *engine)
{
	unsigned long saved_heartbeat;
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 *
	 * In GuC submission mode we don't use idle barriers and we instead
	 * get a message from the GuC to signal that it is safe to unpin the
	 * context from memory.
	 */
	if (intel_engine_uses_guc(engine))
		return 0;

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		intel_engine_pm_get(engine);

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_engine;
		}

		err = request_sync(rq);
		if (err)
			goto out_engine;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto out_engine;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto out_engine;
		}

out_engine:
		intel_engine_pm_put(engine);
		if (err)
			goto err;
	}

	/* Now make sure our idle-barriers are flushed */
	err = intel_engine_flush_barriers(engine);
	if (err)
		goto err;

	/* Wait for the barrier and in the process wait for engine to park */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	intel_engine_pm_flush(engine);

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_dbg_printer(&engine->i915->drm,
						       DRM_UT_DRIVER, NULL);

		intel_engine_dump(engine, &p,
				  "%s is still awake:%d after idle-barriers\n",
				  engine->name,
				  atomic_read(&engine->wakeref.count));
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	engine->props.heartbeat_interval_ms = saved_heartbeat;
	intel_context_put(ce);
	return err;
}

static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_active_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

/*
 * Issue a request on @ce that is also tracked as activity on @remote,
 * keeping @remote busy until the request completes.
 */
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine)
{
	struct intel_context *local, *remote;
	unsigned long saved_heartbeat;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 *
	 * In GuC submission mode we don't use idle barriers.
	 */
	if (intel_engine_uses_guc(engine))
		return 0;

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	remote = intel_context_create(engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;
	intel_engine_pm_get(engine);

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_engine_pm_put(engine);
	engine->props.heartbeat_interval_ms = saved_heartbeat;

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_remote_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}
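
/*
 * Usage note (an assumption based on the usual i915 selftest pattern, not
 * stated anywhere in this file): selftest sources like this one are built
 * only when CONFIG_DRM_I915_SELFTEST is enabled and are #included from
 * their parent compilation unit, with intel_context_live_selftests()
 * expected to be registered in selftests/i915_live_selftests.h so that the
 * subtests above can be run via the i915 live selftests module parameter
 * or IGT's selftest wrapper.
 */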