/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

static int request_sync(struct i915_request *rq)
{
	struct intel_timeline *tl = i915_request_timeline(rq);
	long timeout;
	int err = 0;

	intel_timeline_get(tl);
	i915_request_get(rq);

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	__i915_request_queue(rq, NULL);

	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0)
		err = timeout;
	else
		i915_request_retire_upto(rq);

	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	mutex_unlock(&tl->mutex);

	i915_request_put(rq);
	intel_timeline_put(tl);

	return err;
}

/* Wait for, and retire, every request outstanding on the context's timeline. */
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		if (list_empty(&tl->requests))
			break;

		rq = list_last_entry(&tl->requests, typeof(*rq), link);
		i915_request_get(rq);

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	return err;
}

static int __live_context_size(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj,
					i915_coherent_map_type(engine->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TL;DR: this overlaps with the execlists redzone.
	 */
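	/*
	 * Poison the trailing page of the (deliberately oversized) context
	 * image. If the reported context size is correct, the HW never
	 * touches this page and the poison survives the context save.
	 */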
	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	for_each_engine(engine, gt, id) {
		struct {
			struct drm_i915_gem_object *state;
			void *pinned;
		} saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient; we are only checking that we
		 * don't use more than we planned.
		 */
		saved.state = fetch_and_zero(&engine->default_state);
		saved.pinned = fetch_and_zero(&engine->pinned_default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->pinned_default_state = saved.pinned;
		engine->default_state = saved.state;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	return err;
}

static int __live_active_context(struct intel_engine_cs *engine)
{
	unsigned long saved_heartbeat;
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch, as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 */
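	/*
	 * The engine must start parked: the idle-barriers under test are
	 * only emitted when the engine wakeref drops to zero, so the
	 * wakeref transitions below must be entirely our own.
	 */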
	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/* Disable the heartbeat so background requests do not keep the engine awake. */
	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		intel_engine_pm_get(engine);

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_engine;
		}

		err = request_sync(rq);
		if (err)
			goto out_engine;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto out_engine;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto out_engine;
		}

out_engine:
		intel_engine_pm_put(engine);
		if (err)
			goto err;
	}

	/* Now make sure our idle-barriers are flushed */
	err = intel_engine_flush_barriers(engine);
	if (err)
		goto err;

	/* Wait for the barrier and, in the process, for the engine to park */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	intel_engine_pm_flush(engine);

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake:%d after idle-barriers\n",
				  engine->name,
				  atomic_read(&engine->wakeref.count));
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	engine->props.heartbeat_interval_ms = saved_heartbeat;
	intel_context_put(ce);
	return err;
}

static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_active_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

/* Submit and wait for a request on ce which acts upon the remote context's state. */
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine)
{
	struct intel_context *local, *remote;
	unsigned long saved_heartbeat;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 */
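	/* As for __live_active_context(), the engine must start parked. */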
	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	remote = intel_context_create(engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;
	intel_engine_pm_get(engine);

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_engine_pm_put(engine);
	engine->props.heartbeat_interval_ms = saved_heartbeat;

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_remote_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}