// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include <drm/drm_print.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_jiffies.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

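/*
 * CS_GPR addresses general purpose register @n within an engine's MMIO
 * range; each engine exposes NUM_GPR 64-bit registers, addressed here as
 * NUM_GPR_DW 32-bit dwords.
 */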
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

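/*
 * "Active" here means inflight from the backend's point of view: in the
 * ELSP, parked on the hold list, or already observed to have started
 * executing on the HW (via its initial breadcrumb).
 */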
static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

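/*
 * Poll until the HW has acknowledged submission of @rq (or the request
 * has already completed), returning -ETIME if that takes longer than
 * @timeout jiffies.
 */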
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	/* Ignore our own attempts to suppress excess tasklets */
	tasklet_hi_schedule(&engine->sched_engine->tasklet);

	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

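/*
 * Wait for the engine reset to mark @rq as guilty (fence error -EIO)
 * and then for the request to complete, flushing the submission tasklet
 * as we go.
 */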
static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffy to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

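/*
 * Smoke test: on each engine, submit a spinner, check that it starts,
 * then quiesce the GT. If this fails, nothing else will work.
 */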
static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */
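	/*
	 * A lite restore is a resubmission of a context that is already
	 * active in ELSP, where the HW only samples the updated RING_TAIL
	 * rather than reloading the whole context image.
	 */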

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Set up the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		lrc_update_regs(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->sched_engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_PRIORITY_MAX);
}

static int live_unlite_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct igt_spinner spin;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Set up a preemption event that will cause almost the entire ring
	 * to be unwound, potentially fooling our intel_ring_direction()
	 * into emitting a forward lite-restore instead of the rollback.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq;
		struct igt_live_test t;
		int n;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			memset32(tmp->ring->vaddr,
				 0xdeadbeef, /* trigger a hang if executed */
				 tmp->ring->vma->size / sizeof(u32));

			ce[n] = tmp;
		}

		/* Create max prio spinner, followed by N low prio nops */
		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		i915_request_get(rq);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ce;
		}

		/* Fill the ring until we cause a wrap */
		n = 0;
		while (intel_ring_direction(ce[0]->ring,
					    rq->wa_tail,
					    ce[0]->ring->tail) <= 0) {
			struct i915_request *tmp;

			tmp = intel_context_create_request(ce[0]);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				i915_request_put(rq);
				goto err_ce;
			}

			i915_request_add(tmp);
			intel_engine_flush_submission(engine);
			n++;
		}
		intel_engine_flush_submission(engine);
		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
			 engine->name, n,
			 ce[0]->ring->size,
			 ce[0]->ring->tail,
			 ce[0]->ring->emit,
			 rq->tail);
		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
						rq->tail,
						ce[0]->ring->tail) <= 0);
		i915_request_put(rq);

		/* Create a second ring to preempt the first ring after rq[0] */
		rq = intel_context_create_request(ce[1]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		err = wait_for_submit(engine, rq, HZ / 2);
		i915_request_put(rq);
		if (err) {
			pr_err("%s: preemption request was not submitted\n",
			       engine->name);
			err = -ETIME;
		}

		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
			 engine->name,
			 ce[0]->ring->tail, ce[0]->ring->emit,
			 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}
		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

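/*
 * Claim exclusive use of the engine for reset: disable its submission
 * tasklet and take the per-engine reset bit. If the bit is already
 * held, we cannot safely proceed and wedge the GT instead.
 */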
static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
{
	tasklet_disable(&engine->sched_engine->tasklet);
	local_bh_disable();

	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &engine->gt->reset.flags)) {
		local_bh_enable();
		tasklet_enable(&engine->sched_engine->tasklet);

		intel_gt_set_wedged(engine->gt);
		return -EBUSY;
	}

	return 0;
}

static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
{
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
			      &engine->gt->reset.flags);

	local_bh_enable();
	tasklet_enable(&engine->sched_engine->tasklet);
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		err = engine_lock_reset_tasklet(engine);
		if (err)
			goto out;

		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		__intel_engine_reset_bh(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		engine_unlock_reset_tasklet(engine);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		st_engine_heartbeat_enable(engine);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}

static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { BAD,  BAD  } },
		{ { BAD,  GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		const struct error_phase *p;
		int err = 0;

		st_engine_heartbeat_disable(engine);

		for (p = phases; p->error[0] != GOOD; p++) {
			struct i915_request *client[ARRAY_SIZE(phases->error)];
			u32 *cs;
			int i;

			memset(client, 0, sizeof(client)); /* zero all request pointers */
			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct intel_context *ce;
				struct i915_request *rq;

				ce = intel_context_create(engine);
				if (IS_ERR(ce)) {
					err = PTR_ERR(ce);
					goto out;
				}

				rq = intel_context_create_request(ce);
				intel_context_put(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out;
				}

				if (rq->engine->emit_init_breadcrumb) {
					err = rq->engine->emit_init_breadcrumb(rq);
					if (err) {
						i915_request_add(rq);
						goto out;
					}
				}

				cs = intel_ring_begin(rq, 2);
				if (IS_ERR(cs)) {
					i915_request_add(rq);
					err = PTR_ERR(cs);
					goto out;
				}

				if (p->error[i]) {
					*cs++ = 0xdeadbeef;
					*cs++ = 0xdeadbeef;
				} else {
					*cs++ = MI_NOOP;
					*cs++ = MI_NOOP;
				}

				client[i] = i915_request_get(rq);
				i915_request_add(rq);
			}

			err = wait_for_submit(engine, client[0], HZ / 2);
			if (err) {
				pr_err("%s: first request did not start within time!\n",
				       engine->name);
				err = -ETIME;
				goto out;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
					pr_debug("%s: %s request incomplete!\n",
						 engine->name,
						 error_repr(p->error[i]));

				if (!i915_request_started(client[i])) {
					pr_err("%s: %s request not started!\n",
					       engine->name,
					       error_repr(p->error[i]));
					err = -ETIME;
					goto out;
				}

				/* Kick the tasklet to process the error */
				intel_engine_flush_submission(engine);
				if (client[i]->fence.error != p->error[i]) {
					pr_err("%s: %s request (%s) with wrong error code: %d\n",
					       engine->name,
					       error_repr(p->error[i]),
					       i915_request_completed(client[i]) ? "completed" : "running",
					       client[i]->fence.error);
					err = -EINVAL;
					goto out;
				}
			}

out:
			for (i = 0; i < ARRAY_SIZE(client); i++)
				if (client[i])
					i915_request_put(client[i]);
			if (err) {
				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
				       engine->name, p - phases,
				       p->error[0], p->error[1]);
				break;
			}
		}

		st_engine_heartbeat_enable(engine);
		if (err) {
			intel_gt_set_wedged(gt);
			return err;
		}
	}

	return 0;
}

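/*
 * Emit one link of a semaphore chain: spin until dword @idx of @vma
 * becomes non-zero, then release the previous link by writing 1 into
 * dword @idx - 1. Arbitration is enabled around the wait so the
 * busywait itself remains preemptible.
 */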
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);
	return 0;
}

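/*
 * Queue one semaphore-chain link on @engine in its own context,
 * returning a reference to the new request.
 */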
static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
	struct intel_context *ce;
	struct i915_request *rq;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto out_ce;

	err = 0;
	if (rq->engine->emit_init_breadcrumb)
		err = rq->engine->emit_init_breadcrumb(rq);
	if (err == 0)
		err = emit_semaphore_chain(rq, vma, idx);
	if (err == 0)
		i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		rq = ERR_PTR(err);

out_ce:
	intel_context_put(ce);
	return rq;
}

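/*
 * Release the tail of the semaphore chain: submit a kernel-context
 * request at @prio that writes 1 into dword @idx - 1, then kick the
 * scheduler so the priority bump is acted upon.
 */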
static int
release_queue(struct intel_engine_cs *engine,
	      struct i915_vma *vma,
	      int idx, int prio)
{
	struct i915_sched_attr attr = {
		.priority = prio,
	};
	struct i915_request *rq;
	u32 *cs;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	i915_request_get(rq);
	i915_request_add(rq);

	local_bh_disable();
	engine->sched_engine->schedule(rq, &attr);
	local_bh_enable(); /* kick tasklet */

	i915_request_put(rq);

	return 0;
}

static int
slice_semaphore_queue(struct intel_engine_cs *outer,
		      struct i915_vma *vma,
		      int count)
{
	struct intel_engine_cs *engine;
	struct i915_request *head;
	enum intel_engine_id id;
	int err, i, n = 0;

	head = semaphore_queue(outer, vma, n++);
	if (IS_ERR(head))
		return PTR_ERR(head);

	for_each_engine(engine, outer->gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		for (i = 0; i < count; i++) {
			struct i915_request *rq;

			rq = semaphore_queue(engine, vma, n++);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			i915_request_put(rq);
		}
	}

	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
	if (err)
		goto out;

	if (i915_request_wait(head, 0,
			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
		       outer->name, count, n);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(outer->gt);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}

static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		memset(vaddr, 0, PAGE_SIZE);

		st_engine_heartbeat_disable(engine);
		err = slice_semaphore_queue(engine, vma, 5);
		st_engine_heartbeat_enable(engine);
		if (err)
			goto err_pin;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto err_pin;
		}
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

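/*
 * Build a request that (optionally after waiting on @wait) spins until
 * the semaphore in slot[0] reaches @idx, records RING_TIMESTAMP into
 * slot[@idx], then advances the semaphore to @idx + 1; a chain of
 * rewinders thus records the order in which they actually executed.
 */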
static struct i915_request *
create_rewinder(struct intel_context *ce,
		struct i915_request *wait,
		void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	if (wait) {
		err = i915_request_await_dma_fence(rq, &wait->fence);
		if (err)
			goto err;
	}

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = idx;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = idx + 1;

	intel_ring_advance(rq, cs);

	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

static int live_timeslice_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * The usual presumption on timeslice expiration is that we replace
	 * the active context with another. However, given a chain of
	 * dependencies we may end up replacing the context with itself, but
	 * with only a few of its requests resubmitted, forcing us to rewind the
	 * RING_TAIL of the original request.
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	for_each_engine(engine, gt, id) {
		enum { A1, A2, B1 };
		enum { X = 1, Z, Y };
		struct i915_request *rq[3] = {};
		struct intel_context *ce;
		unsigned long timeslice;
		int i, err = 0;
		u32 *slot;

		if (!intel_engine_has_timeslices(engine))
			continue;

		/*
		 * A:rq1 -- semaphore wait, timestamp X
		 * A:rq2 -- write timestamp Y
		 *
		 * B:rq1 [await A:rq1] -- write timestamp Z
		 *
		 * Force timeslice, release semaphore.
		 *
		 * Expect execution/evaluation order XZY
		 */

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		slot = memset32(engine->status_page.addr + 1000, 0, 4);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[A1] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[A1])) {
			intel_context_put(ce);
			goto err;
		}

		rq[A2] = create_rewinder(ce, NULL, slot, Y);
		intel_context_put(ce);
		if (IS_ERR(rq[A2]))
			goto err;

		err = wait_for_submit(engine, rq[A2], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit first context\n",
			       engine->name);
			goto err;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
		intel_context_put(ce);
		if (IS_ERR(rq[B1]))
			goto err;

		err = wait_for_submit(engine, rq[B1], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit second context\n",
			       engine->name);
			goto err;
		}

		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
			/* Wait for the timeslice to kick in */
			timer_delete(&engine->execlists.timer);
			tasklet_hi_schedule(&engine->sched_engine->tasklet);
			intel_engine_flush_submission(engine);
		}
		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
		GEM_BUG_ON(i915_request_is_active(rq[A2]));

		/* Release the hounds! */
		slot[0] = 1;
		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */

		for (i = 1; i <= 3; i++) {
			unsigned long timeout = jiffies + HZ / 2;

			while (!READ_ONCE(slot[i]) &&
			       time_before(jiffies, timeout))
				;

			if (!time_before(jiffies, timeout)) {
				pr_err("%s: rq[%d] timed out\n",
				       engine->name, i - 1);
				err = -ETIME;
				goto err;
			}

			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
		}

		/* XZY: XZ < XY */
		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
			       engine->name,
			       slot[Z] - slot[X],
			       slot[Y] - slot[X]);
			err = -EINVAL;
		}

err:
		memset32(&slot[0], -1, 4);
		wmb();

		engine->props.timeslice_duration_ms = timeslice;
		st_engine_heartbeat_enable(engine);
		for (i = 0; i < 3; i++)
			i915_request_put(rq[i]);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static long slice_timeout(struct intel_engine_cs *engine)
{
	long timeout;

	/* Enough time for a timeslice to kick in, and kick out */
	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));

	/* Enough time for the nop request to complete */
	timeout += HZ / 5;

	return timeout + 1;
}

static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
		struct i915_request *rq, *nop;

		if (!intel_engine_has_preemption(engine))
			continue;

		st_engine_heartbeat_disable(engine);
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->sched_engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		/* Wait until we ack the release_queue and start timeslicing */
		do {
			cond_resched();
			intel_engine_flush_submission(engine);
		} while (READ_ONCE(engine->execlists.pending[0]));

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

static int live_timeslice_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We should not timeslice into a request that is marked with
	 * I915_REQUEST_NOPREEMPT.
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		unsigned long timeslice;

		if (!intel_engine_has_preemption(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		/* Create an unpreemptible spinner */

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_heartbeat;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
		i915_request_put(rq);

		/* Followed by a maximum priority barrier (heartbeat) */

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out_spin;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_spin;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Wait until the barrier is in ELSP, and we know timeslicing
		 * will have been activated.
		 */
		if (wait_for_submit(engine, rq, HZ / 2)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/*
		 * Since the ELSP[0] request is unpreemptible, it should not
		 * allow the maximum priority barrier through. Wait long
		 * enough to see if it is timesliced in by mistake.
		 */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
			       engine->name);
			err = -EINVAL;
		}
		i915_request_put(rq);

out_spin:
		igt_spinner_end(&spin);
out_heartbeat:
		xchg(&engine->props.timeslice_duration_ms, timeslice);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	u32 *map;
	int err;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915, NULL);
	if (IS_ERR(ctx_hi))
		return PTR_ERR(ctx_hi);

	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (IS_ERR(ctx_lo)) {
		err = PTR_ERR(ctx_lo);
		goto err_ctx_hi;
	}

	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer, where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}

static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = igt_spinner_create_request(spin, ce, arb);
	intel_context_put(ce);
	return rq;
}

static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	if (igt_spinner_init(&spin_hi, gt))
		goto err_ctx_lo;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_spin_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_spin_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_spin_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_spin_lo;
		}
	}

	err = 0;
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}

static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;

	if (igt_spinner_init(&spin_hi, gt))
		goto err_ctx_lo;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = 1;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_spin_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		attr.priority = I915_PRIORITY_MAX;
		engine->sched_engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_spin_lo;
		}
	}

	err = 0;
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;

err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_spin_lo;
}

struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};

static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
	c->ctx = kernel_context(gt->i915, NULL);
	if (!c->ctx)
		return -ENOMEM;

	if (igt_spinner_init(&c->spin, gt))
		goto err_ctx;

	return 0;

err_ctx:
	kernel_context_close(c->ctx);
	return -ENOMEM;
}

static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}

static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be under observation and must not be interrupted.
1967 	 */
1968 
1969 	if (preempt_client_init(gt, &a))
1970 		return -ENOMEM;
1971 	if (preempt_client_init(gt, &b))
1972 		goto err_client_a;
1973 	b.ctx->sched.priority = I915_PRIORITY_MAX;
1974 
1975 	for_each_engine(engine, gt, id) {
1976 		struct i915_request *rq_a, *rq_b;
1977 
1978 		if (!intel_engine_has_preemption(engine))
1979 			continue;
1980 
1981 		engine->execlists.preempt_hang.count = 0;
1982 
1983 		rq_a = spinner_create_request(&a.spin,
1984 					      a.ctx, engine,
1985 					      MI_ARB_CHECK);
1986 		if (IS_ERR(rq_a)) {
1987 			err = PTR_ERR(rq_a);
1988 			goto err_client_b;
1989 		}
1990 
1991 		/* Low priority client, but unpreemptable! */
1992 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1993 
1994 		i915_request_add(rq_a);
1995 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1996 			pr_err("First client failed to start\n");
1997 			goto err_wedged;
1998 		}
1999 
2000 		rq_b = spinner_create_request(&b.spin,
2001 					      b.ctx, engine,
2002 					      MI_ARB_CHECK);
2003 		if (IS_ERR(rq_b)) {
2004 			err = PTR_ERR(rq_b);
2005 			goto err_client_b;
2006 		}
2007 
2008 		i915_request_add(rq_b);
2009 
2010 		/* B is much more important than A! (But A is unpreemptable.) */
2011 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2012 
2013 		/* Wait long enough for preemption and timeslicing */
2014 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2015 			pr_err("Second client started too early!\n");
2016 			goto err_wedged;
2017 		}
2018 
2019 		igt_spinner_end(&a.spin);
2020 
2021 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2022 			pr_err("Second client failed to start\n");
2023 			goto err_wedged;
2024 		}
2025 
2026 		igt_spinner_end(&b.spin);
2027 
2028 		if (engine->execlists.preempt_hang.count) {
2029 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2030 			       engine->execlists.preempt_hang.count);
2031 			err = -EINVAL;
2032 			goto err_wedged;
2033 		}
2034 
2035 		if (igt_flush_test(gt->i915))
2036 			goto err_wedged;
2037 	}
2038 
2039 	err = 0;
2040 err_client_b:
2041 	preempt_client_fini(&b);
2042 err_client_a:
2043 	preempt_client_fini(&a);
2044 	return err;
2045 
2046 err_wedged:
2047 	igt_spinner_end(&b.spin);
2048 	igt_spinner_end(&a.spin);
2049 	intel_gt_set_wedged(gt);
2050 	err = -EIO;
2051 	goto err_client_b;
2052 }
2053 
2054 struct live_preempt_cancel {
2055 	struct intel_engine_cs *engine;
2056 	struct preempt_client a, b;
2057 };
2058 
__cancel_active0(struct live_preempt_cancel * arg)2059 static int __cancel_active0(struct live_preempt_cancel *arg)
2060 {
2061 	struct i915_request *rq;
2062 	struct igt_live_test t;
2063 	int err;
2064 
2065 	/* Preempt cancel of ELSP0 */
2066 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2067 	if (igt_live_test_begin(&t, arg->engine->i915,
2068 				__func__, arg->engine->name))
2069 		return -EIO;
2070 
2071 	rq = spinner_create_request(&arg->a.spin,
2072 				    arg->a.ctx, arg->engine,
2073 				    MI_ARB_CHECK);
2074 	if (IS_ERR(rq))
2075 		return PTR_ERR(rq);
2076 
2077 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2078 	i915_request_get(rq);
2079 	i915_request_add(rq);
2080 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2081 		err = -EIO;
2082 		goto out;
2083 	}
2084 
2085 	intel_context_ban(rq->context, rq);
2086 	err = intel_engine_pulse(arg->engine);
2087 	if (err)
2088 		goto out;
2089 
2090 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2091 	if (err) {
2092 		pr_err("Cancelled inflight0 request did not reset\n");
2093 		goto out;
2094 	}
2095 
2096 out:
2097 	i915_request_put(rq);
2098 	if (igt_live_test_end(&t))
2099 		err = -EIO;
2100 	return err;
2101 }
2102 
2103 static int __cancel_active1(struct live_preempt_cancel *arg)
2104 {
2105 	struct i915_request *rq[2] = {};
2106 	struct igt_live_test t;
2107 	int err;
2108 
2109 	/* Preempt cancel of ELSP1 */
2110 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2111 	if (igt_live_test_begin(&t, arg->engine->i915,
2112 				__func__, arg->engine->name))
2113 		return -EIO;
2114 
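	/*
	 * A MI_NOOP spinner loops without an arbitration point and so
	 * cannot be preempted mid-batch; rq[1] below uses MI_ARB_CHECK,
	 * which does provide a point at which the pulse can preempt.
	 */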
2115 	rq[0] = spinner_create_request(&arg->a.spin,
2116 				       arg->a.ctx, arg->engine,
2117 				       MI_NOOP); /* no preemption */
2118 	if (IS_ERR(rq[0]))
2119 		return PTR_ERR(rq[0]);
2120 
2121 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2122 	i915_request_get(rq[0]);
2123 	i915_request_add(rq[0]);
2124 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2125 		err = -EIO;
2126 		goto out;
2127 	}
2128 
2129 	rq[1] = spinner_create_request(&arg->b.spin,
2130 				       arg->b.ctx, arg->engine,
2131 				       MI_ARB_CHECK);
2132 	if (IS_ERR(rq[1])) {
2133 		err = PTR_ERR(rq[1]);
2134 		goto out;
2135 	}
2136 
2137 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2138 	i915_request_get(rq[1]);
2139 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2140 	i915_request_add(rq[1]);
2141 	if (err)
2142 		goto out;
2143 
2144 	intel_context_ban(rq[1]->context, rq[1]);
2145 	err = intel_engine_pulse(arg->engine);
2146 	if (err)
2147 		goto out;
2148 
2149 	igt_spinner_end(&arg->a.spin);
2150 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2151 	if (err)
2152 		goto out;
2153 
2154 	if (rq[0]->fence.error != 0) {
2155 		pr_err("Normal inflight0 request did not complete\n");
2156 		err = -EINVAL;
2157 		goto out;
2158 	}
2159 
2160 	if (rq[1]->fence.error != -EIO) {
2161 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2162 		err = -EINVAL;
2163 		goto out;
2164 	}
2165 
2166 out:
2167 	i915_request_put(rq[1]);
2168 	i915_request_put(rq[0]);
2169 	if (igt_live_test_end(&t))
2170 		err = -EIO;
2171 	return err;
2172 }
2173 
2174 static int __cancel_queued(struct live_preempt_cancel *arg)
2175 {
2176 	struct i915_request *rq[3] = {};
2177 	struct igt_live_test t;
2178 	int err;
2179 
2180 	/* Full ELSP and one in the wings */
2181 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2182 	if (igt_live_test_begin(&t, arg->engine->i915,
2183 				__func__, arg->engine->name))
2184 		return -EIO;
2185 
2186 	rq[0] = spinner_create_request(&arg->a.spin,
2187 				       arg->a.ctx, arg->engine,
2188 				       MI_ARB_CHECK);
2189 	if (IS_ERR(rq[0]))
2190 		return PTR_ERR(rq[0]);
2191 
2192 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2193 	i915_request_get(rq[0]);
2194 	i915_request_add(rq[0]);
2195 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2196 		err = -EIO;
2197 		goto out;
2198 	}
2199 
2200 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2201 	if (IS_ERR(rq[1])) {
2202 		err = PTR_ERR(rq[1]);
2203 		goto out;
2204 	}
2205 
2206 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2207 	i915_request_get(rq[1]);
2208 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2209 	i915_request_add(rq[1]);
2210 	if (err)
2211 		goto out;
2212 
2213 	rq[2] = spinner_create_request(&arg->b.spin,
2214 				       arg->a.ctx, arg->engine,
2215 				       MI_ARB_CHECK);
2216 	if (IS_ERR(rq[2])) {
2217 		err = PTR_ERR(rq[2]);
2218 		goto out;
2219 	}
2220 
2221 	i915_request_get(rq[2]);
2222 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2223 	i915_request_add(rq[2]);
2224 	if (err)
2225 		goto out;
2226 
2227 	intel_context_ban(rq[2]->context, rq[2]);
2228 	err = intel_engine_pulse(arg->engine);
2229 	if (err)
2230 		goto out;
2231 
2232 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2233 	if (err)
2234 		goto out;
2235 
2236 	if (rq[0]->fence.error != -EIO) {
2237 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2238 		err = -EINVAL;
2239 		goto out;
2240 	}
2241 
2242 	/*
2243 	 * The behavior differs depending on whether semaphores are available:
2244 	 * with semaphores the subsequent request is already on the hardware and
2245 	 * is not cancelled, while without them it is held in the driver and cancelled.
2246 	 */
2247 	if (intel_engine_has_semaphores(rq[1]->engine) &&
2248 	    rq[1]->fence.error != 0) {
2249 		pr_err("Normal inflight1 request did not complete\n");
2250 		err = -EINVAL;
2251 		goto out;
2252 	}
2253 
2254 	if (rq[2]->fence.error != -EIO) {
2255 		pr_err("Cancelled queued request did not report -EIO\n");
2256 		err = -EINVAL;
2257 		goto out;
2258 	}
2259 
2260 out:
2261 	i915_request_put(rq[2]);
2262 	i915_request_put(rq[1]);
2263 	i915_request_put(rq[0]);
2264 	if (igt_live_test_end(&t))
2265 		err = -EIO;
2266 	return err;
2267 }
2268 
2269 static int __cancel_hostile(struct live_preempt_cancel *arg)
2270 {
2271 	struct i915_request *rq;
2272 	int err;
2273 
2274 	/* Preempt cancel non-preemptible spinner in ELSP0 */
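	/*
	 * The MI_NOOP spinner offers no arbitration point, so preemption
	 * cannot complete; the preempt timeout must expire and the banned
	 * context be removed by an engine reset instead.
	 */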
2275 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2276 		return 0;
2277 
2278 	if (!intel_has_reset_engine(arg->engine->gt))
2279 		return 0;
2280 
2281 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2282 	rq = spinner_create_request(&arg->a.spin,
2283 				    arg->a.ctx, arg->engine,
2284 				    MI_NOOP); /* preemption disabled */
2285 	if (IS_ERR(rq))
2286 		return PTR_ERR(rq);
2287 
2288 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2289 	i915_request_get(rq);
2290 	i915_request_add(rq);
2291 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2292 		err = -EIO;
2293 		goto out;
2294 	}
2295 
2296 	intel_context_ban(rq->context, rq);
2297 	err = intel_engine_pulse(arg->engine); /* force reset */
2298 	if (err)
2299 		goto out;
2300 
2301 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2302 	if (err) {
2303 		pr_err("Cancelled inflight0 request did not reset\n");
2304 		goto out;
2305 	}
2306 
2307 out:
2308 	i915_request_put(rq);
2309 	if (igt_flush_test(arg->engine->i915))
2310 		err = -EIO;
2311 	return err;
2312 }
2313 
2314 static void force_reset_timeout(struct intel_engine_cs *engine)
2315 {
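	/* Selftest fault injection: force subsequent preempt resets to time out */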
2316 	engine->reset_timeout.probability = 999;
2317 	atomic_set(&engine->reset_timeout.times, -1);
2318 }
2319 
2320 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2321 {
2322 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2323 }
2324 
2325 static int __cancel_fail(struct live_preempt_cancel *arg)
2326 {
2327 	struct intel_engine_cs *engine = arg->engine;
2328 	struct i915_request *rq;
2329 	int err;
2330 
2331 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2332 		return 0;
2333 
2334 	if (!intel_has_reset_engine(engine->gt))
2335 		return 0;
2336 
2337 	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2338 	rq = spinner_create_request(&arg->a.spin,
2339 				    arg->a.ctx, engine,
2340 				    MI_NOOP); /* preemption disabled */
2341 	if (IS_ERR(rq))
2342 		return PTR_ERR(rq);
2343 
2344 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2345 	i915_request_get(rq);
2346 	i915_request_add(rq);
2347 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2348 		err = -EIO;
2349 		goto out;
2350 	}
2351 
2352 	intel_context_set_banned(rq->context);
2353 
2354 	err = intel_engine_pulse(engine);
2355 	if (err)
2356 		goto out;
2357 
2358 	force_reset_timeout(engine);
2359 
2360 	/* force preempt reset [failure] */
2361 	while (!engine->execlists.pending[0])
2362 		intel_engine_flush_submission(engine);
2363 	timer_delete_sync(&engine->execlists.preempt);
2364 	intel_engine_flush_submission(engine);
2365 
2366 	cancel_reset_timeout(engine);
2367 
2368 	/* after failure, require heartbeats to reset device */
2369 	intel_engine_set_heartbeat(engine, 1);
2370 	err = wait_for_reset(engine, rq, HZ / 2);
2371 	intel_engine_set_heartbeat(engine,
2372 				   engine->defaults.heartbeat_interval_ms);
2373 	if (err) {
2374 		pr_err("Cancelled inflight0 request did not reset\n");
2375 		goto out;
2376 	}
2377 
2378 out:
2379 	i915_request_put(rq);
2380 	if (igt_flush_test(engine->i915))
2381 		err = -EIO;
2382 	return err;
2383 }
2384 
2385 static int live_preempt_cancel(void *arg)
2386 {
2387 	struct intel_gt *gt = arg;
2388 	struct live_preempt_cancel data;
2389 	enum intel_engine_id id;
2390 	int err = -ENOMEM;
2391 
2392 	/*
2393 	 * To cancel an inflight context, we need to first remove it from the
2394 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2395 	 */
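	/*
	 * The sub-tests below cover banning the context occupying ELSP[0],
	 * one waiting in ELSP[1], one still queued in the driver, a hostile
	 * non-preemptible spinner, and a preempt reset that itself fails.
	 */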
2396 
2397 	if (preempt_client_init(gt, &data.a))
2398 		return -ENOMEM;
2399 	if (preempt_client_init(gt, &data.b))
2400 		goto err_client_a;
2401 
2402 	for_each_engine(data.engine, gt, id) {
2403 		if (!intel_engine_has_preemption(data.engine))
2404 			continue;
2405 
2406 		err = __cancel_active0(&data);
2407 		if (err)
2408 			goto err_wedged;
2409 
2410 		err = __cancel_active1(&data);
2411 		if (err)
2412 			goto err_wedged;
2413 
2414 		err = __cancel_queued(&data);
2415 		if (err)
2416 			goto err_wedged;
2417 
2418 		err = __cancel_hostile(&data);
2419 		if (err)
2420 			goto err_wedged;
2421 
2422 		err = __cancel_fail(&data);
2423 		if (err)
2424 			goto err_wedged;
2425 	}
2426 
2427 	err = 0;
2428 err_client_b:
2429 	preempt_client_fini(&data.b);
2430 err_client_a:
2431 	preempt_client_fini(&data.a);
2432 	return err;
2433 
2434 err_wedged:
2435 	GEM_TRACE_DUMP();
2436 	igt_spinner_end(&data.b.spin);
2437 	igt_spinner_end(&data.a.spin);
2438 	intel_gt_set_wedged(gt);
2439 	goto err_client_b;
2440 }
2441 
2442 static int live_suppress_self_preempt(void *arg)
2443 {
2444 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2445 	struct intel_gt *gt = arg;
2446 	struct intel_engine_cs *engine;
2447 	struct preempt_client a, b;
2448 	enum intel_engine_id id;
2449 	int err = -ENOMEM;
2450 
2451 	/*
2452 	 * Verify that if a preemption request does not cause a change in
2453 	 * the current execution order, the preempt-to-idle injection is
2454 	 * skipped and that we do not accidentally apply it after the CS
2455 	 * completion event.
2456 	 */
2457 
2458 	if (intel_uc_uses_guc_submission(&gt->uc))
2459 		return 0; /* presume black box */
2460 
2461 	if (intel_vgpu_active(gt->i915))
2462 		return 0; /* GVT forces single port & request submission */
2463 
2464 	if (preempt_client_init(gt, &a))
2465 		return -ENOMEM;
2466 	if (preempt_client_init(gt, &b))
2467 		goto err_client_a;
2468 
2469 	for_each_engine(engine, gt, id) {
2470 		struct i915_request *rq_a, *rq_b;
2471 		int depth;
2472 
2473 		if (!intel_engine_has_preemption(engine))
2474 			continue;
2475 
2476 		if (igt_flush_test(gt->i915))
2477 			goto err_wedged;
2478 
2479 		st_engine_heartbeat_disable(engine);
2480 		engine->execlists.preempt_hang.count = 0;
2481 
2482 		rq_a = spinner_create_request(&a.spin,
2483 					      a.ctx, engine,
2484 					      MI_NOOP);
2485 		if (IS_ERR(rq_a)) {
2486 			err = PTR_ERR(rq_a);
2487 			st_engine_heartbeat_enable(engine);
2488 			goto err_client_b;
2489 		}
2490 
2491 		i915_request_add(rq_a);
2492 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2493 			pr_err("First client failed to start\n");
2494 			st_engine_heartbeat_enable(engine);
2495 			goto err_wedged;
2496 		}
2497 
2498 		/* Keep postponing the timer to avoid premature slicing */
2499 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2500 		for (depth = 0; depth < 8; depth++) {
2501 			rq_b = spinner_create_request(&b.spin,
2502 						      b.ctx, engine,
2503 						      MI_NOOP);
2504 			if (IS_ERR(rq_b)) {
2505 				err = PTR_ERR(rq_b);
2506 				st_engine_heartbeat_enable(engine);
2507 				goto err_client_b;
2508 			}
2509 			i915_request_add(rq_b);
2510 
2511 			GEM_BUG_ON(i915_request_completed(rq_a));
2512 			engine->sched_engine->schedule(rq_a, &attr);
2513 			igt_spinner_end(&a.spin);
2514 
2515 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2516 				pr_err("Second client failed to start\n");
2517 				st_engine_heartbeat_enable(engine);
2518 				goto err_wedged;
2519 			}
2520 
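			/* Exchange roles: the spinner just started becomes rq_a */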
2521 			swap(a, b);
2522 			rq_a = rq_b;
2523 		}
2524 		igt_spinner_end(&a.spin);
2525 
2526 		if (engine->execlists.preempt_hang.count) {
2527 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2528 			       engine->name,
2529 			       engine->execlists.preempt_hang.count,
2530 			       depth);
2531 			st_engine_heartbeat_enable(engine);
2532 			err = -EINVAL;
2533 			goto err_client_b;
2534 		}
2535 
2536 		st_engine_heartbeat_enable(engine);
2537 		if (igt_flush_test(gt->i915))
2538 			goto err_wedged;
2539 	}
2540 
2541 	err = 0;
2542 err_client_b:
2543 	preempt_client_fini(&b);
2544 err_client_a:
2545 	preempt_client_fini(&a);
2546 	return err;
2547 
2548 err_wedged:
2549 	igt_spinner_end(&b.spin);
2550 	igt_spinner_end(&a.spin);
2551 	intel_gt_set_wedged(gt);
2552 	err = -EIO;
2553 	goto err_client_b;
2554 }
2555 
2556 static int live_chain_preempt(void *arg)
2557 {
2558 	struct intel_gt *gt = arg;
2559 	struct intel_engine_cs *engine;
2560 	struct preempt_client hi, lo;
2561 	enum intel_engine_id id;
2562 	int err = -ENOMEM;
2563 
2564 	/*
2565 	 * Build a chain AB...BA between two contexts (A, B) and request
2566 	 * preemption of the last request. It should then complete before
2567 	 * the previously submitted spinner in B.
2568 	 */
2569 
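	/*
	 * Each iteration submits: a spinner on hi, a spinner plus count
	 * nops on lo, then one more hi request which is boosted to
	 * I915_PRIORITY_MAX and must overtake the entire lo chain.
	 */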
2570 	if (preempt_client_init(gt, &hi))
2571 		return -ENOMEM;
2572 
2573 	if (preempt_client_init(gt, &lo))
2574 		goto err_client_hi;
2575 
2576 	for_each_engine(engine, gt, id) {
2577 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2578 		struct igt_live_test t;
2579 		struct i915_request *rq;
2580 		int ring_size, count, i;
2581 
2582 		if (!intel_engine_has_preemption(engine))
2583 			continue;
2584 
2585 		rq = spinner_create_request(&lo.spin,
2586 					    lo.ctx, engine,
2587 					    MI_ARB_CHECK);
2588 		if (IS_ERR(rq))
2589 			goto err_wedged;
2590 
2591 		i915_request_get(rq);
2592 		i915_request_add(rq);
2593 
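		/*
		 * Estimate how many requests fit in the ring: a single
		 * request occupies wa_tail - head bytes (mod ring size),
		 * so dividing the ring size by that bounds the chain below.
		 */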
2594 		ring_size = rq->wa_tail - rq->head;
2595 		if (ring_size < 0)
2596 			ring_size += rq->ring->size;
2597 		ring_size = rq->ring->size / ring_size;
2598 		pr_debug("%s(%s): Using maximum of %d requests\n",
2599 			 __func__, engine->name, ring_size);
2600 
2601 		igt_spinner_end(&lo.spin);
2602 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2603 			pr_err("Timed out waiting to flush %s\n", engine->name);
2604 			i915_request_put(rq);
2605 			goto err_wedged;
2606 		}
2607 		i915_request_put(rq);
2608 
2609 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2610 			err = -EIO;
2611 			goto err_wedged;
2612 		}
2613 
2614 		for_each_prime_number_from(count, 1, ring_size) {
2615 			rq = spinner_create_request(&hi.spin,
2616 						    hi.ctx, engine,
2617 						    MI_ARB_CHECK);
2618 			if (IS_ERR(rq))
2619 				goto err_wedged;
2620 			i915_request_add(rq);
2621 			if (!igt_wait_for_spinner(&hi.spin, rq))
2622 				goto err_wedged;
2623 
2624 			rq = spinner_create_request(&lo.spin,
2625 						    lo.ctx, engine,
2626 						    MI_ARB_CHECK);
2627 			if (IS_ERR(rq))
2628 				goto err_wedged;
2629 			i915_request_add(rq);
2630 
2631 			for (i = 0; i < count; i++) {
2632 				rq = igt_request_alloc(lo.ctx, engine);
2633 				if (IS_ERR(rq))
2634 					goto err_wedged;
2635 				i915_request_add(rq);
2636 			}
2637 
2638 			rq = igt_request_alloc(hi.ctx, engine);
2639 			if (IS_ERR(rq))
2640 				goto err_wedged;
2641 
2642 			i915_request_get(rq);
2643 			i915_request_add(rq);
2644 			engine->sched_engine->schedule(rq, &attr);
2645 
2646 			igt_spinner_end(&hi.spin);
2647 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2648 				struct drm_printer p =
2649 					drm_info_printer(gt->i915->drm.dev);
2650 
2651 				pr_err("Failed to preempt over chain of %d\n",
2652 				       count);
2653 				intel_engine_dump(engine, &p,
2654 						  "%s\n", engine->name);
2655 				i915_request_put(rq);
2656 				goto err_wedged;
2657 			}
2658 			igt_spinner_end(&lo.spin);
2659 			i915_request_put(rq);
2660 
2661 			rq = igt_request_alloc(lo.ctx, engine);
2662 			if (IS_ERR(rq))
2663 				goto err_wedged;
2664 
2665 			i915_request_get(rq);
2666 			i915_request_add(rq);
2667 
2668 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2669 				struct drm_printer p =
2670 					drm_info_printer(gt->i915->drm.dev);
2671 
2672 				pr_err("Failed to flush low priority chain of %d requests\n",
2673 				       count);
2674 				intel_engine_dump(engine, &p,
2675 						  "%s\n", engine->name);
2676 
2677 				i915_request_put(rq);
2678 				goto err_wedged;
2679 			}
2680 			i915_request_put(rq);
2681 		}
2682 
2683 		if (igt_live_test_end(&t)) {
2684 			err = -EIO;
2685 			goto err_wedged;
2686 		}
2687 	}
2688 
2689 	err = 0;
2690 err_client_lo:
2691 	preempt_client_fini(&lo);
2692 err_client_hi:
2693 	preempt_client_fini(&hi);
2694 	return err;
2695 
2696 err_wedged:
2697 	igt_spinner_end(&hi.spin);
2698 	igt_spinner_end(&lo.spin);
2699 	intel_gt_set_wedged(gt);
2700 	err = -EIO;
2701 	goto err_client_lo;
2702 }
2703 
2704 static int create_gang(struct intel_engine_cs *engine,
2705 		       struct i915_request **prev)
2706 {
2707 	struct drm_i915_gem_object *obj;
2708 	struct intel_context *ce;
2709 	struct i915_request *rq;
2710 	struct i915_vma *vma;
2711 	u32 *cs;
2712 	int err;
2713 
2714 	ce = intel_context_create(engine);
2715 	if (IS_ERR(ce))
2716 		return PTR_ERR(ce);
2717 
2718 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2719 	if (IS_ERR(obj)) {
2720 		err = PTR_ERR(obj);
2721 		goto err_ce;
2722 	}
2723 
2724 	vma = i915_vma_instance(obj, ce->vm, NULL);
2725 	if (IS_ERR(vma)) {
2726 		err = PTR_ERR(vma);
2727 		goto err_obj;
2728 	}
2729 
2730 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2731 	if (err)
2732 		goto err_obj;
2733 
2734 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2735 	if (IS_ERR(cs)) {
2736 		err = PTR_ERR(cs);
2737 		goto err_obj;
2738 	}
2739 
2740 	/* Semaphore target: spin until zero */
2741 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2742 
2743 	*cs++ = MI_SEMAPHORE_WAIT |
2744 		MI_SEMAPHORE_POLL |
2745 		MI_SEMAPHORE_SAD_EQ_SDD;
2746 	*cs++ = 0;
2747 	*cs++ = lower_32_bits(i915_vma_offset(vma));
2748 	*cs++ = upper_32_bits(i915_vma_offset(vma));
2749 
2750 	if (*prev) {
2751 		u64 offset = i915_vma_offset((*prev)->batch);
2752 
2753 		/* Terminate the spinner in the next lower priority batch. */
2754 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2755 		*cs++ = lower_32_bits(offset);
2756 		*cs++ = upper_32_bits(offset);
2757 		*cs++ = 0;
2758 	}
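	/*
	 * Each batch thus spins until its own dword reads zero: the next,
	 * higher priority batch clears it with the MI_STORE_DWORD_IMM
	 * above, and the final (highest) batch is released by the CPU in
	 * live_preempt_gang().
	 */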
2759 
2760 	*cs++ = MI_BATCH_BUFFER_END;
2761 	i915_gem_object_flush_map(obj);
2762 	i915_gem_object_unpin_map(obj);
2763 
2764 	rq = intel_context_create_request(ce);
2765 	if (IS_ERR(rq)) {
2766 		err = PTR_ERR(rq);
2767 		goto err_obj;
2768 	}
2769 
2770 	rq->batch = i915_vma_get(vma);
2771 	i915_request_get(rq);
2772 
2773 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
2774 	if (!err)
2775 		err = rq->engine->emit_bb_start(rq,
2776 						i915_vma_offset(vma),
2777 						PAGE_SIZE, 0);
2778 	i915_request_add(rq);
2779 	if (err)
2780 		goto err_rq;
2781 
2782 	i915_gem_object_put(obj);
2783 	intel_context_put(ce);
2784 
2785 	rq->mock.link.next = &(*prev)->mock.link;
2786 	*prev = rq;
2787 	return 0;
2788 
2789 err_rq:
2790 	i915_vma_put(rq->batch);
2791 	i915_request_put(rq);
2792 err_obj:
2793 	i915_gem_object_put(obj);
2794 err_ce:
2795 	intel_context_put(ce);
2796 	return err;
2797 }
2798 
2799 static int __live_preempt_ring(struct intel_engine_cs *engine,
2800 			       struct igt_spinner *spin,
2801 			       int queue_sz, int ring_sz)
2802 {
2803 	struct intel_context *ce[2] = {};
2804 	struct i915_request *rq;
2805 	struct igt_live_test t;
2806 	int err = 0;
2807 	int n;
2808 
2809 	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2810 		return -EIO;
2811 
2812 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2813 		struct intel_context *tmp;
2814 
2815 		tmp = intel_context_create(engine);
2816 		if (IS_ERR(tmp)) {
2817 			err = PTR_ERR(tmp);
2818 			goto err_ce;
2819 		}
2820 
2821 		tmp->ring_size = ring_sz;
2822 
2823 		err = intel_context_pin(tmp);
2824 		if (err) {
2825 			intel_context_put(tmp);
2826 			goto err_ce;
2827 		}
2828 
2829 		memset32(tmp->ring->vaddr,
2830 			 0xdeadbeef, /* trigger a hang if executed */
2831 			 tmp->ring->vma->size / sizeof(u32));
2832 
2833 		ce[n] = tmp;
2834 	}
2835 
2836 	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2837 	if (IS_ERR(rq)) {
2838 		err = PTR_ERR(rq);
2839 		goto err_ce;
2840 	}
2841 
2842 	i915_request_get(rq);
2843 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2844 	i915_request_add(rq);
2845 
2846 	if (!igt_wait_for_spinner(spin, rq)) {
2847 		intel_gt_set_wedged(engine->gt);
2848 		i915_request_put(rq);
2849 		err = -ETIME;
2850 		goto err_ce;
2851 	}
2852 
2853 	/* Fill the ring until we cause a wrap */
2854 	n = 0;
2855 	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2856 		struct i915_request *tmp;
2857 
2858 		tmp = intel_context_create_request(ce[0]);
2859 		if (IS_ERR(tmp)) {
2860 			err = PTR_ERR(tmp);
2861 			i915_request_put(rq);
2862 			goto err_ce;
2863 		}
2864 
2865 		i915_request_add(tmp);
2866 		intel_engine_flush_submission(engine);
2867 		n++;
2868 	}
2869 	intel_engine_flush_submission(engine);
2870 	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2871 		 engine->name, queue_sz, n,
2872 		 ce[0]->ring->size,
2873 		 ce[0]->ring->tail,
2874 		 ce[0]->ring->emit,
2875 		 rq->tail);
2876 	i915_request_put(rq);
2877 
2878 	/* Create a second request to preempt the first ring */
2879 	rq = intel_context_create_request(ce[1]);
2880 	if (IS_ERR(rq)) {
2881 		err = PTR_ERR(rq);
2882 		goto err_ce;
2883 	}
2884 
2885 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2886 	i915_request_get(rq);
2887 	i915_request_add(rq);
2888 
2889 	err = wait_for_submit(engine, rq, HZ / 2);
2890 	i915_request_put(rq);
2891 	if (err) {
2892 		pr_err("%s: preemption request was not submitted\n",
2893 		       engine->name);
2894 		err = -ETIME;
2895 	}
2896 
2897 	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2898 		 engine->name,
2899 		 ce[0]->ring->tail, ce[0]->ring->emit,
2900 		 ce[1]->ring->tail, ce[1]->ring->emit);
2901 
2902 err_ce:
2903 	intel_engine_flush_submission(engine);
2904 	igt_spinner_end(spin);
2905 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2906 		if (IS_ERR_OR_NULL(ce[n]))
2907 			break;
2908 
2909 		intel_context_unpin(ce[n]);
2910 		intel_context_put(ce[n]);
2911 	}
2912 	if (igt_live_test_end(&t))
2913 		err = -EIO;
2914 	return err;
2915 }
2916 
2917 static int live_preempt_ring(void *arg)
2918 {
2919 	struct intel_gt *gt = arg;
2920 	struct intel_engine_cs *engine;
2921 	struct igt_spinner spin;
2922 	enum intel_engine_id id;
2923 	int err = 0;
2924 
2925 	/*
2926 	 * Check that we roll back large chunks of a ring in order to service a
2927 	 * preemption event. Similar to live_unlite_ring, but looking at
2928 	 * ring size rather than the impact of intel_ring_direction().
2929 	 */
2930 
2931 	if (igt_spinner_init(&spin, gt))
2932 		return -ENOMEM;
2933 
2934 	for_each_engine(engine, gt, id) {
2935 		int n;
2936 
2937 		if (!intel_engine_has_preemption(engine))
2938 			continue;
2939 
2940 		if (!intel_engine_can_store_dword(engine))
2941 			continue;
2942 
2943 		st_engine_heartbeat_disable(engine);
2944 
2945 		for (n = 0; n <= 3; n++) {
2946 			err = __live_preempt_ring(engine, &spin,
2947 						  n * SZ_4K / 4, SZ_4K);
2948 			if (err)
2949 				break;
2950 		}
2951 
2952 		st_engine_heartbeat_enable(engine);
2953 		if (err)
2954 			break;
2955 	}
2956 
2957 	igt_spinner_fini(&spin);
2958 	return err;
2959 }
2960 
2961 static int live_preempt_gang(void *arg)
2962 {
2963 	struct intel_gt *gt = arg;
2964 	struct intel_engine_cs *engine;
2965 	enum intel_engine_id id;
2966 
2967 	/*
2968 	 * Build as long a chain of preempters as we can, with each
2969 	 * request higher priority than the last. Once we are ready, we release
2970 	 * the last batch which then percolates down the chain, each releasing
2971 	 * the next oldest in turn. The intent is to simply push as hard as we
2972 	 * can with the number of preemptions, trying to exceed narrow HW
2973 	 * limits. At a minimum, we insist that we can sort all the user
2974 	 * high priority levels into execution order.
2975 	 */
2976 
2977 	for_each_engine(engine, gt, id) {
2978 		struct i915_request *rq = NULL;
2979 		struct igt_live_test t;
2980 		IGT_TIMEOUT(end_time);
2981 		int prio = 0;
2982 		int err = 0;
2983 		u32 *cs;
2984 
2985 		if (!intel_engine_has_preemption(engine))
2986 			continue;
2987 
2988 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2989 			return -EIO;
2990 
2991 		do {
2992 			struct i915_sched_attr attr = { .priority = prio++ };
2993 
2994 			err = create_gang(engine, &rq);
2995 			if (err)
2996 				break;
2997 
2998 			/* Submit each spinner at increasing priority */
2999 			engine->sched_engine->schedule(rq, &attr);
3000 		} while (prio <= I915_PRIORITY_MAX &&
3001 			 !__igt_timeout(end_time, NULL));
3002 		pr_debug("%s: Preempt chain of %d requests\n",
3003 			 engine->name, prio);
3004 
3005 		/*
3006 		 * The last spinner is thus the highest priority and
3007 		 * should execute first. When that spinner completes,
3008 		 * it will terminate the next lowest spinner until there
3009 		 * are no more spinners and the gang is complete.
3010 		 */
3011 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3012 		if (!IS_ERR(cs)) {
3013 			*cs = 0;
3014 			i915_gem_object_unpin_map(rq->batch->obj);
3015 		} else {
3016 			err = PTR_ERR(cs);
3017 			intel_gt_set_wedged(gt);
3018 		}
3019 
3020 		while (rq) { /* wait for each rq from highest to lowest prio */
3021 			struct i915_request *n = list_next_entry(rq, mock.link);
3022 
3023 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3024 				struct drm_printer p =
3025 					drm_info_printer(engine->i915->drm.dev);
3026 
3027 				pr_err("Failed to flush chain of %d requests, at %d\n",
3028 				       prio, rq_prio(rq));
3029 				intel_engine_dump(engine, &p,
3030 						  "%s\n", engine->name);
3031 
3032 				err = -ETIME;
3033 			}
3034 
3035 			i915_vma_put(rq->batch);
3036 			i915_request_put(rq);
3037 			rq = n;
3038 		}
3039 
3040 		if (igt_live_test_end(&t))
3041 			err = -EIO;
3042 		if (err)
3043 			return err;
3044 	}
3045 
3046 	return 0;
3047 }
3048 
3049 static struct i915_vma *
3050 create_gpr_user(struct intel_engine_cs *engine,
3051 		struct i915_vma *result,
3052 		unsigned int offset)
3053 {
3054 	struct drm_i915_gem_object *obj;
3055 	struct i915_vma *vma;
3056 	u32 *cs;
3057 	int err;
3058 	int i;
3059 
3060 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3061 	if (IS_ERR(obj))
3062 		return ERR_CAST(obj);
3063 
3064 	vma = i915_vma_instance(obj, result->vm, NULL);
3065 	if (IS_ERR(vma)) {
3066 		i915_gem_object_put(obj);
3067 		return vma;
3068 	}
3069 
3070 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3071 	if (err) {
3072 		i915_vma_put(vma);
3073 		return ERR_PTR(err);
3074 	}
3075 
3076 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3077 	if (IS_ERR(cs)) {
3078 		i915_vma_put(vma);
3079 		return ERR_CAST(cs);
3080 	}
3081 
3082 	/* All GPRs are clear for new contexts. We use GPR(0) as a constant */
3083 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3084 	*cs++ = CS_GPR(engine, 0);
3085 	*cs++ = 1;
3086 
3087 	for (i = 1; i < NUM_GPR; i++) {
3088 		u64 addr;
3089 
3090 		/*
3091 		 * Perform: GPR[i]++
3092 		 *
3093 		 * As we read and write into the context saved GPR[i], if
3094 		 * we restart this batch buffer from an earlier point, we
3095 		 * will repeat the increment and store a value > 1.
3096 		 */
3097 		*cs++ = MI_MATH(4);
3098 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3099 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3100 		*cs++ = MI_MATH_ADD;
3101 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3102 
3103 		addr = i915_vma_offset(result) + offset + i * sizeof(*cs);
3104 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3105 		*cs++ = CS_GPR(engine, 2 * i);
3106 		*cs++ = lower_32_bits(addr);
3107 		*cs++ = upper_32_bits(addr);
3108 
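		/*
		 * Stall until the global semaphore reaches i (bumped by
		 * each preempt_user() request), opening a window in which
		 * this batch is expected to be preempted and later resumed
		 * without replaying the increment above.
		 */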
3109 		*cs++ = MI_SEMAPHORE_WAIT |
3110 			MI_SEMAPHORE_POLL |
3111 			MI_SEMAPHORE_SAD_GTE_SDD;
3112 		*cs++ = i;
3113 		*cs++ = lower_32_bits(i915_vma_offset(result));
3114 		*cs++ = upper_32_bits(i915_vma_offset(result));
3115 	}
3116 
3117 	*cs++ = MI_BATCH_BUFFER_END;
3118 	i915_gem_object_flush_map(obj);
3119 	i915_gem_object_unpin_map(obj);
3120 
3121 	return vma;
3122 }
3123 
3124 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3125 {
3126 	struct drm_i915_gem_object *obj;
3127 	struct i915_vma *vma;
3128 	int err;
3129 
3130 	obj = i915_gem_object_create_internal(gt->i915, sz);
3131 	if (IS_ERR(obj))
3132 		return ERR_CAST(obj);
3133 
3134 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3135 	if (IS_ERR(vma)) {
3136 		i915_gem_object_put(obj);
3137 		return vma;
3138 	}
3139 
3140 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3141 	if (err) {
3142 		i915_vma_put(vma);
3143 		return ERR_PTR(err);
3144 	}
3145 
3146 	return vma;
3147 }
3148 
3149 static struct i915_request *
3150 create_gpr_client(struct intel_engine_cs *engine,
3151 		  struct i915_vma *global,
3152 		  unsigned int offset)
3153 {
3154 	struct i915_vma *batch, *vma;
3155 	struct intel_context *ce;
3156 	struct i915_request *rq;
3157 	int err;
3158 
3159 	ce = intel_context_create(engine);
3160 	if (IS_ERR(ce))
3161 		return ERR_CAST(ce);
3162 
3163 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3164 	if (IS_ERR(vma)) {
3165 		err = PTR_ERR(vma);
3166 		goto out_ce;
3167 	}
3168 
3169 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3170 	if (err)
3171 		goto out_ce;
3172 
3173 	batch = create_gpr_user(engine, vma, offset);
3174 	if (IS_ERR(batch)) {
3175 		err = PTR_ERR(batch);
3176 		goto out_vma;
3177 	}
3178 
3179 	rq = intel_context_create_request(ce);
3180 	if (IS_ERR(rq)) {
3181 		err = PTR_ERR(rq);
3182 		goto out_batch;
3183 	}
3184 
3185 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
3186 
3187 	i915_vma_lock(batch);
3188 	if (!err)
3189 		err = i915_vma_move_to_active(batch, rq, 0);
3190 	if (!err)
3191 		err = rq->engine->emit_bb_start(rq,
3192 						i915_vma_offset(batch),
3193 						PAGE_SIZE, 0);
3194 	i915_vma_unlock(batch);
3195 	i915_vma_unpin(batch);
3196 
3197 	if (!err)
3198 		i915_request_get(rq);
3199 	i915_request_add(rq);
3200 
3201 out_batch:
3202 	i915_vma_put(batch);
3203 out_vma:
3204 	i915_vma_unpin(vma);
3205 out_ce:
3206 	intel_context_put(ce);
3207 	return err ? ERR_PTR(err) : rq;
3208 }
3209 
3210 static int preempt_user(struct intel_engine_cs *engine,
3211 			struct i915_vma *global,
3212 			int id)
3213 {
3214 	struct i915_sched_attr attr = {
3215 		.priority = I915_PRIORITY_MAX
3216 	};
3217 	struct i915_request *rq;
3218 	int err = 0;
3219 	u32 *cs;
3220 
3221 	rq = intel_engine_create_kernel_request(engine);
3222 	if (IS_ERR(rq))
3223 		return PTR_ERR(rq);
3224 
3225 	cs = intel_ring_begin(rq, 4);
3226 	if (IS_ERR(cs)) {
3227 		i915_request_add(rq);
3228 		return PTR_ERR(cs);
3229 	}
3230 
3231 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3232 	*cs++ = i915_ggtt_offset(global);
3233 	*cs++ = 0;
3234 	*cs++ = id;
3235 
3236 	intel_ring_advance(rq, cs);
3237 
3238 	i915_request_get(rq);
3239 	i915_request_add(rq);
3240 
3241 	engine->sched_engine->schedule(rq, &attr);
3242 
3243 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3244 		err = -ETIME;
3245 	i915_request_put(rq);
3246 
3247 	return err;
3248 }
3249 
3250 static int live_preempt_user(void *arg)
3251 {
3252 	struct intel_gt *gt = arg;
3253 	struct intel_engine_cs *engine;
3254 	struct i915_vma *global;
3255 	enum intel_engine_id id;
3256 	u32 *result;
3257 	int err = 0;
3258 
3259 	/*
3260 	 * In our other tests, we look at preemption in carefully
3261 	 * controlled conditions in the ringbuffer. Since most of the
3262 	 * time is spent in user batches, most of our preemptions naturally
3263 	 * occur there. We want to verify that when we preempt inside a batch
3264 	 * we continue on from the current instruction and do not roll back
3265 	 * to the start, or another earlier arbitration point.
3266 	 *
3267 	 * To verify this, we create a batch which is a mixture of
3268 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3269 	 * a few preempting contexts thrown into the mix, we look for any
3270 	 * repeated instructions (which show up as incorrect values).
3271 	 */
3272 
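	/*
	 * Layout of the global page: result[0] doubles as the shared
	 * semaphore, and client i writes GPR g (for 1 <= g < NUM_GPR) to
	 * result[NUM_GPR * i + g]; every slot must read back exactly 1.
	 */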
3273 	global = create_global(gt, 4096);
3274 	if (IS_ERR(global))
3275 		return PTR_ERR(global);
3276 
3277 	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3278 	if (IS_ERR(result)) {
3279 		i915_vma_unpin_and_release(&global, 0);
3280 		return PTR_ERR(result);
3281 	}
3282 
3283 	for_each_engine(engine, gt, id) {
3284 		struct i915_request *client[3] = {};
3285 		struct igt_live_test t;
3286 		int i;
3287 
3288 		if (!intel_engine_has_preemption(engine))
3289 			continue;
3290 
3291 		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3292 			continue; /* we need per-context GPR */
3293 
3294 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3295 			err = -EIO;
3296 			break;
3297 		}
3298 
3299 		memset(result, 0, 4096);
3300 
3301 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3302 			struct i915_request *rq;
3303 
3304 			rq = create_gpr_client(engine, global,
3305 					       NUM_GPR * i * sizeof(u32));
3306 			if (IS_ERR(rq)) {
3307 				err = PTR_ERR(rq);
3308 				goto end_test;
3309 			}
3310 
3311 			client[i] = rq;
3312 		}
3313 
3314 		/* Continuously preempt the set of 3 running contexts */
3315 		for (i = 1; i <= NUM_GPR; i++) {
3316 			err = preempt_user(engine, global, i);
3317 			if (err)
3318 				goto end_test;
3319 		}
3320 
3321 		if (READ_ONCE(result[0]) != NUM_GPR) {
3322 			pr_err("%s: Failed to release semaphore\n",
3323 			       engine->name);
3324 			err = -EIO;
3325 			goto end_test;
3326 		}
3327 
3328 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3329 			int gpr;
3330 
3331 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3332 				err = -ETIME;
3333 				goto end_test;
3334 			}
3335 
3336 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3337 				if (result[NUM_GPR * i + gpr] != 1) {
3338 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3339 					       engine->name,
3340 					       i, gpr, result[NUM_GPR * i + gpr]);
3341 					err = -EINVAL;
3342 					goto end_test;
3343 				}
3344 			}
3345 		}
3346 
3347 end_test:
3348 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3349 			if (!client[i])
3350 				break;
3351 
3352 			i915_request_put(client[i]);
3353 		}
3354 
3355 		/* Flush the semaphores on error */
3356 		smp_store_mb(result[0], -1);
3357 		if (igt_live_test_end(&t))
3358 			err = -EIO;
3359 		if (err)
3360 			break;
3361 	}
3362 
3363 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3364 	return err;
3365 }
3366 
3367 static int live_preempt_timeout(void *arg)
3368 {
3369 	struct intel_gt *gt = arg;
3370 	struct i915_gem_context *ctx_hi, *ctx_lo;
3371 	struct igt_spinner spin_lo;
3372 	struct intel_engine_cs *engine;
3373 	enum intel_engine_id id;
3374 	int err = -ENOMEM;
3375 
3376 	/*
3377 	 * Check that we force preemption to occur by cancelling the previous
3378 	 * context if it refuses to yield the GPU.
3379 	 */
3380 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3381 		return 0;
3382 
3383 	if (!intel_has_reset_engine(gt))
3384 		return 0;
3385 
3386 	ctx_hi = kernel_context(gt->i915, NULL);
3387 	if (!ctx_hi)
3388 		return -ENOMEM;
3389 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3390 
3391 	ctx_lo = kernel_context(gt->i915, NULL);
3392 	if (!ctx_lo)
3393 		goto err_ctx_hi;
3394 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3395 
3396 	if (igt_spinner_init(&spin_lo, gt))
3397 		goto err_ctx_lo;
3398 
3399 	for_each_engine(engine, gt, id) {
3400 		unsigned long saved_timeout;
3401 		struct i915_request *rq;
3402 
3403 		if (!intel_engine_has_preemption(engine))
3404 			continue;
3405 
3406 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3407 					    MI_NOOP); /* preemption disabled */
3408 		if (IS_ERR(rq)) {
3409 			err = PTR_ERR(rq);
3410 			goto err_spin_lo;
3411 		}
3412 
3413 		i915_request_add(rq);
3414 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3415 			intel_gt_set_wedged(gt);
3416 			err = -EIO;
3417 			goto err_spin_lo;
3418 		}
3419 
3420 		rq = igt_request_alloc(ctx_hi, engine);
3421 		if (IS_ERR(rq)) {
3422 			igt_spinner_end(&spin_lo);
3423 			err = PTR_ERR(rq);
3424 			goto err_spin_lo;
3425 		}
3426 
3427 		/* Flush the previous CS ack before changing timeouts */
3428 		while (READ_ONCE(engine->execlists.pending[0]))
3429 			cpu_relax();
3430 
3431 		saved_timeout = engine->props.preempt_timeout_ms;
3432 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3433 
3434 		i915_request_get(rq);
3435 		i915_request_add(rq);
3436 
3437 		intel_engine_flush_submission(engine);
3438 		engine->props.preempt_timeout_ms = saved_timeout;
3439 
3440 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3441 			intel_gt_set_wedged(gt);
3442 			i915_request_put(rq);
3443 			err = -ETIME;
3444 			goto err_spin_lo;
3445 		}
3446 
3447 		igt_spinner_end(&spin_lo);
3448 		i915_request_put(rq);
3449 	}
3450 
3451 	err = 0;
3452 err_spin_lo:
3453 	igt_spinner_fini(&spin_lo);
3454 err_ctx_lo:
3455 	kernel_context_close(ctx_lo);
3456 err_ctx_hi:
3457 	kernel_context_close(ctx_hi);
3458 	return err;
3459 }
3460 
3461 static int random_range(struct rnd_state *rnd, int min, int max)
3462 {
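	/* Uniform over the half-open interval [min, max) */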
3463 	return i915_prandom_u32_max_state(max - min, rnd) + min;
3464 }
3465 
3466 static int random_priority(struct rnd_state *rnd)
3467 {
3468 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3469 }
3470 
3471 struct preempt_smoke {
3472 	struct intel_gt *gt;
3473 	struct kthread_work work;
3474 	struct i915_gem_context **contexts;
3475 	struct intel_engine_cs *engine;
3476 	struct drm_i915_gem_object *batch;
3477 	unsigned int ncontext;
3478 	struct rnd_state prng;
3479 	unsigned long count;
3480 	int result;
3481 };
3482 
3483 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3484 {
3485 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3486 							  &smoke->prng)];
3487 }
3488 
3489 static int smoke_submit(struct preempt_smoke *smoke,
3490 			struct i915_gem_context *ctx, int prio,
3491 			struct drm_i915_gem_object *batch)
3492 {
3493 	struct i915_request *rq;
3494 	struct i915_vma *vma = NULL;
3495 	int err = 0;
3496 
3497 	if (batch) {
3498 		struct i915_address_space *vm;
3499 
3500 		vm = i915_gem_context_get_eb_vm(ctx);
3501 		vma = i915_vma_instance(batch, vm, NULL);
3502 		i915_vm_put(vm);
3503 		if (IS_ERR(vma))
3504 			return PTR_ERR(vma);
3505 
3506 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3507 		if (err)
3508 			return err;
3509 	}
3510 
3511 	ctx->sched.priority = prio;
3512 
3513 	rq = igt_request_alloc(ctx, smoke->engine);
3514 	if (IS_ERR(rq)) {
3515 		err = PTR_ERR(rq);
3516 		goto unpin;
3517 	}
3518 
3519 	if (vma) {
3520 		err = igt_vma_move_to_active_unlocked(vma, rq, 0);
3521 		if (!err)
3522 			err = rq->engine->emit_bb_start(rq,
3523 							i915_vma_offset(vma),
3524 							PAGE_SIZE, 0);
3525 	}
3526 
3527 	i915_request_add(rq);
3528 
3529 unpin:
3530 	if (vma)
3531 		i915_vma_unpin(vma);
3532 
3533 	return err;
3534 }
3535 
3536 static void smoke_crescendo_work(struct kthread_work *work)
3537 {
3538 	struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
3539 	IGT_TIMEOUT(end_time);
3540 	unsigned long count;
3541 
3542 	count = 0;
3543 	do {
3544 		struct i915_gem_context *ctx = smoke_context(smoke);
3545 
3546 		smoke->result = smoke_submit(smoke, ctx,
3547 					     count % I915_PRIORITY_MAX,
3548 					     smoke->batch);
3549 
3550 		count++;
3551 	} while (!smoke->result && count < smoke->ncontext &&
3552 		 !__igt_timeout(end_time, NULL));
3553 
3554 	smoke->count = count;
3555 }
3556 
3557 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3558 #define BATCH BIT(0)
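/*
 * With BATCH set, each submission runs the page of MI_ARB_CHECKs built in
 * live_preempt_smoke(), providing arbitration points inside the batch;
 * without it the requests are empty and can only be reordered between,
 * not within, batches.
 */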
3559 {
3560 	struct kthread_worker *worker[I915_NUM_ENGINES] = {};
3561 	struct preempt_smoke *arg;
3562 	struct intel_engine_cs *engine;
3563 	enum intel_engine_id id;
3564 	unsigned long count;
3565 	int err = 0;
3566 
3567 	arg = kmalloc_objs(*arg, I915_NUM_ENGINES);
3568 	if (!arg)
3569 		return -ENOMEM;
3570 
3571 	memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
3572 
3573 	for_each_engine(engine, smoke->gt, id) {
3574 		arg[id] = *smoke;
3575 		arg[id].engine = engine;
3576 		if (!(flags & BATCH))
3577 			arg[id].batch = NULL;
3578 		arg[id].count = 0;
3579 
3580 		worker[id] = kthread_run_worker(0, "igt/smoke:%d", id);
3581 		if (IS_ERR(worker[id])) {
3582 			err = PTR_ERR(worker[id]);
3583 			break;
3584 		}
3585 
3586 		kthread_init_work(&arg[id].work, smoke_crescendo_work);
3587 		kthread_queue_work(worker[id], &arg[id].work);
3588 	}
3589 
3590 	count = 0;
3591 	for_each_engine(engine, smoke->gt, id) {
3592 		if (IS_ERR_OR_NULL(worker[id]))
3593 			continue;
3594 
3595 		kthread_flush_work(&arg[id].work);
3596 		if (arg[id].result && !err)
3597 			err = arg[id].result;
3598 
3599 		count += arg[id].count;
3600 
3601 		kthread_destroy_worker(worker[id]);
3602 	}
3603 
3604 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606 
3607 	kfree(arg);
3608 	return err;
3609 }
3610 
3611 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3612 {
3613 	enum intel_engine_id id;
3614 	IGT_TIMEOUT(end_time);
3615 	unsigned long count;
3616 
3617 	count = 0;
3618 	do {
3619 		for_each_engine(smoke->engine, smoke->gt, id) {
3620 			struct i915_gem_context *ctx = smoke_context(smoke);
3621 			int err;
3622 
3623 			err = smoke_submit(smoke,
3624 					   ctx, random_priority(&smoke->prng),
3625 					   flags & BATCH ? smoke->batch : NULL);
3626 			if (err)
3627 				return err;
3628 
3629 			count++;
3630 		}
3631 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3632 
3633 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3634 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3635 	return 0;
3636 }
3637 
3638 static int live_preempt_smoke(void *arg)
3639 {
3640 	struct preempt_smoke smoke = {
3641 		.gt = arg,
3642 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3643 		.ncontext = 256,
3644 	};
3645 	const unsigned int phase[] = { 0, BATCH };
3646 	struct igt_live_test t;
3647 	int err = -ENOMEM;
3648 	u32 *cs;
3649 	int n;
3650 
3651 	smoke.contexts = kmalloc_objs(*smoke.contexts, smoke.ncontext);
3652 	if (!smoke.contexts)
3653 		return -ENOMEM;
3654 
3655 	smoke.batch =
3656 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3657 	if (IS_ERR(smoke.batch)) {
3658 		err = PTR_ERR(smoke.batch);
3659 		goto err_free;
3660 	}
3661 
3662 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3663 	if (IS_ERR(cs)) {
3664 		err = PTR_ERR(cs);
3665 		goto err_batch;
3666 	}
3667 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3668 		cs[n] = MI_ARB_CHECK;
3669 	cs[n] = MI_BATCH_BUFFER_END;
3670 	i915_gem_object_flush_map(smoke.batch);
3671 	i915_gem_object_unpin_map(smoke.batch);
3672 
3673 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3674 		err = -EIO;
3675 		goto err_batch;
3676 	}
3677 
3678 	for (n = 0; n < smoke.ncontext; n++) {
3679 		smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
3680 		if (!smoke.contexts[n])
3681 			goto err_ctx;
3682 	}
3683 
3684 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3685 		err = smoke_crescendo(&smoke, phase[n]);
3686 		if (err)
3687 			goto err_ctx;
3688 
3689 		err = smoke_random(&smoke, phase[n]);
3690 		if (err)
3691 			goto err_ctx;
3692 	}
3693 
3694 err_ctx:
3695 	if (igt_live_test_end(&t))
3696 		err = -EIO;
3697 
3698 	for (n = 0; n < smoke.ncontext; n++) {
3699 		if (!smoke.contexts[n])
3700 			break;
3701 		kernel_context_close(smoke.contexts[n]);
3702 	}
3703 
3704 err_batch:
3705 	i915_gem_object_put(smoke.batch);
3706 err_free:
3707 	kfree(smoke.contexts);
3708 
3709 	return err;
3710 }
3711 
3712 static int nop_virtual_engine(struct intel_gt *gt,
3713 			      struct intel_engine_cs **siblings,
3714 			      unsigned int nsibling,
3715 			      unsigned int nctx,
3716 			      unsigned int flags)
3717 #define CHAIN BIT(0)
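/*
 * With CHAIN set, all prime requests for one virtual context are submitted
 * back to back before moving on to the next context; otherwise submission
 * interleaves one request per context at a time.
 */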
3718 {
3719 	IGT_TIMEOUT(end_time);
3720 	struct i915_request *request[16] = {};
3721 	struct intel_context *ve[16];
3722 	unsigned long n, prime, nc;
3723 	struct igt_live_test t;
3724 	ktime_t times[2] = {};
3725 	int err;
3726 
3727 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3728 
3729 	for (n = 0; n < nctx; n++) {
3730 		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
3731 		if (IS_ERR(ve[n])) {
3732 			err = PTR_ERR(ve[n]);
3733 			nctx = n;
3734 			goto out;
3735 		}
3736 
3737 		err = intel_context_pin(ve[n]);
3738 		if (err) {
3739 			intel_context_put(ve[n]);
3740 			nctx = n;
3741 			goto out;
3742 		}
3743 	}
3744 
3745 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3746 	if (err)
3747 		goto out;
3748 
3749 	for_each_prime_number_from(prime, 1, 8192) {
3750 		times[1] = ktime_get_raw();
3751 
3752 		if (flags & CHAIN) {
3753 			for (nc = 0; nc < nctx; nc++) {
3754 				for (n = 0; n < prime; n++) {
3755 					struct i915_request *rq;
3756 
3757 					rq = i915_request_create(ve[nc]);
3758 					if (IS_ERR(rq)) {
3759 						err = PTR_ERR(rq);
3760 						goto out;
3761 					}
3762 
3763 					if (request[nc])
3764 						i915_request_put(request[nc]);
3765 					request[nc] = i915_request_get(rq);
3766 					i915_request_add(rq);
3767 				}
3768 			}
3769 		} else {
3770 			for (n = 0; n < prime; n++) {
3771 				for (nc = 0; nc < nctx; nc++) {
3772 					struct i915_request *rq;
3773 
3774 					rq = i915_request_create(ve[nc]);
3775 					if (IS_ERR(rq)) {
3776 						err = PTR_ERR(rq);
3777 						goto out;
3778 					}
3779 
3780 					if (request[nc])
3781 						i915_request_put(request[nc]);
3782 					request[nc] = i915_request_get(rq);
3783 					i915_request_add(rq);
3784 				}
3785 			}
3786 		}
3787 
3788 		for (nc = 0; nc < nctx; nc++) {
3789 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3790 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3791 				       __func__, ve[0]->engine->name,
3792 				       request[nc]->fence.context,
3793 				       request[nc]->fence.seqno);
3794 
3795 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3796 					  __func__, ve[0]->engine->name,
3797 					  request[nc]->fence.context,
3798 					  request[nc]->fence.seqno);
3799 				GEM_TRACE_DUMP();
3800 				intel_gt_set_wedged(gt);
3801 				break;
3802 			}
3803 		}
3804 
3805 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3806 		if (prime == 1)
3807 			times[0] = times[1];
3808 
3809 		for (nc = 0; nc < nctx; nc++) {
3810 			i915_request_put(request[nc]);
3811 			request[nc] = NULL;
3812 		}
3813 
3814 		if (__igt_timeout(end_time, NULL))
3815 			break;
3816 	}
3817 
3818 	err = igt_live_test_end(&t);
3819 	if (err)
3820 		goto out;
3821 
3822 	pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3823 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3824 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3825 
3826 out:
3827 	if (igt_flush_test(gt->i915))
3828 		err = -EIO;
3829 
3830 	for (nc = 0; nc < nctx; nc++) {
3831 		i915_request_put(request[nc]);
3832 		intel_context_unpin(ve[nc]);
3833 		intel_context_put(ve[nc]);
3834 	}
3835 	return err;
3836 }
3837 
3838 static unsigned int
3839 __select_siblings(struct intel_gt *gt,
3840 		  unsigned int class,
3841 		  struct intel_engine_cs **siblings,
3842 		  bool (*filter)(const struct intel_engine_cs *))
3843 {
3844 	unsigned int n = 0;
3845 	unsigned int inst;
3846 
3847 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3848 		if (!gt->engine_class[class][inst])
3849 			continue;
3850 
3851 		if (filter && !filter(gt->engine_class[class][inst]))
3852 			continue;
3853 
3854 		siblings[n++] = gt->engine_class[class][inst];
3855 	}
3856 
3857 	return n;
3858 }
3859 
3860 static unsigned int
3861 select_siblings(struct intel_gt *gt,
3862 		unsigned int class,
3863 		struct intel_engine_cs **siblings)
3864 {
3865 	return __select_siblings(gt, class, siblings, NULL);
3866 }
3867 
3868 static int live_virtual_engine(void *arg)
3869 {
3870 	struct intel_gt *gt = arg;
3871 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3872 	struct intel_engine_cs *engine;
3873 	enum intel_engine_id id;
3874 	unsigned int class;
3875 	int err;
3876 
3877 	if (intel_uc_uses_guc_submission(&gt->uc))
3878 		return 0;
3879 
3880 	for_each_engine(engine, gt, id) {
3881 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882 		if (err) {
3883 			pr_err("Failed to wrap engine %s: err=%d\n",
3884 			       engine->name, err);
3885 			return err;
3886 		}
3887 	}
3888 
3889 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3890 		int nsibling, n;
3891 
3892 		nsibling = select_siblings(gt, class, siblings);
3893 		if (nsibling < 2)
3894 			continue;
3895 
3896 		for (n = 1; n <= nsibling + 1; n++) {
3897 			err = nop_virtual_engine(gt, siblings, nsibling,
3898 						 n, 0);
3899 			if (err)
3900 				return err;
3901 		}
3902 
3903 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3904 		if (err)
3905 			return err;
3906 	}
3907 
3908 	return 0;
3909 }
3910 
3911 static int mask_virtual_engine(struct intel_gt *gt,
3912 			       struct intel_engine_cs **siblings,
3913 			       unsigned int nsibling)
3914 {
3915 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3916 	struct intel_context *ve;
3917 	struct igt_live_test t;
3918 	unsigned int n;
3919 	int err;
3920 
3921 	/*
3922 	 * Check that by setting the execution mask on a request, we can
3923 	 * restrict it to our desired engine within the virtual engine.
3924 	 */
3925 
3926 	ve = intel_engine_create_virtual(siblings, nsibling, 0);
3927 	if (IS_ERR(ve)) {
3928 		err = PTR_ERR(ve);
3929 		goto out_close;
3930 	}
3931 
3932 	err = intel_context_pin(ve);
3933 	if (err)
3934 		goto out_put;
3935 
3936 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3937 	if (err)
3938 		goto out_unpin;
3939 
3940 	for (n = 0; n < nsibling; n++) {
3941 		request[n] = i915_request_create(ve);
3942 		if (IS_ERR(request[n])) {
3943 			err = PTR_ERR(request[n]);
3944 			nsibling = n;
3945 			goto out;
3946 		}
3947 
3948 		/* Reverse order as it's more likely to be unnatural */
3949 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950 
3951 		i915_request_get(request[n]);
3952 		i915_request_add(request[n]);
3953 	}
3954 
3955 	for (n = 0; n < nsibling; n++) {
3956 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3957 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3958 			       __func__, ve->engine->name,
3959 			       request[n]->fence.context,
3960 			       request[n]->fence.seqno);
3961 
3962 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3963 				  __func__, ve->engine->name,
3964 				  request[n]->fence.context,
3965 				  request[n]->fence.seqno);
3966 			GEM_TRACE_DUMP();
3967 			intel_gt_set_wedged(gt);
3968 			err = -EIO;
3969 			goto out;
3970 		}
3971 
3972 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3973 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3974 			       request[n]->engine->name,
3975 			       siblings[nsibling - n - 1]->name);
3976 			err = -EINVAL;
3977 			goto out;
3978 		}
3979 	}
3980 
3981 	err = igt_live_test_end(&t);
3982 out:
3983 	if (igt_flush_test(gt->i915))
3984 		err = -EIO;
3985 
3986 	for (n = 0; n < nsibling; n++)
3987 		i915_request_put(request[n]);
3988 
3989 out_unpin:
3990 	intel_context_unpin(ve);
3991 out_put:
3992 	intel_context_put(ve);
3993 out_close:
3994 	return err;
3995 }
3996 
3997 static int live_virtual_mask(void *arg)
3998 {
3999 	struct intel_gt *gt = arg;
4000 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4001 	unsigned int class;
4002 	int err;
4003 
4004 	if (intel_uc_uses_guc_submission(&gt->uc))
4005 		return 0;
4006 
4007 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4008 		unsigned int nsibling;
4009 
4010 		nsibling = select_siblings(gt, class, siblings);
4011 		if (nsibling < 2)
4012 			continue;
4013 
4014 		err = mask_virtual_engine(gt, siblings, nsibling);
4015 		if (err)
4016 			return err;
4017 	}
4018 
4019 	return 0;
4020 }
4021 
4022 static int slicein_virtual_engine(struct intel_gt *gt,
4023 				  struct intel_engine_cs **siblings,
4024 				  unsigned int nsibling)
4025 {
4026 	const long timeout = slice_timeout(siblings[0]);
4027 	struct intel_context *ce;
4028 	struct i915_request *rq;
4029 	struct igt_spinner spin;
4030 	unsigned int n;
4031 	int err = 0;
4032 
4033 	/*
4034 	 * Virtual requests must take part in timeslicing on the target engines.
4035 	 */
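	/*
	 * Occupy every sibling with a spinner carrying an arbitration
	 * point, then expect a request on a virtual engine spanning the
	 * same siblings to be granted a timeslice within slice_timeout().
	 */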
4036 
4037 	if (igt_spinner_init(&spin, gt))
4038 		return -ENOMEM;
4039 
4040 	for (n = 0; n < nsibling; n++) {
4041 		ce = intel_context_create(siblings[n]);
4042 		if (IS_ERR(ce)) {
4043 			err = PTR_ERR(ce);
4044 			goto out;
4045 		}
4046 
4047 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4048 		intel_context_put(ce);
4049 		if (IS_ERR(rq)) {
4050 			err = PTR_ERR(rq);
4051 			goto out;
4052 		}
4053 
4054 		i915_request_add(rq);
4055 	}
4056 
4057 	ce = intel_engine_create_virtual(siblings, nsibling, 0);
4058 	if (IS_ERR(ce)) {
4059 		err = PTR_ERR(ce);
4060 		goto out;
4061 	}
4062 
4063 	rq = intel_context_create_request(ce);
4064 	intel_context_put(ce);
4065 	if (IS_ERR(rq)) {
4066 		err = PTR_ERR(rq);
4067 		goto out;
4068 	}
4069 
4070 	i915_request_get(rq);
4071 	i915_request_add(rq);
4072 	if (i915_request_wait(rq, 0, timeout) < 0) {
4073 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4074 			      __func__, rq->engine->name);
4075 		GEM_TRACE_DUMP();
4076 		intel_gt_set_wedged(gt);
4077 		err = -EIO;
4078 	}
4079 	i915_request_put(rq);
4080 
4081 out:
4082 	igt_spinner_end(&spin);
4083 	if (igt_flush_test(gt->i915))
4084 		err = -EIO;
4085 	igt_spinner_fini(&spin);
4086 	return err;
4087 }
4088 
4089 static int sliceout_virtual_engine(struct intel_gt *gt,
4090 				   struct intel_engine_cs **siblings,
4091 				   unsigned int nsibling)
4092 {
4093 	const long timeout = slice_timeout(siblings[0]);
4094 	struct intel_context *ce;
4095 	struct i915_request *rq;
4096 	struct igt_spinner spin;
4097 	unsigned int n;
4098 	int err = 0;
4099 
4100 	/*
4101 	 * Virtual requests must allow others a fair timeslice.
4102 	 */
4103 
4104 	if (igt_spinner_init(&spin, gt))
4105 		return -ENOMEM;
4106 
4107 	/* XXX We do not handle oversubscription and fairness with normal rq */
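	/*
	 * Use one spinning virtual request per sibling so that every
	 * physical engine ends up occupied by a spinner.
	 */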
	for (n = 0; n < nsibling; n++) {
		ce = intel_engine_create_virtual(siblings, nsibling, 0);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		i915_request_add(rq);
	}

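	/*
	 * Despite the virtual spinners, an ordinary request on each
	 * sibling must still receive a fair timeslice and complete
	 * within one slice interval.
	 */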
	for (n = 0; !err && n < nsibling; n++) {
		ce = intel_context_create(siblings[n]);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		i915_request_get(rq);
		i915_request_add(rq);
		if (i915_request_wait(rq, 0, timeout) < 0) {
			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
				      __func__, siblings[n]->name);
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
		}
		i915_request_put(rq);
	}

out:
	igt_spinner_end(&spin);
	if (igt_flush_test(gt->i915))
		err = -EIO;
	igt_spinner_fini(&spin);
	return err;
}

static int live_virtual_slice(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		unsigned int nsibling;

		nsibling = __select_siblings(gt, class, siblings,
					     intel_engine_has_timeslices);
		if (nsibling < 2)
			continue;

		err = slicein_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;

		err = sliceout_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	int err = 0;
	u32 *cs;

	scratch =
		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
						    PAGE_SIZE);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	err = i915_vma_sync(scratch);
	if (err)
		goto out_scratch;

	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

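	/*
	 * Build a chain of requests, each nominally on a different sibling:
	 * request n copies GPR[n] (set to n by its predecessor) out to the
	 * scratch page and then writes n + 1 into the next GPR. The chain
	 * only works if the GPR state is carried in the context image from
	 * one sibling to the next.
	 */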
	for (n = 0; n < NUM_GPR_DW; n++) {
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		i915_request_put(last);
		last = i915_request_get(rq);

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

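	/* The scratch page should now hold the sequence 0, 1, ..., NUM_GPR_DW - 1 */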
	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

static int live_virtual_preserved(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class;

	/*
	 * Check that the context image retains non-privileged (user) registers
	 * from one engine to the next. For this we check that the CS_GPR
	 * registers are preserved.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	/* As we use CS_GPR, we cannot run before they existed on all engines. */
	if (GRAPHICS_VER(gt->i915) < 9)
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		err = preserved_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_disable(siblings[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

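	/* The spinner is now running on whichever sibling the balancer chose */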
	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	err = engine_lock_reset_tasklet(engine);
	if (err)
		goto out_heartbeat;

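	/* Run the submission tasklet by hand so that rq becomes the active ELSP context */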
	engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->sched_engine->lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->sched_engine->lock);
	GEM_BUG_ON(rq->engine != engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	__intel_engine_reset_bh(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	engine_unlock_reset_tasklet(engine);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_enable(siblings[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
	return err;
}

static int live_virtual_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class;

	/*
	 * Check that we handle a reset event within a virtual engine.
	 * Only the physical engine is reset, but we have to check the flow
	 * of the virtual requests around the reset, and make sure none of
	 * them is forgotten.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		err = reset_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_unlite_ring),
		SUBTEST(live_pin_rewind),
		SUBTEST(live_hold_reset),
		SUBTEST(live_error_interrupt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_rewind),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_timeslice_nopreempt),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_ring),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_user),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_slice),
		SUBTEST(live_virtual_reset),
	};

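	/* These tests drive the execlists (ELSP) backend directly; skip under GuC */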
	if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
		return 0;

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}