xref: /linux/drivers/gpu/drm/i915/gt/selftest_execlists.c (revision 815e260a18a3af4dab59025ee99a7156c0e8b5e0)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include <drm/drm_print.h>
9 
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_heartbeat.h"
13 #include "gt/intel_reset.h"
14 #include "gt/selftest_engine_heartbeat.h"
15 
16 #include "i915_jiffies.h"
17 #include "i915_selftest.h"
18 #include "selftests/i915_random.h"
19 #include "selftests/igt_flush_test.h"
20 #include "selftests/igt_live_test.h"
21 #include "selftests/igt_spinner.h"
22 #include "selftests/lib_sw_fence.h"
23 
24 #include "gem/selftests/igt_gem_utils.h"
25 #include "gem/selftests/mock_context.h"
26 
27 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
28 #define NUM_GPR 16
29 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
30 
31 static bool is_active(struct i915_request *rq)
32 {
33 	if (i915_request_is_active(rq))
34 		return true;
35 
36 	if (i915_request_on_hold(rq))
37 		return true;
38 
39 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
40 		return true;
41 
42 	return false;
43 }
44 
45 static int wait_for_submit(struct intel_engine_cs *engine,
46 			   struct i915_request *rq,
47 			   unsigned long timeout)
48 {
49 	/* Ignore our own attempts to suppress excess tasklets */
50 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
51 
52 	timeout += jiffies;
53 	do {
54 		bool done = time_after(jiffies, timeout);
55 
56 		if (i915_request_completed(rq)) /* that was quick! */
57 			return 0;
58 
59 		/* Wait until the HW has acknowledged the submission (or err) */
60 		intel_engine_flush_submission(engine);
61 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
62 			return 0;
63 
64 		if (done)
65 			return -ETIME;
66 
67 		cond_resched();
68 	} while (1);
69 }
70 
71 static int wait_for_reset(struct intel_engine_cs *engine,
72 			  struct i915_request *rq,
73 			  unsigned long timeout)
74 {
75 	timeout += jiffies;
76 
77 	do {
78 		cond_resched();
79 		intel_engine_flush_submission(engine);
80 
81 		if (READ_ONCE(engine->execlists.pending[0]))
82 			continue;
83 
84 		if (i915_request_completed(rq))
85 			break;
86 
87 		if (READ_ONCE(rq->fence.error))
88 			break;
89 	} while (time_before(jiffies, timeout));
90 
91 	if (rq->fence.error != -EIO) {
92 		pr_err("%s: hanging request %llx:%lld not reset\n",
93 		       engine->name,
94 		       rq->fence.context,
95 		       rq->fence.seqno);
96 		return -EINVAL;
97 	}
98 
99 	/* Give the request a jiffy to complete after flushing the worker */
100 	if (i915_request_wait(rq, 0,
101 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
102 		pr_err("%s: hanging request %llx:%lld did not complete\n",
103 		       engine->name,
104 		       rq->fence.context,
105 		       rq->fence.seqno);
106 		return -ETIME;
107 	}
108 
109 	return 0;
110 }
111 
112 static int live_sanitycheck(void *arg)
113 {
114 	struct intel_gt *gt = arg;
115 	struct intel_engine_cs *engine;
116 	enum intel_engine_id id;
117 	struct igt_spinner spin;
118 	int err = 0;
119 
120 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
121 		return 0;
122 
123 	if (igt_spinner_init(&spin, gt))
124 		return -ENOMEM;
125 
126 	for_each_engine(engine, gt, id) {
127 		struct intel_context *ce;
128 		struct i915_request *rq;
129 
130 		ce = intel_context_create(engine);
131 		if (IS_ERR(ce)) {
132 			err = PTR_ERR(ce);
133 			break;
134 		}
135 
136 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
137 		if (IS_ERR(rq)) {
138 			err = PTR_ERR(rq);
139 			goto out_ctx;
140 		}
141 
142 		i915_request_add(rq);
143 		if (!igt_wait_for_spinner(&spin, rq)) {
144 			GEM_TRACE("spinner failed to start\n");
145 			GEM_TRACE_DUMP();
146 			intel_gt_set_wedged(gt);
147 			err = -EIO;
148 			goto out_ctx;
149 		}
150 
151 		igt_spinner_end(&spin);
152 		if (igt_flush_test(gt->i915)) {
153 			err = -EIO;
154 			goto out_ctx;
155 		}
156 
157 out_ctx:
158 		intel_context_put(ce);
159 		if (err)
160 			break;
161 	}
162 
163 	igt_spinner_fini(&spin);
164 	return err;
165 }
166 
167 static int live_unlite_restore(struct intel_gt *gt, int prio)
168 {
169 	struct intel_engine_cs *engine;
170 	enum intel_engine_id id;
171 	struct igt_spinner spin;
172 	int err = -ENOMEM;
173 
174 	/*
175 	 * Check that we can correctly context switch between 2 instances
176 	 * on the same engine from the same parent context.
177 	 */
178 
179 	if (igt_spinner_init(&spin, gt))
180 		return err;
181 
182 	err = 0;
183 	for_each_engine(engine, gt, id) {
184 		struct intel_context *ce[2] = {};
185 		struct i915_request *rq[2];
186 		struct igt_live_test t;
187 		int n;
188 
189 		if (prio && !intel_engine_has_preemption(engine))
190 			continue;
191 
192 		if (!intel_engine_can_store_dword(engine))
193 			continue;
194 
195 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
196 			err = -EIO;
197 			break;
198 		}
199 		st_engine_heartbeat_disable(engine);
200 
201 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
202 			struct intel_context *tmp;
203 
204 			tmp = intel_context_create(engine);
205 			if (IS_ERR(tmp)) {
206 				err = PTR_ERR(tmp);
207 				goto err_ce;
208 			}
209 
210 			err = intel_context_pin(tmp);
211 			if (err) {
212 				intel_context_put(tmp);
213 				goto err_ce;
214 			}
215 
216 			/*
217 			 * Setup the pair of contexts such that if we
218 			 * lite-restore using the RING_TAIL from ce[1] it
219 			 * will execute garbage from ce[0]->ring.
220 			 */
221 			memset(tmp->ring->vaddr,
222 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
223 			       tmp->ring->vma->size);
224 
225 			ce[n] = tmp;
226 		}
227 		GEM_BUG_ON(!ce[1]->ring->size);
228 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
229 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
230 
231 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
232 		if (IS_ERR(rq[0])) {
233 			err = PTR_ERR(rq[0]);
234 			goto err_ce;
235 		}
236 
237 		i915_request_get(rq[0]);
238 		i915_request_add(rq[0]);
239 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
240 
241 		if (!igt_wait_for_spinner(&spin, rq[0])) {
242 			i915_request_put(rq[0]);
243 			goto err_ce;
244 		}
245 
246 		rq[1] = i915_request_create(ce[1]);
247 		if (IS_ERR(rq[1])) {
248 			err = PTR_ERR(rq[1]);
249 			i915_request_put(rq[0]);
250 			goto err_ce;
251 		}
252 
253 		if (!prio) {
254 			/*
255 			 * Ensure we do the switch to ce[1] on completion.
256 			 *
257 			 * rq[0] is already submitted, so this should reduce
258 			 * to a no-op (a wait on a request on the same engine
259 			 * uses the submit fence, not the completion fence),
260 			 * but it will install a dependency on rq[1] for rq[0]
261 			 * that will prevent the pair being reordered by
262 			 * timeslicing.
263 			 */
264 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
265 		}
266 
267 		i915_request_get(rq[1]);
268 		i915_request_add(rq[1]);
269 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
270 		i915_request_put(rq[0]);
271 
272 		if (prio) {
273 			struct i915_sched_attr attr = {
274 				.priority = prio,
275 			};
276 
277 			/* Alternatively preempt the spinner with ce[1] */
278 			engine->sched_engine->schedule(rq[1], &attr);
279 		}
280 
281 		/* And switch back to ce[0] for good measure */
282 		rq[0] = i915_request_create(ce[0]);
283 		if (IS_ERR(rq[0])) {
284 			err = PTR_ERR(rq[0]);
285 			i915_request_put(rq[1]);
286 			goto err_ce;
287 		}
288 
289 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
290 		i915_request_get(rq[0]);
291 		i915_request_add(rq[0]);
292 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
293 		i915_request_put(rq[1]);
294 		i915_request_put(rq[0]);
295 
296 err_ce:
297 		intel_engine_flush_submission(engine);
298 		igt_spinner_end(&spin);
299 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
300 			if (IS_ERR_OR_NULL(ce[n]))
301 				break;
302 
303 			intel_context_unpin(ce[n]);
304 			intel_context_put(ce[n]);
305 		}
306 
307 		st_engine_heartbeat_enable(engine);
308 		if (igt_live_test_end(&t))
309 			err = -EIO;
310 		if (err)
311 			break;
312 	}
313 
314 	igt_spinner_fini(&spin);
315 	return err;
316 }
317 
318 static int live_unlite_switch(void *arg)
319 {
320 	return live_unlite_restore(arg, 0);
321 }
322 
323 static int live_unlite_preempt(void *arg)
324 {
325 	return live_unlite_restore(arg, I915_PRIORITY_MAX);
326 }
327 
328 static int live_unlite_ring(void *arg)
329 {
330 	struct intel_gt *gt = arg;
331 	struct intel_engine_cs *engine;
332 	struct igt_spinner spin;
333 	enum intel_engine_id id;
334 	int err = 0;
335 
336 	/*
337 	 * Setup a preemption event that will cause almost the entire ring
338 	 * to be unwound, potentially fooling our intel_ring_direction()
339 	 * into emitting a forward lite-restore instead of the rollback.
340 	 */
341 
342 	if (igt_spinner_init(&spin, gt))
343 		return -ENOMEM;
344 
345 	for_each_engine(engine, gt, id) {
346 		struct intel_context *ce[2] = {};
347 		struct i915_request *rq;
348 		struct igt_live_test t;
349 		int n;
350 
351 		if (!intel_engine_has_preemption(engine))
352 			continue;
353 
354 		if (!intel_engine_can_store_dword(engine))
355 			continue;
356 
357 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
358 			err = -EIO;
359 			break;
360 		}
361 		st_engine_heartbeat_disable(engine);
362 
363 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
364 			struct intel_context *tmp;
365 
366 			tmp = intel_context_create(engine);
367 			if (IS_ERR(tmp)) {
368 				err = PTR_ERR(tmp);
369 				goto err_ce;
370 			}
371 
372 			err = intel_context_pin(tmp);
373 			if (err) {
374 				intel_context_put(tmp);
375 				goto err_ce;
376 			}
377 
378 			memset32(tmp->ring->vaddr,
379 				 0xdeadbeef, /* trigger a hang if executed */
380 				 tmp->ring->vma->size / sizeof(u32));
381 
382 			ce[n] = tmp;
383 		}
384 
385 		/* Create max prio spinner, followed by N low prio nops */
386 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
387 		if (IS_ERR(rq)) {
388 			err = PTR_ERR(rq);
389 			goto err_ce;
390 		}
391 
392 		i915_request_get(rq);
393 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
394 		i915_request_add(rq);
395 
396 		if (!igt_wait_for_spinner(&spin, rq)) {
397 			intel_gt_set_wedged(gt);
398 			i915_request_put(rq);
399 			err = -ETIME;
400 			goto err_ce;
401 		}
402 
403 		/* Fill the ring, until we will cause a wrap */
404 		n = 0;
405 		while (intel_ring_direction(ce[0]->ring,
406 					    rq->wa_tail,
407 					    ce[0]->ring->tail) <= 0) {
408 			struct i915_request *tmp;
409 
410 			tmp = intel_context_create_request(ce[0]);
411 			if (IS_ERR(tmp)) {
412 				err = PTR_ERR(tmp);
413 				i915_request_put(rq);
414 				goto err_ce;
415 			}
416 
417 			i915_request_add(tmp);
418 			intel_engine_flush_submission(engine);
419 			n++;
420 		}
421 		intel_engine_flush_submission(engine);
422 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
423 			 engine->name, n,
424 			 ce[0]->ring->size,
425 			 ce[0]->ring->tail,
426 			 ce[0]->ring->emit,
427 			 rq->tail);
428 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
429 						rq->tail,
430 						ce[0]->ring->tail) <= 0);
431 		i915_request_put(rq);
432 
433 		/* Create a second ring to preempt the first ring after rq[0] */
434 		rq = intel_context_create_request(ce[1]);
435 		if (IS_ERR(rq)) {
436 			err = PTR_ERR(rq);
437 			goto err_ce;
438 		}
439 
440 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
441 		i915_request_get(rq);
442 		i915_request_add(rq);
443 
444 		err = wait_for_submit(engine, rq, HZ / 2);
445 		i915_request_put(rq);
446 		if (err) {
447 			pr_err("%s: preemption request was not submitted\n",
448 			       engine->name);
449 			err = -ETIME;
450 		}
451 
452 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
453 			 engine->name,
454 			 ce[0]->ring->tail, ce[0]->ring->emit,
455 			 ce[1]->ring->tail, ce[1]->ring->emit);
456 
457 err_ce:
458 		intel_engine_flush_submission(engine);
459 		igt_spinner_end(&spin);
460 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
461 			if (IS_ERR_OR_NULL(ce[n]))
462 				break;
463 
464 			intel_context_unpin(ce[n]);
465 			intel_context_put(ce[n]);
466 		}
467 		st_engine_heartbeat_enable(engine);
468 		if (igt_live_test_end(&t))
469 			err = -EIO;
470 		if (err)
471 			break;
472 	}
473 
474 	igt_spinner_fini(&spin);
475 	return err;
476 }
477 
478 static int live_pin_rewind(void *arg)
479 {
480 	struct intel_gt *gt = arg;
481 	struct intel_engine_cs *engine;
482 	enum intel_engine_id id;
483 	int err = 0;
484 
485 	/*
486 	 * We have to be careful not to trust intel_ring too much, for example
487 	 * ring->head is updated upon retire which is out of sync with pinning
488 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
489 	 * or else we risk writing an older, stale value.
490 	 *
491 	 * To simulate this, let's apply a bit of deliberate sabotague.
492 	 */
493 
494 	for_each_engine(engine, gt, id) {
495 		struct intel_context *ce;
496 		struct i915_request *rq;
497 		struct intel_ring *ring;
498 		struct igt_live_test t;
499 
500 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
501 			err = -EIO;
502 			break;
503 		}
504 
505 		ce = intel_context_create(engine);
506 		if (IS_ERR(ce)) {
507 			err = PTR_ERR(ce);
508 			break;
509 		}
510 
511 		err = intel_context_pin(ce);
512 		if (err) {
513 			intel_context_put(ce);
514 			break;
515 		}
516 
517 		/* Keep the context awake while we play games */
518 		err = i915_active_acquire(&ce->active);
519 		if (err) {
520 			intel_context_unpin(ce);
521 			intel_context_put(ce);
522 			break;
523 		}
524 		ring = ce->ring;
525 
526 		/* Poison the ring, and offset the next request from HEAD */
527 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
528 		ring->emit = ring->size / 2;
529 		ring->tail = ring->emit;
530 		GEM_BUG_ON(ring->head);
531 
532 		intel_context_unpin(ce);
533 
534 		/* Submit a simple nop request */
535 		GEM_BUG_ON(intel_context_is_pinned(ce));
536 		rq = intel_context_create_request(ce);
537 		i915_active_release(&ce->active); /* e.g. async retire */
538 		intel_context_put(ce);
539 		if (IS_ERR(rq)) {
540 			err = PTR_ERR(rq);
541 			break;
542 		}
543 		GEM_BUG_ON(!rq->head);
544 		i915_request_add(rq);
545 
546 		/* Expect not to hang! */
547 		if (igt_live_test_end(&t)) {
548 			err = -EIO;
549 			break;
550 		}
551 	}
552 
553 	return err;
554 }
555 
556 static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
557 {
558 	tasklet_disable(&engine->sched_engine->tasklet);
559 	local_bh_disable();
560 
561 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
562 			     &engine->gt->reset.flags)) {
563 		local_bh_enable();
564 		tasklet_enable(&engine->sched_engine->tasklet);
565 
566 		intel_gt_set_wedged(engine->gt);
567 		return -EBUSY;
568 	}
569 
570 	return 0;
571 }
572 
573 static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
574 {
575 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
576 			      &engine->gt->reset.flags);
577 
578 	local_bh_enable();
579 	tasklet_enable(&engine->sched_engine->tasklet);
580 }
581 
582 static int live_hold_reset(void *arg)
583 {
584 	struct intel_gt *gt = arg;
585 	struct intel_engine_cs *engine;
586 	enum intel_engine_id id;
587 	struct igt_spinner spin;
588 	int err = 0;
589 
590 	/*
591 	 * In order to support offline error capture for fast preempt reset,
592 	 * we need to decouple the guilty request and ensure that it and its
593 	 * descendents are not executed while the capture is in progress.
594 	 */
595 
596 	if (!intel_has_reset_engine(gt))
597 		return 0;
598 
599 	if (igt_spinner_init(&spin, gt))
600 		return -ENOMEM;
601 
602 	for_each_engine(engine, gt, id) {
603 		struct intel_context *ce;
604 		struct i915_request *rq;
605 
606 		ce = intel_context_create(engine);
607 		if (IS_ERR(ce)) {
608 			err = PTR_ERR(ce);
609 			break;
610 		}
611 
612 		st_engine_heartbeat_disable(engine);
613 
614 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
615 		if (IS_ERR(rq)) {
616 			err = PTR_ERR(rq);
617 			goto out;
618 		}
619 		i915_request_add(rq);
620 
621 		if (!igt_wait_for_spinner(&spin, rq)) {
622 			intel_gt_set_wedged(gt);
623 			err = -ETIME;
624 			goto out;
625 		}
626 
627 		/* We have our request executing, now remove it and reset */
628 
629 		err = engine_lock_reset_tasklet(engine);
630 		if (err)
631 			goto out;
632 
633 		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
634 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
635 
636 		i915_request_get(rq);
637 		execlists_hold(engine, rq);
638 		GEM_BUG_ON(!i915_request_on_hold(rq));
639 
640 		__intel_engine_reset_bh(engine, NULL);
641 		GEM_BUG_ON(rq->fence.error != -EIO);
642 
643 		engine_unlock_reset_tasklet(engine);
644 
645 		/* Check that we do not resubmit the held request */
646 		if (!i915_request_wait(rq, 0, HZ / 5)) {
647 			pr_err("%s: on hold request completed!\n",
648 			       engine->name);
649 			i915_request_put(rq);
650 			err = -EIO;
651 			goto out;
652 		}
653 		GEM_BUG_ON(!i915_request_on_hold(rq));
654 
655 		/* But is resubmitted on release */
656 		execlists_unhold(engine, rq);
657 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
658 			pr_err("%s: held request did not complete!\n",
659 			       engine->name);
660 			intel_gt_set_wedged(gt);
661 			err = -ETIME;
662 		}
663 		i915_request_put(rq);
664 
665 out:
666 		st_engine_heartbeat_enable(engine);
667 		intel_context_put(ce);
668 		if (err)
669 			break;
670 	}
671 
672 	igt_spinner_fini(&spin);
673 	return err;
674 }
675 
676 static const char *error_repr(int err)
677 {
678 	return err ? "bad" : "good";
679 }
680 
681 static int live_error_interrupt(void *arg)
682 {
683 	static const struct error_phase {
684 		enum { GOOD = 0, BAD = -EIO } error[2];
685 	} phases[] = {
686 		{ { BAD,  GOOD } },
687 		{ { BAD,  BAD  } },
688 		{ { BAD,  GOOD } },
689 		{ { GOOD, GOOD } }, /* sentinel */
690 	};
691 	struct intel_gt *gt = arg;
692 	struct intel_engine_cs *engine;
693 	enum intel_engine_id id;
694 
695 	/*
696 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
697 	 * of invalid commands in user batches that will cause a GPU hang.
698 	 * This is a faster mechanism than using hangcheck/heartbeats, but
699 	 * only detects problems the HW knows about -- it will not warn when
700 	 * we kill the HW!
701 	 *
702 	 * To verify our detection and reset, we throw some invalid commands
703 	 * at the HW and wait for the interrupt.
704 	 */
705 
706 	if (!intel_has_reset_engine(gt))
707 		return 0;
708 
709 	for_each_engine(engine, gt, id) {
710 		const struct error_phase *p;
711 		int err = 0;
712 
713 		st_engine_heartbeat_disable(engine);
714 
715 		for (p = phases; p->error[0] != GOOD; p++) {
716 			struct i915_request *client[ARRAY_SIZE(phases->error)];
717 			u32 *cs;
718 			int i;
719 
720 			memset(client, 0, sizeof(*client));
721 			for (i = 0; i < ARRAY_SIZE(client); i++) {
722 				struct intel_context *ce;
723 				struct i915_request *rq;
724 
725 				ce = intel_context_create(engine);
726 				if (IS_ERR(ce)) {
727 					err = PTR_ERR(ce);
728 					goto out;
729 				}
730 
731 				rq = intel_context_create_request(ce);
732 				intel_context_put(ce);
733 				if (IS_ERR(rq)) {
734 					err = PTR_ERR(rq);
735 					goto out;
736 				}
737 
738 				if (rq->engine->emit_init_breadcrumb) {
739 					err = rq->engine->emit_init_breadcrumb(rq);
740 					if (err) {
741 						i915_request_add(rq);
742 						goto out;
743 					}
744 				}
745 
746 				cs = intel_ring_begin(rq, 2);
747 				if (IS_ERR(cs)) {
748 					i915_request_add(rq);
749 					err = PTR_ERR(cs);
750 					goto out;
751 				}
752 
753 				if (p->error[i]) {
754 					*cs++ = 0xdeadbeef;
755 					*cs++ = 0xdeadbeef;
756 				} else {
757 					*cs++ = MI_NOOP;
758 					*cs++ = MI_NOOP;
759 				}
760 
761 				client[i] = i915_request_get(rq);
762 				i915_request_add(rq);
763 			}
764 
765 			err = wait_for_submit(engine, client[0], HZ / 2);
766 			if (err) {
767 				pr_err("%s: first request did not start within time!\n",
768 				       engine->name);
769 				err = -ETIME;
770 				goto out;
771 			}
772 
773 			for (i = 0; i < ARRAY_SIZE(client); i++) {
774 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
775 					pr_debug("%s: %s request incomplete!\n",
776 						 engine->name,
777 						 error_repr(p->error[i]));
778 
779 				if (!i915_request_started(client[i])) {
780 					pr_err("%s: %s request not started!\n",
781 					       engine->name,
782 					       error_repr(p->error[i]));
783 					err = -ETIME;
784 					goto out;
785 				}
786 
787 				/* Kick the tasklet to process the error */
788 				intel_engine_flush_submission(engine);
789 				if (client[i]->fence.error != p->error[i]) {
790 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
791 					       engine->name,
792 					       error_repr(p->error[i]),
793 					       i915_request_completed(client[i]) ? "completed" : "running",
794 					       client[i]->fence.error);
795 					err = -EINVAL;
796 					goto out;
797 				}
798 			}
799 
800 out:
801 			for (i = 0; i < ARRAY_SIZE(client); i++)
802 				if (client[i])
803 					i915_request_put(client[i]);
804 			if (err) {
805 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
806 				       engine->name, p - phases,
807 				       p->error[0], p->error[1]);
808 				break;
809 			}
810 		}
811 
812 		st_engine_heartbeat_enable(engine);
813 		if (err) {
814 			intel_gt_set_wedged(gt);
815 			return err;
816 		}
817 	}
818 
819 	return 0;
820 }
821 
822 static int
823 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
824 {
825 	u32 *cs;
826 
827 	cs = intel_ring_begin(rq, 10);
828 	if (IS_ERR(cs))
829 		return PTR_ERR(cs);
830 
831 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
832 
833 	*cs++ = MI_SEMAPHORE_WAIT |
834 		MI_SEMAPHORE_GLOBAL_GTT |
835 		MI_SEMAPHORE_POLL |
836 		MI_SEMAPHORE_SAD_NEQ_SDD;
837 	*cs++ = 0;
838 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
839 	*cs++ = 0;
840 
841 	if (idx > 0) {
842 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
843 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
844 		*cs++ = 0;
845 		*cs++ = 1;
846 	} else {
847 		*cs++ = MI_NOOP;
848 		*cs++ = MI_NOOP;
849 		*cs++ = MI_NOOP;
850 		*cs++ = MI_NOOP;
851 	}
852 
853 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
854 
855 	intel_ring_advance(rq, cs);
856 	return 0;
857 }
858 
859 static struct i915_request *
860 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
861 {
862 	struct intel_context *ce;
863 	struct i915_request *rq;
864 	int err;
865 
866 	ce = intel_context_create(engine);
867 	if (IS_ERR(ce))
868 		return ERR_CAST(ce);
869 
870 	rq = intel_context_create_request(ce);
871 	if (IS_ERR(rq))
872 		goto out_ce;
873 
874 	err = 0;
875 	if (rq->engine->emit_init_breadcrumb)
876 		err = rq->engine->emit_init_breadcrumb(rq);
877 	if (err == 0)
878 		err = emit_semaphore_chain(rq, vma, idx);
879 	if (err == 0)
880 		i915_request_get(rq);
881 	i915_request_add(rq);
882 	if (err)
883 		rq = ERR_PTR(err);
884 
885 out_ce:
886 	intel_context_put(ce);
887 	return rq;
888 }
889 
890 static int
891 release_queue(struct intel_engine_cs *engine,
892 	      struct i915_vma *vma,
893 	      int idx, int prio)
894 {
895 	struct i915_sched_attr attr = {
896 		.priority = prio,
897 	};
898 	struct i915_request *rq;
899 	u32 *cs;
900 
901 	rq = intel_engine_create_kernel_request(engine);
902 	if (IS_ERR(rq))
903 		return PTR_ERR(rq);
904 
905 	cs = intel_ring_begin(rq, 4);
906 	if (IS_ERR(cs)) {
907 		i915_request_add(rq);
908 		return PTR_ERR(cs);
909 	}
910 
911 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
912 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
913 	*cs++ = 0;
914 	*cs++ = 1;
915 
916 	intel_ring_advance(rq, cs);
917 
918 	i915_request_get(rq);
919 	i915_request_add(rq);
920 
921 	local_bh_disable();
922 	engine->sched_engine->schedule(rq, &attr);
923 	local_bh_enable(); /* kick tasklet */
924 
925 	i915_request_put(rq);
926 
927 	return 0;
928 }
929 
930 static int
931 slice_semaphore_queue(struct intel_engine_cs *outer,
932 		      struct i915_vma *vma,
933 		      int count)
934 {
935 	struct intel_engine_cs *engine;
936 	struct i915_request *head;
937 	enum intel_engine_id id;
938 	int err, i, n = 0;
939 
940 	head = semaphore_queue(outer, vma, n++);
941 	if (IS_ERR(head))
942 		return PTR_ERR(head);
943 
944 	for_each_engine(engine, outer->gt, id) {
945 		if (!intel_engine_has_preemption(engine))
946 			continue;
947 
948 		for (i = 0; i < count; i++) {
949 			struct i915_request *rq;
950 
951 			rq = semaphore_queue(engine, vma, n++);
952 			if (IS_ERR(rq)) {
953 				err = PTR_ERR(rq);
954 				goto out;
955 			}
956 
957 			i915_request_put(rq);
958 		}
959 	}
960 
961 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
962 	if (err)
963 		goto out;
964 
965 	if (i915_request_wait(head, 0,
966 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
967 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
968 		       outer->name, count, n);
969 		GEM_TRACE_DUMP();
970 		intel_gt_set_wedged(outer->gt);
971 		err = -EIO;
972 	}
973 
974 out:
975 	i915_request_put(head);
976 	return err;
977 }
978 
979 static int live_timeslice_preempt(void *arg)
980 {
981 	struct intel_gt *gt = arg;
982 	struct drm_i915_gem_object *obj;
983 	struct intel_engine_cs *engine;
984 	enum intel_engine_id id;
985 	struct i915_vma *vma;
986 	void *vaddr;
987 	int err = 0;
988 
989 	/*
990 	 * If a request takes too long, we would like to give other users
991 	 * a fair go on the GPU. In particular, users may create batches
992 	 * that wait upon external input, where that input may even be
993 	 * supplied by another GPU job. To avoid blocking forever, we
994 	 * need to preempt the current task and replace it with another
995 	 * ready task.
996 	 */
997 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
998 		return 0;
999 
1000 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1001 	if (IS_ERR(obj))
1002 		return PTR_ERR(obj);
1003 
1004 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1005 	if (IS_ERR(vma)) {
1006 		err = PTR_ERR(vma);
1007 		goto err_obj;
1008 	}
1009 
1010 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1011 	if (IS_ERR(vaddr)) {
1012 		err = PTR_ERR(vaddr);
1013 		goto err_obj;
1014 	}
1015 
1016 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1017 	if (err)
1018 		goto err_map;
1019 
1020 	err = i915_vma_sync(vma);
1021 	if (err)
1022 		goto err_pin;
1023 
1024 	for_each_engine(engine, gt, id) {
1025 		if (!intel_engine_has_preemption(engine))
1026 			continue;
1027 
1028 		memset(vaddr, 0, PAGE_SIZE);
1029 
1030 		st_engine_heartbeat_disable(engine);
1031 		err = slice_semaphore_queue(engine, vma, 5);
1032 		st_engine_heartbeat_enable(engine);
1033 		if (err)
1034 			goto err_pin;
1035 
1036 		if (igt_flush_test(gt->i915)) {
1037 			err = -EIO;
1038 			goto err_pin;
1039 		}
1040 	}
1041 
1042 err_pin:
1043 	i915_vma_unpin(vma);
1044 err_map:
1045 	i915_gem_object_unpin_map(obj);
1046 err_obj:
1047 	i915_gem_object_put(obj);
1048 	return err;
1049 }
1050 
1051 static struct i915_request *
1052 create_rewinder(struct intel_context *ce,
1053 		struct i915_request *wait,
1054 		void *slot, int idx)
1055 {
1056 	const u32 offset =
1057 		i915_ggtt_offset(ce->engine->status_page.vma) +
1058 		offset_in_page(slot);
1059 	struct i915_request *rq;
1060 	u32 *cs;
1061 	int err;
1062 
1063 	rq = intel_context_create_request(ce);
1064 	if (IS_ERR(rq))
1065 		return rq;
1066 
1067 	if (wait) {
1068 		err = i915_request_await_dma_fence(rq, &wait->fence);
1069 		if (err)
1070 			goto err;
1071 	}
1072 
1073 	cs = intel_ring_begin(rq, 14);
1074 	if (IS_ERR(cs)) {
1075 		err = PTR_ERR(cs);
1076 		goto err;
1077 	}
1078 
1079 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1080 	*cs++ = MI_NOOP;
1081 
1082 	*cs++ = MI_SEMAPHORE_WAIT |
1083 		MI_SEMAPHORE_GLOBAL_GTT |
1084 		MI_SEMAPHORE_POLL |
1085 		MI_SEMAPHORE_SAD_GTE_SDD;
1086 	*cs++ = idx;
1087 	*cs++ = offset;
1088 	*cs++ = 0;
1089 
1090 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1091 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1092 	*cs++ = offset + idx * sizeof(u32);
1093 	*cs++ = 0;
1094 
1095 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1096 	*cs++ = offset;
1097 	*cs++ = 0;
1098 	*cs++ = idx + 1;
1099 
1100 	intel_ring_advance(rq, cs);
1101 
1102 	err = 0;
1103 err:
1104 	i915_request_get(rq);
1105 	i915_request_add(rq);
1106 	if (err) {
1107 		i915_request_put(rq);
1108 		return ERR_PTR(err);
1109 	}
1110 
1111 	return rq;
1112 }
1113 
1114 static int live_timeslice_rewind(void *arg)
1115 {
1116 	struct intel_gt *gt = arg;
1117 	struct intel_engine_cs *engine;
1118 	enum intel_engine_id id;
1119 
1120 	/*
1121 	 * The usual presumption on timeslice expiration is that we replace
1122 	 * the active context with another. However, given a chain of
1123 	 * dependencies we may end up with replacing the context with itself,
1124 	 * but only a few of those requests, forcing us to rewind the
1125 	 * RING_TAIL of the original request.
1126 	 */
1127 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1128 		return 0;
1129 
1130 	for_each_engine(engine, gt, id) {
1131 		enum { A1, A2, B1 };
1132 		enum { X = 1, Z, Y };
1133 		struct i915_request *rq[3] = {};
1134 		struct intel_context *ce;
1135 		unsigned long timeslice;
1136 		int i, err = 0;
1137 		u32 *slot;
1138 
1139 		if (!intel_engine_has_timeslices(engine))
1140 			continue;
1141 
1142 		/*
1143 		 * A:rq1 -- semaphore wait, timestamp X
1144 		 * A:rq2 -- write timestamp Y
1145 		 *
1146 		 * B:rq1 [await A:rq1] -- write timestamp Z
1147 		 *
1148 		 * Force timeslice, release semaphore.
1149 		 *
1150 		 * Expect execution/evaluation order XZY
1151 		 */
1152 
1153 		st_engine_heartbeat_disable(engine);
1154 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1155 
1156 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1157 
1158 		ce = intel_context_create(engine);
1159 		if (IS_ERR(ce)) {
1160 			err = PTR_ERR(ce);
1161 			goto err;
1162 		}
1163 
1164 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1165 		if (IS_ERR(rq[A1])) {
1166 			intel_context_put(ce);
1167 			goto err;
1168 		}
1169 
1170 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1171 		intel_context_put(ce);
1172 		if (IS_ERR(rq[A2]))
1173 			goto err;
1174 
1175 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1176 		if (err) {
1177 			pr_err("%s: failed to submit first context\n",
1178 			       engine->name);
1179 			goto err;
1180 		}
1181 
1182 		ce = intel_context_create(engine);
1183 		if (IS_ERR(ce)) {
1184 			err = PTR_ERR(ce);
1185 			goto err;
1186 		}
1187 
1188 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1189 		intel_context_put(ce);
1190 		if (IS_ERR(rq[2]))
1191 			goto err;
1192 
1193 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1194 		if (err) {
1195 			pr_err("%s: failed to submit second context\n",
1196 			       engine->name);
1197 			goto err;
1198 		}
1199 
1200 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1201 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1202 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1203 			/* Wait for the timeslice to kick in */
1204 			timer_delete(&engine->execlists.timer);
1205 			tasklet_hi_schedule(&engine->sched_engine->tasklet);
1206 			intel_engine_flush_submission(engine);
1207 		}
1208 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1209 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1210 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1211 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1212 
1213 		/* Release the hounds! */
1214 		slot[0] = 1;
1215 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1216 
1217 		for (i = 1; i <= 3; i++) {
1218 			unsigned long timeout = jiffies + HZ / 2;
1219 
1220 			while (!READ_ONCE(slot[i]) &&
1221 			       time_before(jiffies, timeout))
1222 				;
1223 
1224 			if (!time_before(jiffies, timeout)) {
1225 				pr_err("%s: rq[%d] timed out\n",
1226 				       engine->name, i - 1);
1227 				err = -ETIME;
1228 				goto err;
1229 			}
1230 
1231 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1232 		}
1233 
1234 		/* XZY: XZ < XY */
1235 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1236 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1237 			       engine->name,
1238 			       slot[Z] - slot[X],
1239 			       slot[Y] - slot[X]);
1240 			err = -EINVAL;
1241 		}
1242 
1243 err:
1244 		memset32(&slot[0], -1, 4);
1245 		wmb();
1246 
1247 		engine->props.timeslice_duration_ms = timeslice;
1248 		st_engine_heartbeat_enable(engine);
1249 		for (i = 0; i < 3; i++)
1250 			i915_request_put(rq[i]);
1251 		if (igt_flush_test(gt->i915))
1252 			err = -EIO;
1253 		if (err)
1254 			return err;
1255 	}
1256 
1257 	return 0;
1258 }
1259 
1260 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1261 {
1262 	struct i915_request *rq;
1263 
1264 	rq = intel_engine_create_kernel_request(engine);
1265 	if (IS_ERR(rq))
1266 		return rq;
1267 
1268 	i915_request_get(rq);
1269 	i915_request_add(rq);
1270 
1271 	return rq;
1272 }
1273 
1274 static long slice_timeout(struct intel_engine_cs *engine)
1275 {
1276 	long timeout;
1277 
1278 	/* Enough time for a timeslice to kick in, and kick out */
1279 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1280 
1281 	/* Enough time for the nop request to complete */
1282 	timeout += HZ / 5;
1283 
1284 	return timeout + 1;
1285 }
1286 
1287 static int live_timeslice_queue(void *arg)
1288 {
1289 	struct intel_gt *gt = arg;
1290 	struct drm_i915_gem_object *obj;
1291 	struct intel_engine_cs *engine;
1292 	enum intel_engine_id id;
1293 	struct i915_vma *vma;
1294 	void *vaddr;
1295 	int err = 0;
1296 
1297 	/*
1298 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1299 	 * timeslicing between them disabled, we *do* enable timeslicing
1300 	 * if the queue demands it. (Normally, we do not submit if
1301 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1302 	 * eject ELSP[0] in favour of the queue.)
1303 	 */
1304 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1305 		return 0;
1306 
1307 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1308 	if (IS_ERR(obj))
1309 		return PTR_ERR(obj);
1310 
1311 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1312 	if (IS_ERR(vma)) {
1313 		err = PTR_ERR(vma);
1314 		goto err_obj;
1315 	}
1316 
1317 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1318 	if (IS_ERR(vaddr)) {
1319 		err = PTR_ERR(vaddr);
1320 		goto err_obj;
1321 	}
1322 
1323 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1324 	if (err)
1325 		goto err_map;
1326 
1327 	err = i915_vma_sync(vma);
1328 	if (err)
1329 		goto err_pin;
1330 
1331 	for_each_engine(engine, gt, id) {
1332 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1333 		struct i915_request *rq, *nop;
1334 
1335 		if (!intel_engine_has_preemption(engine))
1336 			continue;
1337 
1338 		st_engine_heartbeat_disable(engine);
1339 		memset(vaddr, 0, PAGE_SIZE);
1340 
1341 		/* ELSP[0]: semaphore wait */
1342 		rq = semaphore_queue(engine, vma, 0);
1343 		if (IS_ERR(rq)) {
1344 			err = PTR_ERR(rq);
1345 			goto err_heartbeat;
1346 		}
1347 		engine->sched_engine->schedule(rq, &attr);
1348 		err = wait_for_submit(engine, rq, HZ / 2);
1349 		if (err) {
1350 			pr_err("%s: Timed out trying to submit semaphores\n",
1351 			       engine->name);
1352 			goto err_rq;
1353 		}
1354 
1355 		/* ELSP[1]: nop request */
1356 		nop = nop_request(engine);
1357 		if (IS_ERR(nop)) {
1358 			err = PTR_ERR(nop);
1359 			goto err_rq;
1360 		}
1361 		err = wait_for_submit(engine, nop, HZ / 2);
1362 		i915_request_put(nop);
1363 		if (err) {
1364 			pr_err("%s: Timed out trying to submit nop\n",
1365 			       engine->name);
1366 			goto err_rq;
1367 		}
1368 
1369 		GEM_BUG_ON(i915_request_completed(rq));
1370 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1371 
1372 		/* Queue: semaphore signal, matching priority as semaphore */
1373 		err = release_queue(engine, vma, 1, effective_prio(rq));
1374 		if (err)
1375 			goto err_rq;
1376 
1377 		/* Wait until we ack the release_queue and start timeslicing */
1378 		do {
1379 			cond_resched();
1380 			intel_engine_flush_submission(engine);
1381 		} while (READ_ONCE(engine->execlists.pending[0]));
1382 
1383 		/* Timeslice every jiffy, so within 2 we should signal */
1384 		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1385 			struct drm_printer p =
1386 				drm_info_printer(gt->i915->drm.dev);
1387 
1388 			pr_err("%s: Failed to timeslice into queue\n",
1389 			       engine->name);
1390 			intel_engine_dump(engine, &p,
1391 					  "%s\n", engine->name);
1392 
1393 			memset(vaddr, 0xff, PAGE_SIZE);
1394 			err = -EIO;
1395 		}
1396 err_rq:
1397 		i915_request_put(rq);
1398 err_heartbeat:
1399 		st_engine_heartbeat_enable(engine);
1400 		if (err)
1401 			break;
1402 	}
1403 
1404 err_pin:
1405 	i915_vma_unpin(vma);
1406 err_map:
1407 	i915_gem_object_unpin_map(obj);
1408 err_obj:
1409 	i915_gem_object_put(obj);
1410 	return err;
1411 }
1412 
1413 static int live_timeslice_nopreempt(void *arg)
1414 {
1415 	struct intel_gt *gt = arg;
1416 	struct intel_engine_cs *engine;
1417 	enum intel_engine_id id;
1418 	struct igt_spinner spin;
1419 	int err = 0;
1420 
1421 	/*
1422 	 * We should not timeslice into a request that is marked with
1423 	 * I915_REQUEST_NOPREEMPT.
1424 	 */
1425 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1426 		return 0;
1427 
1428 	if (igt_spinner_init(&spin, gt))
1429 		return -ENOMEM;
1430 
1431 	for_each_engine(engine, gt, id) {
1432 		struct intel_context *ce;
1433 		struct i915_request *rq;
1434 		unsigned long timeslice;
1435 
1436 		if (!intel_engine_has_preemption(engine))
1437 			continue;
1438 
1439 		ce = intel_context_create(engine);
1440 		if (IS_ERR(ce)) {
1441 			err = PTR_ERR(ce);
1442 			break;
1443 		}
1444 
1445 		st_engine_heartbeat_disable(engine);
1446 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1447 
1448 		/* Create an unpreemptible spinner */
1449 
1450 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1451 		intel_context_put(ce);
1452 		if (IS_ERR(rq)) {
1453 			err = PTR_ERR(rq);
1454 			goto out_heartbeat;
1455 		}
1456 
1457 		i915_request_get(rq);
1458 		i915_request_add(rq);
1459 
1460 		if (!igt_wait_for_spinner(&spin, rq)) {
1461 			i915_request_put(rq);
1462 			err = -ETIME;
1463 			goto out_spin;
1464 		}
1465 
1466 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1467 		i915_request_put(rq);
1468 
1469 		/* Followed by a maximum priority barrier (heartbeat) */
1470 
1471 		ce = intel_context_create(engine);
1472 		if (IS_ERR(ce)) {
1473 			err = PTR_ERR(ce);
1474 			goto out_spin;
1475 		}
1476 
1477 		rq = intel_context_create_request(ce);
1478 		intel_context_put(ce);
1479 		if (IS_ERR(rq)) {
1480 			err = PTR_ERR(rq);
1481 			goto out_spin;
1482 		}
1483 
1484 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1485 		i915_request_get(rq);
1486 		i915_request_add(rq);
1487 
1488 		/*
1489 		 * Wait until the barrier is in ELSP, and we know timeslicing
1490 		 * will have been activated.
1491 		 */
1492 		if (wait_for_submit(engine, rq, HZ / 2)) {
1493 			i915_request_put(rq);
1494 			err = -ETIME;
1495 			goto out_spin;
1496 		}
1497 
1498 		/*
1499 		 * Since the ELSP[0] request is unpreemptible, it should not
1500 		 * allow the maximum priority barrier through. Wait long
1501 		 * enough to see if it is timesliced in by mistake.
1502 		 */
1503 		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1504 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1505 			       engine->name);
1506 			err = -EINVAL;
1507 		}
1508 		i915_request_put(rq);
1509 
1510 out_spin:
1511 		igt_spinner_end(&spin);
1512 out_heartbeat:
1513 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1514 		st_engine_heartbeat_enable(engine);
1515 		if (err)
1516 			break;
1517 
1518 		if (igt_flush_test(gt->i915)) {
1519 			err = -EIO;
1520 			break;
1521 		}
1522 	}
1523 
1524 	igt_spinner_fini(&spin);
1525 	return err;
1526 }
1527 
1528 static int live_busywait_preempt(void *arg)
1529 {
1530 	struct intel_gt *gt = arg;
1531 	struct i915_gem_context *ctx_hi, *ctx_lo;
1532 	struct intel_engine_cs *engine;
1533 	struct drm_i915_gem_object *obj;
1534 	struct i915_vma *vma;
1535 	enum intel_engine_id id;
1536 	u32 *map;
1537 	int err;
1538 
1539 	/*
1540 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1541 	 * preempt the busywaits used to synchronise between rings.
1542 	 */
1543 
1544 	ctx_hi = kernel_context(gt->i915, NULL);
1545 	if (IS_ERR(ctx_hi))
1546 		return PTR_ERR(ctx_hi);
1547 
1548 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1549 
1550 	ctx_lo = kernel_context(gt->i915, NULL);
1551 	if (IS_ERR(ctx_lo)) {
1552 		err = PTR_ERR(ctx_lo);
1553 		goto err_ctx_hi;
1554 	}
1555 
1556 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1557 
1558 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1559 	if (IS_ERR(obj)) {
1560 		err = PTR_ERR(obj);
1561 		goto err_ctx_lo;
1562 	}
1563 
1564 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1565 	if (IS_ERR(map)) {
1566 		err = PTR_ERR(map);
1567 		goto err_obj;
1568 	}
1569 
1570 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1571 	if (IS_ERR(vma)) {
1572 		err = PTR_ERR(vma);
1573 		goto err_map;
1574 	}
1575 
1576 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1577 	if (err)
1578 		goto err_map;
1579 
1580 	err = i915_vma_sync(vma);
1581 	if (err)
1582 		goto err_vma;
1583 
1584 	for_each_engine(engine, gt, id) {
1585 		struct i915_request *lo, *hi;
1586 		struct igt_live_test t;
1587 		u32 *cs;
1588 
1589 		if (!intel_engine_has_preemption(engine))
1590 			continue;
1591 
1592 		if (!intel_engine_can_store_dword(engine))
1593 			continue;
1594 
1595 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1596 			err = -EIO;
1597 			goto err_vma;
1598 		}
1599 
1600 		/*
1601 		 * We create two requests. The low priority request
1602 		 * busywaits on a semaphore (inside the ringbuffer where
1603 		 * is should be preemptible) and the high priority requests
1604 		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1605 		 * allowing the first request to complete. If preemption
1606 		 * fails, we hang instead.
1607 		 */
1608 
1609 		lo = igt_request_alloc(ctx_lo, engine);
1610 		if (IS_ERR(lo)) {
1611 			err = PTR_ERR(lo);
1612 			goto err_vma;
1613 		}
1614 
1615 		cs = intel_ring_begin(lo, 8);
1616 		if (IS_ERR(cs)) {
1617 			err = PTR_ERR(cs);
1618 			i915_request_add(lo);
1619 			goto err_vma;
1620 		}
1621 
1622 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1623 		*cs++ = i915_ggtt_offset(vma);
1624 		*cs++ = 0;
1625 		*cs++ = 1;
1626 
1627 		/* XXX Do we need a flush + invalidate here? */
1628 
1629 		*cs++ = MI_SEMAPHORE_WAIT |
1630 			MI_SEMAPHORE_GLOBAL_GTT |
1631 			MI_SEMAPHORE_POLL |
1632 			MI_SEMAPHORE_SAD_EQ_SDD;
1633 		*cs++ = 0;
1634 		*cs++ = i915_ggtt_offset(vma);
1635 		*cs++ = 0;
1636 
1637 		intel_ring_advance(lo, cs);
1638 
1639 		i915_request_get(lo);
1640 		i915_request_add(lo);
1641 
1642 		if (wait_for(READ_ONCE(*map), 10)) {
1643 			i915_request_put(lo);
1644 			err = -ETIMEDOUT;
1645 			goto err_vma;
1646 		}
1647 
1648 		/* Low priority request should be busywaiting now */
1649 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1650 			i915_request_put(lo);
1651 			pr_err("%s: Busywaiting request did not!\n",
1652 			       engine->name);
1653 			err = -EIO;
1654 			goto err_vma;
1655 		}
1656 
1657 		hi = igt_request_alloc(ctx_hi, engine);
1658 		if (IS_ERR(hi)) {
1659 			err = PTR_ERR(hi);
1660 			i915_request_put(lo);
1661 			goto err_vma;
1662 		}
1663 
1664 		cs = intel_ring_begin(hi, 4);
1665 		if (IS_ERR(cs)) {
1666 			err = PTR_ERR(cs);
1667 			i915_request_add(hi);
1668 			i915_request_put(lo);
1669 			goto err_vma;
1670 		}
1671 
1672 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1673 		*cs++ = i915_ggtt_offset(vma);
1674 		*cs++ = 0;
1675 		*cs++ = 0;
1676 
1677 		intel_ring_advance(hi, cs);
1678 		i915_request_add(hi);
1679 
1680 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1681 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1682 
1683 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1684 			       engine->name);
1685 
1686 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1687 			GEM_TRACE_DUMP();
1688 
1689 			i915_request_put(lo);
1690 			intel_gt_set_wedged(gt);
1691 			err = -EIO;
1692 			goto err_vma;
1693 		}
1694 		GEM_BUG_ON(READ_ONCE(*map));
1695 		i915_request_put(lo);
1696 
1697 		if (igt_live_test_end(&t)) {
1698 			err = -EIO;
1699 			goto err_vma;
1700 		}
1701 	}
1702 
1703 	err = 0;
1704 err_vma:
1705 	i915_vma_unpin(vma);
1706 err_map:
1707 	i915_gem_object_unpin_map(obj);
1708 err_obj:
1709 	i915_gem_object_put(obj);
1710 err_ctx_lo:
1711 	kernel_context_close(ctx_lo);
1712 err_ctx_hi:
1713 	kernel_context_close(ctx_hi);
1714 	return err;
1715 }
1716 
1717 static struct i915_request *
1718 spinner_create_request(struct igt_spinner *spin,
1719 		       struct i915_gem_context *ctx,
1720 		       struct intel_engine_cs *engine,
1721 		       u32 arb)
1722 {
1723 	struct intel_context *ce;
1724 	struct i915_request *rq;
1725 
1726 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1727 	if (IS_ERR(ce))
1728 		return ERR_CAST(ce);
1729 
1730 	rq = igt_spinner_create_request(spin, ce, arb);
1731 	intel_context_put(ce);
1732 	return rq;
1733 }
1734 
1735 static int live_preempt(void *arg)
1736 {
1737 	struct intel_gt *gt = arg;
1738 	struct i915_gem_context *ctx_hi, *ctx_lo;
1739 	struct igt_spinner spin_hi, spin_lo;
1740 	struct intel_engine_cs *engine;
1741 	enum intel_engine_id id;
1742 	int err = -ENOMEM;
1743 
1744 	ctx_hi = kernel_context(gt->i915, NULL);
1745 	if (!ctx_hi)
1746 		return -ENOMEM;
1747 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1748 
1749 	ctx_lo = kernel_context(gt->i915, NULL);
1750 	if (!ctx_lo)
1751 		goto err_ctx_hi;
1752 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1753 
1754 	if (igt_spinner_init(&spin_hi, gt))
1755 		goto err_ctx_lo;
1756 
1757 	if (igt_spinner_init(&spin_lo, gt))
1758 		goto err_spin_hi;
1759 
1760 	for_each_engine(engine, gt, id) {
1761 		struct igt_live_test t;
1762 		struct i915_request *rq;
1763 
1764 		if (!intel_engine_has_preemption(engine))
1765 			continue;
1766 
1767 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1768 			err = -EIO;
1769 			goto err_spin_lo;
1770 		}
1771 
1772 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1773 					    MI_ARB_CHECK);
1774 		if (IS_ERR(rq)) {
1775 			err = PTR_ERR(rq);
1776 			goto err_spin_lo;
1777 		}
1778 
1779 		i915_request_add(rq);
1780 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1781 			GEM_TRACE("lo spinner failed to start\n");
1782 			GEM_TRACE_DUMP();
1783 			intel_gt_set_wedged(gt);
1784 			err = -EIO;
1785 			goto err_spin_lo;
1786 		}
1787 
1788 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1789 					    MI_ARB_CHECK);
1790 		if (IS_ERR(rq)) {
1791 			igt_spinner_end(&spin_lo);
1792 			err = PTR_ERR(rq);
1793 			goto err_spin_lo;
1794 		}
1795 
1796 		i915_request_add(rq);
1797 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1798 			GEM_TRACE("hi spinner failed to start\n");
1799 			GEM_TRACE_DUMP();
1800 			intel_gt_set_wedged(gt);
1801 			err = -EIO;
1802 			goto err_spin_lo;
1803 		}
1804 
1805 		igt_spinner_end(&spin_hi);
1806 		igt_spinner_end(&spin_lo);
1807 
1808 		if (igt_live_test_end(&t)) {
1809 			err = -EIO;
1810 			goto err_spin_lo;
1811 		}
1812 	}
1813 
1814 	err = 0;
1815 err_spin_lo:
1816 	igt_spinner_fini(&spin_lo);
1817 err_spin_hi:
1818 	igt_spinner_fini(&spin_hi);
1819 err_ctx_lo:
1820 	kernel_context_close(ctx_lo);
1821 err_ctx_hi:
1822 	kernel_context_close(ctx_hi);
1823 	return err;
1824 }
1825 
1826 static int live_late_preempt(void *arg)
1827 {
1828 	struct intel_gt *gt = arg;
1829 	struct i915_gem_context *ctx_hi, *ctx_lo;
1830 	struct igt_spinner spin_hi, spin_lo;
1831 	struct intel_engine_cs *engine;
1832 	struct i915_sched_attr attr = {};
1833 	enum intel_engine_id id;
1834 	int err = -ENOMEM;
1835 
1836 	ctx_hi = kernel_context(gt->i915, NULL);
1837 	if (!ctx_hi)
1838 		return -ENOMEM;
1839 
1840 	ctx_lo = kernel_context(gt->i915, NULL);
1841 	if (!ctx_lo)
1842 		goto err_ctx_hi;
1843 
1844 	if (igt_spinner_init(&spin_hi, gt))
1845 		goto err_ctx_lo;
1846 
1847 	if (igt_spinner_init(&spin_lo, gt))
1848 		goto err_spin_hi;
1849 
1850 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1851 	ctx_lo->sched.priority = 1;
1852 
1853 	for_each_engine(engine, gt, id) {
1854 		struct igt_live_test t;
1855 		struct i915_request *rq;
1856 
1857 		if (!intel_engine_has_preemption(engine))
1858 			continue;
1859 
1860 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1861 			err = -EIO;
1862 			goto err_spin_lo;
1863 		}
1864 
1865 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1866 					    MI_ARB_CHECK);
1867 		if (IS_ERR(rq)) {
1868 			err = PTR_ERR(rq);
1869 			goto err_spin_lo;
1870 		}
1871 
1872 		i915_request_add(rq);
1873 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1874 			pr_err("First context failed to start\n");
1875 			goto err_wedged;
1876 		}
1877 
1878 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1879 					    MI_NOOP);
1880 		if (IS_ERR(rq)) {
1881 			igt_spinner_end(&spin_lo);
1882 			err = PTR_ERR(rq);
1883 			goto err_spin_lo;
1884 		}
1885 
1886 		i915_request_add(rq);
1887 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1888 			pr_err("Second context overtook first?\n");
1889 			goto err_wedged;
1890 		}
1891 
1892 		attr.priority = I915_PRIORITY_MAX;
1893 		engine->sched_engine->schedule(rq, &attr);
1894 
1895 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1896 			pr_err("High priority context failed to preempt the low priority context\n");
1897 			GEM_TRACE_DUMP();
1898 			goto err_wedged;
1899 		}
1900 
1901 		igt_spinner_end(&spin_hi);
1902 		igt_spinner_end(&spin_lo);
1903 
1904 		if (igt_live_test_end(&t)) {
1905 			err = -EIO;
1906 			goto err_spin_lo;
1907 		}
1908 	}
1909 
1910 	err = 0;
1911 err_spin_lo:
1912 	igt_spinner_fini(&spin_lo);
1913 err_spin_hi:
1914 	igt_spinner_fini(&spin_hi);
1915 err_ctx_lo:
1916 	kernel_context_close(ctx_lo);
1917 err_ctx_hi:
1918 	kernel_context_close(ctx_hi);
1919 	return err;
1920 
1921 err_wedged:
1922 	igt_spinner_end(&spin_hi);
1923 	igt_spinner_end(&spin_lo);
1924 	intel_gt_set_wedged(gt);
1925 	err = -EIO;
1926 	goto err_spin_lo;
1927 }
1928 
1929 struct preempt_client {
1930 	struct igt_spinner spin;
1931 	struct i915_gem_context *ctx;
1932 };
1933 
1934 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1935 {
1936 	c->ctx = kernel_context(gt->i915, NULL);
1937 	if (!c->ctx)
1938 		return -ENOMEM;
1939 
1940 	if (igt_spinner_init(&c->spin, gt))
1941 		goto err_ctx;
1942 
1943 	return 0;
1944 
1945 err_ctx:
1946 	kernel_context_close(c->ctx);
1947 	return -ENOMEM;
1948 }
1949 
1950 static void preempt_client_fini(struct preempt_client *c)
1951 {
1952 	igt_spinner_fini(&c->spin);
1953 	kernel_context_close(c->ctx);
1954 }
1955 
1956 static int live_nopreempt(void *arg)
1957 {
1958 	struct intel_gt *gt = arg;
1959 	struct intel_engine_cs *engine;
1960 	struct preempt_client a, b;
1961 	enum intel_engine_id id;
1962 	int err = -ENOMEM;
1963 
1964 	/*
1965 	 * Verify that we can disable preemption for an individual request
1966 	 * that may be being observed and not want to be interrupted.
1967 	 */
1968 
1969 	if (preempt_client_init(gt, &a))
1970 		return -ENOMEM;
1971 	if (preempt_client_init(gt, &b))
1972 		goto err_client_a;
1973 	b.ctx->sched.priority = I915_PRIORITY_MAX;
1974 
1975 	for_each_engine(engine, gt, id) {
1976 		struct i915_request *rq_a, *rq_b;
1977 
1978 		if (!intel_engine_has_preemption(engine))
1979 			continue;
1980 
1981 		engine->execlists.preempt_hang.count = 0;
1982 
1983 		rq_a = spinner_create_request(&a.spin,
1984 					      a.ctx, engine,
1985 					      MI_ARB_CHECK);
1986 		if (IS_ERR(rq_a)) {
1987 			err = PTR_ERR(rq_a);
1988 			goto err_client_b;
1989 		}
1990 
1991 		/* Low priority client, but unpreemptable! */
1992 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1993 
1994 		i915_request_add(rq_a);
1995 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1996 			pr_err("First client failed to start\n");
1997 			goto err_wedged;
1998 		}
1999 
2000 		rq_b = spinner_create_request(&b.spin,
2001 					      b.ctx, engine,
2002 					      MI_ARB_CHECK);
2003 		if (IS_ERR(rq_b)) {
2004 			err = PTR_ERR(rq_b);
2005 			goto err_client_b;
2006 		}
2007 
2008 		i915_request_add(rq_b);
2009 
2010 		/* B is much more important than A! (But A is unpreemptable.) */
2011 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2012 
2013 		/* Wait long enough for preemption and timeslicing */
2014 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2015 			pr_err("Second client started too early!\n");
2016 			goto err_wedged;
2017 		}
2018 
2019 		igt_spinner_end(&a.spin);
2020 
2021 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2022 			pr_err("Second client failed to start\n");
2023 			goto err_wedged;
2024 		}
2025 
2026 		igt_spinner_end(&b.spin);
2027 
2028 		if (engine->execlists.preempt_hang.count) {
2029 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2030 			       engine->execlists.preempt_hang.count);
2031 			err = -EINVAL;
2032 			goto err_wedged;
2033 		}
2034 
2035 		if (igt_flush_test(gt->i915))
2036 			goto err_wedged;
2037 	}
2038 
2039 	err = 0;
2040 err_client_b:
2041 	preempt_client_fini(&b);
2042 err_client_a:
2043 	preempt_client_fini(&a);
2044 	return err;
2045 
2046 err_wedged:
2047 	igt_spinner_end(&b.spin);
2048 	igt_spinner_end(&a.spin);
2049 	intel_gt_set_wedged(gt);
2050 	err = -EIO;
2051 	goto err_client_b;
2052 }
2053 
2054 struct live_preempt_cancel {
2055 	struct intel_engine_cs *engine;
2056 	struct preempt_client a, b;
2057 };
2058 
2059 static int __cancel_active0(struct live_preempt_cancel *arg)
2060 {
2061 	struct i915_request *rq;
2062 	struct igt_live_test t;
2063 	int err;
2064 
2065 	/* Preempt cancel of ELSP0 */
2066 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2067 	if (igt_live_test_begin(&t, arg->engine->i915,
2068 				__func__, arg->engine->name))
2069 		return -EIO;
2070 
2071 	rq = spinner_create_request(&arg->a.spin,
2072 				    arg->a.ctx, arg->engine,
2073 				    MI_ARB_CHECK);
2074 	if (IS_ERR(rq))
2075 		return PTR_ERR(rq);
2076 
2077 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2078 	i915_request_get(rq);
2079 	i915_request_add(rq);
2080 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2081 		err = -EIO;
2082 		goto out;
2083 	}
2084 
2085 	intel_context_ban(rq->context, rq);
2086 	err = intel_engine_pulse(arg->engine);
2087 	if (err)
2088 		goto out;
2089 
2090 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2091 	if (err) {
2092 		pr_err("Cancelled inflight0 request did not reset\n");
2093 		goto out;
2094 	}
2095 
2096 out:
2097 	i915_request_put(rq);
2098 	if (igt_live_test_end(&t))
2099 		err = -EIO;
2100 	return err;
2101 }
2102 
2103 static int __cancel_active1(struct live_preempt_cancel *arg)
2104 {
2105 	struct i915_request *rq[2] = {};
2106 	struct igt_live_test t;
2107 	int err;
2108 
2109 	/* Preempt cancel of ELSP1 */
2110 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2111 	if (igt_live_test_begin(&t, arg->engine->i915,
2112 				__func__, arg->engine->name))
2113 		return -EIO;
2114 
2115 	rq[0] = spinner_create_request(&arg->a.spin,
2116 				       arg->a.ctx, arg->engine,
2117 				       MI_NOOP); /* no preemption */
2118 	if (IS_ERR(rq[0]))
2119 		return PTR_ERR(rq[0]);
2120 
2121 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2122 	i915_request_get(rq[0]);
2123 	i915_request_add(rq[0]);
2124 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2125 		err = -EIO;
2126 		goto out;
2127 	}
2128 
2129 	rq[1] = spinner_create_request(&arg->b.spin,
2130 				       arg->b.ctx, arg->engine,
2131 				       MI_ARB_CHECK);
2132 	if (IS_ERR(rq[1])) {
2133 		err = PTR_ERR(rq[1]);
2134 		goto out;
2135 	}
2136 
2137 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2138 	i915_request_get(rq[1]);
2139 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2140 	i915_request_add(rq[1]);
2141 	if (err)
2142 		goto out;
2143 
2144 	intel_context_ban(rq[1]->context, rq[1]);
2145 	err = intel_engine_pulse(arg->engine);
2146 	if (err)
2147 		goto out;
2148 
2149 	igt_spinner_end(&arg->a.spin);
2150 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2151 	if (err)
2152 		goto out;
2153 
2154 	if (rq[0]->fence.error != 0) {
2155 		pr_err("Normal inflight0 request did not complete\n");
2156 		err = -EINVAL;
2157 		goto out;
2158 	}
2159 
2160 	if (rq[1]->fence.error != -EIO) {
2161 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2162 		err = -EINVAL;
2163 		goto out;
2164 	}
2165 
2166 out:
2167 	i915_request_put(rq[1]);
2168 	i915_request_put(rq[0]);
2169 	if (igt_live_test_end(&t))
2170 		err = -EIO;
2171 	return err;
2172 }
2173 
2174 static int __cancel_queued(struct live_preempt_cancel *arg)
2175 {
2176 	struct i915_request *rq[3] = {};
2177 	struct igt_live_test t;
2178 	int err;
2179 
2180 	/* Full ELSP and one in the wings */
2181 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2182 	if (igt_live_test_begin(&t, arg->engine->i915,
2183 				__func__, arg->engine->name))
2184 		return -EIO;
2185 
2186 	rq[0] = spinner_create_request(&arg->a.spin,
2187 				       arg->a.ctx, arg->engine,
2188 				       MI_ARB_CHECK);
2189 	if (IS_ERR(rq[0]))
2190 		return PTR_ERR(rq[0]);
2191 
2192 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2193 	i915_request_get(rq[0]);
2194 	i915_request_add(rq[0]);
2195 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2196 		err = -EIO;
2197 		goto out;
2198 	}
2199 
2200 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2201 	if (IS_ERR(rq[1])) {
2202 		err = PTR_ERR(rq[1]);
2203 		goto out;
2204 	}
2205 
2206 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2207 	i915_request_get(rq[1]);
2208 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2209 	i915_request_add(rq[1]);
2210 	if (err)
2211 		goto out;
2212 
2213 	rq[2] = spinner_create_request(&arg->b.spin,
2214 				       arg->a.ctx, arg->engine,
2215 				       MI_ARB_CHECK);
2216 	if (IS_ERR(rq[2])) {
2217 		err = PTR_ERR(rq[2]);
2218 		goto out;
2219 	}
2220 
2221 	i915_request_get(rq[2]);
2222 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2223 	i915_request_add(rq[2]);
2224 	if (err)
2225 		goto out;
2226 
2227 	intel_context_ban(rq[2]->context, rq[2]);
2228 	err = intel_engine_pulse(arg->engine);
2229 	if (err)
2230 		goto out;
2231 
2232 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2233 	if (err)
2234 		goto out;
2235 
2236 	if (rq[0]->fence.error != -EIO) {
2237 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2238 		err = -EINVAL;
2239 		goto out;
2240 	}
2241 
2242 	/*
2243 	 * The behavior between having semaphores and not is different. With
2244 	 * semaphores the subsequent request is on the hardware and not cancelled
2245 	 * while without the request is held in the driver and cancelled.
2246 	 */
2247 	if (intel_engine_has_semaphores(rq[1]->engine) &&
2248 	    rq[1]->fence.error != 0) {
2249 		pr_err("Normal inflight1 request did not complete\n");
2250 		err = -EINVAL;
2251 		goto out;
2252 	}
2253 
2254 	if (rq[2]->fence.error != -EIO) {
2255 		pr_err("Cancelled queued request did not report -EIO\n");
2256 		err = -EINVAL;
2257 		goto out;
2258 	}
2259 
2260 out:
2261 	i915_request_put(rq[2]);
2262 	i915_request_put(rq[1]);
2263 	i915_request_put(rq[0]);
2264 	if (igt_live_test_end(&t))
2265 		err = -EIO;
2266 	return err;
2267 }
2268 
2269 static int __cancel_hostile(struct live_preempt_cancel *arg)
2270 {
2271 	struct i915_request *rq;
2272 	int err;
2273 
2274 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2275 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2276 		return 0;
2277 
2278 	if (!intel_has_reset_engine(arg->engine->gt))
2279 		return 0;
2280 
2281 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2282 	rq = spinner_create_request(&arg->a.spin,
2283 				    arg->a.ctx, arg->engine,
2284 				    MI_NOOP); /* preemption disabled */
2285 	if (IS_ERR(rq))
2286 		return PTR_ERR(rq);
2287 
2288 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2289 	i915_request_get(rq);
2290 	i915_request_add(rq);
2291 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2292 		err = -EIO;
2293 		goto out;
2294 	}
2295 
2296 	intel_context_ban(rq->context, rq);
2297 	err = intel_engine_pulse(arg->engine); /* force reset */
2298 	if (err)
2299 		goto out;
2300 
2301 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2302 	if (err) {
2303 		pr_err("Cancelled inflight0 request did not reset\n");
2304 		goto out;
2305 	}
2306 
2307 out:
2308 	i915_request_put(rq);
2309 	if (igt_flush_test(arg->engine->i915))
2310 		err = -EIO;
2311 	return err;
2312 }
2313 
2314 static void force_reset_timeout(struct intel_engine_cs *engine)
2315 {
2316 	engine->reset_timeout.probability = 999;
2317 	atomic_set(&engine->reset_timeout.times, -1);
2318 }
2319 
2320 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2321 {
2322 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2323 }
2324 
2325 static int __cancel_fail(struct live_preempt_cancel *arg)
2326 {
2327 	struct intel_engine_cs *engine = arg->engine;
2328 	struct i915_request *rq;
2329 	int err;
2330 
2331 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2332 		return 0;
2333 
2334 	if (!intel_has_reset_engine(engine->gt))
2335 		return 0;
2336 
2337 	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2338 	rq = spinner_create_request(&arg->a.spin,
2339 				    arg->a.ctx, engine,
2340 				    MI_NOOP); /* preemption disabled */
2341 	if (IS_ERR(rq))
2342 		return PTR_ERR(rq);
2343 
2344 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2345 	i915_request_get(rq);
2346 	i915_request_add(rq);
2347 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2348 		err = -EIO;
2349 		goto out;
2350 	}
2351 
2352 	intel_context_set_banned(rq->context);
2353 
2354 	err = intel_engine_pulse(engine);
2355 	if (err)
2356 		goto out;
2357 
2358 	force_reset_timeout(engine);
2359 
2360 	/* force preempt reset [failure] */
2361 	while (!engine->execlists.pending[0])
2362 		intel_engine_flush_submission(engine);
2363 	timer_delete_sync(&engine->execlists.preempt);
2364 	intel_engine_flush_submission(engine);
2365 
2366 	cancel_reset_timeout(engine);
2367 
2368 	/* after failure, require heartbeats to reset device */
2369 	intel_engine_set_heartbeat(engine, 1);
2370 	err = wait_for_reset(engine, rq, HZ / 2);
2371 	intel_engine_set_heartbeat(engine,
2372 				   engine->defaults.heartbeat_interval_ms);
2373 	if (err) {
2374 		pr_err("Cancelled inflight0 request did not reset\n");
2375 		goto out;
2376 	}
2377 
2378 out:
2379 	i915_request_put(rq);
2380 	if (igt_flush_test(engine->i915))
2381 		err = -EIO;
2382 	return err;
2383 }
2384 
2385 static int live_preempt_cancel(void *arg)
2386 {
2387 	struct intel_gt *gt = arg;
2388 	struct live_preempt_cancel data;
2389 	enum intel_engine_id id;
2390 	int err = -ENOMEM;
2391 
2392 	/*
2393 	 * To cancel an inflight context, we need to first remove it from the
2394 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2395 	 */
2396 
2397 	if (preempt_client_init(gt, &data.a))
2398 		return -ENOMEM;
2399 	if (preempt_client_init(gt, &data.b))
2400 		goto err_client_a;
2401 
2402 	for_each_engine(data.engine, gt, id) {
2403 		if (!intel_engine_has_preemption(data.engine))
2404 			continue;
2405 
2406 		err = __cancel_active0(&data);
2407 		if (err)
2408 			goto err_wedged;
2409 
2410 		err = __cancel_active1(&data);
2411 		if (err)
2412 			goto err_wedged;
2413 
2414 		err = __cancel_queued(&data);
2415 		if (err)
2416 			goto err_wedged;
2417 
2418 		err = __cancel_hostile(&data);
2419 		if (err)
2420 			goto err_wedged;
2421 
2422 		err = __cancel_fail(&data);
2423 		if (err)
2424 			goto err_wedged;
2425 	}
2426 
2427 	err = 0;
2428 err_client_b:
2429 	preempt_client_fini(&data.b);
2430 err_client_a:
2431 	preempt_client_fini(&data.a);
2432 	return err;
2433 
2434 err_wedged:
2435 	GEM_TRACE_DUMP();
2436 	igt_spinner_end(&data.b.spin);
2437 	igt_spinner_end(&data.a.spin);
2438 	intel_gt_set_wedged(gt);
2439 	goto err_client_b;
2440 }
2441 
2442 static int live_suppress_self_preempt(void *arg)
2443 {
2444 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2445 	struct intel_gt *gt = arg;
2446 	struct intel_engine_cs *engine;
2447 	struct preempt_client a, b;
2448 	enum intel_engine_id id;
2449 	int err = -ENOMEM;
2450 
2451 	/*
2452 	 * Verify that if a preemption request does not cause a change in
2453 	 * the current execution order, the preempt-to-idle injection is
2454 	 * skipped and that we do not accidentally apply it after the CS
2455 	 * completion event.
2456 	 */
2457 
2458 	if (intel_uc_uses_guc_submission(&gt->uc))
2459 		return 0; /* presume black blox */
2460 
2461 	if (intel_vgpu_active(gt->i915))
2462 		return 0; /* GVT forces single port & request submission */
2463 
2464 	if (preempt_client_init(gt, &a))
2465 		return -ENOMEM;
2466 	if (preempt_client_init(gt, &b))
2467 		goto err_client_a;
2468 
2469 	for_each_engine(engine, gt, id) {
2470 		struct i915_request *rq_a, *rq_b;
2471 		int depth;
2472 
2473 		if (!intel_engine_has_preemption(engine))
2474 			continue;
2475 
2476 		if (igt_flush_test(gt->i915))
2477 			goto err_wedged;
2478 
2479 		st_engine_heartbeat_disable(engine);
2480 		engine->execlists.preempt_hang.count = 0;
2481 
2482 		rq_a = spinner_create_request(&a.spin,
2483 					      a.ctx, engine,
2484 					      MI_NOOP);
2485 		if (IS_ERR(rq_a)) {
2486 			err = PTR_ERR(rq_a);
2487 			st_engine_heartbeat_enable(engine);
2488 			goto err_client_b;
2489 		}
2490 
2491 		i915_request_add(rq_a);
2492 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2493 			pr_err("First client failed to start\n");
2494 			st_engine_heartbeat_enable(engine);
2495 			goto err_wedged;
2496 		}
2497 
2498 		/* Keep postponing the timer to avoid premature slicing */
2499 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2500 		for (depth = 0; depth < 8; depth++) {
2501 			rq_b = spinner_create_request(&b.spin,
2502 						      b.ctx, engine,
2503 						      MI_NOOP);
2504 			if (IS_ERR(rq_b)) {
2505 				err = PTR_ERR(rq_b);
2506 				st_engine_heartbeat_enable(engine);
2507 				goto err_client_b;
2508 			}
2509 			i915_request_add(rq_b);
2510 
2511 			GEM_BUG_ON(i915_request_completed(rq_a));
2512 			engine->sched_engine->schedule(rq_a, &attr);
2513 			igt_spinner_end(&a.spin);
2514 
2515 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2516 				pr_err("Second client failed to start\n");
2517 				st_engine_heartbeat_enable(engine);
2518 				goto err_wedged;
2519 			}
2520 
2521 			swap(a, b);
2522 			rq_a = rq_b;
2523 		}
2524 		igt_spinner_end(&a.spin);
2525 
2526 		if (engine->execlists.preempt_hang.count) {
2527 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2528 			       engine->name,
2529 			       engine->execlists.preempt_hang.count,
2530 			       depth);
2531 			st_engine_heartbeat_enable(engine);
2532 			err = -EINVAL;
2533 			goto err_client_b;
2534 		}
2535 
2536 		st_engine_heartbeat_enable(engine);
2537 		if (igt_flush_test(gt->i915))
2538 			goto err_wedged;
2539 	}
2540 
2541 	err = 0;
2542 err_client_b:
2543 	preempt_client_fini(&b);
2544 err_client_a:
2545 	preempt_client_fini(&a);
2546 	return err;
2547 
2548 err_wedged:
2549 	igt_spinner_end(&b.spin);
2550 	igt_spinner_end(&a.spin);
2551 	intel_gt_set_wedged(gt);
2552 	err = -EIO;
2553 	goto err_client_b;
2554 }
2555 
2556 static int live_chain_preempt(void *arg)
2557 {
2558 	struct intel_gt *gt = arg;
2559 	struct intel_engine_cs *engine;
2560 	struct preempt_client hi, lo;
2561 	enum intel_engine_id id;
2562 	int err = -ENOMEM;
2563 
2564 	/*
2565 	 * Build a chain AB...BA between two contexts (A, B) and request
2566 	 * preemption of the last request. It should then complete before
2567 	 * the previously submitted spinner in B.
2568 	 */
2569 
2570 	if (preempt_client_init(gt, &hi))
2571 		return -ENOMEM;
2572 
2573 	if (preempt_client_init(gt, &lo))
2574 		goto err_client_hi;
2575 
2576 	for_each_engine(engine, gt, id) {
2577 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2578 		struct igt_live_test t;
2579 		struct i915_request *rq;
2580 		int ring_size, count, i;
2581 
2582 		if (!intel_engine_has_preemption(engine))
2583 			continue;
2584 
2585 		rq = spinner_create_request(&lo.spin,
2586 					    lo.ctx, engine,
2587 					    MI_ARB_CHECK);
2588 		if (IS_ERR(rq))
2589 			goto err_wedged;
2590 
2591 		i915_request_get(rq);
2592 		i915_request_add(rq);
2593 
2594 		ring_size = rq->wa_tail - rq->head;
2595 		if (ring_size < 0)
2596 			ring_size += rq->ring->size;
2597 		ring_size = rq->ring->size / ring_size;
2598 		pr_debug("%s(%s): Using maximum of %d requests\n",
2599 			 __func__, engine->name, ring_size);
2600 
2601 		igt_spinner_end(&lo.spin);
2602 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2603 			pr_err("Timed out waiting to flush %s\n", engine->name);
2604 			i915_request_put(rq);
2605 			goto err_wedged;
2606 		}
2607 		i915_request_put(rq);
2608 
2609 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2610 			err = -EIO;
2611 			goto err_wedged;
2612 		}
2613 
2614 		for_each_prime_number_from(count, 1, ring_size) {
2615 			rq = spinner_create_request(&hi.spin,
2616 						    hi.ctx, engine,
2617 						    MI_ARB_CHECK);
2618 			if (IS_ERR(rq))
2619 				goto err_wedged;
2620 			i915_request_add(rq);
2621 			if (!igt_wait_for_spinner(&hi.spin, rq))
2622 				goto err_wedged;
2623 
2624 			rq = spinner_create_request(&lo.spin,
2625 						    lo.ctx, engine,
2626 						    MI_ARB_CHECK);
2627 			if (IS_ERR(rq))
2628 				goto err_wedged;
2629 			i915_request_add(rq);
2630 
2631 			for (i = 0; i < count; i++) {
2632 				rq = igt_request_alloc(lo.ctx, engine);
2633 				if (IS_ERR(rq))
2634 					goto err_wedged;
2635 				i915_request_add(rq);
2636 			}
2637 
2638 			rq = igt_request_alloc(hi.ctx, engine);
2639 			if (IS_ERR(rq))
2640 				goto err_wedged;
2641 
2642 			i915_request_get(rq);
2643 			i915_request_add(rq);
2644 			engine->sched_engine->schedule(rq, &attr);
2645 
2646 			igt_spinner_end(&hi.spin);
2647 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2648 				struct drm_printer p =
2649 					drm_info_printer(gt->i915->drm.dev);
2650 
2651 				pr_err("Failed to preempt over chain of %d\n",
2652 				       count);
2653 				intel_engine_dump(engine, &p,
2654 						  "%s\n", engine->name);
2655 				i915_request_put(rq);
2656 				goto err_wedged;
2657 			}
2658 			igt_spinner_end(&lo.spin);
2659 			i915_request_put(rq);
2660 
2661 			rq = igt_request_alloc(lo.ctx, engine);
2662 			if (IS_ERR(rq))
2663 				goto err_wedged;
2664 
2665 			i915_request_get(rq);
2666 			i915_request_add(rq);
2667 
2668 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2669 				struct drm_printer p =
2670 					drm_info_printer(gt->i915->drm.dev);
2671 
2672 				pr_err("Failed to flush low priority chain of %d requests\n",
2673 				       count);
2674 				intel_engine_dump(engine, &p,
2675 						  "%s\n", engine->name);
2676 
2677 				i915_request_put(rq);
2678 				goto err_wedged;
2679 			}
2680 			i915_request_put(rq);
2681 		}
2682 
2683 		if (igt_live_test_end(&t)) {
2684 			err = -EIO;
2685 			goto err_wedged;
2686 		}
2687 	}
2688 
2689 	err = 0;
2690 err_client_lo:
2691 	preempt_client_fini(&lo);
2692 err_client_hi:
2693 	preempt_client_fini(&hi);
2694 	return err;
2695 
2696 err_wedged:
2697 	igt_spinner_end(&hi.spin);
2698 	igt_spinner_end(&lo.spin);
2699 	intel_gt_set_wedged(gt);
2700 	err = -EIO;
2701 	goto err_client_lo;
2702 }
2703 
2704 static int create_gang(struct intel_engine_cs *engine,
2705 		       struct i915_request **prev)
2706 {
2707 	struct drm_i915_gem_object *obj;
2708 	struct intel_context *ce;
2709 	struct i915_request *rq;
2710 	struct i915_vma *vma;
2711 	u32 *cs;
2712 	int err;
2713 
2714 	ce = intel_context_create(engine);
2715 	if (IS_ERR(ce))
2716 		return PTR_ERR(ce);
2717 
2718 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2719 	if (IS_ERR(obj)) {
2720 		err = PTR_ERR(obj);
2721 		goto err_ce;
2722 	}
2723 
2724 	vma = i915_vma_instance(obj, ce->vm, NULL);
2725 	if (IS_ERR(vma)) {
2726 		err = PTR_ERR(vma);
2727 		goto err_obj;
2728 	}
2729 
2730 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2731 	if (err)
2732 		goto err_obj;
2733 
2734 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2735 	if (IS_ERR(cs)) {
2736 		err = PTR_ERR(cs);
2737 		goto err_obj;
2738 	}
2739 
2740 	/* Semaphore target: spin until zero */
2741 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2742 
2743 	*cs++ = MI_SEMAPHORE_WAIT |
2744 		MI_SEMAPHORE_POLL |
2745 		MI_SEMAPHORE_SAD_EQ_SDD;
2746 	*cs++ = 0;
2747 	*cs++ = lower_32_bits(i915_vma_offset(vma));
2748 	*cs++ = upper_32_bits(i915_vma_offset(vma));
2749 
2750 	if (*prev) {
2751 		u64 offset = i915_vma_offset((*prev)->batch);
2752 
2753 		/* Terminate the spinner in the next lower priority batch. */
2754 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2755 		*cs++ = lower_32_bits(offset);
2756 		*cs++ = upper_32_bits(offset);
2757 		*cs++ = 0;
2758 	}
2759 
2760 	*cs++ = MI_BATCH_BUFFER_END;
2761 	i915_gem_object_flush_map(obj);
2762 	i915_gem_object_unpin_map(obj);
2763 
2764 	rq = intel_context_create_request(ce);
2765 	if (IS_ERR(rq)) {
2766 		err = PTR_ERR(rq);
2767 		goto err_obj;
2768 	}
2769 
2770 	rq->batch = i915_vma_get(vma);
2771 	i915_request_get(rq);
2772 
2773 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
2774 	if (!err)
2775 		err = rq->engine->emit_bb_start(rq,
2776 						i915_vma_offset(vma),
2777 						PAGE_SIZE, 0);
2778 	i915_request_add(rq);
2779 	if (err)
2780 		goto err_rq;
2781 
2782 	i915_gem_object_put(obj);
2783 	intel_context_put(ce);
2784 
2785 	rq->mock.link.next = &(*prev)->mock.link;
2786 	*prev = rq;
2787 	return 0;
2788 
2789 err_rq:
2790 	i915_vma_put(rq->batch);
2791 	i915_request_put(rq);
2792 err_obj:
2793 	i915_gem_object_put(obj);
2794 err_ce:
2795 	intel_context_put(ce);
2796 	return err;
2797 }
2798 
2799 static int __live_preempt_ring(struct intel_engine_cs *engine,
2800 			       struct igt_spinner *spin,
2801 			       int queue_sz, int ring_sz)
2802 {
2803 	struct intel_context *ce[2] = {};
2804 	struct i915_request *rq;
2805 	struct igt_live_test t;
2806 	int err = 0;
2807 	int n;
2808 
2809 	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2810 		return -EIO;
2811 
2812 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2813 		struct intel_context *tmp;
2814 
2815 		tmp = intel_context_create(engine);
2816 		if (IS_ERR(tmp)) {
2817 			err = PTR_ERR(tmp);
2818 			goto err_ce;
2819 		}
2820 
2821 		tmp->ring_size = ring_sz;
2822 
2823 		err = intel_context_pin(tmp);
2824 		if (err) {
2825 			intel_context_put(tmp);
2826 			goto err_ce;
2827 		}
2828 
2829 		memset32(tmp->ring->vaddr,
2830 			 0xdeadbeef, /* trigger a hang if executed */
2831 			 tmp->ring->vma->size / sizeof(u32));
2832 
2833 		ce[n] = tmp;
2834 	}
2835 
2836 	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2837 	if (IS_ERR(rq)) {
2838 		err = PTR_ERR(rq);
2839 		goto err_ce;
2840 	}
2841 
2842 	i915_request_get(rq);
2843 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2844 	i915_request_add(rq);
2845 
2846 	if (!igt_wait_for_spinner(spin, rq)) {
2847 		intel_gt_set_wedged(engine->gt);
2848 		i915_request_put(rq);
2849 		err = -ETIME;
2850 		goto err_ce;
2851 	}
2852 
2853 	/* Fill the ring, until we will cause a wrap */
2854 	n = 0;
2855 	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2856 		struct i915_request *tmp;
2857 
2858 		tmp = intel_context_create_request(ce[0]);
2859 		if (IS_ERR(tmp)) {
2860 			err = PTR_ERR(tmp);
2861 			i915_request_put(rq);
2862 			goto err_ce;
2863 		}
2864 
2865 		i915_request_add(tmp);
2866 		intel_engine_flush_submission(engine);
2867 		n++;
2868 	}
2869 	intel_engine_flush_submission(engine);
2870 	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2871 		 engine->name, queue_sz, n,
2872 		 ce[0]->ring->size,
2873 		 ce[0]->ring->tail,
2874 		 ce[0]->ring->emit,
2875 		 rq->tail);
2876 	i915_request_put(rq);
2877 
2878 	/* Create a second request to preempt the first ring */
2879 	rq = intel_context_create_request(ce[1]);
2880 	if (IS_ERR(rq)) {
2881 		err = PTR_ERR(rq);
2882 		goto err_ce;
2883 	}
2884 
2885 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2886 	i915_request_get(rq);
2887 	i915_request_add(rq);
2888 
2889 	err = wait_for_submit(engine, rq, HZ / 2);
2890 	i915_request_put(rq);
2891 	if (err) {
2892 		pr_err("%s: preemption request was not submitted\n",
2893 		       engine->name);
2894 		err = -ETIME;
2895 	}
2896 
2897 	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2898 		 engine->name,
2899 		 ce[0]->ring->tail, ce[0]->ring->emit,
2900 		 ce[1]->ring->tail, ce[1]->ring->emit);
2901 
2902 err_ce:
2903 	intel_engine_flush_submission(engine);
2904 	igt_spinner_end(spin);
2905 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2906 		if (IS_ERR_OR_NULL(ce[n]))
2907 			break;
2908 
2909 		intel_context_unpin(ce[n]);
2910 		intel_context_put(ce[n]);
2911 	}
2912 	if (igt_live_test_end(&t))
2913 		err = -EIO;
2914 	return err;
2915 }
2916 
2917 static int live_preempt_ring(void *arg)
2918 {
2919 	struct intel_gt *gt = arg;
2920 	struct intel_engine_cs *engine;
2921 	struct igt_spinner spin;
2922 	enum intel_engine_id id;
2923 	int err = 0;
2924 
2925 	/*
2926 	 * Check that we rollback large chunks of a ring in order to do a
2927 	 * preemption event. Similar to live_unlite_ring, but looking at
2928 	 * ring size rather than the impact of intel_ring_direction().
2929 	 */
2930 
2931 	if (igt_spinner_init(&spin, gt))
2932 		return -ENOMEM;
2933 
2934 	for_each_engine(engine, gt, id) {
2935 		int n;
2936 
2937 		if (!intel_engine_has_preemption(engine))
2938 			continue;
2939 
2940 		if (!intel_engine_can_store_dword(engine))
2941 			continue;
2942 
2943 		st_engine_heartbeat_disable(engine);
2944 
2945 		for (n = 0; n <= 3; n++) {
2946 			err = __live_preempt_ring(engine, &spin,
2947 						  n * SZ_4K / 4, SZ_4K);
2948 			if (err)
2949 				break;
2950 		}
2951 
2952 		st_engine_heartbeat_enable(engine);
2953 		if (err)
2954 			break;
2955 	}
2956 
2957 	igt_spinner_fini(&spin);
2958 	return err;
2959 }
2960 
2961 static int live_preempt_gang(void *arg)
2962 {
2963 	struct intel_gt *gt = arg;
2964 	struct intel_engine_cs *engine;
2965 	enum intel_engine_id id;
2966 
2967 	/*
2968 	 * Build as long a chain of preempters as we can, with each
2969 	 * request higher priority than the last. Once we are ready, we release
2970 	 * the last batch which then precolates down the chain, each releasing
2971 	 * the next oldest in turn. The intent is to simply push as hard as we
2972 	 * can with the number of preemptions, trying to exceed narrow HW
2973 	 * limits. At a minimum, we insist that we can sort all the user
2974 	 * high priority levels into execution order.
2975 	 */
2976 
2977 	for_each_engine(engine, gt, id) {
2978 		struct i915_request *rq = NULL;
2979 		struct igt_live_test t;
2980 		IGT_TIMEOUT(end_time);
2981 		int prio = 0;
2982 		int err = 0;
2983 		u32 *cs;
2984 
2985 		if (!intel_engine_has_preemption(engine))
2986 			continue;
2987 
2988 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2989 			return -EIO;
2990 
2991 		do {
2992 			struct i915_sched_attr attr = { .priority = prio++ };
2993 
2994 			err = create_gang(engine, &rq);
2995 			if (err)
2996 				break;
2997 
2998 			/* Submit each spinner at increasing priority */
2999 			engine->sched_engine->schedule(rq, &attr);
3000 		} while (prio <= I915_PRIORITY_MAX &&
3001 			 !__igt_timeout(end_time, NULL));
3002 		pr_debug("%s: Preempt chain of %d requests\n",
3003 			 engine->name, prio);
3004 
3005 		/*
3006 		 * Such that the last spinner is the highest priority and
3007 		 * should execute first. When that spinner completes,
3008 		 * it will terminate the next lowest spinner until there
3009 		 * are no more spinners and the gang is complete.
3010 		 */
3011 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3012 		if (!IS_ERR(cs)) {
3013 			*cs = 0;
3014 			i915_gem_object_unpin_map(rq->batch->obj);
3015 		} else {
3016 			err = PTR_ERR(cs);
3017 			intel_gt_set_wedged(gt);
3018 		}
3019 
3020 		while (rq) { /* wait for each rq from highest to lowest prio */
3021 			struct i915_request *n = list_next_entry(rq, mock.link);
3022 
3023 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3024 				struct drm_printer p =
3025 					drm_info_printer(engine->i915->drm.dev);
3026 
3027 				pr_err("Failed to flush chain of %d requests, at %d\n",
3028 				       prio, rq_prio(rq));
3029 				intel_engine_dump(engine, &p,
3030 						  "%s\n", engine->name);
3031 
3032 				err = -ETIME;
3033 			}
3034 
3035 			i915_vma_put(rq->batch);
3036 			i915_request_put(rq);
3037 			rq = n;
3038 		}
3039 
3040 		if (igt_live_test_end(&t))
3041 			err = -EIO;
3042 		if (err)
3043 			return err;
3044 	}
3045 
3046 	return 0;
3047 }
3048 
3049 static struct i915_vma *
3050 create_gpr_user(struct intel_engine_cs *engine,
3051 		struct i915_vma *result,
3052 		unsigned int offset)
3053 {
3054 	struct drm_i915_gem_object *obj;
3055 	struct i915_vma *vma;
3056 	u32 *cs;
3057 	int err;
3058 	int i;
3059 
3060 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3061 	if (IS_ERR(obj))
3062 		return ERR_CAST(obj);
3063 
3064 	vma = i915_vma_instance(obj, result->vm, NULL);
3065 	if (IS_ERR(vma)) {
3066 		i915_gem_object_put(obj);
3067 		return vma;
3068 	}
3069 
3070 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3071 	if (err) {
3072 		i915_vma_put(vma);
3073 		return ERR_PTR(err);
3074 	}
3075 
3076 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3077 	if (IS_ERR(cs)) {
3078 		i915_vma_put(vma);
3079 		return ERR_CAST(cs);
3080 	}
3081 
3082 	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
3083 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3084 	*cs++ = CS_GPR(engine, 0);
3085 	*cs++ = 1;
3086 
3087 	for (i = 1; i < NUM_GPR; i++) {
3088 		u64 addr;
3089 
3090 		/*
3091 		 * Perform: GPR[i]++
3092 		 *
3093 		 * As we read and write into the context saved GPR[i], if
3094 		 * we restart this batch buffer from an earlier point, we
3095 		 * will repeat the increment and store a value > 1.
3096 		 */
3097 		*cs++ = MI_MATH(4);
3098 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3099 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3100 		*cs++ = MI_MATH_ADD;
3101 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3102 
3103 		addr = i915_vma_offset(result) + offset + i * sizeof(*cs);
3104 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3105 		*cs++ = CS_GPR(engine, 2 * i);
3106 		*cs++ = lower_32_bits(addr);
3107 		*cs++ = upper_32_bits(addr);
3108 
3109 		*cs++ = MI_SEMAPHORE_WAIT |
3110 			MI_SEMAPHORE_POLL |
3111 			MI_SEMAPHORE_SAD_GTE_SDD;
3112 		*cs++ = i;
3113 		*cs++ = lower_32_bits(i915_vma_offset(result));
3114 		*cs++ = upper_32_bits(i915_vma_offset(result));
3115 	}
3116 
3117 	*cs++ = MI_BATCH_BUFFER_END;
3118 	i915_gem_object_flush_map(obj);
3119 	i915_gem_object_unpin_map(obj);
3120 
3121 	return vma;
3122 }
3123 
3124 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3125 {
3126 	struct drm_i915_gem_object *obj;
3127 	struct i915_vma *vma;
3128 	int err;
3129 
3130 	obj = i915_gem_object_create_internal(gt->i915, sz);
3131 	if (IS_ERR(obj))
3132 		return ERR_CAST(obj);
3133 
3134 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3135 	if (IS_ERR(vma)) {
3136 		i915_gem_object_put(obj);
3137 		return vma;
3138 	}
3139 
3140 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3141 	if (err) {
3142 		i915_vma_put(vma);
3143 		return ERR_PTR(err);
3144 	}
3145 
3146 	return vma;
3147 }
3148 
3149 static struct i915_request *
3150 create_gpr_client(struct intel_engine_cs *engine,
3151 		  struct i915_vma *global,
3152 		  unsigned int offset)
3153 {
3154 	struct i915_vma *batch, *vma;
3155 	struct intel_context *ce;
3156 	struct i915_request *rq;
3157 	int err;
3158 
3159 	ce = intel_context_create(engine);
3160 	if (IS_ERR(ce))
3161 		return ERR_CAST(ce);
3162 
3163 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3164 	if (IS_ERR(vma)) {
3165 		err = PTR_ERR(vma);
3166 		goto out_ce;
3167 	}
3168 
3169 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3170 	if (err)
3171 		goto out_ce;
3172 
3173 	batch = create_gpr_user(engine, vma, offset);
3174 	if (IS_ERR(batch)) {
3175 		err = PTR_ERR(batch);
3176 		goto out_vma;
3177 	}
3178 
3179 	rq = intel_context_create_request(ce);
3180 	if (IS_ERR(rq)) {
3181 		err = PTR_ERR(rq);
3182 		goto out_batch;
3183 	}
3184 
3185 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
3186 
3187 	i915_vma_lock(batch);
3188 	if (!err)
3189 		err = i915_vma_move_to_active(batch, rq, 0);
3190 	if (!err)
3191 		err = rq->engine->emit_bb_start(rq,
3192 						i915_vma_offset(batch),
3193 						PAGE_SIZE, 0);
3194 	i915_vma_unlock(batch);
3195 	i915_vma_unpin(batch);
3196 
3197 	if (!err)
3198 		i915_request_get(rq);
3199 	i915_request_add(rq);
3200 
3201 out_batch:
3202 	i915_vma_put(batch);
3203 out_vma:
3204 	i915_vma_unpin(vma);
3205 out_ce:
3206 	intel_context_put(ce);
3207 	return err ? ERR_PTR(err) : rq;
3208 }
3209 
3210 static int preempt_user(struct intel_engine_cs *engine,
3211 			struct i915_vma *global,
3212 			int id)
3213 {
3214 	struct i915_sched_attr attr = {
3215 		.priority = I915_PRIORITY_MAX
3216 	};
3217 	struct i915_request *rq;
3218 	int err = 0;
3219 	u32 *cs;
3220 
3221 	rq = intel_engine_create_kernel_request(engine);
3222 	if (IS_ERR(rq))
3223 		return PTR_ERR(rq);
3224 
3225 	cs = intel_ring_begin(rq, 4);
3226 	if (IS_ERR(cs)) {
3227 		i915_request_add(rq);
3228 		return PTR_ERR(cs);
3229 	}
3230 
3231 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3232 	*cs++ = i915_ggtt_offset(global);
3233 	*cs++ = 0;
3234 	*cs++ = id;
3235 
3236 	intel_ring_advance(rq, cs);
3237 
3238 	i915_request_get(rq);
3239 	i915_request_add(rq);
3240 
3241 	engine->sched_engine->schedule(rq, &attr);
3242 
3243 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3244 		err = -ETIME;
3245 	i915_request_put(rq);
3246 
3247 	return err;
3248 }
3249 
3250 static int live_preempt_user(void *arg)
3251 {
3252 	struct intel_gt *gt = arg;
3253 	struct intel_engine_cs *engine;
3254 	struct i915_vma *global;
3255 	enum intel_engine_id id;
3256 	u32 *result;
3257 	int err = 0;
3258 
3259 	/*
3260 	 * In our other tests, we look at preemption in carefully
3261 	 * controlled conditions in the ringbuffer. Since most of the
3262 	 * time is spent in user batches, most of our preemptions naturally
3263 	 * occur there. We want to verify that when we preempt inside a batch
3264 	 * we continue on from the current instruction and do not roll back
3265 	 * to the start, or another earlier arbitration point.
3266 	 *
3267 	 * To verify this, we create a batch which is a mixture of
3268 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3269 	 * a few preempting contexts thrown into the mix, we look for any
3270 	 * repeated instructions (which show up as incorrect values).
3271 	 */
3272 
3273 	global = create_global(gt, 4096);
3274 	if (IS_ERR(global))
3275 		return PTR_ERR(global);
3276 
3277 	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3278 	if (IS_ERR(result)) {
3279 		i915_vma_unpin_and_release(&global, 0);
3280 		return PTR_ERR(result);
3281 	}
3282 
3283 	for_each_engine(engine, gt, id) {
3284 		struct i915_request *client[3] = {};
3285 		struct igt_live_test t;
3286 		int i;
3287 
3288 		if (!intel_engine_has_preemption(engine))
3289 			continue;
3290 
3291 		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3292 			continue; /* we need per-context GPR */
3293 
3294 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3295 			err = -EIO;
3296 			break;
3297 		}
3298 
3299 		memset(result, 0, 4096);
3300 
3301 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3302 			struct i915_request *rq;
3303 
3304 			rq = create_gpr_client(engine, global,
3305 					       NUM_GPR * i * sizeof(u32));
3306 			if (IS_ERR(rq)) {
3307 				err = PTR_ERR(rq);
3308 				goto end_test;
3309 			}
3310 
3311 			client[i] = rq;
3312 		}
3313 
3314 		/* Continuously preempt the set of 3 running contexts */
3315 		for (i = 1; i <= NUM_GPR; i++) {
3316 			err = preempt_user(engine, global, i);
3317 			if (err)
3318 				goto end_test;
3319 		}
3320 
3321 		if (READ_ONCE(result[0]) != NUM_GPR) {
3322 			pr_err("%s: Failed to release semaphore\n",
3323 			       engine->name);
3324 			err = -EIO;
3325 			goto end_test;
3326 		}
3327 
3328 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3329 			int gpr;
3330 
3331 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3332 				err = -ETIME;
3333 				goto end_test;
3334 			}
3335 
3336 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3337 				if (result[NUM_GPR * i + gpr] != 1) {
3338 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3339 					       engine->name,
3340 					       i, gpr, result[NUM_GPR * i + gpr]);
3341 					err = -EINVAL;
3342 					goto end_test;
3343 				}
3344 			}
3345 		}
3346 
3347 end_test:
3348 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3349 			if (!client[i])
3350 				break;
3351 
3352 			i915_request_put(client[i]);
3353 		}
3354 
3355 		/* Flush the semaphores on error */
3356 		smp_store_mb(result[0], -1);
3357 		if (igt_live_test_end(&t))
3358 			err = -EIO;
3359 		if (err)
3360 			break;
3361 	}
3362 
3363 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3364 	return err;
3365 }
3366 
3367 static int live_preempt_timeout(void *arg)
3368 {
3369 	struct intel_gt *gt = arg;
3370 	struct i915_gem_context *ctx_hi, *ctx_lo;
3371 	struct igt_spinner spin_lo;
3372 	struct intel_engine_cs *engine;
3373 	enum intel_engine_id id;
3374 	int err = -ENOMEM;
3375 
3376 	/*
3377 	 * Check that we force preemption to occur by cancelling the previous
3378 	 * context if it refuses to yield the GPU.
3379 	 */
3380 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3381 		return 0;
3382 
3383 	if (!intel_has_reset_engine(gt))
3384 		return 0;
3385 
3386 	ctx_hi = kernel_context(gt->i915, NULL);
3387 	if (!ctx_hi)
3388 		return -ENOMEM;
3389 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3390 
3391 	ctx_lo = kernel_context(gt->i915, NULL);
3392 	if (!ctx_lo)
3393 		goto err_ctx_hi;
3394 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3395 
3396 	if (igt_spinner_init(&spin_lo, gt))
3397 		goto err_ctx_lo;
3398 
3399 	for_each_engine(engine, gt, id) {
3400 		unsigned long saved_timeout;
3401 		struct i915_request *rq;
3402 
3403 		if (!intel_engine_has_preemption(engine))
3404 			continue;
3405 
3406 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3407 					    MI_NOOP); /* preemption disabled */
3408 		if (IS_ERR(rq)) {
3409 			err = PTR_ERR(rq);
3410 			goto err_spin_lo;
3411 		}
3412 
3413 		i915_request_add(rq);
3414 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3415 			intel_gt_set_wedged(gt);
3416 			err = -EIO;
3417 			goto err_spin_lo;
3418 		}
3419 
3420 		rq = igt_request_alloc(ctx_hi, engine);
3421 		if (IS_ERR(rq)) {
3422 			igt_spinner_end(&spin_lo);
3423 			err = PTR_ERR(rq);
3424 			goto err_spin_lo;
3425 		}
3426 
3427 		/* Flush the previous CS ack before changing timeouts */
3428 		while (READ_ONCE(engine->execlists.pending[0]))
3429 			cpu_relax();
3430 
3431 		saved_timeout = engine->props.preempt_timeout_ms;
3432 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3433 
3434 		i915_request_get(rq);
3435 		i915_request_add(rq);
3436 
3437 		intel_engine_flush_submission(engine);
3438 		engine->props.preempt_timeout_ms = saved_timeout;
3439 
3440 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3441 			intel_gt_set_wedged(gt);
3442 			i915_request_put(rq);
3443 			err = -ETIME;
3444 			goto err_spin_lo;
3445 		}
3446 
3447 		igt_spinner_end(&spin_lo);
3448 		i915_request_put(rq);
3449 	}
3450 
3451 	err = 0;
3452 err_spin_lo:
3453 	igt_spinner_fini(&spin_lo);
3454 err_ctx_lo:
3455 	kernel_context_close(ctx_lo);
3456 err_ctx_hi:
3457 	kernel_context_close(ctx_hi);
3458 	return err;
3459 }
3460 
3461 static int random_range(struct rnd_state *rnd, int min, int max)
3462 {
3463 	return i915_prandom_u32_max_state(max - min, rnd) + min;
3464 }
3465 
3466 static int random_priority(struct rnd_state *rnd)
3467 {
3468 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3469 }
3470 
3471 struct preempt_smoke {
3472 	struct intel_gt *gt;
3473 	struct kthread_work work;
3474 	struct i915_gem_context **contexts;
3475 	struct intel_engine_cs *engine;
3476 	struct drm_i915_gem_object *batch;
3477 	unsigned int ncontext;
3478 	struct rnd_state prng;
3479 	unsigned long count;
3480 	int result;
3481 };
3482 
3483 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3484 {
3485 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3486 							  &smoke->prng)];
3487 }
3488 
3489 static int smoke_submit(struct preempt_smoke *smoke,
3490 			struct i915_gem_context *ctx, int prio,
3491 			struct drm_i915_gem_object *batch)
3492 {
3493 	struct i915_request *rq;
3494 	struct i915_vma *vma = NULL;
3495 	int err = 0;
3496 
3497 	if (batch) {
3498 		struct i915_address_space *vm;
3499 
3500 		vm = i915_gem_context_get_eb_vm(ctx);
3501 		vma = i915_vma_instance(batch, vm, NULL);
3502 		i915_vm_put(vm);
3503 		if (IS_ERR(vma))
3504 			return PTR_ERR(vma);
3505 
3506 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3507 		if (err)
3508 			return err;
3509 	}
3510 
3511 	ctx->sched.priority = prio;
3512 
3513 	rq = igt_request_alloc(ctx, smoke->engine);
3514 	if (IS_ERR(rq)) {
3515 		err = PTR_ERR(rq);
3516 		goto unpin;
3517 	}
3518 
3519 	if (vma) {
3520 		err = igt_vma_move_to_active_unlocked(vma, rq, 0);
3521 		if (!err)
3522 			err = rq->engine->emit_bb_start(rq,
3523 							i915_vma_offset(vma),
3524 							PAGE_SIZE, 0);
3525 	}
3526 
3527 	i915_request_add(rq);
3528 
3529 unpin:
3530 	if (vma)
3531 		i915_vma_unpin(vma);
3532 
3533 	return err;
3534 }
3535 
3536 static void smoke_crescendo_work(struct kthread_work *work)
3537 {
3538 	struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
3539 	IGT_TIMEOUT(end_time);
3540 	unsigned long count;
3541 
3542 	count = 0;
3543 	do {
3544 		struct i915_gem_context *ctx = smoke_context(smoke);
3545 
3546 		smoke->result = smoke_submit(smoke, ctx,
3547 					     count % I915_PRIORITY_MAX,
3548 					     smoke->batch);
3549 
3550 		count++;
3551 	} while (!smoke->result && count < smoke->ncontext &&
3552 		 !__igt_timeout(end_time, NULL));
3553 
3554 	smoke->count = count;
3555 }
3556 
3557 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3558 #define BATCH BIT(0)
3559 {
3560 	struct kthread_worker *worker[I915_NUM_ENGINES] = {};
3561 	struct preempt_smoke *arg;
3562 	struct intel_engine_cs *engine;
3563 	enum intel_engine_id id;
3564 	unsigned long count;
3565 	int err = 0;
3566 
3567 	arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL);
3568 	if (!arg)
3569 		return -ENOMEM;
3570 
3571 	memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
3572 
3573 	for_each_engine(engine, smoke->gt, id) {
3574 		arg[id] = *smoke;
3575 		arg[id].engine = engine;
3576 		if (!(flags & BATCH))
3577 			arg[id].batch = NULL;
3578 		arg[id].count = 0;
3579 
3580 		worker[id] = kthread_run_worker(0, "igt/smoke:%d", id);
3581 		if (IS_ERR(worker[id])) {
3582 			err = PTR_ERR(worker[id]);
3583 			break;
3584 		}
3585 
3586 		kthread_init_work(&arg[id].work, smoke_crescendo_work);
3587 		kthread_queue_work(worker[id], &arg[id].work);
3588 	}
3589 
3590 	count = 0;
3591 	for_each_engine(engine, smoke->gt, id) {
3592 		if (IS_ERR_OR_NULL(worker[id]))
3593 			continue;
3594 
3595 		kthread_flush_work(&arg[id].work);
3596 		if (arg[id].result && !err)
3597 			err = arg[id].result;
3598 
3599 		count += arg[id].count;
3600 
3601 		kthread_destroy_worker(worker[id]);
3602 	}
3603 
3604 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606 
3607 	kfree(arg);
3608 	return 0;
3609 }
3610 
3611 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3612 {
3613 	enum intel_engine_id id;
3614 	IGT_TIMEOUT(end_time);
3615 	unsigned long count;
3616 
3617 	count = 0;
3618 	do {
3619 		for_each_engine(smoke->engine, smoke->gt, id) {
3620 			struct i915_gem_context *ctx = smoke_context(smoke);
3621 			int err;
3622 
3623 			err = smoke_submit(smoke,
3624 					   ctx, random_priority(&smoke->prng),
3625 					   flags & BATCH ? smoke->batch : NULL);
3626 			if (err)
3627 				return err;
3628 
3629 			count++;
3630 		}
3631 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3632 
3633 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3634 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3635 	return 0;
3636 }
3637 
3638 static int live_preempt_smoke(void *arg)
3639 {
3640 	struct preempt_smoke smoke = {
3641 		.gt = arg,
3642 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3643 		.ncontext = 256,
3644 	};
3645 	const unsigned int phase[] = { 0, BATCH };
3646 	struct igt_live_test t;
3647 	int err = -ENOMEM;
3648 	u32 *cs;
3649 	int n;
3650 
3651 	smoke.contexts = kmalloc_array(smoke.ncontext,
3652 				       sizeof(*smoke.contexts),
3653 				       GFP_KERNEL);
3654 	if (!smoke.contexts)
3655 		return -ENOMEM;
3656 
3657 	smoke.batch =
3658 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3659 	if (IS_ERR(smoke.batch)) {
3660 		err = PTR_ERR(smoke.batch);
3661 		goto err_free;
3662 	}
3663 
3664 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3665 	if (IS_ERR(cs)) {
3666 		err = PTR_ERR(cs);
3667 		goto err_batch;
3668 	}
3669 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3670 		cs[n] = MI_ARB_CHECK;
3671 	cs[n] = MI_BATCH_BUFFER_END;
3672 	i915_gem_object_flush_map(smoke.batch);
3673 	i915_gem_object_unpin_map(smoke.batch);
3674 
3675 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3676 		err = -EIO;
3677 		goto err_batch;
3678 	}
3679 
3680 	for (n = 0; n < smoke.ncontext; n++) {
3681 		smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
3682 		if (!smoke.contexts[n])
3683 			goto err_ctx;
3684 	}
3685 
3686 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3687 		err = smoke_crescendo(&smoke, phase[n]);
3688 		if (err)
3689 			goto err_ctx;
3690 
3691 		err = smoke_random(&smoke, phase[n]);
3692 		if (err)
3693 			goto err_ctx;
3694 	}
3695 
3696 err_ctx:
3697 	if (igt_live_test_end(&t))
3698 		err = -EIO;
3699 
3700 	for (n = 0; n < smoke.ncontext; n++) {
3701 		if (!smoke.contexts[n])
3702 			break;
3703 		kernel_context_close(smoke.contexts[n]);
3704 	}
3705 
3706 err_batch:
3707 	i915_gem_object_put(smoke.batch);
3708 err_free:
3709 	kfree(smoke.contexts);
3710 
3711 	return err;
3712 }
3713 
3714 static int nop_virtual_engine(struct intel_gt *gt,
3715 			      struct intel_engine_cs **siblings,
3716 			      unsigned int nsibling,
3717 			      unsigned int nctx,
3718 			      unsigned int flags)
3719 #define CHAIN BIT(0)
3720 {
3721 	IGT_TIMEOUT(end_time);
3722 	struct i915_request *request[16] = {};
3723 	struct intel_context *ve[16];
3724 	unsigned long n, prime, nc;
3725 	struct igt_live_test t;
3726 	ktime_t times[2] = {};
3727 	int err;
3728 
3729 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3730 
3731 	for (n = 0; n < nctx; n++) {
3732 		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
3733 		if (IS_ERR(ve[n])) {
3734 			err = PTR_ERR(ve[n]);
3735 			nctx = n;
3736 			goto out;
3737 		}
3738 
3739 		err = intel_context_pin(ve[n]);
3740 		if (err) {
3741 			intel_context_put(ve[n]);
3742 			nctx = n;
3743 			goto out;
3744 		}
3745 	}
3746 
3747 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3748 	if (err)
3749 		goto out;
3750 
3751 	for_each_prime_number_from(prime, 1, 8192) {
3752 		times[1] = ktime_get_raw();
3753 
3754 		if (flags & CHAIN) {
3755 			for (nc = 0; nc < nctx; nc++) {
3756 				for (n = 0; n < prime; n++) {
3757 					struct i915_request *rq;
3758 
3759 					rq = i915_request_create(ve[nc]);
3760 					if (IS_ERR(rq)) {
3761 						err = PTR_ERR(rq);
3762 						goto out;
3763 					}
3764 
3765 					if (request[nc])
3766 						i915_request_put(request[nc]);
3767 					request[nc] = i915_request_get(rq);
3768 					i915_request_add(rq);
3769 				}
3770 			}
3771 		} else {
3772 			for (n = 0; n < prime; n++) {
3773 				for (nc = 0; nc < nctx; nc++) {
3774 					struct i915_request *rq;
3775 
3776 					rq = i915_request_create(ve[nc]);
3777 					if (IS_ERR(rq)) {
3778 						err = PTR_ERR(rq);
3779 						goto out;
3780 					}
3781 
3782 					if (request[nc])
3783 						i915_request_put(request[nc]);
3784 					request[nc] = i915_request_get(rq);
3785 					i915_request_add(rq);
3786 				}
3787 			}
3788 		}
3789 
3790 		for (nc = 0; nc < nctx; nc++) {
3791 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3792 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3793 				       __func__, ve[0]->engine->name,
3794 				       request[nc]->fence.context,
3795 				       request[nc]->fence.seqno);
3796 
3797 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3798 					  __func__, ve[0]->engine->name,
3799 					  request[nc]->fence.context,
3800 					  request[nc]->fence.seqno);
3801 				GEM_TRACE_DUMP();
3802 				intel_gt_set_wedged(gt);
3803 				break;
3804 			}
3805 		}
3806 
3807 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3808 		if (prime == 1)
3809 			times[0] = times[1];
3810 
3811 		for (nc = 0; nc < nctx; nc++) {
3812 			i915_request_put(request[nc]);
3813 			request[nc] = NULL;
3814 		}
3815 
3816 		if (__igt_timeout(end_time, NULL))
3817 			break;
3818 	}
3819 
3820 	err = igt_live_test_end(&t);
3821 	if (err)
3822 		goto out;
3823 
3824 	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3825 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3826 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3827 
3828 out:
3829 	if (igt_flush_test(gt->i915))
3830 		err = -EIO;
3831 
3832 	for (nc = 0; nc < nctx; nc++) {
3833 		i915_request_put(request[nc]);
3834 		intel_context_unpin(ve[nc]);
3835 		intel_context_put(ve[nc]);
3836 	}
3837 	return err;
3838 }
3839 
3840 static unsigned int
3841 __select_siblings(struct intel_gt *gt,
3842 		  unsigned int class,
3843 		  struct intel_engine_cs **siblings,
3844 		  bool (*filter)(const struct intel_engine_cs *))
3845 {
3846 	unsigned int n = 0;
3847 	unsigned int inst;
3848 
3849 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3850 		if (!gt->engine_class[class][inst])
3851 			continue;
3852 
3853 		if (filter && !filter(gt->engine_class[class][inst]))
3854 			continue;
3855 
3856 		siblings[n++] = gt->engine_class[class][inst];
3857 	}
3858 
3859 	return n;
3860 }
3861 
3862 static unsigned int
3863 select_siblings(struct intel_gt *gt,
3864 		unsigned int class,
3865 		struct intel_engine_cs **siblings)
3866 {
3867 	return __select_siblings(gt, class, siblings, NULL);
3868 }
3869 
3870 static int live_virtual_engine(void *arg)
3871 {
3872 	struct intel_gt *gt = arg;
3873 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3874 	struct intel_engine_cs *engine;
3875 	enum intel_engine_id id;
3876 	unsigned int class;
3877 	int err;
3878 
3879 	if (intel_uc_uses_guc_submission(&gt->uc))
3880 		return 0;
3881 
3882 	for_each_engine(engine, gt, id) {
3883 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3884 		if (err) {
3885 			pr_err("Failed to wrap engine %s: err=%d\n",
3886 			       engine->name, err);
3887 			return err;
3888 		}
3889 	}
3890 
3891 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3892 		int nsibling, n;
3893 
3894 		nsibling = select_siblings(gt, class, siblings);
3895 		if (nsibling < 2)
3896 			continue;
3897 
3898 		for (n = 1; n <= nsibling + 1; n++) {
3899 			err = nop_virtual_engine(gt, siblings, nsibling,
3900 						 n, 0);
3901 			if (err)
3902 				return err;
3903 		}
3904 
3905 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3906 		if (err)
3907 			return err;
3908 	}
3909 
3910 	return 0;
3911 }
3912 
3913 static int mask_virtual_engine(struct intel_gt *gt,
3914 			       struct intel_engine_cs **siblings,
3915 			       unsigned int nsibling)
3916 {
3917 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3918 	struct intel_context *ve;
3919 	struct igt_live_test t;
3920 	unsigned int n;
3921 	int err;
3922 
3923 	/*
3924 	 * Check that by setting the execution mask on a request, we can
3925 	 * restrict it to our desired engine within the virtual engine.
3926 	 */
3927 
3928 	ve = intel_engine_create_virtual(siblings, nsibling, 0);
3929 	if (IS_ERR(ve)) {
3930 		err = PTR_ERR(ve);
3931 		goto out_close;
3932 	}
3933 
3934 	err = intel_context_pin(ve);
3935 	if (err)
3936 		goto out_put;
3937 
3938 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3939 	if (err)
3940 		goto out_unpin;
3941 
3942 	for (n = 0; n < nsibling; n++) {
3943 		request[n] = i915_request_create(ve);
3944 		if (IS_ERR(request[n])) {
3945 			err = PTR_ERR(request[n]);
3946 			nsibling = n;
3947 			goto out;
3948 		}
3949 
3950 		/* Reverse order as it's more likely to be unnatural */
3951 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3952 
3953 		i915_request_get(request[n]);
3954 		i915_request_add(request[n]);
3955 	}
3956 
3957 	for (n = 0; n < nsibling; n++) {
3958 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3959 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3960 			       __func__, ve->engine->name,
3961 			       request[n]->fence.context,
3962 			       request[n]->fence.seqno);
3963 
3964 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3965 				  __func__, ve->engine->name,
3966 				  request[n]->fence.context,
3967 				  request[n]->fence.seqno);
3968 			GEM_TRACE_DUMP();
3969 			intel_gt_set_wedged(gt);
3970 			err = -EIO;
3971 			goto out;
3972 		}
3973 
3974 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3975 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3976 			       request[n]->engine->name,
3977 			       siblings[nsibling - n - 1]->name);
3978 			err = -EINVAL;
3979 			goto out;
3980 		}
3981 	}
3982 
3983 	err = igt_live_test_end(&t);
3984 out:
3985 	if (igt_flush_test(gt->i915))
3986 		err = -EIO;
3987 
3988 	for (n = 0; n < nsibling; n++)
3989 		i915_request_put(request[n]);
3990 
3991 out_unpin:
3992 	intel_context_unpin(ve);
3993 out_put:
3994 	intel_context_put(ve);
3995 out_close:
3996 	return err;
3997 }
3998 
3999 static int live_virtual_mask(void *arg)
4000 {
4001 	struct intel_gt *gt = arg;
4002 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4003 	unsigned int class;
4004 	int err;
4005 
4006 	if (intel_uc_uses_guc_submission(&gt->uc))
4007 		return 0;
4008 
4009 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4010 		unsigned int nsibling;
4011 
4012 		nsibling = select_siblings(gt, class, siblings);
4013 		if (nsibling < 2)
4014 			continue;
4015 
4016 		err = mask_virtual_engine(gt, siblings, nsibling);
4017 		if (err)
4018 			return err;
4019 	}
4020 
4021 	return 0;
4022 }
4023 
4024 static int slicein_virtual_engine(struct intel_gt *gt,
4025 				  struct intel_engine_cs **siblings,
4026 				  unsigned int nsibling)
4027 {
4028 	const long timeout = slice_timeout(siblings[0]);
4029 	struct intel_context *ce;
4030 	struct i915_request *rq;
4031 	struct igt_spinner spin;
4032 	unsigned int n;
4033 	int err = 0;
4034 
4035 	/*
4036 	 * Virtual requests must take part in timeslicing on the target engines.
4037 	 */
4038 
4039 	if (igt_spinner_init(&spin, gt))
4040 		return -ENOMEM;
4041 
4042 	for (n = 0; n < nsibling; n++) {
4043 		ce = intel_context_create(siblings[n]);
4044 		if (IS_ERR(ce)) {
4045 			err = PTR_ERR(ce);
4046 			goto out;
4047 		}
4048 
4049 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4050 		intel_context_put(ce);
4051 		if (IS_ERR(rq)) {
4052 			err = PTR_ERR(rq);
4053 			goto out;
4054 		}
4055 
4056 		i915_request_add(rq);
4057 	}
4058 
4059 	ce = intel_engine_create_virtual(siblings, nsibling, 0);
4060 	if (IS_ERR(ce)) {
4061 		err = PTR_ERR(ce);
4062 		goto out;
4063 	}
4064 
4065 	rq = intel_context_create_request(ce);
4066 	intel_context_put(ce);
4067 	if (IS_ERR(rq)) {
4068 		err = PTR_ERR(rq);
4069 		goto out;
4070 	}
4071 
4072 	i915_request_get(rq);
4073 	i915_request_add(rq);
4074 	if (i915_request_wait(rq, 0, timeout) < 0) {
4075 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4076 			      __func__, rq->engine->name);
4077 		GEM_TRACE_DUMP();
4078 		intel_gt_set_wedged(gt);
4079 		err = -EIO;
4080 	}
4081 	i915_request_put(rq);
4082 
4083 out:
4084 	igt_spinner_end(&spin);
4085 	if (igt_flush_test(gt->i915))
4086 		err = -EIO;
4087 	igt_spinner_fini(&spin);
4088 	return err;
4089 }
4090 
4091 static int sliceout_virtual_engine(struct intel_gt *gt,
4092 				   struct intel_engine_cs **siblings,
4093 				   unsigned int nsibling)
4094 {
4095 	const long timeout = slice_timeout(siblings[0]);
4096 	struct intel_context *ce;
4097 	struct i915_request *rq;
4098 	struct igt_spinner spin;
4099 	unsigned int n;
4100 	int err = 0;
4101 
4102 	/*
4103 	 * Virtual requests must allow others a fair timeslice.
4104 	 */
4105 
4106 	if (igt_spinner_init(&spin, gt))
4107 		return -ENOMEM;
4108 
4109 	/* XXX We do not handle oversubscription and fairness with normal rq */
4110 	for (n = 0; n < nsibling; n++) {
4111 		ce = intel_engine_create_virtual(siblings, nsibling, 0);
4112 		if (IS_ERR(ce)) {
4113 			err = PTR_ERR(ce);
4114 			goto out;
4115 		}
4116 
4117 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4118 		intel_context_put(ce);
4119 		if (IS_ERR(rq)) {
4120 			err = PTR_ERR(rq);
4121 			goto out;
4122 		}
4123 
4124 		i915_request_add(rq);
4125 	}
4126 
4127 	for (n = 0; !err && n < nsibling; n++) {
4128 		ce = intel_context_create(siblings[n]);
4129 		if (IS_ERR(ce)) {
4130 			err = PTR_ERR(ce);
4131 			goto out;
4132 		}
4133 
4134 		rq = intel_context_create_request(ce);
4135 		intel_context_put(ce);
4136 		if (IS_ERR(rq)) {
4137 			err = PTR_ERR(rq);
4138 			goto out;
4139 		}
4140 
4141 		i915_request_get(rq);
4142 		i915_request_add(rq);
4143 		if (i915_request_wait(rq, 0, timeout) < 0) {
4144 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4145 				      __func__, siblings[n]->name);
4146 			GEM_TRACE_DUMP();
4147 			intel_gt_set_wedged(gt);
4148 			err = -EIO;
4149 		}
4150 		i915_request_put(rq);
4151 	}
4152 
4153 out:
4154 	igt_spinner_end(&spin);
4155 	if (igt_flush_test(gt->i915))
4156 		err = -EIO;
4157 	igt_spinner_fini(&spin);
4158 	return err;
4159 }
4160 
4161 static int live_virtual_slice(void *arg)
4162 {
4163 	struct intel_gt *gt = arg;
4164 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4165 	unsigned int class;
4166 	int err;
4167 
4168 	if (intel_uc_uses_guc_submission(&gt->uc))
4169 		return 0;
4170 
4171 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4172 		unsigned int nsibling;
4173 
4174 		nsibling = __select_siblings(gt, class, siblings,
4175 					     intel_engine_has_timeslices);
4176 		if (nsibling < 2)
4177 			continue;
4178 
4179 		err = slicein_virtual_engine(gt, siblings, nsibling);
4180 		if (err)
4181 			return err;
4182 
4183 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4184 		if (err)
4185 			return err;
4186 	}
4187 
4188 	return 0;
4189 }
4190 
4191 static int preserved_virtual_engine(struct intel_gt *gt,
4192 				    struct intel_engine_cs **siblings,
4193 				    unsigned int nsibling)
4194 {
4195 	struct i915_request *last = NULL;
4196 	struct intel_context *ve;
4197 	struct i915_vma *scratch;
4198 	struct igt_live_test t;
4199 	unsigned int n;
4200 	int err = 0;
4201 	u32 *cs;
4202 
4203 	scratch =
4204 		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4205 						    PAGE_SIZE);
4206 	if (IS_ERR(scratch))
4207 		return PTR_ERR(scratch);
4208 
4209 	err = i915_vma_sync(scratch);
4210 	if (err)
4211 		goto out_scratch;
4212 
4213 	ve = intel_engine_create_virtual(siblings, nsibling, 0);
4214 	if (IS_ERR(ve)) {
4215 		err = PTR_ERR(ve);
4216 		goto out_scratch;
4217 	}
4218 
4219 	err = intel_context_pin(ve);
4220 	if (err)
4221 		goto out_put;
4222 
4223 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4224 	if (err)
4225 		goto out_unpin;
4226 
4227 	for (n = 0; n < NUM_GPR_DW; n++) {
4228 		struct intel_engine_cs *engine = siblings[n % nsibling];
4229 		struct i915_request *rq;
4230 
4231 		rq = i915_request_create(ve);
4232 		if (IS_ERR(rq)) {
4233 			err = PTR_ERR(rq);
4234 			goto out_end;
4235 		}
4236 
4237 		i915_request_put(last);
4238 		last = i915_request_get(rq);
4239 
4240 		cs = intel_ring_begin(rq, 8);
4241 		if (IS_ERR(cs)) {
4242 			i915_request_add(rq);
4243 			err = PTR_ERR(cs);
4244 			goto out_end;
4245 		}
4246 
4247 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4248 		*cs++ = CS_GPR(engine, n);
4249 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4250 		*cs++ = 0;
4251 
4252 		*cs++ = MI_LOAD_REGISTER_IMM(1);
4253 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4254 		*cs++ = n + 1;
4255 
4256 		*cs++ = MI_NOOP;
4257 		intel_ring_advance(rq, cs);
4258 
4259 		/* Restrict this request to run on a particular engine */
4260 		rq->execution_mask = engine->mask;
4261 		i915_request_add(rq);
4262 	}
4263 
4264 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
4265 		err = -ETIME;
4266 		goto out_end;
4267 	}
4268 
4269 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4270 	if (IS_ERR(cs)) {
4271 		err = PTR_ERR(cs);
4272 		goto out_end;
4273 	}
4274 
4275 	for (n = 0; n < NUM_GPR_DW; n++) {
4276 		if (cs[n] != n) {
4277 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
4278 			       cs[n], n);
4279 			err = -EINVAL;
4280 			break;
4281 		}
4282 	}
4283 
4284 	i915_gem_object_unpin_map(scratch->obj);
4285 
4286 out_end:
4287 	if (igt_live_test_end(&t))
4288 		err = -EIO;
4289 	i915_request_put(last);
4290 out_unpin:
4291 	intel_context_unpin(ve);
4292 out_put:
4293 	intel_context_put(ve);
4294 out_scratch:
4295 	i915_vma_unpin_and_release(&scratch, 0);
4296 	return err;
4297 }
4298 
4299 static int live_virtual_preserved(void *arg)
4300 {
4301 	struct intel_gt *gt = arg;
4302 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4303 	unsigned int class;
4304 
4305 	/*
4306 	 * Check that the context image retains non-privileged (user) registers
4307 	 * from one engine to the next. For this we check that the CS_GPR
4308 	 * are preserved.
4309 	 */
4310 
4311 	if (intel_uc_uses_guc_submission(&gt->uc))
4312 		return 0;
4313 
4314 	/* As we use CS_GPR we cannot run before they existed on all engines. */
4315 	if (GRAPHICS_VER(gt->i915) < 9)
4316 		return 0;
4317 
4318 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4319 		int nsibling, err;
4320 
4321 		nsibling = select_siblings(gt, class, siblings);
4322 		if (nsibling < 2)
4323 			continue;
4324 
4325 		err = preserved_virtual_engine(gt, siblings, nsibling);
4326 		if (err)
4327 			return err;
4328 	}
4329 
4330 	return 0;
4331 }
4332 
4333 static int reset_virtual_engine(struct intel_gt *gt,
4334 				struct intel_engine_cs **siblings,
4335 				unsigned int nsibling)
4336 {
4337 	struct intel_engine_cs *engine;
4338 	struct intel_context *ve;
4339 	struct igt_spinner spin;
4340 	struct i915_request *rq;
4341 	unsigned int n;
4342 	int err = 0;
4343 
4344 	/*
4345 	 * In order to support offline error capture for fast preempt reset,
4346 	 * we need to decouple the guilty request and ensure that it and its
4347 	 * descendents are not executed while the capture is in progress.
4348 	 */
4349 
4350 	if (igt_spinner_init(&spin, gt))
4351 		return -ENOMEM;
4352 
4353 	ve = intel_engine_create_virtual(siblings, nsibling, 0);
4354 	if (IS_ERR(ve)) {
4355 		err = PTR_ERR(ve);
4356 		goto out_spin;
4357 	}
4358 
4359 	for (n = 0; n < nsibling; n++)
4360 		st_engine_heartbeat_disable(siblings[n]);
4361 
4362 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4363 	if (IS_ERR(rq)) {
4364 		err = PTR_ERR(rq);
4365 		goto out_heartbeat;
4366 	}
4367 	i915_request_add(rq);
4368 
4369 	if (!igt_wait_for_spinner(&spin, rq)) {
4370 		intel_gt_set_wedged(gt);
4371 		err = -ETIME;
4372 		goto out_heartbeat;
4373 	}
4374 
4375 	engine = rq->engine;
4376 	GEM_BUG_ON(engine == ve->engine);
4377 
4378 	/* Take ownership of the reset and tasklet */
4379 	err = engine_lock_reset_tasklet(engine);
4380 	if (err)
4381 		goto out_heartbeat;
4382 
4383 	engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
4384 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4385 
4386 	/* Fake a preemption event; failed of course */
4387 	spin_lock_irq(&engine->sched_engine->lock);
4388 	__unwind_incomplete_requests(engine);
4389 	spin_unlock_irq(&engine->sched_engine->lock);
4390 	GEM_BUG_ON(rq->engine != engine);
4391 
4392 	/* Reset the engine while keeping our active request on hold */
4393 	execlists_hold(engine, rq);
4394 	GEM_BUG_ON(!i915_request_on_hold(rq));
4395 
4396 	__intel_engine_reset_bh(engine, NULL);
4397 	GEM_BUG_ON(rq->fence.error != -EIO);
4398 
4399 	/* Release our grasp on the engine, letting CS flow again */
4400 	engine_unlock_reset_tasklet(engine);
4401 
4402 	/* Check that we do not resubmit the held request */
4403 	i915_request_get(rq);
4404 	if (!i915_request_wait(rq, 0, HZ / 5)) {
4405 		pr_err("%s: on hold request completed!\n",
4406 		       engine->name);
4407 		intel_gt_set_wedged(gt);
4408 		err = -EIO;
4409 		goto out_rq;
4410 	}
4411 	GEM_BUG_ON(!i915_request_on_hold(rq));
4412 
4413 	/* But is resubmitted on release */
4414 	execlists_unhold(engine, rq);
4415 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4416 		pr_err("%s: held request did not complete!\n",
4417 		       engine->name);
4418 		intel_gt_set_wedged(gt);
4419 		err = -ETIME;
4420 	}
4421 
4422 out_rq:
4423 	i915_request_put(rq);
4424 out_heartbeat:
4425 	for (n = 0; n < nsibling; n++)
4426 		st_engine_heartbeat_enable(siblings[n]);
4427 
4428 	intel_context_put(ve);
4429 out_spin:
4430 	igt_spinner_fini(&spin);
4431 	return err;
4432 }
4433 
4434 static int live_virtual_reset(void *arg)
4435 {
4436 	struct intel_gt *gt = arg;
4437 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4438 	unsigned int class;
4439 
4440 	/*
4441 	 * Check that we handle a reset event within a virtual engine.
4442 	 * Only the physical engine is reset, but we have to check the flow
4443 	 * of the virtual requests around the reset, and make sure it is not
4444 	 * forgotten.
4445 	 */
4446 
4447 	if (intel_uc_uses_guc_submission(&gt->uc))
4448 		return 0;
4449 
4450 	if (!intel_has_reset_engine(gt))
4451 		return 0;
4452 
4453 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4454 		int nsibling, err;
4455 
4456 		nsibling = select_siblings(gt, class, siblings);
4457 		if (nsibling < 2)
4458 			continue;
4459 
4460 		err = reset_virtual_engine(gt, siblings, nsibling);
4461 		if (err)
4462 			return err;
4463 	}
4464 
4465 	return 0;
4466 }
4467 
4468 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4469 {
4470 	static const struct i915_subtest tests[] = {
4471 		SUBTEST(live_sanitycheck),
4472 		SUBTEST(live_unlite_switch),
4473 		SUBTEST(live_unlite_preempt),
4474 		SUBTEST(live_unlite_ring),
4475 		SUBTEST(live_pin_rewind),
4476 		SUBTEST(live_hold_reset),
4477 		SUBTEST(live_error_interrupt),
4478 		SUBTEST(live_timeslice_preempt),
4479 		SUBTEST(live_timeslice_rewind),
4480 		SUBTEST(live_timeslice_queue),
4481 		SUBTEST(live_timeslice_nopreempt),
4482 		SUBTEST(live_busywait_preempt),
4483 		SUBTEST(live_preempt),
4484 		SUBTEST(live_late_preempt),
4485 		SUBTEST(live_nopreempt),
4486 		SUBTEST(live_preempt_cancel),
4487 		SUBTEST(live_suppress_self_preempt),
4488 		SUBTEST(live_chain_preempt),
4489 		SUBTEST(live_preempt_ring),
4490 		SUBTEST(live_preempt_gang),
4491 		SUBTEST(live_preempt_timeout),
4492 		SUBTEST(live_preempt_user),
4493 		SUBTEST(live_preempt_smoke),
4494 		SUBTEST(live_virtual_engine),
4495 		SUBTEST(live_virtual_mask),
4496 		SUBTEST(live_virtual_preserved),
4497 		SUBTEST(live_virtual_slice),
4498 		SUBTEST(live_virtual_reset),
4499 	};
4500 
4501 	if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
4502 		return 0;
4503 
4504 	if (intel_gt_is_wedged(to_gt(i915)))
4505 		return 0;
4506 
4507 	return intel_gt_live_subtests(tests, to_gt(i915));
4508 }
4509