xref: /linux/drivers/gpu/drm/i915/gt/selftest_execlists.c (revision a3cbcadfdfc330c28a45f06e8f92fd1d59aafa19)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_pm.h"
9 #include "gt/intel_engine_heartbeat.h"
10 #include "gt/intel_reset.h"
11 #include "gt/selftest_engine_heartbeat.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
/* Register offset of GPR dword @n, counted from the engine's mmio_base + 0x600 */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + 4 * (n))
/* Number of GPRs covered by the tests */
#define NUM_GPR 16
/* Each GPR is 2 dwords wide */
#define NUM_GPR_DW (2 * NUM_GPR)
26 
27 static bool is_active(struct i915_request *rq)
28 {
29 	if (i915_request_is_active(rq))
30 		return true;
31 
32 	if (i915_request_on_hold(rq))
33 		return true;
34 
35 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
36 		return true;
37 
38 	return false;
39 }
40 
41 static int wait_for_submit(struct intel_engine_cs *engine,
42 			   struct i915_request *rq,
43 			   unsigned long timeout)
44 {
45 	/* Ignore our own attempts to suppress excess tasklets */
46 	tasklet_hi_schedule(&engine->execlists.tasklet);
47 
48 	timeout += jiffies;
49 	do {
50 		bool done = time_after(jiffies, timeout);
51 
52 		if (i915_request_completed(rq)) /* that was quick! */
53 			return 0;
54 
55 		/* Wait until the HW has acknowleged the submission (or err) */
56 		intel_engine_flush_submission(engine);
57 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
58 			return 0;
59 
60 		if (done)
61 			return -ETIME;
62 
63 		cond_resched();
64 	} while (1);
65 }
66 
67 static int wait_for_reset(struct intel_engine_cs *engine,
68 			  struct i915_request *rq,
69 			  unsigned long timeout)
70 {
71 	timeout += jiffies;
72 
73 	do {
74 		cond_resched();
75 		intel_engine_flush_submission(engine);
76 
77 		if (READ_ONCE(engine->execlists.pending[0]))
78 			continue;
79 
80 		if (i915_request_completed(rq))
81 			break;
82 
83 		if (READ_ONCE(rq->fence.error))
84 			break;
85 	} while (time_before(jiffies, timeout));
86 
87 	flush_scheduled_work();
88 
89 	if (rq->fence.error != -EIO) {
90 		pr_err("%s: hanging request %llx:%lld not reset\n",
91 		       engine->name,
92 		       rq->fence.context,
93 		       rq->fence.seqno);
94 		return -EINVAL;
95 	}
96 
97 	/* Give the request a jiffie to complete after flushing the worker */
98 	if (i915_request_wait(rq, 0,
99 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100 		pr_err("%s: hanging request %llx:%lld did not complete\n",
101 		       engine->name,
102 		       rq->fence.context,
103 		       rq->fence.seqno);
104 		return -ETIME;
105 	}
106 
107 	return 0;
108 }
109 
110 static int live_sanitycheck(void *arg)
111 {
112 	struct intel_gt *gt = arg;
113 	struct intel_engine_cs *engine;
114 	enum intel_engine_id id;
115 	struct igt_spinner spin;
116 	int err = 0;
117 
118 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
119 		return 0;
120 
121 	if (igt_spinner_init(&spin, gt))
122 		return -ENOMEM;
123 
124 	for_each_engine(engine, gt, id) {
125 		struct intel_context *ce;
126 		struct i915_request *rq;
127 
128 		ce = intel_context_create(engine);
129 		if (IS_ERR(ce)) {
130 			err = PTR_ERR(ce);
131 			break;
132 		}
133 
134 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
135 		if (IS_ERR(rq)) {
136 			err = PTR_ERR(rq);
137 			goto out_ctx;
138 		}
139 
140 		i915_request_add(rq);
141 		if (!igt_wait_for_spinner(&spin, rq)) {
142 			GEM_TRACE("spinner failed to start\n");
143 			GEM_TRACE_DUMP();
144 			intel_gt_set_wedged(gt);
145 			err = -EIO;
146 			goto out_ctx;
147 		}
148 
149 		igt_spinner_end(&spin);
150 		if (igt_flush_test(gt->i915)) {
151 			err = -EIO;
152 			goto out_ctx;
153 		}
154 
155 out_ctx:
156 		intel_context_put(ce);
157 		if (err)
158 			break;
159 	}
160 
161 	igt_spinner_fini(&spin);
162 	return err;
163 }
164 
165 static int live_unlite_restore(struct intel_gt *gt, int prio)
166 {
167 	struct intel_engine_cs *engine;
168 	enum intel_engine_id id;
169 	struct igt_spinner spin;
170 	int err = -ENOMEM;
171 
172 	/*
173 	 * Check that we can correctly context switch between 2 instances
174 	 * on the same engine from the same parent context.
175 	 */
176 
177 	if (igt_spinner_init(&spin, gt))
178 		return err;
179 
180 	err = 0;
181 	for_each_engine(engine, gt, id) {
182 		struct intel_context *ce[2] = {};
183 		struct i915_request *rq[2];
184 		struct igt_live_test t;
185 		int n;
186 
187 		if (prio && !intel_engine_has_preemption(engine))
188 			continue;
189 
190 		if (!intel_engine_can_store_dword(engine))
191 			continue;
192 
193 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
194 			err = -EIO;
195 			break;
196 		}
197 		st_engine_heartbeat_disable(engine);
198 
199 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
200 			struct intel_context *tmp;
201 
202 			tmp = intel_context_create(engine);
203 			if (IS_ERR(tmp)) {
204 				err = PTR_ERR(tmp);
205 				goto err_ce;
206 			}
207 
208 			err = intel_context_pin(tmp);
209 			if (err) {
210 				intel_context_put(tmp);
211 				goto err_ce;
212 			}
213 
214 			/*
215 			 * Setup the pair of contexts such that if we
216 			 * lite-restore using the RING_TAIL from ce[1] it
217 			 * will execute garbage from ce[0]->ring.
218 			 */
219 			memset(tmp->ring->vaddr,
220 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221 			       tmp->ring->vma->size);
222 
223 			ce[n] = tmp;
224 		}
225 		GEM_BUG_ON(!ce[1]->ring->size);
226 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
228 
229 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
230 		if (IS_ERR(rq[0])) {
231 			err = PTR_ERR(rq[0]);
232 			goto err_ce;
233 		}
234 
235 		i915_request_get(rq[0]);
236 		i915_request_add(rq[0]);
237 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
238 
239 		if (!igt_wait_for_spinner(&spin, rq[0])) {
240 			i915_request_put(rq[0]);
241 			goto err_ce;
242 		}
243 
244 		rq[1] = i915_request_create(ce[1]);
245 		if (IS_ERR(rq[1])) {
246 			err = PTR_ERR(rq[1]);
247 			i915_request_put(rq[0]);
248 			goto err_ce;
249 		}
250 
251 		if (!prio) {
252 			/*
253 			 * Ensure we do the switch to ce[1] on completion.
254 			 *
255 			 * rq[0] is already submitted, so this should reduce
256 			 * to a no-op (a wait on a request on the same engine
257 			 * uses the submit fence, not the completion fence),
258 			 * but it will install a dependency on rq[1] for rq[0]
259 			 * that will prevent the pair being reordered by
260 			 * timeslicing.
261 			 */
262 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
263 		}
264 
265 		i915_request_get(rq[1]);
266 		i915_request_add(rq[1]);
267 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268 		i915_request_put(rq[0]);
269 
270 		if (prio) {
271 			struct i915_sched_attr attr = {
272 				.priority = prio,
273 			};
274 
275 			/* Alternatively preempt the spinner with ce[1] */
276 			engine->schedule(rq[1], &attr);
277 		}
278 
279 		/* And switch back to ce[0] for good measure */
280 		rq[0] = i915_request_create(ce[0]);
281 		if (IS_ERR(rq[0])) {
282 			err = PTR_ERR(rq[0]);
283 			i915_request_put(rq[1]);
284 			goto err_ce;
285 		}
286 
287 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288 		i915_request_get(rq[0]);
289 		i915_request_add(rq[0]);
290 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291 		i915_request_put(rq[1]);
292 		i915_request_put(rq[0]);
293 
294 err_ce:
295 		intel_engine_flush_submission(engine);
296 		igt_spinner_end(&spin);
297 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
298 			if (IS_ERR_OR_NULL(ce[n]))
299 				break;
300 
301 			intel_context_unpin(ce[n]);
302 			intel_context_put(ce[n]);
303 		}
304 
305 		st_engine_heartbeat_enable(engine);
306 		if (igt_live_test_end(&t))
307 			err = -EIO;
308 		if (err)
309 			break;
310 	}
311 
312 	igt_spinner_fini(&spin);
313 	return err;
314 }
315 
/* Run live_unlite_restore() with no priority bump (prio == 0) */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
320 
321 static int live_unlite_preempt(void *arg)
322 {
323 	return live_unlite_restore(arg, I915_PRIORITY_MAX);
324 }
325 
326 static int live_unlite_ring(void *arg)
327 {
328 	struct intel_gt *gt = arg;
329 	struct intel_engine_cs *engine;
330 	struct igt_spinner spin;
331 	enum intel_engine_id id;
332 	int err = 0;
333 
334 	/*
335 	 * Setup a preemption event that will cause almost the entire ring
336 	 * to be unwound, potentially fooling our intel_ring_direction()
337 	 * into emitting a forward lite-restore instead of the rollback.
338 	 */
339 
340 	if (igt_spinner_init(&spin, gt))
341 		return -ENOMEM;
342 
343 	for_each_engine(engine, gt, id) {
344 		struct intel_context *ce[2] = {};
345 		struct i915_request *rq;
346 		struct igt_live_test t;
347 		int n;
348 
349 		if (!intel_engine_has_preemption(engine))
350 			continue;
351 
352 		if (!intel_engine_can_store_dword(engine))
353 			continue;
354 
355 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
356 			err = -EIO;
357 			break;
358 		}
359 		st_engine_heartbeat_disable(engine);
360 
361 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
362 			struct intel_context *tmp;
363 
364 			tmp = intel_context_create(engine);
365 			if (IS_ERR(tmp)) {
366 				err = PTR_ERR(tmp);
367 				goto err_ce;
368 			}
369 
370 			err = intel_context_pin(tmp);
371 			if (err) {
372 				intel_context_put(tmp);
373 				goto err_ce;
374 			}
375 
376 			memset32(tmp->ring->vaddr,
377 				 0xdeadbeef, /* trigger a hang if executed */
378 				 tmp->ring->vma->size / sizeof(u32));
379 
380 			ce[n] = tmp;
381 		}
382 
383 		/* Create max prio spinner, followed by N low prio nops */
384 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
385 		if (IS_ERR(rq)) {
386 			err = PTR_ERR(rq);
387 			goto err_ce;
388 		}
389 
390 		i915_request_get(rq);
391 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392 		i915_request_add(rq);
393 
394 		if (!igt_wait_for_spinner(&spin, rq)) {
395 			intel_gt_set_wedged(gt);
396 			i915_request_put(rq);
397 			err = -ETIME;
398 			goto err_ce;
399 		}
400 
401 		/* Fill the ring, until we will cause a wrap */
402 		n = 0;
403 		while (intel_ring_direction(ce[0]->ring,
404 					    rq->wa_tail,
405 					    ce[0]->ring->tail) <= 0) {
406 			struct i915_request *tmp;
407 
408 			tmp = intel_context_create_request(ce[0]);
409 			if (IS_ERR(tmp)) {
410 				err = PTR_ERR(tmp);
411 				i915_request_put(rq);
412 				goto err_ce;
413 			}
414 
415 			i915_request_add(tmp);
416 			intel_engine_flush_submission(engine);
417 			n++;
418 		}
419 		intel_engine_flush_submission(engine);
420 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
421 			 engine->name, n,
422 			 ce[0]->ring->size,
423 			 ce[0]->ring->tail,
424 			 ce[0]->ring->emit,
425 			 rq->tail);
426 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
427 						rq->tail,
428 						ce[0]->ring->tail) <= 0);
429 		i915_request_put(rq);
430 
431 		/* Create a second ring to preempt the first ring after rq[0] */
432 		rq = intel_context_create_request(ce[1]);
433 		if (IS_ERR(rq)) {
434 			err = PTR_ERR(rq);
435 			goto err_ce;
436 		}
437 
438 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439 		i915_request_get(rq);
440 		i915_request_add(rq);
441 
442 		err = wait_for_submit(engine, rq, HZ / 2);
443 		i915_request_put(rq);
444 		if (err) {
445 			pr_err("%s: preemption request was not submitted\n",
446 			       engine->name);
447 			err = -ETIME;
448 		}
449 
450 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
451 			 engine->name,
452 			 ce[0]->ring->tail, ce[0]->ring->emit,
453 			 ce[1]->ring->tail, ce[1]->ring->emit);
454 
455 err_ce:
456 		intel_engine_flush_submission(engine);
457 		igt_spinner_end(&spin);
458 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
459 			if (IS_ERR_OR_NULL(ce[n]))
460 				break;
461 
462 			intel_context_unpin(ce[n]);
463 			intel_context_put(ce[n]);
464 		}
465 		st_engine_heartbeat_enable(engine);
466 		if (igt_live_test_end(&t))
467 			err = -EIO;
468 		if (err)
469 			break;
470 	}
471 
472 	igt_spinner_fini(&spin);
473 	return err;
474 }
475 
476 static int live_pin_rewind(void *arg)
477 {
478 	struct intel_gt *gt = arg;
479 	struct intel_engine_cs *engine;
480 	enum intel_engine_id id;
481 	int err = 0;
482 
483 	/*
484 	 * We have to be careful not to trust intel_ring too much, for example
485 	 * ring->head is updated upon retire which is out of sync with pinning
486 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487 	 * or else we risk writing an older, stale value.
488 	 *
489 	 * To simulate this, let's apply a bit of deliberate sabotague.
490 	 */
491 
492 	for_each_engine(engine, gt, id) {
493 		struct intel_context *ce;
494 		struct i915_request *rq;
495 		struct intel_ring *ring;
496 		struct igt_live_test t;
497 
498 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
499 			err = -EIO;
500 			break;
501 		}
502 
503 		ce = intel_context_create(engine);
504 		if (IS_ERR(ce)) {
505 			err = PTR_ERR(ce);
506 			break;
507 		}
508 
509 		err = intel_context_pin(ce);
510 		if (err) {
511 			intel_context_put(ce);
512 			break;
513 		}
514 
515 		/* Keep the context awake while we play games */
516 		err = i915_active_acquire(&ce->active);
517 		if (err) {
518 			intel_context_unpin(ce);
519 			intel_context_put(ce);
520 			break;
521 		}
522 		ring = ce->ring;
523 
524 		/* Poison the ring, and offset the next request from HEAD */
525 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526 		ring->emit = ring->size / 2;
527 		ring->tail = ring->emit;
528 		GEM_BUG_ON(ring->head);
529 
530 		intel_context_unpin(ce);
531 
532 		/* Submit a simple nop request */
533 		GEM_BUG_ON(intel_context_is_pinned(ce));
534 		rq = intel_context_create_request(ce);
535 		i915_active_release(&ce->active); /* e.g. async retire */
536 		intel_context_put(ce);
537 		if (IS_ERR(rq)) {
538 			err = PTR_ERR(rq);
539 			break;
540 		}
541 		GEM_BUG_ON(!rq->head);
542 		i915_request_add(rq);
543 
544 		/* Expect not to hang! */
545 		if (igt_live_test_end(&t)) {
546 			err = -EIO;
547 			break;
548 		}
549 	}
550 
551 	return err;
552 }
553 
554 static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
555 {
556 	tasklet_disable(&engine->execlists.tasklet);
557 	local_bh_disable();
558 
559 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
560 			     &engine->gt->reset.flags)) {
561 		local_bh_enable();
562 		tasklet_enable(&engine->execlists.tasklet);
563 
564 		intel_gt_set_wedged(engine->gt);
565 		return -EBUSY;
566 	}
567 
568 	return 0;
569 }
570 
571 static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
572 {
573 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
574 			      &engine->gt->reset.flags);
575 
576 	local_bh_enable();
577 	tasklet_enable(&engine->execlists.tasklet);
578 }
579 
580 static int live_hold_reset(void *arg)
581 {
582 	struct intel_gt *gt = arg;
583 	struct intel_engine_cs *engine;
584 	enum intel_engine_id id;
585 	struct igt_spinner spin;
586 	int err = 0;
587 
588 	/*
589 	 * In order to support offline error capture for fast preempt reset,
590 	 * we need to decouple the guilty request and ensure that it and its
591 	 * descendents are not executed while the capture is in progress.
592 	 */
593 
594 	if (!intel_has_reset_engine(gt))
595 		return 0;
596 
597 	if (igt_spinner_init(&spin, gt))
598 		return -ENOMEM;
599 
600 	for_each_engine(engine, gt, id) {
601 		struct intel_context *ce;
602 		struct i915_request *rq;
603 
604 		ce = intel_context_create(engine);
605 		if (IS_ERR(ce)) {
606 			err = PTR_ERR(ce);
607 			break;
608 		}
609 
610 		st_engine_heartbeat_disable(engine);
611 
612 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
613 		if (IS_ERR(rq)) {
614 			err = PTR_ERR(rq);
615 			goto out;
616 		}
617 		i915_request_add(rq);
618 
619 		if (!igt_wait_for_spinner(&spin, rq)) {
620 			intel_gt_set_wedged(gt);
621 			err = -ETIME;
622 			goto out;
623 		}
624 
625 		/* We have our request executing, now remove it and reset */
626 
627 		err = engine_lock_reset_tasklet(engine);
628 		if (err)
629 			goto out;
630 
631 		engine->execlists.tasklet.callback(&engine->execlists.tasklet);
632 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
633 
634 		i915_request_get(rq);
635 		execlists_hold(engine, rq);
636 		GEM_BUG_ON(!i915_request_on_hold(rq));
637 
638 		__intel_engine_reset_bh(engine, NULL);
639 		GEM_BUG_ON(rq->fence.error != -EIO);
640 
641 		engine_unlock_reset_tasklet(engine);
642 
643 		/* Check that we do not resubmit the held request */
644 		if (!i915_request_wait(rq, 0, HZ / 5)) {
645 			pr_err("%s: on hold request completed!\n",
646 			       engine->name);
647 			i915_request_put(rq);
648 			err = -EIO;
649 			goto out;
650 		}
651 		GEM_BUG_ON(!i915_request_on_hold(rq));
652 
653 		/* But is resubmitted on release */
654 		execlists_unhold(engine, rq);
655 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
656 			pr_err("%s: held request did not complete!\n",
657 			       engine->name);
658 			intel_gt_set_wedged(gt);
659 			err = -ETIME;
660 		}
661 		i915_request_put(rq);
662 
663 out:
664 		st_engine_heartbeat_enable(engine);
665 		intel_context_put(ce);
666 		if (err)
667 			break;
668 	}
669 
670 	igt_spinner_fini(&spin);
671 	return err;
672 }
673 
/* Describe an expected request outcome for log messages: any non-zero @err is "bad" */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
678 
679 static int live_error_interrupt(void *arg)
680 {
681 	static const struct error_phase {
682 		enum { GOOD = 0, BAD = -EIO } error[2];
683 	} phases[] = {
684 		{ { BAD,  GOOD } },
685 		{ { BAD,  BAD  } },
686 		{ { BAD,  GOOD } },
687 		{ { GOOD, GOOD } }, /* sentinel */
688 	};
689 	struct intel_gt *gt = arg;
690 	struct intel_engine_cs *engine;
691 	enum intel_engine_id id;
692 
693 	/*
694 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
695 	 * of invalid commands in user batches that will cause a GPU hang.
696 	 * This is a faster mechanism than using hangcheck/heartbeats, but
697 	 * only detects problems the HW knows about -- it will not warn when
698 	 * we kill the HW!
699 	 *
700 	 * To verify our detection and reset, we throw some invalid commands
701 	 * at the HW and wait for the interrupt.
702 	 */
703 
704 	if (!intel_has_reset_engine(gt))
705 		return 0;
706 
707 	for_each_engine(engine, gt, id) {
708 		const struct error_phase *p;
709 		int err = 0;
710 
711 		st_engine_heartbeat_disable(engine);
712 
713 		for (p = phases; p->error[0] != GOOD; p++) {
714 			struct i915_request *client[ARRAY_SIZE(phases->error)];
715 			u32 *cs;
716 			int i;
717 
718 			memset(client, 0, sizeof(*client));
719 			for (i = 0; i < ARRAY_SIZE(client); i++) {
720 				struct intel_context *ce;
721 				struct i915_request *rq;
722 
723 				ce = intel_context_create(engine);
724 				if (IS_ERR(ce)) {
725 					err = PTR_ERR(ce);
726 					goto out;
727 				}
728 
729 				rq = intel_context_create_request(ce);
730 				intel_context_put(ce);
731 				if (IS_ERR(rq)) {
732 					err = PTR_ERR(rq);
733 					goto out;
734 				}
735 
736 				if (rq->engine->emit_init_breadcrumb) {
737 					err = rq->engine->emit_init_breadcrumb(rq);
738 					if (err) {
739 						i915_request_add(rq);
740 						goto out;
741 					}
742 				}
743 
744 				cs = intel_ring_begin(rq, 2);
745 				if (IS_ERR(cs)) {
746 					i915_request_add(rq);
747 					err = PTR_ERR(cs);
748 					goto out;
749 				}
750 
751 				if (p->error[i]) {
752 					*cs++ = 0xdeadbeef;
753 					*cs++ = 0xdeadbeef;
754 				} else {
755 					*cs++ = MI_NOOP;
756 					*cs++ = MI_NOOP;
757 				}
758 
759 				client[i] = i915_request_get(rq);
760 				i915_request_add(rq);
761 			}
762 
763 			err = wait_for_submit(engine, client[0], HZ / 2);
764 			if (err) {
765 				pr_err("%s: first request did not start within time!\n",
766 				       engine->name);
767 				err = -ETIME;
768 				goto out;
769 			}
770 
771 			for (i = 0; i < ARRAY_SIZE(client); i++) {
772 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
773 					pr_debug("%s: %s request incomplete!\n",
774 						 engine->name,
775 						 error_repr(p->error[i]));
776 
777 				if (!i915_request_started(client[i])) {
778 					pr_err("%s: %s request not started!\n",
779 					       engine->name,
780 					       error_repr(p->error[i]));
781 					err = -ETIME;
782 					goto out;
783 				}
784 
785 				/* Kick the tasklet to process the error */
786 				intel_engine_flush_submission(engine);
787 				if (client[i]->fence.error != p->error[i]) {
788 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
789 					       engine->name,
790 					       error_repr(p->error[i]),
791 					       i915_request_completed(client[i]) ? "completed" : "running",
792 					       client[i]->fence.error);
793 					err = -EINVAL;
794 					goto out;
795 				}
796 			}
797 
798 out:
799 			for (i = 0; i < ARRAY_SIZE(client); i++)
800 				if (client[i])
801 					i915_request_put(client[i]);
802 			if (err) {
803 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
804 				       engine->name, p - phases,
805 				       p->error[0], p->error[1]);
806 				break;
807 			}
808 		}
809 
810 		st_engine_heartbeat_enable(engine);
811 		if (err) {
812 			intel_gt_set_wedged(gt);
813 			return err;
814 		}
815 	}
816 
817 	return 0;
818 }
819 
820 static int
821 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
822 {
823 	u32 *cs;
824 
825 	cs = intel_ring_begin(rq, 10);
826 	if (IS_ERR(cs))
827 		return PTR_ERR(cs);
828 
829 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
830 
831 	*cs++ = MI_SEMAPHORE_WAIT |
832 		MI_SEMAPHORE_GLOBAL_GTT |
833 		MI_SEMAPHORE_POLL |
834 		MI_SEMAPHORE_SAD_NEQ_SDD;
835 	*cs++ = 0;
836 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
837 	*cs++ = 0;
838 
839 	if (idx > 0) {
840 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
841 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
842 		*cs++ = 0;
843 		*cs++ = 1;
844 	} else {
845 		*cs++ = MI_NOOP;
846 		*cs++ = MI_NOOP;
847 		*cs++ = MI_NOOP;
848 		*cs++ = MI_NOOP;
849 	}
850 
851 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
852 
853 	intel_ring_advance(rq, cs);
854 	return 0;
855 }
856 
857 static struct i915_request *
858 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
859 {
860 	struct intel_context *ce;
861 	struct i915_request *rq;
862 	int err;
863 
864 	ce = intel_context_create(engine);
865 	if (IS_ERR(ce))
866 		return ERR_CAST(ce);
867 
868 	rq = intel_context_create_request(ce);
869 	if (IS_ERR(rq))
870 		goto out_ce;
871 
872 	err = 0;
873 	if (rq->engine->emit_init_breadcrumb)
874 		err = rq->engine->emit_init_breadcrumb(rq);
875 	if (err == 0)
876 		err = emit_semaphore_chain(rq, vma, idx);
877 	if (err == 0)
878 		i915_request_get(rq);
879 	i915_request_add(rq);
880 	if (err)
881 		rq = ERR_PTR(err);
882 
883 out_ce:
884 	intel_context_put(ce);
885 	return rq;
886 }
887 
888 static int
889 release_queue(struct intel_engine_cs *engine,
890 	      struct i915_vma *vma,
891 	      int idx, int prio)
892 {
893 	struct i915_sched_attr attr = {
894 		.priority = prio,
895 	};
896 	struct i915_request *rq;
897 	u32 *cs;
898 
899 	rq = intel_engine_create_kernel_request(engine);
900 	if (IS_ERR(rq))
901 		return PTR_ERR(rq);
902 
903 	cs = intel_ring_begin(rq, 4);
904 	if (IS_ERR(cs)) {
905 		i915_request_add(rq);
906 		return PTR_ERR(cs);
907 	}
908 
909 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
910 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
911 	*cs++ = 0;
912 	*cs++ = 1;
913 
914 	intel_ring_advance(rq, cs);
915 
916 	i915_request_get(rq);
917 	i915_request_add(rq);
918 
919 	local_bh_disable();
920 	engine->schedule(rq, &attr);
921 	local_bh_enable(); /* kick tasklet */
922 
923 	i915_request_put(rq);
924 
925 	return 0;
926 }
927 
928 static int
929 slice_semaphore_queue(struct intel_engine_cs *outer,
930 		      struct i915_vma *vma,
931 		      int count)
932 {
933 	struct intel_engine_cs *engine;
934 	struct i915_request *head;
935 	enum intel_engine_id id;
936 	int err, i, n = 0;
937 
938 	head = semaphore_queue(outer, vma, n++);
939 	if (IS_ERR(head))
940 		return PTR_ERR(head);
941 
942 	for_each_engine(engine, outer->gt, id) {
943 		if (!intel_engine_has_preemption(engine))
944 			continue;
945 
946 		for (i = 0; i < count; i++) {
947 			struct i915_request *rq;
948 
949 			rq = semaphore_queue(engine, vma, n++);
950 			if (IS_ERR(rq)) {
951 				err = PTR_ERR(rq);
952 				goto out;
953 			}
954 
955 			i915_request_put(rq);
956 		}
957 	}
958 
959 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
960 	if (err)
961 		goto out;
962 
963 	if (i915_request_wait(head, 0,
964 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
965 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
966 		       outer->name, count, n);
967 		GEM_TRACE_DUMP();
968 		intel_gt_set_wedged(outer->gt);
969 		err = -EIO;
970 	}
971 
972 out:
973 	i915_request_put(head);
974 	return err;
975 }
976 
977 static int live_timeslice_preempt(void *arg)
978 {
979 	struct intel_gt *gt = arg;
980 	struct drm_i915_gem_object *obj;
981 	struct intel_engine_cs *engine;
982 	enum intel_engine_id id;
983 	struct i915_vma *vma;
984 	void *vaddr;
985 	int err = 0;
986 
987 	/*
988 	 * If a request takes too long, we would like to give other users
989 	 * a fair go on the GPU. In particular, users may create batches
990 	 * that wait upon external input, where that input may even be
991 	 * supplied by another GPU job. To avoid blocking forever, we
992 	 * need to preempt the current task and replace it with another
993 	 * ready task.
994 	 */
995 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
996 		return 0;
997 
998 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
999 	if (IS_ERR(obj))
1000 		return PTR_ERR(obj);
1001 
1002 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1003 	if (IS_ERR(vma)) {
1004 		err = PTR_ERR(vma);
1005 		goto err_obj;
1006 	}
1007 
1008 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1009 	if (IS_ERR(vaddr)) {
1010 		err = PTR_ERR(vaddr);
1011 		goto err_obj;
1012 	}
1013 
1014 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1015 	if (err)
1016 		goto err_map;
1017 
1018 	err = i915_vma_sync(vma);
1019 	if (err)
1020 		goto err_pin;
1021 
1022 	for_each_engine(engine, gt, id) {
1023 		if (!intel_engine_has_preemption(engine))
1024 			continue;
1025 
1026 		memset(vaddr, 0, PAGE_SIZE);
1027 
1028 		st_engine_heartbeat_disable(engine);
1029 		err = slice_semaphore_queue(engine, vma, 5);
1030 		st_engine_heartbeat_enable(engine);
1031 		if (err)
1032 			goto err_pin;
1033 
1034 		if (igt_flush_test(gt->i915)) {
1035 			err = -EIO;
1036 			goto err_pin;
1037 		}
1038 	}
1039 
1040 err_pin:
1041 	i915_vma_unpin(vma);
1042 err_map:
1043 	i915_gem_object_unpin_map(obj);
1044 err_obj:
1045 	i915_gem_object_put(obj);
1046 	return err;
1047 }
1048 
1049 static struct i915_request *
1050 create_rewinder(struct intel_context *ce,
1051 		struct i915_request *wait,
1052 		void *slot, int idx)
1053 {
1054 	const u32 offset =
1055 		i915_ggtt_offset(ce->engine->status_page.vma) +
1056 		offset_in_page(slot);
1057 	struct i915_request *rq;
1058 	u32 *cs;
1059 	int err;
1060 
1061 	rq = intel_context_create_request(ce);
1062 	if (IS_ERR(rq))
1063 		return rq;
1064 
1065 	if (wait) {
1066 		err = i915_request_await_dma_fence(rq, &wait->fence);
1067 		if (err)
1068 			goto err;
1069 	}
1070 
1071 	cs = intel_ring_begin(rq, 14);
1072 	if (IS_ERR(cs)) {
1073 		err = PTR_ERR(cs);
1074 		goto err;
1075 	}
1076 
1077 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1078 	*cs++ = MI_NOOP;
1079 
1080 	*cs++ = MI_SEMAPHORE_WAIT |
1081 		MI_SEMAPHORE_GLOBAL_GTT |
1082 		MI_SEMAPHORE_POLL |
1083 		MI_SEMAPHORE_SAD_GTE_SDD;
1084 	*cs++ = idx;
1085 	*cs++ = offset;
1086 	*cs++ = 0;
1087 
1088 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1089 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1090 	*cs++ = offset + idx * sizeof(u32);
1091 	*cs++ = 0;
1092 
1093 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1094 	*cs++ = offset;
1095 	*cs++ = 0;
1096 	*cs++ = idx + 1;
1097 
1098 	intel_ring_advance(rq, cs);
1099 
1100 	err = 0;
1101 err:
1102 	i915_request_get(rq);
1103 	i915_request_add(rq);
1104 	if (err) {
1105 		i915_request_put(rq);
1106 		return ERR_PTR(err);
1107 	}
1108 
1109 	return rq;
1110 }
1111 
1112 static int live_timeslice_rewind(void *arg)
1113 {
1114 	struct intel_gt *gt = arg;
1115 	struct intel_engine_cs *engine;
1116 	enum intel_engine_id id;
1117 
1118 	/*
1119 	 * The usual presumption on timeslice expiration is that we replace
1120 	 * the active context with another. However, given a chain of
1121 	 * dependencies we may end up with replacing the context with itself,
1122 	 * but only a few of those requests, forcing us to rewind the
1123 	 * RING_TAIL of the original request.
1124 	 */
1125 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1126 		return 0;
1127 
1128 	for_each_engine(engine, gt, id) {
1129 		enum { A1, A2, B1 };
1130 		enum { X = 1, Z, Y };
1131 		struct i915_request *rq[3] = {};
1132 		struct intel_context *ce;
1133 		unsigned long timeslice;
1134 		int i, err = 0;
1135 		u32 *slot;
1136 
1137 		if (!intel_engine_has_timeslices(engine))
1138 			continue;
1139 
1140 		/*
1141 		 * A:rq1 -- semaphore wait, timestamp X
1142 		 * A:rq2 -- write timestamp Y
1143 		 *
1144 		 * B:rq1 [await A:rq1] -- write timestamp Z
1145 		 *
1146 		 * Force timeslice, release semaphore.
1147 		 *
1148 		 * Expect execution/evaluation order XZY
1149 		 */
1150 
1151 		st_engine_heartbeat_disable(engine);
1152 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1153 
1154 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1155 
1156 		ce = intel_context_create(engine);
1157 		if (IS_ERR(ce)) {
1158 			err = PTR_ERR(ce);
1159 			goto err;
1160 		}
1161 
1162 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1163 		if (IS_ERR(rq[A1])) {
1164 			intel_context_put(ce);
1165 			goto err;
1166 		}
1167 
1168 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1169 		intel_context_put(ce);
1170 		if (IS_ERR(rq[A2]))
1171 			goto err;
1172 
1173 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1174 		if (err) {
1175 			pr_err("%s: failed to submit first context\n",
1176 			       engine->name);
1177 			goto err;
1178 		}
1179 
1180 		ce = intel_context_create(engine);
1181 		if (IS_ERR(ce)) {
1182 			err = PTR_ERR(ce);
1183 			goto err;
1184 		}
1185 
1186 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1187 		intel_context_put(ce);
1188 		if (IS_ERR(rq[2]))
1189 			goto err;
1190 
1191 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1192 		if (err) {
1193 			pr_err("%s: failed to submit second context\n",
1194 			       engine->name);
1195 			goto err;
1196 		}
1197 
1198 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1199 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1200 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1201 			/* Wait for the timeslice to kick in */
1202 			del_timer(&engine->execlists.timer);
1203 			tasklet_hi_schedule(&engine->execlists.tasklet);
1204 			intel_engine_flush_submission(engine);
1205 		}
1206 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1207 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1208 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1209 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1210 
1211 		/* Release the hounds! */
1212 		slot[0] = 1;
1213 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1214 
1215 		for (i = 1; i <= 3; i++) {
1216 			unsigned long timeout = jiffies + HZ / 2;
1217 
1218 			while (!READ_ONCE(slot[i]) &&
1219 			       time_before(jiffies, timeout))
1220 				;
1221 
1222 			if (!time_before(jiffies, timeout)) {
1223 				pr_err("%s: rq[%d] timed out\n",
1224 				       engine->name, i - 1);
1225 				err = -ETIME;
1226 				goto err;
1227 			}
1228 
1229 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1230 		}
1231 
1232 		/* XZY: XZ < XY */
1233 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1234 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1235 			       engine->name,
1236 			       slot[Z] - slot[X],
1237 			       slot[Y] - slot[X]);
1238 			err = -EINVAL;
1239 		}
1240 
1241 err:
1242 		memset32(&slot[0], -1, 4);
1243 		wmb();
1244 
1245 		engine->props.timeslice_duration_ms = timeslice;
1246 		st_engine_heartbeat_enable(engine);
1247 		for (i = 0; i < 3; i++)
1248 			i915_request_put(rq[i]);
1249 		if (igt_flush_test(gt->i915))
1250 			err = -EIO;
1251 		if (err)
1252 			return err;
1253 	}
1254 
1255 	return 0;
1256 }
1257 
/*
 * Create an otherwise empty kernel request on @engine and submit it.
 *
 * A reference is taken before the request is added, so on success the
 * caller receives a request it must release with i915_request_put().
 * If creation fails, the error pointer is passed straight back.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *nop = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(nop)) {
		/* Hold our own reference across the submission */
		i915_request_get(nop);
		i915_request_add(nop);
	}

	return nop;
}
1271 
1272 static long slice_timeout(struct intel_engine_cs *engine)
1273 {
1274 	long timeout;
1275 
1276 	/* Enough time for a timeslice to kick in, and kick out */
1277 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1278 
1279 	/* Enough time for the nop request to complete */
1280 	timeout += HZ / 5;
1281 
1282 	return timeout + 1;
1283 }
1284 
1285 static int live_timeslice_queue(void *arg)
1286 {
1287 	struct intel_gt *gt = arg;
1288 	struct drm_i915_gem_object *obj;
1289 	struct intel_engine_cs *engine;
1290 	enum intel_engine_id id;
1291 	struct i915_vma *vma;
1292 	void *vaddr;
1293 	int err = 0;
1294 
1295 	/*
1296 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1297 	 * timeslicing between them disabled, we *do* enable timeslicing
1298 	 * if the queue demands it. (Normally, we do not submit if
1299 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1300 	 * eject ELSP[0] in favour of the queue.)
1301 	 */
1302 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1303 		return 0;
1304 
1305 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306 	if (IS_ERR(obj))
1307 		return PTR_ERR(obj);
1308 
1309 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1310 	if (IS_ERR(vma)) {
1311 		err = PTR_ERR(vma);
1312 		goto err_obj;
1313 	}
1314 
1315 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1316 	if (IS_ERR(vaddr)) {
1317 		err = PTR_ERR(vaddr);
1318 		goto err_obj;
1319 	}
1320 
1321 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1322 	if (err)
1323 		goto err_map;
1324 
1325 	err = i915_vma_sync(vma);
1326 	if (err)
1327 		goto err_pin;
1328 
1329 	for_each_engine(engine, gt, id) {
1330 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1331 		struct i915_request *rq, *nop;
1332 
1333 		if (!intel_engine_has_preemption(engine))
1334 			continue;
1335 
1336 		st_engine_heartbeat_disable(engine);
1337 		memset(vaddr, 0, PAGE_SIZE);
1338 
1339 		/* ELSP[0]: semaphore wait */
1340 		rq = semaphore_queue(engine, vma, 0);
1341 		if (IS_ERR(rq)) {
1342 			err = PTR_ERR(rq);
1343 			goto err_heartbeat;
1344 		}
1345 		engine->schedule(rq, &attr);
1346 		err = wait_for_submit(engine, rq, HZ / 2);
1347 		if (err) {
1348 			pr_err("%s: Timed out trying to submit semaphores\n",
1349 			       engine->name);
1350 			goto err_rq;
1351 		}
1352 
1353 		/* ELSP[1]: nop request */
1354 		nop = nop_request(engine);
1355 		if (IS_ERR(nop)) {
1356 			err = PTR_ERR(nop);
1357 			goto err_rq;
1358 		}
1359 		err = wait_for_submit(engine, nop, HZ / 2);
1360 		i915_request_put(nop);
1361 		if (err) {
1362 			pr_err("%s: Timed out trying to submit nop\n",
1363 			       engine->name);
1364 			goto err_rq;
1365 		}
1366 
1367 		GEM_BUG_ON(i915_request_completed(rq));
1368 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1369 
1370 		/* Queue: semaphore signal, matching priority as semaphore */
1371 		err = release_queue(engine, vma, 1, effective_prio(rq));
1372 		if (err)
1373 			goto err_rq;
1374 
1375 		/* Wait until we ack the release_queue and start timeslicing */
1376 		do {
1377 			cond_resched();
1378 			intel_engine_flush_submission(engine);
1379 		} while (READ_ONCE(engine->execlists.pending[0]));
1380 
1381 		/* Timeslice every jiffy, so within 2 we should signal */
1382 		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1383 			struct drm_printer p =
1384 				drm_info_printer(gt->i915->drm.dev);
1385 
1386 			pr_err("%s: Failed to timeslice into queue\n",
1387 			       engine->name);
1388 			intel_engine_dump(engine, &p,
1389 					  "%s\n", engine->name);
1390 
1391 			memset(vaddr, 0xff, PAGE_SIZE);
1392 			err = -EIO;
1393 		}
1394 err_rq:
1395 		i915_request_put(rq);
1396 err_heartbeat:
1397 		st_engine_heartbeat_enable(engine);
1398 		if (err)
1399 			break;
1400 	}
1401 
1402 err_pin:
1403 	i915_vma_unpin(vma);
1404 err_map:
1405 	i915_gem_object_unpin_map(obj);
1406 err_obj:
1407 	i915_gem_object_put(obj);
1408 	return err;
1409 }
1410 
1411 static int live_timeslice_nopreempt(void *arg)
1412 {
1413 	struct intel_gt *gt = arg;
1414 	struct intel_engine_cs *engine;
1415 	enum intel_engine_id id;
1416 	struct igt_spinner spin;
1417 	int err = 0;
1418 
1419 	/*
1420 	 * We should not timeslice into a request that is marked with
1421 	 * I915_REQUEST_NOPREEMPT.
1422 	 */
1423 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1424 		return 0;
1425 
1426 	if (igt_spinner_init(&spin, gt))
1427 		return -ENOMEM;
1428 
1429 	for_each_engine(engine, gt, id) {
1430 		struct intel_context *ce;
1431 		struct i915_request *rq;
1432 		unsigned long timeslice;
1433 
1434 		if (!intel_engine_has_preemption(engine))
1435 			continue;
1436 
1437 		ce = intel_context_create(engine);
1438 		if (IS_ERR(ce)) {
1439 			err = PTR_ERR(ce);
1440 			break;
1441 		}
1442 
1443 		st_engine_heartbeat_disable(engine);
1444 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1445 
1446 		/* Create an unpreemptible spinner */
1447 
1448 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1449 		intel_context_put(ce);
1450 		if (IS_ERR(rq)) {
1451 			err = PTR_ERR(rq);
1452 			goto out_heartbeat;
1453 		}
1454 
1455 		i915_request_get(rq);
1456 		i915_request_add(rq);
1457 
1458 		if (!igt_wait_for_spinner(&spin, rq)) {
1459 			i915_request_put(rq);
1460 			err = -ETIME;
1461 			goto out_spin;
1462 		}
1463 
1464 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1465 		i915_request_put(rq);
1466 
1467 		/* Followed by a maximum priority barrier (heartbeat) */
1468 
1469 		ce = intel_context_create(engine);
1470 		if (IS_ERR(ce)) {
1471 			err = PTR_ERR(ce);
1472 			goto out_spin;
1473 		}
1474 
1475 		rq = intel_context_create_request(ce);
1476 		intel_context_put(ce);
1477 		if (IS_ERR(rq)) {
1478 			err = PTR_ERR(rq);
1479 			goto out_spin;
1480 		}
1481 
1482 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1483 		i915_request_get(rq);
1484 		i915_request_add(rq);
1485 
1486 		/*
1487 		 * Wait until the barrier is in ELSP, and we know timeslicing
1488 		 * will have been activated.
1489 		 */
1490 		if (wait_for_submit(engine, rq, HZ / 2)) {
1491 			i915_request_put(rq);
1492 			err = -ETIME;
1493 			goto out_spin;
1494 		}
1495 
1496 		/*
1497 		 * Since the ELSP[0] request is unpreemptible, it should not
1498 		 * allow the maximum priority barrier through. Wait long
1499 		 * enough to see if it is timesliced in by mistake.
1500 		 */
1501 		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1502 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1503 			       engine->name);
1504 			err = -EINVAL;
1505 		}
1506 		i915_request_put(rq);
1507 
1508 out_spin:
1509 		igt_spinner_end(&spin);
1510 out_heartbeat:
1511 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1512 		st_engine_heartbeat_enable(engine);
1513 		if (err)
1514 			break;
1515 
1516 		if (igt_flush_test(gt->i915)) {
1517 			err = -EIO;
1518 			break;
1519 		}
1520 	}
1521 
1522 	igt_spinner_fini(&spin);
1523 	return err;
1524 }
1525 
1526 static int live_busywait_preempt(void *arg)
1527 {
1528 	struct intel_gt *gt = arg;
1529 	struct i915_gem_context *ctx_hi, *ctx_lo;
1530 	struct intel_engine_cs *engine;
1531 	struct drm_i915_gem_object *obj;
1532 	struct i915_vma *vma;
1533 	enum intel_engine_id id;
1534 	int err = -ENOMEM;
1535 	u32 *map;
1536 
1537 	/*
1538 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1539 	 * preempt the busywaits used to synchronise between rings.
1540 	 */
1541 
1542 	ctx_hi = kernel_context(gt->i915);
1543 	if (!ctx_hi)
1544 		return -ENOMEM;
1545 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1546 
1547 	ctx_lo = kernel_context(gt->i915);
1548 	if (!ctx_lo)
1549 		goto err_ctx_hi;
1550 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1551 
1552 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1553 	if (IS_ERR(obj)) {
1554 		err = PTR_ERR(obj);
1555 		goto err_ctx_lo;
1556 	}
1557 
1558 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1559 	if (IS_ERR(map)) {
1560 		err = PTR_ERR(map);
1561 		goto err_obj;
1562 	}
1563 
1564 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1565 	if (IS_ERR(vma)) {
1566 		err = PTR_ERR(vma);
1567 		goto err_map;
1568 	}
1569 
1570 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1571 	if (err)
1572 		goto err_map;
1573 
1574 	err = i915_vma_sync(vma);
1575 	if (err)
1576 		goto err_vma;
1577 
1578 	for_each_engine(engine, gt, id) {
1579 		struct i915_request *lo, *hi;
1580 		struct igt_live_test t;
1581 		u32 *cs;
1582 
1583 		if (!intel_engine_has_preemption(engine))
1584 			continue;
1585 
1586 		if (!intel_engine_can_store_dword(engine))
1587 			continue;
1588 
1589 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1590 			err = -EIO;
1591 			goto err_vma;
1592 		}
1593 
1594 		/*
1595 		 * We create two requests. The low priority request
1596 		 * busywaits on a semaphore (inside the ringbuffer where
1597 		 * is should be preemptible) and the high priority requests
1598 		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1599 		 * allowing the first request to complete. If preemption
1600 		 * fails, we hang instead.
1601 		 */
1602 
1603 		lo = igt_request_alloc(ctx_lo, engine);
1604 		if (IS_ERR(lo)) {
1605 			err = PTR_ERR(lo);
1606 			goto err_vma;
1607 		}
1608 
1609 		cs = intel_ring_begin(lo, 8);
1610 		if (IS_ERR(cs)) {
1611 			err = PTR_ERR(cs);
1612 			i915_request_add(lo);
1613 			goto err_vma;
1614 		}
1615 
1616 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1617 		*cs++ = i915_ggtt_offset(vma);
1618 		*cs++ = 0;
1619 		*cs++ = 1;
1620 
1621 		/* XXX Do we need a flush + invalidate here? */
1622 
1623 		*cs++ = MI_SEMAPHORE_WAIT |
1624 			MI_SEMAPHORE_GLOBAL_GTT |
1625 			MI_SEMAPHORE_POLL |
1626 			MI_SEMAPHORE_SAD_EQ_SDD;
1627 		*cs++ = 0;
1628 		*cs++ = i915_ggtt_offset(vma);
1629 		*cs++ = 0;
1630 
1631 		intel_ring_advance(lo, cs);
1632 
1633 		i915_request_get(lo);
1634 		i915_request_add(lo);
1635 
1636 		if (wait_for(READ_ONCE(*map), 10)) {
1637 			i915_request_put(lo);
1638 			err = -ETIMEDOUT;
1639 			goto err_vma;
1640 		}
1641 
1642 		/* Low priority request should be busywaiting now */
1643 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1644 			i915_request_put(lo);
1645 			pr_err("%s: Busywaiting request did not!\n",
1646 			       engine->name);
1647 			err = -EIO;
1648 			goto err_vma;
1649 		}
1650 
1651 		hi = igt_request_alloc(ctx_hi, engine);
1652 		if (IS_ERR(hi)) {
1653 			err = PTR_ERR(hi);
1654 			i915_request_put(lo);
1655 			goto err_vma;
1656 		}
1657 
1658 		cs = intel_ring_begin(hi, 4);
1659 		if (IS_ERR(cs)) {
1660 			err = PTR_ERR(cs);
1661 			i915_request_add(hi);
1662 			i915_request_put(lo);
1663 			goto err_vma;
1664 		}
1665 
1666 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1667 		*cs++ = i915_ggtt_offset(vma);
1668 		*cs++ = 0;
1669 		*cs++ = 0;
1670 
1671 		intel_ring_advance(hi, cs);
1672 		i915_request_add(hi);
1673 
1674 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1675 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1676 
1677 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1678 			       engine->name);
1679 
1680 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1681 			GEM_TRACE_DUMP();
1682 
1683 			i915_request_put(lo);
1684 			intel_gt_set_wedged(gt);
1685 			err = -EIO;
1686 			goto err_vma;
1687 		}
1688 		GEM_BUG_ON(READ_ONCE(*map));
1689 		i915_request_put(lo);
1690 
1691 		if (igt_live_test_end(&t)) {
1692 			err = -EIO;
1693 			goto err_vma;
1694 		}
1695 	}
1696 
1697 	err = 0;
1698 err_vma:
1699 	i915_vma_unpin(vma);
1700 err_map:
1701 	i915_gem_object_unpin_map(obj);
1702 err_obj:
1703 	i915_gem_object_put(obj);
1704 err_ctx_lo:
1705 	kernel_context_close(ctx_lo);
1706 err_ctx_hi:
1707 	kernel_context_close(ctx_hi);
1708 	return err;
1709 }
1710 
1711 static struct i915_request *
1712 spinner_create_request(struct igt_spinner *spin,
1713 		       struct i915_gem_context *ctx,
1714 		       struct intel_engine_cs *engine,
1715 		       u32 arb)
1716 {
1717 	struct intel_context *ce;
1718 	struct i915_request *rq;
1719 
1720 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1721 	if (IS_ERR(ce))
1722 		return ERR_CAST(ce);
1723 
1724 	rq = igt_spinner_create_request(spin, ce, arb);
1725 	intel_context_put(ce);
1726 	return rq;
1727 }
1728 
1729 static int live_preempt(void *arg)
1730 {
1731 	struct intel_gt *gt = arg;
1732 	struct i915_gem_context *ctx_hi, *ctx_lo;
1733 	struct igt_spinner spin_hi, spin_lo;
1734 	struct intel_engine_cs *engine;
1735 	enum intel_engine_id id;
1736 	int err = -ENOMEM;
1737 
1738 	if (igt_spinner_init(&spin_hi, gt))
1739 		return -ENOMEM;
1740 
1741 	if (igt_spinner_init(&spin_lo, gt))
1742 		goto err_spin_hi;
1743 
1744 	ctx_hi = kernel_context(gt->i915);
1745 	if (!ctx_hi)
1746 		goto err_spin_lo;
1747 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1748 
1749 	ctx_lo = kernel_context(gt->i915);
1750 	if (!ctx_lo)
1751 		goto err_ctx_hi;
1752 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1753 
1754 	for_each_engine(engine, gt, id) {
1755 		struct igt_live_test t;
1756 		struct i915_request *rq;
1757 
1758 		if (!intel_engine_has_preemption(engine))
1759 			continue;
1760 
1761 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1762 			err = -EIO;
1763 			goto err_ctx_lo;
1764 		}
1765 
1766 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1767 					    MI_ARB_CHECK);
1768 		if (IS_ERR(rq)) {
1769 			err = PTR_ERR(rq);
1770 			goto err_ctx_lo;
1771 		}
1772 
1773 		i915_request_add(rq);
1774 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1775 			GEM_TRACE("lo spinner failed to start\n");
1776 			GEM_TRACE_DUMP();
1777 			intel_gt_set_wedged(gt);
1778 			err = -EIO;
1779 			goto err_ctx_lo;
1780 		}
1781 
1782 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1783 					    MI_ARB_CHECK);
1784 		if (IS_ERR(rq)) {
1785 			igt_spinner_end(&spin_lo);
1786 			err = PTR_ERR(rq);
1787 			goto err_ctx_lo;
1788 		}
1789 
1790 		i915_request_add(rq);
1791 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1792 			GEM_TRACE("hi spinner failed to start\n");
1793 			GEM_TRACE_DUMP();
1794 			intel_gt_set_wedged(gt);
1795 			err = -EIO;
1796 			goto err_ctx_lo;
1797 		}
1798 
1799 		igt_spinner_end(&spin_hi);
1800 		igt_spinner_end(&spin_lo);
1801 
1802 		if (igt_live_test_end(&t)) {
1803 			err = -EIO;
1804 			goto err_ctx_lo;
1805 		}
1806 	}
1807 
1808 	err = 0;
1809 err_ctx_lo:
1810 	kernel_context_close(ctx_lo);
1811 err_ctx_hi:
1812 	kernel_context_close(ctx_hi);
1813 err_spin_lo:
1814 	igt_spinner_fini(&spin_lo);
1815 err_spin_hi:
1816 	igt_spinner_fini(&spin_hi);
1817 	return err;
1818 }
1819 
1820 static int live_late_preempt(void *arg)
1821 {
1822 	struct intel_gt *gt = arg;
1823 	struct i915_gem_context *ctx_hi, *ctx_lo;
1824 	struct igt_spinner spin_hi, spin_lo;
1825 	struct intel_engine_cs *engine;
1826 	struct i915_sched_attr attr = {};
1827 	enum intel_engine_id id;
1828 	int err = -ENOMEM;
1829 
1830 	if (igt_spinner_init(&spin_hi, gt))
1831 		return -ENOMEM;
1832 
1833 	if (igt_spinner_init(&spin_lo, gt))
1834 		goto err_spin_hi;
1835 
1836 	ctx_hi = kernel_context(gt->i915);
1837 	if (!ctx_hi)
1838 		goto err_spin_lo;
1839 
1840 	ctx_lo = kernel_context(gt->i915);
1841 	if (!ctx_lo)
1842 		goto err_ctx_hi;
1843 
1844 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1845 	ctx_lo->sched.priority = 1;
1846 
1847 	for_each_engine(engine, gt, id) {
1848 		struct igt_live_test t;
1849 		struct i915_request *rq;
1850 
1851 		if (!intel_engine_has_preemption(engine))
1852 			continue;
1853 
1854 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1855 			err = -EIO;
1856 			goto err_ctx_lo;
1857 		}
1858 
1859 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1860 					    MI_ARB_CHECK);
1861 		if (IS_ERR(rq)) {
1862 			err = PTR_ERR(rq);
1863 			goto err_ctx_lo;
1864 		}
1865 
1866 		i915_request_add(rq);
1867 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1868 			pr_err("First context failed to start\n");
1869 			goto err_wedged;
1870 		}
1871 
1872 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1873 					    MI_NOOP);
1874 		if (IS_ERR(rq)) {
1875 			igt_spinner_end(&spin_lo);
1876 			err = PTR_ERR(rq);
1877 			goto err_ctx_lo;
1878 		}
1879 
1880 		i915_request_add(rq);
1881 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1882 			pr_err("Second context overtook first?\n");
1883 			goto err_wedged;
1884 		}
1885 
1886 		attr.priority = I915_PRIORITY_MAX;
1887 		engine->schedule(rq, &attr);
1888 
1889 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1890 			pr_err("High priority context failed to preempt the low priority context\n");
1891 			GEM_TRACE_DUMP();
1892 			goto err_wedged;
1893 		}
1894 
1895 		igt_spinner_end(&spin_hi);
1896 		igt_spinner_end(&spin_lo);
1897 
1898 		if (igt_live_test_end(&t)) {
1899 			err = -EIO;
1900 			goto err_ctx_lo;
1901 		}
1902 	}
1903 
1904 	err = 0;
1905 err_ctx_lo:
1906 	kernel_context_close(ctx_lo);
1907 err_ctx_hi:
1908 	kernel_context_close(ctx_hi);
1909 err_spin_lo:
1910 	igt_spinner_fini(&spin_lo);
1911 err_spin_hi:
1912 	igt_spinner_fini(&spin_hi);
1913 	return err;
1914 
1915 err_wedged:
1916 	igt_spinner_end(&spin_hi);
1917 	igt_spinner_end(&spin_lo);
1918 	intel_gt_set_wedged(gt);
1919 	err = -EIO;
1920 	goto err_ctx_lo;
1921 }
1922 
1923 struct preempt_client {
1924 	struct igt_spinner spin;
1925 	struct i915_gem_context *ctx;
1926 };
1927 
1928 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1929 {
1930 	c->ctx = kernel_context(gt->i915);
1931 	if (!c->ctx)
1932 		return -ENOMEM;
1933 
1934 	if (igt_spinner_init(&c->spin, gt))
1935 		goto err_ctx;
1936 
1937 	return 0;
1938 
1939 err_ctx:
1940 	kernel_context_close(c->ctx);
1941 	return -ENOMEM;
1942 }
1943 
1944 static void preempt_client_fini(struct preempt_client *c)
1945 {
1946 	igt_spinner_fini(&c->spin);
1947 	kernel_context_close(c->ctx);
1948 }
1949 
1950 static int live_nopreempt(void *arg)
1951 {
1952 	struct intel_gt *gt = arg;
1953 	struct intel_engine_cs *engine;
1954 	struct preempt_client a, b;
1955 	enum intel_engine_id id;
1956 	int err = -ENOMEM;
1957 
1958 	/*
1959 	 * Verify that we can disable preemption for an individual request
1960 	 * that may be being observed and not want to be interrupted.
1961 	 */
1962 
1963 	if (preempt_client_init(gt, &a))
1964 		return -ENOMEM;
1965 	if (preempt_client_init(gt, &b))
1966 		goto err_client_a;
1967 	b.ctx->sched.priority = I915_PRIORITY_MAX;
1968 
1969 	for_each_engine(engine, gt, id) {
1970 		struct i915_request *rq_a, *rq_b;
1971 
1972 		if (!intel_engine_has_preemption(engine))
1973 			continue;
1974 
1975 		engine->execlists.preempt_hang.count = 0;
1976 
1977 		rq_a = spinner_create_request(&a.spin,
1978 					      a.ctx, engine,
1979 					      MI_ARB_CHECK);
1980 		if (IS_ERR(rq_a)) {
1981 			err = PTR_ERR(rq_a);
1982 			goto err_client_b;
1983 		}
1984 
1985 		/* Low priority client, but unpreemptable! */
1986 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1987 
1988 		i915_request_add(rq_a);
1989 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1990 			pr_err("First client failed to start\n");
1991 			goto err_wedged;
1992 		}
1993 
1994 		rq_b = spinner_create_request(&b.spin,
1995 					      b.ctx, engine,
1996 					      MI_ARB_CHECK);
1997 		if (IS_ERR(rq_b)) {
1998 			err = PTR_ERR(rq_b);
1999 			goto err_client_b;
2000 		}
2001 
2002 		i915_request_add(rq_b);
2003 
2004 		/* B is much more important than A! (But A is unpreemptable.) */
2005 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2006 
2007 		/* Wait long enough for preemption and timeslicing */
2008 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2009 			pr_err("Second client started too early!\n");
2010 			goto err_wedged;
2011 		}
2012 
2013 		igt_spinner_end(&a.spin);
2014 
2015 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2016 			pr_err("Second client failed to start\n");
2017 			goto err_wedged;
2018 		}
2019 
2020 		igt_spinner_end(&b.spin);
2021 
2022 		if (engine->execlists.preempt_hang.count) {
2023 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2024 			       engine->execlists.preempt_hang.count);
2025 			err = -EINVAL;
2026 			goto err_wedged;
2027 		}
2028 
2029 		if (igt_flush_test(gt->i915))
2030 			goto err_wedged;
2031 	}
2032 
2033 	err = 0;
2034 err_client_b:
2035 	preempt_client_fini(&b);
2036 err_client_a:
2037 	preempt_client_fini(&a);
2038 	return err;
2039 
2040 err_wedged:
2041 	igt_spinner_end(&b.spin);
2042 	igt_spinner_end(&a.spin);
2043 	intel_gt_set_wedged(gt);
2044 	err = -EIO;
2045 	goto err_client_b;
2046 }
2047 
2048 struct live_preempt_cancel {
2049 	struct intel_engine_cs *engine;
2050 	struct preempt_client a, b;
2051 };
2052 
2053 static int __cancel_active0(struct live_preempt_cancel *arg)
2054 {
2055 	struct i915_request *rq;
2056 	struct igt_live_test t;
2057 	int err;
2058 
2059 	/* Preempt cancel of ELSP0 */
2060 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2061 	if (igt_live_test_begin(&t, arg->engine->i915,
2062 				__func__, arg->engine->name))
2063 		return -EIO;
2064 
2065 	rq = spinner_create_request(&arg->a.spin,
2066 				    arg->a.ctx, arg->engine,
2067 				    MI_ARB_CHECK);
2068 	if (IS_ERR(rq))
2069 		return PTR_ERR(rq);
2070 
2071 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2072 	i915_request_get(rq);
2073 	i915_request_add(rq);
2074 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2075 		err = -EIO;
2076 		goto out;
2077 	}
2078 
2079 	intel_context_set_banned(rq->context);
2080 	err = intel_engine_pulse(arg->engine);
2081 	if (err)
2082 		goto out;
2083 
2084 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2085 	if (err) {
2086 		pr_err("Cancelled inflight0 request did not reset\n");
2087 		goto out;
2088 	}
2089 
2090 out:
2091 	i915_request_put(rq);
2092 	if (igt_live_test_end(&t))
2093 		err = -EIO;
2094 	return err;
2095 }
2096 
2097 static int __cancel_active1(struct live_preempt_cancel *arg)
2098 {
2099 	struct i915_request *rq[2] = {};
2100 	struct igt_live_test t;
2101 	int err;
2102 
2103 	/* Preempt cancel of ELSP1 */
2104 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2105 	if (igt_live_test_begin(&t, arg->engine->i915,
2106 				__func__, arg->engine->name))
2107 		return -EIO;
2108 
2109 	rq[0] = spinner_create_request(&arg->a.spin,
2110 				       arg->a.ctx, arg->engine,
2111 				       MI_NOOP); /* no preemption */
2112 	if (IS_ERR(rq[0]))
2113 		return PTR_ERR(rq[0]);
2114 
2115 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2116 	i915_request_get(rq[0]);
2117 	i915_request_add(rq[0]);
2118 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2119 		err = -EIO;
2120 		goto out;
2121 	}
2122 
2123 	rq[1] = spinner_create_request(&arg->b.spin,
2124 				       arg->b.ctx, arg->engine,
2125 				       MI_ARB_CHECK);
2126 	if (IS_ERR(rq[1])) {
2127 		err = PTR_ERR(rq[1]);
2128 		goto out;
2129 	}
2130 
2131 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2132 	i915_request_get(rq[1]);
2133 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2134 	i915_request_add(rq[1]);
2135 	if (err)
2136 		goto out;
2137 
2138 	intel_context_set_banned(rq[1]->context);
2139 	err = intel_engine_pulse(arg->engine);
2140 	if (err)
2141 		goto out;
2142 
2143 	igt_spinner_end(&arg->a.spin);
2144 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2145 	if (err)
2146 		goto out;
2147 
2148 	if (rq[0]->fence.error != 0) {
2149 		pr_err("Normal inflight0 request did not complete\n");
2150 		err = -EINVAL;
2151 		goto out;
2152 	}
2153 
2154 	if (rq[1]->fence.error != -EIO) {
2155 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2156 		err = -EINVAL;
2157 		goto out;
2158 	}
2159 
2160 out:
2161 	i915_request_put(rq[1]);
2162 	i915_request_put(rq[0]);
2163 	if (igt_live_test_end(&t))
2164 		err = -EIO;
2165 	return err;
2166 }
2167 
2168 static int __cancel_queued(struct live_preempt_cancel *arg)
2169 {
2170 	struct i915_request *rq[3] = {};
2171 	struct igt_live_test t;
2172 	int err;
2173 
2174 	/* Full ELSP and one in the wings */
2175 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2176 	if (igt_live_test_begin(&t, arg->engine->i915,
2177 				__func__, arg->engine->name))
2178 		return -EIO;
2179 
2180 	rq[0] = spinner_create_request(&arg->a.spin,
2181 				       arg->a.ctx, arg->engine,
2182 				       MI_ARB_CHECK);
2183 	if (IS_ERR(rq[0]))
2184 		return PTR_ERR(rq[0]);
2185 
2186 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2187 	i915_request_get(rq[0]);
2188 	i915_request_add(rq[0]);
2189 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2190 		err = -EIO;
2191 		goto out;
2192 	}
2193 
2194 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2195 	if (IS_ERR(rq[1])) {
2196 		err = PTR_ERR(rq[1]);
2197 		goto out;
2198 	}
2199 
2200 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2201 	i915_request_get(rq[1]);
2202 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2203 	i915_request_add(rq[1]);
2204 	if (err)
2205 		goto out;
2206 
2207 	rq[2] = spinner_create_request(&arg->b.spin,
2208 				       arg->a.ctx, arg->engine,
2209 				       MI_ARB_CHECK);
2210 	if (IS_ERR(rq[2])) {
2211 		err = PTR_ERR(rq[2]);
2212 		goto out;
2213 	}
2214 
2215 	i915_request_get(rq[2]);
2216 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2217 	i915_request_add(rq[2]);
2218 	if (err)
2219 		goto out;
2220 
2221 	intel_context_set_banned(rq[2]->context);
2222 	err = intel_engine_pulse(arg->engine);
2223 	if (err)
2224 		goto out;
2225 
2226 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2227 	if (err)
2228 		goto out;
2229 
2230 	if (rq[0]->fence.error != -EIO) {
2231 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2232 		err = -EINVAL;
2233 		goto out;
2234 	}
2235 
2236 	if (rq[1]->fence.error != 0) {
2237 		pr_err("Normal inflight1 request did not complete\n");
2238 		err = -EINVAL;
2239 		goto out;
2240 	}
2241 
2242 	if (rq[2]->fence.error != -EIO) {
2243 		pr_err("Cancelled queued request did not report -EIO\n");
2244 		err = -EINVAL;
2245 		goto out;
2246 	}
2247 
2248 out:
2249 	i915_request_put(rq[2]);
2250 	i915_request_put(rq[1]);
2251 	i915_request_put(rq[0]);
2252 	if (igt_live_test_end(&t))
2253 		err = -EIO;
2254 	return err;
2255 }
2256 
2257 static int __cancel_hostile(struct live_preempt_cancel *arg)
2258 {
2259 	struct i915_request *rq;
2260 	int err;
2261 
2262 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2263 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2264 		return 0;
2265 
2266 	if (!intel_has_reset_engine(arg->engine->gt))
2267 		return 0;
2268 
2269 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2270 	rq = spinner_create_request(&arg->a.spin,
2271 				    arg->a.ctx, arg->engine,
2272 				    MI_NOOP); /* preemption disabled */
2273 	if (IS_ERR(rq))
2274 		return PTR_ERR(rq);
2275 
2276 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2277 	i915_request_get(rq);
2278 	i915_request_add(rq);
2279 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2280 		err = -EIO;
2281 		goto out;
2282 	}
2283 
2284 	intel_context_set_banned(rq->context);
2285 	err = intel_engine_pulse(arg->engine); /* force reset */
2286 	if (err)
2287 		goto out;
2288 
2289 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2290 	if (err) {
2291 		pr_err("Cancelled inflight0 request did not reset\n");
2292 		goto out;
2293 	}
2294 
2295 out:
2296 	i915_request_put(rq);
2297 	if (igt_flush_test(arg->engine->i915))
2298 		err = -EIO;
2299 	return err;
2300 }
2301 
2302 static void force_reset_timeout(struct intel_engine_cs *engine)
2303 {
2304 	engine->reset_timeout.probability = 999;
2305 	atomic_set(&engine->reset_timeout.times, -1);
2306 }
2307 
2308 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2309 {
2310 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2311 }
2312 
2313 static int __cancel_fail(struct live_preempt_cancel *arg)
2314 {
2315 	struct intel_engine_cs *engine = arg->engine;
2316 	struct i915_request *rq;
2317 	int err;
2318 
2319 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2320 		return 0;
2321 
2322 	if (!intel_has_reset_engine(engine->gt))
2323 		return 0;
2324 
2325 	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2326 	rq = spinner_create_request(&arg->a.spin,
2327 				    arg->a.ctx, engine,
2328 				    MI_NOOP); /* preemption disabled */
2329 	if (IS_ERR(rq))
2330 		return PTR_ERR(rq);
2331 
2332 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2333 	i915_request_get(rq);
2334 	i915_request_add(rq);
2335 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2336 		err = -EIO;
2337 		goto out;
2338 	}
2339 
2340 	intel_context_set_banned(rq->context);
2341 
2342 	err = intel_engine_pulse(engine);
2343 	if (err)
2344 		goto out;
2345 
2346 	force_reset_timeout(engine);
2347 
2348 	/* force preempt reset [failure] */
2349 	while (!engine->execlists.pending[0])
2350 		intel_engine_flush_submission(engine);
2351 	del_timer_sync(&engine->execlists.preempt);
2352 	intel_engine_flush_submission(engine);
2353 
2354 	cancel_reset_timeout(engine);
2355 
2356 	/* after failure, require heartbeats to reset device */
2357 	intel_engine_set_heartbeat(engine, 1);
2358 	err = wait_for_reset(engine, rq, HZ / 2);
2359 	intel_engine_set_heartbeat(engine,
2360 				   engine->defaults.heartbeat_interval_ms);
2361 	if (err) {
2362 		pr_err("Cancelled inflight0 request did not reset\n");
2363 		goto out;
2364 	}
2365 
2366 out:
2367 	i915_request_put(rq);
2368 	if (igt_flush_test(engine->i915))
2369 		err = -EIO;
2370 	return err;
2371 }
2372 
2373 static int live_preempt_cancel(void *arg)
2374 {
2375 	struct intel_gt *gt = arg;
2376 	struct live_preempt_cancel data;
2377 	enum intel_engine_id id;
2378 	int err = -ENOMEM;
2379 
2380 	/*
2381 	 * To cancel an inflight context, we need to first remove it from the
2382 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2383 	 */
2384 
2385 	if (preempt_client_init(gt, &data.a))
2386 		return -ENOMEM;
2387 	if (preempt_client_init(gt, &data.b))
2388 		goto err_client_a;
2389 
2390 	for_each_engine(data.engine, gt, id) {
2391 		if (!intel_engine_has_preemption(data.engine))
2392 			continue;
2393 
2394 		err = __cancel_active0(&data);
2395 		if (err)
2396 			goto err_wedged;
2397 
2398 		err = __cancel_active1(&data);
2399 		if (err)
2400 			goto err_wedged;
2401 
2402 		err = __cancel_queued(&data);
2403 		if (err)
2404 			goto err_wedged;
2405 
2406 		err = __cancel_hostile(&data);
2407 		if (err)
2408 			goto err_wedged;
2409 
2410 		err = __cancel_fail(&data);
2411 		if (err)
2412 			goto err_wedged;
2413 	}
2414 
2415 	err = 0;
2416 err_client_b:
2417 	preempt_client_fini(&data.b);
2418 err_client_a:
2419 	preempt_client_fini(&data.a);
2420 	return err;
2421 
2422 err_wedged:
2423 	GEM_TRACE_DUMP();
2424 	igt_spinner_end(&data.b.spin);
2425 	igt_spinner_end(&data.a.spin);
2426 	intel_gt_set_wedged(gt);
2427 	goto err_client_b;
2428 }
2429 
2430 static int live_suppress_self_preempt(void *arg)
2431 {
2432 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2433 	struct intel_gt *gt = arg;
2434 	struct intel_engine_cs *engine;
2435 	struct preempt_client a, b;
2436 	enum intel_engine_id id;
2437 	int err = -ENOMEM;
2438 
2439 	/*
2440 	 * Verify that if a preemption request does not cause a change in
2441 	 * the current execution order, the preempt-to-idle injection is
2442 	 * skipped and that we do not accidentally apply it after the CS
2443 	 * completion event.
2444 	 */
2445 
2446 	if (intel_uc_uses_guc_submission(&gt->uc))
2447 		return 0; /* presume black blox */
2448 
2449 	if (intel_vgpu_active(gt->i915))
2450 		return 0; /* GVT forces single port & request submission */
2451 
2452 	if (preempt_client_init(gt, &a))
2453 		return -ENOMEM;
2454 	if (preempt_client_init(gt, &b))
2455 		goto err_client_a;
2456 
2457 	for_each_engine(engine, gt, id) {
2458 		struct i915_request *rq_a, *rq_b;
2459 		int depth;
2460 
2461 		if (!intel_engine_has_preemption(engine))
2462 			continue;
2463 
2464 		if (igt_flush_test(gt->i915))
2465 			goto err_wedged;
2466 
2467 		st_engine_heartbeat_disable(engine);
2468 		engine->execlists.preempt_hang.count = 0;
2469 
2470 		rq_a = spinner_create_request(&a.spin,
2471 					      a.ctx, engine,
2472 					      MI_NOOP);
2473 		if (IS_ERR(rq_a)) {
2474 			err = PTR_ERR(rq_a);
2475 			st_engine_heartbeat_enable(engine);
2476 			goto err_client_b;
2477 		}
2478 
2479 		i915_request_add(rq_a);
2480 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2481 			pr_err("First client failed to start\n");
2482 			st_engine_heartbeat_enable(engine);
2483 			goto err_wedged;
2484 		}
2485 
2486 		/* Keep postponing the timer to avoid premature slicing */
2487 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2488 		for (depth = 0; depth < 8; depth++) {
2489 			rq_b = spinner_create_request(&b.spin,
2490 						      b.ctx, engine,
2491 						      MI_NOOP);
2492 			if (IS_ERR(rq_b)) {
2493 				err = PTR_ERR(rq_b);
2494 				st_engine_heartbeat_enable(engine);
2495 				goto err_client_b;
2496 			}
2497 			i915_request_add(rq_b);
2498 
2499 			GEM_BUG_ON(i915_request_completed(rq_a));
2500 			engine->schedule(rq_a, &attr);
2501 			igt_spinner_end(&a.spin);
2502 
2503 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2504 				pr_err("Second client failed to start\n");
2505 				st_engine_heartbeat_enable(engine);
2506 				goto err_wedged;
2507 			}
2508 
2509 			swap(a, b);
2510 			rq_a = rq_b;
2511 		}
2512 		igt_spinner_end(&a.spin);
2513 
2514 		if (engine->execlists.preempt_hang.count) {
2515 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2516 			       engine->name,
2517 			       engine->execlists.preempt_hang.count,
2518 			       depth);
2519 			st_engine_heartbeat_enable(engine);
2520 			err = -EINVAL;
2521 			goto err_client_b;
2522 		}
2523 
2524 		st_engine_heartbeat_enable(engine);
2525 		if (igt_flush_test(gt->i915))
2526 			goto err_wedged;
2527 	}
2528 
2529 	err = 0;
2530 err_client_b:
2531 	preempt_client_fini(&b);
2532 err_client_a:
2533 	preempt_client_fini(&a);
2534 	return err;
2535 
2536 err_wedged:
2537 	igt_spinner_end(&b.spin);
2538 	igt_spinner_end(&a.spin);
2539 	intel_gt_set_wedged(gt);
2540 	err = -EIO;
2541 	goto err_client_b;
2542 }
2543 
2544 static int live_chain_preempt(void *arg)
2545 {
2546 	struct intel_gt *gt = arg;
2547 	struct intel_engine_cs *engine;
2548 	struct preempt_client hi, lo;
2549 	enum intel_engine_id id;
2550 	int err = -ENOMEM;
2551 
2552 	/*
2553 	 * Build a chain AB...BA between two contexts (A, B) and request
2554 	 * preemption of the last request. It should then complete before
2555 	 * the previously submitted spinner in B.
2556 	 */
2557 
2558 	if (preempt_client_init(gt, &hi))
2559 		return -ENOMEM;
2560 
2561 	if (preempt_client_init(gt, &lo))
2562 		goto err_client_hi;
2563 
2564 	for_each_engine(engine, gt, id) {
2565 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2566 		struct igt_live_test t;
2567 		struct i915_request *rq;
2568 		int ring_size, count, i;
2569 
2570 		if (!intel_engine_has_preemption(engine))
2571 			continue;
2572 
2573 		rq = spinner_create_request(&lo.spin,
2574 					    lo.ctx, engine,
2575 					    MI_ARB_CHECK);
2576 		if (IS_ERR(rq))
2577 			goto err_wedged;
2578 
2579 		i915_request_get(rq);
2580 		i915_request_add(rq);
2581 
2582 		ring_size = rq->wa_tail - rq->head;
2583 		if (ring_size < 0)
2584 			ring_size += rq->ring->size;
2585 		ring_size = rq->ring->size / ring_size;
2586 		pr_debug("%s(%s): Using maximum of %d requests\n",
2587 			 __func__, engine->name, ring_size);
2588 
2589 		igt_spinner_end(&lo.spin);
2590 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2591 			pr_err("Timed out waiting to flush %s\n", engine->name);
2592 			i915_request_put(rq);
2593 			goto err_wedged;
2594 		}
2595 		i915_request_put(rq);
2596 
2597 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2598 			err = -EIO;
2599 			goto err_wedged;
2600 		}
2601 
2602 		for_each_prime_number_from(count, 1, ring_size) {
2603 			rq = spinner_create_request(&hi.spin,
2604 						    hi.ctx, engine,
2605 						    MI_ARB_CHECK);
2606 			if (IS_ERR(rq))
2607 				goto err_wedged;
2608 			i915_request_add(rq);
2609 			if (!igt_wait_for_spinner(&hi.spin, rq))
2610 				goto err_wedged;
2611 
2612 			rq = spinner_create_request(&lo.spin,
2613 						    lo.ctx, engine,
2614 						    MI_ARB_CHECK);
2615 			if (IS_ERR(rq))
2616 				goto err_wedged;
2617 			i915_request_add(rq);
2618 
2619 			for (i = 0; i < count; i++) {
2620 				rq = igt_request_alloc(lo.ctx, engine);
2621 				if (IS_ERR(rq))
2622 					goto err_wedged;
2623 				i915_request_add(rq);
2624 			}
2625 
2626 			rq = igt_request_alloc(hi.ctx, engine);
2627 			if (IS_ERR(rq))
2628 				goto err_wedged;
2629 
2630 			i915_request_get(rq);
2631 			i915_request_add(rq);
2632 			engine->schedule(rq, &attr);
2633 
2634 			igt_spinner_end(&hi.spin);
2635 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2636 				struct drm_printer p =
2637 					drm_info_printer(gt->i915->drm.dev);
2638 
2639 				pr_err("Failed to preempt over chain of %d\n",
2640 				       count);
2641 				intel_engine_dump(engine, &p,
2642 						  "%s\n", engine->name);
2643 				i915_request_put(rq);
2644 				goto err_wedged;
2645 			}
2646 			igt_spinner_end(&lo.spin);
2647 			i915_request_put(rq);
2648 
2649 			rq = igt_request_alloc(lo.ctx, engine);
2650 			if (IS_ERR(rq))
2651 				goto err_wedged;
2652 
2653 			i915_request_get(rq);
2654 			i915_request_add(rq);
2655 
2656 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2657 				struct drm_printer p =
2658 					drm_info_printer(gt->i915->drm.dev);
2659 
2660 				pr_err("Failed to flush low priority chain of %d requests\n",
2661 				       count);
2662 				intel_engine_dump(engine, &p,
2663 						  "%s\n", engine->name);
2664 
2665 				i915_request_put(rq);
2666 				goto err_wedged;
2667 			}
2668 			i915_request_put(rq);
2669 		}
2670 
2671 		if (igt_live_test_end(&t)) {
2672 			err = -EIO;
2673 			goto err_wedged;
2674 		}
2675 	}
2676 
2677 	err = 0;
2678 err_client_lo:
2679 	preempt_client_fini(&lo);
2680 err_client_hi:
2681 	preempt_client_fini(&hi);
2682 	return err;
2683 
2684 err_wedged:
2685 	igt_spinner_end(&hi.spin);
2686 	igt_spinner_end(&lo.spin);
2687 	intel_gt_set_wedged(gt);
2688 	err = -EIO;
2689 	goto err_client_lo;
2690 }
2691 
2692 static int create_gang(struct intel_engine_cs *engine,
2693 		       struct i915_request **prev)
2694 {
2695 	struct drm_i915_gem_object *obj;
2696 	struct intel_context *ce;
2697 	struct i915_request *rq;
2698 	struct i915_vma *vma;
2699 	u32 *cs;
2700 	int err;
2701 
2702 	ce = intel_context_create(engine);
2703 	if (IS_ERR(ce))
2704 		return PTR_ERR(ce);
2705 
2706 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2707 	if (IS_ERR(obj)) {
2708 		err = PTR_ERR(obj);
2709 		goto err_ce;
2710 	}
2711 
2712 	vma = i915_vma_instance(obj, ce->vm, NULL);
2713 	if (IS_ERR(vma)) {
2714 		err = PTR_ERR(vma);
2715 		goto err_obj;
2716 	}
2717 
2718 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2719 	if (err)
2720 		goto err_obj;
2721 
2722 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2723 	if (IS_ERR(cs)) {
2724 		err = PTR_ERR(cs);
2725 		goto err_obj;
2726 	}
2727 
2728 	/* Semaphore target: spin until zero */
2729 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2730 
2731 	*cs++ = MI_SEMAPHORE_WAIT |
2732 		MI_SEMAPHORE_POLL |
2733 		MI_SEMAPHORE_SAD_EQ_SDD;
2734 	*cs++ = 0;
2735 	*cs++ = lower_32_bits(vma->node.start);
2736 	*cs++ = upper_32_bits(vma->node.start);
2737 
2738 	if (*prev) {
2739 		u64 offset = (*prev)->batch->node.start;
2740 
2741 		/* Terminate the spinner in the next lower priority batch. */
2742 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2743 		*cs++ = lower_32_bits(offset);
2744 		*cs++ = upper_32_bits(offset);
2745 		*cs++ = 0;
2746 	}
2747 
2748 	*cs++ = MI_BATCH_BUFFER_END;
2749 	i915_gem_object_flush_map(obj);
2750 	i915_gem_object_unpin_map(obj);
2751 
2752 	rq = intel_context_create_request(ce);
2753 	if (IS_ERR(rq)) {
2754 		err = PTR_ERR(rq);
2755 		goto err_obj;
2756 	}
2757 
2758 	rq->batch = i915_vma_get(vma);
2759 	i915_request_get(rq);
2760 
2761 	i915_vma_lock(vma);
2762 	err = i915_request_await_object(rq, vma->obj, false);
2763 	if (!err)
2764 		err = i915_vma_move_to_active(vma, rq, 0);
2765 	if (!err)
2766 		err = rq->engine->emit_bb_start(rq,
2767 						vma->node.start,
2768 						PAGE_SIZE, 0);
2769 	i915_vma_unlock(vma);
2770 	i915_request_add(rq);
2771 	if (err)
2772 		goto err_rq;
2773 
2774 	i915_gem_object_put(obj);
2775 	intel_context_put(ce);
2776 
2777 	rq->mock.link.next = &(*prev)->mock.link;
2778 	*prev = rq;
2779 	return 0;
2780 
2781 err_rq:
2782 	i915_vma_put(rq->batch);
2783 	i915_request_put(rq);
2784 err_obj:
2785 	i915_gem_object_put(obj);
2786 err_ce:
2787 	intel_context_put(ce);
2788 	return err;
2789 }
2790 
2791 static int __live_preempt_ring(struct intel_engine_cs *engine,
2792 			       struct igt_spinner *spin,
2793 			       int queue_sz, int ring_sz)
2794 {
2795 	struct intel_context *ce[2] = {};
2796 	struct i915_request *rq;
2797 	struct igt_live_test t;
2798 	int err = 0;
2799 	int n;
2800 
2801 	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2802 		return -EIO;
2803 
2804 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2805 		struct intel_context *tmp;
2806 
2807 		tmp = intel_context_create(engine);
2808 		if (IS_ERR(tmp)) {
2809 			err = PTR_ERR(tmp);
2810 			goto err_ce;
2811 		}
2812 
2813 		tmp->ring = __intel_context_ring_size(ring_sz);
2814 
2815 		err = intel_context_pin(tmp);
2816 		if (err) {
2817 			intel_context_put(tmp);
2818 			goto err_ce;
2819 		}
2820 
2821 		memset32(tmp->ring->vaddr,
2822 			 0xdeadbeef, /* trigger a hang if executed */
2823 			 tmp->ring->vma->size / sizeof(u32));
2824 
2825 		ce[n] = tmp;
2826 	}
2827 
2828 	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2829 	if (IS_ERR(rq)) {
2830 		err = PTR_ERR(rq);
2831 		goto err_ce;
2832 	}
2833 
2834 	i915_request_get(rq);
2835 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2836 	i915_request_add(rq);
2837 
2838 	if (!igt_wait_for_spinner(spin, rq)) {
2839 		intel_gt_set_wedged(engine->gt);
2840 		i915_request_put(rq);
2841 		err = -ETIME;
2842 		goto err_ce;
2843 	}
2844 
2845 	/* Fill the ring, until we will cause a wrap */
2846 	n = 0;
2847 	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2848 		struct i915_request *tmp;
2849 
2850 		tmp = intel_context_create_request(ce[0]);
2851 		if (IS_ERR(tmp)) {
2852 			err = PTR_ERR(tmp);
2853 			i915_request_put(rq);
2854 			goto err_ce;
2855 		}
2856 
2857 		i915_request_add(tmp);
2858 		intel_engine_flush_submission(engine);
2859 		n++;
2860 	}
2861 	intel_engine_flush_submission(engine);
2862 	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2863 		 engine->name, queue_sz, n,
2864 		 ce[0]->ring->size,
2865 		 ce[0]->ring->tail,
2866 		 ce[0]->ring->emit,
2867 		 rq->tail);
2868 	i915_request_put(rq);
2869 
2870 	/* Create a second request to preempt the first ring */
2871 	rq = intel_context_create_request(ce[1]);
2872 	if (IS_ERR(rq)) {
2873 		err = PTR_ERR(rq);
2874 		goto err_ce;
2875 	}
2876 
2877 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2878 	i915_request_get(rq);
2879 	i915_request_add(rq);
2880 
2881 	err = wait_for_submit(engine, rq, HZ / 2);
2882 	i915_request_put(rq);
2883 	if (err) {
2884 		pr_err("%s: preemption request was not submitted\n",
2885 		       engine->name);
2886 		err = -ETIME;
2887 	}
2888 
2889 	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2890 		 engine->name,
2891 		 ce[0]->ring->tail, ce[0]->ring->emit,
2892 		 ce[1]->ring->tail, ce[1]->ring->emit);
2893 
2894 err_ce:
2895 	intel_engine_flush_submission(engine);
2896 	igt_spinner_end(spin);
2897 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2898 		if (IS_ERR_OR_NULL(ce[n]))
2899 			break;
2900 
2901 		intel_context_unpin(ce[n]);
2902 		intel_context_put(ce[n]);
2903 	}
2904 	if (igt_live_test_end(&t))
2905 		err = -EIO;
2906 	return err;
2907 }
2908 
2909 static int live_preempt_ring(void *arg)
2910 {
2911 	struct intel_gt *gt = arg;
2912 	struct intel_engine_cs *engine;
2913 	struct igt_spinner spin;
2914 	enum intel_engine_id id;
2915 	int err = 0;
2916 
2917 	/*
2918 	 * Check that we rollback large chunks of a ring in order to do a
2919 	 * preemption event. Similar to live_unlite_ring, but looking at
2920 	 * ring size rather than the impact of intel_ring_direction().
2921 	 */
2922 
2923 	if (igt_spinner_init(&spin, gt))
2924 		return -ENOMEM;
2925 
2926 	for_each_engine(engine, gt, id) {
2927 		int n;
2928 
2929 		if (!intel_engine_has_preemption(engine))
2930 			continue;
2931 
2932 		if (!intel_engine_can_store_dword(engine))
2933 			continue;
2934 
2935 		st_engine_heartbeat_disable(engine);
2936 
2937 		for (n = 0; n <= 3; n++) {
2938 			err = __live_preempt_ring(engine, &spin,
2939 						  n * SZ_4K / 4, SZ_4K);
2940 			if (err)
2941 				break;
2942 		}
2943 
2944 		st_engine_heartbeat_enable(engine);
2945 		if (err)
2946 			break;
2947 	}
2948 
2949 	igt_spinner_fini(&spin);
2950 	return err;
2951 }
2952 
2953 static int live_preempt_gang(void *arg)
2954 {
2955 	struct intel_gt *gt = arg;
2956 	struct intel_engine_cs *engine;
2957 	enum intel_engine_id id;
2958 
2959 	/*
2960 	 * Build as long a chain of preempters as we can, with each
2961 	 * request higher priority than the last. Once we are ready, we release
2962 	 * the last batch which then precolates down the chain, each releasing
2963 	 * the next oldest in turn. The intent is to simply push as hard as we
2964 	 * can with the number of preemptions, trying to exceed narrow HW
2965 	 * limits. At a minimum, we insist that we can sort all the user
2966 	 * high priority levels into execution order.
2967 	 */
2968 
2969 	for_each_engine(engine, gt, id) {
2970 		struct i915_request *rq = NULL;
2971 		struct igt_live_test t;
2972 		IGT_TIMEOUT(end_time);
2973 		int prio = 0;
2974 		int err = 0;
2975 		u32 *cs;
2976 
2977 		if (!intel_engine_has_preemption(engine))
2978 			continue;
2979 
2980 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2981 			return -EIO;
2982 
2983 		do {
2984 			struct i915_sched_attr attr = { .priority = prio++ };
2985 
2986 			err = create_gang(engine, &rq);
2987 			if (err)
2988 				break;
2989 
2990 			/* Submit each spinner at increasing priority */
2991 			engine->schedule(rq, &attr);
2992 		} while (prio <= I915_PRIORITY_MAX &&
2993 			 !__igt_timeout(end_time, NULL));
2994 		pr_debug("%s: Preempt chain of %d requests\n",
2995 			 engine->name, prio);
2996 
2997 		/*
2998 		 * Such that the last spinner is the highest priority and
2999 		 * should execute first. When that spinner completes,
3000 		 * it will terminate the next lowest spinner until there
3001 		 * are no more spinners and the gang is complete.
3002 		 */
3003 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3004 		if (!IS_ERR(cs)) {
3005 			*cs = 0;
3006 			i915_gem_object_unpin_map(rq->batch->obj);
3007 		} else {
3008 			err = PTR_ERR(cs);
3009 			intel_gt_set_wedged(gt);
3010 		}
3011 
3012 		while (rq) { /* wait for each rq from highest to lowest prio */
3013 			struct i915_request *n = list_next_entry(rq, mock.link);
3014 
3015 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3016 				struct drm_printer p =
3017 					drm_info_printer(engine->i915->drm.dev);
3018 
3019 				pr_err("Failed to flush chain of %d requests, at %d\n",
3020 				       prio, rq_prio(rq));
3021 				intel_engine_dump(engine, &p,
3022 						  "%s\n", engine->name);
3023 
3024 				err = -ETIME;
3025 			}
3026 
3027 			i915_vma_put(rq->batch);
3028 			i915_request_put(rq);
3029 			rq = n;
3030 		}
3031 
3032 		if (igt_live_test_end(&t))
3033 			err = -EIO;
3034 		if (err)
3035 			return err;
3036 	}
3037 
3038 	return 0;
3039 }
3040 
3041 static struct i915_vma *
3042 create_gpr_user(struct intel_engine_cs *engine,
3043 		struct i915_vma *result,
3044 		unsigned int offset)
3045 {
3046 	struct drm_i915_gem_object *obj;
3047 	struct i915_vma *vma;
3048 	u32 *cs;
3049 	int err;
3050 	int i;
3051 
3052 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3053 	if (IS_ERR(obj))
3054 		return ERR_CAST(obj);
3055 
3056 	vma = i915_vma_instance(obj, result->vm, NULL);
3057 	if (IS_ERR(vma)) {
3058 		i915_gem_object_put(obj);
3059 		return vma;
3060 	}
3061 
3062 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3063 	if (err) {
3064 		i915_vma_put(vma);
3065 		return ERR_PTR(err);
3066 	}
3067 
3068 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3069 	if (IS_ERR(cs)) {
3070 		i915_vma_put(vma);
3071 		return ERR_CAST(cs);
3072 	}
3073 
3074 	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
3075 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3076 	*cs++ = CS_GPR(engine, 0);
3077 	*cs++ = 1;
3078 
3079 	for (i = 1; i < NUM_GPR; i++) {
3080 		u64 addr;
3081 
3082 		/*
3083 		 * Perform: GPR[i]++
3084 		 *
3085 		 * As we read and write into the context saved GPR[i], if
3086 		 * we restart this batch buffer from an earlier point, we
3087 		 * will repeat the increment and store a value > 1.
3088 		 */
3089 		*cs++ = MI_MATH(4);
3090 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3091 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3092 		*cs++ = MI_MATH_ADD;
3093 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3094 
3095 		addr = result->node.start + offset + i * sizeof(*cs);
3096 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3097 		*cs++ = CS_GPR(engine, 2 * i);
3098 		*cs++ = lower_32_bits(addr);
3099 		*cs++ = upper_32_bits(addr);
3100 
3101 		*cs++ = MI_SEMAPHORE_WAIT |
3102 			MI_SEMAPHORE_POLL |
3103 			MI_SEMAPHORE_SAD_GTE_SDD;
3104 		*cs++ = i;
3105 		*cs++ = lower_32_bits(result->node.start);
3106 		*cs++ = upper_32_bits(result->node.start);
3107 	}
3108 
3109 	*cs++ = MI_BATCH_BUFFER_END;
3110 	i915_gem_object_flush_map(obj);
3111 	i915_gem_object_unpin_map(obj);
3112 
3113 	return vma;
3114 }
3115 
3116 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3117 {
3118 	struct drm_i915_gem_object *obj;
3119 	struct i915_vma *vma;
3120 	int err;
3121 
3122 	obj = i915_gem_object_create_internal(gt->i915, sz);
3123 	if (IS_ERR(obj))
3124 		return ERR_CAST(obj);
3125 
3126 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3127 	if (IS_ERR(vma)) {
3128 		i915_gem_object_put(obj);
3129 		return vma;
3130 	}
3131 
3132 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3133 	if (err) {
3134 		i915_vma_put(vma);
3135 		return ERR_PTR(err);
3136 	}
3137 
3138 	return vma;
3139 }
3140 
3141 static struct i915_request *
3142 create_gpr_client(struct intel_engine_cs *engine,
3143 		  struct i915_vma *global,
3144 		  unsigned int offset)
3145 {
3146 	struct i915_vma *batch, *vma;
3147 	struct intel_context *ce;
3148 	struct i915_request *rq;
3149 	int err;
3150 
3151 	ce = intel_context_create(engine);
3152 	if (IS_ERR(ce))
3153 		return ERR_CAST(ce);
3154 
3155 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3156 	if (IS_ERR(vma)) {
3157 		err = PTR_ERR(vma);
3158 		goto out_ce;
3159 	}
3160 
3161 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3162 	if (err)
3163 		goto out_ce;
3164 
3165 	batch = create_gpr_user(engine, vma, offset);
3166 	if (IS_ERR(batch)) {
3167 		err = PTR_ERR(batch);
3168 		goto out_vma;
3169 	}
3170 
3171 	rq = intel_context_create_request(ce);
3172 	if (IS_ERR(rq)) {
3173 		err = PTR_ERR(rq);
3174 		goto out_batch;
3175 	}
3176 
3177 	i915_vma_lock(vma);
3178 	err = i915_request_await_object(rq, vma->obj, false);
3179 	if (!err)
3180 		err = i915_vma_move_to_active(vma, rq, 0);
3181 	i915_vma_unlock(vma);
3182 
3183 	i915_vma_lock(batch);
3184 	if (!err)
3185 		err = i915_request_await_object(rq, batch->obj, false);
3186 	if (!err)
3187 		err = i915_vma_move_to_active(batch, rq, 0);
3188 	if (!err)
3189 		err = rq->engine->emit_bb_start(rq,
3190 						batch->node.start,
3191 						PAGE_SIZE, 0);
3192 	i915_vma_unlock(batch);
3193 	i915_vma_unpin(batch);
3194 
3195 	if (!err)
3196 		i915_request_get(rq);
3197 	i915_request_add(rq);
3198 
3199 out_batch:
3200 	i915_vma_put(batch);
3201 out_vma:
3202 	i915_vma_unpin(vma);
3203 out_ce:
3204 	intel_context_put(ce);
3205 	return err ? ERR_PTR(err) : rq;
3206 }
3207 
3208 static int preempt_user(struct intel_engine_cs *engine,
3209 			struct i915_vma *global,
3210 			int id)
3211 {
3212 	struct i915_sched_attr attr = {
3213 		.priority = I915_PRIORITY_MAX
3214 	};
3215 	struct i915_request *rq;
3216 	int err = 0;
3217 	u32 *cs;
3218 
3219 	rq = intel_engine_create_kernel_request(engine);
3220 	if (IS_ERR(rq))
3221 		return PTR_ERR(rq);
3222 
3223 	cs = intel_ring_begin(rq, 4);
3224 	if (IS_ERR(cs)) {
3225 		i915_request_add(rq);
3226 		return PTR_ERR(cs);
3227 	}
3228 
3229 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3230 	*cs++ = i915_ggtt_offset(global);
3231 	*cs++ = 0;
3232 	*cs++ = id;
3233 
3234 	intel_ring_advance(rq, cs);
3235 
3236 	i915_request_get(rq);
3237 	i915_request_add(rq);
3238 
3239 	engine->schedule(rq, &attr);
3240 
3241 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3242 		err = -ETIME;
3243 	i915_request_put(rq);
3244 
3245 	return err;
3246 }
3247 
3248 static int live_preempt_user(void *arg)
3249 {
3250 	struct intel_gt *gt = arg;
3251 	struct intel_engine_cs *engine;
3252 	struct i915_vma *global;
3253 	enum intel_engine_id id;
3254 	u32 *result;
3255 	int err = 0;
3256 
3257 	/*
3258 	 * In our other tests, we look at preemption in carefully
3259 	 * controlled conditions in the ringbuffer. Since most of the
3260 	 * time is spent in user batches, most of our preemptions naturally
3261 	 * occur there. We want to verify that when we preempt inside a batch
3262 	 * we continue on from the current instruction and do not roll back
3263 	 * to the start, or another earlier arbitration point.
3264 	 *
3265 	 * To verify this, we create a batch which is a mixture of
3266 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3267 	 * a few preempting contexts thrown into the mix, we look for any
3268 	 * repeated instructions (which show up as incorrect values).
3269 	 */
3270 
3271 	global = create_global(gt, 4096);
3272 	if (IS_ERR(global))
3273 		return PTR_ERR(global);
3274 
3275 	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3276 	if (IS_ERR(result)) {
3277 		i915_vma_unpin_and_release(&global, 0);
3278 		return PTR_ERR(result);
3279 	}
3280 
3281 	for_each_engine(engine, gt, id) {
3282 		struct i915_request *client[3] = {};
3283 		struct igt_live_test t;
3284 		int i;
3285 
3286 		if (!intel_engine_has_preemption(engine))
3287 			continue;
3288 
3289 		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3290 			continue; /* we need per-context GPR */
3291 
3292 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3293 			err = -EIO;
3294 			break;
3295 		}
3296 
3297 		memset(result, 0, 4096);
3298 
3299 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3300 			struct i915_request *rq;
3301 
3302 			rq = create_gpr_client(engine, global,
3303 					       NUM_GPR * i * sizeof(u32));
3304 			if (IS_ERR(rq)) {
3305 				err = PTR_ERR(rq);
3306 				goto end_test;
3307 			}
3308 
3309 			client[i] = rq;
3310 		}
3311 
3312 		/* Continuously preempt the set of 3 running contexts */
3313 		for (i = 1; i <= NUM_GPR; i++) {
3314 			err = preempt_user(engine, global, i);
3315 			if (err)
3316 				goto end_test;
3317 		}
3318 
3319 		if (READ_ONCE(result[0]) != NUM_GPR) {
3320 			pr_err("%s: Failed to release semaphore\n",
3321 			       engine->name);
3322 			err = -EIO;
3323 			goto end_test;
3324 		}
3325 
3326 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3327 			int gpr;
3328 
3329 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3330 				err = -ETIME;
3331 				goto end_test;
3332 			}
3333 
3334 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3335 				if (result[NUM_GPR * i + gpr] != 1) {
3336 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3337 					       engine->name,
3338 					       i, gpr, result[NUM_GPR * i + gpr]);
3339 					err = -EINVAL;
3340 					goto end_test;
3341 				}
3342 			}
3343 		}
3344 
3345 end_test:
3346 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3347 			if (!client[i])
3348 				break;
3349 
3350 			i915_request_put(client[i]);
3351 		}
3352 
3353 		/* Flush the semaphores on error */
3354 		smp_store_mb(result[0], -1);
3355 		if (igt_live_test_end(&t))
3356 			err = -EIO;
3357 		if (err)
3358 			break;
3359 	}
3360 
3361 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3362 	return err;
3363 }
3364 
3365 static int live_preempt_timeout(void *arg)
3366 {
3367 	struct intel_gt *gt = arg;
3368 	struct i915_gem_context *ctx_hi, *ctx_lo;
3369 	struct igt_spinner spin_lo;
3370 	struct intel_engine_cs *engine;
3371 	enum intel_engine_id id;
3372 	int err = -ENOMEM;
3373 
3374 	/*
3375 	 * Check that we force preemption to occur by cancelling the previous
3376 	 * context if it refuses to yield the GPU.
3377 	 */
3378 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3379 		return 0;
3380 
3381 	if (!intel_has_reset_engine(gt))
3382 		return 0;
3383 
3384 	if (igt_spinner_init(&spin_lo, gt))
3385 		return -ENOMEM;
3386 
3387 	ctx_hi = kernel_context(gt->i915);
3388 	if (!ctx_hi)
3389 		goto err_spin_lo;
3390 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3391 
3392 	ctx_lo = kernel_context(gt->i915);
3393 	if (!ctx_lo)
3394 		goto err_ctx_hi;
3395 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3396 
3397 	for_each_engine(engine, gt, id) {
3398 		unsigned long saved_timeout;
3399 		struct i915_request *rq;
3400 
3401 		if (!intel_engine_has_preemption(engine))
3402 			continue;
3403 
3404 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3405 					    MI_NOOP); /* preemption disabled */
3406 		if (IS_ERR(rq)) {
3407 			err = PTR_ERR(rq);
3408 			goto err_ctx_lo;
3409 		}
3410 
3411 		i915_request_add(rq);
3412 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3413 			intel_gt_set_wedged(gt);
3414 			err = -EIO;
3415 			goto err_ctx_lo;
3416 		}
3417 
3418 		rq = igt_request_alloc(ctx_hi, engine);
3419 		if (IS_ERR(rq)) {
3420 			igt_spinner_end(&spin_lo);
3421 			err = PTR_ERR(rq);
3422 			goto err_ctx_lo;
3423 		}
3424 
3425 		/* Flush the previous CS ack before changing timeouts */
3426 		while (READ_ONCE(engine->execlists.pending[0]))
3427 			cpu_relax();
3428 
3429 		saved_timeout = engine->props.preempt_timeout_ms;
3430 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3431 
3432 		i915_request_get(rq);
3433 		i915_request_add(rq);
3434 
3435 		intel_engine_flush_submission(engine);
3436 		engine->props.preempt_timeout_ms = saved_timeout;
3437 
3438 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3439 			intel_gt_set_wedged(gt);
3440 			i915_request_put(rq);
3441 			err = -ETIME;
3442 			goto err_ctx_lo;
3443 		}
3444 
3445 		igt_spinner_end(&spin_lo);
3446 		i915_request_put(rq);
3447 	}
3448 
3449 	err = 0;
3450 err_ctx_lo:
3451 	kernel_context_close(ctx_lo);
3452 err_ctx_hi:
3453 	kernel_context_close(ctx_hi);
3454 err_spin_lo:
3455 	igt_spinner_fini(&spin_lo);
3456 	return err;
3457 }
3458 
/* Draw a value offset by @min, with a span of (@max - @min), from @rnd. */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	const int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
3463 
3464 static int random_priority(struct rnd_state *rnd)
3465 {
3466 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3467 }
3468 
3469 struct preempt_smoke {
3470 	struct intel_gt *gt;
3471 	struct i915_gem_context **contexts;
3472 	struct intel_engine_cs *engine;
3473 	struct drm_i915_gem_object *batch;
3474 	unsigned int ncontext;
3475 	struct rnd_state prng;
3476 	unsigned long count;
3477 };
3478 
3479 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3480 {
3481 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3482 							  &smoke->prng)];
3483 }
3484 
3485 static int smoke_submit(struct preempt_smoke *smoke,
3486 			struct i915_gem_context *ctx, int prio,
3487 			struct drm_i915_gem_object *batch)
3488 {
3489 	struct i915_request *rq;
3490 	struct i915_vma *vma = NULL;
3491 	int err = 0;
3492 
3493 	if (batch) {
3494 		struct i915_address_space *vm;
3495 
3496 		vm = i915_gem_context_get_vm_rcu(ctx);
3497 		vma = i915_vma_instance(batch, vm, NULL);
3498 		i915_vm_put(vm);
3499 		if (IS_ERR(vma))
3500 			return PTR_ERR(vma);
3501 
3502 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3503 		if (err)
3504 			return err;
3505 	}
3506 
3507 	ctx->sched.priority = prio;
3508 
3509 	rq = igt_request_alloc(ctx, smoke->engine);
3510 	if (IS_ERR(rq)) {
3511 		err = PTR_ERR(rq);
3512 		goto unpin;
3513 	}
3514 
3515 	if (vma) {
3516 		i915_vma_lock(vma);
3517 		err = i915_request_await_object(rq, vma->obj, false);
3518 		if (!err)
3519 			err = i915_vma_move_to_active(vma, rq, 0);
3520 		if (!err)
3521 			err = rq->engine->emit_bb_start(rq,
3522 							vma->node.start,
3523 							PAGE_SIZE, 0);
3524 		i915_vma_unlock(vma);
3525 	}
3526 
3527 	i915_request_add(rq);
3528 
3529 unpin:
3530 	if (vma)
3531 		i915_vma_unpin(vma);
3532 
3533 	return err;
3534 }
3535 
3536 static int smoke_crescendo_thread(void *arg)
3537 {
3538 	struct preempt_smoke *smoke = arg;
3539 	IGT_TIMEOUT(end_time);
3540 	unsigned long count;
3541 
3542 	count = 0;
3543 	do {
3544 		struct i915_gem_context *ctx = smoke_context(smoke);
3545 		int err;
3546 
3547 		err = smoke_submit(smoke,
3548 				   ctx, count % I915_PRIORITY_MAX,
3549 				   smoke->batch);
3550 		if (err)
3551 			return err;
3552 
3553 		count++;
3554 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3555 
3556 	smoke->count = count;
3557 	return 0;
3558 }
3559 
3560 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3561 #define BATCH BIT(0)
3562 {
3563 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3564 	struct preempt_smoke arg[I915_NUM_ENGINES];
3565 	struct intel_engine_cs *engine;
3566 	enum intel_engine_id id;
3567 	unsigned long count;
3568 	int err = 0;
3569 
3570 	for_each_engine(engine, smoke->gt, id) {
3571 		arg[id] = *smoke;
3572 		arg[id].engine = engine;
3573 		if (!(flags & BATCH))
3574 			arg[id].batch = NULL;
3575 		arg[id].count = 0;
3576 
3577 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3578 				      "igt/smoke:%d", id);
3579 		if (IS_ERR(tsk[id])) {
3580 			err = PTR_ERR(tsk[id]);
3581 			break;
3582 		}
3583 		get_task_struct(tsk[id]);
3584 	}
3585 
3586 	yield(); /* start all threads before we kthread_stop() */
3587 
3588 	count = 0;
3589 	for_each_engine(engine, smoke->gt, id) {
3590 		int status;
3591 
3592 		if (IS_ERR_OR_NULL(tsk[id]))
3593 			continue;
3594 
3595 		status = kthread_stop(tsk[id]);
3596 		if (status && !err)
3597 			err = status;
3598 
3599 		count += arg[id].count;
3600 
3601 		put_task_struct(tsk[id]);
3602 	}
3603 
3604 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606 	return 0;
3607 }
3608 
3609 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3610 {
3611 	enum intel_engine_id id;
3612 	IGT_TIMEOUT(end_time);
3613 	unsigned long count;
3614 
3615 	count = 0;
3616 	do {
3617 		for_each_engine(smoke->engine, smoke->gt, id) {
3618 			struct i915_gem_context *ctx = smoke_context(smoke);
3619 			int err;
3620 
3621 			err = smoke_submit(smoke,
3622 					   ctx, random_priority(&smoke->prng),
3623 					   flags & BATCH ? smoke->batch : NULL);
3624 			if (err)
3625 				return err;
3626 
3627 			count++;
3628 		}
3629 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3630 
3631 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3632 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3633 	return 0;
3634 }
3635 
3636 static int live_preempt_smoke(void *arg)
3637 {
3638 	struct preempt_smoke smoke = {
3639 		.gt = arg,
3640 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3641 		.ncontext = 256,
3642 	};
3643 	const unsigned int phase[] = { 0, BATCH };
3644 	struct igt_live_test t;
3645 	int err = -ENOMEM;
3646 	u32 *cs;
3647 	int n;
3648 
3649 	smoke.contexts = kmalloc_array(smoke.ncontext,
3650 				       sizeof(*smoke.contexts),
3651 				       GFP_KERNEL);
3652 	if (!smoke.contexts)
3653 		return -ENOMEM;
3654 
3655 	smoke.batch =
3656 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3657 	if (IS_ERR(smoke.batch)) {
3658 		err = PTR_ERR(smoke.batch);
3659 		goto err_free;
3660 	}
3661 
3662 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3663 	if (IS_ERR(cs)) {
3664 		err = PTR_ERR(cs);
3665 		goto err_batch;
3666 	}
3667 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3668 		cs[n] = MI_ARB_CHECK;
3669 	cs[n] = MI_BATCH_BUFFER_END;
3670 	i915_gem_object_flush_map(smoke.batch);
3671 	i915_gem_object_unpin_map(smoke.batch);
3672 
3673 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3674 		err = -EIO;
3675 		goto err_batch;
3676 	}
3677 
3678 	for (n = 0; n < smoke.ncontext; n++) {
3679 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3680 		if (!smoke.contexts[n])
3681 			goto err_ctx;
3682 	}
3683 
3684 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3685 		err = smoke_crescendo(&smoke, phase[n]);
3686 		if (err)
3687 			goto err_ctx;
3688 
3689 		err = smoke_random(&smoke, phase[n]);
3690 		if (err)
3691 			goto err_ctx;
3692 	}
3693 
3694 err_ctx:
3695 	if (igt_live_test_end(&t))
3696 		err = -EIO;
3697 
3698 	for (n = 0; n < smoke.ncontext; n++) {
3699 		if (!smoke.contexts[n])
3700 			break;
3701 		kernel_context_close(smoke.contexts[n]);
3702 	}
3703 
3704 err_batch:
3705 	i915_gem_object_put(smoke.batch);
3706 err_free:
3707 	kfree(smoke.contexts);
3708 
3709 	return err;
3710 }
3711 
3712 static int nop_virtual_engine(struct intel_gt *gt,
3713 			      struct intel_engine_cs **siblings,
3714 			      unsigned int nsibling,
3715 			      unsigned int nctx,
3716 			      unsigned int flags)
3717 #define CHAIN BIT(0)
3718 {
3719 	IGT_TIMEOUT(end_time);
3720 	struct i915_request *request[16] = {};
3721 	struct intel_context *ve[16];
3722 	unsigned long n, prime, nc;
3723 	struct igt_live_test t;
3724 	ktime_t times[2] = {};
3725 	int err;
3726 
3727 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3728 
3729 	for (n = 0; n < nctx; n++) {
3730 		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3731 		if (IS_ERR(ve[n])) {
3732 			err = PTR_ERR(ve[n]);
3733 			nctx = n;
3734 			goto out;
3735 		}
3736 
3737 		err = intel_context_pin(ve[n]);
3738 		if (err) {
3739 			intel_context_put(ve[n]);
3740 			nctx = n;
3741 			goto out;
3742 		}
3743 	}
3744 
3745 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3746 	if (err)
3747 		goto out;
3748 
3749 	for_each_prime_number_from(prime, 1, 8192) {
3750 		times[1] = ktime_get_raw();
3751 
3752 		if (flags & CHAIN) {
3753 			for (nc = 0; nc < nctx; nc++) {
3754 				for (n = 0; n < prime; n++) {
3755 					struct i915_request *rq;
3756 
3757 					rq = i915_request_create(ve[nc]);
3758 					if (IS_ERR(rq)) {
3759 						err = PTR_ERR(rq);
3760 						goto out;
3761 					}
3762 
3763 					if (request[nc])
3764 						i915_request_put(request[nc]);
3765 					request[nc] = i915_request_get(rq);
3766 					i915_request_add(rq);
3767 				}
3768 			}
3769 		} else {
3770 			for (n = 0; n < prime; n++) {
3771 				for (nc = 0; nc < nctx; nc++) {
3772 					struct i915_request *rq;
3773 
3774 					rq = i915_request_create(ve[nc]);
3775 					if (IS_ERR(rq)) {
3776 						err = PTR_ERR(rq);
3777 						goto out;
3778 					}
3779 
3780 					if (request[nc])
3781 						i915_request_put(request[nc]);
3782 					request[nc] = i915_request_get(rq);
3783 					i915_request_add(rq);
3784 				}
3785 			}
3786 		}
3787 
3788 		for (nc = 0; nc < nctx; nc++) {
3789 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3790 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3791 				       __func__, ve[0]->engine->name,
3792 				       request[nc]->fence.context,
3793 				       request[nc]->fence.seqno);
3794 
3795 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3796 					  __func__, ve[0]->engine->name,
3797 					  request[nc]->fence.context,
3798 					  request[nc]->fence.seqno);
3799 				GEM_TRACE_DUMP();
3800 				intel_gt_set_wedged(gt);
3801 				break;
3802 			}
3803 		}
3804 
3805 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3806 		if (prime == 1)
3807 			times[0] = times[1];
3808 
3809 		for (nc = 0; nc < nctx; nc++) {
3810 			i915_request_put(request[nc]);
3811 			request[nc] = NULL;
3812 		}
3813 
3814 		if (__igt_timeout(end_time, NULL))
3815 			break;
3816 	}
3817 
3818 	err = igt_live_test_end(&t);
3819 	if (err)
3820 		goto out;
3821 
3822 	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3823 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3824 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3825 
3826 out:
3827 	if (igt_flush_test(gt->i915))
3828 		err = -EIO;
3829 
3830 	for (nc = 0; nc < nctx; nc++) {
3831 		i915_request_put(request[nc]);
3832 		intel_context_unpin(ve[nc]);
3833 		intel_context_put(ve[nc]);
3834 	}
3835 	return err;
3836 }
3837 
3838 static unsigned int
3839 __select_siblings(struct intel_gt *gt,
3840 		  unsigned int class,
3841 		  struct intel_engine_cs **siblings,
3842 		  bool (*filter)(const struct intel_engine_cs *))
3843 {
3844 	unsigned int n = 0;
3845 	unsigned int inst;
3846 
3847 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3848 		if (!gt->engine_class[class][inst])
3849 			continue;
3850 
3851 		if (filter && !filter(gt->engine_class[class][inst]))
3852 			continue;
3853 
3854 		siblings[n++] = gt->engine_class[class][inst];
3855 	}
3856 
3857 	return n;
3858 }
3859 
3860 static unsigned int
3861 select_siblings(struct intel_gt *gt,
3862 		unsigned int class,
3863 		struct intel_engine_cs **siblings)
3864 {
3865 	return __select_siblings(gt, class, siblings, NULL);
3866 }
3867 
3868 static int live_virtual_engine(void *arg)
3869 {
3870 	struct intel_gt *gt = arg;
3871 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3872 	struct intel_engine_cs *engine;
3873 	enum intel_engine_id id;
3874 	unsigned int class;
3875 	int err;
3876 
3877 	if (intel_uc_uses_guc_submission(&gt->uc))
3878 		return 0;
3879 
3880 	for_each_engine(engine, gt, id) {
3881 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882 		if (err) {
3883 			pr_err("Failed to wrap engine %s: err=%d\n",
3884 			       engine->name, err);
3885 			return err;
3886 		}
3887 	}
3888 
3889 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3890 		int nsibling, n;
3891 
3892 		nsibling = select_siblings(gt, class, siblings);
3893 		if (nsibling < 2)
3894 			continue;
3895 
3896 		for (n = 1; n <= nsibling + 1; n++) {
3897 			err = nop_virtual_engine(gt, siblings, nsibling,
3898 						 n, 0);
3899 			if (err)
3900 				return err;
3901 		}
3902 
3903 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3904 		if (err)
3905 			return err;
3906 	}
3907 
3908 	return 0;
3909 }
3910 
3911 static int mask_virtual_engine(struct intel_gt *gt,
3912 			       struct intel_engine_cs **siblings,
3913 			       unsigned int nsibling)
3914 {
3915 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3916 	struct intel_context *ve;
3917 	struct igt_live_test t;
3918 	unsigned int n;
3919 	int err;
3920 
3921 	/*
3922 	 * Check that by setting the execution mask on a request, we can
3923 	 * restrict it to our desired engine within the virtual engine.
3924 	 */
3925 
3926 	ve = intel_execlists_create_virtual(siblings, nsibling);
3927 	if (IS_ERR(ve)) {
3928 		err = PTR_ERR(ve);
3929 		goto out_close;
3930 	}
3931 
3932 	err = intel_context_pin(ve);
3933 	if (err)
3934 		goto out_put;
3935 
3936 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3937 	if (err)
3938 		goto out_unpin;
3939 
3940 	for (n = 0; n < nsibling; n++) {
3941 		request[n] = i915_request_create(ve);
3942 		if (IS_ERR(request[n])) {
3943 			err = PTR_ERR(request[n]);
3944 			nsibling = n;
3945 			goto out;
3946 		}
3947 
3948 		/* Reverse order as it's more likely to be unnatural */
3949 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950 
3951 		i915_request_get(request[n]);
3952 		i915_request_add(request[n]);
3953 	}
3954 
3955 	for (n = 0; n < nsibling; n++) {
3956 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3957 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3958 			       __func__, ve->engine->name,
3959 			       request[n]->fence.context,
3960 			       request[n]->fence.seqno);
3961 
3962 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3963 				  __func__, ve->engine->name,
3964 				  request[n]->fence.context,
3965 				  request[n]->fence.seqno);
3966 			GEM_TRACE_DUMP();
3967 			intel_gt_set_wedged(gt);
3968 			err = -EIO;
3969 			goto out;
3970 		}
3971 
3972 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3973 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3974 			       request[n]->engine->name,
3975 			       siblings[nsibling - n - 1]->name);
3976 			err = -EINVAL;
3977 			goto out;
3978 		}
3979 	}
3980 
3981 	err = igt_live_test_end(&t);
3982 out:
3983 	if (igt_flush_test(gt->i915))
3984 		err = -EIO;
3985 
3986 	for (n = 0; n < nsibling; n++)
3987 		i915_request_put(request[n]);
3988 
3989 out_unpin:
3990 	intel_context_unpin(ve);
3991 out_put:
3992 	intel_context_put(ve);
3993 out_close:
3994 	return err;
3995 }
3996 
3997 static int live_virtual_mask(void *arg)
3998 {
3999 	struct intel_gt *gt = arg;
4000 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4001 	unsigned int class;
4002 	int err;
4003 
4004 	if (intel_uc_uses_guc_submission(&gt->uc))
4005 		return 0;
4006 
4007 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4008 		unsigned int nsibling;
4009 
4010 		nsibling = select_siblings(gt, class, siblings);
4011 		if (nsibling < 2)
4012 			continue;
4013 
4014 		err = mask_virtual_engine(gt, siblings, nsibling);
4015 		if (err)
4016 			return err;
4017 	}
4018 
4019 	return 0;
4020 }
4021 
4022 static int slicein_virtual_engine(struct intel_gt *gt,
4023 				  struct intel_engine_cs **siblings,
4024 				  unsigned int nsibling)
4025 {
4026 	const long timeout = slice_timeout(siblings[0]);
4027 	struct intel_context *ce;
4028 	struct i915_request *rq;
4029 	struct igt_spinner spin;
4030 	unsigned int n;
4031 	int err = 0;
4032 
4033 	/*
4034 	 * Virtual requests must take part in timeslicing on the target engines.
4035 	 */
4036 
4037 	if (igt_spinner_init(&spin, gt))
4038 		return -ENOMEM;
4039 
4040 	for (n = 0; n < nsibling; n++) {
4041 		ce = intel_context_create(siblings[n]);
4042 		if (IS_ERR(ce)) {
4043 			err = PTR_ERR(ce);
4044 			goto out;
4045 		}
4046 
4047 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4048 		intel_context_put(ce);
4049 		if (IS_ERR(rq)) {
4050 			err = PTR_ERR(rq);
4051 			goto out;
4052 		}
4053 
4054 		i915_request_add(rq);
4055 	}
4056 
4057 	ce = intel_execlists_create_virtual(siblings, nsibling);
4058 	if (IS_ERR(ce)) {
4059 		err = PTR_ERR(ce);
4060 		goto out;
4061 	}
4062 
4063 	rq = intel_context_create_request(ce);
4064 	intel_context_put(ce);
4065 	if (IS_ERR(rq)) {
4066 		err = PTR_ERR(rq);
4067 		goto out;
4068 	}
4069 
4070 	i915_request_get(rq);
4071 	i915_request_add(rq);
4072 	if (i915_request_wait(rq, 0, timeout) < 0) {
4073 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4074 			      __func__, rq->engine->name);
4075 		GEM_TRACE_DUMP();
4076 		intel_gt_set_wedged(gt);
4077 		err = -EIO;
4078 	}
4079 	i915_request_put(rq);
4080 
4081 out:
4082 	igt_spinner_end(&spin);
4083 	if (igt_flush_test(gt->i915))
4084 		err = -EIO;
4085 	igt_spinner_fini(&spin);
4086 	return err;
4087 }
4088 
4089 static int sliceout_virtual_engine(struct intel_gt *gt,
4090 				   struct intel_engine_cs **siblings,
4091 				   unsigned int nsibling)
4092 {
4093 	const long timeout = slice_timeout(siblings[0]);
4094 	struct intel_context *ce;
4095 	struct i915_request *rq;
4096 	struct igt_spinner spin;
4097 	unsigned int n;
4098 	int err = 0;
4099 
4100 	/*
4101 	 * Virtual requests must allow others a fair timeslice.
4102 	 */
4103 
4104 	if (igt_spinner_init(&spin, gt))
4105 		return -ENOMEM;
4106 
4107 	/* XXX We do not handle oversubscription and fairness with normal rq */
4108 	for (n = 0; n < nsibling; n++) {
4109 		ce = intel_execlists_create_virtual(siblings, nsibling);
4110 		if (IS_ERR(ce)) {
4111 			err = PTR_ERR(ce);
4112 			goto out;
4113 		}
4114 
4115 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4116 		intel_context_put(ce);
4117 		if (IS_ERR(rq)) {
4118 			err = PTR_ERR(rq);
4119 			goto out;
4120 		}
4121 
4122 		i915_request_add(rq);
4123 	}
4124 
4125 	for (n = 0; !err && n < nsibling; n++) {
4126 		ce = intel_context_create(siblings[n]);
4127 		if (IS_ERR(ce)) {
4128 			err = PTR_ERR(ce);
4129 			goto out;
4130 		}
4131 
4132 		rq = intel_context_create_request(ce);
4133 		intel_context_put(ce);
4134 		if (IS_ERR(rq)) {
4135 			err = PTR_ERR(rq);
4136 			goto out;
4137 		}
4138 
4139 		i915_request_get(rq);
4140 		i915_request_add(rq);
4141 		if (i915_request_wait(rq, 0, timeout) < 0) {
4142 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4143 				      __func__, siblings[n]->name);
4144 			GEM_TRACE_DUMP();
4145 			intel_gt_set_wedged(gt);
4146 			err = -EIO;
4147 		}
4148 		i915_request_put(rq);
4149 	}
4150 
4151 out:
4152 	igt_spinner_end(&spin);
4153 	if (igt_flush_test(gt->i915))
4154 		err = -EIO;
4155 	igt_spinner_fini(&spin);
4156 	return err;
4157 }
4158 
4159 static int live_virtual_slice(void *arg)
4160 {
4161 	struct intel_gt *gt = arg;
4162 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4163 	unsigned int class;
4164 	int err;
4165 
4166 	if (intel_uc_uses_guc_submission(&gt->uc))
4167 		return 0;
4168 
4169 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4170 		unsigned int nsibling;
4171 
4172 		nsibling = __select_siblings(gt, class, siblings,
4173 					     intel_engine_has_timeslices);
4174 		if (nsibling < 2)
4175 			continue;
4176 
4177 		err = slicein_virtual_engine(gt, siblings, nsibling);
4178 		if (err)
4179 			return err;
4180 
4181 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4182 		if (err)
4183 			return err;
4184 	}
4185 
4186 	return 0;
4187 }
4188 
4189 static int preserved_virtual_engine(struct intel_gt *gt,
4190 				    struct intel_engine_cs **siblings,
4191 				    unsigned int nsibling)
4192 {
4193 	struct i915_request *last = NULL;
4194 	struct intel_context *ve;
4195 	struct i915_vma *scratch;
4196 	struct igt_live_test t;
4197 	unsigned int n;
4198 	int err = 0;
4199 	u32 *cs;
4200 
4201 	scratch =
4202 		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4203 						    PAGE_SIZE);
4204 	if (IS_ERR(scratch))
4205 		return PTR_ERR(scratch);
4206 
4207 	err = i915_vma_sync(scratch);
4208 	if (err)
4209 		goto out_scratch;
4210 
4211 	ve = intel_execlists_create_virtual(siblings, nsibling);
4212 	if (IS_ERR(ve)) {
4213 		err = PTR_ERR(ve);
4214 		goto out_scratch;
4215 	}
4216 
4217 	err = intel_context_pin(ve);
4218 	if (err)
4219 		goto out_put;
4220 
4221 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4222 	if (err)
4223 		goto out_unpin;
4224 
4225 	for (n = 0; n < NUM_GPR_DW; n++) {
4226 		struct intel_engine_cs *engine = siblings[n % nsibling];
4227 		struct i915_request *rq;
4228 
4229 		rq = i915_request_create(ve);
4230 		if (IS_ERR(rq)) {
4231 			err = PTR_ERR(rq);
4232 			goto out_end;
4233 		}
4234 
4235 		i915_request_put(last);
4236 		last = i915_request_get(rq);
4237 
4238 		cs = intel_ring_begin(rq, 8);
4239 		if (IS_ERR(cs)) {
4240 			i915_request_add(rq);
4241 			err = PTR_ERR(cs);
4242 			goto out_end;
4243 		}
4244 
4245 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4246 		*cs++ = CS_GPR(engine, n);
4247 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4248 		*cs++ = 0;
4249 
4250 		*cs++ = MI_LOAD_REGISTER_IMM(1);
4251 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4252 		*cs++ = n + 1;
4253 
4254 		*cs++ = MI_NOOP;
4255 		intel_ring_advance(rq, cs);
4256 
4257 		/* Restrict this request to run on a particular engine */
4258 		rq->execution_mask = engine->mask;
4259 		i915_request_add(rq);
4260 	}
4261 
4262 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
4263 		err = -ETIME;
4264 		goto out_end;
4265 	}
4266 
4267 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4268 	if (IS_ERR(cs)) {
4269 		err = PTR_ERR(cs);
4270 		goto out_end;
4271 	}
4272 
4273 	for (n = 0; n < NUM_GPR_DW; n++) {
4274 		if (cs[n] != n) {
4275 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
4276 			       cs[n], n);
4277 			err = -EINVAL;
4278 			break;
4279 		}
4280 	}
4281 
4282 	i915_gem_object_unpin_map(scratch->obj);
4283 
4284 out_end:
4285 	if (igt_live_test_end(&t))
4286 		err = -EIO;
4287 	i915_request_put(last);
4288 out_unpin:
4289 	intel_context_unpin(ve);
4290 out_put:
4291 	intel_context_put(ve);
4292 out_scratch:
4293 	i915_vma_unpin_and_release(&scratch, 0);
4294 	return err;
4295 }
4296 
4297 static int live_virtual_preserved(void *arg)
4298 {
4299 	struct intel_gt *gt = arg;
4300 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4301 	unsigned int class;
4302 
4303 	/*
4304 	 * Check that the context image retains non-privileged (user) registers
4305 	 * from one engine to the next. For this we check that the CS_GPR
4306 	 * are preserved.
4307 	 */
4308 
4309 	if (intel_uc_uses_guc_submission(&gt->uc))
4310 		return 0;
4311 
4312 	/* As we use CS_GPR we cannot run before they existed on all engines. */
4313 	if (GRAPHICS_VER(gt->i915) < 9)
4314 		return 0;
4315 
4316 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4317 		int nsibling, err;
4318 
4319 		nsibling = select_siblings(gt, class, siblings);
4320 		if (nsibling < 2)
4321 			continue;
4322 
4323 		err = preserved_virtual_engine(gt, siblings, nsibling);
4324 		if (err)
4325 			return err;
4326 	}
4327 
4328 	return 0;
4329 }
4330 
4331 static int bond_virtual_engine(struct intel_gt *gt,
4332 			       unsigned int class,
4333 			       struct intel_engine_cs **siblings,
4334 			       unsigned int nsibling,
4335 			       unsigned int flags)
4336 #define BOND_SCHEDULE BIT(0)
4337 {
4338 	struct intel_engine_cs *master;
4339 	struct i915_request *rq[16];
4340 	enum intel_engine_id id;
4341 	struct igt_spinner spin;
4342 	unsigned long n;
4343 	int err;
4344 
4345 	/*
4346 	 * A set of bonded requests is intended to be run concurrently
4347 	 * across a number of engines. We use one request per-engine
4348 	 * and a magic fence to schedule each of the bonded requests
4349 	 * at the same time. A consequence of our current scheduler is that
4350 	 * we only move requests to the HW ready queue when the request
4351 	 * becomes ready, that is when all of its prerequisite fences have
4352 	 * been signaled. As one of those fences is the master submit fence,
4353 	 * there is a delay on all secondary fences as the HW may be
4354 	 * currently busy. Equally, as all the requests are independent,
4355 	 * they may have other fences that delay individual request
4356 	 * submission to HW. Ergo, we do not guarantee that all requests are
4357 	 * immediately submitted to HW at the same time, just that if the
4358 	 * rules are abided by, they are ready at the same time as the
4359 	 * first is submitted. Userspace can embed semaphores in its batch
4360 	 * to ensure parallel execution of its phases as it requires.
4361 	 * Though naturally it gets requested that perhaps the scheduler should
4362 	 * take care of parallel execution, even across preemption events on
4363 	 * different HW. (The proper answer is of course "lalalala".)
4364 	 *
4365 	 * With the submit-fence, we have identified three possible phases
4366 	 * of synchronisation depending on the master fence: queued (not
4367 	 * ready), executing, and signaled. The first two are quite simple
4368 	 * and checked below. However, the signaled master fence handling is
4369 	 * contentious. Currently we do not distinguish between a signaled
4370 	 * fence and an expired fence, as once signaled it does not convey
4371 	 * any information about the previous execution. It may even be freed
4372 	 * and hence checking later it may not exist at all. Ergo we currently
4373 	 * do not apply the bonding constraint for an already signaled fence,
4374 	 * as our expectation is that it should not constrain the secondaries
4375 	 * and is outside of the scope of the bonded request API (i.e. all
4376 	 * userspace requests are meant to be running in parallel). As
4377 	 * it imposes no constraint, and is effectively a no-op, we do not
4378 	 * check below as normal execution flows are checked extensively above.
4379 	 *
4380 	 * XXX Is the degenerate handling of signaled submit fences the
4381 	 * expected behaviour for userpace?
4382 	 */
4383 
4384 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4385 
4386 	if (igt_spinner_init(&spin, gt))
4387 		return -ENOMEM;
4388 
4389 	err = 0;
4390 	rq[0] = ERR_PTR(-ENOMEM);
4391 	for_each_engine(master, gt, id) {
4392 		struct i915_sw_fence fence = {};
4393 		struct intel_context *ce;
4394 
4395 		if (master->class == class)
4396 			continue;
4397 
4398 		ce = intel_context_create(master);
4399 		if (IS_ERR(ce)) {
4400 			err = PTR_ERR(ce);
4401 			goto out;
4402 		}
4403 
4404 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4405 
4406 		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4407 		intel_context_put(ce);
4408 		if (IS_ERR(rq[0])) {
4409 			err = PTR_ERR(rq[0]);
4410 			goto out;
4411 		}
4412 		i915_request_get(rq[0]);
4413 
4414 		if (flags & BOND_SCHEDULE) {
4415 			onstack_fence_init(&fence);
4416 			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4417 							       &fence,
4418 							       GFP_KERNEL);
4419 		}
4420 
4421 		i915_request_add(rq[0]);
4422 		if (err < 0)
4423 			goto out;
4424 
4425 		if (!(flags & BOND_SCHEDULE) &&
4426 		    !igt_wait_for_spinner(&spin, rq[0])) {
4427 			err = -EIO;
4428 			goto out;
4429 		}
4430 
4431 		for (n = 0; n < nsibling; n++) {
4432 			struct intel_context *ve;
4433 
4434 			ve = intel_execlists_create_virtual(siblings, nsibling);
4435 			if (IS_ERR(ve)) {
4436 				err = PTR_ERR(ve);
4437 				onstack_fence_fini(&fence);
4438 				goto out;
4439 			}
4440 
4441 			err = intel_virtual_engine_attach_bond(ve->engine,
4442 							       master,
4443 							       siblings[n]);
4444 			if (err) {
4445 				intel_context_put(ve);
4446 				onstack_fence_fini(&fence);
4447 				goto out;
4448 			}
4449 
4450 			err = intel_context_pin(ve);
4451 			intel_context_put(ve);
4452 			if (err) {
4453 				onstack_fence_fini(&fence);
4454 				goto out;
4455 			}
4456 
4457 			rq[n + 1] = i915_request_create(ve);
4458 			intel_context_unpin(ve);
4459 			if (IS_ERR(rq[n + 1])) {
4460 				err = PTR_ERR(rq[n + 1]);
4461 				onstack_fence_fini(&fence);
4462 				goto out;
4463 			}
4464 			i915_request_get(rq[n + 1]);
4465 
4466 			err = i915_request_await_execution(rq[n + 1],
4467 							   &rq[0]->fence,
4468 							   ve->engine->bond_execute);
4469 			i915_request_add(rq[n + 1]);
4470 			if (err < 0) {
4471 				onstack_fence_fini(&fence);
4472 				goto out;
4473 			}
4474 		}
4475 		onstack_fence_fini(&fence);
4476 		intel_engine_flush_submission(master);
4477 		igt_spinner_end(&spin);
4478 
4479 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4480 			pr_err("Master request did not execute (on %s)!\n",
4481 			       rq[0]->engine->name);
4482 			err = -EIO;
4483 			goto out;
4484 		}
4485 
4486 		for (n = 0; n < nsibling; n++) {
4487 			if (i915_request_wait(rq[n + 1], 0,
4488 					      MAX_SCHEDULE_TIMEOUT) < 0) {
4489 				err = -EIO;
4490 				goto out;
4491 			}
4492 
4493 			if (rq[n + 1]->engine != siblings[n]) {
4494 				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4495 				       siblings[n]->name,
4496 				       rq[n + 1]->engine->name,
4497 				       rq[0]->engine->name);
4498 				err = -EINVAL;
4499 				goto out;
4500 			}
4501 		}
4502 
4503 		for (n = 0; !IS_ERR(rq[n]); n++)
4504 			i915_request_put(rq[n]);
4505 		rq[0] = ERR_PTR(-ENOMEM);
4506 	}
4507 
4508 out:
4509 	for (n = 0; !IS_ERR(rq[n]); n++)
4510 		i915_request_put(rq[n]);
4511 	if (igt_flush_test(gt->i915))
4512 		err = -EIO;
4513 
4514 	igt_spinner_fini(&spin);
4515 	return err;
4516 }
4517 
4518 static int live_virtual_bond(void *arg)
4519 {
4520 	static const struct phase {
4521 		const char *name;
4522 		unsigned int flags;
4523 	} phases[] = {
4524 		{ "", 0 },
4525 		{ "schedule", BOND_SCHEDULE },
4526 		{ },
4527 	};
4528 	struct intel_gt *gt = arg;
4529 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4530 	unsigned int class;
4531 	int err;
4532 
4533 	if (intel_uc_uses_guc_submission(&gt->uc))
4534 		return 0;
4535 
4536 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4537 		const struct phase *p;
4538 		int nsibling;
4539 
4540 		nsibling = select_siblings(gt, class, siblings);
4541 		if (nsibling < 2)
4542 			continue;
4543 
4544 		for (p = phases; p->name; p++) {
4545 			err = bond_virtual_engine(gt,
4546 						  class, siblings, nsibling,
4547 						  p->flags);
4548 			if (err) {
4549 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4550 				       __func__, p->name, class, nsibling, err);
4551 				return err;
4552 			}
4553 		}
4554 	}
4555 
4556 	return 0;
4557 }
4558 
4559 static int reset_virtual_engine(struct intel_gt *gt,
4560 				struct intel_engine_cs **siblings,
4561 				unsigned int nsibling)
4562 {
4563 	struct intel_engine_cs *engine;
4564 	struct intel_context *ve;
4565 	struct igt_spinner spin;
4566 	struct i915_request *rq;
4567 	unsigned int n;
4568 	int err = 0;
4569 
4570 	/*
4571 	 * In order to support offline error capture for fast preempt reset,
4572 	 * we need to decouple the guilty request and ensure that it and its
4573 	 * descendents are not executed while the capture is in progress.
4574 	 */
4575 
4576 	if (igt_spinner_init(&spin, gt))
4577 		return -ENOMEM;
4578 
4579 	ve = intel_execlists_create_virtual(siblings, nsibling);
4580 	if (IS_ERR(ve)) {
4581 		err = PTR_ERR(ve);
4582 		goto out_spin;
4583 	}
4584 
4585 	for (n = 0; n < nsibling; n++)
4586 		st_engine_heartbeat_disable(siblings[n]);
4587 
4588 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4589 	if (IS_ERR(rq)) {
4590 		err = PTR_ERR(rq);
4591 		goto out_heartbeat;
4592 	}
4593 	i915_request_add(rq);
4594 
4595 	if (!igt_wait_for_spinner(&spin, rq)) {
4596 		intel_gt_set_wedged(gt);
4597 		err = -ETIME;
4598 		goto out_heartbeat;
4599 	}
4600 
4601 	engine = rq->engine;
4602 	GEM_BUG_ON(engine == ve->engine);
4603 
4604 	/* Take ownership of the reset and tasklet */
4605 	err = engine_lock_reset_tasklet(engine);
4606 	if (err)
4607 		goto out_heartbeat;
4608 
4609 	engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4610 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4611 
4612 	/* Fake a preemption event; failed of course */
4613 	spin_lock_irq(&engine->active.lock);
4614 	__unwind_incomplete_requests(engine);
4615 	spin_unlock_irq(&engine->active.lock);
4616 	GEM_BUG_ON(rq->engine != engine);
4617 
4618 	/* Reset the engine while keeping our active request on hold */
4619 	execlists_hold(engine, rq);
4620 	GEM_BUG_ON(!i915_request_on_hold(rq));
4621 
4622 	__intel_engine_reset_bh(engine, NULL);
4623 	GEM_BUG_ON(rq->fence.error != -EIO);
4624 
4625 	/* Release our grasp on the engine, letting CS flow again */
4626 	engine_unlock_reset_tasklet(engine);
4627 
4628 	/* Check that we do not resubmit the held request */
4629 	i915_request_get(rq);
4630 	if (!i915_request_wait(rq, 0, HZ / 5)) {
4631 		pr_err("%s: on hold request completed!\n",
4632 		       engine->name);
4633 		intel_gt_set_wedged(gt);
4634 		err = -EIO;
4635 		goto out_rq;
4636 	}
4637 	GEM_BUG_ON(!i915_request_on_hold(rq));
4638 
4639 	/* But is resubmitted on release */
4640 	execlists_unhold(engine, rq);
4641 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4642 		pr_err("%s: held request did not complete!\n",
4643 		       engine->name);
4644 		intel_gt_set_wedged(gt);
4645 		err = -ETIME;
4646 	}
4647 
4648 out_rq:
4649 	i915_request_put(rq);
4650 out_heartbeat:
4651 	for (n = 0; n < nsibling; n++)
4652 		st_engine_heartbeat_enable(siblings[n]);
4653 
4654 	intel_context_put(ve);
4655 out_spin:
4656 	igt_spinner_fini(&spin);
4657 	return err;
4658 }
4659 
4660 static int live_virtual_reset(void *arg)
4661 {
4662 	struct intel_gt *gt = arg;
4663 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4664 	unsigned int class;
4665 
4666 	/*
4667 	 * Check that we handle a reset event within a virtual engine.
4668 	 * Only the physical engine is reset, but we have to check the flow
4669 	 * of the virtual requests around the reset, and make sure it is not
4670 	 * forgotten.
4671 	 */
4672 
4673 	if (intel_uc_uses_guc_submission(&gt->uc))
4674 		return 0;
4675 
4676 	if (!intel_has_reset_engine(gt))
4677 		return 0;
4678 
4679 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4680 		int nsibling, err;
4681 
4682 		nsibling = select_siblings(gt, class, siblings);
4683 		if (nsibling < 2)
4684 			continue;
4685 
4686 		err = reset_virtual_engine(gt, siblings, nsibling);
4687 		if (err)
4688 			return err;
4689 	}
4690 
4691 	return 0;
4692 }
4693 
4694 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4695 {
4696 	static const struct i915_subtest tests[] = {
4697 		SUBTEST(live_sanitycheck),
4698 		SUBTEST(live_unlite_switch),
4699 		SUBTEST(live_unlite_preempt),
4700 		SUBTEST(live_unlite_ring),
4701 		SUBTEST(live_pin_rewind),
4702 		SUBTEST(live_hold_reset),
4703 		SUBTEST(live_error_interrupt),
4704 		SUBTEST(live_timeslice_preempt),
4705 		SUBTEST(live_timeslice_rewind),
4706 		SUBTEST(live_timeslice_queue),
4707 		SUBTEST(live_timeslice_nopreempt),
4708 		SUBTEST(live_busywait_preempt),
4709 		SUBTEST(live_preempt),
4710 		SUBTEST(live_late_preempt),
4711 		SUBTEST(live_nopreempt),
4712 		SUBTEST(live_preempt_cancel),
4713 		SUBTEST(live_suppress_self_preempt),
4714 		SUBTEST(live_chain_preempt),
4715 		SUBTEST(live_preempt_ring),
4716 		SUBTEST(live_preempt_gang),
4717 		SUBTEST(live_preempt_timeout),
4718 		SUBTEST(live_preempt_user),
4719 		SUBTEST(live_preempt_smoke),
4720 		SUBTEST(live_virtual_engine),
4721 		SUBTEST(live_virtual_mask),
4722 		SUBTEST(live_virtual_preserved),
4723 		SUBTEST(live_virtual_slice),
4724 		SUBTEST(live_virtual_bond),
4725 		SUBTEST(live_virtual_reset),
4726 	};
4727 
4728 	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
4729 		return 0;
4730 
4731 	if (intel_gt_is_wedged(&i915->gt))
4732 		return 0;
4733 
4734 	return intel_gt_live_subtests(tests, &i915->gt);
4735 }
4736