xref: /linux/drivers/gpu/drm/i915/gt/selftest_execlists.c (revision 172cdcaefea5c297fdb3d20b7d5aff60ae4fbce6)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_pm.h"
9 #include "gt/intel_engine_heartbeat.h"
10 #include "gt/intel_reset.h"
11 #include "gt/selftest_engine_heartbeat.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR 16
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
26 
27 static bool is_active(struct i915_request *rq)
28 {
29 	if (i915_request_is_active(rq))
30 		return true;
31 
32 	if (i915_request_on_hold(rq))
33 		return true;
34 
35 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
36 		return true;
37 
38 	return false;
39 }
40 
41 static int wait_for_submit(struct intel_engine_cs *engine,
42 			   struct i915_request *rq,
43 			   unsigned long timeout)
44 {
45 	/* Ignore our own attempts to suppress excess tasklets */
46 	tasklet_hi_schedule(&engine->execlists.tasklet);
47 
48 	timeout += jiffies;
49 	do {
50 		bool done = time_after(jiffies, timeout);
51 
52 		if (i915_request_completed(rq)) /* that was quick! */
53 			return 0;
54 
55 		/* Wait until the HW has acknowleged the submission (or err) */
56 		intel_engine_flush_submission(engine);
57 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
58 			return 0;
59 
60 		if (done)
61 			return -ETIME;
62 
63 		cond_resched();
64 	} while (1);
65 }
66 
67 static int wait_for_reset(struct intel_engine_cs *engine,
68 			  struct i915_request *rq,
69 			  unsigned long timeout)
70 {
71 	timeout += jiffies;
72 
73 	do {
74 		cond_resched();
75 		intel_engine_flush_submission(engine);
76 
77 		if (READ_ONCE(engine->execlists.pending[0]))
78 			continue;
79 
80 		if (i915_request_completed(rq))
81 			break;
82 
83 		if (READ_ONCE(rq->fence.error))
84 			break;
85 	} while (time_before(jiffies, timeout));
86 
87 	flush_scheduled_work();
88 
89 	if (rq->fence.error != -EIO) {
90 		pr_err("%s: hanging request %llx:%lld not reset\n",
91 		       engine->name,
92 		       rq->fence.context,
93 		       rq->fence.seqno);
94 		return -EINVAL;
95 	}
96 
97 	/* Give the request a jiffie to complete after flushing the worker */
98 	if (i915_request_wait(rq, 0,
99 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100 		pr_err("%s: hanging request %llx:%lld did not complete\n",
101 		       engine->name,
102 		       rq->fence.context,
103 		       rq->fence.seqno);
104 		return -ETIME;
105 	}
106 
107 	return 0;
108 }
109 
110 static int live_sanitycheck(void *arg)
111 {
112 	struct intel_gt *gt = arg;
113 	struct intel_engine_cs *engine;
114 	enum intel_engine_id id;
115 	struct igt_spinner spin;
116 	int err = 0;
117 
118 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
119 		return 0;
120 
121 	if (igt_spinner_init(&spin, gt))
122 		return -ENOMEM;
123 
124 	for_each_engine(engine, gt, id) {
125 		struct intel_context *ce;
126 		struct i915_request *rq;
127 
128 		ce = intel_context_create(engine);
129 		if (IS_ERR(ce)) {
130 			err = PTR_ERR(ce);
131 			break;
132 		}
133 
134 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
135 		if (IS_ERR(rq)) {
136 			err = PTR_ERR(rq);
137 			goto out_ctx;
138 		}
139 
140 		i915_request_add(rq);
141 		if (!igt_wait_for_spinner(&spin, rq)) {
142 			GEM_TRACE("spinner failed to start\n");
143 			GEM_TRACE_DUMP();
144 			intel_gt_set_wedged(gt);
145 			err = -EIO;
146 			goto out_ctx;
147 		}
148 
149 		igt_spinner_end(&spin);
150 		if (igt_flush_test(gt->i915)) {
151 			err = -EIO;
152 			goto out_ctx;
153 		}
154 
155 out_ctx:
156 		intel_context_put(ce);
157 		if (err)
158 			break;
159 	}
160 
161 	igt_spinner_fini(&spin);
162 	return err;
163 }
164 
165 static int live_unlite_restore(struct intel_gt *gt, int prio)
166 {
167 	struct intel_engine_cs *engine;
168 	enum intel_engine_id id;
169 	struct igt_spinner spin;
170 	int err = -ENOMEM;
171 
172 	/*
173 	 * Check that we can correctly context switch between 2 instances
174 	 * on the same engine from the same parent context.
175 	 */
176 
177 	if (igt_spinner_init(&spin, gt))
178 		return err;
179 
180 	err = 0;
181 	for_each_engine(engine, gt, id) {
182 		struct intel_context *ce[2] = {};
183 		struct i915_request *rq[2];
184 		struct igt_live_test t;
185 		int n;
186 
187 		if (prio && !intel_engine_has_preemption(engine))
188 			continue;
189 
190 		if (!intel_engine_can_store_dword(engine))
191 			continue;
192 
193 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
194 			err = -EIO;
195 			break;
196 		}
197 		st_engine_heartbeat_disable(engine);
198 
199 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
200 			struct intel_context *tmp;
201 
202 			tmp = intel_context_create(engine);
203 			if (IS_ERR(tmp)) {
204 				err = PTR_ERR(tmp);
205 				goto err_ce;
206 			}
207 
208 			err = intel_context_pin(tmp);
209 			if (err) {
210 				intel_context_put(tmp);
211 				goto err_ce;
212 			}
213 
214 			/*
215 			 * Setup the pair of contexts such that if we
216 			 * lite-restore using the RING_TAIL from ce[1] it
217 			 * will execute garbage from ce[0]->ring.
218 			 */
219 			memset(tmp->ring->vaddr,
220 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221 			       tmp->ring->vma->size);
222 
223 			ce[n] = tmp;
224 		}
225 		GEM_BUG_ON(!ce[1]->ring->size);
226 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
228 
229 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
230 		if (IS_ERR(rq[0])) {
231 			err = PTR_ERR(rq[0]);
232 			goto err_ce;
233 		}
234 
235 		i915_request_get(rq[0]);
236 		i915_request_add(rq[0]);
237 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
238 
239 		if (!igt_wait_for_spinner(&spin, rq[0])) {
240 			i915_request_put(rq[0]);
241 			goto err_ce;
242 		}
243 
244 		rq[1] = i915_request_create(ce[1]);
245 		if (IS_ERR(rq[1])) {
246 			err = PTR_ERR(rq[1]);
247 			i915_request_put(rq[0]);
248 			goto err_ce;
249 		}
250 
251 		if (!prio) {
252 			/*
253 			 * Ensure we do the switch to ce[1] on completion.
254 			 *
255 			 * rq[0] is already submitted, so this should reduce
256 			 * to a no-op (a wait on a request on the same engine
257 			 * uses the submit fence, not the completion fence),
258 			 * but it will install a dependency on rq[1] for rq[0]
259 			 * that will prevent the pair being reordered by
260 			 * timeslicing.
261 			 */
262 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
263 		}
264 
265 		i915_request_get(rq[1]);
266 		i915_request_add(rq[1]);
267 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268 		i915_request_put(rq[0]);
269 
270 		if (prio) {
271 			struct i915_sched_attr attr = {
272 				.priority = prio,
273 			};
274 
275 			/* Alternatively preempt the spinner with ce[1] */
276 			engine->schedule(rq[1], &attr);
277 		}
278 
279 		/* And switch back to ce[0] for good measure */
280 		rq[0] = i915_request_create(ce[0]);
281 		if (IS_ERR(rq[0])) {
282 			err = PTR_ERR(rq[0]);
283 			i915_request_put(rq[1]);
284 			goto err_ce;
285 		}
286 
287 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288 		i915_request_get(rq[0]);
289 		i915_request_add(rq[0]);
290 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291 		i915_request_put(rq[1]);
292 		i915_request_put(rq[0]);
293 
294 err_ce:
295 		intel_engine_flush_submission(engine);
296 		igt_spinner_end(&spin);
297 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
298 			if (IS_ERR_OR_NULL(ce[n]))
299 				break;
300 
301 			intel_context_unpin(ce[n]);
302 			intel_context_put(ce[n]);
303 		}
304 
305 		st_engine_heartbeat_enable(engine);
306 		if (igt_live_test_end(&t))
307 			err = -EIO;
308 		if (err)
309 			break;
310 	}
311 
312 	igt_spinner_fini(&spin);
313 	return err;
314 }
315 
316 static int live_unlite_switch(void *arg)
317 {
318 	return live_unlite_restore(arg, 0);
319 }
320 
321 static int live_unlite_preempt(void *arg)
322 {
323 	return live_unlite_restore(arg, I915_PRIORITY_MAX);
324 }
325 
326 static int live_unlite_ring(void *arg)
327 {
328 	struct intel_gt *gt = arg;
329 	struct intel_engine_cs *engine;
330 	struct igt_spinner spin;
331 	enum intel_engine_id id;
332 	int err = 0;
333 
334 	/*
335 	 * Setup a preemption event that will cause almost the entire ring
336 	 * to be unwound, potentially fooling our intel_ring_direction()
337 	 * into emitting a forward lite-restore instead of the rollback.
338 	 */
339 
340 	if (igt_spinner_init(&spin, gt))
341 		return -ENOMEM;
342 
343 	for_each_engine(engine, gt, id) {
344 		struct intel_context *ce[2] = {};
345 		struct i915_request *rq;
346 		struct igt_live_test t;
347 		int n;
348 
349 		if (!intel_engine_has_preemption(engine))
350 			continue;
351 
352 		if (!intel_engine_can_store_dword(engine))
353 			continue;
354 
355 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
356 			err = -EIO;
357 			break;
358 		}
359 		st_engine_heartbeat_disable(engine);
360 
361 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
362 			struct intel_context *tmp;
363 
364 			tmp = intel_context_create(engine);
365 			if (IS_ERR(tmp)) {
366 				err = PTR_ERR(tmp);
367 				goto err_ce;
368 			}
369 
370 			err = intel_context_pin(tmp);
371 			if (err) {
372 				intel_context_put(tmp);
373 				goto err_ce;
374 			}
375 
376 			memset32(tmp->ring->vaddr,
377 				 0xdeadbeef, /* trigger a hang if executed */
378 				 tmp->ring->vma->size / sizeof(u32));
379 
380 			ce[n] = tmp;
381 		}
382 
383 		/* Create max prio spinner, followed by N low prio nops */
384 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
385 		if (IS_ERR(rq)) {
386 			err = PTR_ERR(rq);
387 			goto err_ce;
388 		}
389 
390 		i915_request_get(rq);
391 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392 		i915_request_add(rq);
393 
394 		if (!igt_wait_for_spinner(&spin, rq)) {
395 			intel_gt_set_wedged(gt);
396 			i915_request_put(rq);
397 			err = -ETIME;
398 			goto err_ce;
399 		}
400 
401 		/* Fill the ring, until we will cause a wrap */
402 		n = 0;
403 		while (intel_ring_direction(ce[0]->ring,
404 					    rq->wa_tail,
405 					    ce[0]->ring->tail) <= 0) {
406 			struct i915_request *tmp;
407 
408 			tmp = intel_context_create_request(ce[0]);
409 			if (IS_ERR(tmp)) {
410 				err = PTR_ERR(tmp);
411 				i915_request_put(rq);
412 				goto err_ce;
413 			}
414 
415 			i915_request_add(tmp);
416 			intel_engine_flush_submission(engine);
417 			n++;
418 		}
419 		intel_engine_flush_submission(engine);
420 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
421 			 engine->name, n,
422 			 ce[0]->ring->size,
423 			 ce[0]->ring->tail,
424 			 ce[0]->ring->emit,
425 			 rq->tail);
426 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
427 						rq->tail,
428 						ce[0]->ring->tail) <= 0);
429 		i915_request_put(rq);
430 
431 		/* Create a second ring to preempt the first ring after rq[0] */
432 		rq = intel_context_create_request(ce[1]);
433 		if (IS_ERR(rq)) {
434 			err = PTR_ERR(rq);
435 			goto err_ce;
436 		}
437 
438 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439 		i915_request_get(rq);
440 		i915_request_add(rq);
441 
442 		err = wait_for_submit(engine, rq, HZ / 2);
443 		i915_request_put(rq);
444 		if (err) {
445 			pr_err("%s: preemption request was not submitted\n",
446 			       engine->name);
447 			err = -ETIME;
448 		}
449 
450 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
451 			 engine->name,
452 			 ce[0]->ring->tail, ce[0]->ring->emit,
453 			 ce[1]->ring->tail, ce[1]->ring->emit);
454 
455 err_ce:
456 		intel_engine_flush_submission(engine);
457 		igt_spinner_end(&spin);
458 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
459 			if (IS_ERR_OR_NULL(ce[n]))
460 				break;
461 
462 			intel_context_unpin(ce[n]);
463 			intel_context_put(ce[n]);
464 		}
465 		st_engine_heartbeat_enable(engine);
466 		if (igt_live_test_end(&t))
467 			err = -EIO;
468 		if (err)
469 			break;
470 	}
471 
472 	igt_spinner_fini(&spin);
473 	return err;
474 }
475 
476 static int live_pin_rewind(void *arg)
477 {
478 	struct intel_gt *gt = arg;
479 	struct intel_engine_cs *engine;
480 	enum intel_engine_id id;
481 	int err = 0;
482 
483 	/*
484 	 * We have to be careful not to trust intel_ring too much, for example
485 	 * ring->head is updated upon retire which is out of sync with pinning
486 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487 	 * or else we risk writing an older, stale value.
488 	 *
489 	 * To simulate this, let's apply a bit of deliberate sabotague.
490 	 */
491 
492 	for_each_engine(engine, gt, id) {
493 		struct intel_context *ce;
494 		struct i915_request *rq;
495 		struct intel_ring *ring;
496 		struct igt_live_test t;
497 
498 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
499 			err = -EIO;
500 			break;
501 		}
502 
503 		ce = intel_context_create(engine);
504 		if (IS_ERR(ce)) {
505 			err = PTR_ERR(ce);
506 			break;
507 		}
508 
509 		err = intel_context_pin(ce);
510 		if (err) {
511 			intel_context_put(ce);
512 			break;
513 		}
514 
515 		/* Keep the context awake while we play games */
516 		err = i915_active_acquire(&ce->active);
517 		if (err) {
518 			intel_context_unpin(ce);
519 			intel_context_put(ce);
520 			break;
521 		}
522 		ring = ce->ring;
523 
524 		/* Poison the ring, and offset the next request from HEAD */
525 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526 		ring->emit = ring->size / 2;
527 		ring->tail = ring->emit;
528 		GEM_BUG_ON(ring->head);
529 
530 		intel_context_unpin(ce);
531 
532 		/* Submit a simple nop request */
533 		GEM_BUG_ON(intel_context_is_pinned(ce));
534 		rq = intel_context_create_request(ce);
535 		i915_active_release(&ce->active); /* e.g. async retire */
536 		intel_context_put(ce);
537 		if (IS_ERR(rq)) {
538 			err = PTR_ERR(rq);
539 			break;
540 		}
541 		GEM_BUG_ON(!rq->head);
542 		i915_request_add(rq);
543 
544 		/* Expect not to hang! */
545 		if (igt_live_test_end(&t)) {
546 			err = -EIO;
547 			break;
548 		}
549 	}
550 
551 	return err;
552 }
553 
554 static int live_hold_reset(void *arg)
555 {
556 	struct intel_gt *gt = arg;
557 	struct intel_engine_cs *engine;
558 	enum intel_engine_id id;
559 	struct igt_spinner spin;
560 	int err = 0;
561 
562 	/*
563 	 * In order to support offline error capture for fast preempt reset,
564 	 * we need to decouple the guilty request and ensure that it and its
565 	 * descendents are not executed while the capture is in progress.
566 	 */
567 
568 	if (!intel_has_reset_engine(gt))
569 		return 0;
570 
571 	if (igt_spinner_init(&spin, gt))
572 		return -ENOMEM;
573 
574 	for_each_engine(engine, gt, id) {
575 		struct intel_context *ce;
576 		struct i915_request *rq;
577 
578 		ce = intel_context_create(engine);
579 		if (IS_ERR(ce)) {
580 			err = PTR_ERR(ce);
581 			break;
582 		}
583 
584 		st_engine_heartbeat_disable(engine);
585 
586 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
587 		if (IS_ERR(rq)) {
588 			err = PTR_ERR(rq);
589 			goto out;
590 		}
591 		i915_request_add(rq);
592 
593 		if (!igt_wait_for_spinner(&spin, rq)) {
594 			intel_gt_set_wedged(gt);
595 			err = -ETIME;
596 			goto out;
597 		}
598 
599 		/* We have our request executing, now remove it and reset */
600 
601 		local_bh_disable();
602 		if (test_and_set_bit(I915_RESET_ENGINE + id,
603 				     &gt->reset.flags)) {
604 			local_bh_enable();
605 			intel_gt_set_wedged(gt);
606 			err = -EBUSY;
607 			goto out;
608 		}
609 		tasklet_disable(&engine->execlists.tasklet);
610 
611 		engine->execlists.tasklet.callback(&engine->execlists.tasklet);
612 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
613 
614 		i915_request_get(rq);
615 		execlists_hold(engine, rq);
616 		GEM_BUG_ON(!i915_request_on_hold(rq));
617 
618 		__intel_engine_reset_bh(engine, NULL);
619 		GEM_BUG_ON(rq->fence.error != -EIO);
620 
621 		tasklet_enable(&engine->execlists.tasklet);
622 		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
623 				      &gt->reset.flags);
624 		local_bh_enable();
625 
626 		/* Check that we do not resubmit the held request */
627 		if (!i915_request_wait(rq, 0, HZ / 5)) {
628 			pr_err("%s: on hold request completed!\n",
629 			       engine->name);
630 			i915_request_put(rq);
631 			err = -EIO;
632 			goto out;
633 		}
634 		GEM_BUG_ON(!i915_request_on_hold(rq));
635 
636 		/* But is resubmitted on release */
637 		execlists_unhold(engine, rq);
638 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
639 			pr_err("%s: held request did not complete!\n",
640 			       engine->name);
641 			intel_gt_set_wedged(gt);
642 			err = -ETIME;
643 		}
644 		i915_request_put(rq);
645 
646 out:
647 		st_engine_heartbeat_enable(engine);
648 		intel_context_put(ce);
649 		if (err)
650 			break;
651 	}
652 
653 	igt_spinner_fini(&spin);
654 	return err;
655 }
656 
657 static const char *error_repr(int err)
658 {
659 	return err ? "bad" : "good";
660 }
661 
662 static int live_error_interrupt(void *arg)
663 {
664 	static const struct error_phase {
665 		enum { GOOD = 0, BAD = -EIO } error[2];
666 	} phases[] = {
667 		{ { BAD,  GOOD } },
668 		{ { BAD,  BAD  } },
669 		{ { BAD,  GOOD } },
670 		{ { GOOD, GOOD } }, /* sentinel */
671 	};
672 	struct intel_gt *gt = arg;
673 	struct intel_engine_cs *engine;
674 	enum intel_engine_id id;
675 
676 	/*
677 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
678 	 * of invalid commands in user batches that will cause a GPU hang.
679 	 * This is a faster mechanism than using hangcheck/heartbeats, but
680 	 * only detects problems the HW knows about -- it will not warn when
681 	 * we kill the HW!
682 	 *
683 	 * To verify our detection and reset, we throw some invalid commands
684 	 * at the HW and wait for the interrupt.
685 	 */
686 
687 	if (!intel_has_reset_engine(gt))
688 		return 0;
689 
690 	for_each_engine(engine, gt, id) {
691 		const struct error_phase *p;
692 		int err = 0;
693 
694 		st_engine_heartbeat_disable(engine);
695 
696 		for (p = phases; p->error[0] != GOOD; p++) {
697 			struct i915_request *client[ARRAY_SIZE(phases->error)];
698 			u32 *cs;
699 			int i;
700 
701 			memset(client, 0, sizeof(*client));
702 			for (i = 0; i < ARRAY_SIZE(client); i++) {
703 				struct intel_context *ce;
704 				struct i915_request *rq;
705 
706 				ce = intel_context_create(engine);
707 				if (IS_ERR(ce)) {
708 					err = PTR_ERR(ce);
709 					goto out;
710 				}
711 
712 				rq = intel_context_create_request(ce);
713 				intel_context_put(ce);
714 				if (IS_ERR(rq)) {
715 					err = PTR_ERR(rq);
716 					goto out;
717 				}
718 
719 				if (rq->engine->emit_init_breadcrumb) {
720 					err = rq->engine->emit_init_breadcrumb(rq);
721 					if (err) {
722 						i915_request_add(rq);
723 						goto out;
724 					}
725 				}
726 
727 				cs = intel_ring_begin(rq, 2);
728 				if (IS_ERR(cs)) {
729 					i915_request_add(rq);
730 					err = PTR_ERR(cs);
731 					goto out;
732 				}
733 
734 				if (p->error[i]) {
735 					*cs++ = 0xdeadbeef;
736 					*cs++ = 0xdeadbeef;
737 				} else {
738 					*cs++ = MI_NOOP;
739 					*cs++ = MI_NOOP;
740 				}
741 
742 				client[i] = i915_request_get(rq);
743 				i915_request_add(rq);
744 			}
745 
746 			err = wait_for_submit(engine, client[0], HZ / 2);
747 			if (err) {
748 				pr_err("%s: first request did not start within time!\n",
749 				       engine->name);
750 				err = -ETIME;
751 				goto out;
752 			}
753 
754 			for (i = 0; i < ARRAY_SIZE(client); i++) {
755 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
756 					pr_debug("%s: %s request incomplete!\n",
757 						 engine->name,
758 						 error_repr(p->error[i]));
759 
760 				if (!i915_request_started(client[i])) {
761 					pr_err("%s: %s request not started!\n",
762 					       engine->name,
763 					       error_repr(p->error[i]));
764 					err = -ETIME;
765 					goto out;
766 				}
767 
768 				/* Kick the tasklet to process the error */
769 				intel_engine_flush_submission(engine);
770 				if (client[i]->fence.error != p->error[i]) {
771 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
772 					       engine->name,
773 					       error_repr(p->error[i]),
774 					       i915_request_completed(client[i]) ? "completed" : "running",
775 					       client[i]->fence.error);
776 					err = -EINVAL;
777 					goto out;
778 				}
779 			}
780 
781 out:
782 			for (i = 0; i < ARRAY_SIZE(client); i++)
783 				if (client[i])
784 					i915_request_put(client[i]);
785 			if (err) {
786 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
787 				       engine->name, p - phases,
788 				       p->error[0], p->error[1]);
789 				break;
790 			}
791 		}
792 
793 		st_engine_heartbeat_enable(engine);
794 		if (err) {
795 			intel_gt_set_wedged(gt);
796 			return err;
797 		}
798 	}
799 
800 	return 0;
801 }
802 
803 static int
804 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
805 {
806 	u32 *cs;
807 
808 	cs = intel_ring_begin(rq, 10);
809 	if (IS_ERR(cs))
810 		return PTR_ERR(cs);
811 
812 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
813 
814 	*cs++ = MI_SEMAPHORE_WAIT |
815 		MI_SEMAPHORE_GLOBAL_GTT |
816 		MI_SEMAPHORE_POLL |
817 		MI_SEMAPHORE_SAD_NEQ_SDD;
818 	*cs++ = 0;
819 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
820 	*cs++ = 0;
821 
822 	if (idx > 0) {
823 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
824 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
825 		*cs++ = 0;
826 		*cs++ = 1;
827 	} else {
828 		*cs++ = MI_NOOP;
829 		*cs++ = MI_NOOP;
830 		*cs++ = MI_NOOP;
831 		*cs++ = MI_NOOP;
832 	}
833 
834 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
835 
836 	intel_ring_advance(rq, cs);
837 	return 0;
838 }
839 
840 static struct i915_request *
841 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
842 {
843 	struct intel_context *ce;
844 	struct i915_request *rq;
845 	int err;
846 
847 	ce = intel_context_create(engine);
848 	if (IS_ERR(ce))
849 		return ERR_CAST(ce);
850 
851 	rq = intel_context_create_request(ce);
852 	if (IS_ERR(rq))
853 		goto out_ce;
854 
855 	err = 0;
856 	if (rq->engine->emit_init_breadcrumb)
857 		err = rq->engine->emit_init_breadcrumb(rq);
858 	if (err == 0)
859 		err = emit_semaphore_chain(rq, vma, idx);
860 	if (err == 0)
861 		i915_request_get(rq);
862 	i915_request_add(rq);
863 	if (err)
864 		rq = ERR_PTR(err);
865 
866 out_ce:
867 	intel_context_put(ce);
868 	return rq;
869 }
870 
871 static int
872 release_queue(struct intel_engine_cs *engine,
873 	      struct i915_vma *vma,
874 	      int idx, int prio)
875 {
876 	struct i915_sched_attr attr = {
877 		.priority = prio,
878 	};
879 	struct i915_request *rq;
880 	u32 *cs;
881 
882 	rq = intel_engine_create_kernel_request(engine);
883 	if (IS_ERR(rq))
884 		return PTR_ERR(rq);
885 
886 	cs = intel_ring_begin(rq, 4);
887 	if (IS_ERR(cs)) {
888 		i915_request_add(rq);
889 		return PTR_ERR(cs);
890 	}
891 
892 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
893 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
894 	*cs++ = 0;
895 	*cs++ = 1;
896 
897 	intel_ring_advance(rq, cs);
898 
899 	i915_request_get(rq);
900 	i915_request_add(rq);
901 
902 	local_bh_disable();
903 	engine->schedule(rq, &attr);
904 	local_bh_enable(); /* kick tasklet */
905 
906 	i915_request_put(rq);
907 
908 	return 0;
909 }
910 
911 static int
912 slice_semaphore_queue(struct intel_engine_cs *outer,
913 		      struct i915_vma *vma,
914 		      int count)
915 {
916 	struct intel_engine_cs *engine;
917 	struct i915_request *head;
918 	enum intel_engine_id id;
919 	int err, i, n = 0;
920 
921 	head = semaphore_queue(outer, vma, n++);
922 	if (IS_ERR(head))
923 		return PTR_ERR(head);
924 
925 	for_each_engine(engine, outer->gt, id) {
926 		if (!intel_engine_has_preemption(engine))
927 			continue;
928 
929 		for (i = 0; i < count; i++) {
930 			struct i915_request *rq;
931 
932 			rq = semaphore_queue(engine, vma, n++);
933 			if (IS_ERR(rq)) {
934 				err = PTR_ERR(rq);
935 				goto out;
936 			}
937 
938 			i915_request_put(rq);
939 		}
940 	}
941 
942 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
943 	if (err)
944 		goto out;
945 
946 	if (i915_request_wait(head, 0,
947 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
948 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
949 		       outer->name, count, n);
950 		GEM_TRACE_DUMP();
951 		intel_gt_set_wedged(outer->gt);
952 		err = -EIO;
953 	}
954 
955 out:
956 	i915_request_put(head);
957 	return err;
958 }
959 
960 static int live_timeslice_preempt(void *arg)
961 {
962 	struct intel_gt *gt = arg;
963 	struct drm_i915_gem_object *obj;
964 	struct intel_engine_cs *engine;
965 	enum intel_engine_id id;
966 	struct i915_vma *vma;
967 	void *vaddr;
968 	int err = 0;
969 
970 	/*
971 	 * If a request takes too long, we would like to give other users
972 	 * a fair go on the GPU. In particular, users may create batches
973 	 * that wait upon external input, where that input may even be
974 	 * supplied by another GPU job. To avoid blocking forever, we
975 	 * need to preempt the current task and replace it with another
976 	 * ready task.
977 	 */
978 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
979 		return 0;
980 
981 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
982 	if (IS_ERR(obj))
983 		return PTR_ERR(obj);
984 
985 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
986 	if (IS_ERR(vma)) {
987 		err = PTR_ERR(vma);
988 		goto err_obj;
989 	}
990 
991 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
992 	if (IS_ERR(vaddr)) {
993 		err = PTR_ERR(vaddr);
994 		goto err_obj;
995 	}
996 
997 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
998 	if (err)
999 		goto err_map;
1000 
1001 	err = i915_vma_sync(vma);
1002 	if (err)
1003 		goto err_pin;
1004 
1005 	for_each_engine(engine, gt, id) {
1006 		if (!intel_engine_has_preemption(engine))
1007 			continue;
1008 
1009 		memset(vaddr, 0, PAGE_SIZE);
1010 
1011 		st_engine_heartbeat_disable(engine);
1012 		err = slice_semaphore_queue(engine, vma, 5);
1013 		st_engine_heartbeat_enable(engine);
1014 		if (err)
1015 			goto err_pin;
1016 
1017 		if (igt_flush_test(gt->i915)) {
1018 			err = -EIO;
1019 			goto err_pin;
1020 		}
1021 	}
1022 
1023 err_pin:
1024 	i915_vma_unpin(vma);
1025 err_map:
1026 	i915_gem_object_unpin_map(obj);
1027 err_obj:
1028 	i915_gem_object_put(obj);
1029 	return err;
1030 }
1031 
1032 static struct i915_request *
1033 create_rewinder(struct intel_context *ce,
1034 		struct i915_request *wait,
1035 		void *slot, int idx)
1036 {
1037 	const u32 offset =
1038 		i915_ggtt_offset(ce->engine->status_page.vma) +
1039 		offset_in_page(slot);
1040 	struct i915_request *rq;
1041 	u32 *cs;
1042 	int err;
1043 
1044 	rq = intel_context_create_request(ce);
1045 	if (IS_ERR(rq))
1046 		return rq;
1047 
1048 	if (wait) {
1049 		err = i915_request_await_dma_fence(rq, &wait->fence);
1050 		if (err)
1051 			goto err;
1052 	}
1053 
1054 	cs = intel_ring_begin(rq, 14);
1055 	if (IS_ERR(cs)) {
1056 		err = PTR_ERR(cs);
1057 		goto err;
1058 	}
1059 
1060 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1061 	*cs++ = MI_NOOP;
1062 
1063 	*cs++ = MI_SEMAPHORE_WAIT |
1064 		MI_SEMAPHORE_GLOBAL_GTT |
1065 		MI_SEMAPHORE_POLL |
1066 		MI_SEMAPHORE_SAD_GTE_SDD;
1067 	*cs++ = idx;
1068 	*cs++ = offset;
1069 	*cs++ = 0;
1070 
1071 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1072 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1073 	*cs++ = offset + idx * sizeof(u32);
1074 	*cs++ = 0;
1075 
1076 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1077 	*cs++ = offset;
1078 	*cs++ = 0;
1079 	*cs++ = idx + 1;
1080 
1081 	intel_ring_advance(rq, cs);
1082 
1083 	err = 0;
1084 err:
1085 	i915_request_get(rq);
1086 	i915_request_add(rq);
1087 	if (err) {
1088 		i915_request_put(rq);
1089 		return ERR_PTR(err);
1090 	}
1091 
1092 	return rq;
1093 }
1094 
1095 static int live_timeslice_rewind(void *arg)
1096 {
1097 	struct intel_gt *gt = arg;
1098 	struct intel_engine_cs *engine;
1099 	enum intel_engine_id id;
1100 
1101 	/*
1102 	 * The usual presumption on timeslice expiration is that we replace
1103 	 * the active context with another. However, given a chain of
1104 	 * dependencies we may end up with replacing the context with itself,
1105 	 * but only a few of those requests, forcing us to rewind the
1106 	 * RING_TAIL of the original request.
1107 	 */
1108 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1109 		return 0;
1110 
1111 	for_each_engine(engine, gt, id) {
1112 		enum { A1, A2, B1 };
1113 		enum { X = 1, Z, Y };
1114 		struct i915_request *rq[3] = {};
1115 		struct intel_context *ce;
1116 		unsigned long timeslice;
1117 		int i, err = 0;
1118 		u32 *slot;
1119 
1120 		if (!intel_engine_has_timeslices(engine))
1121 			continue;
1122 
1123 		/*
1124 		 * A:rq1 -- semaphore wait, timestamp X
1125 		 * A:rq2 -- write timestamp Y
1126 		 *
1127 		 * B:rq1 [await A:rq1] -- write timestamp Z
1128 		 *
1129 		 * Force timeslice, release semaphore.
1130 		 *
1131 		 * Expect execution/evaluation order XZY
1132 		 */
1133 
1134 		st_engine_heartbeat_disable(engine);
1135 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1136 
1137 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1138 
1139 		ce = intel_context_create(engine);
1140 		if (IS_ERR(ce)) {
1141 			err = PTR_ERR(ce);
1142 			goto err;
1143 		}
1144 
1145 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1146 		if (IS_ERR(rq[A1])) {
1147 			intel_context_put(ce);
1148 			goto err;
1149 		}
1150 
1151 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1152 		intel_context_put(ce);
1153 		if (IS_ERR(rq[A2]))
1154 			goto err;
1155 
1156 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1157 		if (err) {
1158 			pr_err("%s: failed to submit first context\n",
1159 			       engine->name);
1160 			goto err;
1161 		}
1162 
1163 		ce = intel_context_create(engine);
1164 		if (IS_ERR(ce)) {
1165 			err = PTR_ERR(ce);
1166 			goto err;
1167 		}
1168 
1169 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1170 		intel_context_put(ce);
1171 		if (IS_ERR(rq[2]))
1172 			goto err;
1173 
1174 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1175 		if (err) {
1176 			pr_err("%s: failed to submit second context\n",
1177 			       engine->name);
1178 			goto err;
1179 		}
1180 
1181 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1182 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1183 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1184 			/* Wait for the timeslice to kick in */
1185 			del_timer(&engine->execlists.timer);
1186 			tasklet_hi_schedule(&engine->execlists.tasklet);
1187 			intel_engine_flush_submission(engine);
1188 		}
1189 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1190 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1191 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1192 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1193 
1194 		/* Release the hounds! */
1195 		slot[0] = 1;
1196 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1197 
1198 		for (i = 1; i <= 3; i++) {
1199 			unsigned long timeout = jiffies + HZ / 2;
1200 
1201 			while (!READ_ONCE(slot[i]) &&
1202 			       time_before(jiffies, timeout))
1203 				;
1204 
1205 			if (!time_before(jiffies, timeout)) {
1206 				pr_err("%s: rq[%d] timed out\n",
1207 				       engine->name, i - 1);
1208 				err = -ETIME;
1209 				goto err;
1210 			}
1211 
1212 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1213 		}
1214 
1215 		/* XZY: XZ < XY */
1216 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1217 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1218 			       engine->name,
1219 			       slot[Z] - slot[X],
1220 			       slot[Y] - slot[X]);
1221 			err = -EINVAL;
1222 		}
1223 
1224 err:
1225 		memset32(&slot[0], -1, 4);
1226 		wmb();
1227 
1228 		engine->props.timeslice_duration_ms = timeslice;
1229 		st_engine_heartbeat_enable(engine);
1230 		for (i = 0; i < 3; i++)
1231 			i915_request_put(rq[i]);
1232 		if (igt_flush_test(gt->i915))
1233 			err = -EIO;
1234 		if (err)
1235 			return err;
1236 	}
1237 
1238 	return 0;
1239 }
1240 
1241 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1242 {
1243 	struct i915_request *rq;
1244 
1245 	rq = intel_engine_create_kernel_request(engine);
1246 	if (IS_ERR(rq))
1247 		return rq;
1248 
1249 	i915_request_get(rq);
1250 	i915_request_add(rq);
1251 
1252 	return rq;
1253 }
1254 
1255 static long slice_timeout(struct intel_engine_cs *engine)
1256 {
1257 	long timeout;
1258 
1259 	/* Enough time for a timeslice to kick in, and kick out */
1260 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1261 
1262 	/* Enough time for the nop request to complete */
1263 	timeout += HZ / 5;
1264 
1265 	return timeout + 1;
1266 }
1267 
1268 static int live_timeslice_queue(void *arg)
1269 {
1270 	struct intel_gt *gt = arg;
1271 	struct drm_i915_gem_object *obj;
1272 	struct intel_engine_cs *engine;
1273 	enum intel_engine_id id;
1274 	struct i915_vma *vma;
1275 	void *vaddr;
1276 	int err = 0;
1277 
1278 	/*
1279 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1280 	 * timeslicing between them disabled, we *do* enable timeslicing
1281 	 * if the queue demands it. (Normally, we do not submit if
1282 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1283 	 * eject ELSP[0] in favour of the queue.)
1284 	 */
1285 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1286 		return 0;
1287 
1288 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1289 	if (IS_ERR(obj))
1290 		return PTR_ERR(obj);
1291 
1292 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1293 	if (IS_ERR(vma)) {
1294 		err = PTR_ERR(vma);
1295 		goto err_obj;
1296 	}
1297 
1298 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1299 	if (IS_ERR(vaddr)) {
1300 		err = PTR_ERR(vaddr);
1301 		goto err_obj;
1302 	}
1303 
1304 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1305 	if (err)
1306 		goto err_map;
1307 
1308 	err = i915_vma_sync(vma);
1309 	if (err)
1310 		goto err_pin;
1311 
1312 	for_each_engine(engine, gt, id) {
1313 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1314 		struct i915_request *rq, *nop;
1315 
1316 		if (!intel_engine_has_preemption(engine))
1317 			continue;
1318 
1319 		st_engine_heartbeat_disable(engine);
1320 		memset(vaddr, 0, PAGE_SIZE);
1321 
1322 		/* ELSP[0]: semaphore wait */
1323 		rq = semaphore_queue(engine, vma, 0);
1324 		if (IS_ERR(rq)) {
1325 			err = PTR_ERR(rq);
1326 			goto err_heartbeat;
1327 		}
1328 		engine->schedule(rq, &attr);
1329 		err = wait_for_submit(engine, rq, HZ / 2);
1330 		if (err) {
1331 			pr_err("%s: Timed out trying to submit semaphores\n",
1332 			       engine->name);
1333 			goto err_rq;
1334 		}
1335 
1336 		/* ELSP[1]: nop request */
1337 		nop = nop_request(engine);
1338 		if (IS_ERR(nop)) {
1339 			err = PTR_ERR(nop);
1340 			goto err_rq;
1341 		}
1342 		err = wait_for_submit(engine, nop, HZ / 2);
1343 		i915_request_put(nop);
1344 		if (err) {
1345 			pr_err("%s: Timed out trying to submit nop\n",
1346 			       engine->name);
1347 			goto err_rq;
1348 		}
1349 
1350 		GEM_BUG_ON(i915_request_completed(rq));
1351 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1352 
1353 		/* Queue: semaphore signal, matching priority as semaphore */
1354 		err = release_queue(engine, vma, 1, effective_prio(rq));
1355 		if (err)
1356 			goto err_rq;
1357 
1358 		/* Wait until we ack the release_queue and start timeslicing */
1359 		do {
1360 			cond_resched();
1361 			intel_engine_flush_submission(engine);
1362 		} while (READ_ONCE(engine->execlists.pending[0]));
1363 
1364 		/* Timeslice every jiffy, so within 2 we should signal */
1365 		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1366 			struct drm_printer p =
1367 				drm_info_printer(gt->i915->drm.dev);
1368 
1369 			pr_err("%s: Failed to timeslice into queue\n",
1370 			       engine->name);
1371 			intel_engine_dump(engine, &p,
1372 					  "%s\n", engine->name);
1373 
1374 			memset(vaddr, 0xff, PAGE_SIZE);
1375 			err = -EIO;
1376 		}
1377 err_rq:
1378 		i915_request_put(rq);
1379 err_heartbeat:
1380 		st_engine_heartbeat_enable(engine);
1381 		if (err)
1382 			break;
1383 	}
1384 
1385 err_pin:
1386 	i915_vma_unpin(vma);
1387 err_map:
1388 	i915_gem_object_unpin_map(obj);
1389 err_obj:
1390 	i915_gem_object_put(obj);
1391 	return err;
1392 }
1393 
1394 static int live_timeslice_nopreempt(void *arg)
1395 {
1396 	struct intel_gt *gt = arg;
1397 	struct intel_engine_cs *engine;
1398 	enum intel_engine_id id;
1399 	struct igt_spinner spin;
1400 	int err = 0;
1401 
1402 	/*
1403 	 * We should not timeslice into a request that is marked with
1404 	 * I915_REQUEST_NOPREEMPT.
1405 	 */
1406 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1407 		return 0;
1408 
1409 	if (igt_spinner_init(&spin, gt))
1410 		return -ENOMEM;
1411 
1412 	for_each_engine(engine, gt, id) {
1413 		struct intel_context *ce;
1414 		struct i915_request *rq;
1415 		unsigned long timeslice;
1416 
1417 		if (!intel_engine_has_preemption(engine))
1418 			continue;
1419 
1420 		ce = intel_context_create(engine);
1421 		if (IS_ERR(ce)) {
1422 			err = PTR_ERR(ce);
1423 			break;
1424 		}
1425 
1426 		st_engine_heartbeat_disable(engine);
1427 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1428 
1429 		/* Create an unpreemptible spinner */
1430 
1431 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1432 		intel_context_put(ce);
1433 		if (IS_ERR(rq)) {
1434 			err = PTR_ERR(rq);
1435 			goto out_heartbeat;
1436 		}
1437 
1438 		i915_request_get(rq);
1439 		i915_request_add(rq);
1440 
1441 		if (!igt_wait_for_spinner(&spin, rq)) {
1442 			i915_request_put(rq);
1443 			err = -ETIME;
1444 			goto out_spin;
1445 		}
1446 
1447 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1448 		i915_request_put(rq);
1449 
1450 		/* Followed by a maximum priority barrier (heartbeat) */
1451 
1452 		ce = intel_context_create(engine);
1453 		if (IS_ERR(ce)) {
1454 			err = PTR_ERR(ce);
1455 			goto out_spin;
1456 		}
1457 
1458 		rq = intel_context_create_request(ce);
1459 		intel_context_put(ce);
1460 		if (IS_ERR(rq)) {
1461 			err = PTR_ERR(rq);
1462 			goto out_spin;
1463 		}
1464 
1465 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1466 		i915_request_get(rq);
1467 		i915_request_add(rq);
1468 
1469 		/*
1470 		 * Wait until the barrier is in ELSP, and we know timeslicing
1471 		 * will have been activated.
1472 		 */
1473 		if (wait_for_submit(engine, rq, HZ / 2)) {
1474 			i915_request_put(rq);
1475 			err = -ETIME;
1476 			goto out_spin;
1477 		}
1478 
1479 		/*
1480 		 * Since the ELSP[0] request is unpreemptible, it should not
1481 		 * allow the maximum priority barrier through. Wait long
1482 		 * enough to see if it is timesliced in by mistake.
1483 		 */
1484 		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1485 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1486 			       engine->name);
1487 			err = -EINVAL;
1488 		}
1489 		i915_request_put(rq);
1490 
1491 out_spin:
1492 		igt_spinner_end(&spin);
1493 out_heartbeat:
1494 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1495 		st_engine_heartbeat_enable(engine);
1496 		if (err)
1497 			break;
1498 
1499 		if (igt_flush_test(gt->i915)) {
1500 			err = -EIO;
1501 			break;
1502 		}
1503 	}
1504 
1505 	igt_spinner_fini(&spin);
1506 	return err;
1507 }
1508 
1509 static int live_busywait_preempt(void *arg)
1510 {
1511 	struct intel_gt *gt = arg;
1512 	struct i915_gem_context *ctx_hi, *ctx_lo;
1513 	struct intel_engine_cs *engine;
1514 	struct drm_i915_gem_object *obj;
1515 	struct i915_vma *vma;
1516 	enum intel_engine_id id;
1517 	int err = -ENOMEM;
1518 	u32 *map;
1519 
1520 	/*
1521 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1522 	 * preempt the busywaits used to synchronise between rings.
1523 	 */
1524 
1525 	ctx_hi = kernel_context(gt->i915);
1526 	if (!ctx_hi)
1527 		return -ENOMEM;
1528 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1529 
1530 	ctx_lo = kernel_context(gt->i915);
1531 	if (!ctx_lo)
1532 		goto err_ctx_hi;
1533 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1534 
1535 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1536 	if (IS_ERR(obj)) {
1537 		err = PTR_ERR(obj);
1538 		goto err_ctx_lo;
1539 	}
1540 
1541 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1542 	if (IS_ERR(map)) {
1543 		err = PTR_ERR(map);
1544 		goto err_obj;
1545 	}
1546 
1547 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1548 	if (IS_ERR(vma)) {
1549 		err = PTR_ERR(vma);
1550 		goto err_map;
1551 	}
1552 
1553 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1554 	if (err)
1555 		goto err_map;
1556 
1557 	err = i915_vma_sync(vma);
1558 	if (err)
1559 		goto err_vma;
1560 
1561 	for_each_engine(engine, gt, id) {
1562 		struct i915_request *lo, *hi;
1563 		struct igt_live_test t;
1564 		u32 *cs;
1565 
1566 		if (!intel_engine_has_preemption(engine))
1567 			continue;
1568 
1569 		if (!intel_engine_can_store_dword(engine))
1570 			continue;
1571 
1572 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1573 			err = -EIO;
1574 			goto err_vma;
1575 		}
1576 
1577 		/*
1578 		 * We create two requests. The low priority request
1579 		 * busywaits on a semaphore (inside the ringbuffer where
1580 		 * is should be preemptible) and the high priority requests
1581 		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1582 		 * allowing the first request to complete. If preemption
1583 		 * fails, we hang instead.
1584 		 */
1585 
1586 		lo = igt_request_alloc(ctx_lo, engine);
1587 		if (IS_ERR(lo)) {
1588 			err = PTR_ERR(lo);
1589 			goto err_vma;
1590 		}
1591 
1592 		cs = intel_ring_begin(lo, 8);
1593 		if (IS_ERR(cs)) {
1594 			err = PTR_ERR(cs);
1595 			i915_request_add(lo);
1596 			goto err_vma;
1597 		}
1598 
1599 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1600 		*cs++ = i915_ggtt_offset(vma);
1601 		*cs++ = 0;
1602 		*cs++ = 1;
1603 
1604 		/* XXX Do we need a flush + invalidate here? */
1605 
1606 		*cs++ = MI_SEMAPHORE_WAIT |
1607 			MI_SEMAPHORE_GLOBAL_GTT |
1608 			MI_SEMAPHORE_POLL |
1609 			MI_SEMAPHORE_SAD_EQ_SDD;
1610 		*cs++ = 0;
1611 		*cs++ = i915_ggtt_offset(vma);
1612 		*cs++ = 0;
1613 
1614 		intel_ring_advance(lo, cs);
1615 
1616 		i915_request_get(lo);
1617 		i915_request_add(lo);
1618 
1619 		if (wait_for(READ_ONCE(*map), 10)) {
1620 			i915_request_put(lo);
1621 			err = -ETIMEDOUT;
1622 			goto err_vma;
1623 		}
1624 
1625 		/* Low priority request should be busywaiting now */
1626 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1627 			i915_request_put(lo);
1628 			pr_err("%s: Busywaiting request did not!\n",
1629 			       engine->name);
1630 			err = -EIO;
1631 			goto err_vma;
1632 		}
1633 
1634 		hi = igt_request_alloc(ctx_hi, engine);
1635 		if (IS_ERR(hi)) {
1636 			err = PTR_ERR(hi);
1637 			i915_request_put(lo);
1638 			goto err_vma;
1639 		}
1640 
1641 		cs = intel_ring_begin(hi, 4);
1642 		if (IS_ERR(cs)) {
1643 			err = PTR_ERR(cs);
1644 			i915_request_add(hi);
1645 			i915_request_put(lo);
1646 			goto err_vma;
1647 		}
1648 
1649 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1650 		*cs++ = i915_ggtt_offset(vma);
1651 		*cs++ = 0;
1652 		*cs++ = 0;
1653 
1654 		intel_ring_advance(hi, cs);
1655 		i915_request_add(hi);
1656 
1657 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1658 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1659 
1660 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1661 			       engine->name);
1662 
1663 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1664 			GEM_TRACE_DUMP();
1665 
1666 			i915_request_put(lo);
1667 			intel_gt_set_wedged(gt);
1668 			err = -EIO;
1669 			goto err_vma;
1670 		}
1671 		GEM_BUG_ON(READ_ONCE(*map));
1672 		i915_request_put(lo);
1673 
1674 		if (igt_live_test_end(&t)) {
1675 			err = -EIO;
1676 			goto err_vma;
1677 		}
1678 	}
1679 
1680 	err = 0;
1681 err_vma:
1682 	i915_vma_unpin(vma);
1683 err_map:
1684 	i915_gem_object_unpin_map(obj);
1685 err_obj:
1686 	i915_gem_object_put(obj);
1687 err_ctx_lo:
1688 	kernel_context_close(ctx_lo);
1689 err_ctx_hi:
1690 	kernel_context_close(ctx_hi);
1691 	return err;
1692 }
1693 
1694 static struct i915_request *
1695 spinner_create_request(struct igt_spinner *spin,
1696 		       struct i915_gem_context *ctx,
1697 		       struct intel_engine_cs *engine,
1698 		       u32 arb)
1699 {
1700 	struct intel_context *ce;
1701 	struct i915_request *rq;
1702 
1703 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1704 	if (IS_ERR(ce))
1705 		return ERR_CAST(ce);
1706 
1707 	rq = igt_spinner_create_request(spin, ce, arb);
1708 	intel_context_put(ce);
1709 	return rq;
1710 }
1711 
1712 static int live_preempt(void *arg)
1713 {
1714 	struct intel_gt *gt = arg;
1715 	struct i915_gem_context *ctx_hi, *ctx_lo;
1716 	struct igt_spinner spin_hi, spin_lo;
1717 	struct intel_engine_cs *engine;
1718 	enum intel_engine_id id;
1719 	int err = -ENOMEM;
1720 
1721 	if (igt_spinner_init(&spin_hi, gt))
1722 		return -ENOMEM;
1723 
1724 	if (igt_spinner_init(&spin_lo, gt))
1725 		goto err_spin_hi;
1726 
1727 	ctx_hi = kernel_context(gt->i915);
1728 	if (!ctx_hi)
1729 		goto err_spin_lo;
1730 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1731 
1732 	ctx_lo = kernel_context(gt->i915);
1733 	if (!ctx_lo)
1734 		goto err_ctx_hi;
1735 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1736 
1737 	for_each_engine(engine, gt, id) {
1738 		struct igt_live_test t;
1739 		struct i915_request *rq;
1740 
1741 		if (!intel_engine_has_preemption(engine))
1742 			continue;
1743 
1744 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1745 			err = -EIO;
1746 			goto err_ctx_lo;
1747 		}
1748 
1749 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1750 					    MI_ARB_CHECK);
1751 		if (IS_ERR(rq)) {
1752 			err = PTR_ERR(rq);
1753 			goto err_ctx_lo;
1754 		}
1755 
1756 		i915_request_add(rq);
1757 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1758 			GEM_TRACE("lo spinner failed to start\n");
1759 			GEM_TRACE_DUMP();
1760 			intel_gt_set_wedged(gt);
1761 			err = -EIO;
1762 			goto err_ctx_lo;
1763 		}
1764 
1765 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1766 					    MI_ARB_CHECK);
1767 		if (IS_ERR(rq)) {
1768 			igt_spinner_end(&spin_lo);
1769 			err = PTR_ERR(rq);
1770 			goto err_ctx_lo;
1771 		}
1772 
1773 		i915_request_add(rq);
1774 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1775 			GEM_TRACE("hi spinner failed to start\n");
1776 			GEM_TRACE_DUMP();
1777 			intel_gt_set_wedged(gt);
1778 			err = -EIO;
1779 			goto err_ctx_lo;
1780 		}
1781 
1782 		igt_spinner_end(&spin_hi);
1783 		igt_spinner_end(&spin_lo);
1784 
1785 		if (igt_live_test_end(&t)) {
1786 			err = -EIO;
1787 			goto err_ctx_lo;
1788 		}
1789 	}
1790 
1791 	err = 0;
1792 err_ctx_lo:
1793 	kernel_context_close(ctx_lo);
1794 err_ctx_hi:
1795 	kernel_context_close(ctx_hi);
1796 err_spin_lo:
1797 	igt_spinner_fini(&spin_lo);
1798 err_spin_hi:
1799 	igt_spinner_fini(&spin_hi);
1800 	return err;
1801 }
1802 
1803 static int live_late_preempt(void *arg)
1804 {
1805 	struct intel_gt *gt = arg;
1806 	struct i915_gem_context *ctx_hi, *ctx_lo;
1807 	struct igt_spinner spin_hi, spin_lo;
1808 	struct intel_engine_cs *engine;
1809 	struct i915_sched_attr attr = {};
1810 	enum intel_engine_id id;
1811 	int err = -ENOMEM;
1812 
1813 	if (igt_spinner_init(&spin_hi, gt))
1814 		return -ENOMEM;
1815 
1816 	if (igt_spinner_init(&spin_lo, gt))
1817 		goto err_spin_hi;
1818 
1819 	ctx_hi = kernel_context(gt->i915);
1820 	if (!ctx_hi)
1821 		goto err_spin_lo;
1822 
1823 	ctx_lo = kernel_context(gt->i915);
1824 	if (!ctx_lo)
1825 		goto err_ctx_hi;
1826 
1827 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1828 	ctx_lo->sched.priority = 1;
1829 
1830 	for_each_engine(engine, gt, id) {
1831 		struct igt_live_test t;
1832 		struct i915_request *rq;
1833 
1834 		if (!intel_engine_has_preemption(engine))
1835 			continue;
1836 
1837 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1838 			err = -EIO;
1839 			goto err_ctx_lo;
1840 		}
1841 
1842 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1843 					    MI_ARB_CHECK);
1844 		if (IS_ERR(rq)) {
1845 			err = PTR_ERR(rq);
1846 			goto err_ctx_lo;
1847 		}
1848 
1849 		i915_request_add(rq);
1850 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1851 			pr_err("First context failed to start\n");
1852 			goto err_wedged;
1853 		}
1854 
1855 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1856 					    MI_NOOP);
1857 		if (IS_ERR(rq)) {
1858 			igt_spinner_end(&spin_lo);
1859 			err = PTR_ERR(rq);
1860 			goto err_ctx_lo;
1861 		}
1862 
1863 		i915_request_add(rq);
1864 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1865 			pr_err("Second context overtook first?\n");
1866 			goto err_wedged;
1867 		}
1868 
1869 		attr.priority = I915_PRIORITY_MAX;
1870 		engine->schedule(rq, &attr);
1871 
1872 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1873 			pr_err("High priority context failed to preempt the low priority context\n");
1874 			GEM_TRACE_DUMP();
1875 			goto err_wedged;
1876 		}
1877 
1878 		igt_spinner_end(&spin_hi);
1879 		igt_spinner_end(&spin_lo);
1880 
1881 		if (igt_live_test_end(&t)) {
1882 			err = -EIO;
1883 			goto err_ctx_lo;
1884 		}
1885 	}
1886 
1887 	err = 0;
1888 err_ctx_lo:
1889 	kernel_context_close(ctx_lo);
1890 err_ctx_hi:
1891 	kernel_context_close(ctx_hi);
1892 err_spin_lo:
1893 	igt_spinner_fini(&spin_lo);
1894 err_spin_hi:
1895 	igt_spinner_fini(&spin_hi);
1896 	return err;
1897 
1898 err_wedged:
1899 	igt_spinner_end(&spin_hi);
1900 	igt_spinner_end(&spin_lo);
1901 	intel_gt_set_wedged(gt);
1902 	err = -EIO;
1903 	goto err_ctx_lo;
1904 }
1905 
1906 struct preempt_client {
1907 	struct igt_spinner spin;
1908 	struct i915_gem_context *ctx;
1909 };
1910 
1911 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1912 {
1913 	c->ctx = kernel_context(gt->i915);
1914 	if (!c->ctx)
1915 		return -ENOMEM;
1916 
1917 	if (igt_spinner_init(&c->spin, gt))
1918 		goto err_ctx;
1919 
1920 	return 0;
1921 
1922 err_ctx:
1923 	kernel_context_close(c->ctx);
1924 	return -ENOMEM;
1925 }
1926 
1927 static void preempt_client_fini(struct preempt_client *c)
1928 {
1929 	igt_spinner_fini(&c->spin);
1930 	kernel_context_close(c->ctx);
1931 }
1932 
1933 static int live_nopreempt(void *arg)
1934 {
1935 	struct intel_gt *gt = arg;
1936 	struct intel_engine_cs *engine;
1937 	struct preempt_client a, b;
1938 	enum intel_engine_id id;
1939 	int err = -ENOMEM;
1940 
1941 	/*
1942 	 * Verify that we can disable preemption for an individual request
1943 	 * that may be being observed and not want to be interrupted.
1944 	 */
1945 
1946 	if (preempt_client_init(gt, &a))
1947 		return -ENOMEM;
1948 	if (preempt_client_init(gt, &b))
1949 		goto err_client_a;
1950 	b.ctx->sched.priority = I915_PRIORITY_MAX;
1951 
1952 	for_each_engine(engine, gt, id) {
1953 		struct i915_request *rq_a, *rq_b;
1954 
1955 		if (!intel_engine_has_preemption(engine))
1956 			continue;
1957 
1958 		engine->execlists.preempt_hang.count = 0;
1959 
1960 		rq_a = spinner_create_request(&a.spin,
1961 					      a.ctx, engine,
1962 					      MI_ARB_CHECK);
1963 		if (IS_ERR(rq_a)) {
1964 			err = PTR_ERR(rq_a);
1965 			goto err_client_b;
1966 		}
1967 
1968 		/* Low priority client, but unpreemptable! */
1969 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1970 
1971 		i915_request_add(rq_a);
1972 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1973 			pr_err("First client failed to start\n");
1974 			goto err_wedged;
1975 		}
1976 
1977 		rq_b = spinner_create_request(&b.spin,
1978 					      b.ctx, engine,
1979 					      MI_ARB_CHECK);
1980 		if (IS_ERR(rq_b)) {
1981 			err = PTR_ERR(rq_b);
1982 			goto err_client_b;
1983 		}
1984 
1985 		i915_request_add(rq_b);
1986 
1987 		/* B is much more important than A! (But A is unpreemptable.) */
1988 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1989 
1990 		/* Wait long enough for preemption and timeslicing */
1991 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
1992 			pr_err("Second client started too early!\n");
1993 			goto err_wedged;
1994 		}
1995 
1996 		igt_spinner_end(&a.spin);
1997 
1998 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1999 			pr_err("Second client failed to start\n");
2000 			goto err_wedged;
2001 		}
2002 
2003 		igt_spinner_end(&b.spin);
2004 
2005 		if (engine->execlists.preempt_hang.count) {
2006 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2007 			       engine->execlists.preempt_hang.count);
2008 			err = -EINVAL;
2009 			goto err_wedged;
2010 		}
2011 
2012 		if (igt_flush_test(gt->i915))
2013 			goto err_wedged;
2014 	}
2015 
2016 	err = 0;
2017 err_client_b:
2018 	preempt_client_fini(&b);
2019 err_client_a:
2020 	preempt_client_fini(&a);
2021 	return err;
2022 
2023 err_wedged:
2024 	igt_spinner_end(&b.spin);
2025 	igt_spinner_end(&a.spin);
2026 	intel_gt_set_wedged(gt);
2027 	err = -EIO;
2028 	goto err_client_b;
2029 }
2030 
2031 struct live_preempt_cancel {
2032 	struct intel_engine_cs *engine;
2033 	struct preempt_client a, b;
2034 };
2035 
2036 static int __cancel_active0(struct live_preempt_cancel *arg)
2037 {
2038 	struct i915_request *rq;
2039 	struct igt_live_test t;
2040 	int err;
2041 
2042 	/* Preempt cancel of ELSP0 */
2043 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2044 	if (igt_live_test_begin(&t, arg->engine->i915,
2045 				__func__, arg->engine->name))
2046 		return -EIO;
2047 
2048 	rq = spinner_create_request(&arg->a.spin,
2049 				    arg->a.ctx, arg->engine,
2050 				    MI_ARB_CHECK);
2051 	if (IS_ERR(rq))
2052 		return PTR_ERR(rq);
2053 
2054 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2055 	i915_request_get(rq);
2056 	i915_request_add(rq);
2057 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2058 		err = -EIO;
2059 		goto out;
2060 	}
2061 
2062 	intel_context_set_banned(rq->context);
2063 	err = intel_engine_pulse(arg->engine);
2064 	if (err)
2065 		goto out;
2066 
2067 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2068 	if (err) {
2069 		pr_err("Cancelled inflight0 request did not reset\n");
2070 		goto out;
2071 	}
2072 
2073 out:
2074 	i915_request_put(rq);
2075 	if (igt_live_test_end(&t))
2076 		err = -EIO;
2077 	return err;
2078 }
2079 
2080 static int __cancel_active1(struct live_preempt_cancel *arg)
2081 {
2082 	struct i915_request *rq[2] = {};
2083 	struct igt_live_test t;
2084 	int err;
2085 
2086 	/* Preempt cancel of ELSP1 */
2087 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2088 	if (igt_live_test_begin(&t, arg->engine->i915,
2089 				__func__, arg->engine->name))
2090 		return -EIO;
2091 
2092 	rq[0] = spinner_create_request(&arg->a.spin,
2093 				       arg->a.ctx, arg->engine,
2094 				       MI_NOOP); /* no preemption */
2095 	if (IS_ERR(rq[0]))
2096 		return PTR_ERR(rq[0]);
2097 
2098 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2099 	i915_request_get(rq[0]);
2100 	i915_request_add(rq[0]);
2101 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2102 		err = -EIO;
2103 		goto out;
2104 	}
2105 
2106 	rq[1] = spinner_create_request(&arg->b.spin,
2107 				       arg->b.ctx, arg->engine,
2108 				       MI_ARB_CHECK);
2109 	if (IS_ERR(rq[1])) {
2110 		err = PTR_ERR(rq[1]);
2111 		goto out;
2112 	}
2113 
2114 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2115 	i915_request_get(rq[1]);
2116 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2117 	i915_request_add(rq[1]);
2118 	if (err)
2119 		goto out;
2120 
2121 	intel_context_set_banned(rq[1]->context);
2122 	err = intel_engine_pulse(arg->engine);
2123 	if (err)
2124 		goto out;
2125 
2126 	igt_spinner_end(&arg->a.spin);
2127 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2128 	if (err)
2129 		goto out;
2130 
2131 	if (rq[0]->fence.error != 0) {
2132 		pr_err("Normal inflight0 request did not complete\n");
2133 		err = -EINVAL;
2134 		goto out;
2135 	}
2136 
2137 	if (rq[1]->fence.error != -EIO) {
2138 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2139 		err = -EINVAL;
2140 		goto out;
2141 	}
2142 
2143 out:
2144 	i915_request_put(rq[1]);
2145 	i915_request_put(rq[0]);
2146 	if (igt_live_test_end(&t))
2147 		err = -EIO;
2148 	return err;
2149 }
2150 
2151 static int __cancel_queued(struct live_preempt_cancel *arg)
2152 {
2153 	struct i915_request *rq[3] = {};
2154 	struct igt_live_test t;
2155 	int err;
2156 
2157 	/* Full ELSP and one in the wings */
2158 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2159 	if (igt_live_test_begin(&t, arg->engine->i915,
2160 				__func__, arg->engine->name))
2161 		return -EIO;
2162 
2163 	rq[0] = spinner_create_request(&arg->a.spin,
2164 				       arg->a.ctx, arg->engine,
2165 				       MI_ARB_CHECK);
2166 	if (IS_ERR(rq[0]))
2167 		return PTR_ERR(rq[0]);
2168 
2169 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2170 	i915_request_get(rq[0]);
2171 	i915_request_add(rq[0]);
2172 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2173 		err = -EIO;
2174 		goto out;
2175 	}
2176 
2177 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2178 	if (IS_ERR(rq[1])) {
2179 		err = PTR_ERR(rq[1]);
2180 		goto out;
2181 	}
2182 
2183 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2184 	i915_request_get(rq[1]);
2185 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2186 	i915_request_add(rq[1]);
2187 	if (err)
2188 		goto out;
2189 
2190 	rq[2] = spinner_create_request(&arg->b.spin,
2191 				       arg->a.ctx, arg->engine,
2192 				       MI_ARB_CHECK);
2193 	if (IS_ERR(rq[2])) {
2194 		err = PTR_ERR(rq[2]);
2195 		goto out;
2196 	}
2197 
2198 	i915_request_get(rq[2]);
2199 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2200 	i915_request_add(rq[2]);
2201 	if (err)
2202 		goto out;
2203 
2204 	intel_context_set_banned(rq[2]->context);
2205 	err = intel_engine_pulse(arg->engine);
2206 	if (err)
2207 		goto out;
2208 
2209 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2210 	if (err)
2211 		goto out;
2212 
2213 	if (rq[0]->fence.error != -EIO) {
2214 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2215 		err = -EINVAL;
2216 		goto out;
2217 	}
2218 
2219 	if (rq[1]->fence.error != 0) {
2220 		pr_err("Normal inflight1 request did not complete\n");
2221 		err = -EINVAL;
2222 		goto out;
2223 	}
2224 
2225 	if (rq[2]->fence.error != -EIO) {
2226 		pr_err("Cancelled queued request did not report -EIO\n");
2227 		err = -EINVAL;
2228 		goto out;
2229 	}
2230 
2231 out:
2232 	i915_request_put(rq[2]);
2233 	i915_request_put(rq[1]);
2234 	i915_request_put(rq[0]);
2235 	if (igt_live_test_end(&t))
2236 		err = -EIO;
2237 	return err;
2238 }
2239 
2240 static int __cancel_hostile(struct live_preempt_cancel *arg)
2241 {
2242 	struct i915_request *rq;
2243 	int err;
2244 
2245 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2246 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2247 		return 0;
2248 
2249 	if (!intel_has_reset_engine(arg->engine->gt))
2250 		return 0;
2251 
2252 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2253 	rq = spinner_create_request(&arg->a.spin,
2254 				    arg->a.ctx, arg->engine,
2255 				    MI_NOOP); /* preemption disabled */
2256 	if (IS_ERR(rq))
2257 		return PTR_ERR(rq);
2258 
2259 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2260 	i915_request_get(rq);
2261 	i915_request_add(rq);
2262 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2263 		err = -EIO;
2264 		goto out;
2265 	}
2266 
2267 	intel_context_set_banned(rq->context);
2268 	err = intel_engine_pulse(arg->engine); /* force reset */
2269 	if (err)
2270 		goto out;
2271 
2272 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2273 	if (err) {
2274 		pr_err("Cancelled inflight0 request did not reset\n");
2275 		goto out;
2276 	}
2277 
2278 out:
2279 	i915_request_put(rq);
2280 	if (igt_flush_test(arg->engine->i915))
2281 		err = -EIO;
2282 	return err;
2283 }
2284 
2285 static void force_reset_timeout(struct intel_engine_cs *engine)
2286 {
2287 	engine->reset_timeout.probability = 999;
2288 	atomic_set(&engine->reset_timeout.times, -1);
2289 }
2290 
2291 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2292 {
2293 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2294 }
2295 
2296 static int __cancel_fail(struct live_preempt_cancel *arg)
2297 {
2298 	struct intel_engine_cs *engine = arg->engine;
2299 	struct i915_request *rq;
2300 	int err;
2301 
2302 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2303 		return 0;
2304 
2305 	if (!intel_has_reset_engine(engine->gt))
2306 		return 0;
2307 
2308 	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2309 	rq = spinner_create_request(&arg->a.spin,
2310 				    arg->a.ctx, engine,
2311 				    MI_NOOP); /* preemption disabled */
2312 	if (IS_ERR(rq))
2313 		return PTR_ERR(rq);
2314 
2315 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2316 	i915_request_get(rq);
2317 	i915_request_add(rq);
2318 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2319 		err = -EIO;
2320 		goto out;
2321 	}
2322 
2323 	intel_context_set_banned(rq->context);
2324 
2325 	err = intel_engine_pulse(engine);
2326 	if (err)
2327 		goto out;
2328 
2329 	force_reset_timeout(engine);
2330 
2331 	/* force preempt reset [failure] */
2332 	while (!engine->execlists.pending[0])
2333 		intel_engine_flush_submission(engine);
2334 	del_timer_sync(&engine->execlists.preempt);
2335 	intel_engine_flush_submission(engine);
2336 
2337 	cancel_reset_timeout(engine);
2338 
2339 	/* after failure, require heartbeats to reset device */
2340 	intel_engine_set_heartbeat(engine, 1);
2341 	err = wait_for_reset(engine, rq, HZ / 2);
2342 	intel_engine_set_heartbeat(engine,
2343 				   engine->defaults.heartbeat_interval_ms);
2344 	if (err) {
2345 		pr_err("Cancelled inflight0 request did not reset\n");
2346 		goto out;
2347 	}
2348 
2349 out:
2350 	i915_request_put(rq);
2351 	if (igt_flush_test(engine->i915))
2352 		err = -EIO;
2353 	return err;
2354 }
2355 
2356 static int live_preempt_cancel(void *arg)
2357 {
2358 	struct intel_gt *gt = arg;
2359 	struct live_preempt_cancel data;
2360 	enum intel_engine_id id;
2361 	int err = -ENOMEM;
2362 
2363 	/*
2364 	 * To cancel an inflight context, we need to first remove it from the
2365 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2366 	 */
2367 
2368 	if (preempt_client_init(gt, &data.a))
2369 		return -ENOMEM;
2370 	if (preempt_client_init(gt, &data.b))
2371 		goto err_client_a;
2372 
2373 	for_each_engine(data.engine, gt, id) {
2374 		if (!intel_engine_has_preemption(data.engine))
2375 			continue;
2376 
2377 		err = __cancel_active0(&data);
2378 		if (err)
2379 			goto err_wedged;
2380 
2381 		err = __cancel_active1(&data);
2382 		if (err)
2383 			goto err_wedged;
2384 
2385 		err = __cancel_queued(&data);
2386 		if (err)
2387 			goto err_wedged;
2388 
2389 		err = __cancel_hostile(&data);
2390 		if (err)
2391 			goto err_wedged;
2392 
2393 		err = __cancel_fail(&data);
2394 		if (err)
2395 			goto err_wedged;
2396 	}
2397 
2398 	err = 0;
2399 err_client_b:
2400 	preempt_client_fini(&data.b);
2401 err_client_a:
2402 	preempt_client_fini(&data.a);
2403 	return err;
2404 
2405 err_wedged:
2406 	GEM_TRACE_DUMP();
2407 	igt_spinner_end(&data.b.spin);
2408 	igt_spinner_end(&data.a.spin);
2409 	intel_gt_set_wedged(gt);
2410 	goto err_client_b;
2411 }
2412 
2413 static int live_suppress_self_preempt(void *arg)
2414 {
2415 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2416 	struct intel_gt *gt = arg;
2417 	struct intel_engine_cs *engine;
2418 	struct preempt_client a, b;
2419 	enum intel_engine_id id;
2420 	int err = -ENOMEM;
2421 
2422 	/*
2423 	 * Verify that if a preemption request does not cause a change in
2424 	 * the current execution order, the preempt-to-idle injection is
2425 	 * skipped and that we do not accidentally apply it after the CS
2426 	 * completion event.
2427 	 */
2428 
2429 	if (intel_uc_uses_guc_submission(&gt->uc))
2430 		return 0; /* presume black blox */
2431 
2432 	if (intel_vgpu_active(gt->i915))
2433 		return 0; /* GVT forces single port & request submission */
2434 
2435 	if (preempt_client_init(gt, &a))
2436 		return -ENOMEM;
2437 	if (preempt_client_init(gt, &b))
2438 		goto err_client_a;
2439 
2440 	for_each_engine(engine, gt, id) {
2441 		struct i915_request *rq_a, *rq_b;
2442 		int depth;
2443 
2444 		if (!intel_engine_has_preemption(engine))
2445 			continue;
2446 
2447 		if (igt_flush_test(gt->i915))
2448 			goto err_wedged;
2449 
2450 		st_engine_heartbeat_disable(engine);
2451 		engine->execlists.preempt_hang.count = 0;
2452 
2453 		rq_a = spinner_create_request(&a.spin,
2454 					      a.ctx, engine,
2455 					      MI_NOOP);
2456 		if (IS_ERR(rq_a)) {
2457 			err = PTR_ERR(rq_a);
2458 			st_engine_heartbeat_enable(engine);
2459 			goto err_client_b;
2460 		}
2461 
2462 		i915_request_add(rq_a);
2463 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2464 			pr_err("First client failed to start\n");
2465 			st_engine_heartbeat_enable(engine);
2466 			goto err_wedged;
2467 		}
2468 
2469 		/* Keep postponing the timer to avoid premature slicing */
2470 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2471 		for (depth = 0; depth < 8; depth++) {
2472 			rq_b = spinner_create_request(&b.spin,
2473 						      b.ctx, engine,
2474 						      MI_NOOP);
2475 			if (IS_ERR(rq_b)) {
2476 				err = PTR_ERR(rq_b);
2477 				st_engine_heartbeat_enable(engine);
2478 				goto err_client_b;
2479 			}
2480 			i915_request_add(rq_b);
2481 
2482 			GEM_BUG_ON(i915_request_completed(rq_a));
2483 			engine->schedule(rq_a, &attr);
2484 			igt_spinner_end(&a.spin);
2485 
2486 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2487 				pr_err("Second client failed to start\n");
2488 				st_engine_heartbeat_enable(engine);
2489 				goto err_wedged;
2490 			}
2491 
2492 			swap(a, b);
2493 			rq_a = rq_b;
2494 		}
2495 		igt_spinner_end(&a.spin);
2496 
2497 		if (engine->execlists.preempt_hang.count) {
2498 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2499 			       engine->name,
2500 			       engine->execlists.preempt_hang.count,
2501 			       depth);
2502 			st_engine_heartbeat_enable(engine);
2503 			err = -EINVAL;
2504 			goto err_client_b;
2505 		}
2506 
2507 		st_engine_heartbeat_enable(engine);
2508 		if (igt_flush_test(gt->i915))
2509 			goto err_wedged;
2510 	}
2511 
2512 	err = 0;
2513 err_client_b:
2514 	preempt_client_fini(&b);
2515 err_client_a:
2516 	preempt_client_fini(&a);
2517 	return err;
2518 
2519 err_wedged:
2520 	igt_spinner_end(&b.spin);
2521 	igt_spinner_end(&a.spin);
2522 	intel_gt_set_wedged(gt);
2523 	err = -EIO;
2524 	goto err_client_b;
2525 }
2526 
2527 static int live_chain_preempt(void *arg)
2528 {
2529 	struct intel_gt *gt = arg;
2530 	struct intel_engine_cs *engine;
2531 	struct preempt_client hi, lo;
2532 	enum intel_engine_id id;
2533 	int err = -ENOMEM;
2534 
2535 	/*
2536 	 * Build a chain AB...BA between two contexts (A, B) and request
2537 	 * preemption of the last request. It should then complete before
2538 	 * the previously submitted spinner in B.
2539 	 */
2540 
2541 	if (preempt_client_init(gt, &hi))
2542 		return -ENOMEM;
2543 
2544 	if (preempt_client_init(gt, &lo))
2545 		goto err_client_hi;
2546 
2547 	for_each_engine(engine, gt, id) {
2548 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2549 		struct igt_live_test t;
2550 		struct i915_request *rq;
2551 		int ring_size, count, i;
2552 
2553 		if (!intel_engine_has_preemption(engine))
2554 			continue;
2555 
2556 		rq = spinner_create_request(&lo.spin,
2557 					    lo.ctx, engine,
2558 					    MI_ARB_CHECK);
2559 		if (IS_ERR(rq))
2560 			goto err_wedged;
2561 
2562 		i915_request_get(rq);
2563 		i915_request_add(rq);
2564 
2565 		ring_size = rq->wa_tail - rq->head;
2566 		if (ring_size < 0)
2567 			ring_size += rq->ring->size;
2568 		ring_size = rq->ring->size / ring_size;
2569 		pr_debug("%s(%s): Using maximum of %d requests\n",
2570 			 __func__, engine->name, ring_size);
2571 
2572 		igt_spinner_end(&lo.spin);
2573 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2574 			pr_err("Timed out waiting to flush %s\n", engine->name);
2575 			i915_request_put(rq);
2576 			goto err_wedged;
2577 		}
2578 		i915_request_put(rq);
2579 
2580 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2581 			err = -EIO;
2582 			goto err_wedged;
2583 		}
2584 
2585 		for_each_prime_number_from(count, 1, ring_size) {
2586 			rq = spinner_create_request(&hi.spin,
2587 						    hi.ctx, engine,
2588 						    MI_ARB_CHECK);
2589 			if (IS_ERR(rq))
2590 				goto err_wedged;
2591 			i915_request_add(rq);
2592 			if (!igt_wait_for_spinner(&hi.spin, rq))
2593 				goto err_wedged;
2594 
2595 			rq = spinner_create_request(&lo.spin,
2596 						    lo.ctx, engine,
2597 						    MI_ARB_CHECK);
2598 			if (IS_ERR(rq))
2599 				goto err_wedged;
2600 			i915_request_add(rq);
2601 
2602 			for (i = 0; i < count; i++) {
2603 				rq = igt_request_alloc(lo.ctx, engine);
2604 				if (IS_ERR(rq))
2605 					goto err_wedged;
2606 				i915_request_add(rq);
2607 			}
2608 
2609 			rq = igt_request_alloc(hi.ctx, engine);
2610 			if (IS_ERR(rq))
2611 				goto err_wedged;
2612 
2613 			i915_request_get(rq);
2614 			i915_request_add(rq);
2615 			engine->schedule(rq, &attr);
2616 
2617 			igt_spinner_end(&hi.spin);
2618 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2619 				struct drm_printer p =
2620 					drm_info_printer(gt->i915->drm.dev);
2621 
2622 				pr_err("Failed to preempt over chain of %d\n",
2623 				       count);
2624 				intel_engine_dump(engine, &p,
2625 						  "%s\n", engine->name);
2626 				i915_request_put(rq);
2627 				goto err_wedged;
2628 			}
2629 			igt_spinner_end(&lo.spin);
2630 			i915_request_put(rq);
2631 
2632 			rq = igt_request_alloc(lo.ctx, engine);
2633 			if (IS_ERR(rq))
2634 				goto err_wedged;
2635 
2636 			i915_request_get(rq);
2637 			i915_request_add(rq);
2638 
2639 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2640 				struct drm_printer p =
2641 					drm_info_printer(gt->i915->drm.dev);
2642 
2643 				pr_err("Failed to flush low priority chain of %d requests\n",
2644 				       count);
2645 				intel_engine_dump(engine, &p,
2646 						  "%s\n", engine->name);
2647 
2648 				i915_request_put(rq);
2649 				goto err_wedged;
2650 			}
2651 			i915_request_put(rq);
2652 		}
2653 
2654 		if (igt_live_test_end(&t)) {
2655 			err = -EIO;
2656 			goto err_wedged;
2657 		}
2658 	}
2659 
2660 	err = 0;
2661 err_client_lo:
2662 	preempt_client_fini(&lo);
2663 err_client_hi:
2664 	preempt_client_fini(&hi);
2665 	return err;
2666 
2667 err_wedged:
2668 	igt_spinner_end(&hi.spin);
2669 	igt_spinner_end(&lo.spin);
2670 	intel_gt_set_wedged(gt);
2671 	err = -EIO;
2672 	goto err_client_lo;
2673 }
2674 
2675 static int create_gang(struct intel_engine_cs *engine,
2676 		       struct i915_request **prev)
2677 {
2678 	struct drm_i915_gem_object *obj;
2679 	struct intel_context *ce;
2680 	struct i915_request *rq;
2681 	struct i915_vma *vma;
2682 	u32 *cs;
2683 	int err;
2684 
2685 	ce = intel_context_create(engine);
2686 	if (IS_ERR(ce))
2687 		return PTR_ERR(ce);
2688 
2689 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2690 	if (IS_ERR(obj)) {
2691 		err = PTR_ERR(obj);
2692 		goto err_ce;
2693 	}
2694 
2695 	vma = i915_vma_instance(obj, ce->vm, NULL);
2696 	if (IS_ERR(vma)) {
2697 		err = PTR_ERR(vma);
2698 		goto err_obj;
2699 	}
2700 
2701 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2702 	if (err)
2703 		goto err_obj;
2704 
2705 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2706 	if (IS_ERR(cs)) {
2707 		err = PTR_ERR(cs);
2708 		goto err_obj;
2709 	}
2710 
2711 	/* Semaphore target: spin until zero */
2712 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2713 
2714 	*cs++ = MI_SEMAPHORE_WAIT |
2715 		MI_SEMAPHORE_POLL |
2716 		MI_SEMAPHORE_SAD_EQ_SDD;
2717 	*cs++ = 0;
2718 	*cs++ = lower_32_bits(vma->node.start);
2719 	*cs++ = upper_32_bits(vma->node.start);
2720 
2721 	if (*prev) {
2722 		u64 offset = (*prev)->batch->node.start;
2723 
2724 		/* Terminate the spinner in the next lower priority batch. */
2725 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2726 		*cs++ = lower_32_bits(offset);
2727 		*cs++ = upper_32_bits(offset);
2728 		*cs++ = 0;
2729 	}
2730 
2731 	*cs++ = MI_BATCH_BUFFER_END;
2732 	i915_gem_object_flush_map(obj);
2733 	i915_gem_object_unpin_map(obj);
2734 
2735 	rq = intel_context_create_request(ce);
2736 	if (IS_ERR(rq)) {
2737 		err = PTR_ERR(rq);
2738 		goto err_obj;
2739 	}
2740 
2741 	rq->batch = i915_vma_get(vma);
2742 	i915_request_get(rq);
2743 
2744 	i915_vma_lock(vma);
2745 	err = i915_request_await_object(rq, vma->obj, false);
2746 	if (!err)
2747 		err = i915_vma_move_to_active(vma, rq, 0);
2748 	if (!err)
2749 		err = rq->engine->emit_bb_start(rq,
2750 						vma->node.start,
2751 						PAGE_SIZE, 0);
2752 	i915_vma_unlock(vma);
2753 	i915_request_add(rq);
2754 	if (err)
2755 		goto err_rq;
2756 
2757 	i915_gem_object_put(obj);
2758 	intel_context_put(ce);
2759 
2760 	rq->mock.link.next = &(*prev)->mock.link;
2761 	*prev = rq;
2762 	return 0;
2763 
2764 err_rq:
2765 	i915_vma_put(rq->batch);
2766 	i915_request_put(rq);
2767 err_obj:
2768 	i915_gem_object_put(obj);
2769 err_ce:
2770 	intel_context_put(ce);
2771 	return err;
2772 }
2773 
2774 static int __live_preempt_ring(struct intel_engine_cs *engine,
2775 			       struct igt_spinner *spin,
2776 			       int queue_sz, int ring_sz)
2777 {
2778 	struct intel_context *ce[2] = {};
2779 	struct i915_request *rq;
2780 	struct igt_live_test t;
2781 	int err = 0;
2782 	int n;
2783 
2784 	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2785 		return -EIO;
2786 
2787 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2788 		struct intel_context *tmp;
2789 
2790 		tmp = intel_context_create(engine);
2791 		if (IS_ERR(tmp)) {
2792 			err = PTR_ERR(tmp);
2793 			goto err_ce;
2794 		}
2795 
2796 		tmp->ring = __intel_context_ring_size(ring_sz);
2797 
2798 		err = intel_context_pin(tmp);
2799 		if (err) {
2800 			intel_context_put(tmp);
2801 			goto err_ce;
2802 		}
2803 
2804 		memset32(tmp->ring->vaddr,
2805 			 0xdeadbeef, /* trigger a hang if executed */
2806 			 tmp->ring->vma->size / sizeof(u32));
2807 
2808 		ce[n] = tmp;
2809 	}
2810 
2811 	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2812 	if (IS_ERR(rq)) {
2813 		err = PTR_ERR(rq);
2814 		goto err_ce;
2815 	}
2816 
2817 	i915_request_get(rq);
2818 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2819 	i915_request_add(rq);
2820 
2821 	if (!igt_wait_for_spinner(spin, rq)) {
2822 		intel_gt_set_wedged(engine->gt);
2823 		i915_request_put(rq);
2824 		err = -ETIME;
2825 		goto err_ce;
2826 	}
2827 
2828 	/* Fill the ring, until we will cause a wrap */
2829 	n = 0;
2830 	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2831 		struct i915_request *tmp;
2832 
2833 		tmp = intel_context_create_request(ce[0]);
2834 		if (IS_ERR(tmp)) {
2835 			err = PTR_ERR(tmp);
2836 			i915_request_put(rq);
2837 			goto err_ce;
2838 		}
2839 
2840 		i915_request_add(tmp);
2841 		intel_engine_flush_submission(engine);
2842 		n++;
2843 	}
2844 	intel_engine_flush_submission(engine);
2845 	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2846 		 engine->name, queue_sz, n,
2847 		 ce[0]->ring->size,
2848 		 ce[0]->ring->tail,
2849 		 ce[0]->ring->emit,
2850 		 rq->tail);
2851 	i915_request_put(rq);
2852 
2853 	/* Create a second request to preempt the first ring */
2854 	rq = intel_context_create_request(ce[1]);
2855 	if (IS_ERR(rq)) {
2856 		err = PTR_ERR(rq);
2857 		goto err_ce;
2858 	}
2859 
2860 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2861 	i915_request_get(rq);
2862 	i915_request_add(rq);
2863 
2864 	err = wait_for_submit(engine, rq, HZ / 2);
2865 	i915_request_put(rq);
2866 	if (err) {
2867 		pr_err("%s: preemption request was not submitted\n",
2868 		       engine->name);
2869 		err = -ETIME;
2870 	}
2871 
2872 	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2873 		 engine->name,
2874 		 ce[0]->ring->tail, ce[0]->ring->emit,
2875 		 ce[1]->ring->tail, ce[1]->ring->emit);
2876 
2877 err_ce:
2878 	intel_engine_flush_submission(engine);
2879 	igt_spinner_end(spin);
2880 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2881 		if (IS_ERR_OR_NULL(ce[n]))
2882 			break;
2883 
2884 		intel_context_unpin(ce[n]);
2885 		intel_context_put(ce[n]);
2886 	}
2887 	if (igt_live_test_end(&t))
2888 		err = -EIO;
2889 	return err;
2890 }
2891 
2892 static int live_preempt_ring(void *arg)
2893 {
2894 	struct intel_gt *gt = arg;
2895 	struct intel_engine_cs *engine;
2896 	struct igt_spinner spin;
2897 	enum intel_engine_id id;
2898 	int err = 0;
2899 
2900 	/*
2901 	 * Check that we rollback large chunks of a ring in order to do a
2902 	 * preemption event. Similar to live_unlite_ring, but looking at
2903 	 * ring size rather than the impact of intel_ring_direction().
2904 	 */
2905 
2906 	if (igt_spinner_init(&spin, gt))
2907 		return -ENOMEM;
2908 
2909 	for_each_engine(engine, gt, id) {
2910 		int n;
2911 
2912 		if (!intel_engine_has_preemption(engine))
2913 			continue;
2914 
2915 		if (!intel_engine_can_store_dword(engine))
2916 			continue;
2917 
2918 		st_engine_heartbeat_disable(engine);
2919 
2920 		for (n = 0; n <= 3; n++) {
2921 			err = __live_preempt_ring(engine, &spin,
2922 						  n * SZ_4K / 4, SZ_4K);
2923 			if (err)
2924 				break;
2925 		}
2926 
2927 		st_engine_heartbeat_enable(engine);
2928 		if (err)
2929 			break;
2930 	}
2931 
2932 	igt_spinner_fini(&spin);
2933 	return err;
2934 }
2935 
2936 static int live_preempt_gang(void *arg)
2937 {
2938 	struct intel_gt *gt = arg;
2939 	struct intel_engine_cs *engine;
2940 	enum intel_engine_id id;
2941 
2942 	/*
2943 	 * Build as long a chain of preempters as we can, with each
2944 	 * request higher priority than the last. Once we are ready, we release
2945 	 * the last batch which then precolates down the chain, each releasing
2946 	 * the next oldest in turn. The intent is to simply push as hard as we
2947 	 * can with the number of preemptions, trying to exceed narrow HW
2948 	 * limits. At a minimum, we insist that we can sort all the user
2949 	 * high priority levels into execution order.
2950 	 */
2951 
2952 	for_each_engine(engine, gt, id) {
2953 		struct i915_request *rq = NULL;
2954 		struct igt_live_test t;
2955 		IGT_TIMEOUT(end_time);
2956 		int prio = 0;
2957 		int err = 0;
2958 		u32 *cs;
2959 
2960 		if (!intel_engine_has_preemption(engine))
2961 			continue;
2962 
2963 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2964 			return -EIO;
2965 
2966 		do {
2967 			struct i915_sched_attr attr = { .priority = prio++ };
2968 
2969 			err = create_gang(engine, &rq);
2970 			if (err)
2971 				break;
2972 
2973 			/* Submit each spinner at increasing priority */
2974 			engine->schedule(rq, &attr);
2975 		} while (prio <= I915_PRIORITY_MAX &&
2976 			 !__igt_timeout(end_time, NULL));
2977 		pr_debug("%s: Preempt chain of %d requests\n",
2978 			 engine->name, prio);
2979 
2980 		/*
2981 		 * Such that the last spinner is the highest priority and
2982 		 * should execute first. When that spinner completes,
2983 		 * it will terminate the next lowest spinner until there
2984 		 * are no more spinners and the gang is complete.
2985 		 */
2986 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
2987 		if (!IS_ERR(cs)) {
2988 			*cs = 0;
2989 			i915_gem_object_unpin_map(rq->batch->obj);
2990 		} else {
2991 			err = PTR_ERR(cs);
2992 			intel_gt_set_wedged(gt);
2993 		}
2994 
2995 		while (rq) { /* wait for each rq from highest to lowest prio */
2996 			struct i915_request *n = list_next_entry(rq, mock.link);
2997 
2998 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2999 				struct drm_printer p =
3000 					drm_info_printer(engine->i915->drm.dev);
3001 
3002 				pr_err("Failed to flush chain of %d requests, at %d\n",
3003 				       prio, rq_prio(rq));
3004 				intel_engine_dump(engine, &p,
3005 						  "%s\n", engine->name);
3006 
3007 				err = -ETIME;
3008 			}
3009 
3010 			i915_vma_put(rq->batch);
3011 			i915_request_put(rq);
3012 			rq = n;
3013 		}
3014 
3015 		if (igt_live_test_end(&t))
3016 			err = -EIO;
3017 		if (err)
3018 			return err;
3019 	}
3020 
3021 	return 0;
3022 }
3023 
3024 static struct i915_vma *
3025 create_gpr_user(struct intel_engine_cs *engine,
3026 		struct i915_vma *result,
3027 		unsigned int offset)
3028 {
3029 	struct drm_i915_gem_object *obj;
3030 	struct i915_vma *vma;
3031 	u32 *cs;
3032 	int err;
3033 	int i;
3034 
3035 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3036 	if (IS_ERR(obj))
3037 		return ERR_CAST(obj);
3038 
3039 	vma = i915_vma_instance(obj, result->vm, NULL);
3040 	if (IS_ERR(vma)) {
3041 		i915_gem_object_put(obj);
3042 		return vma;
3043 	}
3044 
3045 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3046 	if (err) {
3047 		i915_vma_put(vma);
3048 		return ERR_PTR(err);
3049 	}
3050 
3051 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3052 	if (IS_ERR(cs)) {
3053 		i915_vma_put(vma);
3054 		return ERR_CAST(cs);
3055 	}
3056 
3057 	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
3058 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3059 	*cs++ = CS_GPR(engine, 0);
3060 	*cs++ = 1;
3061 
3062 	for (i = 1; i < NUM_GPR; i++) {
3063 		u64 addr;
3064 
3065 		/*
3066 		 * Perform: GPR[i]++
3067 		 *
3068 		 * As we read and write into the context saved GPR[i], if
3069 		 * we restart this batch buffer from an earlier point, we
3070 		 * will repeat the increment and store a value > 1.
3071 		 */
3072 		*cs++ = MI_MATH(4);
3073 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3074 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3075 		*cs++ = MI_MATH_ADD;
3076 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3077 
3078 		addr = result->node.start + offset + i * sizeof(*cs);
3079 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3080 		*cs++ = CS_GPR(engine, 2 * i);
3081 		*cs++ = lower_32_bits(addr);
3082 		*cs++ = upper_32_bits(addr);
3083 
3084 		*cs++ = MI_SEMAPHORE_WAIT |
3085 			MI_SEMAPHORE_POLL |
3086 			MI_SEMAPHORE_SAD_GTE_SDD;
3087 		*cs++ = i;
3088 		*cs++ = lower_32_bits(result->node.start);
3089 		*cs++ = upper_32_bits(result->node.start);
3090 	}
3091 
3092 	*cs++ = MI_BATCH_BUFFER_END;
3093 	i915_gem_object_flush_map(obj);
3094 	i915_gem_object_unpin_map(obj);
3095 
3096 	return vma;
3097 }
3098 
3099 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3100 {
3101 	struct drm_i915_gem_object *obj;
3102 	struct i915_vma *vma;
3103 	int err;
3104 
3105 	obj = i915_gem_object_create_internal(gt->i915, sz);
3106 	if (IS_ERR(obj))
3107 		return ERR_CAST(obj);
3108 
3109 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3110 	if (IS_ERR(vma)) {
3111 		i915_gem_object_put(obj);
3112 		return vma;
3113 	}
3114 
3115 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3116 	if (err) {
3117 		i915_vma_put(vma);
3118 		return ERR_PTR(err);
3119 	}
3120 
3121 	return vma;
3122 }
3123 
3124 static struct i915_request *
3125 create_gpr_client(struct intel_engine_cs *engine,
3126 		  struct i915_vma *global,
3127 		  unsigned int offset)
3128 {
3129 	struct i915_vma *batch, *vma;
3130 	struct intel_context *ce;
3131 	struct i915_request *rq;
3132 	int err;
3133 
3134 	ce = intel_context_create(engine);
3135 	if (IS_ERR(ce))
3136 		return ERR_CAST(ce);
3137 
3138 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3139 	if (IS_ERR(vma)) {
3140 		err = PTR_ERR(vma);
3141 		goto out_ce;
3142 	}
3143 
3144 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3145 	if (err)
3146 		goto out_ce;
3147 
3148 	batch = create_gpr_user(engine, vma, offset);
3149 	if (IS_ERR(batch)) {
3150 		err = PTR_ERR(batch);
3151 		goto out_vma;
3152 	}
3153 
3154 	rq = intel_context_create_request(ce);
3155 	if (IS_ERR(rq)) {
3156 		err = PTR_ERR(rq);
3157 		goto out_batch;
3158 	}
3159 
3160 	i915_vma_lock(vma);
3161 	err = i915_request_await_object(rq, vma->obj, false);
3162 	if (!err)
3163 		err = i915_vma_move_to_active(vma, rq, 0);
3164 	i915_vma_unlock(vma);
3165 
3166 	i915_vma_lock(batch);
3167 	if (!err)
3168 		err = i915_request_await_object(rq, batch->obj, false);
3169 	if (!err)
3170 		err = i915_vma_move_to_active(batch, rq, 0);
3171 	if (!err)
3172 		err = rq->engine->emit_bb_start(rq,
3173 						batch->node.start,
3174 						PAGE_SIZE, 0);
3175 	i915_vma_unlock(batch);
3176 	i915_vma_unpin(batch);
3177 
3178 	if (!err)
3179 		i915_request_get(rq);
3180 	i915_request_add(rq);
3181 
3182 out_batch:
3183 	i915_vma_put(batch);
3184 out_vma:
3185 	i915_vma_unpin(vma);
3186 out_ce:
3187 	intel_context_put(ce);
3188 	return err ? ERR_PTR(err) : rq;
3189 }
3190 
3191 static int preempt_user(struct intel_engine_cs *engine,
3192 			struct i915_vma *global,
3193 			int id)
3194 {
3195 	struct i915_sched_attr attr = {
3196 		.priority = I915_PRIORITY_MAX
3197 	};
3198 	struct i915_request *rq;
3199 	int err = 0;
3200 	u32 *cs;
3201 
3202 	rq = intel_engine_create_kernel_request(engine);
3203 	if (IS_ERR(rq))
3204 		return PTR_ERR(rq);
3205 
3206 	cs = intel_ring_begin(rq, 4);
3207 	if (IS_ERR(cs)) {
3208 		i915_request_add(rq);
3209 		return PTR_ERR(cs);
3210 	}
3211 
3212 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3213 	*cs++ = i915_ggtt_offset(global);
3214 	*cs++ = 0;
3215 	*cs++ = id;
3216 
3217 	intel_ring_advance(rq, cs);
3218 
3219 	i915_request_get(rq);
3220 	i915_request_add(rq);
3221 
3222 	engine->schedule(rq, &attr);
3223 
3224 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3225 		err = -ETIME;
3226 	i915_request_put(rq);
3227 
3228 	return err;
3229 }
3230 
3231 static int live_preempt_user(void *arg)
3232 {
3233 	struct intel_gt *gt = arg;
3234 	struct intel_engine_cs *engine;
3235 	struct i915_vma *global;
3236 	enum intel_engine_id id;
3237 	u32 *result;
3238 	int err = 0;
3239 
3240 	/*
3241 	 * In our other tests, we look at preemption in carefully
3242 	 * controlled conditions in the ringbuffer. Since most of the
3243 	 * time is spent in user batches, most of our preemptions naturally
3244 	 * occur there. We want to verify that when we preempt inside a batch
3245 	 * we continue on from the current instruction and do not roll back
3246 	 * to the start, or another earlier arbitration point.
3247 	 *
3248 	 * To verify this, we create a batch which is a mixture of
3249 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3250 	 * a few preempting contexts thrown into the mix, we look for any
3251 	 * repeated instructions (which show up as incorrect values).
3252 	 */
3253 
3254 	global = create_global(gt, 4096);
3255 	if (IS_ERR(global))
3256 		return PTR_ERR(global);
3257 
3258 	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3259 	if (IS_ERR(result)) {
3260 		i915_vma_unpin_and_release(&global, 0);
3261 		return PTR_ERR(result);
3262 	}
3263 
3264 	for_each_engine(engine, gt, id) {
3265 		struct i915_request *client[3] = {};
3266 		struct igt_live_test t;
3267 		int i;
3268 
3269 		if (!intel_engine_has_preemption(engine))
3270 			continue;
3271 
3272 		if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3273 			continue; /* we need per-context GPR */
3274 
3275 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3276 			err = -EIO;
3277 			break;
3278 		}
3279 
3280 		memset(result, 0, 4096);
3281 
3282 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3283 			struct i915_request *rq;
3284 
3285 			rq = create_gpr_client(engine, global,
3286 					       NUM_GPR * i * sizeof(u32));
3287 			if (IS_ERR(rq)) {
3288 				err = PTR_ERR(rq);
3289 				goto end_test;
3290 			}
3291 
3292 			client[i] = rq;
3293 		}
3294 
3295 		/* Continuously preempt the set of 3 running contexts */
3296 		for (i = 1; i <= NUM_GPR; i++) {
3297 			err = preempt_user(engine, global, i);
3298 			if (err)
3299 				goto end_test;
3300 		}
3301 
3302 		if (READ_ONCE(result[0]) != NUM_GPR) {
3303 			pr_err("%s: Failed to release semaphore\n",
3304 			       engine->name);
3305 			err = -EIO;
3306 			goto end_test;
3307 		}
3308 
3309 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3310 			int gpr;
3311 
3312 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3313 				err = -ETIME;
3314 				goto end_test;
3315 			}
3316 
3317 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3318 				if (result[NUM_GPR * i + gpr] != 1) {
3319 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3320 					       engine->name,
3321 					       i, gpr, result[NUM_GPR * i + gpr]);
3322 					err = -EINVAL;
3323 					goto end_test;
3324 				}
3325 			}
3326 		}
3327 
3328 end_test:
3329 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3330 			if (!client[i])
3331 				break;
3332 
3333 			i915_request_put(client[i]);
3334 		}
3335 
3336 		/* Flush the semaphores on error */
3337 		smp_store_mb(result[0], -1);
3338 		if (igt_live_test_end(&t))
3339 			err = -EIO;
3340 		if (err)
3341 			break;
3342 	}
3343 
3344 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3345 	return err;
3346 }
3347 
3348 static int live_preempt_timeout(void *arg)
3349 {
3350 	struct intel_gt *gt = arg;
3351 	struct i915_gem_context *ctx_hi, *ctx_lo;
3352 	struct igt_spinner spin_lo;
3353 	struct intel_engine_cs *engine;
3354 	enum intel_engine_id id;
3355 	int err = -ENOMEM;
3356 
3357 	/*
3358 	 * Check that we force preemption to occur by cancelling the previous
3359 	 * context if it refuses to yield the GPU.
3360 	 */
3361 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3362 		return 0;
3363 
3364 	if (!intel_has_reset_engine(gt))
3365 		return 0;
3366 
3367 	if (igt_spinner_init(&spin_lo, gt))
3368 		return -ENOMEM;
3369 
3370 	ctx_hi = kernel_context(gt->i915);
3371 	if (!ctx_hi)
3372 		goto err_spin_lo;
3373 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3374 
3375 	ctx_lo = kernel_context(gt->i915);
3376 	if (!ctx_lo)
3377 		goto err_ctx_hi;
3378 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3379 
3380 	for_each_engine(engine, gt, id) {
3381 		unsigned long saved_timeout;
3382 		struct i915_request *rq;
3383 
3384 		if (!intel_engine_has_preemption(engine))
3385 			continue;
3386 
3387 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3388 					    MI_NOOP); /* preemption disabled */
3389 		if (IS_ERR(rq)) {
3390 			err = PTR_ERR(rq);
3391 			goto err_ctx_lo;
3392 		}
3393 
3394 		i915_request_add(rq);
3395 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3396 			intel_gt_set_wedged(gt);
3397 			err = -EIO;
3398 			goto err_ctx_lo;
3399 		}
3400 
3401 		rq = igt_request_alloc(ctx_hi, engine);
3402 		if (IS_ERR(rq)) {
3403 			igt_spinner_end(&spin_lo);
3404 			err = PTR_ERR(rq);
3405 			goto err_ctx_lo;
3406 		}
3407 
3408 		/* Flush the previous CS ack before changing timeouts */
3409 		while (READ_ONCE(engine->execlists.pending[0]))
3410 			cpu_relax();
3411 
3412 		saved_timeout = engine->props.preempt_timeout_ms;
3413 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3414 
3415 		i915_request_get(rq);
3416 		i915_request_add(rq);
3417 
3418 		intel_engine_flush_submission(engine);
3419 		engine->props.preempt_timeout_ms = saved_timeout;
3420 
3421 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3422 			intel_gt_set_wedged(gt);
3423 			i915_request_put(rq);
3424 			err = -ETIME;
3425 			goto err_ctx_lo;
3426 		}
3427 
3428 		igt_spinner_end(&spin_lo);
3429 		i915_request_put(rq);
3430 	}
3431 
3432 	err = 0;
3433 err_ctx_lo:
3434 	kernel_context_close(ctx_lo);
3435 err_ctx_hi:
3436 	kernel_context_close(ctx_hi);
3437 err_spin_lo:
3438 	igt_spinner_fini(&spin_lo);
3439 	return err;
3440 }
3441 
3442 static int random_range(struct rnd_state *rnd, int min, int max)
3443 {
3444 	return i915_prandom_u32_max_state(max - min, rnd) + min;
3445 }
3446 
3447 static int random_priority(struct rnd_state *rnd)
3448 {
3449 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3450 }
3451 
3452 struct preempt_smoke {
3453 	struct intel_gt *gt;
3454 	struct i915_gem_context **contexts;
3455 	struct intel_engine_cs *engine;
3456 	struct drm_i915_gem_object *batch;
3457 	unsigned int ncontext;
3458 	struct rnd_state prng;
3459 	unsigned long count;
3460 };
3461 
3462 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3463 {
3464 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3465 							  &smoke->prng)];
3466 }
3467 
3468 static int smoke_submit(struct preempt_smoke *smoke,
3469 			struct i915_gem_context *ctx, int prio,
3470 			struct drm_i915_gem_object *batch)
3471 {
3472 	struct i915_request *rq;
3473 	struct i915_vma *vma = NULL;
3474 	int err = 0;
3475 
3476 	if (batch) {
3477 		struct i915_address_space *vm;
3478 
3479 		vm = i915_gem_context_get_vm_rcu(ctx);
3480 		vma = i915_vma_instance(batch, vm, NULL);
3481 		i915_vm_put(vm);
3482 		if (IS_ERR(vma))
3483 			return PTR_ERR(vma);
3484 
3485 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3486 		if (err)
3487 			return err;
3488 	}
3489 
3490 	ctx->sched.priority = prio;
3491 
3492 	rq = igt_request_alloc(ctx, smoke->engine);
3493 	if (IS_ERR(rq)) {
3494 		err = PTR_ERR(rq);
3495 		goto unpin;
3496 	}
3497 
3498 	if (vma) {
3499 		i915_vma_lock(vma);
3500 		err = i915_request_await_object(rq, vma->obj, false);
3501 		if (!err)
3502 			err = i915_vma_move_to_active(vma, rq, 0);
3503 		if (!err)
3504 			err = rq->engine->emit_bb_start(rq,
3505 							vma->node.start,
3506 							PAGE_SIZE, 0);
3507 		i915_vma_unlock(vma);
3508 	}
3509 
3510 	i915_request_add(rq);
3511 
3512 unpin:
3513 	if (vma)
3514 		i915_vma_unpin(vma);
3515 
3516 	return err;
3517 }
3518 
3519 static int smoke_crescendo_thread(void *arg)
3520 {
3521 	struct preempt_smoke *smoke = arg;
3522 	IGT_TIMEOUT(end_time);
3523 	unsigned long count;
3524 
3525 	count = 0;
3526 	do {
3527 		struct i915_gem_context *ctx = smoke_context(smoke);
3528 		int err;
3529 
3530 		err = smoke_submit(smoke,
3531 				   ctx, count % I915_PRIORITY_MAX,
3532 				   smoke->batch);
3533 		if (err)
3534 			return err;
3535 
3536 		count++;
3537 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3538 
3539 	smoke->count = count;
3540 	return 0;
3541 }
3542 
3543 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3544 #define BATCH BIT(0)
3545 {
3546 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3547 	struct preempt_smoke arg[I915_NUM_ENGINES];
3548 	struct intel_engine_cs *engine;
3549 	enum intel_engine_id id;
3550 	unsigned long count;
3551 	int err = 0;
3552 
3553 	for_each_engine(engine, smoke->gt, id) {
3554 		arg[id] = *smoke;
3555 		arg[id].engine = engine;
3556 		if (!(flags & BATCH))
3557 			arg[id].batch = NULL;
3558 		arg[id].count = 0;
3559 
3560 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3561 				      "igt/smoke:%d", id);
3562 		if (IS_ERR(tsk[id])) {
3563 			err = PTR_ERR(tsk[id]);
3564 			break;
3565 		}
3566 		get_task_struct(tsk[id]);
3567 	}
3568 
3569 	yield(); /* start all threads before we kthread_stop() */
3570 
3571 	count = 0;
3572 	for_each_engine(engine, smoke->gt, id) {
3573 		int status;
3574 
3575 		if (IS_ERR_OR_NULL(tsk[id]))
3576 			continue;
3577 
3578 		status = kthread_stop(tsk[id]);
3579 		if (status && !err)
3580 			err = status;
3581 
3582 		count += arg[id].count;
3583 
3584 		put_task_struct(tsk[id]);
3585 	}
3586 
3587 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3588 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3589 	return 0;
3590 }
3591 
3592 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3593 {
3594 	enum intel_engine_id id;
3595 	IGT_TIMEOUT(end_time);
3596 	unsigned long count;
3597 
3598 	count = 0;
3599 	do {
3600 		for_each_engine(smoke->engine, smoke->gt, id) {
3601 			struct i915_gem_context *ctx = smoke_context(smoke);
3602 			int err;
3603 
3604 			err = smoke_submit(smoke,
3605 					   ctx, random_priority(&smoke->prng),
3606 					   flags & BATCH ? smoke->batch : NULL);
3607 			if (err)
3608 				return err;
3609 
3610 			count++;
3611 		}
3612 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3613 
3614 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3615 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3616 	return 0;
3617 }
3618 
3619 static int live_preempt_smoke(void *arg)
3620 {
3621 	struct preempt_smoke smoke = {
3622 		.gt = arg,
3623 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3624 		.ncontext = 256,
3625 	};
3626 	const unsigned int phase[] = { 0, BATCH };
3627 	struct igt_live_test t;
3628 	int err = -ENOMEM;
3629 	u32 *cs;
3630 	int n;
3631 
3632 	smoke.contexts = kmalloc_array(smoke.ncontext,
3633 				       sizeof(*smoke.contexts),
3634 				       GFP_KERNEL);
3635 	if (!smoke.contexts)
3636 		return -ENOMEM;
3637 
3638 	smoke.batch =
3639 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3640 	if (IS_ERR(smoke.batch)) {
3641 		err = PTR_ERR(smoke.batch);
3642 		goto err_free;
3643 	}
3644 
3645 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3646 	if (IS_ERR(cs)) {
3647 		err = PTR_ERR(cs);
3648 		goto err_batch;
3649 	}
3650 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3651 		cs[n] = MI_ARB_CHECK;
3652 	cs[n] = MI_BATCH_BUFFER_END;
3653 	i915_gem_object_flush_map(smoke.batch);
3654 	i915_gem_object_unpin_map(smoke.batch);
3655 
3656 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3657 		err = -EIO;
3658 		goto err_batch;
3659 	}
3660 
3661 	for (n = 0; n < smoke.ncontext; n++) {
3662 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3663 		if (!smoke.contexts[n])
3664 			goto err_ctx;
3665 	}
3666 
3667 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3668 		err = smoke_crescendo(&smoke, phase[n]);
3669 		if (err)
3670 			goto err_ctx;
3671 
3672 		err = smoke_random(&smoke, phase[n]);
3673 		if (err)
3674 			goto err_ctx;
3675 	}
3676 
3677 err_ctx:
3678 	if (igt_live_test_end(&t))
3679 		err = -EIO;
3680 
3681 	for (n = 0; n < smoke.ncontext; n++) {
3682 		if (!smoke.contexts[n])
3683 			break;
3684 		kernel_context_close(smoke.contexts[n]);
3685 	}
3686 
3687 err_batch:
3688 	i915_gem_object_put(smoke.batch);
3689 err_free:
3690 	kfree(smoke.contexts);
3691 
3692 	return err;
3693 }
3694 
3695 static int nop_virtual_engine(struct intel_gt *gt,
3696 			      struct intel_engine_cs **siblings,
3697 			      unsigned int nsibling,
3698 			      unsigned int nctx,
3699 			      unsigned int flags)
3700 #define CHAIN BIT(0)
3701 {
3702 	IGT_TIMEOUT(end_time);
3703 	struct i915_request *request[16] = {};
3704 	struct intel_context *ve[16];
3705 	unsigned long n, prime, nc;
3706 	struct igt_live_test t;
3707 	ktime_t times[2] = {};
3708 	int err;
3709 
3710 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3711 
3712 	for (n = 0; n < nctx; n++) {
3713 		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3714 		if (IS_ERR(ve[n])) {
3715 			err = PTR_ERR(ve[n]);
3716 			nctx = n;
3717 			goto out;
3718 		}
3719 
3720 		err = intel_context_pin(ve[n]);
3721 		if (err) {
3722 			intel_context_put(ve[n]);
3723 			nctx = n;
3724 			goto out;
3725 		}
3726 	}
3727 
3728 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3729 	if (err)
3730 		goto out;
3731 
3732 	for_each_prime_number_from(prime, 1, 8192) {
3733 		times[1] = ktime_get_raw();
3734 
3735 		if (flags & CHAIN) {
3736 			for (nc = 0; nc < nctx; nc++) {
3737 				for (n = 0; n < prime; n++) {
3738 					struct i915_request *rq;
3739 
3740 					rq = i915_request_create(ve[nc]);
3741 					if (IS_ERR(rq)) {
3742 						err = PTR_ERR(rq);
3743 						goto out;
3744 					}
3745 
3746 					if (request[nc])
3747 						i915_request_put(request[nc]);
3748 					request[nc] = i915_request_get(rq);
3749 					i915_request_add(rq);
3750 				}
3751 			}
3752 		} else {
3753 			for (n = 0; n < prime; n++) {
3754 				for (nc = 0; nc < nctx; nc++) {
3755 					struct i915_request *rq;
3756 
3757 					rq = i915_request_create(ve[nc]);
3758 					if (IS_ERR(rq)) {
3759 						err = PTR_ERR(rq);
3760 						goto out;
3761 					}
3762 
3763 					if (request[nc])
3764 						i915_request_put(request[nc]);
3765 					request[nc] = i915_request_get(rq);
3766 					i915_request_add(rq);
3767 				}
3768 			}
3769 		}
3770 
3771 		for (nc = 0; nc < nctx; nc++) {
3772 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3773 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3774 				       __func__, ve[0]->engine->name,
3775 				       request[nc]->fence.context,
3776 				       request[nc]->fence.seqno);
3777 
3778 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3779 					  __func__, ve[0]->engine->name,
3780 					  request[nc]->fence.context,
3781 					  request[nc]->fence.seqno);
3782 				GEM_TRACE_DUMP();
3783 				intel_gt_set_wedged(gt);
3784 				break;
3785 			}
3786 		}
3787 
3788 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3789 		if (prime == 1)
3790 			times[0] = times[1];
3791 
3792 		for (nc = 0; nc < nctx; nc++) {
3793 			i915_request_put(request[nc]);
3794 			request[nc] = NULL;
3795 		}
3796 
3797 		if (__igt_timeout(end_time, NULL))
3798 			break;
3799 	}
3800 
3801 	err = igt_live_test_end(&t);
3802 	if (err)
3803 		goto out;
3804 
3805 	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3806 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3807 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3808 
3809 out:
3810 	if (igt_flush_test(gt->i915))
3811 		err = -EIO;
3812 
3813 	for (nc = 0; nc < nctx; nc++) {
3814 		i915_request_put(request[nc]);
3815 		intel_context_unpin(ve[nc]);
3816 		intel_context_put(ve[nc]);
3817 	}
3818 	return err;
3819 }
3820 
3821 static unsigned int
3822 __select_siblings(struct intel_gt *gt,
3823 		  unsigned int class,
3824 		  struct intel_engine_cs **siblings,
3825 		  bool (*filter)(const struct intel_engine_cs *))
3826 {
3827 	unsigned int n = 0;
3828 	unsigned int inst;
3829 
3830 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3831 		if (!gt->engine_class[class][inst])
3832 			continue;
3833 
3834 		if (filter && !filter(gt->engine_class[class][inst]))
3835 			continue;
3836 
3837 		siblings[n++] = gt->engine_class[class][inst];
3838 	}
3839 
3840 	return n;
3841 }
3842 
3843 static unsigned int
3844 select_siblings(struct intel_gt *gt,
3845 		unsigned int class,
3846 		struct intel_engine_cs **siblings)
3847 {
3848 	return __select_siblings(gt, class, siblings, NULL);
3849 }
3850 
3851 static int live_virtual_engine(void *arg)
3852 {
3853 	struct intel_gt *gt = arg;
3854 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3855 	struct intel_engine_cs *engine;
3856 	enum intel_engine_id id;
3857 	unsigned int class;
3858 	int err;
3859 
3860 	if (intel_uc_uses_guc_submission(&gt->uc))
3861 		return 0;
3862 
3863 	for_each_engine(engine, gt, id) {
3864 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3865 		if (err) {
3866 			pr_err("Failed to wrap engine %s: err=%d\n",
3867 			       engine->name, err);
3868 			return err;
3869 		}
3870 	}
3871 
3872 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3873 		int nsibling, n;
3874 
3875 		nsibling = select_siblings(gt, class, siblings);
3876 		if (nsibling < 2)
3877 			continue;
3878 
3879 		for (n = 1; n <= nsibling + 1; n++) {
3880 			err = nop_virtual_engine(gt, siblings, nsibling,
3881 						 n, 0);
3882 			if (err)
3883 				return err;
3884 		}
3885 
3886 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3887 		if (err)
3888 			return err;
3889 	}
3890 
3891 	return 0;
3892 }
3893 
3894 static int mask_virtual_engine(struct intel_gt *gt,
3895 			       struct intel_engine_cs **siblings,
3896 			       unsigned int nsibling)
3897 {
3898 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3899 	struct intel_context *ve;
3900 	struct igt_live_test t;
3901 	unsigned int n;
3902 	int err;
3903 
3904 	/*
3905 	 * Check that by setting the execution mask on a request, we can
3906 	 * restrict it to our desired engine within the virtual engine.
3907 	 */
3908 
3909 	ve = intel_execlists_create_virtual(siblings, nsibling);
3910 	if (IS_ERR(ve)) {
3911 		err = PTR_ERR(ve);
3912 		goto out_close;
3913 	}
3914 
3915 	err = intel_context_pin(ve);
3916 	if (err)
3917 		goto out_put;
3918 
3919 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3920 	if (err)
3921 		goto out_unpin;
3922 
3923 	for (n = 0; n < nsibling; n++) {
3924 		request[n] = i915_request_create(ve);
3925 		if (IS_ERR(request[n])) {
3926 			err = PTR_ERR(request[n]);
3927 			nsibling = n;
3928 			goto out;
3929 		}
3930 
3931 		/* Reverse order as it's more likely to be unnatural */
3932 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3933 
3934 		i915_request_get(request[n]);
3935 		i915_request_add(request[n]);
3936 	}
3937 
3938 	for (n = 0; n < nsibling; n++) {
3939 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3940 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3941 			       __func__, ve->engine->name,
3942 			       request[n]->fence.context,
3943 			       request[n]->fence.seqno);
3944 
3945 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3946 				  __func__, ve->engine->name,
3947 				  request[n]->fence.context,
3948 				  request[n]->fence.seqno);
3949 			GEM_TRACE_DUMP();
3950 			intel_gt_set_wedged(gt);
3951 			err = -EIO;
3952 			goto out;
3953 		}
3954 
3955 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3956 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3957 			       request[n]->engine->name,
3958 			       siblings[nsibling - n - 1]->name);
3959 			err = -EINVAL;
3960 			goto out;
3961 		}
3962 	}
3963 
3964 	err = igt_live_test_end(&t);
3965 out:
3966 	if (igt_flush_test(gt->i915))
3967 		err = -EIO;
3968 
3969 	for (n = 0; n < nsibling; n++)
3970 		i915_request_put(request[n]);
3971 
3972 out_unpin:
3973 	intel_context_unpin(ve);
3974 out_put:
3975 	intel_context_put(ve);
3976 out_close:
3977 	return err;
3978 }
3979 
3980 static int live_virtual_mask(void *arg)
3981 {
3982 	struct intel_gt *gt = arg;
3983 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3984 	unsigned int class;
3985 	int err;
3986 
3987 	if (intel_uc_uses_guc_submission(&gt->uc))
3988 		return 0;
3989 
3990 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3991 		unsigned int nsibling;
3992 
3993 		nsibling = select_siblings(gt, class, siblings);
3994 		if (nsibling < 2)
3995 			continue;
3996 
3997 		err = mask_virtual_engine(gt, siblings, nsibling);
3998 		if (err)
3999 			return err;
4000 	}
4001 
4002 	return 0;
4003 }
4004 
4005 static int slicein_virtual_engine(struct intel_gt *gt,
4006 				  struct intel_engine_cs **siblings,
4007 				  unsigned int nsibling)
4008 {
4009 	const long timeout = slice_timeout(siblings[0]);
4010 	struct intel_context *ce;
4011 	struct i915_request *rq;
4012 	struct igt_spinner spin;
4013 	unsigned int n;
4014 	int err = 0;
4015 
4016 	/*
4017 	 * Virtual requests must take part in timeslicing on the target engines.
4018 	 */
4019 
4020 	if (igt_spinner_init(&spin, gt))
4021 		return -ENOMEM;
4022 
4023 	for (n = 0; n < nsibling; n++) {
4024 		ce = intel_context_create(siblings[n]);
4025 		if (IS_ERR(ce)) {
4026 			err = PTR_ERR(ce);
4027 			goto out;
4028 		}
4029 
4030 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4031 		intel_context_put(ce);
4032 		if (IS_ERR(rq)) {
4033 			err = PTR_ERR(rq);
4034 			goto out;
4035 		}
4036 
4037 		i915_request_add(rq);
4038 	}
4039 
4040 	ce = intel_execlists_create_virtual(siblings, nsibling);
4041 	if (IS_ERR(ce)) {
4042 		err = PTR_ERR(ce);
4043 		goto out;
4044 	}
4045 
4046 	rq = intel_context_create_request(ce);
4047 	intel_context_put(ce);
4048 	if (IS_ERR(rq)) {
4049 		err = PTR_ERR(rq);
4050 		goto out;
4051 	}
4052 
4053 	i915_request_get(rq);
4054 	i915_request_add(rq);
4055 	if (i915_request_wait(rq, 0, timeout) < 0) {
4056 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4057 			      __func__, rq->engine->name);
4058 		GEM_TRACE_DUMP();
4059 		intel_gt_set_wedged(gt);
4060 		err = -EIO;
4061 	}
4062 	i915_request_put(rq);
4063 
4064 out:
4065 	igt_spinner_end(&spin);
4066 	if (igt_flush_test(gt->i915))
4067 		err = -EIO;
4068 	igt_spinner_fini(&spin);
4069 	return err;
4070 }
4071 
4072 static int sliceout_virtual_engine(struct intel_gt *gt,
4073 				   struct intel_engine_cs **siblings,
4074 				   unsigned int nsibling)
4075 {
4076 	const long timeout = slice_timeout(siblings[0]);
4077 	struct intel_context *ce;
4078 	struct i915_request *rq;
4079 	struct igt_spinner spin;
4080 	unsigned int n;
4081 	int err = 0;
4082 
4083 	/*
4084 	 * Virtual requests must allow others a fair timeslice.
4085 	 */
4086 
4087 	if (igt_spinner_init(&spin, gt))
4088 		return -ENOMEM;
4089 
4090 	/* XXX We do not handle oversubscription and fairness with normal rq */
4091 	for (n = 0; n < nsibling; n++) {
4092 		ce = intel_execlists_create_virtual(siblings, nsibling);
4093 		if (IS_ERR(ce)) {
4094 			err = PTR_ERR(ce);
4095 			goto out;
4096 		}
4097 
4098 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4099 		intel_context_put(ce);
4100 		if (IS_ERR(rq)) {
4101 			err = PTR_ERR(rq);
4102 			goto out;
4103 		}
4104 
4105 		i915_request_add(rq);
4106 	}
4107 
4108 	for (n = 0; !err && n < nsibling; n++) {
4109 		ce = intel_context_create(siblings[n]);
4110 		if (IS_ERR(ce)) {
4111 			err = PTR_ERR(ce);
4112 			goto out;
4113 		}
4114 
4115 		rq = intel_context_create_request(ce);
4116 		intel_context_put(ce);
4117 		if (IS_ERR(rq)) {
4118 			err = PTR_ERR(rq);
4119 			goto out;
4120 		}
4121 
4122 		i915_request_get(rq);
4123 		i915_request_add(rq);
4124 		if (i915_request_wait(rq, 0, timeout) < 0) {
4125 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4126 				      __func__, siblings[n]->name);
4127 			GEM_TRACE_DUMP();
4128 			intel_gt_set_wedged(gt);
4129 			err = -EIO;
4130 		}
4131 		i915_request_put(rq);
4132 	}
4133 
4134 out:
4135 	igt_spinner_end(&spin);
4136 	if (igt_flush_test(gt->i915))
4137 		err = -EIO;
4138 	igt_spinner_fini(&spin);
4139 	return err;
4140 }
4141 
4142 static int live_virtual_slice(void *arg)
4143 {
4144 	struct intel_gt *gt = arg;
4145 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4146 	unsigned int class;
4147 	int err;
4148 
4149 	if (intel_uc_uses_guc_submission(&gt->uc))
4150 		return 0;
4151 
4152 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4153 		unsigned int nsibling;
4154 
4155 		nsibling = __select_siblings(gt, class, siblings,
4156 					     intel_engine_has_timeslices);
4157 		if (nsibling < 2)
4158 			continue;
4159 
4160 		err = slicein_virtual_engine(gt, siblings, nsibling);
4161 		if (err)
4162 			return err;
4163 
4164 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4165 		if (err)
4166 			return err;
4167 	}
4168 
4169 	return 0;
4170 }
4171 
4172 static int preserved_virtual_engine(struct intel_gt *gt,
4173 				    struct intel_engine_cs **siblings,
4174 				    unsigned int nsibling)
4175 {
4176 	struct i915_request *last = NULL;
4177 	struct intel_context *ve;
4178 	struct i915_vma *scratch;
4179 	struct igt_live_test t;
4180 	unsigned int n;
4181 	int err = 0;
4182 	u32 *cs;
4183 
4184 	scratch =
4185 		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4186 						    PAGE_SIZE);
4187 	if (IS_ERR(scratch))
4188 		return PTR_ERR(scratch);
4189 
4190 	err = i915_vma_sync(scratch);
4191 	if (err)
4192 		goto out_scratch;
4193 
4194 	ve = intel_execlists_create_virtual(siblings, nsibling);
4195 	if (IS_ERR(ve)) {
4196 		err = PTR_ERR(ve);
4197 		goto out_scratch;
4198 	}
4199 
4200 	err = intel_context_pin(ve);
4201 	if (err)
4202 		goto out_put;
4203 
4204 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4205 	if (err)
4206 		goto out_unpin;
4207 
4208 	for (n = 0; n < NUM_GPR_DW; n++) {
4209 		struct intel_engine_cs *engine = siblings[n % nsibling];
4210 		struct i915_request *rq;
4211 
4212 		rq = i915_request_create(ve);
4213 		if (IS_ERR(rq)) {
4214 			err = PTR_ERR(rq);
4215 			goto out_end;
4216 		}
4217 
4218 		i915_request_put(last);
4219 		last = i915_request_get(rq);
4220 
4221 		cs = intel_ring_begin(rq, 8);
4222 		if (IS_ERR(cs)) {
4223 			i915_request_add(rq);
4224 			err = PTR_ERR(cs);
4225 			goto out_end;
4226 		}
4227 
4228 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4229 		*cs++ = CS_GPR(engine, n);
4230 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4231 		*cs++ = 0;
4232 
4233 		*cs++ = MI_LOAD_REGISTER_IMM(1);
4234 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4235 		*cs++ = n + 1;
4236 
4237 		*cs++ = MI_NOOP;
4238 		intel_ring_advance(rq, cs);
4239 
4240 		/* Restrict this request to run on a particular engine */
4241 		rq->execution_mask = engine->mask;
4242 		i915_request_add(rq);
4243 	}
4244 
4245 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
4246 		err = -ETIME;
4247 		goto out_end;
4248 	}
4249 
4250 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4251 	if (IS_ERR(cs)) {
4252 		err = PTR_ERR(cs);
4253 		goto out_end;
4254 	}
4255 
4256 	for (n = 0; n < NUM_GPR_DW; n++) {
4257 		if (cs[n] != n) {
4258 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
4259 			       cs[n], n);
4260 			err = -EINVAL;
4261 			break;
4262 		}
4263 	}
4264 
4265 	i915_gem_object_unpin_map(scratch->obj);
4266 
4267 out_end:
4268 	if (igt_live_test_end(&t))
4269 		err = -EIO;
4270 	i915_request_put(last);
4271 out_unpin:
4272 	intel_context_unpin(ve);
4273 out_put:
4274 	intel_context_put(ve);
4275 out_scratch:
4276 	i915_vma_unpin_and_release(&scratch, 0);
4277 	return err;
4278 }
4279 
4280 static int live_virtual_preserved(void *arg)
4281 {
4282 	struct intel_gt *gt = arg;
4283 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4284 	unsigned int class;
4285 
4286 	/*
4287 	 * Check that the context image retains non-privileged (user) registers
4288 	 * from one engine to the next. For this we check that the CS_GPR
4289 	 * are preserved.
4290 	 */
4291 
4292 	if (intel_uc_uses_guc_submission(&gt->uc))
4293 		return 0;
4294 
4295 	/* As we use CS_GPR we cannot run before they existed on all engines. */
4296 	if (INTEL_GEN(gt->i915) < 9)
4297 		return 0;
4298 
4299 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4300 		int nsibling, err;
4301 
4302 		nsibling = select_siblings(gt, class, siblings);
4303 		if (nsibling < 2)
4304 			continue;
4305 
4306 		err = preserved_virtual_engine(gt, siblings, nsibling);
4307 		if (err)
4308 			return err;
4309 	}
4310 
4311 	return 0;
4312 }
4313 
4314 static int bond_virtual_engine(struct intel_gt *gt,
4315 			       unsigned int class,
4316 			       struct intel_engine_cs **siblings,
4317 			       unsigned int nsibling,
4318 			       unsigned int flags)
4319 #define BOND_SCHEDULE BIT(0)
4320 {
4321 	struct intel_engine_cs *master;
4322 	struct i915_request *rq[16];
4323 	enum intel_engine_id id;
4324 	struct igt_spinner spin;
4325 	unsigned long n;
4326 	int err;
4327 
4328 	/*
4329 	 * A set of bonded requests is intended to be run concurrently
4330 	 * across a number of engines. We use one request per-engine
4331 	 * and a magic fence to schedule each of the bonded requests
4332 	 * at the same time. A consequence of our current scheduler is that
4333 	 * we only move requests to the HW ready queue when the request
4334 	 * becomes ready, that is when all of its prerequisite fences have
4335 	 * been signaled. As one of those fences is the master submit fence,
4336 	 * there is a delay on all secondary fences as the HW may be
4337 	 * currently busy. Equally, as all the requests are independent,
4338 	 * they may have other fences that delay individual request
4339 	 * submission to HW. Ergo, we do not guarantee that all requests are
4340 	 * immediately submitted to HW at the same time, just that if the
4341 	 * rules are abided by, they are ready at the same time as the
4342 	 * first is submitted. Userspace can embed semaphores in its batch
4343 	 * to ensure parallel execution of its phases as it requires.
4344 	 * Though naturally it gets requested that perhaps the scheduler should
4345 	 * take care of parallel execution, even across preemption events on
4346 	 * different HW. (The proper answer is of course "lalalala".)
4347 	 *
4348 	 * With the submit-fence, we have identified three possible phases
4349 	 * of synchronisation depending on the master fence: queued (not
4350 	 * ready), executing, and signaled. The first two are quite simple
4351 	 * and checked below. However, the signaled master fence handling is
4352 	 * contentious. Currently we do not distinguish between a signaled
4353 	 * fence and an expired fence, as once signaled it does not convey
4354 	 * any information about the previous execution. It may even be freed
4355 	 * and hence checking later it may not exist at all. Ergo we currently
4356 	 * do not apply the bonding constraint for an already signaled fence,
4357 	 * as our expectation is that it should not constrain the secondaries
4358 	 * and is outside of the scope of the bonded request API (i.e. all
4359 	 * userspace requests are meant to be running in parallel). As
4360 	 * it imposes no constraint, and is effectively a no-op, we do not
4361 	 * check below as normal execution flows are checked extensively above.
4362 	 *
4363 	 * XXX Is the degenerate handling of signaled submit fences the
4364 	 * expected behaviour for userpace?
4365 	 */
4366 
4367 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4368 
4369 	if (igt_spinner_init(&spin, gt))
4370 		return -ENOMEM;
4371 
4372 	err = 0;
4373 	rq[0] = ERR_PTR(-ENOMEM);
4374 	for_each_engine(master, gt, id) {
4375 		struct i915_sw_fence fence = {};
4376 		struct intel_context *ce;
4377 
4378 		if (master->class == class)
4379 			continue;
4380 
4381 		ce = intel_context_create(master);
4382 		if (IS_ERR(ce)) {
4383 			err = PTR_ERR(ce);
4384 			goto out;
4385 		}
4386 
4387 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4388 
4389 		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4390 		intel_context_put(ce);
4391 		if (IS_ERR(rq[0])) {
4392 			err = PTR_ERR(rq[0]);
4393 			goto out;
4394 		}
4395 		i915_request_get(rq[0]);
4396 
4397 		if (flags & BOND_SCHEDULE) {
4398 			onstack_fence_init(&fence);
4399 			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4400 							       &fence,
4401 							       GFP_KERNEL);
4402 		}
4403 
4404 		i915_request_add(rq[0]);
4405 		if (err < 0)
4406 			goto out;
4407 
4408 		if (!(flags & BOND_SCHEDULE) &&
4409 		    !igt_wait_for_spinner(&spin, rq[0])) {
4410 			err = -EIO;
4411 			goto out;
4412 		}
4413 
4414 		for (n = 0; n < nsibling; n++) {
4415 			struct intel_context *ve;
4416 
4417 			ve = intel_execlists_create_virtual(siblings, nsibling);
4418 			if (IS_ERR(ve)) {
4419 				err = PTR_ERR(ve);
4420 				onstack_fence_fini(&fence);
4421 				goto out;
4422 			}
4423 
4424 			err = intel_virtual_engine_attach_bond(ve->engine,
4425 							       master,
4426 							       siblings[n]);
4427 			if (err) {
4428 				intel_context_put(ve);
4429 				onstack_fence_fini(&fence);
4430 				goto out;
4431 			}
4432 
4433 			err = intel_context_pin(ve);
4434 			intel_context_put(ve);
4435 			if (err) {
4436 				onstack_fence_fini(&fence);
4437 				goto out;
4438 			}
4439 
4440 			rq[n + 1] = i915_request_create(ve);
4441 			intel_context_unpin(ve);
4442 			if (IS_ERR(rq[n + 1])) {
4443 				err = PTR_ERR(rq[n + 1]);
4444 				onstack_fence_fini(&fence);
4445 				goto out;
4446 			}
4447 			i915_request_get(rq[n + 1]);
4448 
4449 			err = i915_request_await_execution(rq[n + 1],
4450 							   &rq[0]->fence,
4451 							   ve->engine->bond_execute);
4452 			i915_request_add(rq[n + 1]);
4453 			if (err < 0) {
4454 				onstack_fence_fini(&fence);
4455 				goto out;
4456 			}
4457 		}
4458 		onstack_fence_fini(&fence);
4459 		intel_engine_flush_submission(master);
4460 		igt_spinner_end(&spin);
4461 
4462 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4463 			pr_err("Master request did not execute (on %s)!\n",
4464 			       rq[0]->engine->name);
4465 			err = -EIO;
4466 			goto out;
4467 		}
4468 
4469 		for (n = 0; n < nsibling; n++) {
4470 			if (i915_request_wait(rq[n + 1], 0,
4471 					      MAX_SCHEDULE_TIMEOUT) < 0) {
4472 				err = -EIO;
4473 				goto out;
4474 			}
4475 
4476 			if (rq[n + 1]->engine != siblings[n]) {
4477 				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4478 				       siblings[n]->name,
4479 				       rq[n + 1]->engine->name,
4480 				       rq[0]->engine->name);
4481 				err = -EINVAL;
4482 				goto out;
4483 			}
4484 		}
4485 
4486 		for (n = 0; !IS_ERR(rq[n]); n++)
4487 			i915_request_put(rq[n]);
4488 		rq[0] = ERR_PTR(-ENOMEM);
4489 	}
4490 
4491 out:
4492 	for (n = 0; !IS_ERR(rq[n]); n++)
4493 		i915_request_put(rq[n]);
4494 	if (igt_flush_test(gt->i915))
4495 		err = -EIO;
4496 
4497 	igt_spinner_fini(&spin);
4498 	return err;
4499 }
4500 
4501 static int live_virtual_bond(void *arg)
4502 {
4503 	static const struct phase {
4504 		const char *name;
4505 		unsigned int flags;
4506 	} phases[] = {
4507 		{ "", 0 },
4508 		{ "schedule", BOND_SCHEDULE },
4509 		{ },
4510 	};
4511 	struct intel_gt *gt = arg;
4512 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4513 	unsigned int class;
4514 	int err;
4515 
4516 	if (intel_uc_uses_guc_submission(&gt->uc))
4517 		return 0;
4518 
4519 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4520 		const struct phase *p;
4521 		int nsibling;
4522 
4523 		nsibling = select_siblings(gt, class, siblings);
4524 		if (nsibling < 2)
4525 			continue;
4526 
4527 		for (p = phases; p->name; p++) {
4528 			err = bond_virtual_engine(gt,
4529 						  class, siblings, nsibling,
4530 						  p->flags);
4531 			if (err) {
4532 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4533 				       __func__, p->name, class, nsibling, err);
4534 				return err;
4535 			}
4536 		}
4537 	}
4538 
4539 	return 0;
4540 }
4541 
4542 static int reset_virtual_engine(struct intel_gt *gt,
4543 				struct intel_engine_cs **siblings,
4544 				unsigned int nsibling)
4545 {
4546 	struct intel_engine_cs *engine;
4547 	struct intel_context *ve;
4548 	struct igt_spinner spin;
4549 	struct i915_request *rq;
4550 	unsigned int n;
4551 	int err = 0;
4552 
4553 	/*
4554 	 * In order to support offline error capture for fast preempt reset,
4555 	 * we need to decouple the guilty request and ensure that it and its
4556 	 * descendents are not executed while the capture is in progress.
4557 	 */
4558 
4559 	if (igt_spinner_init(&spin, gt))
4560 		return -ENOMEM;
4561 
4562 	ve = intel_execlists_create_virtual(siblings, nsibling);
4563 	if (IS_ERR(ve)) {
4564 		err = PTR_ERR(ve);
4565 		goto out_spin;
4566 	}
4567 
4568 	for (n = 0; n < nsibling; n++)
4569 		st_engine_heartbeat_disable(siblings[n]);
4570 
4571 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4572 	if (IS_ERR(rq)) {
4573 		err = PTR_ERR(rq);
4574 		goto out_heartbeat;
4575 	}
4576 	i915_request_add(rq);
4577 
4578 	if (!igt_wait_for_spinner(&spin, rq)) {
4579 		intel_gt_set_wedged(gt);
4580 		err = -ETIME;
4581 		goto out_heartbeat;
4582 	}
4583 
4584 	engine = rq->engine;
4585 	GEM_BUG_ON(engine == ve->engine);
4586 
4587 	/* Take ownership of the reset and tasklet */
4588 	local_bh_disable();
4589 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4590 			     &gt->reset.flags)) {
4591 		local_bh_enable();
4592 		intel_gt_set_wedged(gt);
4593 		err = -EBUSY;
4594 		goto out_heartbeat;
4595 	}
4596 	tasklet_disable(&engine->execlists.tasklet);
4597 
4598 	engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4599 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4600 
4601 	/* Fake a preemption event; failed of course */
4602 	spin_lock_irq(&engine->active.lock);
4603 	__unwind_incomplete_requests(engine);
4604 	spin_unlock_irq(&engine->active.lock);
4605 	GEM_BUG_ON(rq->engine != engine);
4606 
4607 	/* Reset the engine while keeping our active request on hold */
4608 	execlists_hold(engine, rq);
4609 	GEM_BUG_ON(!i915_request_on_hold(rq));
4610 
4611 	__intel_engine_reset_bh(engine, NULL);
4612 	GEM_BUG_ON(rq->fence.error != -EIO);
4613 
4614 	/* Release our grasp on the engine, letting CS flow again */
4615 	tasklet_enable(&engine->execlists.tasklet);
4616 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4617 	local_bh_enable();
4618 
4619 	/* Check that we do not resubmit the held request */
4620 	i915_request_get(rq);
4621 	if (!i915_request_wait(rq, 0, HZ / 5)) {
4622 		pr_err("%s: on hold request completed!\n",
4623 		       engine->name);
4624 		intel_gt_set_wedged(gt);
4625 		err = -EIO;
4626 		goto out_rq;
4627 	}
4628 	GEM_BUG_ON(!i915_request_on_hold(rq));
4629 
4630 	/* But is resubmitted on release */
4631 	execlists_unhold(engine, rq);
4632 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4633 		pr_err("%s: held request did not complete!\n",
4634 		       engine->name);
4635 		intel_gt_set_wedged(gt);
4636 		err = -ETIME;
4637 	}
4638 
4639 out_rq:
4640 	i915_request_put(rq);
4641 out_heartbeat:
4642 	for (n = 0; n < nsibling; n++)
4643 		st_engine_heartbeat_enable(siblings[n]);
4644 
4645 	intel_context_put(ve);
4646 out_spin:
4647 	igt_spinner_fini(&spin);
4648 	return err;
4649 }
4650 
4651 static int live_virtual_reset(void *arg)
4652 {
4653 	struct intel_gt *gt = arg;
4654 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4655 	unsigned int class;
4656 
4657 	/*
4658 	 * Check that we handle a reset event within a virtual engine.
4659 	 * Only the physical engine is reset, but we have to check the flow
4660 	 * of the virtual requests around the reset, and make sure it is not
4661 	 * forgotten.
4662 	 */
4663 
4664 	if (intel_uc_uses_guc_submission(&gt->uc))
4665 		return 0;
4666 
4667 	if (!intel_has_reset_engine(gt))
4668 		return 0;
4669 
4670 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4671 		int nsibling, err;
4672 
4673 		nsibling = select_siblings(gt, class, siblings);
4674 		if (nsibling < 2)
4675 			continue;
4676 
4677 		err = reset_virtual_engine(gt, siblings, nsibling);
4678 		if (err)
4679 			return err;
4680 	}
4681 
4682 	return 0;
4683 }
4684 
4685 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4686 {
4687 	static const struct i915_subtest tests[] = {
4688 		SUBTEST(live_sanitycheck),
4689 		SUBTEST(live_unlite_switch),
4690 		SUBTEST(live_unlite_preempt),
4691 		SUBTEST(live_unlite_ring),
4692 		SUBTEST(live_pin_rewind),
4693 		SUBTEST(live_hold_reset),
4694 		SUBTEST(live_error_interrupt),
4695 		SUBTEST(live_timeslice_preempt),
4696 		SUBTEST(live_timeslice_rewind),
4697 		SUBTEST(live_timeslice_queue),
4698 		SUBTEST(live_timeslice_nopreempt),
4699 		SUBTEST(live_busywait_preempt),
4700 		SUBTEST(live_preempt),
4701 		SUBTEST(live_late_preempt),
4702 		SUBTEST(live_nopreempt),
4703 		SUBTEST(live_preempt_cancel),
4704 		SUBTEST(live_suppress_self_preempt),
4705 		SUBTEST(live_chain_preempt),
4706 		SUBTEST(live_preempt_ring),
4707 		SUBTEST(live_preempt_gang),
4708 		SUBTEST(live_preempt_timeout),
4709 		SUBTEST(live_preempt_user),
4710 		SUBTEST(live_preempt_smoke),
4711 		SUBTEST(live_virtual_engine),
4712 		SUBTEST(live_virtual_mask),
4713 		SUBTEST(live_virtual_preserved),
4714 		SUBTEST(live_virtual_slice),
4715 		SUBTEST(live_virtual_bond),
4716 		SUBTEST(live_virtual_reset),
4717 	};
4718 
4719 	if (!HAS_EXECLISTS(i915))
4720 		return 0;
4721 
4722 	if (intel_gt_is_wedged(&i915->gt))
4723 		return 0;
4724 
4725 	return intel_gt_live_subtests(tests, &i915->gt);
4726 }
4727