xref: /linux/drivers/gpu/drm/i915/gt/selftest_lrc.c (revision 95298d63c67673c654c08952672d016212b26054)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
/* MMIO offset of the n'th GPR dword, relative to the engine's mmio_base */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + 4 * (n))
#define NUM_GPR 16
/* Each GPR occupies 2 dwords */
#define NUM_GPR_DW (2 * NUM_GPR)
26 
27 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 {
29 	struct drm_i915_gem_object *obj;
30 	struct i915_vma *vma;
31 	int err;
32 
33 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
34 	if (IS_ERR(obj))
35 		return ERR_CAST(obj);
36 
37 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38 
39 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
40 	if (IS_ERR(vma)) {
41 		i915_gem_object_put(obj);
42 		return vma;
43 	}
44 
45 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46 	if (err) {
47 		i915_gem_object_put(obj);
48 		return ERR_PTR(err);
49 	}
50 
51 	return vma;
52 }
53 
/*
 * Switch off the heartbeat on @engine for the duration of a test: zero
 * the heartbeat interval, take an engine-pm reference (released again by
 * engine_heartbeat_enable()) and park the heartbeat.
 */
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
61 
/*
 * Undo engine_heartbeat_disable(): release the engine-pm reference and
 * restore the heartbeat interval from the engine's defaults.
 */
static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}
69 
70 static bool is_active(struct i915_request *rq)
71 {
72 	if (i915_request_is_active(rq))
73 		return true;
74 
75 	if (i915_request_on_hold(rq))
76 		return true;
77 
78 	if (i915_request_started(rq))
79 		return true;
80 
81 	return false;
82 }
83 
84 static int wait_for_submit(struct intel_engine_cs *engine,
85 			   struct i915_request *rq,
86 			   unsigned long timeout)
87 {
88 	timeout += jiffies;
89 	do {
90 		bool done = time_after(jiffies, timeout);
91 
92 		if (i915_request_completed(rq)) /* that was quick! */
93 			return 0;
94 
95 		/* Wait until the HW has acknowleged the submission (or err) */
96 		intel_engine_flush_submission(engine);
97 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
98 			return 0;
99 
100 		if (done)
101 			return -ETIME;
102 
103 		cond_resched();
104 	} while (1);
105 }
106 
107 static int wait_for_reset(struct intel_engine_cs *engine,
108 			  struct i915_request *rq,
109 			  unsigned long timeout)
110 {
111 	timeout += jiffies;
112 
113 	do {
114 		cond_resched();
115 		intel_engine_flush_submission(engine);
116 
117 		if (READ_ONCE(engine->execlists.pending[0]))
118 			continue;
119 
120 		if (i915_request_completed(rq))
121 			break;
122 
123 		if (READ_ONCE(rq->fence.error))
124 			break;
125 	} while (time_before(jiffies, timeout));
126 
127 	flush_scheduled_work();
128 
129 	if (rq->fence.error != -EIO) {
130 		pr_err("%s: hanging request %llx:%lld not reset\n",
131 		       engine->name,
132 		       rq->fence.context,
133 		       rq->fence.seqno);
134 		return -EINVAL;
135 	}
136 
137 	/* Give the request a jiffie to complete after flushing the worker */
138 	if (i915_request_wait(rq, 0,
139 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
140 		pr_err("%s: hanging request %llx:%lld did not complete\n",
141 		       engine->name,
142 		       rq->fence.context,
143 		       rq->fence.seqno);
144 		return -ETIME;
145 	}
146 
147 	return 0;
148 }
149 
150 static int live_sanitycheck(void *arg)
151 {
152 	struct intel_gt *gt = arg;
153 	struct intel_engine_cs *engine;
154 	enum intel_engine_id id;
155 	struct igt_spinner spin;
156 	int err = 0;
157 
158 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
159 		return 0;
160 
161 	if (igt_spinner_init(&spin, gt))
162 		return -ENOMEM;
163 
164 	for_each_engine(engine, gt, id) {
165 		struct intel_context *ce;
166 		struct i915_request *rq;
167 
168 		ce = intel_context_create(engine);
169 		if (IS_ERR(ce)) {
170 			err = PTR_ERR(ce);
171 			break;
172 		}
173 
174 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
175 		if (IS_ERR(rq)) {
176 			err = PTR_ERR(rq);
177 			goto out_ctx;
178 		}
179 
180 		i915_request_add(rq);
181 		if (!igt_wait_for_spinner(&spin, rq)) {
182 			GEM_TRACE("spinner failed to start\n");
183 			GEM_TRACE_DUMP();
184 			intel_gt_set_wedged(gt);
185 			err = -EIO;
186 			goto out_ctx;
187 		}
188 
189 		igt_spinner_end(&spin);
190 		if (igt_flush_test(gt->i915)) {
191 			err = -EIO;
192 			goto out_ctx;
193 		}
194 
195 out_ctx:
196 		intel_context_put(ce);
197 		if (err)
198 			break;
199 	}
200 
201 	igt_spinner_fini(&spin);
202 	return err;
203 }
204 
205 static int live_unlite_restore(struct intel_gt *gt, int prio)
206 {
207 	struct intel_engine_cs *engine;
208 	enum intel_engine_id id;
209 	struct igt_spinner spin;
210 	int err = -ENOMEM;
211 
212 	/*
213 	 * Check that we can correctly context switch between 2 instances
214 	 * on the same engine from the same parent context.
215 	 */
216 
217 	if (igt_spinner_init(&spin, gt))
218 		return err;
219 
220 	err = 0;
221 	for_each_engine(engine, gt, id) {
222 		struct intel_context *ce[2] = {};
223 		struct i915_request *rq[2];
224 		struct igt_live_test t;
225 		int n;
226 
227 		if (prio && !intel_engine_has_preemption(engine))
228 			continue;
229 
230 		if (!intel_engine_can_store_dword(engine))
231 			continue;
232 
233 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
234 			err = -EIO;
235 			break;
236 		}
237 		engine_heartbeat_disable(engine);
238 
239 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
240 			struct intel_context *tmp;
241 
242 			tmp = intel_context_create(engine);
243 			if (IS_ERR(tmp)) {
244 				err = PTR_ERR(tmp);
245 				goto err_ce;
246 			}
247 
248 			err = intel_context_pin(tmp);
249 			if (err) {
250 				intel_context_put(tmp);
251 				goto err_ce;
252 			}
253 
254 			/*
255 			 * Setup the pair of contexts such that if we
256 			 * lite-restore using the RING_TAIL from ce[1] it
257 			 * will execute garbage from ce[0]->ring.
258 			 */
259 			memset(tmp->ring->vaddr,
260 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
261 			       tmp->ring->vma->size);
262 
263 			ce[n] = tmp;
264 		}
265 		GEM_BUG_ON(!ce[1]->ring->size);
266 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
267 		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
268 
269 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
270 		if (IS_ERR(rq[0])) {
271 			err = PTR_ERR(rq[0]);
272 			goto err_ce;
273 		}
274 
275 		i915_request_get(rq[0]);
276 		i915_request_add(rq[0]);
277 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
278 
279 		if (!igt_wait_for_spinner(&spin, rq[0])) {
280 			i915_request_put(rq[0]);
281 			goto err_ce;
282 		}
283 
284 		rq[1] = i915_request_create(ce[1]);
285 		if (IS_ERR(rq[1])) {
286 			err = PTR_ERR(rq[1]);
287 			i915_request_put(rq[0]);
288 			goto err_ce;
289 		}
290 
291 		if (!prio) {
292 			/*
293 			 * Ensure we do the switch to ce[1] on completion.
294 			 *
295 			 * rq[0] is already submitted, so this should reduce
296 			 * to a no-op (a wait on a request on the same engine
297 			 * uses the submit fence, not the completion fence),
298 			 * but it will install a dependency on rq[1] for rq[0]
299 			 * that will prevent the pair being reordered by
300 			 * timeslicing.
301 			 */
302 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
303 		}
304 
305 		i915_request_get(rq[1]);
306 		i915_request_add(rq[1]);
307 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
308 		i915_request_put(rq[0]);
309 
310 		if (prio) {
311 			struct i915_sched_attr attr = {
312 				.priority = prio,
313 			};
314 
315 			/* Alternatively preempt the spinner with ce[1] */
316 			engine->schedule(rq[1], &attr);
317 		}
318 
319 		/* And switch back to ce[0] for good measure */
320 		rq[0] = i915_request_create(ce[0]);
321 		if (IS_ERR(rq[0])) {
322 			err = PTR_ERR(rq[0]);
323 			i915_request_put(rq[1]);
324 			goto err_ce;
325 		}
326 
327 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
328 		i915_request_get(rq[0]);
329 		i915_request_add(rq[0]);
330 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
331 		i915_request_put(rq[1]);
332 		i915_request_put(rq[0]);
333 
334 err_ce:
335 		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
336 		igt_spinner_end(&spin);
337 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
338 			if (IS_ERR_OR_NULL(ce[n]))
339 				break;
340 
341 			intel_context_unpin(ce[n]);
342 			intel_context_put(ce[n]);
343 		}
344 
345 		engine_heartbeat_enable(engine);
346 		if (igt_live_test_end(&t))
347 			err = -EIO;
348 		if (err)
349 			break;
350 	}
351 
352 	igt_spinner_fini(&spin);
353 	return err;
354 }
355 
/* Run the unlite-restore test without preemption (priority 0) */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
360 
361 static int live_unlite_preempt(void *arg)
362 {
363 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
364 }
365 
366 static int live_pin_rewind(void *arg)
367 {
368 	struct intel_gt *gt = arg;
369 	struct intel_engine_cs *engine;
370 	enum intel_engine_id id;
371 	int err = 0;
372 
373 	/*
374 	 * We have to be careful not to trust intel_ring too much, for example
375 	 * ring->head is updated upon retire which is out of sync with pinning
376 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
377 	 * or else we risk writing an older, stale value.
378 	 *
379 	 * To simulate this, let's apply a bit of deliberate sabotague.
380 	 */
381 
382 	for_each_engine(engine, gt, id) {
383 		struct intel_context *ce;
384 		struct i915_request *rq;
385 		struct intel_ring *ring;
386 		struct igt_live_test t;
387 
388 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
389 			err = -EIO;
390 			break;
391 		}
392 
393 		ce = intel_context_create(engine);
394 		if (IS_ERR(ce)) {
395 			err = PTR_ERR(ce);
396 			break;
397 		}
398 
399 		err = intel_context_pin(ce);
400 		if (err) {
401 			intel_context_put(ce);
402 			break;
403 		}
404 
405 		/* Keep the context awake while we play games */
406 		err = i915_active_acquire(&ce->active);
407 		if (err) {
408 			intel_context_unpin(ce);
409 			intel_context_put(ce);
410 			break;
411 		}
412 		ring = ce->ring;
413 
414 		/* Poison the ring, and offset the next request from HEAD */
415 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
416 		ring->emit = ring->size / 2;
417 		ring->tail = ring->emit;
418 		GEM_BUG_ON(ring->head);
419 
420 		intel_context_unpin(ce);
421 
422 		/* Submit a simple nop request */
423 		GEM_BUG_ON(intel_context_is_pinned(ce));
424 		rq = intel_context_create_request(ce);
425 		i915_active_release(&ce->active); /* e.g. async retire */
426 		intel_context_put(ce);
427 		if (IS_ERR(rq)) {
428 			err = PTR_ERR(rq);
429 			break;
430 		}
431 		GEM_BUG_ON(!rq->head);
432 		i915_request_add(rq);
433 
434 		/* Expect not to hang! */
435 		if (igt_live_test_end(&t)) {
436 			err = -EIO;
437 			break;
438 		}
439 	}
440 
441 	return err;
442 }
443 
444 static int live_hold_reset(void *arg)
445 {
446 	struct intel_gt *gt = arg;
447 	struct intel_engine_cs *engine;
448 	enum intel_engine_id id;
449 	struct igt_spinner spin;
450 	int err = 0;
451 
452 	/*
453 	 * In order to support offline error capture for fast preempt reset,
454 	 * we need to decouple the guilty request and ensure that it and its
455 	 * descendents are not executed while the capture is in progress.
456 	 */
457 
458 	if (!intel_has_reset_engine(gt))
459 		return 0;
460 
461 	if (igt_spinner_init(&spin, gt))
462 		return -ENOMEM;
463 
464 	for_each_engine(engine, gt, id) {
465 		struct intel_context *ce;
466 		struct i915_request *rq;
467 
468 		ce = intel_context_create(engine);
469 		if (IS_ERR(ce)) {
470 			err = PTR_ERR(ce);
471 			break;
472 		}
473 
474 		engine_heartbeat_disable(engine);
475 
476 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
477 		if (IS_ERR(rq)) {
478 			err = PTR_ERR(rq);
479 			goto out;
480 		}
481 		i915_request_add(rq);
482 
483 		if (!igt_wait_for_spinner(&spin, rq)) {
484 			intel_gt_set_wedged(gt);
485 			err = -ETIME;
486 			goto out;
487 		}
488 
489 		/* We have our request executing, now remove it and reset */
490 
491 		if (test_and_set_bit(I915_RESET_ENGINE + id,
492 				     &gt->reset.flags)) {
493 			intel_gt_set_wedged(gt);
494 			err = -EBUSY;
495 			goto out;
496 		}
497 		tasklet_disable(&engine->execlists.tasklet);
498 
499 		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
500 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
501 
502 		i915_request_get(rq);
503 		execlists_hold(engine, rq);
504 		GEM_BUG_ON(!i915_request_on_hold(rq));
505 
506 		intel_engine_reset(engine, NULL);
507 		GEM_BUG_ON(rq->fence.error != -EIO);
508 
509 		tasklet_enable(&engine->execlists.tasklet);
510 		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
511 				      &gt->reset.flags);
512 
513 		/* Check that we do not resubmit the held request */
514 		if (!i915_request_wait(rq, 0, HZ / 5)) {
515 			pr_err("%s: on hold request completed!\n",
516 			       engine->name);
517 			i915_request_put(rq);
518 			err = -EIO;
519 			goto out;
520 		}
521 		GEM_BUG_ON(!i915_request_on_hold(rq));
522 
523 		/* But is resubmitted on release */
524 		execlists_unhold(engine, rq);
525 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
526 			pr_err("%s: held request did not complete!\n",
527 			       engine->name);
528 			intel_gt_set_wedged(gt);
529 			err = -ETIME;
530 		}
531 		i915_request_put(rq);
532 
533 out:
534 		engine_heartbeat_enable(engine);
535 		intel_context_put(ce);
536 		if (err)
537 			break;
538 	}
539 
540 	igt_spinner_fini(&spin);
541 	return err;
542 }
543 
/* Human readable tag for an expected error code: "bad" if set, else "good" */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
548 
549 static int live_error_interrupt(void *arg)
550 {
551 	static const struct error_phase {
552 		enum { GOOD = 0, BAD = -EIO } error[2];
553 	} phases[] = {
554 		{ { BAD,  GOOD } },
555 		{ { BAD,  BAD  } },
556 		{ { BAD,  GOOD } },
557 		{ { GOOD, GOOD } }, /* sentinel */
558 	};
559 	struct intel_gt *gt = arg;
560 	struct intel_engine_cs *engine;
561 	enum intel_engine_id id;
562 
563 	/*
564 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
565 	 * of invalid commands in user batches that will cause a GPU hang.
566 	 * This is a faster mechanism than using hangcheck/heartbeats, but
567 	 * only detects problems the HW knows about -- it will not warn when
568 	 * we kill the HW!
569 	 *
570 	 * To verify our detection and reset, we throw some invalid commands
571 	 * at the HW and wait for the interrupt.
572 	 */
573 
574 	if (!intel_has_reset_engine(gt))
575 		return 0;
576 
577 	for_each_engine(engine, gt, id) {
578 		const struct error_phase *p;
579 		int err = 0;
580 
581 		engine_heartbeat_disable(engine);
582 
583 		for (p = phases; p->error[0] != GOOD; p++) {
584 			struct i915_request *client[ARRAY_SIZE(phases->error)];
585 			u32 *cs;
586 			int i;
587 
588 			memset(client, 0, sizeof(*client));
589 			for (i = 0; i < ARRAY_SIZE(client); i++) {
590 				struct intel_context *ce;
591 				struct i915_request *rq;
592 
593 				ce = intel_context_create(engine);
594 				if (IS_ERR(ce)) {
595 					err = PTR_ERR(ce);
596 					goto out;
597 				}
598 
599 				rq = intel_context_create_request(ce);
600 				intel_context_put(ce);
601 				if (IS_ERR(rq)) {
602 					err = PTR_ERR(rq);
603 					goto out;
604 				}
605 
606 				if (rq->engine->emit_init_breadcrumb) {
607 					err = rq->engine->emit_init_breadcrumb(rq);
608 					if (err) {
609 						i915_request_add(rq);
610 						goto out;
611 					}
612 				}
613 
614 				cs = intel_ring_begin(rq, 2);
615 				if (IS_ERR(cs)) {
616 					i915_request_add(rq);
617 					err = PTR_ERR(cs);
618 					goto out;
619 				}
620 
621 				if (p->error[i]) {
622 					*cs++ = 0xdeadbeef;
623 					*cs++ = 0xdeadbeef;
624 				} else {
625 					*cs++ = MI_NOOP;
626 					*cs++ = MI_NOOP;
627 				}
628 
629 				client[i] = i915_request_get(rq);
630 				i915_request_add(rq);
631 			}
632 
633 			err = wait_for_submit(engine, client[0], HZ / 2);
634 			if (err) {
635 				pr_err("%s: first request did not start within time!\n",
636 				       engine->name);
637 				err = -ETIME;
638 				goto out;
639 			}
640 
641 			for (i = 0; i < ARRAY_SIZE(client); i++) {
642 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
643 					pr_debug("%s: %s request incomplete!\n",
644 						 engine->name,
645 						 error_repr(p->error[i]));
646 
647 				if (!i915_request_started(client[i])) {
648 					pr_err("%s: %s request not started!\n",
649 					       engine->name,
650 					       error_repr(p->error[i]));
651 					err = -ETIME;
652 					goto out;
653 				}
654 
655 				/* Kick the tasklet to process the error */
656 				intel_engine_flush_submission(engine);
657 				if (client[i]->fence.error != p->error[i]) {
658 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
659 					       engine->name,
660 					       error_repr(p->error[i]),
661 					       i915_request_completed(client[i]) ? "completed" : "running",
662 					       client[i]->fence.error);
663 					err = -EINVAL;
664 					goto out;
665 				}
666 			}
667 
668 out:
669 			for (i = 0; i < ARRAY_SIZE(client); i++)
670 				if (client[i])
671 					i915_request_put(client[i]);
672 			if (err) {
673 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
674 				       engine->name, p - phases,
675 				       p->error[0], p->error[1]);
676 				break;
677 			}
678 		}
679 
680 		engine_heartbeat_enable(engine);
681 		if (err) {
682 			intel_gt_set_wedged(gt);
683 			return err;
684 		}
685 	}
686 
687 	return 0;
688 }
689 
690 static int
691 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
692 {
693 	u32 *cs;
694 
695 	cs = intel_ring_begin(rq, 10);
696 	if (IS_ERR(cs))
697 		return PTR_ERR(cs);
698 
699 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
700 
701 	*cs++ = MI_SEMAPHORE_WAIT |
702 		MI_SEMAPHORE_GLOBAL_GTT |
703 		MI_SEMAPHORE_POLL |
704 		MI_SEMAPHORE_SAD_NEQ_SDD;
705 	*cs++ = 0;
706 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
707 	*cs++ = 0;
708 
709 	if (idx > 0) {
710 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
711 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
712 		*cs++ = 0;
713 		*cs++ = 1;
714 	} else {
715 		*cs++ = MI_NOOP;
716 		*cs++ = MI_NOOP;
717 		*cs++ = MI_NOOP;
718 		*cs++ = MI_NOOP;
719 	}
720 
721 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
722 
723 	intel_ring_advance(rq, cs);
724 	return 0;
725 }
726 
727 static struct i915_request *
728 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
729 {
730 	struct intel_context *ce;
731 	struct i915_request *rq;
732 	int err;
733 
734 	ce = intel_context_create(engine);
735 	if (IS_ERR(ce))
736 		return ERR_CAST(ce);
737 
738 	rq = intel_context_create_request(ce);
739 	if (IS_ERR(rq))
740 		goto out_ce;
741 
742 	err = 0;
743 	if (rq->engine->emit_init_breadcrumb)
744 		err = rq->engine->emit_init_breadcrumb(rq);
745 	if (err == 0)
746 		err = emit_semaphore_chain(rq, vma, idx);
747 	if (err == 0)
748 		i915_request_get(rq);
749 	i915_request_add(rq);
750 	if (err)
751 		rq = ERR_PTR(err);
752 
753 out_ce:
754 	intel_context_put(ce);
755 	return rq;
756 }
757 
758 static int
759 release_queue(struct intel_engine_cs *engine,
760 	      struct i915_vma *vma,
761 	      int idx, int prio)
762 {
763 	struct i915_sched_attr attr = {
764 		.priority = prio,
765 	};
766 	struct i915_request *rq;
767 	u32 *cs;
768 
769 	rq = intel_engine_create_kernel_request(engine);
770 	if (IS_ERR(rq))
771 		return PTR_ERR(rq);
772 
773 	cs = intel_ring_begin(rq, 4);
774 	if (IS_ERR(cs)) {
775 		i915_request_add(rq);
776 		return PTR_ERR(cs);
777 	}
778 
779 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
780 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
781 	*cs++ = 0;
782 	*cs++ = 1;
783 
784 	intel_ring_advance(rq, cs);
785 
786 	i915_request_get(rq);
787 	i915_request_add(rq);
788 
789 	local_bh_disable();
790 	engine->schedule(rq, &attr);
791 	local_bh_enable(); /* kick tasklet */
792 
793 	i915_request_put(rq);
794 
795 	return 0;
796 }
797 
798 static int
799 slice_semaphore_queue(struct intel_engine_cs *outer,
800 		      struct i915_vma *vma,
801 		      int count)
802 {
803 	struct intel_engine_cs *engine;
804 	struct i915_request *head;
805 	enum intel_engine_id id;
806 	int err, i, n = 0;
807 
808 	head = semaphore_queue(outer, vma, n++);
809 	if (IS_ERR(head))
810 		return PTR_ERR(head);
811 
812 	for_each_engine(engine, outer->gt, id) {
813 		for (i = 0; i < count; i++) {
814 			struct i915_request *rq;
815 
816 			rq = semaphore_queue(engine, vma, n++);
817 			if (IS_ERR(rq)) {
818 				err = PTR_ERR(rq);
819 				goto out;
820 			}
821 
822 			i915_request_put(rq);
823 		}
824 	}
825 
826 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
827 	if (err)
828 		goto out;
829 
830 	if (i915_request_wait(head, 0,
831 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
832 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
833 		       count, n);
834 		GEM_TRACE_DUMP();
835 		intel_gt_set_wedged(outer->gt);
836 		err = -EIO;
837 	}
838 
839 out:
840 	i915_request_put(head);
841 	return err;
842 }
843 
844 static int live_timeslice_preempt(void *arg)
845 {
846 	struct intel_gt *gt = arg;
847 	struct drm_i915_gem_object *obj;
848 	struct i915_vma *vma;
849 	void *vaddr;
850 	int err = 0;
851 	int count;
852 
853 	/*
854 	 * If a request takes too long, we would like to give other users
855 	 * a fair go on the GPU. In particular, users may create batches
856 	 * that wait upon external input, where that input may even be
857 	 * supplied by another GPU job. To avoid blocking forever, we
858 	 * need to preempt the current task and replace it with another
859 	 * ready task.
860 	 */
861 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
862 		return 0;
863 
864 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
865 	if (IS_ERR(obj))
866 		return PTR_ERR(obj);
867 
868 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
869 	if (IS_ERR(vma)) {
870 		err = PTR_ERR(vma);
871 		goto err_obj;
872 	}
873 
874 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
875 	if (IS_ERR(vaddr)) {
876 		err = PTR_ERR(vaddr);
877 		goto err_obj;
878 	}
879 
880 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
881 	if (err)
882 		goto err_map;
883 
884 	err = i915_vma_sync(vma);
885 	if (err)
886 		goto err_pin;
887 
888 	for_each_prime_number_from(count, 1, 16) {
889 		struct intel_engine_cs *engine;
890 		enum intel_engine_id id;
891 
892 		for_each_engine(engine, gt, id) {
893 			if (!intel_engine_has_preemption(engine))
894 				continue;
895 
896 			memset(vaddr, 0, PAGE_SIZE);
897 
898 			engine_heartbeat_disable(engine);
899 			err = slice_semaphore_queue(engine, vma, count);
900 			engine_heartbeat_enable(engine);
901 			if (err)
902 				goto err_pin;
903 
904 			if (igt_flush_test(gt->i915)) {
905 				err = -EIO;
906 				goto err_pin;
907 			}
908 		}
909 	}
910 
911 err_pin:
912 	i915_vma_unpin(vma);
913 err_map:
914 	i915_gem_object_unpin_map(obj);
915 err_obj:
916 	i915_gem_object_put(obj);
917 	return err;
918 }
919 
920 static struct i915_request *
921 create_rewinder(struct intel_context *ce,
922 		struct i915_request *wait,
923 		void *slot, int idx)
924 {
925 	const u32 offset =
926 		i915_ggtt_offset(ce->engine->status_page.vma) +
927 		offset_in_page(slot);
928 	struct i915_request *rq;
929 	u32 *cs;
930 	int err;
931 
932 	rq = intel_context_create_request(ce);
933 	if (IS_ERR(rq))
934 		return rq;
935 
936 	if (wait) {
937 		err = i915_request_await_dma_fence(rq, &wait->fence);
938 		if (err)
939 			goto err;
940 	}
941 
942 	cs = intel_ring_begin(rq, 14);
943 	if (IS_ERR(cs)) {
944 		err = PTR_ERR(cs);
945 		goto err;
946 	}
947 
948 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
949 	*cs++ = MI_NOOP;
950 
951 	*cs++ = MI_SEMAPHORE_WAIT |
952 		MI_SEMAPHORE_GLOBAL_GTT |
953 		MI_SEMAPHORE_POLL |
954 		MI_SEMAPHORE_SAD_GTE_SDD;
955 	*cs++ = idx;
956 	*cs++ = offset;
957 	*cs++ = 0;
958 
959 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
960 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
961 	*cs++ = offset + idx * sizeof(u32);
962 	*cs++ = 0;
963 
964 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
965 	*cs++ = offset;
966 	*cs++ = 0;
967 	*cs++ = idx + 1;
968 
969 	intel_ring_advance(rq, cs);
970 
971 	rq->sched.attr.priority = I915_PRIORITY_MASK;
972 	err = 0;
973 err:
974 	i915_request_get(rq);
975 	i915_request_add(rq);
976 	if (err) {
977 		i915_request_put(rq);
978 		return ERR_PTR(err);
979 	}
980 
981 	return rq;
982 }
983 
984 static int live_timeslice_rewind(void *arg)
985 {
986 	struct intel_gt *gt = arg;
987 	struct intel_engine_cs *engine;
988 	enum intel_engine_id id;
989 
990 	/*
991 	 * The usual presumption on timeslice expiration is that we replace
992 	 * the active context with another. However, given a chain of
993 	 * dependencies we may end up with replacing the context with itself,
994 	 * but only a few of those requests, forcing us to rewind the
995 	 * RING_TAIL of the original request.
996 	 */
997 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
998 		return 0;
999 
1000 	for_each_engine(engine, gt, id) {
1001 		enum { A1, A2, B1 };
1002 		enum { X = 1, Z, Y };
1003 		struct i915_request *rq[3] = {};
1004 		struct intel_context *ce;
1005 		unsigned long timeslice;
1006 		int i, err = 0;
1007 		u32 *slot;
1008 
1009 		if (!intel_engine_has_timeslices(engine))
1010 			continue;
1011 
1012 		/*
1013 		 * A:rq1 -- semaphore wait, timestamp X
1014 		 * A:rq2 -- write timestamp Y
1015 		 *
1016 		 * B:rq1 [await A:rq1] -- write timestamp Z
1017 		 *
1018 		 * Force timeslice, release semaphore.
1019 		 *
1020 		 * Expect execution/evaluation order XZY
1021 		 */
1022 
1023 		engine_heartbeat_disable(engine);
1024 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1025 
1026 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1027 
1028 		ce = intel_context_create(engine);
1029 		if (IS_ERR(ce)) {
1030 			err = PTR_ERR(ce);
1031 			goto err;
1032 		}
1033 
1034 		rq[0] = create_rewinder(ce, NULL, slot, X);
1035 		if (IS_ERR(rq[0])) {
1036 			intel_context_put(ce);
1037 			goto err;
1038 		}
1039 
1040 		rq[1] = create_rewinder(ce, NULL, slot, Y);
1041 		intel_context_put(ce);
1042 		if (IS_ERR(rq[1]))
1043 			goto err;
1044 
1045 		err = wait_for_submit(engine, rq[1], HZ / 2);
1046 		if (err) {
1047 			pr_err("%s: failed to submit first context\n",
1048 			       engine->name);
1049 			goto err;
1050 		}
1051 
1052 		ce = intel_context_create(engine);
1053 		if (IS_ERR(ce)) {
1054 			err = PTR_ERR(ce);
1055 			goto err;
1056 		}
1057 
1058 		rq[2] = create_rewinder(ce, rq[0], slot, Z);
1059 		intel_context_put(ce);
1060 		if (IS_ERR(rq[2]))
1061 			goto err;
1062 
1063 		err = wait_for_submit(engine, rq[2], HZ / 2);
1064 		if (err) {
1065 			pr_err("%s: failed to submit second context\n",
1066 			       engine->name);
1067 			goto err;
1068 		}
1069 
1070 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1071 		if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1072 			/* Wait for the timeslice to kick in */
1073 			del_timer(&engine->execlists.timer);
1074 			tasklet_hi_schedule(&engine->execlists.tasklet);
1075 			intel_engine_flush_submission(engine);
1076 		}
1077 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1078 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1079 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1080 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1081 
1082 		/* Release the hounds! */
1083 		slot[0] = 1;
1084 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1085 
1086 		for (i = 1; i <= 3; i++) {
1087 			unsigned long timeout = jiffies + HZ / 2;
1088 
1089 			while (!READ_ONCE(slot[i]) &&
1090 			       time_before(jiffies, timeout))
1091 				;
1092 
1093 			if (!time_before(jiffies, timeout)) {
1094 				pr_err("%s: rq[%d] timed out\n",
1095 				       engine->name, i - 1);
1096 				err = -ETIME;
1097 				goto err;
1098 			}
1099 
1100 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1101 		}
1102 
1103 		/* XZY: XZ < XY */
1104 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1105 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1106 			       engine->name,
1107 			       slot[Z] - slot[X],
1108 			       slot[Y] - slot[X]);
1109 			err = -EINVAL;
1110 		}
1111 
1112 err:
1113 		memset32(&slot[0], -1, 4);
1114 		wmb();
1115 
1116 		engine->props.timeslice_duration_ms = timeslice;
1117 		engine_heartbeat_enable(engine);
1118 		for (i = 0; i < 3; i++)
1119 			i915_request_put(rq[i]);
1120 		if (igt_flush_test(gt->i915))
1121 			err = -EIO;
1122 		if (err)
1123 			return err;
1124 	}
1125 
1126 	return 0;
1127 }
1128 
/*
 * Submit an empty kernel request on @engine.
 *
 * Returns the request with an extra reference held for the caller, or
 * the ERR_PTR() from request creation.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(rq)) {
		i915_request_get(rq);
		i915_request_add(rq);
	}

	return rq;
}
1142 
/* Two timeslices of @engine, in jiffies, plus one jiffie of slack */
static long timeslice_threshold(const struct intel_engine_cs *engine)
{
	const unsigned long slice = msecs_to_jiffies_timeout(timeslice(engine));

	return 2 * slice + 1;
}
1147 
1148 static int live_timeslice_queue(void *arg)
1149 {
1150 	struct intel_gt *gt = arg;
1151 	struct drm_i915_gem_object *obj;
1152 	struct intel_engine_cs *engine;
1153 	enum intel_engine_id id;
1154 	struct i915_vma *vma;
1155 	void *vaddr;
1156 	int err = 0;
1157 
1158 	/*
1159 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1160 	 * timeslicing between them disabled, we *do* enable timeslicing
1161 	 * if the queue demands it. (Normally, we do not submit if
1162 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1163 	 * eject ELSP[0] in favour of the queue.)
1164 	 */
1165 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1166 		return 0;
1167 
1168 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1169 	if (IS_ERR(obj))
1170 		return PTR_ERR(obj);
1171 
1172 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1173 	if (IS_ERR(vma)) {
1174 		err = PTR_ERR(vma);
1175 		goto err_obj;
1176 	}
1177 
1178 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1179 	if (IS_ERR(vaddr)) {
1180 		err = PTR_ERR(vaddr);
1181 		goto err_obj;
1182 	}
1183 
1184 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1185 	if (err)
1186 		goto err_map;
1187 
1188 	err = i915_vma_sync(vma);
1189 	if (err)
1190 		goto err_pin;
1191 
1192 	for_each_engine(engine, gt, id) {
1193 		struct i915_sched_attr attr = {
1194 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1195 		};
1196 		struct i915_request *rq, *nop;
1197 
1198 		if (!intel_engine_has_preemption(engine))
1199 			continue;
1200 
1201 		engine_heartbeat_disable(engine);
1202 		memset(vaddr, 0, PAGE_SIZE);
1203 
1204 		/* ELSP[0]: semaphore wait */
1205 		rq = semaphore_queue(engine, vma, 0);
1206 		if (IS_ERR(rq)) {
1207 			err = PTR_ERR(rq);
1208 			goto err_heartbeat;
1209 		}
1210 		engine->schedule(rq, &attr);
1211 		err = wait_for_submit(engine, rq, HZ / 2);
1212 		if (err) {
1213 			pr_err("%s: Timed out trying to submit semaphores\n",
1214 			       engine->name);
1215 			goto err_rq;
1216 		}
1217 
1218 		/* ELSP[1]: nop request */
1219 		nop = nop_request(engine);
1220 		if (IS_ERR(nop)) {
1221 			err = PTR_ERR(nop);
1222 			goto err_rq;
1223 		}
1224 		err = wait_for_submit(engine, nop, HZ / 2);
1225 		i915_request_put(nop);
1226 		if (err) {
1227 			pr_err("%s: Timed out trying to submit nop\n",
1228 			       engine->name);
1229 			goto err_rq;
1230 		}
1231 
1232 		GEM_BUG_ON(i915_request_completed(rq));
1233 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1234 
1235 		/* Queue: semaphore signal, matching priority as semaphore */
1236 		err = release_queue(engine, vma, 1, effective_prio(rq));
1237 		if (err)
1238 			goto err_rq;
1239 
1240 		/* Wait until we ack the release_queue and start timeslicing */
1241 		do {
1242 			cond_resched();
1243 			intel_engine_flush_submission(engine);
1244 		} while (READ_ONCE(engine->execlists.pending[0]));
1245 
1246 		if (!READ_ONCE(engine->execlists.timer.expires) &&
1247 		    execlists_active(&engine->execlists) == rq &&
1248 		    !i915_request_completed(rq)) {
1249 			struct drm_printer p =
1250 				drm_info_printer(gt->i915->drm.dev);
1251 
1252 			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1253 				      engine->name);
1254 			intel_engine_dump(engine, &p,
1255 					  "%s\n", engine->name);
1256 			GEM_TRACE_DUMP();
1257 
1258 			memset(vaddr, 0xff, PAGE_SIZE);
1259 			err = -EINVAL;
1260 		}
1261 
1262 		/* Timeslice every jiffy, so within 2 we should signal */
1263 		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1264 			struct drm_printer p =
1265 				drm_info_printer(gt->i915->drm.dev);
1266 
1267 			pr_err("%s: Failed to timeslice into queue\n",
1268 			       engine->name);
1269 			intel_engine_dump(engine, &p,
1270 					  "%s\n", engine->name);
1271 
1272 			memset(vaddr, 0xff, PAGE_SIZE);
1273 			err = -EIO;
1274 		}
1275 err_rq:
1276 		i915_request_put(rq);
1277 err_heartbeat:
1278 		engine_heartbeat_enable(engine);
1279 		if (err)
1280 			break;
1281 	}
1282 
1283 err_pin:
1284 	i915_vma_unpin(vma);
1285 err_map:
1286 	i915_gem_object_unpin_map(obj);
1287 err_obj:
1288 	i915_gem_object_put(obj);
1289 	return err;
1290 }
1291 
1292 static int live_timeslice_nopreempt(void *arg)
1293 {
1294 	struct intel_gt *gt = arg;
1295 	struct intel_engine_cs *engine;
1296 	enum intel_engine_id id;
1297 	struct igt_spinner spin;
1298 	int err = 0;
1299 
1300 	/*
1301 	 * We should not timeslice into a request that is marked with
1302 	 * I915_REQUEST_NOPREEMPT.
1303 	 */
1304 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1305 		return 0;
1306 
1307 	if (igt_spinner_init(&spin, gt))
1308 		return -ENOMEM;
1309 
1310 	for_each_engine(engine, gt, id) {
1311 		struct intel_context *ce;
1312 		struct i915_request *rq;
1313 		unsigned long timeslice;
1314 
1315 		if (!intel_engine_has_preemption(engine))
1316 			continue;
1317 
1318 		ce = intel_context_create(engine);
1319 		if (IS_ERR(ce)) {
1320 			err = PTR_ERR(ce);
1321 			break;
1322 		}
1323 
1324 		engine_heartbeat_disable(engine);
1325 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1326 
1327 		/* Create an unpreemptible spinner */
1328 
1329 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1330 		intel_context_put(ce);
1331 		if (IS_ERR(rq)) {
1332 			err = PTR_ERR(rq);
1333 			goto out_heartbeat;
1334 		}
1335 
1336 		i915_request_get(rq);
1337 		i915_request_add(rq);
1338 
1339 		if (!igt_wait_for_spinner(&spin, rq)) {
1340 			i915_request_put(rq);
1341 			err = -ETIME;
1342 			goto out_spin;
1343 		}
1344 
1345 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1346 		i915_request_put(rq);
1347 
1348 		/* Followed by a maximum priority barrier (heartbeat) */
1349 
1350 		ce = intel_context_create(engine);
1351 		if (IS_ERR(ce)) {
1352 			err = PTR_ERR(rq);
1353 			goto out_spin;
1354 		}
1355 
1356 		rq = intel_context_create_request(ce);
1357 		intel_context_put(ce);
1358 		if (IS_ERR(rq)) {
1359 			err = PTR_ERR(rq);
1360 			goto out_spin;
1361 		}
1362 
1363 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1364 		i915_request_get(rq);
1365 		i915_request_add(rq);
1366 
1367 		/*
1368 		 * Wait until the barrier is in ELSP, and we know timeslicing
1369 		 * will have been activated.
1370 		 */
1371 		if (wait_for_submit(engine, rq, HZ / 2)) {
1372 			i915_request_put(rq);
1373 			err = -ETIME;
1374 			goto out_spin;
1375 		}
1376 
1377 		/*
1378 		 * Since the ELSP[0] request is unpreemptible, it should not
1379 		 * allow the maximum priority barrier through. Wait long
1380 		 * enough to see if it is timesliced in by mistake.
1381 		 */
1382 		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
1383 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1384 			       engine->name);
1385 			err = -EINVAL;
1386 		}
1387 		i915_request_put(rq);
1388 
1389 out_spin:
1390 		igt_spinner_end(&spin);
1391 out_heartbeat:
1392 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1393 		engine_heartbeat_enable(engine);
1394 		if (err)
1395 			break;
1396 
1397 		if (igt_flush_test(gt->i915)) {
1398 			err = -EIO;
1399 			break;
1400 		}
1401 	}
1402 
1403 	igt_spinner_fini(&spin);
1404 	return err;
1405 }
1406 
1407 static int live_busywait_preempt(void *arg)
1408 {
1409 	struct intel_gt *gt = arg;
1410 	struct i915_gem_context *ctx_hi, *ctx_lo;
1411 	struct intel_engine_cs *engine;
1412 	struct drm_i915_gem_object *obj;
1413 	struct i915_vma *vma;
1414 	enum intel_engine_id id;
1415 	int err = -ENOMEM;
1416 	u32 *map;
1417 
1418 	/*
1419 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1420 	 * preempt the busywaits used to synchronise between rings.
1421 	 */
1422 
1423 	ctx_hi = kernel_context(gt->i915);
1424 	if (!ctx_hi)
1425 		return -ENOMEM;
1426 	ctx_hi->sched.priority =
1427 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1428 
1429 	ctx_lo = kernel_context(gt->i915);
1430 	if (!ctx_lo)
1431 		goto err_ctx_hi;
1432 	ctx_lo->sched.priority =
1433 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1434 
1435 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1436 	if (IS_ERR(obj)) {
1437 		err = PTR_ERR(obj);
1438 		goto err_ctx_lo;
1439 	}
1440 
1441 	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1442 	if (IS_ERR(map)) {
1443 		err = PTR_ERR(map);
1444 		goto err_obj;
1445 	}
1446 
1447 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1448 	if (IS_ERR(vma)) {
1449 		err = PTR_ERR(vma);
1450 		goto err_map;
1451 	}
1452 
1453 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1454 	if (err)
1455 		goto err_map;
1456 
1457 	err = i915_vma_sync(vma);
1458 	if (err)
1459 		goto err_vma;
1460 
1461 	for_each_engine(engine, gt, id) {
1462 		struct i915_request *lo, *hi;
1463 		struct igt_live_test t;
1464 		u32 *cs;
1465 
1466 		if (!intel_engine_has_preemption(engine))
1467 			continue;
1468 
1469 		if (!intel_engine_can_store_dword(engine))
1470 			continue;
1471 
1472 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1473 			err = -EIO;
1474 			goto err_vma;
1475 		}
1476 
1477 		/*
1478 		 * We create two requests. The low priority request
1479 		 * busywaits on a semaphore (inside the ringbuffer where
1480 		 * is should be preemptible) and the high priority requests
1481 		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1482 		 * allowing the first request to complete. If preemption
1483 		 * fails, we hang instead.
1484 		 */
1485 
1486 		lo = igt_request_alloc(ctx_lo, engine);
1487 		if (IS_ERR(lo)) {
1488 			err = PTR_ERR(lo);
1489 			goto err_vma;
1490 		}
1491 
1492 		cs = intel_ring_begin(lo, 8);
1493 		if (IS_ERR(cs)) {
1494 			err = PTR_ERR(cs);
1495 			i915_request_add(lo);
1496 			goto err_vma;
1497 		}
1498 
1499 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1500 		*cs++ = i915_ggtt_offset(vma);
1501 		*cs++ = 0;
1502 		*cs++ = 1;
1503 
1504 		/* XXX Do we need a flush + invalidate here? */
1505 
1506 		*cs++ = MI_SEMAPHORE_WAIT |
1507 			MI_SEMAPHORE_GLOBAL_GTT |
1508 			MI_SEMAPHORE_POLL |
1509 			MI_SEMAPHORE_SAD_EQ_SDD;
1510 		*cs++ = 0;
1511 		*cs++ = i915_ggtt_offset(vma);
1512 		*cs++ = 0;
1513 
1514 		intel_ring_advance(lo, cs);
1515 
1516 		i915_request_get(lo);
1517 		i915_request_add(lo);
1518 
1519 		if (wait_for(READ_ONCE(*map), 10)) {
1520 			i915_request_put(lo);
1521 			err = -ETIMEDOUT;
1522 			goto err_vma;
1523 		}
1524 
1525 		/* Low priority request should be busywaiting now */
1526 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1527 			i915_request_put(lo);
1528 			pr_err("%s: Busywaiting request did not!\n",
1529 			       engine->name);
1530 			err = -EIO;
1531 			goto err_vma;
1532 		}
1533 
1534 		hi = igt_request_alloc(ctx_hi, engine);
1535 		if (IS_ERR(hi)) {
1536 			err = PTR_ERR(hi);
1537 			i915_request_put(lo);
1538 			goto err_vma;
1539 		}
1540 
1541 		cs = intel_ring_begin(hi, 4);
1542 		if (IS_ERR(cs)) {
1543 			err = PTR_ERR(cs);
1544 			i915_request_add(hi);
1545 			i915_request_put(lo);
1546 			goto err_vma;
1547 		}
1548 
1549 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1550 		*cs++ = i915_ggtt_offset(vma);
1551 		*cs++ = 0;
1552 		*cs++ = 0;
1553 
1554 		intel_ring_advance(hi, cs);
1555 		i915_request_add(hi);
1556 
1557 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1558 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1559 
1560 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1561 			       engine->name);
1562 
1563 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1564 			GEM_TRACE_DUMP();
1565 
1566 			i915_request_put(lo);
1567 			intel_gt_set_wedged(gt);
1568 			err = -EIO;
1569 			goto err_vma;
1570 		}
1571 		GEM_BUG_ON(READ_ONCE(*map));
1572 		i915_request_put(lo);
1573 
1574 		if (igt_live_test_end(&t)) {
1575 			err = -EIO;
1576 			goto err_vma;
1577 		}
1578 	}
1579 
1580 	err = 0;
1581 err_vma:
1582 	i915_vma_unpin(vma);
1583 err_map:
1584 	i915_gem_object_unpin_map(obj);
1585 err_obj:
1586 	i915_gem_object_put(obj);
1587 err_ctx_lo:
1588 	kernel_context_close(ctx_lo);
1589 err_ctx_hi:
1590 	kernel_context_close(ctx_hi);
1591 	return err;
1592 }
1593 
1594 static struct i915_request *
1595 spinner_create_request(struct igt_spinner *spin,
1596 		       struct i915_gem_context *ctx,
1597 		       struct intel_engine_cs *engine,
1598 		       u32 arb)
1599 {
1600 	struct intel_context *ce;
1601 	struct i915_request *rq;
1602 
1603 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1604 	if (IS_ERR(ce))
1605 		return ERR_CAST(ce);
1606 
1607 	rq = igt_spinner_create_request(spin, ce, arb);
1608 	intel_context_put(ce);
1609 	return rq;
1610 }
1611 
1612 static int live_preempt(void *arg)
1613 {
1614 	struct intel_gt *gt = arg;
1615 	struct i915_gem_context *ctx_hi, *ctx_lo;
1616 	struct igt_spinner spin_hi, spin_lo;
1617 	struct intel_engine_cs *engine;
1618 	enum intel_engine_id id;
1619 	int err = -ENOMEM;
1620 
1621 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1622 		return 0;
1623 
1624 	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1625 		pr_err("Logical preemption supported, but not exposed\n");
1626 
1627 	if (igt_spinner_init(&spin_hi, gt))
1628 		return -ENOMEM;
1629 
1630 	if (igt_spinner_init(&spin_lo, gt))
1631 		goto err_spin_hi;
1632 
1633 	ctx_hi = kernel_context(gt->i915);
1634 	if (!ctx_hi)
1635 		goto err_spin_lo;
1636 	ctx_hi->sched.priority =
1637 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1638 
1639 	ctx_lo = kernel_context(gt->i915);
1640 	if (!ctx_lo)
1641 		goto err_ctx_hi;
1642 	ctx_lo->sched.priority =
1643 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1644 
1645 	for_each_engine(engine, gt, id) {
1646 		struct igt_live_test t;
1647 		struct i915_request *rq;
1648 
1649 		if (!intel_engine_has_preemption(engine))
1650 			continue;
1651 
1652 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1653 			err = -EIO;
1654 			goto err_ctx_lo;
1655 		}
1656 
1657 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1658 					    MI_ARB_CHECK);
1659 		if (IS_ERR(rq)) {
1660 			err = PTR_ERR(rq);
1661 			goto err_ctx_lo;
1662 		}
1663 
1664 		i915_request_add(rq);
1665 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1666 			GEM_TRACE("lo spinner failed to start\n");
1667 			GEM_TRACE_DUMP();
1668 			intel_gt_set_wedged(gt);
1669 			err = -EIO;
1670 			goto err_ctx_lo;
1671 		}
1672 
1673 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1674 					    MI_ARB_CHECK);
1675 		if (IS_ERR(rq)) {
1676 			igt_spinner_end(&spin_lo);
1677 			err = PTR_ERR(rq);
1678 			goto err_ctx_lo;
1679 		}
1680 
1681 		i915_request_add(rq);
1682 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1683 			GEM_TRACE("hi spinner failed to start\n");
1684 			GEM_TRACE_DUMP();
1685 			intel_gt_set_wedged(gt);
1686 			err = -EIO;
1687 			goto err_ctx_lo;
1688 		}
1689 
1690 		igt_spinner_end(&spin_hi);
1691 		igt_spinner_end(&spin_lo);
1692 
1693 		if (igt_live_test_end(&t)) {
1694 			err = -EIO;
1695 			goto err_ctx_lo;
1696 		}
1697 	}
1698 
1699 	err = 0;
1700 err_ctx_lo:
1701 	kernel_context_close(ctx_lo);
1702 err_ctx_hi:
1703 	kernel_context_close(ctx_hi);
1704 err_spin_lo:
1705 	igt_spinner_fini(&spin_lo);
1706 err_spin_hi:
1707 	igt_spinner_fini(&spin_hi);
1708 	return err;
1709 }
1710 
1711 static int live_late_preempt(void *arg)
1712 {
1713 	struct intel_gt *gt = arg;
1714 	struct i915_gem_context *ctx_hi, *ctx_lo;
1715 	struct igt_spinner spin_hi, spin_lo;
1716 	struct intel_engine_cs *engine;
1717 	struct i915_sched_attr attr = {};
1718 	enum intel_engine_id id;
1719 	int err = -ENOMEM;
1720 
1721 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1722 		return 0;
1723 
1724 	if (igt_spinner_init(&spin_hi, gt))
1725 		return -ENOMEM;
1726 
1727 	if (igt_spinner_init(&spin_lo, gt))
1728 		goto err_spin_hi;
1729 
1730 	ctx_hi = kernel_context(gt->i915);
1731 	if (!ctx_hi)
1732 		goto err_spin_lo;
1733 
1734 	ctx_lo = kernel_context(gt->i915);
1735 	if (!ctx_lo)
1736 		goto err_ctx_hi;
1737 
1738 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1739 	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1740 
1741 	for_each_engine(engine, gt, id) {
1742 		struct igt_live_test t;
1743 		struct i915_request *rq;
1744 
1745 		if (!intel_engine_has_preemption(engine))
1746 			continue;
1747 
1748 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1749 			err = -EIO;
1750 			goto err_ctx_lo;
1751 		}
1752 
1753 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1754 					    MI_ARB_CHECK);
1755 		if (IS_ERR(rq)) {
1756 			err = PTR_ERR(rq);
1757 			goto err_ctx_lo;
1758 		}
1759 
1760 		i915_request_add(rq);
1761 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1762 			pr_err("First context failed to start\n");
1763 			goto err_wedged;
1764 		}
1765 
1766 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1767 					    MI_NOOP);
1768 		if (IS_ERR(rq)) {
1769 			igt_spinner_end(&spin_lo);
1770 			err = PTR_ERR(rq);
1771 			goto err_ctx_lo;
1772 		}
1773 
1774 		i915_request_add(rq);
1775 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1776 			pr_err("Second context overtook first?\n");
1777 			goto err_wedged;
1778 		}
1779 
1780 		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1781 		engine->schedule(rq, &attr);
1782 
1783 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1784 			pr_err("High priority context failed to preempt the low priority context\n");
1785 			GEM_TRACE_DUMP();
1786 			goto err_wedged;
1787 		}
1788 
1789 		igt_spinner_end(&spin_hi);
1790 		igt_spinner_end(&spin_lo);
1791 
1792 		if (igt_live_test_end(&t)) {
1793 			err = -EIO;
1794 			goto err_ctx_lo;
1795 		}
1796 	}
1797 
1798 	err = 0;
1799 err_ctx_lo:
1800 	kernel_context_close(ctx_lo);
1801 err_ctx_hi:
1802 	kernel_context_close(ctx_hi);
1803 err_spin_lo:
1804 	igt_spinner_fini(&spin_lo);
1805 err_spin_hi:
1806 	igt_spinner_fini(&spin_hi);
1807 	return err;
1808 
1809 err_wedged:
1810 	igt_spinner_end(&spin_hi);
1811 	igt_spinner_end(&spin_lo);
1812 	intel_gt_set_wedged(gt);
1813 	err = -EIO;
1814 	goto err_ctx_lo;
1815 }
1816 
/*
 * A spinner bundled with the GEM context it is submitted from. Set up by
 * preempt_client_init() and torn down by preempt_client_fini().
 */
struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};
1821 
1822 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1823 {
1824 	c->ctx = kernel_context(gt->i915);
1825 	if (!c->ctx)
1826 		return -ENOMEM;
1827 
1828 	if (igt_spinner_init(&c->spin, gt))
1829 		goto err_ctx;
1830 
1831 	return 0;
1832 
1833 err_ctx:
1834 	kernel_context_close(c->ctx);
1835 	return -ENOMEM;
1836 }
1837 
/*
 * Release everything acquired by preempt_client_init(): finalise the
 * spinner, then close the client's context.
 */
static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}
1843 
1844 static int live_nopreempt(void *arg)
1845 {
1846 	struct intel_gt *gt = arg;
1847 	struct intel_engine_cs *engine;
1848 	struct preempt_client a, b;
1849 	enum intel_engine_id id;
1850 	int err = -ENOMEM;
1851 
1852 	/*
1853 	 * Verify that we can disable preemption for an individual request
1854 	 * that may be being observed and not want to be interrupted.
1855 	 */
1856 
1857 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1858 		return 0;
1859 
1860 	if (preempt_client_init(gt, &a))
1861 		return -ENOMEM;
1862 	if (preempt_client_init(gt, &b))
1863 		goto err_client_a;
1864 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1865 
1866 	for_each_engine(engine, gt, id) {
1867 		struct i915_request *rq_a, *rq_b;
1868 
1869 		if (!intel_engine_has_preemption(engine))
1870 			continue;
1871 
1872 		engine->execlists.preempt_hang.count = 0;
1873 
1874 		rq_a = spinner_create_request(&a.spin,
1875 					      a.ctx, engine,
1876 					      MI_ARB_CHECK);
1877 		if (IS_ERR(rq_a)) {
1878 			err = PTR_ERR(rq_a);
1879 			goto err_client_b;
1880 		}
1881 
1882 		/* Low priority client, but unpreemptable! */
1883 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1884 
1885 		i915_request_add(rq_a);
1886 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1887 			pr_err("First client failed to start\n");
1888 			goto err_wedged;
1889 		}
1890 
1891 		rq_b = spinner_create_request(&b.spin,
1892 					      b.ctx, engine,
1893 					      MI_ARB_CHECK);
1894 		if (IS_ERR(rq_b)) {
1895 			err = PTR_ERR(rq_b);
1896 			goto err_client_b;
1897 		}
1898 
1899 		i915_request_add(rq_b);
1900 
1901 		/* B is much more important than A! (But A is unpreemptable.) */
1902 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1903 
1904 		/* Wait long enough for preemption and timeslicing */
1905 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
1906 			pr_err("Second client started too early!\n");
1907 			goto err_wedged;
1908 		}
1909 
1910 		igt_spinner_end(&a.spin);
1911 
1912 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1913 			pr_err("Second client failed to start\n");
1914 			goto err_wedged;
1915 		}
1916 
1917 		igt_spinner_end(&b.spin);
1918 
1919 		if (engine->execlists.preempt_hang.count) {
1920 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
1921 			       engine->execlists.preempt_hang.count);
1922 			err = -EINVAL;
1923 			goto err_wedged;
1924 		}
1925 
1926 		if (igt_flush_test(gt->i915))
1927 			goto err_wedged;
1928 	}
1929 
1930 	err = 0;
1931 err_client_b:
1932 	preempt_client_fini(&b);
1933 err_client_a:
1934 	preempt_client_fini(&a);
1935 	return err;
1936 
1937 err_wedged:
1938 	igt_spinner_end(&b.spin);
1939 	igt_spinner_end(&a.spin);
1940 	intel_gt_set_wedged(gt);
1941 	err = -EIO;
1942 	goto err_client_b;
1943 }
1944 
/*
 * Shared state for the __cancel_*() subtests: the engine currently being
 * exercised and the two clients (context + spinner) used to fill it.
 */
struct live_preempt_cancel {
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
};
1949 
1950 static int __cancel_active0(struct live_preempt_cancel *arg)
1951 {
1952 	struct i915_request *rq;
1953 	struct igt_live_test t;
1954 	int err;
1955 
1956 	/* Preempt cancel of ELSP0 */
1957 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1958 	if (igt_live_test_begin(&t, arg->engine->i915,
1959 				__func__, arg->engine->name))
1960 		return -EIO;
1961 
1962 	rq = spinner_create_request(&arg->a.spin,
1963 				    arg->a.ctx, arg->engine,
1964 				    MI_ARB_CHECK);
1965 	if (IS_ERR(rq))
1966 		return PTR_ERR(rq);
1967 
1968 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1969 	i915_request_get(rq);
1970 	i915_request_add(rq);
1971 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1972 		err = -EIO;
1973 		goto out;
1974 	}
1975 
1976 	intel_context_set_banned(rq->context);
1977 	err = intel_engine_pulse(arg->engine);
1978 	if (err)
1979 		goto out;
1980 
1981 	err = wait_for_reset(arg->engine, rq, HZ / 2);
1982 	if (err) {
1983 		pr_err("Cancelled inflight0 request did not reset\n");
1984 		goto out;
1985 	}
1986 
1987 out:
1988 	i915_request_put(rq);
1989 	if (igt_live_test_end(&t))
1990 		err = -EIO;
1991 	return err;
1992 }
1993 
1994 static int __cancel_active1(struct live_preempt_cancel *arg)
1995 {
1996 	struct i915_request *rq[2] = {};
1997 	struct igt_live_test t;
1998 	int err;
1999 
2000 	/* Preempt cancel of ELSP1 */
2001 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2002 	if (igt_live_test_begin(&t, arg->engine->i915,
2003 				__func__, arg->engine->name))
2004 		return -EIO;
2005 
2006 	rq[0] = spinner_create_request(&arg->a.spin,
2007 				       arg->a.ctx, arg->engine,
2008 				       MI_NOOP); /* no preemption */
2009 	if (IS_ERR(rq[0]))
2010 		return PTR_ERR(rq[0]);
2011 
2012 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2013 	i915_request_get(rq[0]);
2014 	i915_request_add(rq[0]);
2015 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2016 		err = -EIO;
2017 		goto out;
2018 	}
2019 
2020 	rq[1] = spinner_create_request(&arg->b.spin,
2021 				       arg->b.ctx, arg->engine,
2022 				       MI_ARB_CHECK);
2023 	if (IS_ERR(rq[1])) {
2024 		err = PTR_ERR(rq[1]);
2025 		goto out;
2026 	}
2027 
2028 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2029 	i915_request_get(rq[1]);
2030 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2031 	i915_request_add(rq[1]);
2032 	if (err)
2033 		goto out;
2034 
2035 	intel_context_set_banned(rq[1]->context);
2036 	err = intel_engine_pulse(arg->engine);
2037 	if (err)
2038 		goto out;
2039 
2040 	igt_spinner_end(&arg->a.spin);
2041 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2042 	if (err)
2043 		goto out;
2044 
2045 	if (rq[0]->fence.error != 0) {
2046 		pr_err("Normal inflight0 request did not complete\n");
2047 		err = -EINVAL;
2048 		goto out;
2049 	}
2050 
2051 	if (rq[1]->fence.error != -EIO) {
2052 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2053 		err = -EINVAL;
2054 		goto out;
2055 	}
2056 
2057 out:
2058 	i915_request_put(rq[1]);
2059 	i915_request_put(rq[0]);
2060 	if (igt_live_test_end(&t))
2061 		err = -EIO;
2062 	return err;
2063 }
2064 
2065 static int __cancel_queued(struct live_preempt_cancel *arg)
2066 {
2067 	struct i915_request *rq[3] = {};
2068 	struct igt_live_test t;
2069 	int err;
2070 
2071 	/* Full ELSP and one in the wings */
2072 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2073 	if (igt_live_test_begin(&t, arg->engine->i915,
2074 				__func__, arg->engine->name))
2075 		return -EIO;
2076 
2077 	rq[0] = spinner_create_request(&arg->a.spin,
2078 				       arg->a.ctx, arg->engine,
2079 				       MI_ARB_CHECK);
2080 	if (IS_ERR(rq[0]))
2081 		return PTR_ERR(rq[0]);
2082 
2083 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2084 	i915_request_get(rq[0]);
2085 	i915_request_add(rq[0]);
2086 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2087 		err = -EIO;
2088 		goto out;
2089 	}
2090 
2091 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2092 	if (IS_ERR(rq[1])) {
2093 		err = PTR_ERR(rq[1]);
2094 		goto out;
2095 	}
2096 
2097 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2098 	i915_request_get(rq[1]);
2099 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2100 	i915_request_add(rq[1]);
2101 	if (err)
2102 		goto out;
2103 
2104 	rq[2] = spinner_create_request(&arg->b.spin,
2105 				       arg->a.ctx, arg->engine,
2106 				       MI_ARB_CHECK);
2107 	if (IS_ERR(rq[2])) {
2108 		err = PTR_ERR(rq[2]);
2109 		goto out;
2110 	}
2111 
2112 	i915_request_get(rq[2]);
2113 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2114 	i915_request_add(rq[2]);
2115 	if (err)
2116 		goto out;
2117 
2118 	intel_context_set_banned(rq[2]->context);
2119 	err = intel_engine_pulse(arg->engine);
2120 	if (err)
2121 		goto out;
2122 
2123 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2124 	if (err)
2125 		goto out;
2126 
2127 	if (rq[0]->fence.error != -EIO) {
2128 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2129 		err = -EINVAL;
2130 		goto out;
2131 	}
2132 
2133 	if (rq[1]->fence.error != 0) {
2134 		pr_err("Normal inflight1 request did not complete\n");
2135 		err = -EINVAL;
2136 		goto out;
2137 	}
2138 
2139 	if (rq[2]->fence.error != -EIO) {
2140 		pr_err("Cancelled queued request did not report -EIO\n");
2141 		err = -EINVAL;
2142 		goto out;
2143 	}
2144 
2145 out:
2146 	i915_request_put(rq[2]);
2147 	i915_request_put(rq[1]);
2148 	i915_request_put(rq[0]);
2149 	if (igt_live_test_end(&t))
2150 		err = -EIO;
2151 	return err;
2152 }
2153 
2154 static int __cancel_hostile(struct live_preempt_cancel *arg)
2155 {
2156 	struct i915_request *rq;
2157 	int err;
2158 
2159 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2160 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2161 		return 0;
2162 
2163 	if (!intel_has_reset_engine(arg->engine->gt))
2164 		return 0;
2165 
2166 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2167 	rq = spinner_create_request(&arg->a.spin,
2168 				    arg->a.ctx, arg->engine,
2169 				    MI_NOOP); /* preemption disabled */
2170 	if (IS_ERR(rq))
2171 		return PTR_ERR(rq);
2172 
2173 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2174 	i915_request_get(rq);
2175 	i915_request_add(rq);
2176 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2177 		err = -EIO;
2178 		goto out;
2179 	}
2180 
2181 	intel_context_set_banned(rq->context);
2182 	err = intel_engine_pulse(arg->engine); /* force reset */
2183 	if (err)
2184 		goto out;
2185 
2186 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2187 	if (err) {
2188 		pr_err("Cancelled inflight0 request did not reset\n");
2189 		goto out;
2190 	}
2191 
2192 out:
2193 	i915_request_put(rq);
2194 	if (igt_flush_test(arg->engine->i915))
2195 		err = -EIO;
2196 	return err;
2197 }
2198 
2199 static int live_preempt_cancel(void *arg)
2200 {
2201 	struct intel_gt *gt = arg;
2202 	struct live_preempt_cancel data;
2203 	enum intel_engine_id id;
2204 	int err = -ENOMEM;
2205 
2206 	/*
2207 	 * To cancel an inflight context, we need to first remove it from the
2208 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2209 	 */
2210 
2211 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2212 		return 0;
2213 
2214 	if (preempt_client_init(gt, &data.a))
2215 		return -ENOMEM;
2216 	if (preempt_client_init(gt, &data.b))
2217 		goto err_client_a;
2218 
2219 	for_each_engine(data.engine, gt, id) {
2220 		if (!intel_engine_has_preemption(data.engine))
2221 			continue;
2222 
2223 		err = __cancel_active0(&data);
2224 		if (err)
2225 			goto err_wedged;
2226 
2227 		err = __cancel_active1(&data);
2228 		if (err)
2229 			goto err_wedged;
2230 
2231 		err = __cancel_queued(&data);
2232 		if (err)
2233 			goto err_wedged;
2234 
2235 		err = __cancel_hostile(&data);
2236 		if (err)
2237 			goto err_wedged;
2238 	}
2239 
2240 	err = 0;
2241 err_client_b:
2242 	preempt_client_fini(&data.b);
2243 err_client_a:
2244 	preempt_client_fini(&data.a);
2245 	return err;
2246 
2247 err_wedged:
2248 	GEM_TRACE_DUMP();
2249 	igt_spinner_end(&data.b.spin);
2250 	igt_spinner_end(&data.a.spin);
2251 	intel_gt_set_wedged(gt);
2252 	goto err_client_b;
2253 }
2254 
2255 static int live_suppress_self_preempt(void *arg)
2256 {
2257 	struct intel_gt *gt = arg;
2258 	struct intel_engine_cs *engine;
2259 	struct i915_sched_attr attr = {
2260 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2261 	};
2262 	struct preempt_client a, b;
2263 	enum intel_engine_id id;
2264 	int err = -ENOMEM;
2265 
2266 	/*
2267 	 * Verify that if a preemption request does not cause a change in
2268 	 * the current execution order, the preempt-to-idle injection is
2269 	 * skipped and that we do not accidentally apply it after the CS
2270 	 * completion event.
2271 	 */
2272 
2273 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2274 		return 0;
2275 
2276 	if (intel_uc_uses_guc_submission(&gt->uc))
2277 		return 0; /* presume black blox */
2278 
2279 	if (intel_vgpu_active(gt->i915))
2280 		return 0; /* GVT forces single port & request submission */
2281 
2282 	if (preempt_client_init(gt, &a))
2283 		return -ENOMEM;
2284 	if (preempt_client_init(gt, &b))
2285 		goto err_client_a;
2286 
2287 	for_each_engine(engine, gt, id) {
2288 		struct i915_request *rq_a, *rq_b;
2289 		int depth;
2290 
2291 		if (!intel_engine_has_preemption(engine))
2292 			continue;
2293 
2294 		if (igt_flush_test(gt->i915))
2295 			goto err_wedged;
2296 
2297 		intel_engine_pm_get(engine);
2298 		engine->execlists.preempt_hang.count = 0;
2299 
2300 		rq_a = spinner_create_request(&a.spin,
2301 					      a.ctx, engine,
2302 					      MI_NOOP);
2303 		if (IS_ERR(rq_a)) {
2304 			err = PTR_ERR(rq_a);
2305 			intel_engine_pm_put(engine);
2306 			goto err_client_b;
2307 		}
2308 
2309 		i915_request_add(rq_a);
2310 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2311 			pr_err("First client failed to start\n");
2312 			intel_engine_pm_put(engine);
2313 			goto err_wedged;
2314 		}
2315 
2316 		/* Keep postponing the timer to avoid premature slicing */
2317 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2318 		for (depth = 0; depth < 8; depth++) {
2319 			rq_b = spinner_create_request(&b.spin,
2320 						      b.ctx, engine,
2321 						      MI_NOOP);
2322 			if (IS_ERR(rq_b)) {
2323 				err = PTR_ERR(rq_b);
2324 				intel_engine_pm_put(engine);
2325 				goto err_client_b;
2326 			}
2327 			i915_request_add(rq_b);
2328 
2329 			GEM_BUG_ON(i915_request_completed(rq_a));
2330 			engine->schedule(rq_a, &attr);
2331 			igt_spinner_end(&a.spin);
2332 
2333 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2334 				pr_err("Second client failed to start\n");
2335 				intel_engine_pm_put(engine);
2336 				goto err_wedged;
2337 			}
2338 
2339 			swap(a, b);
2340 			rq_a = rq_b;
2341 		}
2342 		igt_spinner_end(&a.spin);
2343 
2344 		if (engine->execlists.preempt_hang.count) {
2345 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2346 			       engine->name,
2347 			       engine->execlists.preempt_hang.count,
2348 			       depth);
2349 			intel_engine_pm_put(engine);
2350 			err = -EINVAL;
2351 			goto err_client_b;
2352 		}
2353 
2354 		intel_engine_pm_put(engine);
2355 		if (igt_flush_test(gt->i915))
2356 			goto err_wedged;
2357 	}
2358 
2359 	err = 0;
2360 err_client_b:
2361 	preempt_client_fini(&b);
2362 err_client_a:
2363 	preempt_client_fini(&a);
2364 	return err;
2365 
2366 err_wedged:
2367 	igt_spinner_end(&b.spin);
2368 	igt_spinner_end(&a.spin);
2369 	intel_gt_set_wedged(gt);
2370 	err = -EIO;
2371 	goto err_client_b;
2372 }
2373 
/*
 * Notify callback handed to i915_sw_fence_init() by dummy_request(). It
 * ignores both @fence and @state and always returns NOTIFY_DONE.
 */
static int __i915_sw_fence_call
dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}
2379 
2380 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2381 {
2382 	struct i915_request *rq;
2383 
2384 	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2385 	if (!rq)
2386 		return NULL;
2387 
2388 	rq->engine = engine;
2389 
2390 	spin_lock_init(&rq->lock);
2391 	INIT_LIST_HEAD(&rq->fence.cb_list);
2392 	rq->fence.lock = &rq->lock;
2393 	rq->fence.ops = &i915_fence_ops;
2394 
2395 	i915_sched_node_init(&rq->sched);
2396 
2397 	/* mark this request as permanently incomplete */
2398 	rq->fence.seqno = 1;
2399 	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2400 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2401 	GEM_BUG_ON(i915_request_completed(rq));
2402 
2403 	i915_sw_fence_init(&rq->submit, dummy_notify);
2404 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2405 
2406 	spin_lock_init(&rq->lock);
2407 	rq->fence.lock = &rq->lock;
2408 	INIT_LIST_HEAD(&rq->fence.cb_list);
2409 
2410 	return rq;
2411 }
2412 
2413 static void dummy_request_free(struct i915_request *dummy)
2414 {
2415 	/* We have to fake the CS interrupt to kick the next request */
2416 	i915_sw_fence_commit(&dummy->submit);
2417 
2418 	i915_request_mark_complete(dummy);
2419 	dma_fence_signal(&dummy->fence);
2420 
2421 	i915_sched_node_fini(&dummy->sched);
2422 	i915_sw_fence_fini(&dummy->submit);
2423 
2424 	dma_fence_free(&dummy->fence);
2425 }
2426 
2427 static int live_suppress_wait_preempt(void *arg)
2428 {
2429 	struct intel_gt *gt = arg;
2430 	struct preempt_client client[4];
2431 	struct i915_request *rq[ARRAY_SIZE(client)] = {};
2432 	struct intel_engine_cs *engine;
2433 	enum intel_engine_id id;
2434 	int err = -ENOMEM;
2435 	int i;
2436 
2437 	/*
2438 	 * Waiters are given a little priority nudge, but not enough
2439 	 * to actually cause any preemption. Double check that we do
2440 	 * not needlessly generate preempt-to-idle cycles.
2441 	 */
2442 
2443 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2444 		return 0;
2445 
2446 	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2447 		return -ENOMEM;
2448 	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2449 		goto err_client_0;
2450 	if (preempt_client_init(gt, &client[2])) /* head of queue */
2451 		goto err_client_1;
2452 	if (preempt_client_init(gt, &client[3])) /* bystander */
2453 		goto err_client_2;
2454 
2455 	for_each_engine(engine, gt, id) {
2456 		int depth;
2457 
2458 		if (!intel_engine_has_preemption(engine))
2459 			continue;
2460 
2461 		if (!engine->emit_init_breadcrumb)
2462 			continue;
2463 
2464 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2465 			struct i915_request *dummy;
2466 
2467 			engine->execlists.preempt_hang.count = 0;
2468 
2469 			dummy = dummy_request(engine);
2470 			if (!dummy)
2471 				goto err_client_3;
2472 
2473 			for (i = 0; i < ARRAY_SIZE(client); i++) {
2474 				struct i915_request *this;
2475 
2476 				this = spinner_create_request(&client[i].spin,
2477 							      client[i].ctx, engine,
2478 							      MI_NOOP);
2479 				if (IS_ERR(this)) {
2480 					err = PTR_ERR(this);
2481 					goto err_wedged;
2482 				}
2483 
2484 				/* Disable NEWCLIENT promotion */
2485 				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
2486 							&dummy->fence);
2487 
2488 				rq[i] = i915_request_get(this);
2489 				i915_request_add(this);
2490 			}
2491 
2492 			dummy_request_free(dummy);
2493 
2494 			GEM_BUG_ON(i915_request_completed(rq[0]));
2495 			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2496 				pr_err("%s: First client failed to start\n",
2497 				       engine->name);
2498 				goto err_wedged;
2499 			}
2500 			GEM_BUG_ON(!i915_request_started(rq[0]));
2501 
2502 			if (i915_request_wait(rq[depth],
2503 					      I915_WAIT_PRIORITY,
2504 					      1) != -ETIME) {
2505 				pr_err("%s: Waiter depth:%d completed!\n",
2506 				       engine->name, depth);
2507 				goto err_wedged;
2508 			}
2509 
2510 			for (i = 0; i < ARRAY_SIZE(client); i++) {
2511 				igt_spinner_end(&client[i].spin);
2512 				i915_request_put(rq[i]);
2513 				rq[i] = NULL;
2514 			}
2515 
2516 			if (igt_flush_test(gt->i915))
2517 				goto err_wedged;
2518 
2519 			if (engine->execlists.preempt_hang.count) {
2520 				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2521 				       engine->name,
2522 				       engine->execlists.preempt_hang.count,
2523 				       depth);
2524 				err = -EINVAL;
2525 				goto err_client_3;
2526 			}
2527 		}
2528 	}
2529 
2530 	err = 0;
2531 err_client_3:
2532 	preempt_client_fini(&client[3]);
2533 err_client_2:
2534 	preempt_client_fini(&client[2]);
2535 err_client_1:
2536 	preempt_client_fini(&client[1]);
2537 err_client_0:
2538 	preempt_client_fini(&client[0]);
2539 	return err;
2540 
2541 err_wedged:
2542 	for (i = 0; i < ARRAY_SIZE(client); i++) {
2543 		igt_spinner_end(&client[i].spin);
2544 		i915_request_put(rq[i]);
2545 	}
2546 	intel_gt_set_wedged(gt);
2547 	err = -EIO;
2548 	goto err_client_3;
2549 }
2550 
2551 static int live_chain_preempt(void *arg)
2552 {
2553 	struct intel_gt *gt = arg;
2554 	struct intel_engine_cs *engine;
2555 	struct preempt_client hi, lo;
2556 	enum intel_engine_id id;
2557 	int err = -ENOMEM;
2558 
2559 	/*
2560 	 * Build a chain AB...BA between two contexts (A, B) and request
2561 	 * preemption of the last request. It should then complete before
2562 	 * the previously submitted spinner in B.
2563 	 */
2564 
2565 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2566 		return 0;
2567 
2568 	if (preempt_client_init(gt, &hi))
2569 		return -ENOMEM;
2570 
2571 	if (preempt_client_init(gt, &lo))
2572 		goto err_client_hi;
2573 
2574 	for_each_engine(engine, gt, id) {
2575 		struct i915_sched_attr attr = {
2576 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2577 		};
2578 		struct igt_live_test t;
2579 		struct i915_request *rq;
2580 		int ring_size, count, i;
2581 
2582 		if (!intel_engine_has_preemption(engine))
2583 			continue;
2584 
2585 		rq = spinner_create_request(&lo.spin,
2586 					    lo.ctx, engine,
2587 					    MI_ARB_CHECK);
2588 		if (IS_ERR(rq))
2589 			goto err_wedged;
2590 
2591 		i915_request_get(rq);
2592 		i915_request_add(rq);
2593 
2594 		ring_size = rq->wa_tail - rq->head;
2595 		if (ring_size < 0)
2596 			ring_size += rq->ring->size;
2597 		ring_size = rq->ring->size / ring_size;
2598 		pr_debug("%s(%s): Using maximum of %d requests\n",
2599 			 __func__, engine->name, ring_size);
2600 
2601 		igt_spinner_end(&lo.spin);
2602 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2603 			pr_err("Timed out waiting to flush %s\n", engine->name);
2604 			i915_request_put(rq);
2605 			goto err_wedged;
2606 		}
2607 		i915_request_put(rq);
2608 
2609 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2610 			err = -EIO;
2611 			goto err_wedged;
2612 		}
2613 
2614 		for_each_prime_number_from(count, 1, ring_size) {
2615 			rq = spinner_create_request(&hi.spin,
2616 						    hi.ctx, engine,
2617 						    MI_ARB_CHECK);
2618 			if (IS_ERR(rq))
2619 				goto err_wedged;
2620 			i915_request_add(rq);
2621 			if (!igt_wait_for_spinner(&hi.spin, rq))
2622 				goto err_wedged;
2623 
2624 			rq = spinner_create_request(&lo.spin,
2625 						    lo.ctx, engine,
2626 						    MI_ARB_CHECK);
2627 			if (IS_ERR(rq))
2628 				goto err_wedged;
2629 			i915_request_add(rq);
2630 
2631 			for (i = 0; i < count; i++) {
2632 				rq = igt_request_alloc(lo.ctx, engine);
2633 				if (IS_ERR(rq))
2634 					goto err_wedged;
2635 				i915_request_add(rq);
2636 			}
2637 
2638 			rq = igt_request_alloc(hi.ctx, engine);
2639 			if (IS_ERR(rq))
2640 				goto err_wedged;
2641 
2642 			i915_request_get(rq);
2643 			i915_request_add(rq);
2644 			engine->schedule(rq, &attr);
2645 
2646 			igt_spinner_end(&hi.spin);
2647 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2648 				struct drm_printer p =
2649 					drm_info_printer(gt->i915->drm.dev);
2650 
2651 				pr_err("Failed to preempt over chain of %d\n",
2652 				       count);
2653 				intel_engine_dump(engine, &p,
2654 						  "%s\n", engine->name);
2655 				i915_request_put(rq);
2656 				goto err_wedged;
2657 			}
2658 			igt_spinner_end(&lo.spin);
2659 			i915_request_put(rq);
2660 
2661 			rq = igt_request_alloc(lo.ctx, engine);
2662 			if (IS_ERR(rq))
2663 				goto err_wedged;
2664 
2665 			i915_request_get(rq);
2666 			i915_request_add(rq);
2667 
2668 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2669 				struct drm_printer p =
2670 					drm_info_printer(gt->i915->drm.dev);
2671 
2672 				pr_err("Failed to flush low priority chain of %d requests\n",
2673 				       count);
2674 				intel_engine_dump(engine, &p,
2675 						  "%s\n", engine->name);
2676 
2677 				i915_request_put(rq);
2678 				goto err_wedged;
2679 			}
2680 			i915_request_put(rq);
2681 		}
2682 
2683 		if (igt_live_test_end(&t)) {
2684 			err = -EIO;
2685 			goto err_wedged;
2686 		}
2687 	}
2688 
2689 	err = 0;
2690 err_client_lo:
2691 	preempt_client_fini(&lo);
2692 err_client_hi:
2693 	preempt_client_fini(&hi);
2694 	return err;
2695 
2696 err_wedged:
2697 	igt_spinner_end(&hi.spin);
2698 	igt_spinner_end(&lo.spin);
2699 	intel_gt_set_wedged(gt);
2700 	err = -EIO;
2701 	goto err_client_lo;
2702 }
2703 
2704 static int create_gang(struct intel_engine_cs *engine,
2705 		       struct i915_request **prev)
2706 {
2707 	struct drm_i915_gem_object *obj;
2708 	struct intel_context *ce;
2709 	struct i915_request *rq;
2710 	struct i915_vma *vma;
2711 	u32 *cs;
2712 	int err;
2713 
2714 	ce = intel_context_create(engine);
2715 	if (IS_ERR(ce))
2716 		return PTR_ERR(ce);
2717 
2718 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2719 	if (IS_ERR(obj)) {
2720 		err = PTR_ERR(obj);
2721 		goto err_ce;
2722 	}
2723 
2724 	vma = i915_vma_instance(obj, ce->vm, NULL);
2725 	if (IS_ERR(vma)) {
2726 		err = PTR_ERR(vma);
2727 		goto err_obj;
2728 	}
2729 
2730 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2731 	if (err)
2732 		goto err_obj;
2733 
2734 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2735 	if (IS_ERR(cs))
2736 		goto err_obj;
2737 
2738 	/* Semaphore target: spin until zero */
2739 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2740 
2741 	*cs++ = MI_SEMAPHORE_WAIT |
2742 		MI_SEMAPHORE_POLL |
2743 		MI_SEMAPHORE_SAD_EQ_SDD;
2744 	*cs++ = 0;
2745 	*cs++ = lower_32_bits(vma->node.start);
2746 	*cs++ = upper_32_bits(vma->node.start);
2747 
2748 	if (*prev) {
2749 		u64 offset = (*prev)->batch->node.start;
2750 
2751 		/* Terminate the spinner in the next lower priority batch. */
2752 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2753 		*cs++ = lower_32_bits(offset);
2754 		*cs++ = upper_32_bits(offset);
2755 		*cs++ = 0;
2756 	}
2757 
2758 	*cs++ = MI_BATCH_BUFFER_END;
2759 	i915_gem_object_flush_map(obj);
2760 	i915_gem_object_unpin_map(obj);
2761 
2762 	rq = intel_context_create_request(ce);
2763 	if (IS_ERR(rq))
2764 		goto err_obj;
2765 
2766 	rq->batch = i915_vma_get(vma);
2767 	i915_request_get(rq);
2768 
2769 	i915_vma_lock(vma);
2770 	err = i915_request_await_object(rq, vma->obj, false);
2771 	if (!err)
2772 		err = i915_vma_move_to_active(vma, rq, 0);
2773 	if (!err)
2774 		err = rq->engine->emit_bb_start(rq,
2775 						vma->node.start,
2776 						PAGE_SIZE, 0);
2777 	i915_vma_unlock(vma);
2778 	i915_request_add(rq);
2779 	if (err)
2780 		goto err_rq;
2781 
2782 	i915_gem_object_put(obj);
2783 	intel_context_put(ce);
2784 
2785 	rq->client_link.next = &(*prev)->client_link;
2786 	*prev = rq;
2787 	return 0;
2788 
2789 err_rq:
2790 	i915_vma_put(rq->batch);
2791 	i915_request_put(rq);
2792 err_obj:
2793 	i915_gem_object_put(obj);
2794 err_ce:
2795 	intel_context_put(ce);
2796 	return err;
2797 }
2798 
2799 static int live_preempt_gang(void *arg)
2800 {
2801 	struct intel_gt *gt = arg;
2802 	struct intel_engine_cs *engine;
2803 	enum intel_engine_id id;
2804 
2805 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2806 		return 0;
2807 
2808 	/*
2809 	 * Build as long a chain of preempters as we can, with each
2810 	 * request higher priority than the last. Once we are ready, we release
2811 	 * the last batch which then precolates down the chain, each releasing
2812 	 * the next oldest in turn. The intent is to simply push as hard as we
2813 	 * can with the number of preemptions, trying to exceed narrow HW
2814 	 * limits. At a minimum, we insist that we can sort all the user
2815 	 * high priority levels into execution order.
2816 	 */
2817 
2818 	for_each_engine(engine, gt, id) {
2819 		struct i915_request *rq = NULL;
2820 		struct igt_live_test t;
2821 		IGT_TIMEOUT(end_time);
2822 		int prio = 0;
2823 		int err = 0;
2824 		u32 *cs;
2825 
2826 		if (!intel_engine_has_preemption(engine))
2827 			continue;
2828 
2829 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2830 			return -EIO;
2831 
2832 		do {
2833 			struct i915_sched_attr attr = {
2834 				.priority = I915_USER_PRIORITY(prio++),
2835 			};
2836 
2837 			err = create_gang(engine, &rq);
2838 			if (err)
2839 				break;
2840 
2841 			/* Submit each spinner at increasing priority */
2842 			engine->schedule(rq, &attr);
2843 
2844 			if (prio <= I915_PRIORITY_MAX)
2845 				continue;
2846 
2847 			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2848 				break;
2849 
2850 			if (__igt_timeout(end_time, NULL))
2851 				break;
2852 		} while (1);
2853 		pr_debug("%s: Preempt chain of %d requests\n",
2854 			 engine->name, prio);
2855 
2856 		/*
2857 		 * Such that the last spinner is the highest priority and
2858 		 * should execute first. When that spinner completes,
2859 		 * it will terminate the next lowest spinner until there
2860 		 * are no more spinners and the gang is complete.
2861 		 */
2862 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2863 		if (!IS_ERR(cs)) {
2864 			*cs = 0;
2865 			i915_gem_object_unpin_map(rq->batch->obj);
2866 		} else {
2867 			err = PTR_ERR(cs);
2868 			intel_gt_set_wedged(gt);
2869 		}
2870 
2871 		while (rq) { /* wait for each rq from highest to lowest prio */
2872 			struct i915_request *n =
2873 				list_next_entry(rq, client_link);
2874 
2875 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2876 				struct drm_printer p =
2877 					drm_info_printer(engine->i915->drm.dev);
2878 
2879 				pr_err("Failed to flush chain of %d requests, at %d\n",
2880 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2881 				intel_engine_dump(engine, &p,
2882 						  "%s\n", engine->name);
2883 
2884 				err = -ETIME;
2885 			}
2886 
2887 			i915_vma_put(rq->batch);
2888 			i915_request_put(rq);
2889 			rq = n;
2890 		}
2891 
2892 		if (igt_live_test_end(&t))
2893 			err = -EIO;
2894 		if (err)
2895 			return err;
2896 	}
2897 
2898 	return 0;
2899 }
2900 
2901 static struct i915_vma *
2902 create_gpr_user(struct intel_engine_cs *engine,
2903 		struct i915_vma *result,
2904 		unsigned int offset)
2905 {
2906 	struct drm_i915_gem_object *obj;
2907 	struct i915_vma *vma;
2908 	u32 *cs;
2909 	int err;
2910 	int i;
2911 
2912 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2913 	if (IS_ERR(obj))
2914 		return ERR_CAST(obj);
2915 
2916 	vma = i915_vma_instance(obj, result->vm, NULL);
2917 	if (IS_ERR(vma)) {
2918 		i915_gem_object_put(obj);
2919 		return vma;
2920 	}
2921 
2922 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2923 	if (err) {
2924 		i915_vma_put(vma);
2925 		return ERR_PTR(err);
2926 	}
2927 
2928 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2929 	if (IS_ERR(cs)) {
2930 		i915_vma_put(vma);
2931 		return ERR_CAST(cs);
2932 	}
2933 
2934 	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
2935 	*cs++ = MI_LOAD_REGISTER_IMM(1);
2936 	*cs++ = CS_GPR(engine, 0);
2937 	*cs++ = 1;
2938 
2939 	for (i = 1; i < NUM_GPR; i++) {
2940 		u64 addr;
2941 
2942 		/*
2943 		 * Perform: GPR[i]++
2944 		 *
2945 		 * As we read and write into the context saved GPR[i], if
2946 		 * we restart this batch buffer from an earlier point, we
2947 		 * will repeat the increment and store a value > 1.
2948 		 */
2949 		*cs++ = MI_MATH(4);
2950 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
2951 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
2952 		*cs++ = MI_MATH_ADD;
2953 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
2954 
2955 		addr = result->node.start + offset + i * sizeof(*cs);
2956 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
2957 		*cs++ = CS_GPR(engine, 2 * i);
2958 		*cs++ = lower_32_bits(addr);
2959 		*cs++ = upper_32_bits(addr);
2960 
2961 		*cs++ = MI_SEMAPHORE_WAIT |
2962 			MI_SEMAPHORE_POLL |
2963 			MI_SEMAPHORE_SAD_GTE_SDD;
2964 		*cs++ = i;
2965 		*cs++ = lower_32_bits(result->node.start);
2966 		*cs++ = upper_32_bits(result->node.start);
2967 	}
2968 
2969 	*cs++ = MI_BATCH_BUFFER_END;
2970 	i915_gem_object_flush_map(obj);
2971 	i915_gem_object_unpin_map(obj);
2972 
2973 	return vma;
2974 }
2975 
2976 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
2977 {
2978 	struct drm_i915_gem_object *obj;
2979 	struct i915_vma *vma;
2980 	int err;
2981 
2982 	obj = i915_gem_object_create_internal(gt->i915, sz);
2983 	if (IS_ERR(obj))
2984 		return ERR_CAST(obj);
2985 
2986 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
2987 	if (IS_ERR(vma)) {
2988 		i915_gem_object_put(obj);
2989 		return vma;
2990 	}
2991 
2992 	err = i915_ggtt_pin(vma, 0, 0);
2993 	if (err) {
2994 		i915_vma_put(vma);
2995 		return ERR_PTR(err);
2996 	}
2997 
2998 	return vma;
2999 }
3000 
3001 static struct i915_request *
3002 create_gpr_client(struct intel_engine_cs *engine,
3003 		  struct i915_vma *global,
3004 		  unsigned int offset)
3005 {
3006 	struct i915_vma *batch, *vma;
3007 	struct intel_context *ce;
3008 	struct i915_request *rq;
3009 	int err;
3010 
3011 	ce = intel_context_create(engine);
3012 	if (IS_ERR(ce))
3013 		return ERR_CAST(ce);
3014 
3015 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3016 	if (IS_ERR(vma)) {
3017 		err = PTR_ERR(vma);
3018 		goto out_ce;
3019 	}
3020 
3021 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3022 	if (err)
3023 		goto out_ce;
3024 
3025 	batch = create_gpr_user(engine, vma, offset);
3026 	if (IS_ERR(batch)) {
3027 		err = PTR_ERR(batch);
3028 		goto out_vma;
3029 	}
3030 
3031 	rq = intel_context_create_request(ce);
3032 	if (IS_ERR(rq)) {
3033 		err = PTR_ERR(rq);
3034 		goto out_batch;
3035 	}
3036 
3037 	i915_vma_lock(vma);
3038 	err = i915_request_await_object(rq, vma->obj, false);
3039 	if (!err)
3040 		err = i915_vma_move_to_active(vma, rq, 0);
3041 	i915_vma_unlock(vma);
3042 
3043 	i915_vma_lock(batch);
3044 	if (!err)
3045 		err = i915_request_await_object(rq, batch->obj, false);
3046 	if (!err)
3047 		err = i915_vma_move_to_active(batch, rq, 0);
3048 	if (!err)
3049 		err = rq->engine->emit_bb_start(rq,
3050 						batch->node.start,
3051 						PAGE_SIZE, 0);
3052 	i915_vma_unlock(batch);
3053 	i915_vma_unpin(batch);
3054 
3055 	if (!err)
3056 		i915_request_get(rq);
3057 	i915_request_add(rq);
3058 
3059 out_batch:
3060 	i915_vma_put(batch);
3061 out_vma:
3062 	i915_vma_unpin(vma);
3063 out_ce:
3064 	intel_context_put(ce);
3065 	return err ? ERR_PTR(err) : rq;
3066 }
3067 
3068 static int preempt_user(struct intel_engine_cs *engine,
3069 			struct i915_vma *global,
3070 			int id)
3071 {
3072 	struct i915_sched_attr attr = {
3073 		.priority = I915_PRIORITY_MAX
3074 	};
3075 	struct i915_request *rq;
3076 	int err = 0;
3077 	u32 *cs;
3078 
3079 	rq = intel_engine_create_kernel_request(engine);
3080 	if (IS_ERR(rq))
3081 		return PTR_ERR(rq);
3082 
3083 	cs = intel_ring_begin(rq, 4);
3084 	if (IS_ERR(cs)) {
3085 		i915_request_add(rq);
3086 		return PTR_ERR(cs);
3087 	}
3088 
3089 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3090 	*cs++ = i915_ggtt_offset(global);
3091 	*cs++ = 0;
3092 	*cs++ = id;
3093 
3094 	intel_ring_advance(rq, cs);
3095 
3096 	i915_request_get(rq);
3097 	i915_request_add(rq);
3098 
3099 	engine->schedule(rq, &attr);
3100 
3101 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3102 		err = -ETIME;
3103 	i915_request_put(rq);
3104 
3105 	return err;
3106 }
3107 
3108 static int live_preempt_user(void *arg)
3109 {
3110 	struct intel_gt *gt = arg;
3111 	struct intel_engine_cs *engine;
3112 	struct i915_vma *global;
3113 	enum intel_engine_id id;
3114 	u32 *result;
3115 	int err = 0;
3116 
3117 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3118 		return 0;
3119 
3120 	/*
3121 	 * In our other tests, we look at preemption in carefully
3122 	 * controlled conditions in the ringbuffer. Since most of the
3123 	 * time is spent in user batches, most of our preemptions naturally
3124 	 * occur there. We want to verify that when we preempt inside a batch
3125 	 * we continue on from the current instruction and do not roll back
3126 	 * to the start, or another earlier arbitration point.
3127 	 *
3128 	 * To verify this, we create a batch which is a mixture of
3129 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3130 	 * a few preempting contexts thrown into the mix, we look for any
3131 	 * repeated instructions (which show up as incorrect values).
3132 	 */
3133 
3134 	global = create_global(gt, 4096);
3135 	if (IS_ERR(global))
3136 		return PTR_ERR(global);
3137 
3138 	result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3139 	if (IS_ERR(result)) {
3140 		i915_vma_unpin_and_release(&global, 0);
3141 		return PTR_ERR(result);
3142 	}
3143 
3144 	for_each_engine(engine, gt, id) {
3145 		struct i915_request *client[3] = {};
3146 		struct igt_live_test t;
3147 		int i;
3148 
3149 		if (!intel_engine_has_preemption(engine))
3150 			continue;
3151 
3152 		if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3153 			continue; /* we need per-context GPR */
3154 
3155 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3156 			err = -EIO;
3157 			break;
3158 		}
3159 
3160 		memset(result, 0, 4096);
3161 
3162 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3163 			struct i915_request *rq;
3164 
3165 			rq = create_gpr_client(engine, global,
3166 					       NUM_GPR * i * sizeof(u32));
3167 			if (IS_ERR(rq))
3168 				goto end_test;
3169 
3170 			client[i] = rq;
3171 		}
3172 
3173 		/* Continuously preempt the set of 3 running contexts */
3174 		for (i = 1; i <= NUM_GPR; i++) {
3175 			err = preempt_user(engine, global, i);
3176 			if (err)
3177 				goto end_test;
3178 		}
3179 
3180 		if (READ_ONCE(result[0]) != NUM_GPR) {
3181 			pr_err("%s: Failed to release semaphore\n",
3182 			       engine->name);
3183 			err = -EIO;
3184 			goto end_test;
3185 		}
3186 
3187 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3188 			int gpr;
3189 
3190 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3191 				err = -ETIME;
3192 				goto end_test;
3193 			}
3194 
3195 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3196 				if (result[NUM_GPR * i + gpr] != 1) {
3197 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3198 					       engine->name,
3199 					       i, gpr, result[NUM_GPR * i + gpr]);
3200 					err = -EINVAL;
3201 					goto end_test;
3202 				}
3203 			}
3204 		}
3205 
3206 end_test:
3207 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3208 			if (!client[i])
3209 				break;
3210 
3211 			i915_request_put(client[i]);
3212 		}
3213 
3214 		/* Flush the semaphores on error */
3215 		smp_store_mb(result[0], -1);
3216 		if (igt_live_test_end(&t))
3217 			err = -EIO;
3218 		if (err)
3219 			break;
3220 	}
3221 
3222 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3223 	return err;
3224 }
3225 
3226 static int live_preempt_timeout(void *arg)
3227 {
3228 	struct intel_gt *gt = arg;
3229 	struct i915_gem_context *ctx_hi, *ctx_lo;
3230 	struct igt_spinner spin_lo;
3231 	struct intel_engine_cs *engine;
3232 	enum intel_engine_id id;
3233 	int err = -ENOMEM;
3234 
3235 	/*
3236 	 * Check that we force preemption to occur by cancelling the previous
3237 	 * context if it refuses to yield the GPU.
3238 	 */
3239 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3240 		return 0;
3241 
3242 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3243 		return 0;
3244 
3245 	if (!intel_has_reset_engine(gt))
3246 		return 0;
3247 
3248 	if (igt_spinner_init(&spin_lo, gt))
3249 		return -ENOMEM;
3250 
3251 	ctx_hi = kernel_context(gt->i915);
3252 	if (!ctx_hi)
3253 		goto err_spin_lo;
3254 	ctx_hi->sched.priority =
3255 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3256 
3257 	ctx_lo = kernel_context(gt->i915);
3258 	if (!ctx_lo)
3259 		goto err_ctx_hi;
3260 	ctx_lo->sched.priority =
3261 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3262 
3263 	for_each_engine(engine, gt, id) {
3264 		unsigned long saved_timeout;
3265 		struct i915_request *rq;
3266 
3267 		if (!intel_engine_has_preemption(engine))
3268 			continue;
3269 
3270 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3271 					    MI_NOOP); /* preemption disabled */
3272 		if (IS_ERR(rq)) {
3273 			err = PTR_ERR(rq);
3274 			goto err_ctx_lo;
3275 		}
3276 
3277 		i915_request_add(rq);
3278 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3279 			intel_gt_set_wedged(gt);
3280 			err = -EIO;
3281 			goto err_ctx_lo;
3282 		}
3283 
3284 		rq = igt_request_alloc(ctx_hi, engine);
3285 		if (IS_ERR(rq)) {
3286 			igt_spinner_end(&spin_lo);
3287 			err = PTR_ERR(rq);
3288 			goto err_ctx_lo;
3289 		}
3290 
3291 		/* Flush the previous CS ack before changing timeouts */
3292 		while (READ_ONCE(engine->execlists.pending[0]))
3293 			cpu_relax();
3294 
3295 		saved_timeout = engine->props.preempt_timeout_ms;
3296 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3297 
3298 		i915_request_get(rq);
3299 		i915_request_add(rq);
3300 
3301 		intel_engine_flush_submission(engine);
3302 		engine->props.preempt_timeout_ms = saved_timeout;
3303 
3304 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3305 			intel_gt_set_wedged(gt);
3306 			i915_request_put(rq);
3307 			err = -ETIME;
3308 			goto err_ctx_lo;
3309 		}
3310 
3311 		igt_spinner_end(&spin_lo);
3312 		i915_request_put(rq);
3313 	}
3314 
3315 	err = 0;
3316 err_ctx_lo:
3317 	kernel_context_close(ctx_lo);
3318 err_ctx_hi:
3319 	kernel_context_close(ctx_hi);
3320 err_spin_lo:
3321 	igt_spinner_fini(&spin_lo);
3322 	return err;
3323 }
3324 
/*
 * Draw a value from @rnd, bounded by the span (@max - @min) and shifted
 * up by @min.
 */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	const int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
3329 
3330 static int random_priority(struct rnd_state *rnd)
3331 {
3332 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3333 }
3334 
/* State shared by the preemption smoke tests (smoke_*() helpers). */
struct preempt_smoke {
	/* GT whose engines are exercised */
	struct intel_gt *gt;
	/* array of ncontext contexts; smoke_context() picks one at random */
	struct i915_gem_context **contexts;
	/* engine that smoke_submit() allocates its request on */
	struct intel_engine_cs *engine;
	/* batch object handed to smoke_submit(); NULL to submit without one */
	struct drm_i915_gem_object *batch;
	/* number of entries in contexts */
	unsigned int ncontext;
	/* random state used to choose contexts and priorities */
	struct rnd_state prng;
	/* number of requests submitted, as recorded by the crescendo thread */
	unsigned long count;
};
3344 
3345 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3346 {
3347 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3348 							  &smoke->prng)];
3349 }
3350 
3351 static int smoke_submit(struct preempt_smoke *smoke,
3352 			struct i915_gem_context *ctx, int prio,
3353 			struct drm_i915_gem_object *batch)
3354 {
3355 	struct i915_request *rq;
3356 	struct i915_vma *vma = NULL;
3357 	int err = 0;
3358 
3359 	if (batch) {
3360 		struct i915_address_space *vm;
3361 
3362 		vm = i915_gem_context_get_vm_rcu(ctx);
3363 		vma = i915_vma_instance(batch, vm, NULL);
3364 		i915_vm_put(vm);
3365 		if (IS_ERR(vma))
3366 			return PTR_ERR(vma);
3367 
3368 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3369 		if (err)
3370 			return err;
3371 	}
3372 
3373 	ctx->sched.priority = prio;
3374 
3375 	rq = igt_request_alloc(ctx, smoke->engine);
3376 	if (IS_ERR(rq)) {
3377 		err = PTR_ERR(rq);
3378 		goto unpin;
3379 	}
3380 
3381 	if (vma) {
3382 		i915_vma_lock(vma);
3383 		err = i915_request_await_object(rq, vma->obj, false);
3384 		if (!err)
3385 			err = i915_vma_move_to_active(vma, rq, 0);
3386 		if (!err)
3387 			err = rq->engine->emit_bb_start(rq,
3388 							vma->node.start,
3389 							PAGE_SIZE, 0);
3390 		i915_vma_unlock(vma);
3391 	}
3392 
3393 	i915_request_add(rq);
3394 
3395 unpin:
3396 	if (vma)
3397 		i915_vma_unpin(vma);
3398 
3399 	return err;
3400 }
3401 
3402 static int smoke_crescendo_thread(void *arg)
3403 {
3404 	struct preempt_smoke *smoke = arg;
3405 	IGT_TIMEOUT(end_time);
3406 	unsigned long count;
3407 
3408 	count = 0;
3409 	do {
3410 		struct i915_gem_context *ctx = smoke_context(smoke);
3411 		int err;
3412 
3413 		err = smoke_submit(smoke,
3414 				   ctx, count % I915_PRIORITY_MAX,
3415 				   smoke->batch);
3416 		if (err)
3417 			return err;
3418 
3419 		count++;
3420 	} while (!__igt_timeout(end_time, NULL));
3421 
3422 	smoke->count = count;
3423 	return 0;
3424 }
3425 
3426 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3427 #define BATCH BIT(0)
3428 {
3429 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3430 	struct preempt_smoke arg[I915_NUM_ENGINES];
3431 	struct intel_engine_cs *engine;
3432 	enum intel_engine_id id;
3433 	unsigned long count;
3434 	int err = 0;
3435 
3436 	for_each_engine(engine, smoke->gt, id) {
3437 		arg[id] = *smoke;
3438 		arg[id].engine = engine;
3439 		if (!(flags & BATCH))
3440 			arg[id].batch = NULL;
3441 		arg[id].count = 0;
3442 
3443 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3444 				      "igt/smoke:%d", id);
3445 		if (IS_ERR(tsk[id])) {
3446 			err = PTR_ERR(tsk[id]);
3447 			break;
3448 		}
3449 		get_task_struct(tsk[id]);
3450 	}
3451 
3452 	yield(); /* start all threads before we kthread_stop() */
3453 
3454 	count = 0;
3455 	for_each_engine(engine, smoke->gt, id) {
3456 		int status;
3457 
3458 		if (IS_ERR_OR_NULL(tsk[id]))
3459 			continue;
3460 
3461 		status = kthread_stop(tsk[id]);
3462 		if (status && !err)
3463 			err = status;
3464 
3465 		count += arg[id].count;
3466 
3467 		put_task_struct(tsk[id]);
3468 	}
3469 
3470 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3471 		count, flags,
3472 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3473 	return 0;
3474 }
3475 
3476 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3477 {
3478 	enum intel_engine_id id;
3479 	IGT_TIMEOUT(end_time);
3480 	unsigned long count;
3481 
3482 	count = 0;
3483 	do {
3484 		for_each_engine(smoke->engine, smoke->gt, id) {
3485 			struct i915_gem_context *ctx = smoke_context(smoke);
3486 			int err;
3487 
3488 			err = smoke_submit(smoke,
3489 					   ctx, random_priority(&smoke->prng),
3490 					   flags & BATCH ? smoke->batch : NULL);
3491 			if (err)
3492 				return err;
3493 
3494 			count++;
3495 		}
3496 	} while (!__igt_timeout(end_time, NULL));
3497 
3498 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3499 		count, flags,
3500 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3501 	return 0;
3502 }
3503 
3504 static int live_preempt_smoke(void *arg)
3505 {
3506 	struct preempt_smoke smoke = {
3507 		.gt = arg,
3508 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3509 		.ncontext = 1024,
3510 	};
3511 	const unsigned int phase[] = { 0, BATCH };
3512 	struct igt_live_test t;
3513 	int err = -ENOMEM;
3514 	u32 *cs;
3515 	int n;
3516 
3517 	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3518 		return 0;
3519 
3520 	smoke.contexts = kmalloc_array(smoke.ncontext,
3521 				       sizeof(*smoke.contexts),
3522 				       GFP_KERNEL);
3523 	if (!smoke.contexts)
3524 		return -ENOMEM;
3525 
3526 	smoke.batch =
3527 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3528 	if (IS_ERR(smoke.batch)) {
3529 		err = PTR_ERR(smoke.batch);
3530 		goto err_free;
3531 	}
3532 
3533 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3534 	if (IS_ERR(cs)) {
3535 		err = PTR_ERR(cs);
3536 		goto err_batch;
3537 	}
3538 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3539 		cs[n] = MI_ARB_CHECK;
3540 	cs[n] = MI_BATCH_BUFFER_END;
3541 	i915_gem_object_flush_map(smoke.batch);
3542 	i915_gem_object_unpin_map(smoke.batch);
3543 
3544 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3545 		err = -EIO;
3546 		goto err_batch;
3547 	}
3548 
3549 	for (n = 0; n < smoke.ncontext; n++) {
3550 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3551 		if (!smoke.contexts[n])
3552 			goto err_ctx;
3553 	}
3554 
3555 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3556 		err = smoke_crescendo(&smoke, phase[n]);
3557 		if (err)
3558 			goto err_ctx;
3559 
3560 		err = smoke_random(&smoke, phase[n]);
3561 		if (err)
3562 			goto err_ctx;
3563 	}
3564 
3565 err_ctx:
3566 	if (igt_live_test_end(&t))
3567 		err = -EIO;
3568 
3569 	for (n = 0; n < smoke.ncontext; n++) {
3570 		if (!smoke.contexts[n])
3571 			break;
3572 		kernel_context_close(smoke.contexts[n]);
3573 	}
3574 
3575 err_batch:
3576 	i915_gem_object_put(smoke.batch);
3577 err_free:
3578 	kfree(smoke.contexts);
3579 
3580 	return err;
3581 }
3582 
/*
 * Submit batches of empty requests to @nctx virtual engines built from
 * @siblings and report the submission latency.
 *
 * @gt: the GT under test
 * @siblings: physical engines backing each virtual engine
 * @nsibling: number of entries in @siblings
 * @nctx: number of virtual contexts to create (1..16)
 * @flags: CHAIN submits all requests of one context before moving on to the
 *         next context; otherwise the contexts are interleaved request by
 *         request.
 *
 * For each prime number up to 8192 (or until the IGT timeout expires), that
 * many requests are queued on every context and the last request of each
 * context is waited upon.
 *
 * Returns 0 on success or a negative error code.
 */
static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {}; /* last request per context */
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {}; /* [0]: elapsed for prime == 1, [1]: latest pass */
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			/* Only unwind the contexts that were fully set up */
			nctx = n;
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			/* Queue all 'prime' requests on one context at a time */
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					/* Keep a reference to the newest request only */
					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			/* Interleave: one request per context, 'prime' times */
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				/*
				 * Note: this only leaves the wait loop and
				 * does not set err here.
				 */
				break;
			}
		}

		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		if (prime == 1)
			times[0] = times[1];

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	/* request[nc] may be NULL here; nctx was trimmed on setup failure */
	for (nc = 0; nc < nctx; nc++) {
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}
3708 
3709 static int live_virtual_engine(void *arg)
3710 {
3711 	struct intel_gt *gt = arg;
3712 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3713 	struct intel_engine_cs *engine;
3714 	enum intel_engine_id id;
3715 	unsigned int class, inst;
3716 	int err;
3717 
3718 	if (intel_uc_uses_guc_submission(&gt->uc))
3719 		return 0;
3720 
3721 	for_each_engine(engine, gt, id) {
3722 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3723 		if (err) {
3724 			pr_err("Failed to wrap engine %s: err=%d\n",
3725 			       engine->name, err);
3726 			return err;
3727 		}
3728 	}
3729 
3730 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3731 		int nsibling, n;
3732 
3733 		nsibling = 0;
3734 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3735 			if (!gt->engine_class[class][inst])
3736 				continue;
3737 
3738 			siblings[nsibling++] = gt->engine_class[class][inst];
3739 		}
3740 		if (nsibling < 2)
3741 			continue;
3742 
3743 		for (n = 1; n <= nsibling + 1; n++) {
3744 			err = nop_virtual_engine(gt, siblings, nsibling,
3745 						 n, 0);
3746 			if (err)
3747 				return err;
3748 		}
3749 
3750 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3751 		if (err)
3752 			return err;
3753 	}
3754 
3755 	return 0;
3756 }
3757 
3758 static int mask_virtual_engine(struct intel_gt *gt,
3759 			       struct intel_engine_cs **siblings,
3760 			       unsigned int nsibling)
3761 {
3762 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3763 	struct intel_context *ve;
3764 	struct igt_live_test t;
3765 	unsigned int n;
3766 	int err;
3767 
3768 	/*
3769 	 * Check that by setting the execution mask on a request, we can
3770 	 * restrict it to our desired engine within the virtual engine.
3771 	 */
3772 
3773 	ve = intel_execlists_create_virtual(siblings, nsibling);
3774 	if (IS_ERR(ve)) {
3775 		err = PTR_ERR(ve);
3776 		goto out_close;
3777 	}
3778 
3779 	err = intel_context_pin(ve);
3780 	if (err)
3781 		goto out_put;
3782 
3783 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3784 	if (err)
3785 		goto out_unpin;
3786 
3787 	for (n = 0; n < nsibling; n++) {
3788 		request[n] = i915_request_create(ve);
3789 		if (IS_ERR(request[n])) {
3790 			err = PTR_ERR(request[n]);
3791 			nsibling = n;
3792 			goto out;
3793 		}
3794 
3795 		/* Reverse order as it's more likely to be unnatural */
3796 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3797 
3798 		i915_request_get(request[n]);
3799 		i915_request_add(request[n]);
3800 	}
3801 
3802 	for (n = 0; n < nsibling; n++) {
3803 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3804 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3805 			       __func__, ve->engine->name,
3806 			       request[n]->fence.context,
3807 			       request[n]->fence.seqno);
3808 
3809 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3810 				  __func__, ve->engine->name,
3811 				  request[n]->fence.context,
3812 				  request[n]->fence.seqno);
3813 			GEM_TRACE_DUMP();
3814 			intel_gt_set_wedged(gt);
3815 			err = -EIO;
3816 			goto out;
3817 		}
3818 
3819 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3820 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3821 			       request[n]->engine->name,
3822 			       siblings[nsibling - n - 1]->name);
3823 			err = -EINVAL;
3824 			goto out;
3825 		}
3826 	}
3827 
3828 	err = igt_live_test_end(&t);
3829 out:
3830 	if (igt_flush_test(gt->i915))
3831 		err = -EIO;
3832 
3833 	for (n = 0; n < nsibling; n++)
3834 		i915_request_put(request[n]);
3835 
3836 out_unpin:
3837 	intel_context_unpin(ve);
3838 out_put:
3839 	intel_context_put(ve);
3840 out_close:
3841 	return err;
3842 }
3843 
3844 static int live_virtual_mask(void *arg)
3845 {
3846 	struct intel_gt *gt = arg;
3847 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3848 	unsigned int class, inst;
3849 	int err;
3850 
3851 	if (intel_uc_uses_guc_submission(&gt->uc))
3852 		return 0;
3853 
3854 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3855 		unsigned int nsibling;
3856 
3857 		nsibling = 0;
3858 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3859 			if (!gt->engine_class[class][inst])
3860 				break;
3861 
3862 			siblings[nsibling++] = gt->engine_class[class][inst];
3863 		}
3864 		if (nsibling < 2)
3865 			continue;
3866 
3867 		err = mask_virtual_engine(gt, siblings, nsibling);
3868 		if (err)
3869 			return err;
3870 	}
3871 
3872 	return 0;
3873 }
3874 
3875 static int preserved_virtual_engine(struct intel_gt *gt,
3876 				    struct intel_engine_cs **siblings,
3877 				    unsigned int nsibling)
3878 {
3879 	struct i915_request *last = NULL;
3880 	struct intel_context *ve;
3881 	struct i915_vma *scratch;
3882 	struct igt_live_test t;
3883 	unsigned int n;
3884 	int err = 0;
3885 	u32 *cs;
3886 
3887 	scratch = create_scratch(siblings[0]->gt);
3888 	if (IS_ERR(scratch))
3889 		return PTR_ERR(scratch);
3890 
3891 	err = i915_vma_sync(scratch);
3892 	if (err)
3893 		goto out_scratch;
3894 
3895 	ve = intel_execlists_create_virtual(siblings, nsibling);
3896 	if (IS_ERR(ve)) {
3897 		err = PTR_ERR(ve);
3898 		goto out_scratch;
3899 	}
3900 
3901 	err = intel_context_pin(ve);
3902 	if (err)
3903 		goto out_put;
3904 
3905 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3906 	if (err)
3907 		goto out_unpin;
3908 
3909 	for (n = 0; n < NUM_GPR_DW; n++) {
3910 		struct intel_engine_cs *engine = siblings[n % nsibling];
3911 		struct i915_request *rq;
3912 
3913 		rq = i915_request_create(ve);
3914 		if (IS_ERR(rq)) {
3915 			err = PTR_ERR(rq);
3916 			goto out_end;
3917 		}
3918 
3919 		i915_request_put(last);
3920 		last = i915_request_get(rq);
3921 
3922 		cs = intel_ring_begin(rq, 8);
3923 		if (IS_ERR(cs)) {
3924 			i915_request_add(rq);
3925 			err = PTR_ERR(cs);
3926 			goto out_end;
3927 		}
3928 
3929 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3930 		*cs++ = CS_GPR(engine, n);
3931 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3932 		*cs++ = 0;
3933 
3934 		*cs++ = MI_LOAD_REGISTER_IMM(1);
3935 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3936 		*cs++ = n + 1;
3937 
3938 		*cs++ = MI_NOOP;
3939 		intel_ring_advance(rq, cs);
3940 
3941 		/* Restrict this request to run on a particular engine */
3942 		rq->execution_mask = engine->mask;
3943 		i915_request_add(rq);
3944 	}
3945 
3946 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
3947 		err = -ETIME;
3948 		goto out_end;
3949 	}
3950 
3951 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3952 	if (IS_ERR(cs)) {
3953 		err = PTR_ERR(cs);
3954 		goto out_end;
3955 	}
3956 
3957 	for (n = 0; n < NUM_GPR_DW; n++) {
3958 		if (cs[n] != n) {
3959 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
3960 			       cs[n], n);
3961 			err = -EINVAL;
3962 			break;
3963 		}
3964 	}
3965 
3966 	i915_gem_object_unpin_map(scratch->obj);
3967 
3968 out_end:
3969 	if (igt_live_test_end(&t))
3970 		err = -EIO;
3971 	i915_request_put(last);
3972 out_unpin:
3973 	intel_context_unpin(ve);
3974 out_put:
3975 	intel_context_put(ve);
3976 out_scratch:
3977 	i915_vma_unpin_and_release(&scratch, 0);
3978 	return err;
3979 }
3980 
3981 static int live_virtual_preserved(void *arg)
3982 {
3983 	struct intel_gt *gt = arg;
3984 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3985 	unsigned int class, inst;
3986 
3987 	/*
3988 	 * Check that the context image retains non-privileged (user) registers
3989 	 * from one engine to the next. For this we check that the CS_GPR
3990 	 * are preserved.
3991 	 */
3992 
3993 	if (intel_uc_uses_guc_submission(&gt->uc))
3994 		return 0;
3995 
3996 	/* As we use CS_GPR we cannot run before they existed on all engines. */
3997 	if (INTEL_GEN(gt->i915) < 9)
3998 		return 0;
3999 
4000 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4001 		int nsibling, err;
4002 
4003 		nsibling = 0;
4004 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4005 			if (!gt->engine_class[class][inst])
4006 				continue;
4007 
4008 			siblings[nsibling++] = gt->engine_class[class][inst];
4009 		}
4010 		if (nsibling < 2)
4011 			continue;
4012 
4013 		err = preserved_virtual_engine(gt, siblings, nsibling);
4014 		if (err)
4015 			return err;
4016 	}
4017 
4018 	return 0;
4019 }
4020 
/*
 * Check that bonded requests on a virtual engine run on the sibling they
 * were bonded to.
 *
 * @gt: the GT under test
 * @class: engine class of @siblings; masters are picked from other classes
 * @siblings: engines making up each virtual engine
 * @nsibling: number of entries in @siblings
 * @flags: BOND_SCHEDULE gates the master's submission on an on-stack fence
 *         instead of waiting for the master spinner to start
 *
 * For every engine outside @class, a spinner is submitted as the master
 * (rq[0]) and one bonded request per sibling (rq[1..nsibling]) is queued on
 * its own virtual engine, awaiting execution of the master.
 *
 * Returns 0 on success or a negative error code.
 */
static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
	 */

	/* rq[] needs room for the master, nsibling bonds and a terminator */
	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	/* An error pointer in rq[0] makes the cleanup loop at out: a no-op */
	rq[0] = ERR_PTR(-ENOMEM);
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};
		struct intel_context *ce;

		/* The master must come from a different engine class */
		if (master->class == class)
			continue;

		ce = intel_context_create(master);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		/*
		 * Fill rq[] with error pointers: the first error pointer
		 * terminates the release loops below.
		 */
		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
		intel_context_put(ce);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

		if (flags & BOND_SCHEDULE) {
			/* Gate the master's submission on the on-stack fence */
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		if (err < 0)
			goto out;

		/* Without BOND_SCHEDULE, the master must be running first */
		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			/* Bond: when master runs, this ve should use siblings[n] */
			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			/*
			 * NOTE(review): the local reference is dropped right
			 * after pinning; presumably the pin, and then the
			 * request, keep ve alive for the accesses below -
			 * confirm against intel_context refcounting.
			 */
			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
		/* Everything is queued: finish the fence and stop the spinner */
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

		/* Release this round's requests and re-arm the terminator */
		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	/* Releases whatever the failing round still holds (if anything) */
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}
4207 
4208 static int live_virtual_bond(void *arg)
4209 {
4210 	static const struct phase {
4211 		const char *name;
4212 		unsigned int flags;
4213 	} phases[] = {
4214 		{ "", 0 },
4215 		{ "schedule", BOND_SCHEDULE },
4216 		{ },
4217 	};
4218 	struct intel_gt *gt = arg;
4219 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4220 	unsigned int class, inst;
4221 	int err;
4222 
4223 	if (intel_uc_uses_guc_submission(&gt->uc))
4224 		return 0;
4225 
4226 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4227 		const struct phase *p;
4228 		int nsibling;
4229 
4230 		nsibling = 0;
4231 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4232 			if (!gt->engine_class[class][inst])
4233 				break;
4234 
4235 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
4236 			siblings[nsibling++] = gt->engine_class[class][inst];
4237 		}
4238 		if (nsibling < 2)
4239 			continue;
4240 
4241 		for (p = phases; p->name; p++) {
4242 			err = bond_virtual_engine(gt,
4243 						  class, siblings, nsibling,
4244 						  p->flags);
4245 			if (err) {
4246 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4247 				       __func__, p->name, class, nsibling, err);
4248 				return err;
4249 			}
4250 		}
4251 	}
4252 
4253 	return 0;
4254 }
4255 
4256 static int reset_virtual_engine(struct intel_gt *gt,
4257 				struct intel_engine_cs **siblings,
4258 				unsigned int nsibling)
4259 {
4260 	struct intel_engine_cs *engine;
4261 	struct intel_context *ve;
4262 	struct igt_spinner spin;
4263 	struct i915_request *rq;
4264 	unsigned int n;
4265 	int err = 0;
4266 
4267 	/*
4268 	 * In order to support offline error capture for fast preempt reset,
4269 	 * we need to decouple the guilty request and ensure that it and its
4270 	 * descendents are not executed while the capture is in progress.
4271 	 */
4272 
4273 	if (igt_spinner_init(&spin, gt))
4274 		return -ENOMEM;
4275 
4276 	ve = intel_execlists_create_virtual(siblings, nsibling);
4277 	if (IS_ERR(ve)) {
4278 		err = PTR_ERR(ve);
4279 		goto out_spin;
4280 	}
4281 
4282 	for (n = 0; n < nsibling; n++)
4283 		engine_heartbeat_disable(siblings[n]);
4284 
4285 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4286 	if (IS_ERR(rq)) {
4287 		err = PTR_ERR(rq);
4288 		goto out_heartbeat;
4289 	}
4290 	i915_request_add(rq);
4291 
4292 	if (!igt_wait_for_spinner(&spin, rq)) {
4293 		intel_gt_set_wedged(gt);
4294 		err = -ETIME;
4295 		goto out_heartbeat;
4296 	}
4297 
4298 	engine = rq->engine;
4299 	GEM_BUG_ON(engine == ve->engine);
4300 
4301 	/* Take ownership of the reset and tasklet */
4302 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4303 			     &gt->reset.flags)) {
4304 		intel_gt_set_wedged(gt);
4305 		err = -EBUSY;
4306 		goto out_heartbeat;
4307 	}
4308 	tasklet_disable(&engine->execlists.tasklet);
4309 
4310 	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4311 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4312 
4313 	/* Fake a preemption event; failed of course */
4314 	spin_lock_irq(&engine->active.lock);
4315 	__unwind_incomplete_requests(engine);
4316 	spin_unlock_irq(&engine->active.lock);
4317 	GEM_BUG_ON(rq->engine != ve->engine);
4318 
4319 	/* Reset the engine while keeping our active request on hold */
4320 	execlists_hold(engine, rq);
4321 	GEM_BUG_ON(!i915_request_on_hold(rq));
4322 
4323 	intel_engine_reset(engine, NULL);
4324 	GEM_BUG_ON(rq->fence.error != -EIO);
4325 
4326 	/* Release our grasp on the engine, letting CS flow again */
4327 	tasklet_enable(&engine->execlists.tasklet);
4328 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4329 
4330 	/* Check that we do not resubmit the held request */
4331 	i915_request_get(rq);
4332 	if (!i915_request_wait(rq, 0, HZ / 5)) {
4333 		pr_err("%s: on hold request completed!\n",
4334 		       engine->name);
4335 		intel_gt_set_wedged(gt);
4336 		err = -EIO;
4337 		goto out_rq;
4338 	}
4339 	GEM_BUG_ON(!i915_request_on_hold(rq));
4340 
4341 	/* But is resubmitted on release */
4342 	execlists_unhold(engine, rq);
4343 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4344 		pr_err("%s: held request did not complete!\n",
4345 		       engine->name);
4346 		intel_gt_set_wedged(gt);
4347 		err = -ETIME;
4348 	}
4349 
4350 out_rq:
4351 	i915_request_put(rq);
4352 out_heartbeat:
4353 	for (n = 0; n < nsibling; n++)
4354 		engine_heartbeat_enable(siblings[n]);
4355 
4356 	intel_context_put(ve);
4357 out_spin:
4358 	igt_spinner_fini(&spin);
4359 	return err;
4360 }
4361 
4362 static int live_virtual_reset(void *arg)
4363 {
4364 	struct intel_gt *gt = arg;
4365 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4366 	unsigned int class, inst;
4367 
4368 	/*
4369 	 * Check that we handle a reset event within a virtual engine.
4370 	 * Only the physical engine is reset, but we have to check the flow
4371 	 * of the virtual requests around the reset, and make sure it is not
4372 	 * forgotten.
4373 	 */
4374 
4375 	if (intel_uc_uses_guc_submission(&gt->uc))
4376 		return 0;
4377 
4378 	if (!intel_has_reset_engine(gt))
4379 		return 0;
4380 
4381 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4382 		int nsibling, err;
4383 
4384 		nsibling = 0;
4385 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4386 			if (!gt->engine_class[class][inst])
4387 				continue;
4388 
4389 			siblings[nsibling++] = gt->engine_class[class][inst];
4390 		}
4391 		if (nsibling < 2)
4392 			continue;
4393 
4394 		err = reset_virtual_engine(gt, siblings, nsibling);
4395 		if (err)
4396 			return err;
4397 	}
4398 
4399 	return 0;
4400 }
4401 
4402 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4403 {
4404 	static const struct i915_subtest tests[] = {
4405 		SUBTEST(live_sanitycheck),
4406 		SUBTEST(live_unlite_switch),
4407 		SUBTEST(live_unlite_preempt),
4408 		SUBTEST(live_pin_rewind),
4409 		SUBTEST(live_hold_reset),
4410 		SUBTEST(live_error_interrupt),
4411 		SUBTEST(live_timeslice_preempt),
4412 		SUBTEST(live_timeslice_rewind),
4413 		SUBTEST(live_timeslice_queue),
4414 		SUBTEST(live_timeslice_nopreempt),
4415 		SUBTEST(live_busywait_preempt),
4416 		SUBTEST(live_preempt),
4417 		SUBTEST(live_late_preempt),
4418 		SUBTEST(live_nopreempt),
4419 		SUBTEST(live_preempt_cancel),
4420 		SUBTEST(live_suppress_self_preempt),
4421 		SUBTEST(live_suppress_wait_preempt),
4422 		SUBTEST(live_chain_preempt),
4423 		SUBTEST(live_preempt_gang),
4424 		SUBTEST(live_preempt_timeout),
4425 		SUBTEST(live_preempt_user),
4426 		SUBTEST(live_preempt_smoke),
4427 		SUBTEST(live_virtual_engine),
4428 		SUBTEST(live_virtual_mask),
4429 		SUBTEST(live_virtual_preserved),
4430 		SUBTEST(live_virtual_bond),
4431 		SUBTEST(live_virtual_reset),
4432 	};
4433 
4434 	if (!HAS_EXECLISTS(i915))
4435 		return 0;
4436 
4437 	if (intel_gt_is_wedged(&i915->gt))
4438 		return 0;
4439 
4440 	return intel_gt_live_subtests(tests, &i915->gt);
4441 }
4442 
4443 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4444 {
4445 	const u32 offset =
4446 		i915_ggtt_offset(ce->engine->status_page.vma) +
4447 		offset_in_page(slot);
4448 	struct i915_request *rq;
4449 	u32 *cs;
4450 
4451 	rq = intel_context_create_request(ce);
4452 	if (IS_ERR(rq))
4453 		return PTR_ERR(rq);
4454 
4455 	cs = intel_ring_begin(rq, 4);
4456 	if (IS_ERR(cs)) {
4457 		i915_request_add(rq);
4458 		return PTR_ERR(cs);
4459 	}
4460 
4461 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4462 	*cs++ = offset;
4463 	*cs++ = 0;
4464 	*cs++ = 1;
4465 
4466 	intel_ring_advance(rq, cs);
4467 
4468 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4469 	i915_request_add(rq);
4470 	return 0;
4471 }
4472 
4473 static int context_flush(struct intel_context *ce, long timeout)
4474 {
4475 	struct i915_request *rq;
4476 	struct dma_fence *fence;
4477 	int err = 0;
4478 
4479 	rq = intel_engine_create_kernel_request(ce->engine);
4480 	if (IS_ERR(rq))
4481 		return PTR_ERR(rq);
4482 
4483 	fence = i915_active_fence_get(&ce->timeline->last_request);
4484 	if (fence) {
4485 		i915_request_await_dma_fence(rq, fence);
4486 		dma_fence_put(fence);
4487 	}
4488 
4489 	rq = i915_request_get(rq);
4490 	i915_request_add(rq);
4491 	if (i915_request_wait(rq, 0, timeout) < 0)
4492 		err = -ETIME;
4493 	i915_request_put(rq);
4494 
4495 	rmb(); /* We know the request is written, make sure all state is too! */
4496 	return err;
4497 }
4498 
4499 static int live_lrc_layout(void *arg)
4500 {
4501 	struct intel_gt *gt = arg;
4502 	struct intel_engine_cs *engine;
4503 	enum intel_engine_id id;
4504 	u32 *lrc;
4505 	int err;
4506 
4507 	/*
4508 	 * Check the registers offsets we use to create the initial reg state
4509 	 * match the layout saved by HW.
4510 	 */
4511 
4512 	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4513 	if (!lrc)
4514 		return -ENOMEM;
4515 
4516 	err = 0;
4517 	for_each_engine(engine, gt, id) {
4518 		u32 *hw;
4519 		int dw;
4520 
4521 		if (!engine->default_state)
4522 			continue;
4523 
4524 		hw = shmem_pin_map(engine->default_state);
4525 		if (IS_ERR(hw)) {
4526 			err = PTR_ERR(hw);
4527 			break;
4528 		}
4529 		hw += LRC_STATE_OFFSET / sizeof(*hw);
4530 
4531 		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4532 					 engine->kernel_context,
4533 					 engine,
4534 					 engine->kernel_context->ring,
4535 					 true);
4536 
4537 		dw = 0;
4538 		do {
4539 			u32 lri = hw[dw];
4540 
4541 			if (lri == 0) {
4542 				dw++;
4543 				continue;
4544 			}
4545 
4546 			if (lrc[dw] == 0) {
4547 				pr_debug("%s: skipped instruction %x at dword %d\n",
4548 					 engine->name, lri, dw);
4549 				dw++;
4550 				continue;
4551 			}
4552 
4553 			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4554 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4555 				       engine->name, dw, lri);
4556 				err = -EINVAL;
4557 				break;
4558 			}
4559 
4560 			if (lrc[dw] != lri) {
4561 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4562 				       engine->name, dw, lri, lrc[dw]);
4563 				err = -EINVAL;
4564 				break;
4565 			}
4566 
4567 			lri &= 0x7f;
4568 			lri++;
4569 			dw++;
4570 
4571 			while (lri) {
4572 				if (hw[dw] != lrc[dw]) {
4573 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4574 					       engine->name, dw, hw[dw], lrc[dw]);
4575 					err = -EINVAL;
4576 					break;
4577 				}
4578 
4579 				/*
4580 				 * Skip over the actual register value as we
4581 				 * expect that to differ.
4582 				 */
4583 				dw += 2;
4584 				lri -= 2;
4585 			}
4586 		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4587 
4588 		if (err) {
4589 			pr_info("%s: HW register image:\n", engine->name);
4590 			igt_hexdump(hw, PAGE_SIZE);
4591 
4592 			pr_info("%s: SW register image:\n", engine->name);
4593 			igt_hexdump(lrc, PAGE_SIZE);
4594 		}
4595 
4596 		shmem_unpin_map(engine->default_state, hw);
4597 		if (err)
4598 			break;
4599 	}
4600 
4601 	kfree(lrc);
4602 	return err;
4603 }
4604 
/*
 * Search the first page's worth of dwords at @lri for the value @offset.
 *
 * Returns the dword index of the first match, or -1 if there is none.
 */
static int find_offset(const u32 *lri, u32 offset)
{
	const u32 * const end = lri + PAGE_SIZE / sizeof(u32);
	const u32 *pos = lri;

	while (pos != end) {
		if (*pos == offset)
			return pos - lri;
		pos++;
	}

	return -1;
}
4615 
4616 static int live_lrc_fixed(void *arg)
4617 {
4618 	struct intel_gt *gt = arg;
4619 	struct intel_engine_cs *engine;
4620 	enum intel_engine_id id;
4621 	int err = 0;
4622 
4623 	/*
4624 	 * Check the assumed register offsets match the actual locations in
4625 	 * the context image.
4626 	 */
4627 
4628 	for_each_engine(engine, gt, id) {
4629 		const struct {
4630 			u32 reg;
4631 			u32 offset;
4632 			const char *name;
4633 		} tbl[] = {
4634 			{
4635 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4636 				CTX_RING_START - 1,
4637 				"RING_START"
4638 			},
4639 			{
4640 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4641 				CTX_RING_CTL - 1,
4642 				"RING_CTL"
4643 			},
4644 			{
4645 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4646 				CTX_RING_HEAD - 1,
4647 				"RING_HEAD"
4648 			},
4649 			{
4650 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4651 				CTX_RING_TAIL - 1,
4652 				"RING_TAIL"
4653 			},
4654 			{
4655 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4656 				lrc_ring_mi_mode(engine),
4657 				"RING_MI_MODE"
4658 			},
4659 			{
4660 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4661 				CTX_BB_STATE - 1,
4662 				"BB_STATE"
4663 			},
4664 			{
4665 				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4666 				lrc_ring_wa_bb_per_ctx(engine),
4667 				"RING_BB_PER_CTX_PTR"
4668 			},
4669 			{
4670 				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4671 				lrc_ring_indirect_ptr(engine),
4672 				"RING_INDIRECT_CTX_PTR"
4673 			},
4674 			{
4675 				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4676 				lrc_ring_indirect_offset(engine),
4677 				"RING_INDIRECT_CTX_OFFSET"
4678 			},
4679 			{
4680 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4681 				CTX_TIMESTAMP - 1,
4682 				"RING_CTX_TIMESTAMP"
4683 			},
4684 			{
4685 				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4686 				lrc_ring_gpr0(engine),
4687 				"RING_CS_GPR0"
4688 			},
4689 			{
4690 				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4691 				lrc_ring_cmd_buf_cctl(engine),
4692 				"RING_CMD_BUF_CCTL"
4693 			},
4694 			{ },
4695 		}, *t;
4696 		u32 *hw;
4697 
4698 		if (!engine->default_state)
4699 			continue;
4700 
4701 		hw = shmem_pin_map(engine->default_state);
4702 		if (IS_ERR(hw)) {
4703 			err = PTR_ERR(hw);
4704 			break;
4705 		}
4706 		hw += LRC_STATE_OFFSET / sizeof(*hw);
4707 
4708 		for (t = tbl; t->name; t++) {
4709 			int dw = find_offset(hw, t->reg);
4710 
4711 			if (dw != t->offset) {
4712 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4713 				       engine->name,
4714 				       t->name,
4715 				       t->reg,
4716 				       dw,
4717 				       t->offset);
4718 				err = -EINVAL;
4719 			}
4720 		}
4721 
4722 		shmem_unpin_map(engine->default_state, hw);
4723 	}
4724 
4725 	return err;
4726 }
4727 
4728 static int __live_lrc_state(struct intel_engine_cs *engine,
4729 			    struct i915_vma *scratch)
4730 {
4731 	struct intel_context *ce;
4732 	struct i915_request *rq;
4733 	enum {
4734 		RING_START_IDX = 0,
4735 		RING_TAIL_IDX,
4736 		MAX_IDX
4737 	};
4738 	u32 expected[MAX_IDX];
4739 	u32 *cs;
4740 	int err;
4741 	int n;
4742 
4743 	ce = intel_context_create(engine);
4744 	if (IS_ERR(ce))
4745 		return PTR_ERR(ce);
4746 
4747 	err = intel_context_pin(ce);
4748 	if (err)
4749 		goto err_put;
4750 
4751 	rq = i915_request_create(ce);
4752 	if (IS_ERR(rq)) {
4753 		err = PTR_ERR(rq);
4754 		goto err_unpin;
4755 	}
4756 
4757 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
4758 	if (IS_ERR(cs)) {
4759 		err = PTR_ERR(cs);
4760 		i915_request_add(rq);
4761 		goto err_unpin;
4762 	}
4763 
4764 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4765 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4766 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4767 	*cs++ = 0;
4768 
4769 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4770 
4771 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4772 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4773 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4774 	*cs++ = 0;
4775 
4776 	i915_vma_lock(scratch);
4777 	err = i915_request_await_object(rq, scratch->obj, true);
4778 	if (!err)
4779 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4780 	i915_vma_unlock(scratch);
4781 
4782 	i915_request_get(rq);
4783 	i915_request_add(rq);
4784 	if (err)
4785 		goto err_rq;
4786 
4787 	intel_engine_flush_submission(engine);
4788 	expected[RING_TAIL_IDX] = ce->ring->tail;
4789 
4790 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4791 		err = -ETIME;
4792 		goto err_rq;
4793 	}
4794 
4795 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4796 	if (IS_ERR(cs)) {
4797 		err = PTR_ERR(cs);
4798 		goto err_rq;
4799 	}
4800 
4801 	for (n = 0; n < MAX_IDX; n++) {
4802 		if (cs[n] != expected[n]) {
4803 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4804 			       engine->name, n, cs[n], expected[n]);
4805 			err = -EINVAL;
4806 			break;
4807 		}
4808 	}
4809 
4810 	i915_gem_object_unpin_map(scratch->obj);
4811 
4812 err_rq:
4813 	i915_request_put(rq);
4814 err_unpin:
4815 	intel_context_unpin(ce);
4816 err_put:
4817 	intel_context_put(ce);
4818 	return err;
4819 }
4820 
4821 static int live_lrc_state(void *arg)
4822 {
4823 	struct intel_gt *gt = arg;
4824 	struct intel_engine_cs *engine;
4825 	struct i915_vma *scratch;
4826 	enum intel_engine_id id;
4827 	int err = 0;
4828 
4829 	/*
4830 	 * Check the live register state matches what we expect for this
4831 	 * intel_context.
4832 	 */
4833 
4834 	scratch = create_scratch(gt);
4835 	if (IS_ERR(scratch))
4836 		return PTR_ERR(scratch);
4837 
4838 	for_each_engine(engine, gt, id) {
4839 		err = __live_lrc_state(engine, scratch);
4840 		if (err)
4841 			break;
4842 	}
4843 
4844 	if (igt_flush_test(gt->i915))
4845 		err = -EIO;
4846 
4847 	i915_vma_unpin_and_release(&scratch, 0);
4848 	return err;
4849 }
4850 
4851 static int gpr_make_dirty(struct intel_context *ce)
4852 {
4853 	struct i915_request *rq;
4854 	u32 *cs;
4855 	int n;
4856 
4857 	rq = intel_context_create_request(ce);
4858 	if (IS_ERR(rq))
4859 		return PTR_ERR(rq);
4860 
4861 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4862 	if (IS_ERR(cs)) {
4863 		i915_request_add(rq);
4864 		return PTR_ERR(cs);
4865 	}
4866 
4867 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4868 	for (n = 0; n < NUM_GPR_DW; n++) {
4869 		*cs++ = CS_GPR(ce->engine, n);
4870 		*cs++ = STACK_MAGIC;
4871 	}
4872 	*cs++ = MI_NOOP;
4873 
4874 	intel_ring_advance(rq, cs);
4875 
4876 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4877 	i915_request_add(rq);
4878 
4879 	return 0;
4880 }
4881 
4882 static struct i915_request *
4883 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4884 {
4885 	const u32 offset =
4886 		i915_ggtt_offset(ce->engine->status_page.vma) +
4887 		offset_in_page(slot);
4888 	struct i915_request *rq;
4889 	u32 *cs;
4890 	int err;
4891 	int n;
4892 
4893 	rq = intel_context_create_request(ce);
4894 	if (IS_ERR(rq))
4895 		return rq;
4896 
4897 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4898 	if (IS_ERR(cs)) {
4899 		i915_request_add(rq);
4900 		return ERR_CAST(cs);
4901 	}
4902 
4903 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4904 	*cs++ = MI_NOOP;
4905 
4906 	*cs++ = MI_SEMAPHORE_WAIT |
4907 		MI_SEMAPHORE_GLOBAL_GTT |
4908 		MI_SEMAPHORE_POLL |
4909 		MI_SEMAPHORE_SAD_NEQ_SDD;
4910 	*cs++ = 0;
4911 	*cs++ = offset;
4912 	*cs++ = 0;
4913 
4914 	for (n = 0; n < NUM_GPR_DW; n++) {
4915 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4916 		*cs++ = CS_GPR(ce->engine, n);
4917 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4918 		*cs++ = 0;
4919 	}
4920 
4921 	i915_vma_lock(scratch);
4922 	err = i915_request_await_object(rq, scratch->obj, true);
4923 	if (!err)
4924 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4925 	i915_vma_unlock(scratch);
4926 
4927 	i915_request_get(rq);
4928 	i915_request_add(rq);
4929 	if (err) {
4930 		i915_request_put(rq);
4931 		rq = ERR_PTR(err);
4932 	}
4933 
4934 	return rq;
4935 }
4936 
4937 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4938 			  struct i915_vma *scratch,
4939 			  bool preempt)
4940 {
4941 	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4942 	struct intel_context *ce;
4943 	struct i915_request *rq;
4944 	u32 *cs;
4945 	int err;
4946 	int n;
4947 
4948 	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4949 		return 0; /* GPR only on rcs0 for gen8 */
4950 
4951 	err = gpr_make_dirty(engine->kernel_context);
4952 	if (err)
4953 		return err;
4954 
4955 	ce = intel_context_create(engine);
4956 	if (IS_ERR(ce))
4957 		return PTR_ERR(ce);
4958 
4959 	rq = __gpr_read(ce, scratch, slot);
4960 	if (IS_ERR(rq)) {
4961 		err = PTR_ERR(rq);
4962 		goto err_put;
4963 	}
4964 
4965 	err = wait_for_submit(engine, rq, HZ / 2);
4966 	if (err)
4967 		goto err_rq;
4968 
4969 	if (preempt) {
4970 		err = gpr_make_dirty(engine->kernel_context);
4971 		if (err)
4972 			goto err_rq;
4973 
4974 		err = emit_semaphore_signal(engine->kernel_context, slot);
4975 		if (err)
4976 			goto err_rq;
4977 	} else {
4978 		slot[0] = 1;
4979 		wmb();
4980 	}
4981 
4982 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4983 		err = -ETIME;
4984 		goto err_rq;
4985 	}
4986 
4987 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4988 	if (IS_ERR(cs)) {
4989 		err = PTR_ERR(cs);
4990 		goto err_rq;
4991 	}
4992 
4993 	for (n = 0; n < NUM_GPR_DW; n++) {
4994 		if (cs[n]) {
4995 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4996 			       engine->name,
4997 			       n / 2, n & 1 ? "udw" : "ldw",
4998 			       cs[n]);
4999 			err = -EINVAL;
5000 			break;
5001 		}
5002 	}
5003 
5004 	i915_gem_object_unpin_map(scratch->obj);
5005 
5006 err_rq:
5007 	memset32(&slot[0], -1, 4);
5008 	wmb();
5009 	i915_request_put(rq);
5010 err_put:
5011 	intel_context_put(ce);
5012 	return err;
5013 }
5014 
5015 static int live_lrc_gpr(void *arg)
5016 {
5017 	struct intel_gt *gt = arg;
5018 	struct intel_engine_cs *engine;
5019 	struct i915_vma *scratch;
5020 	enum intel_engine_id id;
5021 	int err = 0;
5022 
5023 	/*
5024 	 * Check that GPR registers are cleared in new contexts as we need
5025 	 * to avoid leaking any information from previous contexts.
5026 	 */
5027 
5028 	scratch = create_scratch(gt);
5029 	if (IS_ERR(scratch))
5030 		return PTR_ERR(scratch);
5031 
5032 	for_each_engine(engine, gt, id) {
5033 		engine_heartbeat_disable(engine);
5034 
5035 		err = __live_lrc_gpr(engine, scratch, false);
5036 		if (err)
5037 			goto err;
5038 
5039 		err = __live_lrc_gpr(engine, scratch, true);
5040 		if (err)
5041 			goto err;
5042 
5043 err:
5044 		engine_heartbeat_enable(engine);
5045 		if (igt_flush_test(gt->i915))
5046 			err = -EIO;
5047 		if (err)
5048 			break;
5049 	}
5050 
5051 	i915_vma_unpin_and_release(&scratch, 0);
5052 	return err;
5053 }
5054 
5055 static struct i915_request *
5056 create_timestamp(struct intel_context *ce, void *slot, int idx)
5057 {
5058 	const u32 offset =
5059 		i915_ggtt_offset(ce->engine->status_page.vma) +
5060 		offset_in_page(slot);
5061 	struct i915_request *rq;
5062 	u32 *cs;
5063 	int err;
5064 
5065 	rq = intel_context_create_request(ce);
5066 	if (IS_ERR(rq))
5067 		return rq;
5068 
5069 	cs = intel_ring_begin(rq, 10);
5070 	if (IS_ERR(cs)) {
5071 		err = PTR_ERR(cs);
5072 		goto err;
5073 	}
5074 
5075 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5076 	*cs++ = MI_NOOP;
5077 
5078 	*cs++ = MI_SEMAPHORE_WAIT |
5079 		MI_SEMAPHORE_GLOBAL_GTT |
5080 		MI_SEMAPHORE_POLL |
5081 		MI_SEMAPHORE_SAD_NEQ_SDD;
5082 	*cs++ = 0;
5083 	*cs++ = offset;
5084 	*cs++ = 0;
5085 
5086 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5087 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5088 	*cs++ = offset + idx * sizeof(u32);
5089 	*cs++ = 0;
5090 
5091 	intel_ring_advance(rq, cs);
5092 
5093 	rq->sched.attr.priority = I915_PRIORITY_MASK;
5094 	err = 0;
5095 err:
5096 	i915_request_get(rq);
5097 	i915_request_add(rq);
5098 	if (err) {
5099 		i915_request_put(rq);
5100 		return ERR_PTR(err);
5101 	}
5102 
5103 	return rq;
5104 }
5105 
5106 struct lrc_timestamp {
5107 	struct intel_engine_cs *engine;
5108 	struct intel_context *ce[2];
5109 	u32 poison;
5110 };
5111 
5112 static bool timestamp_advanced(u32 start, u32 end)
5113 {
5114 	return (s32)(end - start) > 0;
5115 }
5116 
5117 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5118 {
5119 	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5120 	struct i915_request *rq;
5121 	u32 timestamp;
5122 	int err = 0;
5123 
5124 	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5125 	rq = create_timestamp(arg->ce[0], slot, 1);
5126 	if (IS_ERR(rq))
5127 		return PTR_ERR(rq);
5128 
5129 	err = wait_for_submit(rq->engine, rq, HZ / 2);
5130 	if (err)
5131 		goto err;
5132 
5133 	if (preempt) {
5134 		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5135 		err = emit_semaphore_signal(arg->ce[1], slot);
5136 		if (err)
5137 			goto err;
5138 	} else {
5139 		slot[0] = 1;
5140 		wmb();
5141 	}
5142 
5143 	/* And wait for switch to kernel (to save our context to memory) */
5144 	err = context_flush(arg->ce[0], HZ / 2);
5145 	if (err)
5146 		goto err;
5147 
5148 	if (!timestamp_advanced(arg->poison, slot[1])) {
5149 		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5150 		       arg->engine->name, preempt ? "preempt" : "simple",
5151 		       arg->poison, slot[1]);
5152 		err = -EINVAL;
5153 	}
5154 
5155 	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5156 	if (!timestamp_advanced(slot[1], timestamp)) {
5157 		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5158 		       arg->engine->name, preempt ? "preempt" : "simple",
5159 		       slot[1], timestamp);
5160 		err = -EINVAL;
5161 	}
5162 
5163 err:
5164 	memset32(slot, -1, 4);
5165 	i915_request_put(rq);
5166 	return err;
5167 }
5168 
5169 static int live_lrc_timestamp(void *arg)
5170 {
5171 	struct lrc_timestamp data = {};
5172 	struct intel_gt *gt = arg;
5173 	enum intel_engine_id id;
5174 	const u32 poison[] = {
5175 		0,
5176 		S32_MAX,
5177 		(u32)S32_MAX + 1,
5178 		U32_MAX,
5179 	};
5180 
5181 	/*
5182 	 * We want to verify that the timestamp is saved and restore across
5183 	 * context switches and is monotonic.
5184 	 *
5185 	 * So we do this with a little bit of LRC poisoning to check various
5186 	 * boundary conditions, and see what happens if we preempt the context
5187 	 * with a second request (carrying more poison into the timestamp).
5188 	 */
5189 
5190 	for_each_engine(data.engine, gt, id) {
5191 		int i, err = 0;
5192 
5193 		engine_heartbeat_disable(data.engine);
5194 
5195 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5196 			struct intel_context *tmp;
5197 
5198 			tmp = intel_context_create(data.engine);
5199 			if (IS_ERR(tmp)) {
5200 				err = PTR_ERR(tmp);
5201 				goto err;
5202 			}
5203 
5204 			err = intel_context_pin(tmp);
5205 			if (err) {
5206 				intel_context_put(tmp);
5207 				goto err;
5208 			}
5209 
5210 			data.ce[i] = tmp;
5211 		}
5212 
5213 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
5214 			data.poison = poison[i];
5215 
5216 			err = __lrc_timestamp(&data, false);
5217 			if (err)
5218 				break;
5219 
5220 			err = __lrc_timestamp(&data, true);
5221 			if (err)
5222 				break;
5223 		}
5224 
5225 err:
5226 		engine_heartbeat_enable(data.engine);
5227 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5228 			if (!data.ce[i])
5229 				break;
5230 
5231 			intel_context_unpin(data.ce[i]);
5232 			intel_context_put(data.ce[i]);
5233 		}
5234 
5235 		if (igt_flush_test(gt->i915))
5236 			err = -EIO;
5237 		if (err)
5238 			return err;
5239 	}
5240 
5241 	return 0;
5242 }
5243 
5244 static struct i915_vma *
5245 create_user_vma(struct i915_address_space *vm, unsigned long size)
5246 {
5247 	struct drm_i915_gem_object *obj;
5248 	struct i915_vma *vma;
5249 	int err;
5250 
5251 	obj = i915_gem_object_create_internal(vm->i915, size);
5252 	if (IS_ERR(obj))
5253 		return ERR_CAST(obj);
5254 
5255 	vma = i915_vma_instance(obj, vm, NULL);
5256 	if (IS_ERR(vma)) {
5257 		i915_gem_object_put(obj);
5258 		return vma;
5259 	}
5260 
5261 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
5262 	if (err) {
5263 		i915_gem_object_put(obj);
5264 		return ERR_PTR(err);
5265 	}
5266 
5267 	return vma;
5268 }
5269 
5270 static struct i915_vma *
5271 store_context(struct intel_context *ce, struct i915_vma *scratch)
5272 {
5273 	struct i915_vma *batch;
5274 	u32 dw, x, *cs, *hw;
5275 	u32 *defaults;
5276 
5277 	batch = create_user_vma(ce->vm, SZ_64K);
5278 	if (IS_ERR(batch))
5279 		return batch;
5280 
5281 	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5282 	if (IS_ERR(cs)) {
5283 		i915_vma_put(batch);
5284 		return ERR_CAST(cs);
5285 	}
5286 
5287 	defaults = shmem_pin_map(ce->engine->default_state);
5288 	if (!defaults) {
5289 		i915_gem_object_unpin_map(batch->obj);
5290 		i915_vma_put(batch);
5291 		return ERR_PTR(-ENOMEM);
5292 	}
5293 
5294 	x = 0;
5295 	dw = 0;
5296 	hw = defaults;
5297 	hw += LRC_STATE_OFFSET / sizeof(*hw);
5298 	do {
5299 		u32 len = hw[dw] & 0x7f;
5300 
5301 		if (hw[dw] == 0) {
5302 			dw++;
5303 			continue;
5304 		}
5305 
5306 		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5307 			dw += len + 2;
5308 			continue;
5309 		}
5310 
5311 		dw++;
5312 		len = (len + 1) / 2;
5313 		while (len--) {
5314 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
5315 			*cs++ = hw[dw];
5316 			*cs++ = lower_32_bits(scratch->node.start + x);
5317 			*cs++ = upper_32_bits(scratch->node.start + x);
5318 
5319 			dw += 2;
5320 			x += 4;
5321 		}
5322 	} while (dw < PAGE_SIZE / sizeof(u32) &&
5323 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5324 
5325 	*cs++ = MI_BATCH_BUFFER_END;
5326 
5327 	shmem_unpin_map(ce->engine->default_state, defaults);
5328 
5329 	i915_gem_object_flush_map(batch->obj);
5330 	i915_gem_object_unpin_map(batch->obj);
5331 
5332 	return batch;
5333 }
5334 
5335 static int move_to_active(struct i915_request *rq,
5336 			  struct i915_vma *vma,
5337 			  unsigned int flags)
5338 {
5339 	int err;
5340 
5341 	i915_vma_lock(vma);
5342 	err = i915_request_await_object(rq, vma->obj, flags);
5343 	if (!err)
5344 		err = i915_vma_move_to_active(vma, rq, flags);
5345 	i915_vma_unlock(vma);
5346 
5347 	return err;
5348 }
5349 
5350 static struct i915_request *
5351 record_registers(struct intel_context *ce,
5352 		 struct i915_vma *before,
5353 		 struct i915_vma *after,
5354 		 u32 *sema)
5355 {
5356 	struct i915_vma *b_before, *b_after;
5357 	struct i915_request *rq;
5358 	u32 *cs;
5359 	int err;
5360 
5361 	b_before = store_context(ce, before);
5362 	if (IS_ERR(b_before))
5363 		return ERR_CAST(b_before);
5364 
5365 	b_after = store_context(ce, after);
5366 	if (IS_ERR(b_after)) {
5367 		rq = ERR_CAST(b_after);
5368 		goto err_before;
5369 	}
5370 
5371 	rq = intel_context_create_request(ce);
5372 	if (IS_ERR(rq))
5373 		goto err_after;
5374 
5375 	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5376 	if (err)
5377 		goto err_rq;
5378 
5379 	err = move_to_active(rq, b_before, 0);
5380 	if (err)
5381 		goto err_rq;
5382 
5383 	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5384 	if (err)
5385 		goto err_rq;
5386 
5387 	err = move_to_active(rq, b_after, 0);
5388 	if (err)
5389 		goto err_rq;
5390 
5391 	cs = intel_ring_begin(rq, 14);
5392 	if (IS_ERR(cs)) {
5393 		err = PTR_ERR(cs);
5394 		goto err_rq;
5395 	}
5396 
5397 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5398 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5399 	*cs++ = lower_32_bits(b_before->node.start);
5400 	*cs++ = upper_32_bits(b_before->node.start);
5401 
5402 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5403 	*cs++ = MI_SEMAPHORE_WAIT |
5404 		MI_SEMAPHORE_GLOBAL_GTT |
5405 		MI_SEMAPHORE_POLL |
5406 		MI_SEMAPHORE_SAD_NEQ_SDD;
5407 	*cs++ = 0;
5408 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5409 		offset_in_page(sema);
5410 	*cs++ = 0;
5411 	*cs++ = MI_NOOP;
5412 
5413 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5414 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5415 	*cs++ = lower_32_bits(b_after->node.start);
5416 	*cs++ = upper_32_bits(b_after->node.start);
5417 
5418 	intel_ring_advance(rq, cs);
5419 
5420 	WRITE_ONCE(*sema, 0);
5421 	i915_request_get(rq);
5422 	i915_request_add(rq);
5423 err_after:
5424 	i915_vma_put(b_after);
5425 err_before:
5426 	i915_vma_put(b_before);
5427 	return rq;
5428 
5429 err_rq:
5430 	i915_request_add(rq);
5431 	rq = ERR_PTR(err);
5432 	goto err_after;
5433 }
5434 
5435 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5436 {
5437 	struct i915_vma *batch;
5438 	u32 dw, *cs, *hw;
5439 	u32 *defaults;
5440 
5441 	batch = create_user_vma(ce->vm, SZ_64K);
5442 	if (IS_ERR(batch))
5443 		return batch;
5444 
5445 	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5446 	if (IS_ERR(cs)) {
5447 		i915_vma_put(batch);
5448 		return ERR_CAST(cs);
5449 	}
5450 
5451 	defaults = shmem_pin_map(ce->engine->default_state);
5452 	if (!defaults) {
5453 		i915_gem_object_unpin_map(batch->obj);
5454 		i915_vma_put(batch);
5455 		return ERR_PTR(-ENOMEM);
5456 	}
5457 
5458 	dw = 0;
5459 	hw = defaults;
5460 	hw += LRC_STATE_OFFSET / sizeof(*hw);
5461 	do {
5462 		u32 len = hw[dw] & 0x7f;
5463 
5464 		if (hw[dw] == 0) {
5465 			dw++;
5466 			continue;
5467 		}
5468 
5469 		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5470 			dw += len + 2;
5471 			continue;
5472 		}
5473 
5474 		dw++;
5475 		len = (len + 1) / 2;
5476 		*cs++ = MI_LOAD_REGISTER_IMM(len);
5477 		while (len--) {
5478 			*cs++ = hw[dw];
5479 			*cs++ = poison;
5480 			dw += 2;
5481 		}
5482 	} while (dw < PAGE_SIZE / sizeof(u32) &&
5483 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5484 
5485 	*cs++ = MI_BATCH_BUFFER_END;
5486 
5487 	shmem_unpin_map(ce->engine->default_state, defaults);
5488 
5489 	i915_gem_object_flush_map(batch->obj);
5490 	i915_gem_object_unpin_map(batch->obj);
5491 
5492 	return batch;
5493 }
5494 
5495 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5496 {
5497 	struct i915_request *rq;
5498 	struct i915_vma *batch;
5499 	u32 *cs;
5500 	int err;
5501 
5502 	batch = load_context(ce, poison);
5503 	if (IS_ERR(batch))
5504 		return PTR_ERR(batch);
5505 
5506 	rq = intel_context_create_request(ce);
5507 	if (IS_ERR(rq)) {
5508 		err = PTR_ERR(rq);
5509 		goto err_batch;
5510 	}
5511 
5512 	err = move_to_active(rq, batch, 0);
5513 	if (err)
5514 		goto err_rq;
5515 
5516 	cs = intel_ring_begin(rq, 8);
5517 	if (IS_ERR(cs)) {
5518 		err = PTR_ERR(cs);
5519 		goto err_rq;
5520 	}
5521 
5522 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5523 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5524 	*cs++ = lower_32_bits(batch->node.start);
5525 	*cs++ = upper_32_bits(batch->node.start);
5526 
5527 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5528 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5529 		offset_in_page(sema);
5530 	*cs++ = 0;
5531 	*cs++ = 1;
5532 
5533 	intel_ring_advance(rq, cs);
5534 
5535 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5536 err_rq:
5537 	i915_request_add(rq);
5538 err_batch:
5539 	i915_vma_put(batch);
5540 	return err;
5541 }
5542 
5543 static bool is_moving(u32 a, u32 b)
5544 {
5545 	return a != b;
5546 }
5547 
5548 static int compare_isolation(struct intel_engine_cs *engine,
5549 			     struct i915_vma *ref[2],
5550 			     struct i915_vma *result[2],
5551 			     struct intel_context *ce,
5552 			     u32 poison)
5553 {
5554 	u32 x, dw, *hw, *lrc;
5555 	u32 *A[2], *B[2];
5556 	u32 *defaults;
5557 	int err = 0;
5558 
5559 	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5560 	if (IS_ERR(A[0]))
5561 		return PTR_ERR(A[0]);
5562 
5563 	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5564 	if (IS_ERR(A[1])) {
5565 		err = PTR_ERR(A[1]);
5566 		goto err_A0;
5567 	}
5568 
5569 	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5570 	if (IS_ERR(B[0])) {
5571 		err = PTR_ERR(B[0]);
5572 		goto err_A1;
5573 	}
5574 
5575 	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5576 	if (IS_ERR(B[1])) {
5577 		err = PTR_ERR(B[1]);
5578 		goto err_B0;
5579 	}
5580 
5581 	lrc = i915_gem_object_pin_map(ce->state->obj,
5582 				      i915_coherent_map_type(engine->i915));
5583 	if (IS_ERR(lrc)) {
5584 		err = PTR_ERR(lrc);
5585 		goto err_B1;
5586 	}
5587 	lrc += LRC_STATE_OFFSET / sizeof(*hw);
5588 
5589 	defaults = shmem_pin_map(ce->engine->default_state);
5590 	if (!defaults) {
5591 		err = -ENOMEM;
5592 		goto err_lrc;
5593 	}
5594 
5595 	x = 0;
5596 	dw = 0;
5597 	hw = defaults;
5598 	hw += LRC_STATE_OFFSET / sizeof(*hw);
5599 	do {
5600 		u32 len = hw[dw] & 0x7f;
5601 
5602 		if (hw[dw] == 0) {
5603 			dw++;
5604 			continue;
5605 		}
5606 
5607 		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5608 			dw += len + 2;
5609 			continue;
5610 		}
5611 
5612 		dw++;
5613 		len = (len + 1) / 2;
5614 		while (len--) {
5615 			if (!is_moving(A[0][x], A[1][x]) &&
5616 			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5617 				switch (hw[dw] & 4095) {
5618 				case 0x30: /* RING_HEAD */
5619 				case 0x34: /* RING_TAIL */
5620 					break;
5621 
5622 				default:
5623 					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5624 					       engine->name, dw,
5625 					       hw[dw], hw[dw + 1],
5626 					       A[0][x], B[0][x], B[1][x],
5627 					       poison, lrc[dw + 1]);
5628 					err = -EINVAL;
5629 				}
5630 			}
5631 			dw += 2;
5632 			x++;
5633 		}
5634 	} while (dw < PAGE_SIZE / sizeof(u32) &&
5635 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5636 
5637 	shmem_unpin_map(ce->engine->default_state, defaults);
5638 err_lrc:
5639 	i915_gem_object_unpin_map(ce->state->obj);
5640 err_B1:
5641 	i915_gem_object_unpin_map(result[1]->obj);
5642 err_B0:
5643 	i915_gem_object_unpin_map(result[0]->obj);
5644 err_A1:
5645 	i915_gem_object_unpin_map(ref[1]->obj);
5646 err_A0:
5647 	i915_gem_object_unpin_map(ref[0]->obj);
5648 	return err;
5649 }
5650 
5651 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5652 {
5653 	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5654 	struct i915_vma *ref[2], *result[2];
5655 	struct intel_context *A, *B;
5656 	struct i915_request *rq;
5657 	int err;
5658 
5659 	A = intel_context_create(engine);
5660 	if (IS_ERR(A))
5661 		return PTR_ERR(A);
5662 
5663 	B = intel_context_create(engine);
5664 	if (IS_ERR(B)) {
5665 		err = PTR_ERR(B);
5666 		goto err_A;
5667 	}
5668 
5669 	ref[0] = create_user_vma(A->vm, SZ_64K);
5670 	if (IS_ERR(ref[0])) {
5671 		err = PTR_ERR(ref[0]);
5672 		goto err_B;
5673 	}
5674 
5675 	ref[1] = create_user_vma(A->vm, SZ_64K);
5676 	if (IS_ERR(ref[1])) {
5677 		err = PTR_ERR(ref[1]);
5678 		goto err_ref0;
5679 	}
5680 
5681 	rq = record_registers(A, ref[0], ref[1], sema);
5682 	if (IS_ERR(rq)) {
5683 		err = PTR_ERR(rq);
5684 		goto err_ref1;
5685 	}
5686 
5687 	WRITE_ONCE(*sema, 1);
5688 	wmb();
5689 
5690 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5691 		i915_request_put(rq);
5692 		err = -ETIME;
5693 		goto err_ref1;
5694 	}
5695 	i915_request_put(rq);
5696 
5697 	result[0] = create_user_vma(A->vm, SZ_64K);
5698 	if (IS_ERR(result[0])) {
5699 		err = PTR_ERR(result[0]);
5700 		goto err_ref1;
5701 	}
5702 
5703 	result[1] = create_user_vma(A->vm, SZ_64K);
5704 	if (IS_ERR(result[1])) {
5705 		err = PTR_ERR(result[1]);
5706 		goto err_result0;
5707 	}
5708 
5709 	rq = record_registers(A, result[0], result[1], sema);
5710 	if (IS_ERR(rq)) {
5711 		err = PTR_ERR(rq);
5712 		goto err_result1;
5713 	}
5714 
5715 	err = poison_registers(B, poison, sema);
5716 	if (err) {
5717 		WRITE_ONCE(*sema, -1);
5718 		i915_request_put(rq);
5719 		goto err_result1;
5720 	}
5721 
5722 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5723 		i915_request_put(rq);
5724 		err = -ETIME;
5725 		goto err_result1;
5726 	}
5727 	i915_request_put(rq);
5728 
5729 	err = compare_isolation(engine, ref, result, A, poison);
5730 
5731 err_result1:
5732 	i915_vma_put(result[1]);
5733 err_result0:
5734 	i915_vma_put(result[0]);
5735 err_ref1:
5736 	i915_vma_put(ref[1]);
5737 err_ref0:
5738 	i915_vma_put(ref[0]);
5739 err_B:
5740 	intel_context_put(B);
5741 err_A:
5742 	intel_context_put(A);
5743 	return err;
5744 }
5745 
5746 static bool skip_isolation(const struct intel_engine_cs *engine)
5747 {
5748 	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5749 		return true;
5750 
5751 	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5752 		return true;
5753 
5754 	return false;
5755 }
5756 
5757 static int live_lrc_isolation(void *arg)
5758 {
5759 	struct intel_gt *gt = arg;
5760 	struct intel_engine_cs *engine;
5761 	enum intel_engine_id id;
5762 	const u32 poison[] = {
5763 		STACK_MAGIC,
5764 		0x3a3a3a3a,
5765 		0x5c5c5c5c,
5766 		0xffffffff,
5767 		0xffff0000,
5768 	};
5769 	int err = 0;
5770 
5771 	/*
5772 	 * Our goal is try and verify that per-context state cannot be
5773 	 * tampered with by another non-privileged client.
5774 	 *
5775 	 * We take the list of context registers from the LRI in the default
5776 	 * context image and attempt to modify that list from a remote context.
5777 	 */
5778 
5779 	for_each_engine(engine, gt, id) {
5780 		int i;
5781 
5782 		/* Just don't even ask */
5783 		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5784 		    skip_isolation(engine))
5785 			continue;
5786 
5787 		intel_engine_pm_get(engine);
5788 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
5789 			int result;
5790 
5791 			result = __lrc_isolation(engine, poison[i]);
5792 			if (result && !err)
5793 				err = result;
5794 
5795 			result = __lrc_isolation(engine, ~poison[i]);
5796 			if (result && !err)
5797 				err = result;
5798 		}
5799 		intel_engine_pm_put(engine);
5800 		if (igt_flush_test(gt->i915)) {
5801 			err = -EIO;
5802 			break;
5803 		}
5804 	}
5805 
5806 	return err;
5807 }
5808 
5809 static int indirect_ctx_submit_req(struct intel_context *ce)
5810 {
5811 	struct i915_request *rq;
5812 	int err = 0;
5813 
5814 	rq = intel_context_create_request(ce);
5815 	if (IS_ERR(rq))
5816 		return PTR_ERR(rq);
5817 
5818 	i915_request_get(rq);
5819 	i915_request_add(rq);
5820 
5821 	if (i915_request_wait(rq, 0, HZ / 5) < 0)
5822 		err = -ETIME;
5823 
5824 	i915_request_put(rq);
5825 
5826 	return err;
5827 }
5828 
5829 #define CTX_BB_CANARY_OFFSET (3 * 1024)
5830 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
5831 
5832 static u32 *
5833 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
5834 {
5835 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
5836 		MI_SRM_LRM_GLOBAL_GTT |
5837 		MI_LRI_LRM_CS_MMIO;
5838 	*cs++ = i915_mmio_reg_offset(RING_START(0));
5839 	*cs++ = i915_ggtt_offset(ce->state) +
5840 		context_wa_bb_offset(ce) +
5841 		CTX_BB_CANARY_OFFSET;
5842 	*cs++ = 0;
5843 
5844 	return cs;
5845 }
5846 
5847 static void
5848 indirect_ctx_bb_setup(struct intel_context *ce)
5849 {
5850 	u32 *cs = context_indirect_bb(ce);
5851 
5852 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
5853 
5854 	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
5855 }
5856 
5857 static bool check_ring_start(struct intel_context *ce)
5858 {
5859 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
5860 		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
5861 
5862 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
5863 		return true;
5864 
5865 	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
5866 	       ctx_bb[CTX_BB_CANARY_INDEX],
5867 	       ce->lrc_reg_state[CTX_RING_START]);
5868 
5869 	return false;
5870 }
5871 
5872 static int indirect_ctx_bb_check(struct intel_context *ce)
5873 {
5874 	int err;
5875 
5876 	err = indirect_ctx_submit_req(ce);
5877 	if (err)
5878 		return err;
5879 
5880 	if (!check_ring_start(ce))
5881 		return -EINVAL;
5882 
5883 	return 0;
5884 }
5885 
5886 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
5887 {
5888 	struct intel_context *a, *b;
5889 	int err;
5890 
5891 	a = intel_context_create(engine);
5892 	if (IS_ERR(a))
5893 		return PTR_ERR(a);
5894 	err = intel_context_pin(a);
5895 	if (err)
5896 		goto put_a;
5897 
5898 	b = intel_context_create(engine);
5899 	if (IS_ERR(b)) {
5900 		err = PTR_ERR(b);
5901 		goto unpin_a;
5902 	}
5903 	err = intel_context_pin(b);
5904 	if (err)
5905 		goto put_b;
5906 
5907 	/* We use the already reserved extra page in context state */
5908 	if (!a->wa_bb_page) {
5909 		GEM_BUG_ON(b->wa_bb_page);
5910 		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
5911 		goto unpin_b;
5912 	}
5913 
5914 	/*
5915 	 * In order to test that our per context bb is truly per context,
5916 	 * and executes at the intended spot on context restoring process,
5917 	 * make the batch store the ring start value to memory.
5918 	 * As ring start is restored apriori of starting the indirect ctx bb and
5919 	 * as it will be different for each context, it fits to this purpose.
5920 	 */
5921 	indirect_ctx_bb_setup(a);
5922 	indirect_ctx_bb_setup(b);
5923 
5924 	err = indirect_ctx_bb_check(a);
5925 	if (err)
5926 		goto unpin_b;
5927 
5928 	err = indirect_ctx_bb_check(b);
5929 
5930 unpin_b:
5931 	intel_context_unpin(b);
5932 put_b:
5933 	intel_context_put(b);
5934 unpin_a:
5935 	intel_context_unpin(a);
5936 put_a:
5937 	intel_context_put(a);
5938 
5939 	return err;
5940 }
5941 
5942 static int live_lrc_indirect_ctx_bb(void *arg)
5943 {
5944 	struct intel_gt *gt = arg;
5945 	struct intel_engine_cs *engine;
5946 	enum intel_engine_id id;
5947 	int err = 0;
5948 
5949 	for_each_engine(engine, gt, id) {
5950 		intel_engine_pm_get(engine);
5951 		err = __live_lrc_indirect_ctx_bb(engine);
5952 		intel_engine_pm_put(engine);
5953 
5954 		if (igt_flush_test(gt->i915))
5955 			err = -EIO;
5956 
5957 		if (err)
5958 			break;
5959 	}
5960 
5961 	return err;
5962 }
5963 
5964 static void garbage_reset(struct intel_engine_cs *engine,
5965 			  struct i915_request *rq)
5966 {
5967 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
5968 	unsigned long *lock = &engine->gt->reset.flags;
5969 
5970 	if (test_and_set_bit(bit, lock))
5971 		return;
5972 
5973 	tasklet_disable(&engine->execlists.tasklet);
5974 
5975 	if (!rq->fence.error)
5976 		intel_engine_reset(engine, NULL);
5977 
5978 	tasklet_enable(&engine->execlists.tasklet);
5979 	clear_and_wake_up_bit(bit, lock);
5980 }
5981 
5982 static struct i915_request *garbage(struct intel_context *ce,
5983 				    struct rnd_state *prng)
5984 {
5985 	struct i915_request *rq;
5986 	int err;
5987 
5988 	err = intel_context_pin(ce);
5989 	if (err)
5990 		return ERR_PTR(err);
5991 
5992 	prandom_bytes_state(prng,
5993 			    ce->lrc_reg_state,
5994 			    ce->engine->context_size -
5995 			    LRC_STATE_OFFSET);
5996 
5997 	rq = intel_context_create_request(ce);
5998 	if (IS_ERR(rq)) {
5999 		err = PTR_ERR(rq);
6000 		goto err_unpin;
6001 	}
6002 
6003 	i915_request_get(rq);
6004 	i915_request_add(rq);
6005 	return rq;
6006 
6007 err_unpin:
6008 	intel_context_unpin(ce);
6009 	return ERR_PTR(err);
6010 }
6011 
6012 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6013 {
6014 	struct intel_context *ce;
6015 	struct i915_request *hang;
6016 	int err = 0;
6017 
6018 	ce = intel_context_create(engine);
6019 	if (IS_ERR(ce))
6020 		return PTR_ERR(ce);
6021 
6022 	hang = garbage(ce, prng);
6023 	if (IS_ERR(hang)) {
6024 		err = PTR_ERR(hang);
6025 		goto err_ce;
6026 	}
6027 
6028 	if (wait_for_submit(engine, hang, HZ / 2)) {
6029 		i915_request_put(hang);
6030 		err = -ETIME;
6031 		goto err_ce;
6032 	}
6033 
6034 	intel_context_set_banned(ce);
6035 	garbage_reset(engine, hang);
6036 
6037 	intel_engine_flush_submission(engine);
6038 	if (!hang->fence.error) {
6039 		i915_request_put(hang);
6040 		pr_err("%s: corrupted context was not reset\n",
6041 		       engine->name);
6042 		err = -EINVAL;
6043 		goto err_ce;
6044 	}
6045 
6046 	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6047 		pr_err("%s: corrupted context did not recover\n",
6048 		       engine->name);
6049 		i915_request_put(hang);
6050 		err = -EIO;
6051 		goto err_ce;
6052 	}
6053 	i915_request_put(hang);
6054 
6055 err_ce:
6056 	intel_context_put(ce);
6057 	return err;
6058 }
6059 
6060 static int live_lrc_garbage(void *arg)
6061 {
6062 	struct intel_gt *gt = arg;
6063 	struct intel_engine_cs *engine;
6064 	enum intel_engine_id id;
6065 
6066 	/*
6067 	 * Verify that we can recover if one context state is completely
6068 	 * corrupted.
6069 	 */
6070 
6071 	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6072 		return 0;
6073 
6074 	for_each_engine(engine, gt, id) {
6075 		I915_RND_STATE(prng);
6076 		int err = 0, i;
6077 
6078 		if (!intel_has_reset_engine(engine->gt))
6079 			continue;
6080 
6081 		intel_engine_pm_get(engine);
6082 		for (i = 0; i < 3; i++) {
6083 			err = __lrc_garbage(engine, &prng);
6084 			if (err)
6085 				break;
6086 		}
6087 		intel_engine_pm_put(engine);
6088 
6089 		if (igt_flush_test(gt->i915))
6090 			err = -EIO;
6091 		if (err)
6092 			return err;
6093 	}
6094 
6095 	return 0;
6096 }
6097 
6098 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6099 {
6100 	struct intel_context *ce;
6101 	struct i915_request *rq;
6102 	IGT_TIMEOUT(end_time);
6103 	int err;
6104 
6105 	ce = intel_context_create(engine);
6106 	if (IS_ERR(ce))
6107 		return PTR_ERR(ce);
6108 
6109 	ce->runtime.num_underflow = 0;
6110 	ce->runtime.max_underflow = 0;
6111 
6112 	do {
6113 		unsigned int loop = 1024;
6114 
6115 		while (loop) {
6116 			rq = intel_context_create_request(ce);
6117 			if (IS_ERR(rq)) {
6118 				err = PTR_ERR(rq);
6119 				goto err_rq;
6120 			}
6121 
6122 			if (--loop == 0)
6123 				i915_request_get(rq);
6124 
6125 			i915_request_add(rq);
6126 		}
6127 
6128 		if (__igt_timeout(end_time, NULL))
6129 			break;
6130 
6131 		i915_request_put(rq);
6132 	} while (1);
6133 
6134 	err = i915_request_wait(rq, 0, HZ / 5);
6135 	if (err < 0) {
6136 		pr_err("%s: request not completed!\n", engine->name);
6137 		goto err_wait;
6138 	}
6139 
6140 	igt_flush_test(engine->i915);
6141 
6142 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6143 		engine->name,
6144 		intel_context_get_total_runtime_ns(ce),
6145 		intel_context_get_avg_runtime_ns(ce));
6146 
6147 	err = 0;
6148 	if (ce->runtime.num_underflow) {
6149 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6150 		       engine->name,
6151 		       ce->runtime.num_underflow,
6152 		       ce->runtime.max_underflow);
6153 		GEM_TRACE_DUMP();
6154 		err = -EOVERFLOW;
6155 	}
6156 
6157 err_wait:
6158 	i915_request_put(rq);
6159 err_rq:
6160 	intel_context_put(ce);
6161 	return err;
6162 }
6163 
6164 static int live_pphwsp_runtime(void *arg)
6165 {
6166 	struct intel_gt *gt = arg;
6167 	struct intel_engine_cs *engine;
6168 	enum intel_engine_id id;
6169 	int err = 0;
6170 
6171 	/*
6172 	 * Check that cumulative context runtime as stored in the pphwsp[16]
6173 	 * is monotonic.
6174 	 */
6175 
6176 	for_each_engine(engine, gt, id) {
6177 		err = __live_pphwsp_runtime(engine);
6178 		if (err)
6179 			break;
6180 	}
6181 
6182 	if (igt_flush_test(gt->i915))
6183 		err = -EIO;
6184 
6185 	return err;
6186 }
6187 
6188 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6189 {
6190 	static const struct i915_subtest tests[] = {
6191 		SUBTEST(live_lrc_layout),
6192 		SUBTEST(live_lrc_fixed),
6193 		SUBTEST(live_lrc_state),
6194 		SUBTEST(live_lrc_gpr),
6195 		SUBTEST(live_lrc_isolation),
6196 		SUBTEST(live_lrc_timestamp),
6197 		SUBTEST(live_lrc_garbage),
6198 		SUBTEST(live_pphwsp_runtime),
6199 		SUBTEST(live_lrc_indirect_ctx_bb),
6200 	};
6201 
6202 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6203 		return 0;
6204 
6205 	return intel_gt_live_subtests(tests, &i915->gt);
6206 }
6207