xref: /linux/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c (revision 47b60ec7ba22a6359379bce9643bfff7a1ffe9ed)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9 
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_regs.h"
14 #include "gt/intel_gt.h"
15 #include "gt/intel_gt_requests.h"
16 #include "gt/intel_reset.h"
17 #include "i915_selftest.h"
18 
19 #include "gem/selftests/igt_gem_utils.h"
20 #include "selftests/i915_random.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_live_test.h"
23 #include "selftests/igt_reset.h"
24 #include "selftests/igt_spinner.h"
25 #include "selftests/mock_drm.h"
26 #include "selftests/mock_gem_device.h"
27 
28 #include "huge_gem_object.h"
29 #include "igt_gem_utils.h"
30 
/* Number of 32-bit dwords that fit in a single page. */
#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
32 
33 static int live_nop_switch(void *arg)
34 {
35 	const unsigned int nctx = 1024;
36 	struct drm_i915_private *i915 = arg;
37 	struct intel_engine_cs *engine;
38 	struct i915_gem_context **ctx;
39 	struct igt_live_test t;
40 	struct file *file;
41 	unsigned long n;
42 	int err = -ENODEV;
43 
44 	/*
45 	 * Create as many contexts as we can feasibly get away with
46 	 * and check we can switch between them rapidly.
47 	 *
48 	 * Serves as very simple stress test for submission and HW switching
49 	 * between contexts.
50 	 */
51 
52 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
53 		return 0;
54 
55 	file = mock_file(i915);
56 	if (IS_ERR(file))
57 		return PTR_ERR(file);
58 
59 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
60 	if (!ctx) {
61 		err = -ENOMEM;
62 		goto out_file;
63 	}
64 
65 	for (n = 0; n < nctx; n++) {
66 		ctx[n] = live_context(i915, file);
67 		if (IS_ERR(ctx[n])) {
68 			err = PTR_ERR(ctx[n]);
69 			goto out_file;
70 		}
71 	}
72 
73 	for_each_uabi_engine(engine, i915) {
74 		struct i915_request *rq = NULL;
75 		unsigned long end_time, prime;
76 		ktime_t times[2] = {};
77 
78 		times[0] = ktime_get_raw();
79 		for (n = 0; n < nctx; n++) {
80 			struct i915_request *this;
81 
82 			this = igt_request_alloc(ctx[n], engine);
83 			if (IS_ERR(this)) {
84 				err = PTR_ERR(this);
85 				goto out_file;
86 			}
87 			if (rq) {
88 				i915_request_await_dma_fence(this, &rq->fence);
89 				i915_request_put(rq);
90 			}
91 			rq = i915_request_get(this);
92 			i915_request_add(this);
93 		}
94 		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
95 			pr_err("Failed to populated %d contexts\n", nctx);
96 			intel_gt_set_wedged(to_gt(i915));
97 			i915_request_put(rq);
98 			err = -EIO;
99 			goto out_file;
100 		}
101 		i915_request_put(rq);
102 
103 		times[1] = ktime_get_raw();
104 
105 		pr_info("Populated %d contexts on %s in %lluns\n",
106 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
107 
108 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
109 		if (err)
110 			goto out_file;
111 
112 		end_time = jiffies + i915_selftest.timeout_jiffies;
113 		for_each_prime_number_from(prime, 2, 8192) {
114 			times[1] = ktime_get_raw();
115 
116 			rq = NULL;
117 			for (n = 0; n < prime; n++) {
118 				struct i915_request *this;
119 
120 				this = igt_request_alloc(ctx[n % nctx], engine);
121 				if (IS_ERR(this)) {
122 					err = PTR_ERR(this);
123 					goto out_file;
124 				}
125 
126 				if (rq) { /* Force submission order */
127 					i915_request_await_dma_fence(this, &rq->fence);
128 					i915_request_put(rq);
129 				}
130 
131 				/*
132 				 * This space is left intentionally blank.
133 				 *
134 				 * We do not actually want to perform any
135 				 * action with this request, we just want
136 				 * to measure the latency in allocation
137 				 * and submission of our breadcrumbs -
138 				 * ensuring that the bare request is sufficient
139 				 * for the system to work (i.e. proper HEAD
140 				 * tracking of the rings, interrupt handling,
141 				 * etc). It also gives us the lowest bounds
142 				 * for latency.
143 				 */
144 
145 				rq = i915_request_get(this);
146 				i915_request_add(this);
147 			}
148 			GEM_BUG_ON(!rq);
149 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
150 				pr_err("Switching between %ld contexts timed out\n",
151 				       prime);
152 				intel_gt_set_wedged(to_gt(i915));
153 				i915_request_put(rq);
154 				break;
155 			}
156 			i915_request_put(rq);
157 
158 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
159 			if (prime == 2)
160 				times[0] = times[1];
161 
162 			if (__igt_timeout(end_time, NULL))
163 				break;
164 		}
165 
166 		err = igt_live_test_end(&t);
167 		if (err)
168 			goto out_file;
169 
170 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
171 			engine->name,
172 			ktime_to_ns(times[0]),
173 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
174 	}
175 
176 out_file:
177 	fput(file);
178 	return err;
179 }
180 
/* Per-engine state handed to the live_parallel_switch() worker functions. */
struct parallel_switch {
	/* kthread worker the work item below is queued on */
	struct kthread_worker *worker;
	/* work item running one of the __live_parallel_switch*() functions */
	struct kthread_work work;
	/* contexts the worker alternates requests between */
	struct intel_context *ce[2];
	/* 0 or a negative errno, written by the worker function */
	int result;
};
187 
188 static void __live_parallel_switch1(struct kthread_work *work)
189 {
190 	struct parallel_switch *arg =
191 		container_of(work, typeof(*arg), work);
192 	IGT_TIMEOUT(end_time);
193 	unsigned long count;
194 
195 	count = 0;
196 	arg->result = 0;
197 	do {
198 		struct i915_request *rq = NULL;
199 		int n;
200 
201 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
202 			struct i915_request *prev = rq;
203 
204 			rq = i915_request_create(arg->ce[n]);
205 			if (IS_ERR(rq)) {
206 				i915_request_put(prev);
207 				arg->result = PTR_ERR(rq);
208 				break;
209 			}
210 
211 			i915_request_get(rq);
212 			if (prev) {
213 				arg->result =
214 					i915_request_await_dma_fence(rq,
215 								     &prev->fence);
216 				i915_request_put(prev);
217 			}
218 
219 			i915_request_add(rq);
220 		}
221 
222 		if (IS_ERR_OR_NULL(rq))
223 			break;
224 
225 		if (i915_request_wait(rq, 0, HZ) < 0)
226 			arg->result = -ETIME;
227 
228 		i915_request_put(rq);
229 
230 		count++;
231 	} while (!arg->result && !__igt_timeout(end_time, NULL));
232 
233 	pr_info("%s: %lu switches (sync) <%d>\n",
234 		arg->ce[0]->engine->name, count, arg->result);
235 }
236 
237 static void __live_parallel_switchN(struct kthread_work *work)
238 {
239 	struct parallel_switch *arg =
240 		container_of(work, typeof(*arg), work);
241 	struct i915_request *rq = NULL;
242 	IGT_TIMEOUT(end_time);
243 	unsigned long count;
244 	int n;
245 
246 	count = 0;
247 	arg->result = 0;
248 	do {
249 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
250 			struct i915_request *prev = rq;
251 
252 			rq = i915_request_create(arg->ce[n]);
253 			if (IS_ERR(rq)) {
254 				i915_request_put(prev);
255 				arg->result = PTR_ERR(rq);
256 				break;
257 			}
258 
259 			i915_request_get(rq);
260 			if (prev) {
261 				arg->result =
262 					i915_request_await_dma_fence(rq,
263 								     &prev->fence);
264 				i915_request_put(prev);
265 			}
266 
267 			i915_request_add(rq);
268 		}
269 
270 		count++;
271 	} while (!arg->result && !__igt_timeout(end_time, NULL));
272 
273 	if (!IS_ERR_OR_NULL(rq))
274 		i915_request_put(rq);
275 
276 	pr_info("%s: %lu switches (many) <%d>\n",
277 		arg->ce[0]->engine->name, count, arg->result);
278 }
279 
/*
 * live_parallel_switch - run context switches on all engines at once
 * @arg: the struct drm_i915_private under test
 *
 * Pins one intel_context per engine of a first GEM context, creates a second
 * pinned intel_context on each of those engines, then runs each function in
 * func[] on a dedicated kthread worker per engine and collects the results.
 *
 * Return: 0 on success (or when logical contexts are not supported), a
 * negative errno otherwise.
 */
static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	/* NULL-terminated list of worker functions, run one after the other */
	static void (* const func[])(struct kthread_work *) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	void (* const *fn)(struct kthread_work *);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	/* One zeroed slot per engine; unused slots keep ce[0] == NULL */
	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		/* Slot is only filled once the pin succeeded, see cleanup at out: */
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				/* Not yet stored in data[], so drop it here */
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	/* One kthread worker per populated engine slot */
	for (n = 0; n < count; n++) {
		struct kthread_worker *worker;

		if (!data[n].ce[0])
			continue;

		worker = kthread_create_worker(0, "igt/parallel:%s",
					       data[n].ce[0]->engine->name);
		if (IS_ERR(worker)) {
			err = PTR_ERR(worker);
			goto out;
		}

		data[n].worker = worker;
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		/* Kick off the same function on every engine... */
		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].result = 0;
			kthread_init_work(&data[n].work, *fn);
			kthread_queue_work(data[n].worker, &data[n].work);
		}

		/* ...then wait for all of them, keeping the first error seen */
		for (n = 0; n < count; n++) {
			if (data[n].ce[0]) {
				kthread_flush_work(&data[n].work);
				if (data[n].result && !err)
					err = data[n].result;
			}
		}

		if (igt_live_test_end(&t)) {
			err = err ?: -EIO;
			break;
		}
	}

out:
	/* Undo pin + reference for every stored context, then stop the workers */
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}

		if (data[n].worker)
			kthread_destroy_worker(data[n].worker);
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}
428 
/* Number of pages covered by the object's physical size (its "real" pages). */
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}
433 
/* Number of pages covered by the object's DMA size (its "fake" pages). */
static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}
438 
439 static int gpu_fill(struct intel_context *ce,
440 		    struct drm_i915_gem_object *obj,
441 		    unsigned int dw)
442 {
443 	struct i915_vma *vma;
444 	int err;
445 
446 	GEM_BUG_ON(obj->base.size > ce->vm->total);
447 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
448 
449 	vma = i915_vma_instance(obj, ce->vm, NULL);
450 	if (IS_ERR(vma))
451 		return PTR_ERR(vma);
452 
453 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
454 	if (err)
455 		return err;
456 
457 	/*
458 	 * Within the GTT the huge objects maps every page onto
459 	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
460 	 * We set the nth dword within the page using the nth
461 	 * mapping via the GTT - this should exercise the GTT mapping
462 	 * whilst checking that each context provides a unique view
463 	 * into the object.
464 	 */
465 	err = igt_gpu_fill_dw(ce, vma,
466 			      (dw * real_page_count(obj)) << PAGE_SHIFT |
467 			      (dw * sizeof(u32)),
468 			      real_page_count(obj),
469 			      dw);
470 	i915_vma_unpin(vma);
471 
472 	return err;
473 }
474 
475 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
476 {
477 	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
478 	unsigned int need_flush;
479 	unsigned long n, m;
480 	int err;
481 
482 	i915_gem_object_lock(obj, NULL);
483 	err = i915_gem_object_prepare_write(obj, &need_flush);
484 	if (err)
485 		goto out;
486 
487 	for (n = 0; n < real_page_count(obj); n++) {
488 		u32 *map;
489 
490 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
491 		for (m = 0; m < DW_PER_PAGE; m++)
492 			map[m] = value;
493 		if (!has_llc)
494 			drm_clflush_virt_range(map, PAGE_SIZE);
495 		kunmap_atomic(map);
496 	}
497 
498 	i915_gem_object_finish_access(obj);
499 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
500 	obj->write_domain = 0;
501 out:
502 	i915_gem_object_unlock(obj);
503 	return err;
504 }
505 
506 static noinline int cpu_check(struct drm_i915_gem_object *obj,
507 			      unsigned int idx, unsigned int max)
508 {
509 	unsigned int needs_flush;
510 	unsigned long n;
511 	int err;
512 
513 	i915_gem_object_lock(obj, NULL);
514 	err = i915_gem_object_prepare_read(obj, &needs_flush);
515 	if (err)
516 		goto out_unlock;
517 
518 	for (n = 0; n < real_page_count(obj); n++) {
519 		u32 *map, m;
520 
521 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
522 		if (needs_flush & CLFLUSH_BEFORE)
523 			drm_clflush_virt_range(map, PAGE_SIZE);
524 
525 		for (m = 0; m < max; m++) {
526 			if (map[m] != m) {
527 				pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n",
528 				       __builtin_return_address(0), idx,
529 				       n, real_page_count(obj), m, max,
530 				       map[m], m);
531 				err = -EINVAL;
532 				goto out_unmap;
533 			}
534 		}
535 
536 		for (; m < DW_PER_PAGE; m++) {
537 			if (map[m] != STACK_MAGIC) {
538 				pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n",
539 				       __builtin_return_address(0), idx, n, m,
540 				       map[m], STACK_MAGIC);
541 				err = -EINVAL;
542 				goto out_unmap;
543 			}
544 		}
545 
546 out_unmap:
547 		kunmap_atomic(map);
548 		if (err)
549 			break;
550 	}
551 
552 	i915_gem_object_finish_access(obj);
553 out_unlock:
554 	i915_gem_object_unlock(obj);
555 	return err;
556 }
557 
558 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
559 {
560 	int err;
561 
562 	GEM_BUG_ON(obj->base.handle_count);
563 
564 	/* tie the object to the drm_file for easy reaping */
565 	err = idr_alloc(&to_drm_file(file)->object_idr,
566 			&obj->base, 1, 0, GFP_KERNEL);
567 	if (err < 0)
568 		return err;
569 
570 	i915_gem_object_get(obj);
571 	obj->base.handle_count++;
572 	return 0;
573 }
574 
575 static struct drm_i915_gem_object *
576 create_test_object(struct i915_address_space *vm,
577 		   struct file *file,
578 		   struct list_head *objects)
579 {
580 	struct drm_i915_gem_object *obj;
581 	u64 size;
582 	int err;
583 
584 	/* Keep in GEM's good graces */
585 	intel_gt_retire_requests(vm->gt);
586 
587 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
588 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
589 
590 	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
591 	if (IS_ERR(obj))
592 		return obj;
593 
594 	err = file_add_object(file, obj);
595 	i915_gem_object_put(obj);
596 	if (err)
597 		return ERR_PTR(err);
598 
599 	err = cpu_fill(obj, STACK_MAGIC);
600 	if (err) {
601 		pr_err("Failed to fill object with cpu, err=%d\n",
602 		       err);
603 		return ERR_PTR(err);
604 	}
605 
606 	list_add_tail(&obj->st_link, objects);
607 	return obj;
608 }
609 
610 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
611 {
612 	unsigned long npages = fake_page_count(obj);
613 
614 	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
615 	return npages / DW_PER_PAGE;
616 }
617 
/* Drop the reference on every valid request in @q and clear its slot. */
static void throttle_release(struct i915_request **q, int count)
{
	int slot;

	for (slot = 0; slot < count; slot++) {
		if (!IS_ERR_OR_NULL(q[slot]))
			i915_request_put(fetch_and_zero(&q[slot]));
	}
}
629 
630 static int throttle(struct intel_context *ce,
631 		    struct i915_request **q, int count)
632 {
633 	int i;
634 
635 	if (!IS_ERR_OR_NULL(q[0])) {
636 		if (i915_request_wait(q[0],
637 				      I915_WAIT_INTERRUPTIBLE,
638 				      MAX_SCHEDULE_TIMEOUT) < 0)
639 			return -EINTR;
640 
641 		i915_request_put(q[0]);
642 	}
643 
644 	for (i = 0; i < count - 1; i++)
645 		q[i] = q[i + 1];
646 
647 	q[i] = intel_context_create_request(ce);
648 	if (IS_ERR(q[i]))
649 		return PTR_ERR(q[i]);
650 
651 	i915_request_get(q[i]);
652 	i915_request_add(q[i]);
653 
654 	return 0;
655 }
656 
657 static int igt_ctx_exec(void *arg)
658 {
659 	struct drm_i915_private *i915 = arg;
660 	struct intel_engine_cs *engine;
661 	int err = -ENODEV;
662 
663 	/*
664 	 * Create a few different contexts (with different mm) and write
665 	 * through each ctx/mm using the GPU making sure those writes end
666 	 * up in the expected pages of our obj.
667 	 */
668 
669 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
670 		return 0;
671 
672 	for_each_uabi_engine(engine, i915) {
673 		struct drm_i915_gem_object *obj = NULL;
674 		unsigned long ncontexts, ndwords, dw;
675 		struct i915_request *tq[5] = {};
676 		struct igt_live_test t;
677 		IGT_TIMEOUT(end_time);
678 		LIST_HEAD(objects);
679 		struct file *file;
680 
681 		if (!intel_engine_can_store_dword(engine))
682 			continue;
683 
684 		if (!engine->context_size)
685 			continue; /* No logical context support in HW */
686 
687 		file = mock_file(i915);
688 		if (IS_ERR(file))
689 			return PTR_ERR(file);
690 
691 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
692 		if (err)
693 			goto out_file;
694 
695 		ncontexts = 0;
696 		ndwords = 0;
697 		dw = 0;
698 		while (!time_after(jiffies, end_time)) {
699 			struct i915_gem_context *ctx;
700 			struct intel_context *ce;
701 
702 			ctx = kernel_context(i915, NULL);
703 			if (IS_ERR(ctx)) {
704 				err = PTR_ERR(ctx);
705 				goto out_file;
706 			}
707 
708 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
709 			GEM_BUG_ON(IS_ERR(ce));
710 
711 			if (!obj) {
712 				obj = create_test_object(ce->vm, file, &objects);
713 				if (IS_ERR(obj)) {
714 					err = PTR_ERR(obj);
715 					intel_context_put(ce);
716 					kernel_context_close(ctx);
717 					goto out_file;
718 				}
719 			}
720 
721 			err = gpu_fill(ce, obj, dw);
722 			if (err) {
723 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
724 				       ndwords, dw, max_dwords(obj),
725 				       engine->name,
726 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
727 				       err);
728 				intel_context_put(ce);
729 				kernel_context_close(ctx);
730 				goto out_file;
731 			}
732 
733 			err = throttle(ce, tq, ARRAY_SIZE(tq));
734 			if (err) {
735 				intel_context_put(ce);
736 				kernel_context_close(ctx);
737 				goto out_file;
738 			}
739 
740 			if (++dw == max_dwords(obj)) {
741 				obj = NULL;
742 				dw = 0;
743 			}
744 
745 			ndwords++;
746 			ncontexts++;
747 
748 			intel_context_put(ce);
749 			kernel_context_close(ctx);
750 		}
751 
752 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
753 			ncontexts, engine->name, ndwords);
754 
755 		ncontexts = dw = 0;
756 		list_for_each_entry(obj, &objects, st_link) {
757 			unsigned int rem =
758 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
759 
760 			err = cpu_check(obj, ncontexts++, rem);
761 			if (err)
762 				break;
763 
764 			dw += rem;
765 		}
766 
767 out_file:
768 		throttle_release(tq, ARRAY_SIZE(tq));
769 		if (igt_live_test_end(&t))
770 			err = -EIO;
771 
772 		fput(file);
773 		if (err)
774 			return err;
775 
776 		i915_gem_drain_freed_objects(i915);
777 	}
778 
779 	return 0;
780 }
781 
782 static int igt_shared_ctx_exec(void *arg)
783 {
784 	struct drm_i915_private *i915 = arg;
785 	struct i915_request *tq[5] = {};
786 	struct i915_gem_context *parent;
787 	struct intel_engine_cs *engine;
788 	struct igt_live_test t;
789 	struct file *file;
790 	int err = 0;
791 
792 	/*
793 	 * Create a few different contexts with the same mm and write
794 	 * through each ctx using the GPU making sure those writes end
795 	 * up in the expected pages of our obj.
796 	 */
797 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
798 		return 0;
799 
800 	file = mock_file(i915);
801 	if (IS_ERR(file))
802 		return PTR_ERR(file);
803 
804 	parent = live_context(i915, file);
805 	if (IS_ERR(parent)) {
806 		err = PTR_ERR(parent);
807 		goto out_file;
808 	}
809 
810 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
811 		err = 0;
812 		goto out_file;
813 	}
814 
815 	err = igt_live_test_begin(&t, i915, __func__, "");
816 	if (err)
817 		goto out_file;
818 
819 	for_each_uabi_engine(engine, i915) {
820 		unsigned long ncontexts, ndwords, dw;
821 		struct drm_i915_gem_object *obj = NULL;
822 		IGT_TIMEOUT(end_time);
823 		LIST_HEAD(objects);
824 
825 		if (!intel_engine_can_store_dword(engine))
826 			continue;
827 
828 		dw = 0;
829 		ndwords = 0;
830 		ncontexts = 0;
831 		while (!time_after(jiffies, end_time)) {
832 			struct i915_gem_context *ctx;
833 			struct intel_context *ce;
834 
835 			ctx = kernel_context(i915, parent->vm);
836 			if (IS_ERR(ctx)) {
837 				err = PTR_ERR(ctx);
838 				goto out_test;
839 			}
840 
841 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
842 			GEM_BUG_ON(IS_ERR(ce));
843 
844 			if (!obj) {
845 				obj = create_test_object(parent->vm,
846 							 file, &objects);
847 				if (IS_ERR(obj)) {
848 					err = PTR_ERR(obj);
849 					intel_context_put(ce);
850 					kernel_context_close(ctx);
851 					goto out_test;
852 				}
853 			}
854 
855 			err = gpu_fill(ce, obj, dw);
856 			if (err) {
857 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
858 				       ndwords, dw, max_dwords(obj),
859 				       engine->name,
860 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
861 				       err);
862 				intel_context_put(ce);
863 				kernel_context_close(ctx);
864 				goto out_test;
865 			}
866 
867 			err = throttle(ce, tq, ARRAY_SIZE(tq));
868 			if (err) {
869 				intel_context_put(ce);
870 				kernel_context_close(ctx);
871 				goto out_test;
872 			}
873 
874 			if (++dw == max_dwords(obj)) {
875 				obj = NULL;
876 				dw = 0;
877 			}
878 
879 			ndwords++;
880 			ncontexts++;
881 
882 			intel_context_put(ce);
883 			kernel_context_close(ctx);
884 		}
885 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
886 			ncontexts, engine->name, ndwords);
887 
888 		ncontexts = dw = 0;
889 		list_for_each_entry(obj, &objects, st_link) {
890 			unsigned int rem =
891 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
892 
893 			err = cpu_check(obj, ncontexts++, rem);
894 			if (err)
895 				goto out_test;
896 
897 			dw += rem;
898 		}
899 
900 		i915_gem_drain_freed_objects(i915);
901 	}
902 out_test:
903 	throttle_release(tq, ARRAY_SIZE(tq));
904 	if (igt_live_test_end(&t))
905 		err = -EIO;
906 out_file:
907 	fput(file);
908 	return err;
909 }
910 
911 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
912 			    struct i915_vma *vma,
913 			    struct intel_engine_cs *engine)
914 {
915 	u32 *cmd;
916 
917 	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
918 
919 	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
920 	if (IS_ERR(cmd))
921 		return PTR_ERR(cmd);
922 
923 	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
924 	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
925 	*cmd++ = lower_32_bits(i915_vma_offset(vma));
926 	*cmd++ = upper_32_bits(i915_vma_offset(vma));
927 	*cmd = MI_BATCH_BUFFER_END;
928 
929 	__i915_gem_object_flush_map(rpcs, 0, 64);
930 	i915_gem_object_unpin_map(rpcs);
931 
932 	intel_gt_chipset_flush(vma->vm->gt);
933 
934 	return 0;
935 }
936 
937 static int
938 emit_rpcs_query(struct drm_i915_gem_object *obj,
939 		struct intel_context *ce,
940 		struct i915_request **rq_out)
941 {
942 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
943 	struct i915_request *rq;
944 	struct i915_gem_ww_ctx ww;
945 	struct i915_vma *batch;
946 	struct i915_vma *vma;
947 	struct drm_i915_gem_object *rpcs;
948 	int err;
949 
950 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
951 
952 	if (GRAPHICS_VER(i915) < 8)
953 		return -EINVAL;
954 
955 	vma = i915_vma_instance(obj, ce->vm, NULL);
956 	if (IS_ERR(vma))
957 		return PTR_ERR(vma);
958 
959 	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
960 	if (IS_ERR(rpcs))
961 		return PTR_ERR(rpcs);
962 
963 	batch = i915_vma_instance(rpcs, ce->vm, NULL);
964 	if (IS_ERR(batch)) {
965 		err = PTR_ERR(batch);
966 		goto err_put;
967 	}
968 
969 	i915_gem_ww_ctx_init(&ww, false);
970 retry:
971 	err = i915_gem_object_lock(obj, &ww);
972 	if (!err)
973 		err = i915_gem_object_lock(rpcs, &ww);
974 	if (!err)
975 		err = i915_gem_object_set_to_gtt_domain(obj, false);
976 	if (!err)
977 		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
978 	if (err)
979 		goto err_put;
980 
981 	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
982 	if (err)
983 		goto err_vma;
984 
985 	err = rpcs_query_batch(rpcs, vma, ce->engine);
986 	if (err)
987 		goto err_batch;
988 
989 	rq = i915_request_create(ce);
990 	if (IS_ERR(rq)) {
991 		err = PTR_ERR(rq);
992 		goto err_batch;
993 	}
994 
995 	err = i915_vma_move_to_active(batch, rq, 0);
996 	if (err)
997 		goto skip_request;
998 
999 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1000 	if (err)
1001 		goto skip_request;
1002 
1003 	if (rq->engine->emit_init_breadcrumb) {
1004 		err = rq->engine->emit_init_breadcrumb(rq);
1005 		if (err)
1006 			goto skip_request;
1007 	}
1008 
1009 	err = rq->engine->emit_bb_start(rq,
1010 					i915_vma_offset(batch),
1011 					i915_vma_size(batch),
1012 					0);
1013 	if (err)
1014 		goto skip_request;
1015 
1016 	*rq_out = i915_request_get(rq);
1017 
1018 skip_request:
1019 	if (err)
1020 		i915_request_set_error_once(rq, err);
1021 	i915_request_add(rq);
1022 err_batch:
1023 	i915_vma_unpin(batch);
1024 err_vma:
1025 	i915_vma_unpin(vma);
1026 err_put:
1027 	if (err == -EDEADLK) {
1028 		err = i915_gem_ww_ctx_backoff(&ww);
1029 		if (!err)
1030 			goto retry;
1031 	}
1032 	i915_gem_ww_ctx_fini(&ww);
1033 	i915_gem_object_put(rpcs);
1034 	return err;
1035 }
1036 
/*
 * Flags selecting the SSEU subtest variant:
 * TEST_IDLE  - additionally flush and re-read the slice count afterwards
 * TEST_BUSY  - keep the context busy with a spinner while reconfiguring
 * TEST_RESET - as TEST_BUSY, plus reset the engine before reading back
 */
#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)
1040 
1041 static int
1042 __sseu_prepare(const char *name,
1043 	       unsigned int flags,
1044 	       struct intel_context *ce,
1045 	       struct igt_spinner **spin)
1046 {
1047 	struct i915_request *rq;
1048 	int ret;
1049 
1050 	*spin = NULL;
1051 	if (!(flags & (TEST_BUSY | TEST_RESET)))
1052 		return 0;
1053 
1054 	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1055 	if (!*spin)
1056 		return -ENOMEM;
1057 
1058 	ret = igt_spinner_init(*spin, ce->engine->gt);
1059 	if (ret)
1060 		goto err_free;
1061 
1062 	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1063 	if (IS_ERR(rq)) {
1064 		ret = PTR_ERR(rq);
1065 		goto err_fini;
1066 	}
1067 
1068 	i915_request_add(rq);
1069 
1070 	if (!igt_wait_for_spinner(*spin, rq)) {
1071 		pr_err("%s: Spinner failed to start!\n", name);
1072 		ret = -ETIMEDOUT;
1073 		goto err_end;
1074 	}
1075 
1076 	return 0;
1077 
1078 err_end:
1079 	igt_spinner_end(*spin);
1080 err_fini:
1081 	igt_spinner_fini(*spin);
1082 err_free:
1083 	kfree(fetch_and_zero(spin));
1084 	return ret;
1085 }
1086 
1087 static int
1088 __read_slice_count(struct intel_context *ce,
1089 		   struct drm_i915_gem_object *obj,
1090 		   struct igt_spinner *spin,
1091 		   u32 *rpcs)
1092 {
1093 	struct i915_request *rq = NULL;
1094 	u32 s_mask, s_shift;
1095 	unsigned int cnt;
1096 	u32 *buf, val;
1097 	long ret;
1098 
1099 	ret = emit_rpcs_query(obj, ce, &rq);
1100 	if (ret)
1101 		return ret;
1102 
1103 	if (spin)
1104 		igt_spinner_end(spin);
1105 
1106 	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1107 	i915_request_put(rq);
1108 	if (ret < 0)
1109 		return ret;
1110 
1111 	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1112 	if (IS_ERR(buf)) {
1113 		ret = PTR_ERR(buf);
1114 		return ret;
1115 	}
1116 
1117 	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1118 		s_mask = GEN11_RPCS_S_CNT_MASK;
1119 		s_shift = GEN11_RPCS_S_CNT_SHIFT;
1120 	} else {
1121 		s_mask = GEN8_RPCS_S_CNT_MASK;
1122 		s_shift = GEN8_RPCS_S_CNT_SHIFT;
1123 	}
1124 
1125 	val = *buf;
1126 	cnt = (val & s_mask) >> s_shift;
1127 	*rpcs = val;
1128 
1129 	i915_gem_object_unpin_map(obj);
1130 
1131 	return cnt;
1132 }
1133 
1134 static int
1135 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1136 	     const char *prefix, const char *suffix)
1137 {
1138 	if (slices == expected)
1139 		return 0;
1140 
1141 	if (slices < 0) {
1142 		pr_err("%s: %s read slice count failed with %d%s\n",
1143 		       name, prefix, slices, suffix);
1144 		return slices;
1145 	}
1146 
1147 	pr_err("%s: %s slice count %d is not %u%s\n",
1148 	       name, prefix, slices, expected, suffix);
1149 
1150 	pr_info("RPCS=0x%x; %u%sx%u%s\n",
1151 		rpcs, slices,
1152 		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1153 		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1154 		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1155 
1156 	return -EINVAL;
1157 }
1158 
1159 static int
1160 __sseu_finish(const char *name,
1161 	      unsigned int flags,
1162 	      struct intel_context *ce,
1163 	      struct drm_i915_gem_object *obj,
1164 	      unsigned int expected,
1165 	      struct igt_spinner *spin)
1166 {
1167 	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1168 	u32 rpcs = 0;
1169 	int ret = 0;
1170 
1171 	if (flags & TEST_RESET) {
1172 		ret = intel_engine_reset(ce->engine, "sseu");
1173 		if (ret)
1174 			goto out;
1175 	}
1176 
1177 	ret = __read_slice_count(ce, obj,
1178 				 flags & TEST_RESET ? NULL : spin, &rpcs);
1179 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1180 	if (ret)
1181 		goto out;
1182 
1183 	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1184 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1185 
1186 out:
1187 	if (spin)
1188 		igt_spinner_end(spin);
1189 
1190 	if ((flags & TEST_IDLE) && ret == 0) {
1191 		ret = igt_flush_test(ce->engine->i915);
1192 		if (ret)
1193 			return ret;
1194 
1195 		ret = __read_slice_count(ce, obj, NULL, &rpcs);
1196 		ret = __check_rpcs(name, rpcs, ret, expected,
1197 				   "Context", " after idle!");
1198 	}
1199 
1200 	return ret;
1201 }
1202 
1203 static int
1204 __sseu_test(const char *name,
1205 	    unsigned int flags,
1206 	    struct intel_context *ce,
1207 	    struct drm_i915_gem_object *obj,
1208 	    struct intel_sseu sseu)
1209 {
1210 	struct igt_spinner *spin = NULL;
1211 	int ret;
1212 
1213 	intel_engine_pm_get(ce->engine);
1214 
1215 	ret = __sseu_prepare(name, flags, ce, &spin);
1216 	if (ret)
1217 		goto out_pm;
1218 
1219 	ret = intel_context_reconfigure_sseu(ce, sseu);
1220 	if (ret)
1221 		goto out_spin;
1222 
1223 	ret = __sseu_finish(name, flags, ce, obj,
1224 			    hweight32(sseu.slice_mask), spin);
1225 
1226 out_spin:
1227 	if (spin) {
1228 		igt_spinner_end(spin);
1229 		igt_spinner_fini(spin);
1230 		kfree(spin);
1231 	}
1232 out_pm:
1233 	intel_engine_pm_put(ce->engine);
1234 	return ret;
1235 }
1236 
1237 static int
1238 __igt_ctx_sseu(struct drm_i915_private *i915,
1239 	       const char *name,
1240 	       unsigned int flags)
1241 {
1242 	struct drm_i915_gem_object *obj;
1243 	int inst = 0;
1244 	int ret = 0;
1245 
1246 	if (GRAPHICS_VER(i915) < 9)
1247 		return 0;
1248 
1249 	if (flags & TEST_RESET)
1250 		igt_global_reset_lock(to_gt(i915));
1251 
1252 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1253 	if (IS_ERR(obj)) {
1254 		ret = PTR_ERR(obj);
1255 		goto out_unlock;
1256 	}
1257 
1258 	do {
1259 		struct intel_engine_cs *engine;
1260 		struct intel_context *ce;
1261 		struct intel_sseu pg_sseu;
1262 
1263 		engine = intel_engine_lookup_user(i915,
1264 						  I915_ENGINE_CLASS_RENDER,
1265 						  inst++);
1266 		if (!engine)
1267 			break;
1268 
1269 		if (hweight32(engine->sseu.slice_mask) < 2)
1270 			continue;
1271 
1272 		if (!engine->gt->info.sseu.has_slice_pg)
1273 			continue;
1274 
1275 		/*
1276 		 * Gen11 VME friendly power-gated configuration with
1277 		 * half enabled sub-slices.
1278 		 */
1279 		pg_sseu = engine->sseu;
1280 		pg_sseu.slice_mask = 1;
1281 		pg_sseu.subslice_mask =
1282 			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1283 
1284 		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1285 			engine->name, name, flags,
1286 			hweight32(engine->sseu.slice_mask),
1287 			hweight32(pg_sseu.slice_mask));
1288 
1289 		ce = intel_context_create(engine);
1290 		if (IS_ERR(ce)) {
1291 			ret = PTR_ERR(ce);
1292 			goto out_put;
1293 		}
1294 
1295 		ret = intel_context_pin(ce);
1296 		if (ret)
1297 			goto out_ce;
1298 
1299 		/* First set the default mask. */
1300 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1301 		if (ret)
1302 			goto out_unpin;
1303 
1304 		/* Then set a power-gated configuration. */
1305 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1306 		if (ret)
1307 			goto out_unpin;
1308 
1309 		/* Back to defaults. */
1310 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1311 		if (ret)
1312 			goto out_unpin;
1313 
1314 		/* One last power-gated configuration for the road. */
1315 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1316 		if (ret)
1317 			goto out_unpin;
1318 
1319 out_unpin:
1320 		intel_context_unpin(ce);
1321 out_ce:
1322 		intel_context_put(ce);
1323 	} while (!ret);
1324 
1325 	if (igt_flush_test(i915))
1326 		ret = -EIO;
1327 
1328 out_put:
1329 	i915_gem_object_put(obj);
1330 
1331 out_unlock:
1332 	if (flags & TEST_RESET)
1333 		igt_global_reset_unlock(to_gt(i915));
1334 
1335 	if (ret)
1336 		pr_err("%s: Failed with %d!\n", name, ret);
1337 
1338 	return ret;
1339 }
1340 
1341 static int igt_ctx_sseu(void *arg)
1342 {
1343 	struct {
1344 		const char *name;
1345 		unsigned int flags;
1346 	} *phase, phases[] = {
1347 		{ .name = "basic", .flags = 0 },
1348 		{ .name = "idle", .flags = TEST_IDLE },
1349 		{ .name = "busy", .flags = TEST_BUSY },
1350 		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1351 		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1352 		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1353 	};
1354 	unsigned int i;
1355 	int ret = 0;
1356 
1357 	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1358 	     i++, phase++)
1359 		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1360 
1361 	return ret;
1362 }
1363 
1364 static int igt_ctx_readonly(void *arg)
1365 {
1366 	struct drm_i915_private *i915 = arg;
1367 	unsigned long idx, ndwords, dw, num_engines;
1368 	struct drm_i915_gem_object *obj = NULL;
1369 	struct i915_request *tq[5] = {};
1370 	struct i915_gem_engines_iter it;
1371 	struct i915_address_space *vm;
1372 	struct i915_gem_context *ctx;
1373 	struct intel_context *ce;
1374 	struct igt_live_test t;
1375 	I915_RND_STATE(prng);
1376 	IGT_TIMEOUT(end_time);
1377 	LIST_HEAD(objects);
1378 	struct file *file;
1379 	int err = -ENODEV;
1380 
1381 	/*
1382 	 * Create a few read-only objects (with the occasional writable object)
1383 	 * and try to write into these object checking that the GPU discards
1384 	 * any write to a read-only object.
1385 	 */
1386 
1387 	file = mock_file(i915);
1388 	if (IS_ERR(file))
1389 		return PTR_ERR(file);
1390 
1391 	err = igt_live_test_begin(&t, i915, __func__, "");
1392 	if (err)
1393 		goto out_file;
1394 
1395 	ctx = live_context(i915, file);
1396 	if (IS_ERR(ctx)) {
1397 		err = PTR_ERR(ctx);
1398 		goto out_file;
1399 	}
1400 
1401 	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
1402 	if (!vm || !vm->has_read_only) {
1403 		err = 0;
1404 		goto out_file;
1405 	}
1406 
1407 	num_engines = 0;
1408 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1409 		if (intel_engine_can_store_dword(ce->engine))
1410 			num_engines++;
1411 	i915_gem_context_unlock_engines(ctx);
1412 
1413 	ndwords = 0;
1414 	dw = 0;
1415 	while (!time_after(jiffies, end_time)) {
1416 		for_each_gem_engine(ce,
1417 				    i915_gem_context_lock_engines(ctx), it) {
1418 			if (!intel_engine_can_store_dword(ce->engine))
1419 				continue;
1420 
1421 			if (!obj) {
1422 				obj = create_test_object(ce->vm, file, &objects);
1423 				if (IS_ERR(obj)) {
1424 					err = PTR_ERR(obj);
1425 					i915_gem_context_unlock_engines(ctx);
1426 					goto out_file;
1427 				}
1428 
1429 				if (prandom_u32_state(&prng) & 1)
1430 					i915_gem_object_set_readonly(obj);
1431 			}
1432 
1433 			err = gpu_fill(ce, obj, dw);
1434 			if (err) {
1435 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1436 				       ndwords, dw, max_dwords(obj),
1437 				       ce->engine->name,
1438 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
1439 				       err);
1440 				i915_gem_context_unlock_engines(ctx);
1441 				goto out_file;
1442 			}
1443 
1444 			err = throttle(ce, tq, ARRAY_SIZE(tq));
1445 			if (err) {
1446 				i915_gem_context_unlock_engines(ctx);
1447 				goto out_file;
1448 			}
1449 
1450 			if (++dw == max_dwords(obj)) {
1451 				obj = NULL;
1452 				dw = 0;
1453 			}
1454 			ndwords++;
1455 		}
1456 		i915_gem_context_unlock_engines(ctx);
1457 	}
1458 	pr_info("Submitted %lu dwords (across %lu engines)\n",
1459 		ndwords, num_engines);
1460 
1461 	dw = 0;
1462 	idx = 0;
1463 	list_for_each_entry(obj, &objects, st_link) {
1464 		unsigned int rem =
1465 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
1466 		unsigned int num_writes;
1467 
1468 		num_writes = rem;
1469 		if (i915_gem_object_is_readonly(obj))
1470 			num_writes = 0;
1471 
1472 		err = cpu_check(obj, idx++, num_writes);
1473 		if (err)
1474 			break;
1475 
1476 		dw += rem;
1477 	}
1478 
1479 out_file:
1480 	throttle_release(tq, ARRAY_SIZE(tq));
1481 	if (igt_live_test_end(&t))
1482 		err = -EIO;
1483 
1484 	fput(file);
1485 	return err;
1486 }
1487 
1488 static int check_scratch(struct i915_address_space *vm, u64 offset)
1489 {
1490 	struct drm_mm_node *node;
1491 
1492 	mutex_lock(&vm->mutex);
1493 	node = __drm_mm_interval_first(&vm->mm,
1494 				       offset, offset + sizeof(u32) - 1);
1495 	mutex_unlock(&vm->mutex);
1496 	if (!node || node->start > offset)
1497 		return 0;
1498 
1499 	GEM_BUG_ON(offset >= node->start + node->size);
1500 
1501 	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1502 	       upper_32_bits(offset), lower_32_bits(offset));
1503 	return -EINVAL;
1504 }
1505 
1506 static int write_to_scratch(struct i915_gem_context *ctx,
1507 			    struct intel_engine_cs *engine,
1508 			    struct drm_i915_gem_object *obj,
1509 			    u64 offset, u32 value)
1510 {
1511 	struct drm_i915_private *i915 = ctx->i915;
1512 	struct i915_address_space *vm;
1513 	struct i915_request *rq;
1514 	struct i915_vma *vma;
1515 	u32 *cmd;
1516 	int err;
1517 
1518 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1519 
1520 	err = check_scratch(ctx->vm, offset);
1521 	if (err)
1522 		return err;
1523 
1524 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1525 	if (IS_ERR(cmd))
1526 		return PTR_ERR(cmd);
1527 
1528 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
1529 	if (GRAPHICS_VER(i915) >= 8) {
1530 		*cmd++ = lower_32_bits(offset);
1531 		*cmd++ = upper_32_bits(offset);
1532 	} else {
1533 		*cmd++ = 0;
1534 		*cmd++ = offset;
1535 	}
1536 	*cmd++ = value;
1537 	*cmd = MI_BATCH_BUFFER_END;
1538 	__i915_gem_object_flush_map(obj, 0, 64);
1539 	i915_gem_object_unpin_map(obj);
1540 
1541 	intel_gt_chipset_flush(engine->gt);
1542 
1543 	vm = i915_gem_context_get_eb_vm(ctx);
1544 	vma = i915_vma_instance(obj, vm, NULL);
1545 	if (IS_ERR(vma)) {
1546 		err = PTR_ERR(vma);
1547 		goto out_vm;
1548 	}
1549 
1550 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1551 	if (err)
1552 		goto out_vm;
1553 
1554 	rq = igt_request_alloc(ctx, engine);
1555 	if (IS_ERR(rq)) {
1556 		err = PTR_ERR(rq);
1557 		goto err_unpin;
1558 	}
1559 
1560 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
1561 	if (err)
1562 		goto skip_request;
1563 
1564 	if (rq->engine->emit_init_breadcrumb) {
1565 		err = rq->engine->emit_init_breadcrumb(rq);
1566 		if (err)
1567 			goto skip_request;
1568 	}
1569 
1570 	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
1571 				    i915_vma_size(vma), 0);
1572 	if (err)
1573 		goto skip_request;
1574 
1575 	i915_vma_unpin(vma);
1576 
1577 	i915_request_add(rq);
1578 
1579 	goto out_vm;
1580 skip_request:
1581 	i915_request_set_error_once(rq, err);
1582 	i915_request_add(rq);
1583 err_unpin:
1584 	i915_vma_unpin(vma);
1585 out_vm:
1586 	i915_vm_put(vm);
1587 
1588 	if (!err)
1589 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1590 
1591 	return err;
1592 }
1593 
1594 static int read_from_scratch(struct i915_gem_context *ctx,
1595 			     struct intel_engine_cs *engine,
1596 			     struct drm_i915_gem_object *obj,
1597 			     u64 offset, u32 *value)
1598 {
1599 	struct drm_i915_private *i915 = ctx->i915;
1600 	struct i915_address_space *vm;
1601 	const u32 result = 0x100;
1602 	struct i915_request *rq;
1603 	struct i915_vma *vma;
1604 	unsigned int flags;
1605 	u32 *cmd;
1606 	int err;
1607 
1608 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1609 
1610 	err = check_scratch(ctx->vm, offset);
1611 	if (err)
1612 		return err;
1613 
1614 	if (GRAPHICS_VER(i915) >= 8) {
1615 		const u32 GPR0 = engine->mmio_base + 0x600;
1616 
1617 		vm = i915_gem_context_get_eb_vm(ctx);
1618 		vma = i915_vma_instance(obj, vm, NULL);
1619 		if (IS_ERR(vma)) {
1620 			err = PTR_ERR(vma);
1621 			goto out_vm;
1622 		}
1623 
1624 		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1625 		if (err)
1626 			goto out_vm;
1627 
1628 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1629 		if (IS_ERR(cmd)) {
1630 			err = PTR_ERR(cmd);
1631 			goto err_unpin;
1632 		}
1633 
1634 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1635 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1636 		*cmd++ = GPR0;
1637 		*cmd++ = lower_32_bits(offset);
1638 		*cmd++ = upper_32_bits(offset);
1639 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1640 		*cmd++ = GPR0;
1641 		*cmd++ = result;
1642 		*cmd++ = 0;
1643 		*cmd = MI_BATCH_BUFFER_END;
1644 
1645 		i915_gem_object_flush_map(obj);
1646 		i915_gem_object_unpin_map(obj);
1647 
1648 		flags = 0;
1649 	} else {
1650 		const u32 reg = engine->mmio_base + 0x420;
1651 
1652 		/* hsw: register access even to 3DPRIM! is protected */
1653 		vm = i915_vm_get(&engine->gt->ggtt->vm);
1654 		vma = i915_vma_instance(obj, vm, NULL);
1655 		if (IS_ERR(vma)) {
1656 			err = PTR_ERR(vma);
1657 			goto out_vm;
1658 		}
1659 
1660 		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1661 		if (err)
1662 			goto out_vm;
1663 
1664 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1665 		if (IS_ERR(cmd)) {
1666 			err = PTR_ERR(cmd);
1667 			goto err_unpin;
1668 		}
1669 
1670 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1671 		*cmd++ = MI_LOAD_REGISTER_MEM;
1672 		*cmd++ = reg;
1673 		*cmd++ = offset;
1674 		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1675 		*cmd++ = reg;
1676 		*cmd++ = i915_vma_offset(vma) + result;
1677 		*cmd = MI_BATCH_BUFFER_END;
1678 
1679 		i915_gem_object_flush_map(obj);
1680 		i915_gem_object_unpin_map(obj);
1681 
1682 		flags = I915_DISPATCH_SECURE;
1683 	}
1684 
1685 	intel_gt_chipset_flush(engine->gt);
1686 
1687 	rq = igt_request_alloc(ctx, engine);
1688 	if (IS_ERR(rq)) {
1689 		err = PTR_ERR(rq);
1690 		goto err_unpin;
1691 	}
1692 
1693 	err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE);
1694 	if (err)
1695 		goto skip_request;
1696 
1697 	if (rq->engine->emit_init_breadcrumb) {
1698 		err = rq->engine->emit_init_breadcrumb(rq);
1699 		if (err)
1700 			goto skip_request;
1701 	}
1702 
1703 	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
1704 				    i915_vma_size(vma), flags);
1705 	if (err)
1706 		goto skip_request;
1707 
1708 	i915_vma_unpin(vma);
1709 
1710 	i915_request_add(rq);
1711 
1712 	i915_gem_object_lock(obj, NULL);
1713 	err = i915_gem_object_set_to_cpu_domain(obj, false);
1714 	i915_gem_object_unlock(obj);
1715 	if (err)
1716 		goto out_vm;
1717 
1718 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1719 	if (IS_ERR(cmd)) {
1720 		err = PTR_ERR(cmd);
1721 		goto out_vm;
1722 	}
1723 
1724 	*value = cmd[result / sizeof(*cmd)];
1725 	i915_gem_object_unpin_map(obj);
1726 
1727 	goto out_vm;
1728 skip_request:
1729 	i915_request_set_error_once(rq, err);
1730 	i915_request_add(rq);
1731 err_unpin:
1732 	i915_vma_unpin(vma);
1733 out_vm:
1734 	i915_vm_put(vm);
1735 
1736 	if (!err)
1737 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1738 
1739 	return err;
1740 }
1741 
1742 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1743 {
1744 	struct i915_address_space *vm;
1745 	u32 *vaddr;
1746 	int err = 0;
1747 
1748 	vm = ctx->vm;
1749 	if (!vm)
1750 		return -ENODEV;
1751 
1752 	if (!vm->scratch[0]) {
1753 		pr_err("No scratch page!\n");
1754 		return -EINVAL;
1755 	}
1756 
1757 	vaddr = __px_vaddr(vm->scratch[0]);
1758 
1759 	memcpy(out, vaddr, sizeof(*out));
1760 	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1761 		pr_err("Inconsistent initial state of scratch page!\n");
1762 		err = -EINVAL;
1763 	}
1764 
1765 	return err;
1766 }
1767 
1768 static int igt_vm_isolation(void *arg)
1769 {
1770 	struct drm_i915_private *i915 = arg;
1771 	struct i915_gem_context *ctx_a, *ctx_b;
1772 	struct drm_i915_gem_object *obj_a, *obj_b;
1773 	unsigned long num_engines, count;
1774 	struct intel_engine_cs *engine;
1775 	struct igt_live_test t;
1776 	I915_RND_STATE(prng);
1777 	struct file *file;
1778 	u64 vm_total;
1779 	u32 expected;
1780 	int err;
1781 
1782 	if (GRAPHICS_VER(i915) < 7)
1783 		return 0;
1784 
1785 	/*
1786 	 * The simple goal here is that a write into one context is not
1787 	 * observed in a second (separate page tables and scratch).
1788 	 */
1789 
1790 	file = mock_file(i915);
1791 	if (IS_ERR(file))
1792 		return PTR_ERR(file);
1793 
1794 	err = igt_live_test_begin(&t, i915, __func__, "");
1795 	if (err)
1796 		goto out_file;
1797 
1798 	ctx_a = live_context(i915, file);
1799 	if (IS_ERR(ctx_a)) {
1800 		err = PTR_ERR(ctx_a);
1801 		goto out_file;
1802 	}
1803 
1804 	ctx_b = live_context(i915, file);
1805 	if (IS_ERR(ctx_b)) {
1806 		err = PTR_ERR(ctx_b);
1807 		goto out_file;
1808 	}
1809 
1810 	/* We can only test vm isolation, if the vm are distinct */
1811 	if (ctx_a->vm == ctx_b->vm)
1812 		goto out_file;
1813 
1814 	/* Read the initial state of the scratch page */
1815 	err = check_scratch_page(ctx_a, &expected);
1816 	if (err)
1817 		goto out_file;
1818 
1819 	err = check_scratch_page(ctx_b, &expected);
1820 	if (err)
1821 		goto out_file;
1822 
1823 	vm_total = ctx_a->vm->total;
1824 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
1825 
1826 	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
1827 	if (IS_ERR(obj_a)) {
1828 		err = PTR_ERR(obj_a);
1829 		goto out_file;
1830 	}
1831 
1832 	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
1833 	if (IS_ERR(obj_b)) {
1834 		err = PTR_ERR(obj_b);
1835 		goto put_a;
1836 	}
1837 
1838 	count = 0;
1839 	num_engines = 0;
1840 	for_each_uabi_engine(engine, i915) {
1841 		IGT_TIMEOUT(end_time);
1842 		unsigned long this = 0;
1843 
1844 		if (!intel_engine_can_store_dword(engine))
1845 			continue;
1846 
1847 		/* Not all engines have their own GPR! */
1848 		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1849 			continue;
1850 
1851 		while (!__igt_timeout(end_time, NULL)) {
1852 			u32 value = 0xc5c5c5c5;
1853 			u64 offset;
1854 
1855 			/* Leave enough space at offset 0 for the batch */
1856 			offset = igt_random_offset(&prng,
1857 						   I915_GTT_PAGE_SIZE, vm_total,
1858 						   sizeof(u32), alignof_dword);
1859 
1860 			err = write_to_scratch(ctx_a, engine, obj_a,
1861 					       offset, 0xdeadbeef);
1862 			if (err == 0)
1863 				err = read_from_scratch(ctx_b, engine, obj_b,
1864 							offset, &value);
1865 			if (err)
1866 				goto put_b;
1867 
1868 			if (value != expected) {
1869 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1870 				       engine->name, value,
1871 				       upper_32_bits(offset),
1872 				       lower_32_bits(offset),
1873 				       this);
1874 				err = -EINVAL;
1875 				goto put_b;
1876 			}
1877 
1878 			this++;
1879 		}
1880 		count += this;
1881 		num_engines++;
1882 	}
1883 	pr_info("Checked %lu scratch offsets across %lu engines\n",
1884 		count, num_engines);
1885 
1886 put_b:
1887 	i915_gem_object_put(obj_b);
1888 put_a:
1889 	i915_gem_object_put(obj_a);
1890 out_file:
1891 	if (igt_live_test_end(&t))
1892 		err = -EIO;
1893 	fput(file);
1894 	return err;
1895 }
1896 
1897 int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1898 {
1899 	static const struct i915_subtest tests[] = {
1900 		SUBTEST(live_nop_switch),
1901 		SUBTEST(live_parallel_switch),
1902 		SUBTEST(igt_ctx_exec),
1903 		SUBTEST(igt_ctx_readonly),
1904 		SUBTEST(igt_ctx_sseu),
1905 		SUBTEST(igt_shared_ctx_exec),
1906 		SUBTEST(igt_vm_isolation),
1907 	};
1908 
1909 	if (intel_gt_is_wedged(to_gt(i915)))
1910 		return 0;
1911 
1912 	return i915_live_subtests(tests, i915);
1913 }
1914