xref: /linux/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c (revision 41c177cf354126a22443b5c80cec9fdd313e67e1)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9 
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_regs.h"
14 #include "gt/intel_gt.h"
15 #include "gt/intel_gt_requests.h"
16 #include "gt/intel_reset.h"
17 #include "i915_selftest.h"
18 
19 #include "gem/selftests/igt_gem_utils.h"
20 #include "selftests/i915_random.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_live_test.h"
23 #include "selftests/igt_reset.h"
24 #include "selftests/igt_spinner.h"
25 #include "selftests/mock_drm.h"
26 #include "selftests/mock_gem_device.h"
27 
28 #include "huge_gem_object.h"
29 #include "igt_gem_utils.h"
30 
31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
32 
33 static int live_nop_switch(void *arg)
34 {
35 	const unsigned int nctx = 1024;
36 	struct drm_i915_private *i915 = arg;
37 	struct intel_engine_cs *engine;
38 	struct i915_gem_context **ctx;
39 	struct igt_live_test t;
40 	struct file *file;
41 	unsigned long n;
42 	int err = -ENODEV;
43 
44 	/*
45 	 * Create as many contexts as we can feasibly get away with
46 	 * and check we can switch between them rapidly.
47 	 *
48 	 * Serves as a very simple stress test for submission and HW switching
49 	 * between contexts.
50 	 */
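	/*
	 * Plan, per engine: first submit one request on every context (timed
	 * as the "Populated" line below), then repeatedly time chains of
	 * 'prime' back-to-back requests cycling through the contexts and
	 * report the average per-switch latency at the end.
	 */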
51 
52 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
53 		return 0;
54 
55 	file = mock_file(i915);
56 	if (IS_ERR(file))
57 		return PTR_ERR(file);
58 
59 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
60 	if (!ctx) {
61 		err = -ENOMEM;
62 		goto out_file;
63 	}
64 
65 	for (n = 0; n < nctx; n++) {
66 		ctx[n] = live_context(i915, file);
67 		if (IS_ERR(ctx[n])) {
68 			err = PTR_ERR(ctx[n]);
69 			goto out_ctx;
70 		}
71 	}
72 
73 	for_each_uabi_engine(engine, i915) {
74 		struct i915_request *rq = NULL;
75 		unsigned long end_time, prime;
76 		ktime_t times[2] = {};
77 
78 		times[0] = ktime_get_raw();
79 		for (n = 0; n < nctx; n++) {
80 			struct i915_request *this;
81 
82 			this = igt_request_alloc(ctx[n], engine);
83 			if (IS_ERR(this)) {
84 				err = PTR_ERR(this);
85 				goto out_ctx;
86 			}
87 			if (rq) {
88 				i915_request_await_dma_fence(this, &rq->fence);
89 				i915_request_put(rq);
90 			}
91 			rq = i915_request_get(this);
92 			i915_request_add(this);
93 		}
94 		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
95 			pr_err("Failed to populate %d contexts\n", nctx);
96 			intel_gt_set_wedged(engine->gt);
97 			i915_request_put(rq);
98 			err = -EIO;
99 			goto out_ctx;
100 		}
101 		i915_request_put(rq);
102 
103 		times[1] = ktime_get_raw();
104 
105 		pr_info("Populated %d contexts on %s in %lluns\n",
106 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
107 
108 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
109 		if (err)
110 			goto out_ctx;
111 
112 		end_time = jiffies + i915_selftest.timeout_jiffies;
113 		for_each_prime_number_from(prime, 2, 8192) {
114 			times[1] = ktime_get_raw();
115 
116 			rq = NULL;
117 			for (n = 0; n < prime; n++) {
118 				struct i915_request *this;
119 
120 				this = igt_request_alloc(ctx[n % nctx], engine);
121 				if (IS_ERR(this)) {
122 					err = PTR_ERR(this);
123 					goto out_ctx;
124 				}
125 
126 				if (rq) { /* Force submission order */
127 					i915_request_await_dma_fence(this, &rq->fence);
128 					i915_request_put(rq);
129 				}
130 
131 				/*
132 				 * This space is left intentionally blank.
133 				 *
134 				 * We do not actually want to perform any
135 				 * action with this request, we just want
136 				 * to measure the latency in allocation
137 				 * and submission of our breadcrumbs -
138 				 * ensuring that the bare request is sufficient
139 				 * for the system to work (i.e. proper HEAD
140 				 * tracking of the rings, interrupt handling,
141 				 * etc). It also gives us the lowest bounds
142 				 * for latency.
143 				 */
144 
145 				rq = i915_request_get(this);
146 				i915_request_add(this);
147 			}
148 			GEM_BUG_ON(!rq);
149 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
150 				pr_err("Switching between %lu contexts timed out\n",
151 				       prime);
152 				intel_gt_set_wedged(engine->gt);
153 				i915_request_put(rq);
154 				break;
155 			}
156 			i915_request_put(rq);
157 
158 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
159 			if (prime == 2)
160 				times[0] = times[1];
161 
162 			if (__igt_timeout(end_time, NULL))
163 				break;
164 		}
165 
166 		err = igt_live_test_end(&t);
167 		if (err)
168 			goto out_ctx;
169 
170 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
171 			engine->name,
172 			ktime_to_ns(times[0]),
173 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
174 	}
175 
176 out_ctx:
177 	kfree(ctx);
178 out_file:
179 	fput(file);
180 	return err;
181 }
182 
183 struct parallel_switch {
184 	struct kthread_worker *worker;
185 	struct kthread_work work;
186 	struct intel_context *ce[2];
187 	int result;
188 };
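/*
 * Each parallel_switch instance drives one engine from its own kthread
 * worker: ce[0] and ce[1] are two pinned contexts on that engine, 'work'
 * runs one of the __live_parallel_switch* loops below, and 'result' carries
 * back the first error (or 0) to live_parallel_switch().
 */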
189 
190 static void __live_parallel_switch1(struct kthread_work *work)
191 {
192 	struct parallel_switch *arg =
193 		container_of(work, typeof(*arg), work);
194 	IGT_TIMEOUT(end_time);
195 	unsigned long count;
196 
197 	count = 0;
198 	arg->result = 0;
199 	do {
200 		struct i915_request *rq = NULL;
201 		int n;
202 
203 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
204 			struct i915_request *prev = rq;
205 
206 			rq = i915_request_create(arg->ce[n]);
207 			if (IS_ERR(rq)) {
208 				i915_request_put(prev);
209 				arg->result = PTR_ERR(rq);
210 				break;
211 			}
212 
213 			i915_request_get(rq);
214 			if (prev) {
215 				arg->result =
216 					i915_request_await_dma_fence(rq,
217 								     &prev->fence);
218 				i915_request_put(prev);
219 			}
220 
221 			i915_request_add(rq);
222 		}
223 
224 		if (IS_ERR_OR_NULL(rq))
225 			break;
226 
227 		if (i915_request_wait(rq, 0, HZ) < 0)
228 			arg->result = -ETIME;
229 
230 		i915_request_put(rq);
231 
232 		count++;
233 	} while (!arg->result && !__igt_timeout(end_time, NULL));
234 
235 	pr_info("%s: %lu switches (sync) <%d>\n",
236 		arg->ce[0]->engine->name, count, arg->result);
237 }
238 
239 static void __live_parallel_switchN(struct kthread_work *work)
240 {
241 	struct parallel_switch *arg =
242 		container_of(work, typeof(*arg), work);
243 	struct i915_request *rq = NULL;
244 	IGT_TIMEOUT(end_time);
245 	unsigned long count;
246 	int n;
247 
248 	count = 0;
249 	arg->result = 0;
250 	do {
251 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
252 			struct i915_request *prev = rq;
253 
254 			rq = i915_request_create(arg->ce[n]);
255 			if (IS_ERR(rq)) {
256 				i915_request_put(prev);
257 				arg->result = PTR_ERR(rq);
258 				break;
259 			}
260 
261 			i915_request_get(rq);
262 			if (prev) {
263 				arg->result =
264 					i915_request_await_dma_fence(rq,
265 								     &prev->fence);
266 				i915_request_put(prev);
267 			}
268 
269 			i915_request_add(rq);
270 		}
271 
272 		count++;
273 	} while (!arg->result && !__igt_timeout(end_time, NULL));
274 
275 	if (!IS_ERR_OR_NULL(rq))
276 		i915_request_put(rq);
277 
278 	pr_info("%s: %lu switches (many) <%d>\n",
279 		arg->ce[0]->engine->name, count, arg->result);
280 }
281 
282 static int live_parallel_switch(void *arg)
283 {
284 	struct drm_i915_private *i915 = arg;
285 	static void (* const func[])(struct kthread_work *) = {
286 		__live_parallel_switch1,
287 		__live_parallel_switchN,
288 		NULL,
289 	};
290 	struct parallel_switch *data = NULL;
291 	struct i915_gem_engines *engines;
292 	struct i915_gem_engines_iter it;
293 	void (* const *fn)(struct kthread_work *);
294 	struct i915_gem_context *ctx;
295 	struct intel_context *ce;
296 	struct file *file;
297 	int n, m, count;
298 	int err = 0;
299 
300 	/*
301 	 * Check we can process switches on all engines simultaneously.
302 	 */
303 
304 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
305 		return 0;
306 
307 	file = mock_file(i915);
308 	if (IS_ERR(file))
309 		return PTR_ERR(file);
310 
311 	ctx = live_context(i915, file);
312 	if (IS_ERR(ctx)) {
313 		err = PTR_ERR(ctx);
314 		goto out_file;
315 	}
316 
317 	engines = i915_gem_context_lock_engines(ctx);
318 	count = engines->num_engines;
319 
320 	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
321 	if (!data) {
322 		i915_gem_context_unlock_engines(ctx);
323 		err = -ENOMEM;
324 		goto out_file;
325 	}
326 
327 	m = 0; /* Use the first context as our template for the engines */
328 	for_each_gem_engine(ce, engines, it) {
329 		err = intel_context_pin(ce);
330 		if (err) {
331 			i915_gem_context_unlock_engines(ctx);
332 			goto out;
333 		}
334 		data[m++].ce[0] = intel_context_get(ce);
335 	}
336 	i915_gem_context_unlock_engines(ctx);
337 
338 	/* Clone the same set of engines into the other contexts */
339 	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
340 		ctx = live_context(i915, file);
341 		if (IS_ERR(ctx)) {
342 			err = PTR_ERR(ctx);
343 			goto out;
344 		}
345 
346 		for (m = 0; m < count; m++) {
347 			if (!data[m].ce[0])
348 				continue;
349 
350 			ce = intel_context_create(data[m].ce[0]->engine);
351 			if (IS_ERR(ce)) {
352 				err = PTR_ERR(ce);
353 				goto out;
354 			}
355 
356 			err = intel_context_pin(ce);
357 			if (err) {
358 				intel_context_put(ce);
359 				goto out;
360 			}
361 
362 			data[m].ce[n] = ce;
363 		}
364 	}
365 
366 	for (n = 0; n < count; n++) {
367 		struct kthread_worker *worker;
368 
369 		if (!data[n].ce[0])
370 			continue;
371 
372 		worker = kthread_create_worker(0, "igt/parallel:%s",
373 					       data[n].ce[0]->engine->name);
374 		if (IS_ERR(worker)) {
375 			err = PTR_ERR(worker);
376 			goto out;
377 		}
378 
379 		data[n].worker = worker;
380 	}
381 
382 	for (fn = func; !err && *fn; fn++) {
383 		struct igt_live_test t;
384 
385 		err = igt_live_test_begin(&t, i915, __func__, "");
386 		if (err)
387 			break;
388 
389 		for (n = 0; n < count; n++) {
390 			if (!data[n].ce[0])
391 				continue;
392 
393 			data[n].result = 0;
394 			kthread_init_work(&data[n].work, *fn);
395 			kthread_queue_work(data[n].worker, &data[n].work);
396 		}
397 
398 		for (n = 0; n < count; n++) {
399 			if (data[n].ce[0]) {
400 				kthread_flush_work(&data[n].work);
401 				if (data[n].result && !err)
402 					err = data[n].result;
403 			}
404 		}
405 
406 		if (igt_live_test_end(&t)) {
407 			err = err ?: -EIO;
408 			break;
409 		}
410 	}
411 
412 out:
413 	for (n = 0; n < count; n++) {
414 		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
415 			if (!data[n].ce[m])
416 				continue;
417 
418 			intel_context_unpin(data[n].ce[m]);
419 			intel_context_put(data[n].ce[m]);
420 		}
421 
422 		if (data[n].worker)
423 			kthread_destroy_worker(data[n].worker);
424 	}
425 	kfree(data);
426 out_file:
427 	fput(file);
428 	return err;
429 }
430 
431 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
432 {
433 	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
434 }
435 
436 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
437 {
438 	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
439 }
440 
441 static int gpu_fill(struct intel_context *ce,
442 		    struct drm_i915_gem_object *obj,
443 		    unsigned int dw)
444 {
445 	struct i915_vma *vma;
446 	int err;
447 
448 	GEM_BUG_ON(obj->base.size > ce->vm->total);
449 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
450 
451 	vma = i915_vma_instance(obj, ce->vm, NULL);
452 	if (IS_ERR(vma))
453 		return PTR_ERR(vma);
454 
455 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
456 	if (err)
457 		return err;
458 
459 	/*
460 	 * Within the GTT the huge object maps every page onto
461 	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
462 	 * We set the nth dword within the page using the nth
463 	 * mapping via the GTT - this should exercise the GTT mapping
464 	 * whilst checking that each context provides a unique view
465 	 * into the object.
466 	 */
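	/*
	 * Worked example of the offset below: for dw == 3 the fill targets
	 * GTT page (3 * real_page_count(obj)) at byte offset 3 * sizeof(u32),
	 * i.e. the 4th dword within the 4th GTT view of the same set of
	 * physical pages.
	 */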
467 	err = igt_gpu_fill_dw(ce, vma,
468 			      (dw * real_page_count(obj)) << PAGE_SHIFT |
469 			      (dw * sizeof(u32)),
470 			      real_page_count(obj),
471 			      dw);
472 	i915_vma_unpin(vma);
473 
474 	return err;
475 }
476 
477 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
478 {
479 	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
480 	unsigned int need_flush;
481 	unsigned long n, m;
482 	int err;
483 
484 	i915_gem_object_lock(obj, NULL);
485 	err = i915_gem_object_prepare_write(obj, &need_flush);
486 	if (err)
487 		goto out;
488 
489 	for (n = 0; n < real_page_count(obj); n++) {
490 		u32 *map;
491 
492 		map = kmap_local_page(i915_gem_object_get_page(obj, n));
493 		for (m = 0; m < DW_PER_PAGE; m++)
494 			map[m] = value;
495 		if (!has_llc)
496 			drm_clflush_virt_range(map, PAGE_SIZE);
497 		kunmap_local(map);
498 	}
499 
500 	i915_gem_object_finish_access(obj);
501 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
502 	obj->write_domain = 0;
503 out:
504 	i915_gem_object_unlock(obj);
505 	return err;
506 }
507 
508 static noinline int cpu_check(struct drm_i915_gem_object *obj,
509 			      unsigned int idx, unsigned int max)
510 {
511 	unsigned int needs_flush;
512 	unsigned long n;
513 	int err;
514 
515 	i915_gem_object_lock(obj, NULL);
516 	err = i915_gem_object_prepare_read(obj, &needs_flush);
517 	if (err)
518 		goto out_unlock;
519 
520 	for (n = 0; n < real_page_count(obj); n++) {
521 		u32 *map, m;
522 
523 		map = kmap_local_page(i915_gem_object_get_page(obj, n));
524 		if (needs_flush & CLFLUSH_BEFORE)
525 			drm_clflush_virt_range(map, PAGE_SIZE);
526 
527 		for (m = 0; m < max; m++) {
528 			if (map[m] != m) {
529 				pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n",
530 				       __builtin_return_address(0), idx,
531 				       n, real_page_count(obj), m, max,
532 				       map[m], m);
533 				err = -EINVAL;
534 				goto out_unmap;
535 			}
536 		}
537 
538 		for (; m < DW_PER_PAGE; m++) {
539 			if (map[m] != STACK_MAGIC) {
540 				pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n",
541 				       __builtin_return_address(0), idx, n, m,
542 				       map[m], STACK_MAGIC);
543 				err = -EINVAL;
544 				goto out_unmap;
545 			}
546 		}
547 
548 out_unmap:
549 		kunmap_local(map);
550 		if (err)
551 			break;
552 	}
553 
554 	i915_gem_object_finish_access(obj);
555 out_unlock:
556 	i915_gem_object_unlock(obj);
557 	return err;
558 }
559 
560 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
561 {
562 	int err;
563 
564 	GEM_BUG_ON(obj->base.handle_count);
565 
566 	/* tie the object to the drm_file for easy reaping */
567 	err = idr_alloc(&to_drm_file(file)->object_idr,
568 			&obj->base, 1, 0, GFP_KERNEL);
569 	if (err < 0)
570 		return err;
571 
572 	i915_gem_object_get(obj);
573 	obj->base.handle_count++;
574 	return 0;
575 }
576 
577 static struct drm_i915_gem_object *
578 create_test_object(struct i915_address_space *vm,
579 		   struct file *file,
580 		   struct list_head *objects)
581 {
582 	struct drm_i915_gem_object *obj;
583 	u64 size;
584 	int err;
585 
586 	/* Keep in GEM's good graces */
587 	intel_gt_retire_requests(vm->gt);
588 
589 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
590 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
591 
592 	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
593 	if (IS_ERR(obj))
594 		return obj;
595 
596 	err = file_add_object(file, obj);
597 	i915_gem_object_put(obj);
598 	if (err)
599 		return ERR_PTR(err);
600 
601 	err = cpu_fill(obj, STACK_MAGIC);
602 	if (err) {
603 		pr_err("Failed to fill object with cpu, err=%d\n",
604 		       err);
605 		return ERR_PTR(err);
606 	}
607 
608 	list_add_tail(&obj->st_link, objects);
609 	return obj;
610 }
611 
612 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
613 {
614 	unsigned long npages = fake_page_count(obj);
615 
616 	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
617 	return npages / DW_PER_PAGE;
618 }
619 
620 static void throttle_release(struct i915_request **q, int count)
621 {
622 	int i;
623 
624 	for (i = 0; i < count; i++) {
625 		if (IS_ERR_OR_NULL(q[i]))
626 			continue;
627 
628 		i915_request_put(fetch_and_zero(&q[i]));
629 	}
630 }
631 
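/*
 * throttle() keeps a small FIFO of outstanding requests on @ce: wait for the
 * oldest entry in @q to complete, shuffle the remainder down, then queue a
 * fresh request in the freed slot. This bounds how far the callers'
 * submission loops can run ahead of the GPU; throttle_release() drops any
 * remaining references once a test finishes.
 */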
632 static int throttle(struct intel_context *ce,
633 		    struct i915_request **q, int count)
634 {
635 	int i;
636 
637 	if (!IS_ERR_OR_NULL(q[0])) {
638 		if (i915_request_wait(q[0],
639 				      I915_WAIT_INTERRUPTIBLE,
640 				      MAX_SCHEDULE_TIMEOUT) < 0)
641 			return -EINTR;
642 
643 		i915_request_put(q[0]);
644 	}
645 
646 	for (i = 0; i < count - 1; i++)
647 		q[i] = q[i + 1];
648 
649 	q[i] = intel_context_create_request(ce);
650 	if (IS_ERR(q[i]))
651 		return PTR_ERR(q[i]);
652 
653 	i915_request_get(q[i]);
654 	i915_request_add(q[i]);
655 
656 	return 0;
657 }
658 
659 static int igt_ctx_exec(void *arg)
660 {
661 	struct drm_i915_private *i915 = arg;
662 	struct intel_engine_cs *engine;
663 	int err = -ENODEV;
664 
665 	/*
666 	 * Create a few different contexts (with different mm) and write
667 	 * through each ctx/mm using the GPU, making sure those writes end
668 	 * up in the expected pages of our obj.
669 	 */
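	/*
	 * Each pass below creates a fresh kernel context, writes a single
	 * dword into the shared test object through that context's vm, and
	 * moves on; cpu_check() afterwards verifies every written dword and
	 * that the untouched remainder of each page still holds STACK_MAGIC.
	 */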
670 
671 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
672 		return 0;
673 
674 	for_each_uabi_engine(engine, i915) {
675 		struct drm_i915_gem_object *obj = NULL;
676 		unsigned long ncontexts, ndwords, dw;
677 		struct i915_request *tq[5] = {};
678 		struct igt_live_test t;
679 		IGT_TIMEOUT(end_time);
680 		LIST_HEAD(objects);
681 		struct file *file;
682 
683 		if (!intel_engine_can_store_dword(engine))
684 			continue;
685 
686 		if (!engine->context_size)
687 			continue; /* No logical context support in HW */
688 
689 		file = mock_file(i915);
690 		if (IS_ERR(file))
691 			return PTR_ERR(file);
692 
693 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
694 		if (err)
695 			goto out_file;
696 
697 		ncontexts = 0;
698 		ndwords = 0;
699 		dw = 0;
700 		while (!time_after(jiffies, end_time)) {
701 			struct i915_gem_context *ctx;
702 			struct intel_context *ce;
703 
704 			ctx = kernel_context(i915, NULL);
705 			if (IS_ERR(ctx)) {
706 				err = PTR_ERR(ctx);
707 				goto out_file;
708 			}
709 
710 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
711 			GEM_BUG_ON(IS_ERR(ce));
712 
713 			if (!obj) {
714 				obj = create_test_object(ce->vm, file, &objects);
715 				if (IS_ERR(obj)) {
716 					err = PTR_ERR(obj);
717 					intel_context_put(ce);
718 					kernel_context_close(ctx);
719 					goto out_file;
720 				}
721 			}
722 
723 			err = gpu_fill(ce, obj, dw);
724 			if (err) {
725 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
726 				       ndwords, dw, max_dwords(obj),
727 				       engine->name,
728 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
729 				       err);
730 				intel_context_put(ce);
731 				kernel_context_close(ctx);
732 				goto out_file;
733 			}
734 
735 			err = throttle(ce, tq, ARRAY_SIZE(tq));
736 			if (err) {
737 				intel_context_put(ce);
738 				kernel_context_close(ctx);
739 				goto out_file;
740 			}
741 
742 			if (++dw == max_dwords(obj)) {
743 				obj = NULL;
744 				dw = 0;
745 			}
746 
747 			ndwords++;
748 			ncontexts++;
749 
750 			intel_context_put(ce);
751 			kernel_context_close(ctx);
752 		}
753 
754 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
755 			ncontexts, engine->name, ndwords);
756 
757 		ncontexts = dw = 0;
758 		list_for_each_entry(obj, &objects, st_link) {
759 			unsigned int rem =
760 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
761 
762 			err = cpu_check(obj, ncontexts++, rem);
763 			if (err)
764 				break;
765 
766 			dw += rem;
767 		}
768 
769 out_file:
770 		throttle_release(tq, ARRAY_SIZE(tq));
771 		if (igt_live_test_end(&t))
772 			err = -EIO;
773 
774 		fput(file);
775 		if (err)
776 			return err;
777 
778 		i915_gem_drain_freed_objects(i915);
779 	}
780 
781 	return 0;
782 }
783 
784 static int igt_shared_ctx_exec(void *arg)
785 {
786 	struct drm_i915_private *i915 = arg;
787 	struct i915_request *tq[5] = {};
788 	struct i915_gem_context *parent;
789 	struct intel_engine_cs *engine;
790 	struct igt_live_test t;
791 	struct file *file;
792 	int err = 0;
793 
794 	/*
795 	 * Create a few different contexts with the same mm and write
796 	 * through each ctx using the GPU, making sure those writes end
797 	 * up in the expected pages of our obj.
798 	 */
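	/*
	 * Unlike igt_ctx_exec(), every context created below reuses
	 * parent->vm, so all the GPU writes land in one shared address space
	 * while still being issued from distinct contexts.
	 */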
799 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
800 		return 0;
801 
802 	file = mock_file(i915);
803 	if (IS_ERR(file))
804 		return PTR_ERR(file);
805 
806 	parent = live_context(i915, file);
807 	if (IS_ERR(parent)) {
808 		err = PTR_ERR(parent);
809 		goto out_file;
810 	}
811 
812 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
813 		err = 0;
814 		goto out_file;
815 	}
816 
817 	err = igt_live_test_begin(&t, i915, __func__, "");
818 	if (err)
819 		goto out_file;
820 
821 	for_each_uabi_engine(engine, i915) {
822 		unsigned long ncontexts, ndwords, dw;
823 		struct drm_i915_gem_object *obj = NULL;
824 		IGT_TIMEOUT(end_time);
825 		LIST_HEAD(objects);
826 
827 		if (!intel_engine_can_store_dword(engine))
828 			continue;
829 
830 		dw = 0;
831 		ndwords = 0;
832 		ncontexts = 0;
833 		while (!time_after(jiffies, end_time)) {
834 			struct i915_gem_context *ctx;
835 			struct intel_context *ce;
836 
837 			ctx = kernel_context(i915, parent->vm);
838 			if (IS_ERR(ctx)) {
839 				err = PTR_ERR(ctx);
840 				goto out_test;
841 			}
842 
843 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
844 			GEM_BUG_ON(IS_ERR(ce));
845 
846 			if (!obj) {
847 				obj = create_test_object(parent->vm,
848 							 file, &objects);
849 				if (IS_ERR(obj)) {
850 					err = PTR_ERR(obj);
851 					intel_context_put(ce);
852 					kernel_context_close(ctx);
853 					goto out_test;
854 				}
855 			}
856 
857 			err = gpu_fill(ce, obj, dw);
858 			if (err) {
859 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
860 				       ndwords, dw, max_dwords(obj),
861 				       engine->name,
862 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
863 				       err);
864 				intel_context_put(ce);
865 				kernel_context_close(ctx);
866 				goto out_test;
867 			}
868 
869 			err = throttle(ce, tq, ARRAY_SIZE(tq));
870 			if (err) {
871 				intel_context_put(ce);
872 				kernel_context_close(ctx);
873 				goto out_test;
874 			}
875 
876 			if (++dw == max_dwords(obj)) {
877 				obj = NULL;
878 				dw = 0;
879 			}
880 
881 			ndwords++;
882 			ncontexts++;
883 
884 			intel_context_put(ce);
885 			kernel_context_close(ctx);
886 		}
887 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
888 			ncontexts, engine->name, ndwords);
889 
890 		ncontexts = dw = 0;
891 		list_for_each_entry(obj, &objects, st_link) {
892 			unsigned int rem =
893 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
894 
895 			err = cpu_check(obj, ncontexts++, rem);
896 			if (err)
897 				goto out_test;
898 
899 			dw += rem;
900 		}
901 
902 		i915_gem_drain_freed_objects(i915);
903 	}
904 out_test:
905 	throttle_release(tq, ARRAY_SIZE(tq));
906 	if (igt_live_test_end(&t))
907 		err = -EIO;
908 out_file:
909 	fput(file);
910 	return err;
911 }
912 
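/*
 * rpcs_query_batch() assembles a tiny batch that stores the engine's
 * R_PWR_CLK_STATE register into the first dword of @vma, so that
 * __read_slice_count() can later decode the active slice count from it.
 */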
913 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
914 			    struct i915_vma *vma,
915 			    struct intel_engine_cs *engine)
916 {
917 	u32 *cmd;
918 
919 	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
920 
921 	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
922 	if (IS_ERR(cmd))
923 		return PTR_ERR(cmd);
924 
925 	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
926 	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
927 	*cmd++ = lower_32_bits(i915_vma_offset(vma));
928 	*cmd++ = upper_32_bits(i915_vma_offset(vma));
929 	*cmd = MI_BATCH_BUFFER_END;
930 
931 	__i915_gem_object_flush_map(rpcs, 0, 64);
932 	i915_gem_object_unpin_map(rpcs);
933 
934 	intel_gt_chipset_flush(vma->vm->gt);
935 
936 	return 0;
937 }
938 
939 static int
940 emit_rpcs_query(struct drm_i915_gem_object *obj,
941 		struct intel_context *ce,
942 		struct i915_request **rq_out)
943 {
944 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
945 	struct i915_request *rq;
946 	struct i915_gem_ww_ctx ww;
947 	struct i915_vma *batch;
948 	struct i915_vma *vma;
949 	struct drm_i915_gem_object *rpcs;
950 	int err;
951 
952 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
953 
954 	if (GRAPHICS_VER(i915) < 8)
955 		return -EINVAL;
956 
957 	vma = i915_vma_instance(obj, ce->vm, NULL);
958 	if (IS_ERR(vma))
959 		return PTR_ERR(vma);
960 
961 	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
962 	if (IS_ERR(rpcs))
963 		return PTR_ERR(rpcs);
964 
965 	batch = i915_vma_instance(rpcs, ce->vm, NULL);
966 	if (IS_ERR(batch)) {
967 		err = PTR_ERR(batch);
968 		goto err_put;
969 	}
970 
971 	i915_gem_ww_ctx_init(&ww, false);
972 retry:
973 	err = i915_gem_object_lock(obj, &ww);
974 	if (!err)
975 		err = i915_gem_object_lock(rpcs, &ww);
976 	if (!err)
977 		err = i915_gem_object_set_to_gtt_domain(obj, false);
978 	if (!err)
979 		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
980 	if (err)
981 		goto err_put;
982 
983 	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
984 	if (err)
985 		goto err_vma;
986 
987 	err = rpcs_query_batch(rpcs, vma, ce->engine);
988 	if (err)
989 		goto err_batch;
990 
991 	rq = i915_request_create(ce);
992 	if (IS_ERR(rq)) {
993 		err = PTR_ERR(rq);
994 		goto err_batch;
995 	}
996 
997 	err = i915_vma_move_to_active(batch, rq, 0);
998 	if (err)
999 		goto skip_request;
1000 
1001 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1002 	if (err)
1003 		goto skip_request;
1004 
1005 	if (rq->engine->emit_init_breadcrumb) {
1006 		err = rq->engine->emit_init_breadcrumb(rq);
1007 		if (err)
1008 			goto skip_request;
1009 	}
1010 
1011 	err = rq->engine->emit_bb_start(rq,
1012 					i915_vma_offset(batch),
1013 					i915_vma_size(batch),
1014 					0);
1015 	if (err)
1016 		goto skip_request;
1017 
1018 	*rq_out = i915_request_get(rq);
1019 
1020 skip_request:
1021 	if (err)
1022 		i915_request_set_error_once(rq, err);
1023 	i915_request_add(rq);
1024 err_batch:
1025 	i915_vma_unpin(batch);
1026 err_vma:
1027 	i915_vma_unpin(vma);
1028 err_put:
1029 	if (err == -EDEADLK) {
1030 		err = i915_gem_ww_ctx_backoff(&ww);
1031 		if (!err)
1032 			goto retry;
1033 	}
1034 	i915_gem_ww_ctx_fini(&ww);
1035 	i915_gem_object_put(rpcs);
1036 	return err;
1037 }
1038 
1039 #define TEST_IDLE	BIT(0)
1040 #define TEST_BUSY	BIT(1)
1041 #define TEST_RESET	BIT(2)
1042 
1043 static int
1044 __sseu_prepare(const char *name,
1045 	       unsigned int flags,
1046 	       struct intel_context *ce,
1047 	       struct igt_spinner **spin)
1048 {
1049 	struct i915_request *rq;
1050 	int ret;
1051 
1052 	*spin = NULL;
1053 	if (!(flags & (TEST_BUSY | TEST_RESET)))
1054 		return 0;
1055 
1056 	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1057 	if (!*spin)
1058 		return -ENOMEM;
1059 
1060 	ret = igt_spinner_init(*spin, ce->engine->gt);
1061 	if (ret)
1062 		goto err_free;
1063 
1064 	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1065 	if (IS_ERR(rq)) {
1066 		ret = PTR_ERR(rq);
1067 		goto err_fini;
1068 	}
1069 
1070 	i915_request_add(rq);
1071 
1072 	if (!igt_wait_for_spinner(*spin, rq)) {
1073 		pr_err("%s: Spinner failed to start!\n", name);
1074 		ret = -ETIMEDOUT;
1075 		goto err_end;
1076 	}
1077 
1078 	return 0;
1079 
1080 err_end:
1081 	igt_spinner_end(*spin);
1082 err_fini:
1083 	igt_spinner_fini(*spin);
1084 err_free:
1085 	kfree(fetch_and_zero(spin));
1086 	return ret;
1087 }
1088 
1089 static int
1090 __read_slice_count(struct intel_context *ce,
1091 		   struct drm_i915_gem_object *obj,
1092 		   struct igt_spinner *spin,
1093 		   u32 *rpcs)
1094 {
1095 	struct i915_request *rq = NULL;
1096 	u32 s_mask, s_shift;
1097 	unsigned int cnt;
1098 	u32 *buf, val;
1099 	long ret;
1100 
1101 	ret = emit_rpcs_query(obj, ce, &rq);
1102 	if (ret)
1103 		return ret;
1104 
1105 	if (spin)
1106 		igt_spinner_end(spin);
1107 
1108 	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1109 	i915_request_put(rq);
1110 	if (ret < 0)
1111 		return ret;
1112 
1113 	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1114 	if (IS_ERR(buf)) {
1115 		ret = PTR_ERR(buf);
1116 		return ret;
1117 	}
1118 
1119 	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1120 		s_mask = GEN11_RPCS_S_CNT_MASK;
1121 		s_shift = GEN11_RPCS_S_CNT_SHIFT;
1122 	} else {
1123 		s_mask = GEN8_RPCS_S_CNT_MASK;
1124 		s_shift = GEN8_RPCS_S_CNT_SHIFT;
1125 	}
1126 
1127 	val = *buf;
1128 	cnt = (val & s_mask) >> s_shift;
1129 	*rpcs = val;
1130 
1131 	i915_gem_object_unpin_map(obj);
1132 
1133 	return cnt;
1134 }
1135 
1136 static int
1137 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1138 	     const char *prefix, const char *suffix)
1139 {
1140 	if (slices == expected)
1141 		return 0;
1142 
1143 	if (slices < 0) {
1144 		pr_err("%s: %s read slice count failed with %d%s\n",
1145 		       name, prefix, slices, suffix);
1146 		return slices;
1147 	}
1148 
1149 	pr_err("%s: %s slice count %d is not %u%s\n",
1150 	       name, prefix, slices, expected, suffix);
1151 
1152 	pr_info("RPCS=0x%x; %u%sx%u%s\n",
1153 		rpcs, slices,
1154 		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1155 		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1156 		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1157 
1158 	return -EINVAL;
1159 }
1160 
1161 static int
1162 __sseu_finish(const char *name,
1163 	      unsigned int flags,
1164 	      struct intel_context *ce,
1165 	      struct drm_i915_gem_object *obj,
1166 	      unsigned int expected,
1167 	      struct igt_spinner *spin)
1168 {
1169 	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1170 	u32 rpcs = 0;
1171 	int ret = 0;
1172 
1173 	if (flags & TEST_RESET) {
1174 		ret = intel_engine_reset(ce->engine, "sseu");
1175 		if (ret)
1176 			goto out;
1177 	}
1178 
1179 	ret = __read_slice_count(ce, obj,
1180 				 flags & TEST_RESET ? NULL : spin, &rpcs);
1181 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1182 	if (ret)
1183 		goto out;
1184 
1185 	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1186 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1187 
1188 out:
1189 	if (spin)
1190 		igt_spinner_end(spin);
1191 
1192 	if ((flags & TEST_IDLE) && ret == 0) {
1193 		ret = igt_flush_test(ce->engine->i915);
1194 		if (ret)
1195 			return ret;
1196 
1197 		ret = __read_slice_count(ce, obj, NULL, &rpcs);
1198 		ret = __check_rpcs(name, rpcs, ret, expected,
1199 				   "Context", " after idle!");
1200 	}
1201 
1202 	return ret;
1203 }
1204 
1205 static int
1206 __sseu_test(const char *name,
1207 	    unsigned int flags,
1208 	    struct intel_context *ce,
1209 	    struct drm_i915_gem_object *obj,
1210 	    struct intel_sseu sseu)
1211 {
1212 	struct igt_spinner *spin = NULL;
1213 	int ret;
1214 
1215 	intel_engine_pm_get(ce->engine);
1216 
1217 	ret = __sseu_prepare(name, flags, ce, &spin);
1218 	if (ret)
1219 		goto out_pm;
1220 
1221 	ret = intel_context_reconfigure_sseu(ce, sseu);
1222 	if (ret)
1223 		goto out_spin;
1224 
1225 	ret = __sseu_finish(name, flags, ce, obj,
1226 			    hweight32(sseu.slice_mask), spin);
1227 
1228 out_spin:
1229 	if (spin) {
1230 		igt_spinner_end(spin);
1231 		igt_spinner_fini(spin);
1232 		kfree(spin);
1233 	}
1234 out_pm:
1235 	intel_engine_pm_put(ce->engine);
1236 	return ret;
1237 }
1238 
1239 static int
1240 __igt_ctx_sseu(struct drm_i915_private *i915,
1241 	       const char *name,
1242 	       unsigned int flags)
1243 {
1244 	struct drm_i915_gem_object *obj;
1245 	int inst = 0;
1246 	int ret = 0;
1247 
1248 	if (GRAPHICS_VER(i915) < 9)
1249 		return 0;
1250 
1251 	if (flags & TEST_RESET)
1252 		igt_global_reset_lock(to_gt(i915));
1253 
1254 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1255 	if (IS_ERR(obj)) {
1256 		ret = PTR_ERR(obj);
1257 		goto out_unlock;
1258 	}
1259 
1260 	do {
1261 		struct intel_engine_cs *engine;
1262 		struct intel_context *ce;
1263 		struct intel_sseu pg_sseu;
1264 
1265 		engine = intel_engine_lookup_user(i915,
1266 						  I915_ENGINE_CLASS_RENDER,
1267 						  inst++);
1268 		if (!engine)
1269 			break;
1270 
1271 		if (hweight32(engine->sseu.slice_mask) < 2)
1272 			continue;
1273 
1274 		if (!engine->gt->info.sseu.has_slice_pg)
1275 			continue;
1276 
1277 		/*
1278 		 * Gen11 VME-friendly power-gated configuration with
1279 		 * half of the sub-slices enabled.
1280 		 */
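		/*
		 * Worked example of the mask computed below: with an 8-bit
		 * subslice_mask, hweight32() / 2 == 4 and ~(~0 << 4) == 0xf,
		 * i.e. only the low half of the subslices stays enabled.
		 */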
1281 		pg_sseu = engine->sseu;
1282 		pg_sseu.slice_mask = 1;
1283 		pg_sseu.subslice_mask =
1284 			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1285 
1286 		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1287 			engine->name, name, flags,
1288 			hweight32(engine->sseu.slice_mask),
1289 			hweight32(pg_sseu.slice_mask));
1290 
1291 		ce = intel_context_create(engine);
1292 		if (IS_ERR(ce)) {
1293 			ret = PTR_ERR(ce);
1294 			goto out_put;
1295 		}
1296 
1297 		ret = intel_context_pin(ce);
1298 		if (ret)
1299 			goto out_ce;
1300 
1301 		/* First set the default mask. */
1302 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1303 		if (ret)
1304 			goto out_unpin;
1305 
1306 		/* Then set a power-gated configuration. */
1307 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1308 		if (ret)
1309 			goto out_unpin;
1310 
1311 		/* Back to defaults. */
1312 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1313 		if (ret)
1314 			goto out_unpin;
1315 
1316 		/* One last power-gated configuration for the road. */
1317 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1318 		if (ret)
1319 			goto out_unpin;
1320 
1321 out_unpin:
1322 		intel_context_unpin(ce);
1323 out_ce:
1324 		intel_context_put(ce);
1325 	} while (!ret);
1326 
1327 	if (igt_flush_test(i915))
1328 		ret = -EIO;
1329 
1330 out_put:
1331 	i915_gem_object_put(obj);
1332 
1333 out_unlock:
1334 	if (flags & TEST_RESET)
1335 		igt_global_reset_unlock(to_gt(i915));
1336 
1337 	if (ret)
1338 		pr_err("%s: Failed with %d!\n", name, ret);
1339 
1340 	return ret;
1341 }
1342 
1343 static int igt_ctx_sseu(void *arg)
1344 {
1345 	struct {
1346 		const char *name;
1347 		unsigned int flags;
1348 	} *phase, phases[] = {
1349 		{ .name = "basic", .flags = 0 },
1350 		{ .name = "idle", .flags = TEST_IDLE },
1351 		{ .name = "busy", .flags = TEST_BUSY },
1352 		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1353 		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1354 		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1355 	};
1356 	unsigned int i;
1357 	int ret = 0;
1358 
1359 	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1360 	     i++, phase++)
1361 		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1362 
1363 	return ret;
1364 }
1365 
1366 static int igt_ctx_readonly(void *arg)
1367 {
1368 	struct drm_i915_private *i915 = arg;
1369 	unsigned long idx, ndwords, dw, num_engines;
1370 	struct drm_i915_gem_object *obj = NULL;
1371 	struct i915_request *tq[5] = {};
1372 	struct i915_gem_engines_iter it;
1373 	struct i915_address_space *vm;
1374 	struct i915_gem_context *ctx;
1375 	struct intel_context *ce;
1376 	struct igt_live_test t;
1377 	I915_RND_STATE(prng);
1378 	IGT_TIMEOUT(end_time);
1379 	LIST_HEAD(objects);
1380 	struct file *file;
1381 	int err = -ENODEV;
1382 
1383 	/*
1384 	 * Create a few read-only objects (with the occasional writable object)
1385 	 * and try to write into these objects, checking that the GPU discards
1386 	 * any write to a read-only object.
1387 	 */
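	/*
	 * Roughly half of the objects are flagged read-only at random below;
	 * for those, cpu_check() is later invoked with num_writes == 0, so
	 * every dword must still contain the STACK_MAGIC fill if the GPU
	 * really discarded the writes.
	 */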
1388 
1389 	file = mock_file(i915);
1390 	if (IS_ERR(file))
1391 		return PTR_ERR(file);
1392 
1393 	err = igt_live_test_begin(&t, i915, __func__, "");
1394 	if (err)
1395 		goto out_file;
1396 
1397 	ctx = live_context(i915, file);
1398 	if (IS_ERR(ctx)) {
1399 		err = PTR_ERR(ctx);
1400 		goto out_file;
1401 	}
1402 
1403 	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
1404 	if (!vm || !vm->has_read_only) {
1405 		err = 0;
1406 		goto out_file;
1407 	}
1408 
1409 	num_engines = 0;
1410 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1411 		if (intel_engine_can_store_dword(ce->engine))
1412 			num_engines++;
1413 	i915_gem_context_unlock_engines(ctx);
1414 
1415 	ndwords = 0;
1416 	dw = 0;
1417 	while (!time_after(jiffies, end_time)) {
1418 		for_each_gem_engine(ce,
1419 				    i915_gem_context_lock_engines(ctx), it) {
1420 			if (!intel_engine_can_store_dword(ce->engine))
1421 				continue;
1422 
1423 			if (!obj) {
1424 				obj = create_test_object(ce->vm, file, &objects);
1425 				if (IS_ERR(obj)) {
1426 					err = PTR_ERR(obj);
1427 					i915_gem_context_unlock_engines(ctx);
1428 					goto out_file;
1429 				}
1430 
1431 				if (prandom_u32_state(&prng) & 1)
1432 					i915_gem_object_set_readonly(obj);
1433 			}
1434 
1435 			err = gpu_fill(ce, obj, dw);
1436 			if (err) {
1437 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1438 				       ndwords, dw, max_dwords(obj),
1439 				       ce->engine->name,
1440 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
1441 				       err);
1442 				i915_gem_context_unlock_engines(ctx);
1443 				goto out_file;
1444 			}
1445 
1446 			err = throttle(ce, tq, ARRAY_SIZE(tq));
1447 			if (err) {
1448 				i915_gem_context_unlock_engines(ctx);
1449 				goto out_file;
1450 			}
1451 
1452 			if (++dw == max_dwords(obj)) {
1453 				obj = NULL;
1454 				dw = 0;
1455 			}
1456 			ndwords++;
1457 		}
1458 		i915_gem_context_unlock_engines(ctx);
1459 	}
1460 	pr_info("Submitted %lu dwords (across %lu engines)\n",
1461 		ndwords, num_engines);
1462 
1463 	dw = 0;
1464 	idx = 0;
1465 	list_for_each_entry(obj, &objects, st_link) {
1466 		unsigned int rem =
1467 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
1468 		unsigned int num_writes;
1469 
1470 		num_writes = rem;
1471 		if (i915_gem_object_is_readonly(obj))
1472 			num_writes = 0;
1473 
1474 		err = cpu_check(obj, idx++, num_writes);
1475 		if (err)
1476 			break;
1477 
1478 		dw += rem;
1479 	}
1480 
1481 out_file:
1482 	throttle_release(tq, ARRAY_SIZE(tq));
1483 	if (igt_live_test_end(&t))
1484 		err = -EIO;
1485 
1486 	fput(file);
1487 	return err;
1488 }
1489 
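/*
 * check_scratch() rejects a randomly chosen offset if anything is already
 * bound there in @vm, so the scratch writes/reads below cannot accidentally
 * hit a live allocation (e.g. the batch itself).
 */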
1490 static int check_scratch(struct i915_address_space *vm, u64 offset)
1491 {
1492 	struct drm_mm_node *node;
1493 
1494 	mutex_lock(&vm->mutex);
1495 	node = __drm_mm_interval_first(&vm->mm,
1496 				       offset, offset + sizeof(u32) - 1);
1497 	mutex_unlock(&vm->mutex);
1498 	if (!node || node->start > offset)
1499 		return 0;
1500 
1501 	GEM_BUG_ON(offset >= node->start + node->size);
1502 
1503 	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1504 	       upper_32_bits(offset), lower_32_bits(offset));
1505 	return -EINVAL;
1506 }
1507 
1508 static int write_to_scratch(struct i915_gem_context *ctx,
1509 			    struct intel_engine_cs *engine,
1510 			    struct drm_i915_gem_object *obj,
1511 			    u64 offset, u32 value)
1512 {
1513 	struct drm_i915_private *i915 = ctx->i915;
1514 	struct i915_address_space *vm;
1515 	struct i915_request *rq;
1516 	struct i915_vma *vma;
1517 	u32 *cmd;
1518 	int err;
1519 
1520 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1521 
1522 	err = check_scratch(ctx->vm, offset);
1523 	if (err)
1524 		return err;
1525 
1526 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1527 	if (IS_ERR(cmd))
1528 		return PTR_ERR(cmd);
1529 
1530 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
1531 	if (GRAPHICS_VER(i915) >= 8) {
1532 		*cmd++ = lower_32_bits(offset);
1533 		*cmd++ = upper_32_bits(offset);
1534 	} else {
1535 		*cmd++ = 0;
1536 		*cmd++ = offset;
1537 	}
1538 	*cmd++ = value;
1539 	*cmd = MI_BATCH_BUFFER_END;
1540 	__i915_gem_object_flush_map(obj, 0, 64);
1541 	i915_gem_object_unpin_map(obj);
1542 
1543 	intel_gt_chipset_flush(engine->gt);
1544 
1545 	vm = i915_gem_context_get_eb_vm(ctx);
1546 	vma = i915_vma_instance(obj, vm, NULL);
1547 	if (IS_ERR(vma)) {
1548 		err = PTR_ERR(vma);
1549 		goto out_vm;
1550 	}
1551 
1552 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1553 	if (err)
1554 		goto out_vm;
1555 
1556 	rq = igt_request_alloc(ctx, engine);
1557 	if (IS_ERR(rq)) {
1558 		err = PTR_ERR(rq);
1559 		goto err_unpin;
1560 	}
1561 
1562 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
1563 	if (err)
1564 		goto skip_request;
1565 
1566 	if (rq->engine->emit_init_breadcrumb) {
1567 		err = rq->engine->emit_init_breadcrumb(rq);
1568 		if (err)
1569 			goto skip_request;
1570 	}
1571 
1572 	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
1573 				    i915_vma_size(vma), 0);
1574 	if (err)
1575 		goto skip_request;
1576 
1577 	i915_vma_unpin(vma);
1578 
1579 	i915_request_add(rq);
1580 
1581 	goto out_vm;
1582 skip_request:
1583 	i915_request_set_error_once(rq, err);
1584 	i915_request_add(rq);
1585 err_unpin:
1586 	i915_vma_unpin(vma);
1587 out_vm:
1588 	i915_vm_put(vm);
1589 
1590 	if (!err)
1591 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1592 
1593 	return err;
1594 }
1595 
1596 static int read_from_scratch(struct i915_gem_context *ctx,
1597 			     struct intel_engine_cs *engine,
1598 			     struct drm_i915_gem_object *obj,
1599 			     u64 offset, u32 *value)
1600 {
1601 	struct drm_i915_private *i915 = ctx->i915;
1602 	struct i915_address_space *vm;
1603 	const u32 result = 0x100;
1604 	struct i915_request *rq;
1605 	struct i915_vma *vma;
1606 	unsigned int flags;
1607 	u32 *cmd;
1608 	int err;
1609 
1610 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1611 
1612 	err = check_scratch(ctx->vm, offset);
1613 	if (err)
1614 		return err;
1615 
1616 	if (GRAPHICS_VER(i915) >= 8) {
1617 		const u32 GPR0 = engine->mmio_base + 0x600;
1618 
1619 		vm = i915_gem_context_get_eb_vm(ctx);
1620 		vma = i915_vma_instance(obj, vm, NULL);
1621 		if (IS_ERR(vma)) {
1622 			err = PTR_ERR(vma);
1623 			goto out_vm;
1624 		}
1625 
1626 		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1627 		if (err)
1628 			goto out_vm;
1629 
1630 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1631 		if (IS_ERR(cmd)) {
1632 			err = PTR_ERR(cmd);
1633 			goto err_unpin;
1634 		}
1635 
1636 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1637 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1638 		*cmd++ = GPR0;
1639 		*cmd++ = lower_32_bits(offset);
1640 		*cmd++ = upper_32_bits(offset);
1641 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1642 		*cmd++ = GPR0;
1643 		*cmd++ = result;
1644 		*cmd++ = 0;
1645 		*cmd = MI_BATCH_BUFFER_END;
1646 
1647 		i915_gem_object_flush_map(obj);
1648 		i915_gem_object_unpin_map(obj);
1649 
1650 		flags = 0;
1651 	} else {
1652 		const u32 reg = engine->mmio_base + 0x420;
1653 
1654 		/* hsw: register access even to 3DPRIM! is protected */
1655 		vm = i915_vm_get(&engine->gt->ggtt->vm);
1656 		vma = i915_vma_instance(obj, vm, NULL);
1657 		if (IS_ERR(vma)) {
1658 			err = PTR_ERR(vma);
1659 			goto out_vm;
1660 		}
1661 
1662 		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1663 		if (err)
1664 			goto out_vm;
1665 
1666 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1667 		if (IS_ERR(cmd)) {
1668 			err = PTR_ERR(cmd);
1669 			goto err_unpin;
1670 		}
1671 
1672 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1673 		*cmd++ = MI_LOAD_REGISTER_MEM;
1674 		*cmd++ = reg;
1675 		*cmd++ = offset;
1676 		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1677 		*cmd++ = reg;
1678 		*cmd++ = i915_vma_offset(vma) + result;
1679 		*cmd = MI_BATCH_BUFFER_END;
1680 
1681 		i915_gem_object_flush_map(obj);
1682 		i915_gem_object_unpin_map(obj);
1683 
1684 		flags = I915_DISPATCH_SECURE;
1685 	}
1686 
1687 	intel_gt_chipset_flush(engine->gt);
1688 
1689 	rq = igt_request_alloc(ctx, engine);
1690 	if (IS_ERR(rq)) {
1691 		err = PTR_ERR(rq);
1692 		goto err_unpin;
1693 	}
1694 
1695 	err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE);
1696 	if (err)
1697 		goto skip_request;
1698 
1699 	if (rq->engine->emit_init_breadcrumb) {
1700 		err = rq->engine->emit_init_breadcrumb(rq);
1701 		if (err)
1702 			goto skip_request;
1703 	}
1704 
1705 	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
1706 				    i915_vma_size(vma), flags);
1707 	if (err)
1708 		goto skip_request;
1709 
1710 	i915_vma_unpin(vma);
1711 
1712 	i915_request_add(rq);
1713 
1714 	i915_gem_object_lock(obj, NULL);
1715 	err = i915_gem_object_set_to_cpu_domain(obj, false);
1716 	i915_gem_object_unlock(obj);
1717 	if (err)
1718 		goto out_vm;
1719 
1720 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1721 	if (IS_ERR(cmd)) {
1722 		err = PTR_ERR(cmd);
1723 		goto out_vm;
1724 	}
1725 
1726 	*value = cmd[result / sizeof(*cmd)];
1727 	i915_gem_object_unpin_map(obj);
1728 
1729 	goto out_vm;
1730 skip_request:
1731 	i915_request_set_error_once(rq, err);
1732 	i915_request_add(rq);
1733 err_unpin:
1734 	i915_vma_unpin(vma);
1735 out_vm:
1736 	i915_vm_put(vm);
1737 
1738 	if (!err)
1739 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1740 
1741 	return err;
1742 }
1743 
1744 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1745 {
1746 	struct i915_address_space *vm;
1747 	u32 *vaddr;
1748 	int err = 0;
1749 
1750 	vm = ctx->vm;
1751 	if (!vm)
1752 		return -ENODEV;
1753 
1754 	if (!vm->scratch[0]) {
1755 		pr_err("No scratch page!\n");
1756 		return -EINVAL;
1757 	}
1758 
1759 	vaddr = __px_vaddr(vm->scratch[0]);
1760 
1761 	memcpy(out, vaddr, sizeof(*out));
1762 	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1763 		pr_err("Inconsistent initial state of scratch page!\n");
1764 		err = -EINVAL;
1765 	}
1766 
1767 	return err;
1768 }
1769 
1770 static int igt_vm_isolation(void *arg)
1771 {
1772 	struct drm_i915_private *i915 = arg;
1773 	struct i915_gem_context *ctx_a, *ctx_b;
1774 	struct drm_i915_gem_object *obj_a, *obj_b;
1775 	unsigned long num_engines, count;
1776 	struct intel_engine_cs *engine;
1777 	struct igt_live_test t;
1778 	I915_RND_STATE(prng);
1779 	struct file *file;
1780 	u64 vm_total;
1781 	u32 expected;
1782 	int err;
1783 
1784 	if (GRAPHICS_VER(i915) < 7)
1785 		return 0;
1786 
1787 	/*
1788 	 * The simple goal here is that a write into one context is not
1789 	 * observed in a second (separate page tables and scratch).
1790 	 */
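	/*
	 * Concretely: write_to_scratch() stores 0xdeadbeef at a random offset
	 * via ctx_a, then read_from_scratch() reads the same offset via
	 * ctx_b; with distinct ppGTTs the read must only ever see the
	 * scratch-page value captured in 'expected'.
	 */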
1791 
1792 	file = mock_file(i915);
1793 	if (IS_ERR(file))
1794 		return PTR_ERR(file);
1795 
1796 	err = igt_live_test_begin(&t, i915, __func__, "");
1797 	if (err)
1798 		goto out_file;
1799 
1800 	ctx_a = live_context(i915, file);
1801 	if (IS_ERR(ctx_a)) {
1802 		err = PTR_ERR(ctx_a);
1803 		goto out_file;
1804 	}
1805 
1806 	ctx_b = live_context(i915, file);
1807 	if (IS_ERR(ctx_b)) {
1808 		err = PTR_ERR(ctx_b);
1809 		goto out_file;
1810 	}
1811 
1812 	/* We can only test vm isolation if the vms are distinct */
1813 	if (ctx_a->vm == ctx_b->vm)
1814 		goto out_file;
1815 
1816 	/* Read the initial state of the scratch page */
1817 	err = check_scratch_page(ctx_a, &expected);
1818 	if (err)
1819 		goto out_file;
1820 
1821 	err = check_scratch_page(ctx_b, &expected);
1822 	if (err)
1823 		goto out_file;
1824 
1825 	vm_total = ctx_a->vm->total;
1826 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
1827 
1828 	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
1829 	if (IS_ERR(obj_a)) {
1830 		err = PTR_ERR(obj_a);
1831 		goto out_file;
1832 	}
1833 
1834 	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
1835 	if (IS_ERR(obj_b)) {
1836 		err = PTR_ERR(obj_b);
1837 		goto put_a;
1838 	}
1839 
1840 	count = 0;
1841 	num_engines = 0;
1842 	for_each_uabi_engine(engine, i915) {
1843 		IGT_TIMEOUT(end_time);
1844 		unsigned long this = 0;
1845 
1846 		if (!intel_engine_can_store_dword(engine))
1847 			continue;
1848 
1849 		/* Not all engines have their own GPR! */
1850 		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1851 			continue;
1852 
1853 		while (!__igt_timeout(end_time, NULL)) {
1854 			u32 value = 0xc5c5c5c5;
1855 			u64 offset;
1856 
1857 			/* Leave enough space at offset 0 for the batch */
1858 			offset = igt_random_offset(&prng,
1859 						   I915_GTT_PAGE_SIZE, vm_total,
1860 						   sizeof(u32), alignof_dword);
1861 
1862 			err = write_to_scratch(ctx_a, engine, obj_a,
1863 					       offset, 0xdeadbeef);
1864 			if (err == 0)
1865 				err = read_from_scratch(ctx_b, engine, obj_b,
1866 							offset, &value);
1867 			if (err)
1868 				goto put_b;
1869 
1870 			if (value != expected) {
1871 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1872 				       engine->name, value,
1873 				       upper_32_bits(offset),
1874 				       lower_32_bits(offset),
1875 				       this);
1876 				err = -EINVAL;
1877 				goto put_b;
1878 			}
1879 
1880 			this++;
1881 		}
1882 		count += this;
1883 		num_engines++;
1884 	}
1885 	pr_info("Checked %lu scratch offsets across %lu engines\n",
1886 		count, num_engines);
1887 
1888 put_b:
1889 	i915_gem_object_put(obj_b);
1890 put_a:
1891 	i915_gem_object_put(obj_a);
1892 out_file:
1893 	if (igt_live_test_end(&t))
1894 		err = -EIO;
1895 	fput(file);
1896 	return err;
1897 }
1898 
1899 int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1900 {
1901 	static const struct i915_subtest tests[] = {
1902 		SUBTEST(live_nop_switch),
1903 		SUBTEST(live_parallel_switch),
1904 		SUBTEST(igt_ctx_exec),
1905 		SUBTEST(igt_ctx_readonly),
1906 		SUBTEST(igt_ctx_sseu),
1907 		SUBTEST(igt_shared_ctx_exec),
1908 		SUBTEST(igt_vm_isolation),
1909 	};
1910 
1911 	if (intel_gt_is_wedged(to_gt(i915)))
1912 		return 0;
1913 
1914 	return i915_live_subtests(tests, i915);
1915 }
1916