xref: /linux/drivers/gpu/drm/i915/gt/selftest_migrate.c (revision 0b364cf53b20204e92bac7c6ebd1ee7d3ec62931)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/sort.h>
7 
8 #include "gem/i915_gem_internal.h"
9 #include "gem/i915_gem_lmem.h"
10 
11 #include "selftests/igt_spinner.h"
12 #include "selftests/i915_random.h"
13 
14 static const unsigned int sizes[] = {
15 	SZ_4K,
16 	SZ_64K,
17 	SZ_2M,
18 	CHUNK_SZ - SZ_4K,
19 	CHUNK_SZ,
20 	CHUNK_SZ + SZ_4K,
21 	SZ_64M,
22 };
23 
24 static struct drm_i915_gem_object *
25 create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
26 {
27 	struct drm_i915_gem_object *obj;
28 
29 	obj = i915_gem_object_create_lmem(i915, size, 0);
30 	if (!IS_ERR(obj))
31 		return obj;
32 
33 	return i915_gem_object_create_internal(i915, size);
34 }
35 
36 static int copy(struct intel_migrate *migrate,
37 		int (*fn)(struct intel_migrate *migrate,
38 			  struct i915_gem_ww_ctx *ww,
39 			  struct drm_i915_gem_object *src,
40 			  struct drm_i915_gem_object *dst,
41 			  struct i915_request **out),
42 		u32 sz, struct rnd_state *prng)
43 {
44 	struct drm_i915_private *i915 = migrate->context->engine->i915;
45 	struct drm_i915_gem_object *src, *dst;
46 	struct i915_request *rq;
47 	struct i915_gem_ww_ctx ww;
48 	u32 *vaddr;
49 	int err = 0;
50 	int i;
51 
52 	src = create_lmem_or_internal(i915, sz);
53 	if (IS_ERR(src))
54 		return 0;
55 
56 	sz = src->base.size;
57 	dst = i915_gem_object_create_internal(i915, sz);
58 	if (IS_ERR(dst))
59 		goto err_free_src;
60 
61 	for_i915_gem_ww(&ww, err, true) {
62 		err = i915_gem_object_lock(src, &ww);
63 		if (err)
64 			continue;
65 
66 		err = i915_gem_object_lock(dst, &ww);
67 		if (err)
68 			continue;
69 
70 		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
71 		if (IS_ERR(vaddr)) {
72 			err = PTR_ERR(vaddr);
73 			continue;
74 		}
75 
76 		for (i = 0; i < sz / sizeof(u32); i++)
77 			vaddr[i] = i;
78 		i915_gem_object_flush_map(src);
79 
80 		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
81 		if (IS_ERR(vaddr)) {
82 			err = PTR_ERR(vaddr);
83 			goto unpin_src;
84 		}
85 
86 		for (i = 0; i < sz / sizeof(u32); i++)
87 			vaddr[i] = ~i;
88 		i915_gem_object_flush_map(dst);
89 
90 		err = fn(migrate, &ww, src, dst, &rq);
91 		if (!err)
92 			continue;
93 
94 		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
95 			pr_err("%ps failed, size: %u\n", fn, sz);
96 		if (rq) {
97 			i915_request_wait(rq, 0, HZ);
98 			i915_request_put(rq);
99 		}
100 		i915_gem_object_unpin_map(dst);
101 unpin_src:
102 		i915_gem_object_unpin_map(src);
103 	}
104 	if (err)
105 		goto err_out;
106 
107 	if (rq) {
108 		if (i915_request_wait(rq, 0, HZ) < 0) {
109 			pr_err("%ps timed out, size: %u\n", fn, sz);
110 			err = -ETIME;
111 		}
112 		i915_request_put(rq);
113 	}
114 
115 	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
116 		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
117 
118 		if (vaddr[x] != x) {
119 			pr_err("%ps failed, size: %u, offset: %zu\n",
120 			       fn, sz, x * sizeof(u32));
121 			igt_hexdump(vaddr + i * 1024, 4096);
122 			err = -EINVAL;
123 		}
124 	}
125 
126 	i915_gem_object_unpin_map(dst);
127 	i915_gem_object_unpin_map(src);
128 
129 err_out:
130 	i915_gem_object_put(dst);
131 err_free_src:
132 	i915_gem_object_put(src);
133 
134 	return err;
135 }
136 
137 static int intel_context_copy_ccs(struct intel_context *ce,
138 				  const struct i915_deps *deps,
139 				  struct scatterlist *sg,
140 				  unsigned int pat_index,
141 				  bool write_to_ccs,
142 				  struct i915_request **out)
143 {
144 	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
145 	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
146 	struct sgt_dma it = sg_sgt(sg);
147 	struct i915_request *rq;
148 	u32 offset;
149 	int err;
150 
151 	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
152 	*out = NULL;
153 
154 	GEM_BUG_ON(ce->ring->size < SZ_64K);
155 
156 	offset = 0;
157 	if (HAS_64K_PAGES(ce->engine->i915))
158 		offset = CHUNK_SZ;
159 
160 	do {
161 		int len;
162 
163 		rq = i915_request_create(ce);
164 		if (IS_ERR(rq)) {
165 			err = PTR_ERR(rq);
166 			goto out_ce;
167 		}
168 
169 		if (deps) {
170 			err = i915_request_await_deps(rq, deps);
171 			if (err)
172 				goto out_rq;
173 
174 			if (rq->engine->emit_init_breadcrumb) {
175 				err = rq->engine->emit_init_breadcrumb(rq);
176 				if (err)
177 					goto out_rq;
178 			}
179 
180 			deps = NULL;
181 		}
182 
183 		/* The PTE updates + clear must not be interrupted. */
184 		err = emit_no_arbitration(rq);
185 		if (err)
186 			goto out_rq;
187 
188 		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
189 		if (len <= 0) {
190 			err = len;
191 			goto out_rq;
192 		}
193 
194 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
195 		if (err)
196 			goto out_rq;
197 
198 		err = emit_copy_ccs(rq, offset, dst_access,
199 				    offset, src_access, len);
200 		if (err)
201 			goto out_rq;
202 
203 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
204 
205 		/* Arbitration is re-enabled between requests. */
206 out_rq:
207 		if (*out)
208 			i915_request_put(*out);
209 		*out = i915_request_get(rq);
210 		i915_request_add(rq);
211 		if (err || !it.sg || !sg_dma_len(it.sg))
212 			break;
213 
214 		cond_resched();
215 	} while (1);
216 
217 out_ce:
218 	return err;
219 }
220 
221 static int
222 intel_migrate_ccs_copy(struct intel_migrate *m,
223 		       struct i915_gem_ww_ctx *ww,
224 		       const struct i915_deps *deps,
225 		       struct scatterlist *sg,
226 		       unsigned int pat_index,
227 		       bool write_to_ccs,
228 		       struct i915_request **out)
229 {
230 	struct intel_context *ce;
231 	int err;
232 
233 	*out = NULL;
234 	if (!m->context)
235 		return -ENODEV;
236 
237 	ce = intel_migrate_create_context(m);
238 	if (IS_ERR(ce))
239 		ce = intel_context_get(m->context);
240 	GEM_BUG_ON(IS_ERR(ce));
241 
242 	err = intel_context_pin_ww(ce, ww);
243 	if (err)
244 		goto out;
245 
246 	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
247 				     write_to_ccs, out);
248 
249 	intel_context_unpin(ce);
250 out:
251 	intel_context_put(ce);
252 	return err;
253 }
254 
255 static int clear(struct intel_migrate *migrate,
256 		 int (*fn)(struct intel_migrate *migrate,
257 			   struct i915_gem_ww_ctx *ww,
258 			   struct drm_i915_gem_object *obj,
259 			   u32 value,
260 			   struct i915_request **out),
261 		 u32 sz, struct rnd_state *prng)
262 {
263 	struct drm_i915_private *i915 = migrate->context->engine->i915;
264 	struct drm_i915_gem_object *obj;
265 	struct i915_request *rq;
266 	struct i915_gem_ww_ctx ww;
267 	u32 *vaddr, val = 0;
268 	bool ccs_cap = false;
269 	int err = 0;
270 	int i;
271 
272 	obj = create_lmem_or_internal(i915, sz);
273 	if (IS_ERR(obj))
274 		return 0;
275 
276 	/* Consider the rounded up memory too */
277 	sz = obj->base.size;
278 
279 	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
280 		ccs_cap = true;
281 
282 	for_i915_gem_ww(&ww, err, true) {
283 		int ccs_bytes, ccs_bytes_per_chunk;
284 
285 		err = i915_gem_object_lock(obj, &ww);
286 		if (err)
287 			continue;
288 
289 		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
290 		if (IS_ERR(vaddr)) {
291 			err = PTR_ERR(vaddr);
292 			continue;
293 		}
294 
295 		for (i = 0; i < sz / sizeof(u32); i++)
296 			vaddr[i] = ~i;
297 		i915_gem_object_flush_map(obj);
298 
299 		if (ccs_cap && !val) {
300 			/* Write the obj data into ccs surface */
301 			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
302 						     obj->mm.pages->sgl,
303 						     obj->pat_index,
304 						     true, &rq);
305 			if (rq && !err) {
306 				if (i915_request_wait(rq, 0, HZ) < 0) {
307 					pr_err("%ps timed out, size: %u\n",
308 					       fn, sz);
309 					err = -ETIME;
310 				}
311 				i915_request_put(rq);
312 				rq = NULL;
313 			}
314 			if (err)
315 				continue;
316 		}
317 
318 		err = fn(migrate, &ww, obj, val, &rq);
319 		if (rq && !err) {
320 			if (i915_request_wait(rq, 0, HZ) < 0) {
321 				pr_err("%ps timed out, size: %u\n", fn, sz);
322 				err = -ETIME;
323 			}
324 			i915_request_put(rq);
325 			rq = NULL;
326 		}
327 		if (err)
328 			continue;
329 
330 		i915_gem_object_flush_map(obj);
331 
332 		/* Verify the set/clear of the obj mem */
333 		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
334 			int x = i * 1024 +
335 				i915_prandom_u32_max_state(1024, prng);
336 
337 			if (vaddr[x] != val) {
338 				pr_err("%ps failed, (%u != %u), offset: %zu\n",
339 				       fn, vaddr[x], val, x * sizeof(u32));
340 				igt_hexdump(vaddr + i * 1024, 4096);
341 				err = -EINVAL;
342 			}
343 		}
344 		if (err)
345 			continue;
346 
347 		if (ccs_cap && !val) {
348 			for (i = 0; i < sz / sizeof(u32); i++)
349 				vaddr[i] = ~i;
350 			i915_gem_object_flush_map(obj);
351 
352 			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
353 						     obj->mm.pages->sgl,
354 						     obj->pat_index,
355 						     false, &rq);
356 			if (rq && !err) {
357 				if (i915_request_wait(rq, 0, HZ) < 0) {
358 					pr_err("%ps timed out, size: %u\n",
359 					       fn, sz);
360 					err = -ETIME;
361 				}
362 				i915_request_put(rq);
363 				rq = NULL;
364 			}
365 			if (err)
366 				continue;
367 
368 			ccs_bytes = GET_CCS_BYTES(i915, sz);
369 			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
370 			i915_gem_object_flush_map(obj);
371 
372 			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
373 				int offset = ((i * PAGE_SIZE)  /
374 					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
375 				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
376 				int x = i915_prandom_u32_max_state(min_t(int, 1024,
377 									 ccs_bytes_left), prng);
378 
379 				if (vaddr[offset + x]) {
380 					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
381 					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
382 					igt_hexdump(vaddr + offset,
383 						    min_t(int, 4096,
384 							  ccs_bytes_left * sizeof(u32)));
385 					err = -EINVAL;
386 				}
387 			}
388 
389 			if (err)
390 				continue;
391 		}
392 		i915_gem_object_unpin_map(obj);
393 	}
394 
395 	if (err) {
396 		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
397 			pr_err("%ps failed, size: %u\n", fn, sz);
398 		if (rq && err != -EINVAL) {
399 			i915_request_wait(rq, 0, HZ);
400 			i915_request_put(rq);
401 		}
402 
403 		i915_gem_object_unpin_map(obj);
404 	}
405 
406 	i915_gem_object_put(obj);
407 	return err;
408 }
409 
410 static int __migrate_copy(struct intel_migrate *migrate,
411 			  struct i915_gem_ww_ctx *ww,
412 			  struct drm_i915_gem_object *src,
413 			  struct drm_i915_gem_object *dst,
414 			  struct i915_request **out)
415 {
416 	return intel_migrate_copy(migrate, ww, NULL,
417 				  src->mm.pages->sgl, src->pat_index,
418 				  i915_gem_object_is_lmem(src),
419 				  dst->mm.pages->sgl, dst->pat_index,
420 				  i915_gem_object_is_lmem(dst),
421 				  out);
422 }
423 
424 static int __global_copy(struct intel_migrate *migrate,
425 			 struct i915_gem_ww_ctx *ww,
426 			 struct drm_i915_gem_object *src,
427 			 struct drm_i915_gem_object *dst,
428 			 struct i915_request **out)
429 {
430 	return intel_context_migrate_copy(migrate->context, NULL,
431 					  src->mm.pages->sgl, src->pat_index,
432 					  i915_gem_object_is_lmem(src),
433 					  dst->mm.pages->sgl, dst->pat_index,
434 					  i915_gem_object_is_lmem(dst),
435 					  out);
436 }
437 
438 static int
439 migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
440 {
441 	return copy(migrate, __migrate_copy, sz, prng);
442 }
443 
444 static int
445 global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
446 {
447 	return copy(migrate, __global_copy, sz, prng);
448 }
449 
450 static int __migrate_clear(struct intel_migrate *migrate,
451 			   struct i915_gem_ww_ctx *ww,
452 			   struct drm_i915_gem_object *obj,
453 			   u32 value,
454 			   struct i915_request **out)
455 {
456 	return intel_migrate_clear(migrate, ww, NULL,
457 				   obj->mm.pages->sgl,
458 				   obj->pat_index,
459 				   i915_gem_object_is_lmem(obj),
460 				   value, out);
461 }
462 
463 static int __global_clear(struct intel_migrate *migrate,
464 			  struct i915_gem_ww_ctx *ww,
465 			  struct drm_i915_gem_object *obj,
466 			  u32 value,
467 			  struct i915_request **out)
468 {
469 	return intel_context_migrate_clear(migrate->context, NULL,
470 					   obj->mm.pages->sgl,
471 					   obj->pat_index,
472 					   i915_gem_object_is_lmem(obj),
473 					   value, out);
474 }
475 
476 static int
477 migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
478 {
479 	return clear(migrate, __migrate_clear, sz, prng);
480 }
481 
482 static int
483 global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
484 {
485 	return clear(migrate, __global_clear, sz, prng);
486 }
487 
488 static int live_migrate_copy(void *arg)
489 {
490 	struct intel_gt *gt = arg;
491 	struct intel_migrate *migrate = &gt->migrate;
492 	struct drm_i915_private *i915 = migrate->context->engine->i915;
493 	I915_RND_STATE(prng);
494 	int i;
495 
496 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
497 		int err;
498 
499 		err = migrate_copy(migrate, sizes[i], &prng);
500 		if (err == 0)
501 			err = global_copy(migrate, sizes[i], &prng);
502 		i915_gem_drain_freed_objects(i915);
503 		if (err)
504 			return err;
505 	}
506 
507 	return 0;
508 }
509 
510 static int live_migrate_clear(void *arg)
511 {
512 	struct intel_gt *gt = arg;
513 	struct intel_migrate *migrate = &gt->migrate;
514 	struct drm_i915_private *i915 = migrate->context->engine->i915;
515 	I915_RND_STATE(prng);
516 	int i;
517 
518 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
519 		int err;
520 
521 		err = migrate_clear(migrate, sizes[i], &prng);
522 		if (err == 0)
523 			err = global_clear(migrate, sizes[i], &prng);
524 
525 		i915_gem_drain_freed_objects(i915);
526 		if (err)
527 			return err;
528 	}
529 
530 	return 0;
531 }
532 
533 struct spinner_timer {
534 	struct timer_list timer;
535 	struct igt_spinner spin;
536 };
537 
538 static void spinner_kill(struct timer_list *timer)
539 {
540 	struct spinner_timer *st = from_timer(st, timer, timer);
541 
542 	igt_spinner_end(&st->spin);
543 	pr_info("%s\n", __func__);
544 }
545 
546 static int live_emit_pte_full_ring(void *arg)
547 {
548 	struct intel_gt *gt = arg;
549 	struct intel_migrate *migrate = &gt->migrate;
550 	struct drm_i915_private *i915 = migrate->context->engine->i915;
551 	struct drm_i915_gem_object *obj;
552 	struct intel_context *ce;
553 	struct i915_request *rq, *prev;
554 	struct spinner_timer st;
555 	struct sgt_dma it;
556 	int len, sz, err;
557 	u32 *cs;
558 
559 	/*
560 	 * Simple regression test to check that we don't trample the
561 	 * rq->reserved_space when returning from emit_pte(), if the ring is
562 	 * nearly full.
563 	 */
564 
565 	if (igt_spinner_init(&st.spin, to_gt(i915)))
566 		return -ENOMEM;
567 
568 	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
569 	if (IS_ERR(obj)) {
570 		err = PTR_ERR(obj);
571 		goto out_spinner;
572 	}
573 
574 	err = i915_gem_object_pin_pages_unlocked(obj);
575 	if (err)
576 		goto out_obj;
577 
578 	ce = intel_migrate_create_context(migrate);
579 	if (IS_ERR(ce)) {
580 		err = PTR_ERR(ce);
581 		goto out_obj;
582 	}
583 
584 	ce->ring_size = SZ_4K; /* Not too big */
585 
586 	err = intel_context_pin(ce);
587 	if (err)
588 		goto out_put;
589 
590 	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
591 	if (IS_ERR(rq)) {
592 		err = PTR_ERR(rq);
593 		goto out_unpin;
594 	}
595 
596 	i915_request_add(rq);
597 	if (!igt_wait_for_spinner(&st.spin, rq)) {
598 		err = -EIO;
599 		goto out_unpin;
600 	}
601 
602 	/*
603 	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
604 	 * ring->reserved_space at the end. To actually emit the PTEs we require
605 	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
606 	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
607 	 * ring space in emit_pte(), otherwise we trample on the reserved_space
608 	 * resulting in crashes when later submitting the rq.
609 	 */
610 
611 	prev = NULL;
612 	do {
613 		if (prev)
614 			i915_request_add(rq);
615 
616 		rq = i915_request_create(ce);
617 		if (IS_ERR(rq)) {
618 			err = PTR_ERR(rq);
619 			goto out_unpin;
620 		}
621 
622 		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
623 			I915_EMIT_PTE_NUM_DWORDS;
624 		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
625 			   I915_EMIT_PTE_NUM_DWORDS);
626 		cs = intel_ring_begin(rq, sz);
627 		if (IS_ERR(cs)) {
628 			err = PTR_ERR(cs);
629 			goto out_rq;
630 		}
631 
632 		memset32(cs, MI_NOOP, sz);
633 		cs += sz;
634 		intel_ring_advance(rq, cs);
635 
636 		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);
637 
638 		prev = rq;
639 	} while (rq->ring->space > (rq->reserved_space +
640 				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));
641 
642 	timer_setup_on_stack(&st.timer, spinner_kill, 0);
643 	mod_timer(&st.timer, jiffies + 2 * HZ);
644 
645 	/*
646 	 * This should wait for the spinner to be killed, otherwise we should go
647 	 * down in flames when doing i915_request_add().
648 	 */
649 	pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
650 	it = sg_sgt(obj->mm.pages->sgl);
651 	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
652 	if (!len) {
653 		err = -EINVAL;
654 		goto out_rq;
655 	}
656 	if (len < 0) {
657 		err = len;
658 		goto out_rq;
659 	}
660 
661 out_rq:
662 	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
663 	del_timer_sync(&st.timer);
664 	destroy_timer_on_stack(&st.timer);
665 out_unpin:
666 	intel_context_unpin(ce);
667 out_put:
668 	intel_context_put(ce);
669 out_obj:
670 	i915_gem_object_put(obj);
671 out_spinner:
672 	igt_spinner_fini(&st.spin);
673 	return err;
674 }
675 
676 struct threaded_migrate {
677 	struct intel_migrate *migrate;
678 	struct task_struct *tsk;
679 	struct rnd_state prng;
680 };
681 
682 static int threaded_migrate(struct intel_migrate *migrate,
683 			    int (*fn)(void *arg),
684 			    unsigned int flags)
685 {
686 	const unsigned int n_cpus = num_online_cpus() + 1;
687 	struct threaded_migrate *thread;
688 	I915_RND_STATE(prng);
689 	unsigned int i;
690 	int err = 0;
691 
692 	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
693 	if (!thread)
694 		return 0;
695 
696 	for (i = 0; i < n_cpus; ++i) {
697 		struct task_struct *tsk;
698 
699 		thread[i].migrate = migrate;
700 		thread[i].prng =
701 			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
702 
703 		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
704 		if (IS_ERR(tsk)) {
705 			err = PTR_ERR(tsk);
706 			break;
707 		}
708 
709 		get_task_struct(tsk);
710 		thread[i].tsk = tsk;
711 	}
712 
713 	msleep(10 * n_cpus); /* start all threads before we kthread_stop() */
714 
715 	for (i = 0; i < n_cpus; ++i) {
716 		struct task_struct *tsk = thread[i].tsk;
717 		int status;
718 
719 		if (IS_ERR_OR_NULL(tsk))
720 			continue;
721 
722 		status = kthread_stop_put(tsk);
723 		if (status && !err)
724 			err = status;
725 	}
726 
727 	kfree(thread);
728 	return err;
729 }
730 
731 static int __thread_migrate_copy(void *arg)
732 {
733 	struct threaded_migrate *tm = arg;
734 
735 	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
736 }
737 
738 static int thread_migrate_copy(void *arg)
739 {
740 	struct intel_gt *gt = arg;
741 	struct intel_migrate *migrate = &gt->migrate;
742 
743 	return threaded_migrate(migrate, __thread_migrate_copy, 0);
744 }
745 
746 static int __thread_global_copy(void *arg)
747 {
748 	struct threaded_migrate *tm = arg;
749 
750 	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
751 }
752 
753 static int thread_global_copy(void *arg)
754 {
755 	struct intel_gt *gt = arg;
756 	struct intel_migrate *migrate = &gt->migrate;
757 
758 	return threaded_migrate(migrate, __thread_global_copy, 0);
759 }
760 
761 static int __thread_migrate_clear(void *arg)
762 {
763 	struct threaded_migrate *tm = arg;
764 
765 	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
766 }
767 
768 static int __thread_global_clear(void *arg)
769 {
770 	struct threaded_migrate *tm = arg;
771 
772 	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
773 }
774 
775 static int thread_migrate_clear(void *arg)
776 {
777 	struct intel_gt *gt = arg;
778 	struct intel_migrate *migrate = &gt->migrate;
779 
780 	return threaded_migrate(migrate, __thread_migrate_clear, 0);
781 }
782 
783 static int thread_global_clear(void *arg)
784 {
785 	struct intel_gt *gt = arg;
786 	struct intel_migrate *migrate = &gt->migrate;
787 
788 	return threaded_migrate(migrate, __thread_global_clear, 0);
789 }
790 
791 int intel_migrate_live_selftests(struct drm_i915_private *i915)
792 {
793 	static const struct i915_subtest tests[] = {
794 		SUBTEST(live_migrate_copy),
795 		SUBTEST(live_migrate_clear),
796 		SUBTEST(live_emit_pte_full_ring),
797 		SUBTEST(thread_migrate_copy),
798 		SUBTEST(thread_migrate_clear),
799 		SUBTEST(thread_global_copy),
800 		SUBTEST(thread_global_clear),
801 	};
802 	struct intel_gt *gt = to_gt(i915);
803 
804 	if (!gt->migrate.context)
805 		return 0;
806 
807 	return intel_gt_live_subtests(tests, gt);
808 }
809 
810 static struct drm_i915_gem_object *
811 create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
812 {
813 	struct drm_i915_gem_object *obj = NULL;
814 	int err;
815 
816 	if (try_lmem)
817 		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);
818 
819 	if (IS_ERR_OR_NULL(obj)) {
820 		obj = i915_gem_object_create_internal(gt->i915, sz);
821 		if (IS_ERR(obj))
822 			return obj;
823 	}
824 
825 	i915_gem_object_trylock(obj, NULL);
826 	err = i915_gem_object_pin_pages(obj);
827 	if (err) {
828 		i915_gem_object_unlock(obj);
829 		i915_gem_object_put(obj);
830 		return ERR_PTR(err);
831 	}
832 
833 	return obj;
834 }
835 
836 static int wrap_ktime_compare(const void *A, const void *B)
837 {
838 	const ktime_t *a = A, *b = B;
839 
840 	return ktime_compare(*a, *b);
841 }
842 
843 static int __perf_clear_blt(struct intel_context *ce,
844 			    struct scatterlist *sg,
845 			    unsigned int pat_index,
846 			    bool is_lmem,
847 			    size_t sz)
848 {
849 	ktime_t t[5];
850 	int pass;
851 	int err = 0;
852 
853 	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
854 		struct i915_request *rq;
855 		ktime_t t0, t1;
856 
857 		t0 = ktime_get();
858 
859 		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
860 						  is_lmem, 0, &rq);
861 		if (rq) {
862 			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
863 				err = -EIO;
864 			i915_request_put(rq);
865 		}
866 		if (err)
867 			break;
868 
869 		t1 = ktime_get();
870 		t[pass] = ktime_sub(t1, t0);
871 	}
872 	if (err)
873 		return err;
874 
875 	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
876 	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
877 		ce->engine->name, sz >> 10,
878 		div64_u64(mul_u32_u32(4 * sz,
879 				      1000 * 1000 * 1000),
880 			  t[1] + 2 * t[2] + t[3]) >> 20);
881 	return 0;
882 }
883 
884 static int perf_clear_blt(void *arg)
885 {
886 	struct intel_gt *gt = arg;
887 	static const unsigned long sizes[] = {
888 		SZ_4K,
889 		SZ_64K,
890 		SZ_2M,
891 		SZ_64M
892 	};
893 	int i;
894 
895 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
896 		struct drm_i915_gem_object *dst;
897 		int err;
898 
899 		dst = create_init_lmem_internal(gt, sizes[i], true);
900 		if (IS_ERR(dst))
901 			return PTR_ERR(dst);
902 
903 		err = __perf_clear_blt(gt->migrate.context,
904 				       dst->mm.pages->sgl,
905 				       i915_gem_get_pat_index(gt->i915,
906 							      I915_CACHE_NONE),
907 				       i915_gem_object_is_lmem(dst),
908 				       sizes[i]);
909 
910 		i915_gem_object_unlock(dst);
911 		i915_gem_object_put(dst);
912 		if (err)
913 			return err;
914 	}
915 
916 	return 0;
917 }
918 
919 static int __perf_copy_blt(struct intel_context *ce,
920 			   struct scatterlist *src,
921 			   unsigned int src_pat_index,
922 			   bool src_is_lmem,
923 			   struct scatterlist *dst,
924 			   unsigned int dst_pat_index,
925 			   bool dst_is_lmem,
926 			   size_t sz)
927 {
928 	ktime_t t[5];
929 	int pass;
930 	int err = 0;
931 
932 	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
933 		struct i915_request *rq;
934 		ktime_t t0, t1;
935 
936 		t0 = ktime_get();
937 
938 		err = intel_context_migrate_copy(ce, NULL,
939 						 src, src_pat_index,
940 						 src_is_lmem,
941 						 dst, dst_pat_index,
942 						 dst_is_lmem,
943 						 &rq);
944 		if (rq) {
945 			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
946 				err = -EIO;
947 			i915_request_put(rq);
948 		}
949 		if (err)
950 			break;
951 
952 		t1 = ktime_get();
953 		t[pass] = ktime_sub(t1, t0);
954 	}
955 	if (err)
956 		return err;
957 
958 	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
959 	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
960 		ce->engine->name, sz >> 10,
961 		div64_u64(mul_u32_u32(4 * sz,
962 				      1000 * 1000 * 1000),
963 			  t[1] + 2 * t[2] + t[3]) >> 20);
964 	return 0;
965 }
966 
967 static int perf_copy_blt(void *arg)
968 {
969 	struct intel_gt *gt = arg;
970 	static const unsigned long sizes[] = {
971 		SZ_4K,
972 		SZ_64K,
973 		SZ_2M,
974 		SZ_64M
975 	};
976 	int i;
977 
978 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
979 		struct drm_i915_gem_object *src, *dst;
980 		size_t sz;
981 		int err;
982 
983 		src = create_init_lmem_internal(gt, sizes[i], true);
984 		if (IS_ERR(src))
985 			return PTR_ERR(src);
986 
987 		sz = src->base.size;
988 		dst = create_init_lmem_internal(gt, sz, false);
989 		if (IS_ERR(dst)) {
990 			err = PTR_ERR(dst);
991 			goto err_src;
992 		}
993 
994 		err = __perf_copy_blt(gt->migrate.context,
995 				      src->mm.pages->sgl,
996 				      i915_gem_get_pat_index(gt->i915,
997 							     I915_CACHE_NONE),
998 				      i915_gem_object_is_lmem(src),
999 				      dst->mm.pages->sgl,
1000 				      i915_gem_get_pat_index(gt->i915,
1001 							     I915_CACHE_NONE),
1002 				      i915_gem_object_is_lmem(dst),
1003 				      sz);
1004 
1005 		i915_gem_object_unlock(dst);
1006 		i915_gem_object_put(dst);
1007 err_src:
1008 		i915_gem_object_unlock(src);
1009 		i915_gem_object_put(src);
1010 		if (err)
1011 			return err;
1012 	}
1013 
1014 	return 0;
1015 }
1016 
1017 int intel_migrate_perf_selftests(struct drm_i915_private *i915)
1018 {
1019 	static const struct i915_subtest tests[] = {
1020 		SUBTEST(perf_clear_blt),
1021 		SUBTEST(perf_copy_blt),
1022 	};
1023 	struct intel_gt *gt = to_gt(i915);
1024 
1025 	if (intel_gt_is_wedged(gt))
1026 		return 0;
1027 
1028 	if (!gt->migrate.context)
1029 		return 0;
1030 
1031 	return intel_gt_live_subtests(tests, gt);
1032 }
1033