xref: /linux/drivers/gpu/drm/xe/tests/xe_bo.c (revision 79d2e1919a2728ef49d938eb20ebd5903c14dfb0)
1 // SPDX-License-Identifier: GPL-2.0 AND MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include <kunit/test.h>
7 #include <kunit/visibility.h>
8 
9 #include <linux/iosys-map.h>
10 #include <linux/math64.h>
11 #include <linux/prandom.h>
12 #include <linux/swap.h>
13 
14 #include <uapi/linux/sysinfo.h>
15 
16 #include "tests/xe_kunit_helpers.h"
17 #include "tests/xe_pci_test.h"
18 #include "tests/xe_test.h"
19 
20 #include "xe_bo_evict.h"
21 #include "xe_pci.h"
22 #include "xe_pm.h"
23 
24 static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
25 			    bool clear, u64 get_val, u64 assign_val,
26 			    struct kunit *test)
27 {
28 	struct dma_fence *fence;
29 	struct ttm_tt *ttm;
30 	struct page *page;
31 	pgoff_t ccs_page;
32 	long timeout;
33 	u64 *cpu_map;
34 	int ret;
35 	u32 offset;
36 
37 	/* Move bo to VRAM if not already there. */
38 	ret = xe_bo_validate(bo, NULL, false);
39 	if (ret) {
40 		KUNIT_FAIL(test, "Failed to validate bo.\n");
41 		return ret;
42 	}
43 
44 	/* Optionally clear bo *and* CCS data in VRAM. */
45 	if (clear) {
46 		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource,
47 					 XE_MIGRATE_CLEAR_FLAG_FULL);
48 		if (IS_ERR(fence)) {
49 			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
50 			return PTR_ERR(fence);
51 		}
52 		dma_fence_put(fence);
53 	}
54 
55 	/* Evict to system. CCS data should be copied. */
56 	ret = xe_bo_evict(bo, true);
57 	if (ret) {
58 		KUNIT_FAIL(test, "Failed to evict bo.\n");
59 		return ret;
60 	}
61 
62 	/* Sync all migration blits */
63 	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
64 					DMA_RESV_USAGE_KERNEL,
65 					true,
66 					5 * HZ);
67 	if (timeout <= 0) {
68 		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
69 		return -ETIME;
70 	}
71 
72 	/*
73 	 * Bo with CCS data is now in system memory. Verify backing store
74 	 * and data integrity. Then assign for the next testing round while
75 	 * we still have a CPU map.
76 	 */
77 	ttm = bo->ttm.ttm;
78 	if (!ttm || !ttm_tt_is_populated(ttm)) {
79 		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
80 		return -EINVAL;
81 	}
82 
83 	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
84 	if (ccs_page >= ttm->num_pages) {
85 		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
86 		return -EINVAL;
87 	}
88 
89 	page = ttm->pages[ccs_page];
90 	cpu_map = kmap_local_page(page);
91 
92 	/* Check first CCS value */
93 	if (cpu_map[0] != get_val) {
94 		KUNIT_FAIL(test,
95 			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
96 			   (unsigned long long)get_val,
97 			   (unsigned long long)cpu_map[0]);
98 		ret = -EINVAL;
99 	}
100 
101 	/* Check last CCS value, or at least last value in page. */
102 	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
103 	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
104 	if (cpu_map[offset] != get_val) {
105 		KUNIT_FAIL(test,
106 			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
107 			   (unsigned long long)get_val,
108 			   (unsigned long long)cpu_map[offset]);
109 		ret = -EINVAL;
110 	}
111 
112 	cpu_map[0] = assign_val;
113 	cpu_map[offset] = assign_val;
114 	kunmap_local(cpu_map);
115 
116 	return ret;
117 }
118 
119 static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
120 			      struct kunit *test)
121 {
122 	struct xe_bo *bo;
123 
124 	int ret;
125 
126 	/* TODO: Sanity check */
127 	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
128 
129 	if (IS_DGFX(xe))
130 		kunit_info(test, "Testing vram id %u\n", tile->id);
131 	else
132 		kunit_info(test, "Testing system memory\n");
133 
134 	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
135 			       bo_flags);
136 	if (IS_ERR(bo)) {
137 		KUNIT_FAIL(test, "Failed to create bo.\n");
138 		return;
139 	}
140 
141 	xe_bo_lock(bo, false);
142 
143 	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
144 	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
145 			       test);
146 	if (ret)
147 		goto out_unlock;
148 
149 	kunit_info(test, "Verifying that CCS data survives migration.\n");
150 	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
151 			       0xdeadbeefdeadbeefULL, test);
152 	if (ret)
153 		goto out_unlock;
154 
155 	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
156 	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
157 
158 out_unlock:
159 	xe_bo_unlock(bo);
160 	xe_bo_put(bo);
161 }
162 
163 static int ccs_test_run_device(struct xe_device *xe)
164 {
165 	struct kunit *test = kunit_get_current_test();
166 	struct xe_tile *tile;
167 	int id;
168 
169 	if (!xe_device_has_flat_ccs(xe)) {
170 		kunit_skip(test, "non-flat-ccs device\n");
171 		return 0;
172 	}
173 
174 	/* For xe2+ dgfx, we don't handle ccs metadata */
175 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
176 		kunit_skip(test, "xe2+ dgfx device\n");
177 		return 0;
178 	}
179 
180 	xe_pm_runtime_get(xe);
181 
182 	for_each_tile(tile, xe, id) {
183 		/* For igfx run only for primary tile */
184 		if (!IS_DGFX(xe) && id > 0)
185 			continue;
186 		ccs_test_run_tile(xe, tile, test);
187 	}
188 
189 	xe_pm_runtime_put(xe);
190 
191 	return 0;
192 }
193 
194 static void xe_ccs_migrate_kunit(struct kunit *test)
195 {
196 	struct xe_device *xe = test->priv;
197 
198 	ccs_test_run_device(xe);
199 }
200 
201 static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
202 {
203 	struct xe_bo *bo, *external;
204 	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
205 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
206 	struct xe_gt *__gt;
207 	int err, i, id;
208 
209 	kunit_info(test, "Testing device %s vram id %u\n",
210 		   dev_name(xe->drm.dev), tile->id);
211 
212 	for (i = 0; i < 2; ++i) {
213 		xe_vm_lock(vm, false);
214 		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
215 				       DRM_XE_GEM_CPU_CACHING_WC,
216 				       bo_flags);
217 		xe_vm_unlock(vm);
218 		if (IS_ERR(bo)) {
219 			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
220 			break;
221 		}
222 
223 		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
224 					     DRM_XE_GEM_CPU_CACHING_WC,
225 					     bo_flags);
226 		if (IS_ERR(external)) {
227 			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
228 			goto cleanup_bo;
229 		}
230 
231 		xe_bo_lock(external, false);
232 		err = xe_bo_pin_external(external);
233 		xe_bo_unlock(external);
234 		if (err) {
235 			KUNIT_FAIL(test, "external bo pin err=%pe\n",
236 				   ERR_PTR(err));
237 			goto cleanup_external;
238 		}
239 
240 		err = xe_bo_evict_all(xe);
241 		if (err) {
242 			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
243 			goto cleanup_all;
244 		}
245 
246 		for_each_gt(__gt, xe, id)
247 			xe_gt_sanitize(__gt);
248 		err = xe_bo_restore_kernel(xe);
249 		/*
250 		 * Snapshotting the CTB and copying back a potentially old
251 		 * version seems risky, depending on what might have been
252 		 * inflight. Also it seems snapshotting the ADS object and
253 		 * copying back results in serious breakage. Normally when
254 		 * calling xe_bo_restore_kernel() we always fully restart the
255 		 * GT, which re-intializes such things.  We could potentially
256 		 * skip saving and restoring such objects in xe_bo_evict_all()
257 		 * however seems quite fragile not to also restart the GT. Try
258 		 * to do that here by triggering a GT reset.
259 		 */
260 		for_each_gt(__gt, xe, id) {
261 			xe_gt_reset_async(__gt);
262 			flush_work(&__gt->reset.worker);
263 		}
264 		if (err) {
265 			KUNIT_FAIL(test, "restore kernel err=%pe\n",
266 				   ERR_PTR(err));
267 			goto cleanup_all;
268 		}
269 
270 		err = xe_bo_restore_user(xe);
271 		if (err) {
272 			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
273 			goto cleanup_all;
274 		}
275 
276 		if (!xe_bo_is_vram(external)) {
277 			KUNIT_FAIL(test, "external bo is not vram\n");
278 			err = -EPROTO;
279 			goto cleanup_all;
280 		}
281 
282 		if (xe_bo_is_vram(bo)) {
283 			KUNIT_FAIL(test, "bo is vram\n");
284 			err = -EPROTO;
285 			goto cleanup_all;
286 		}
287 
288 		if (i) {
289 			down_read(&vm->lock);
290 			xe_vm_lock(vm, false);
291 			err = xe_bo_validate(bo, bo->vm, false);
292 			xe_vm_unlock(vm);
293 			up_read(&vm->lock);
294 			if (err) {
295 				KUNIT_FAIL(test, "bo valid err=%pe\n",
296 					   ERR_PTR(err));
297 				goto cleanup_all;
298 			}
299 			xe_bo_lock(external, false);
300 			err = xe_bo_validate(external, NULL, false);
301 			xe_bo_unlock(external);
302 			if (err) {
303 				KUNIT_FAIL(test, "external bo valid err=%pe\n",
304 					   ERR_PTR(err));
305 				goto cleanup_all;
306 			}
307 		}
308 
309 		xe_bo_lock(external, false);
310 		xe_bo_unpin_external(external);
311 		xe_bo_unlock(external);
312 
313 		xe_bo_put(external);
314 
315 		xe_bo_lock(bo, false);
316 		__xe_bo_unset_bulk_move(bo);
317 		xe_bo_unlock(bo);
318 		xe_bo_put(bo);
319 		continue;
320 
321 cleanup_all:
322 		xe_bo_lock(external, false);
323 		xe_bo_unpin_external(external);
324 		xe_bo_unlock(external);
325 cleanup_external:
326 		xe_bo_put(external);
327 cleanup_bo:
328 		xe_bo_lock(bo, false);
329 		__xe_bo_unset_bulk_move(bo);
330 		xe_bo_unlock(bo);
331 		xe_bo_put(bo);
332 		break;
333 	}
334 
335 	xe_vm_put(vm);
336 
337 	return 0;
338 }
339 
340 static int evict_test_run_device(struct xe_device *xe)
341 {
342 	struct kunit *test = kunit_get_current_test();
343 	struct xe_tile *tile;
344 	int id;
345 
346 	if (!IS_DGFX(xe)) {
347 		kunit_skip(test, "non-discrete device\n");
348 		return 0;
349 	}
350 
351 	xe_pm_runtime_get(xe);
352 
353 	for_each_tile(tile, xe, id)
354 		evict_test_run_tile(xe, tile, test);
355 
356 	xe_pm_runtime_put(xe);
357 
358 	return 0;
359 }
360 
361 static void xe_bo_evict_kunit(struct kunit *test)
362 {
363 	struct xe_device *xe = test->priv;
364 
365 	evict_test_run_device(xe);
366 }
367 
368 struct xe_bo_link {
369 	struct list_head link;
370 	struct xe_bo *bo;
371 	u32 val;
372 };
373 
/* Size of each bo the shrink test allocates, and its allocation step. */
#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
375 
376 static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
377 				   struct xe_bo_link *link)
378 {
379 	struct iosys_map map;
380 	int ret = ttm_bo_vmap(&bo->ttm, &map);
381 	size_t __maybe_unused i;
382 
383 	if (ret)
384 		return ret;
385 
386 	for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
387 		u32 val = prandom_u32_state(state);
388 
389 		iosys_map_wr(&map, i, u32, val);
390 		if (i == 0)
391 			link->val = val;
392 	}
393 
394 	ttm_bo_vunmap(&bo->ttm, &map);
395 	return 0;
396 }
397 
398 static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
399 			       unsigned int bo_nr, struct rnd_state *state,
400 			       struct xe_bo_link *link)
401 {
402 	struct iosys_map map;
403 	int ret = ttm_bo_vmap(&bo->ttm, &map);
404 	size_t i;
405 	bool failed = false;
406 
407 	if (ret) {
408 		KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
409 		return true;
410 	}
411 
412 	for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
413 		u32 val = prandom_u32_state(state);
414 
415 		if (iosys_map_rd(&map, i, u32) != val) {
416 			KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
417 				   bo_nr, (unsigned long long)i);
418 			kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
419 				   (unsigned int)iosys_map_rd(&map, i, u32), val);
420 			if (i == 0 && val != link->val)
421 				kunit_info(test, "Looks like PRNG is out of sync.\n");
422 			failed = true;
423 			break;
424 		}
425 	}
426 
427 	ttm_bo_vunmap(&bo->ttm, &map);
428 
429 	return failed;
430 }
431 
432 /*
433  * Try to create system bos corresponding to twice the amount
434  * of available system memory to test shrinker functionality.
435  * If no swap space is available to accommodate the
436  * memory overcommit, mark bos purgeable.
437  */
438 static int shrink_test_run_device(struct xe_device *xe)
439 {
440 	struct kunit *test = kunit_get_current_test();
441 	LIST_HEAD(bos);
442 	struct xe_bo_link *link, *next;
443 	struct sysinfo si;
444 	u64 ram, ram_and_swap, purgeable = 0, alloced, to_alloc, limit;
445 	unsigned int interrupted = 0, successful = 0, count = 0;
446 	struct rnd_state prng;
447 	u64 rand_seed;
448 	bool failed = false;
449 
450 	rand_seed = get_random_u64();
451 	prandom_seed_state(&prng, rand_seed);
452 	kunit_info(test, "Random seed is 0x%016llx.\n",
453 		   (unsigned long long)rand_seed);
454 
455 	/* Skip if execution time is expected to be too long. */
456 
457 	limit = SZ_32G;
458 	/* IGFX with flat CCS needs to copy when swapping / shrinking */
459 	if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
460 		limit = SZ_16G;
461 
462 	si_meminfo(&si);
463 	ram = (size_t)si.freeram * si.mem_unit;
464 	if (ram > limit) {
465 		kunit_skip(test, "Too long expected execution time.\n");
466 		return 0;
467 	}
468 	to_alloc = ram * 2;
469 
470 	ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE;
471 	if (to_alloc > ram_and_swap)
472 		purgeable = to_alloc - ram_and_swap;
473 	purgeable += div64_u64(purgeable, 5);
474 
475 	kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
476 		   (unsigned long)ram);
477 	for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) {
478 		struct xe_bo *bo;
479 		unsigned int mem_type;
480 		struct xe_ttm_tt *xe_tt;
481 
482 		link = kzalloc(sizeof(*link), GFP_KERNEL);
483 		if (!link) {
484 			KUNIT_FAIL(test, "Unexpected link allocation failure\n");
485 			failed = true;
486 			break;
487 		}
488 
489 		INIT_LIST_HEAD(&link->link);
490 
491 		/* We can create bos using WC caching here. But it is slower. */
492 		bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
493 				       DRM_XE_GEM_CPU_CACHING_WB,
494 				       XE_BO_FLAG_SYSTEM);
495 		if (IS_ERR(bo)) {
496 			if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
497 			    bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
498 				KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
499 			kfree(link);
500 			failed = true;
501 			break;
502 		}
503 		xe_bo_lock(bo, false);
504 		xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
505 
506 		/*
507 		 * Allocate purgeable bos first, because if we do it the
508 		 * other way around, they may not be subject to swapping...
509 		 */
510 		if (alloced < purgeable) {
511 			xe_tt->purgeable = true;
512 			bo->ttm.priority = 0;
513 		} else {
514 			int ret = shrink_test_fill_random(bo, &prng, link);
515 
516 			if (ret) {
517 				xe_bo_unlock(bo);
518 				xe_bo_put(bo);
519 				KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
520 					   ERR_PTR(ret));
521 				kfree(link);
522 				failed = true;
523 				break;
524 			}
525 		}
526 
527 		mem_type = bo->ttm.resource->mem_type;
528 		xe_bo_unlock(bo);
529 		link->bo = bo;
530 		list_add_tail(&link->link, &bos);
531 
532 		if (mem_type != XE_PL_TT) {
533 			KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
534 				   bo->ttm.resource->mem_type);
535 			failed = true;
536 		}
537 		cond_resched();
538 		if (signal_pending(current))
539 			break;
540 	}
541 
542 	/*
543 	 * Read back and destroy bos. Reset the pseudo-random seed to get an
544 	 * identical pseudo-random number sequence for readback.
545 	 */
546 	prandom_seed_state(&prng, rand_seed);
547 	list_for_each_entry_safe(link, next, &bos, link) {
548 		static struct ttm_operation_ctx ctx = {.interruptible = true};
549 		struct xe_bo *bo = link->bo;
550 		struct xe_ttm_tt *xe_tt;
551 		int ret;
552 
553 		count++;
554 		if (!signal_pending(current) && !failed) {
555 			bool purgeable, intr = false;
556 
557 			xe_bo_lock(bo, NULL);
558 
559 			/* xe_tt->purgeable is cleared on validate. */
560 			xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
561 			purgeable = xe_tt->purgeable;
562 			do {
563 				ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
564 				if (ret == -EINTR)
565 					intr = true;
566 			} while (ret == -EINTR && !signal_pending(current));
567 
568 			if (!ret && !purgeable)
569 				failed = shrink_test_verify(test, bo, count, &prng, link);
570 
571 			xe_bo_unlock(bo);
572 			if (ret) {
573 				KUNIT_FAIL(test, "Validation failed: %pe\n",
574 					   ERR_PTR(ret));
575 				failed = true;
576 			} else if (intr) {
577 				interrupted++;
578 			} else {
579 				successful++;
580 			}
581 		}
582 		xe_bo_put(link->bo);
583 		list_del(&link->link);
584 		kfree(link);
585 	}
586 	kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
587 		   interrupted, successful);
588 
589 	return 0;
590 }
591 
592 static void xe_bo_shrink_kunit(struct kunit *test)
593 {
594 	struct xe_device *xe = test->priv;
595 
596 	shrink_test_run_device(xe);
597 }
598 
599 static struct kunit_case xe_bo_tests[] = {
600 	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
601 	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
602 	KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
603 			      {.speed = KUNIT_SPEED_SLOW}),
604 	{}
605 };
606 
607 VISIBLE_IF_KUNIT
608 struct kunit_suite xe_bo_test_suite = {
609 	.name = "xe_bo",
610 	.test_cases = xe_bo_tests,
611 	.init = xe_kunit_helper_xe_device_live_test_init,
612 };
613 EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
614