xref: /linux/drivers/gpu/drm/xe/tests/xe_bo.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: GPL-2.0 AND MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include <kunit/test.h>
7 #include <kunit/visibility.h>
8 
9 #include "tests/xe_bo_test.h"
10 #include "tests/xe_pci_test.h"
11 #include "tests/xe_test.h"
12 
13 #include "xe_bo_evict.h"
14 #include "xe_pci.h"
15 #include "xe_pm.h"
16 
17 static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
18 			    bool clear, u64 get_val, u64 assign_val,
19 			    struct kunit *test)
20 {
21 	struct dma_fence *fence;
22 	struct ttm_tt *ttm;
23 	struct page *page;
24 	pgoff_t ccs_page;
25 	long timeout;
26 	u64 *cpu_map;
27 	int ret;
28 	u32 offset;
29 
30 	/* Move bo to VRAM if not already there. */
31 	ret = xe_bo_validate(bo, NULL, false);
32 	if (ret) {
33 		KUNIT_FAIL(test, "Failed to validate bo.\n");
34 		return ret;
35 	}
36 
37 	/* Optionally clear bo *and* CCS data in VRAM. */
38 	if (clear) {
39 		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
40 		if (IS_ERR(fence)) {
41 			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
42 			return PTR_ERR(fence);
43 		}
44 		dma_fence_put(fence);
45 	}
46 
47 	/* Evict to system. CCS data should be copied. */
48 	ret = xe_bo_evict(bo, true);
49 	if (ret) {
50 		KUNIT_FAIL(test, "Failed to evict bo.\n");
51 		return ret;
52 	}
53 
54 	/* Sync all migration blits */
55 	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
56 					DMA_RESV_USAGE_KERNEL,
57 					true,
58 					5 * HZ);
59 	if (timeout <= 0) {
60 		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
61 		return -ETIME;
62 	}
63 
64 	/*
65 	 * Bo with CCS data is now in system memory. Verify backing store
66 	 * and data integrity. Then assign for the next testing round while
67 	 * we still have a CPU map.
68 	 */
69 	ttm = bo->ttm.ttm;
70 	if (!ttm || !ttm_tt_is_populated(ttm)) {
71 		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
72 		return -EINVAL;
73 	}
74 
75 	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
76 	if (ccs_page >= ttm->num_pages) {
77 		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
78 		return -EINVAL;
79 	}
80 
81 	page = ttm->pages[ccs_page];
82 	cpu_map = kmap_local_page(page);
83 
84 	/* Check first CCS value */
85 	if (cpu_map[0] != get_val) {
86 		KUNIT_FAIL(test,
87 			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
88 			   (unsigned long long)get_val,
89 			   (unsigned long long)cpu_map[0]);
90 		ret = -EINVAL;
91 	}
92 
93 	/* Check last CCS value, or at least last value in page. */
94 	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
95 	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
96 	if (cpu_map[offset] != get_val) {
97 		KUNIT_FAIL(test,
98 			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
99 			   (unsigned long long)get_val,
100 			   (unsigned long long)cpu_map[offset]);
101 		ret = -EINVAL;
102 	}
103 
104 	cpu_map[0] = assign_val;
105 	cpu_map[offset] = assign_val;
106 	kunmap_local(cpu_map);
107 
108 	return ret;
109 }
110 
111 static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
112 			      struct kunit *test)
113 {
114 	struct xe_bo *bo;
115 
116 	int ret;
117 
118 	/* TODO: Sanity check */
119 	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
120 
121 	if (IS_DGFX(xe))
122 		kunit_info(test, "Testing vram id %u\n", tile->id);
123 	else
124 		kunit_info(test, "Testing system memory\n");
125 
126 	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
127 			       ttm_bo_type_device, bo_flags);
128 	if (IS_ERR(bo)) {
129 		KUNIT_FAIL(test, "Failed to create bo.\n");
130 		return;
131 	}
132 
133 	xe_bo_lock(bo, false);
134 
135 	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
136 	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
137 			       test);
138 	if (ret)
139 		goto out_unlock;
140 
141 	kunit_info(test, "Verifying that CCS data survives migration.\n");
142 	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
143 			       0xdeadbeefdeadbeefULL, test);
144 	if (ret)
145 		goto out_unlock;
146 
147 	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
148 	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
149 
150 out_unlock:
151 	xe_bo_unlock(bo);
152 	xe_bo_put(bo);
153 }
154 
155 static int ccs_test_run_device(struct xe_device *xe)
156 {
157 	struct kunit *test = xe_cur_kunit();
158 	struct xe_tile *tile;
159 	int id;
160 
161 	if (!xe_device_has_flat_ccs(xe)) {
162 		kunit_info(test, "Skipping non-flat-ccs device.\n");
163 		return 0;
164 	}
165 
166 	xe_pm_runtime_get(xe);
167 
168 	for_each_tile(tile, xe, id) {
169 		/* For igfx run only for primary tile */
170 		if (!IS_DGFX(xe) && id > 0)
171 			continue;
172 		ccs_test_run_tile(xe, tile, test);
173 	}
174 
175 	xe_pm_runtime_put(xe);
176 
177 	return 0;
178 }
179 
180 void xe_ccs_migrate_kunit(struct kunit *test)
181 {
182 	xe_call_for_each_device(ccs_test_run_device);
183 }
184 EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
185 
186 static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
187 {
188 	struct xe_bo *bo, *external;
189 	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
190 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
191 	struct xe_gt *__gt;
192 	int err, i, id;
193 
194 	kunit_info(test, "Testing device %s vram id %u\n",
195 		   dev_name(xe->drm.dev), tile->id);
196 
197 	for (i = 0; i < 2; ++i) {
198 		xe_vm_lock(vm, false);
199 		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
200 				       DRM_XE_GEM_CPU_CACHING_WC,
201 				       ttm_bo_type_device,
202 				       bo_flags);
203 		xe_vm_unlock(vm);
204 		if (IS_ERR(bo)) {
205 			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
206 			break;
207 		}
208 
209 		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
210 					     DRM_XE_GEM_CPU_CACHING_WC,
211 					     ttm_bo_type_device, bo_flags);
212 		if (IS_ERR(external)) {
213 			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
214 			goto cleanup_bo;
215 		}
216 
217 		xe_bo_lock(external, false);
218 		err = xe_bo_pin_external(external);
219 		xe_bo_unlock(external);
220 		if (err) {
221 			KUNIT_FAIL(test, "external bo pin err=%pe\n",
222 				   ERR_PTR(err));
223 			goto cleanup_external;
224 		}
225 
226 		err = xe_bo_evict_all(xe);
227 		if (err) {
228 			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
229 			goto cleanup_all;
230 		}
231 
232 		for_each_gt(__gt, xe, id)
233 			xe_gt_sanitize(__gt);
234 		err = xe_bo_restore_kernel(xe);
235 		/*
236 		 * Snapshotting the CTB and copying back a potentially old
237 		 * version seems risky, depending on what might have been
238 		 * inflight. Also it seems snapshotting the ADS object and
239 		 * copying back results in serious breakage. Normally when
240 		 * calling xe_bo_restore_kernel() we always fully restart the
241 		 * GT, which re-intializes such things.  We could potentially
242 		 * skip saving and restoring such objects in xe_bo_evict_all()
243 		 * however seems quite fragile not to also restart the GT. Try
244 		 * to do that here by triggering a GT reset.
245 		 */
246 		for_each_gt(__gt, xe, id) {
247 			xe_gt_reset_async(__gt);
248 			flush_work(&__gt->reset.worker);
249 		}
250 		if (err) {
251 			KUNIT_FAIL(test, "restore kernel err=%pe\n",
252 				   ERR_PTR(err));
253 			goto cleanup_all;
254 		}
255 
256 		err = xe_bo_restore_user(xe);
257 		if (err) {
258 			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
259 			goto cleanup_all;
260 		}
261 
262 		if (!xe_bo_is_vram(external)) {
263 			KUNIT_FAIL(test, "external bo is not vram\n");
264 			err = -EPROTO;
265 			goto cleanup_all;
266 		}
267 
268 		if (xe_bo_is_vram(bo)) {
269 			KUNIT_FAIL(test, "bo is vram\n");
270 			err = -EPROTO;
271 			goto cleanup_all;
272 		}
273 
274 		if (i) {
275 			down_read(&vm->lock);
276 			xe_vm_lock(vm, false);
277 			err = xe_bo_validate(bo, bo->vm, false);
278 			xe_vm_unlock(vm);
279 			up_read(&vm->lock);
280 			if (err) {
281 				KUNIT_FAIL(test, "bo valid err=%pe\n",
282 					   ERR_PTR(err));
283 				goto cleanup_all;
284 			}
285 			xe_bo_lock(external, false);
286 			err = xe_bo_validate(external, NULL, false);
287 			xe_bo_unlock(external);
288 			if (err) {
289 				KUNIT_FAIL(test, "external bo valid err=%pe\n",
290 					   ERR_PTR(err));
291 				goto cleanup_all;
292 			}
293 		}
294 
295 		xe_bo_lock(external, false);
296 		xe_bo_unpin_external(external);
297 		xe_bo_unlock(external);
298 
299 		xe_bo_put(external);
300 
301 		xe_bo_lock(bo, false);
302 		__xe_bo_unset_bulk_move(bo);
303 		xe_bo_unlock(bo);
304 		xe_bo_put(bo);
305 		continue;
306 
307 cleanup_all:
308 		xe_bo_lock(external, false);
309 		xe_bo_unpin_external(external);
310 		xe_bo_unlock(external);
311 cleanup_external:
312 		xe_bo_put(external);
313 cleanup_bo:
314 		xe_bo_lock(bo, false);
315 		__xe_bo_unset_bulk_move(bo);
316 		xe_bo_unlock(bo);
317 		xe_bo_put(bo);
318 		break;
319 	}
320 
321 	xe_vm_put(vm);
322 
323 	return 0;
324 }
325 
326 static int evict_test_run_device(struct xe_device *xe)
327 {
328 	struct kunit *test = xe_cur_kunit();
329 	struct xe_tile *tile;
330 	int id;
331 
332 	if (!IS_DGFX(xe)) {
333 		kunit_info(test, "Skipping non-discrete device %s.\n",
334 			   dev_name(xe->drm.dev));
335 		return 0;
336 	}
337 
338 	xe_pm_runtime_get(xe);
339 
340 	for_each_tile(tile, xe, id)
341 		evict_test_run_tile(xe, tile, test);
342 
343 	xe_pm_runtime_put(xe);
344 
345 	return 0;
346 }
347 
348 void xe_bo_evict_kunit(struct kunit *test)
349 {
350 	xe_call_for_each_device(evict_test_run_device);
351 }
352 EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
353