// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource,
					 XE_MIGRATE_CLEAR_FLAG_FULL);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
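	/*
	 * xe_device_ccs_bytes() reports the CCS size for the whole bo; clamp
	 * to PAGE_SIZE since only the first CCS page is mapped above, then
	 * index the last u64 within that range.
	 */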
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_skip(test, "non-flat-ccs device\n");
		return 0;
	}

	/* For xe2+ dgfx, we don't handle ccs metadata */
	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
		kunit_skip(test, "xe2+ dgfx device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for primary tile */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_ccs_migrate_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	ccs_test_run_device(xe);
}

static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * inflight. Also it seems snapshotting the ADS object and
		 * copying back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart the
		 * GT, which re-initializes such things. We could potentially
		 * skip saving and restoring such objects in xe_bo_evict_all(),
		 * however it seems quite fragile not to also restart the GT.
		 * Try to do that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_skip(test, "non-discrete device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_bo_evict_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	evict_test_run_device(xe);
}

static struct kunit_case xe_bo_tests[] = {
	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
	{}
};

VISIBLE_IF_KUNIT
struct kunit_suite xe_bo_test_suite = {
	.name = "xe_bo",
	.test_cases = xe_bo_tests,
	.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);