// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_bo_test.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

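/*
 * Check that CCS (flat-CCS compression metadata) is cleared on bo creation,
 * survives a VRAM <-> system memory migration round trip, and can be
 * explicitly cleared again.
 */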
static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       ttm_bo_type_device, bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_info(test, "Skipping non-flat-ccs device.\n");
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for primary tile */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_device_mem_access_put(xe);

	return 0;
}

void xe_ccs_migrate_kunit(struct kunit *test)
{
	xe_call_for_each_device(ccs_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);

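/*
 * Evict all bos to system memory and restore the kernel and pinned external
 * bos again, as the suspend/resume paths do. Verify that the pinned external
 * bo ends up back in VRAM while the ordinary bo stays evicted, then
 * re-validate both on the second iteration.
 */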
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       ttm_bo_type_device,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     ttm_bo_type_device, bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * inflight. Also it seems snapshotting the ADS object and
		 * copying back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart the
		 * GT, which re-initializes such things. We could potentially
		 * skip saving and restoring such objects in xe_bo_evict_all()
		 * however it seems quite fragile not to also restart the GT.
		 * Try to do that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_info(test, "Skipping non-discrete device %s.\n",
			   dev_name(xe->drm.dev));
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_device_mem_access_put(xe);

	return 0;
}

void xe_bo_evict_kunit(struct kunit *test)
{
	xe_call_for_each_device(evict_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);