// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_bo_test.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(gt_to_tile(gt)->migrate, bo, bo->ttm.resource);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}
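
	/*
	 * When the bo is evicted to system memory, its CCS metadata is kept
	 * in ttm_tt pages following the main data; xe_bo_ccs_pages_start()
	 * returns the byte offset of that area within the backing store.
	 */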
	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least last value in page. */
	offset = xe_device_ccs_bytes(gt_to_xe(gt), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}

static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
			    struct kunit *test)
{
	struct xe_bo *bo;
	u32 vram_bit;
	int ret;

	/* TODO: Sanity check */
	vram_bit = XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id;
	kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id,
		   gt_to_tile(gt)->id);

	bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device,
				 vram_bit);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_gt *gt;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_info(test, "Skipping non-flat-ccs device.\n");
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_gt(gt, xe, id)
		ccs_test_run_gt(xe, gt, test);

	xe_device_mem_access_put(xe);

	return 0;
}

void xe_ccs_migrate_kunit(struct kunit *test)
{
	xe_call_for_each_device(ccs_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);

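/*
 * The eviction test creates, per tile, one bo in the migrate VM and one
 * pinned external bo, evicts everything to system memory and restores it,
 * then checks that the pinned external bo came back to VRAM while the
 * unpinned bo did not. The second loop iteration also re-validates both
 * bos after the restore.
 */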
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       ttm_bo_type_device,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     ttm_bo_type_device, bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * inflight. Also it seems snapshotting the ADS object and
		 * copying back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart the
		 * GT, which re-initializes such things. We could potentially
		 * skip saving and restoring such objects in xe_bo_evict_all(),
		 * however it seems quite fragile not to also restart the GT.
		 * Try to do that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}

static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_info(test, "Skipping non-discrete device %s.\n",
			   dev_name(xe->drm.dev));
		return 0;
	}

	xe_device_mem_access_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_device_mem_access_put(xe);

	return 0;
}

void xe_bo_evict_kunit(struct kunit *test)
{
	xe_call_for_each_device(evict_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);