1 // SPDX-License-Identifier: GPL-2.0 AND MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include <kunit/test.h> 7 #include <kunit/visibility.h> 8 9 #include "tests/xe_bo_test.h" 10 #include "tests/xe_pci_test.h" 11 #include "tests/xe_test.h" 12 13 #include "xe_bo_evict.h" 14 #include "xe_pci.h" 15 #include "xe_pm.h" 16 17 static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, 18 bool clear, u64 get_val, u64 assign_val, 19 struct kunit *test) 20 { 21 struct dma_fence *fence; 22 struct ttm_tt *ttm; 23 struct page *page; 24 pgoff_t ccs_page; 25 long timeout; 26 u64 *cpu_map; 27 int ret; 28 u32 offset; 29 30 /* Move bo to VRAM if not already there. */ 31 ret = xe_bo_validate(bo, NULL, false); 32 if (ret) { 33 KUNIT_FAIL(test, "Failed to validate bo.\n"); 34 return ret; 35 } 36 37 /* Optionally clear bo *and* CCS data in VRAM. */ 38 if (clear) { 39 fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); 40 if (IS_ERR(fence)) { 41 KUNIT_FAIL(test, "Failed to submit bo clear.\n"); 42 return PTR_ERR(fence); 43 } 44 dma_fence_put(fence); 45 } 46 47 /* Evict to system. CCS data should be copied. */ 48 ret = xe_bo_evict(bo, true); 49 if (ret) { 50 KUNIT_FAIL(test, "Failed to evict bo.\n"); 51 return ret; 52 } 53 54 /* Sync all migration blits */ 55 timeout = dma_resv_wait_timeout(bo->ttm.base.resv, 56 DMA_RESV_USAGE_KERNEL, 57 true, 58 5 * HZ); 59 if (timeout <= 0) { 60 KUNIT_FAIL(test, "Failed to sync bo eviction.\n"); 61 return -ETIME; 62 } 63 64 /* 65 * Bo with CCS data is now in system memory. Verify backing store 66 * and data integrity. Then assign for the next testing round while 67 * we still have a CPU map. 68 */ 69 ttm = bo->ttm.ttm; 70 if (!ttm || !ttm_tt_is_populated(ttm)) { 71 KUNIT_FAIL(test, "Bo was not in expected placement.\n"); 72 return -EINVAL; 73 } 74 75 ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT; 76 if (ccs_page >= ttm->num_pages) { 77 KUNIT_FAIL(test, "No TTM CCS pages present.\n"); 78 return -EINVAL; 79 } 80 81 page = ttm->pages[ccs_page]; 82 cpu_map = kmap_local_page(page); 83 84 /* Check first CCS value */ 85 if (cpu_map[0] != get_val) { 86 KUNIT_FAIL(test, 87 "Expected CCS readout 0x%016llx, got 0x%016llx.\n", 88 (unsigned long long)get_val, 89 (unsigned long long)cpu_map[0]); 90 ret = -EINVAL; 91 } 92 93 /* Check last CCS value, or at least last value in page. */ 94 offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); 95 offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; 96 if (cpu_map[offset] != get_val) { 97 KUNIT_FAIL(test, 98 "Expected CCS readout 0x%016llx, got 0x%016llx.\n", 99 (unsigned long long)get_val, 100 (unsigned long long)cpu_map[offset]); 101 ret = -EINVAL; 102 } 103 104 cpu_map[0] = assign_val; 105 cpu_map[offset] = assign_val; 106 kunmap_local(cpu_map); 107 108 return ret; 109 } 110 111 static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, 112 struct kunit *test) 113 { 114 struct xe_bo *bo; 115 116 int ret; 117 118 /* TODO: Sanity check */ 119 unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); 120 121 if (IS_DGFX(xe)) 122 kunit_info(test, "Testing vram id %u\n", tile->id); 123 else 124 kunit_info(test, "Testing system memory\n"); 125 126 bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, 127 ttm_bo_type_device, bo_flags); 128 if (IS_ERR(bo)) { 129 KUNIT_FAIL(test, "Failed to create bo.\n"); 130 return; 131 } 132 133 xe_bo_lock(bo, false); 134 135 kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); 136 ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, 137 test); 138 if (ret) 139 goto out_unlock; 140 141 kunit_info(test, "Verifying that CCS data survives migration.\n"); 142 ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, 143 0xdeadbeefdeadbeefULL, test); 144 if (ret) 145 goto out_unlock; 146 147 kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); 148 ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); 149 150 out_unlock: 151 xe_bo_unlock(bo); 152 xe_bo_put(bo); 153 } 154 155 static int ccs_test_run_device(struct xe_device *xe) 156 { 157 struct kunit *test = xe_cur_kunit(); 158 struct xe_tile *tile; 159 int id; 160 161 if (!xe_device_has_flat_ccs(xe)) { 162 kunit_info(test, "Skipping non-flat-ccs device.\n"); 163 return 0; 164 } 165 166 xe_pm_runtime_get(xe); 167 168 for_each_tile(tile, xe, id) { 169 /* For igfx run only for primary tile */ 170 if (!IS_DGFX(xe) && id > 0) 171 continue; 172 ccs_test_run_tile(xe, tile, test); 173 } 174 175 xe_pm_runtime_put(xe); 176 177 return 0; 178 } 179 180 void xe_ccs_migrate_kunit(struct kunit *test) 181 { 182 xe_call_for_each_device(ccs_test_run_device); 183 } 184 EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); 185 186 static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) 187 { 188 struct xe_bo *bo, *external; 189 unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); 190 struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); 191 struct xe_gt *__gt; 192 int err, i, id; 193 194 kunit_info(test, "Testing device %s vram id %u\n", 195 dev_name(xe->drm.dev), tile->id); 196 197 for (i = 0; i < 2; ++i) { 198 xe_vm_lock(vm, false); 199 bo = xe_bo_create_user(xe, NULL, vm, 0x10000, 200 DRM_XE_GEM_CPU_CACHING_WC, 201 ttm_bo_type_device, 202 bo_flags); 203 xe_vm_unlock(vm); 204 if (IS_ERR(bo)) { 205 KUNIT_FAIL(test, "bo create err=%pe\n", bo); 206 break; 207 } 208 209 external = xe_bo_create_user(xe, NULL, NULL, 0x10000, 210 DRM_XE_GEM_CPU_CACHING_WC, 211 ttm_bo_type_device, bo_flags); 212 if (IS_ERR(external)) { 213 KUNIT_FAIL(test, "external bo create err=%pe\n", external); 214 goto cleanup_bo; 215 } 216 217 xe_bo_lock(external, false); 218 err = xe_bo_pin_external(external); 219 xe_bo_unlock(external); 220 if (err) { 221 KUNIT_FAIL(test, "external bo pin err=%pe\n", 222 ERR_PTR(err)); 223 goto cleanup_external; 224 } 225 226 err = xe_bo_evict_all(xe); 227 if (err) { 228 KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err)); 229 goto cleanup_all; 230 } 231 232 for_each_gt(__gt, xe, id) 233 xe_gt_sanitize(__gt); 234 err = xe_bo_restore_kernel(xe); 235 /* 236 * Snapshotting the CTB and copying back a potentially old 237 * version seems risky, depending on what might have been 238 * inflight. Also it seems snapshotting the ADS object and 239 * copying back results in serious breakage. Normally when 240 * calling xe_bo_restore_kernel() we always fully restart the 241 * GT, which re-intializes such things. We could potentially 242 * skip saving and restoring such objects in xe_bo_evict_all() 243 * however seems quite fragile not to also restart the GT. Try 244 * to do that here by triggering a GT reset. 245 */ 246 for_each_gt(__gt, xe, id) { 247 xe_gt_reset_async(__gt); 248 flush_work(&__gt->reset.worker); 249 } 250 if (err) { 251 KUNIT_FAIL(test, "restore kernel err=%pe\n", 252 ERR_PTR(err)); 253 goto cleanup_all; 254 } 255 256 err = xe_bo_restore_user(xe); 257 if (err) { 258 KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); 259 goto cleanup_all; 260 } 261 262 if (!xe_bo_is_vram(external)) { 263 KUNIT_FAIL(test, "external bo is not vram\n"); 264 err = -EPROTO; 265 goto cleanup_all; 266 } 267 268 if (xe_bo_is_vram(bo)) { 269 KUNIT_FAIL(test, "bo is vram\n"); 270 err = -EPROTO; 271 goto cleanup_all; 272 } 273 274 if (i) { 275 down_read(&vm->lock); 276 xe_vm_lock(vm, false); 277 err = xe_bo_validate(bo, bo->vm, false); 278 xe_vm_unlock(vm); 279 up_read(&vm->lock); 280 if (err) { 281 KUNIT_FAIL(test, "bo valid err=%pe\n", 282 ERR_PTR(err)); 283 goto cleanup_all; 284 } 285 xe_bo_lock(external, false); 286 err = xe_bo_validate(external, NULL, false); 287 xe_bo_unlock(external); 288 if (err) { 289 KUNIT_FAIL(test, "external bo valid err=%pe\n", 290 ERR_PTR(err)); 291 goto cleanup_all; 292 } 293 } 294 295 xe_bo_lock(external, false); 296 xe_bo_unpin_external(external); 297 xe_bo_unlock(external); 298 299 xe_bo_put(external); 300 301 xe_bo_lock(bo, false); 302 __xe_bo_unset_bulk_move(bo); 303 xe_bo_unlock(bo); 304 xe_bo_put(bo); 305 continue; 306 307 cleanup_all: 308 xe_bo_lock(external, false); 309 xe_bo_unpin_external(external); 310 xe_bo_unlock(external); 311 cleanup_external: 312 xe_bo_put(external); 313 cleanup_bo: 314 xe_bo_lock(bo, false); 315 __xe_bo_unset_bulk_move(bo); 316 xe_bo_unlock(bo); 317 xe_bo_put(bo); 318 break; 319 } 320 321 xe_vm_put(vm); 322 323 return 0; 324 } 325 326 static int evict_test_run_device(struct xe_device *xe) 327 { 328 struct kunit *test = xe_cur_kunit(); 329 struct xe_tile *tile; 330 int id; 331 332 if (!IS_DGFX(xe)) { 333 kunit_info(test, "Skipping non-discrete device %s.\n", 334 dev_name(xe->drm.dev)); 335 return 0; 336 } 337 338 xe_pm_runtime_get(xe); 339 340 for_each_tile(tile, xe, id) 341 evict_test_run_tile(xe, tile, test); 342 343 xe_pm_runtime_put(xe); 344 345 return 0; 346 } 347 348 void xe_bo_evict_kunit(struct kunit *test) 349 { 350 xe_call_for_each_device(evict_test_run_device); 351 } 352 EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit); 353