// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr > i915_vma_offset(vma) + i915_vma_size(vma) - sizeof(val));
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	const unsigned int pat_index =
		i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at a random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose a random dword within the 1st pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers
	 * the whole PT, despite being randomly aligned to 64KiB, and
	 * restrict our sampling to the 2MiB PT in which we know we will
	 * be using 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

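	/*
	 * Build the spinner batch: a leading MI_NOOP (rewritten to
	 * MI_BATCH_BUFFER_END later to force termination), a conditional
	 * batch-buffer-end that samples *addr, and a MI_BATCH_BUFFER_START
	 * jumping back to the start. va is primed with -1 and vb with 0 at
	 * the sampled address, so the loop only exits once the sample is
	 * read through the updated PTE (or the batch is forcibly ended).
	 */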
	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * Gen8 VCS compares the immediate value with the bitwise AND of two
	 * consecutive DWORDs pointed to by addr; other gens/engines compare
	 * the value with the single DWORD pointed to by addr. Moreover we
	 * want to exercise DWORD-sized invalidations. The values below were
	 * chosen to satisfy all of these requirements.
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/*
	 * Short sleep to sanitycheck the batch is spinning before we begin.
	 * FIXME: Why is GSC so slow?
	 */
	if (ce->engine->class == OTHER_CLASS)
		msleep(200);
	else
		msleep(10);

	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);

	cs = page_mask_bits(batch->mm.mapping);
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

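/*
 * Backing-store helpers for the two flavours we exercise: create_lmem()
 * grabs a large contiguous chunk of device-local memory (capped to a
 * quarter of the mappable region) and create_smem() uses a 4M internal
 * object, so that the PPGTT below can be populated with the full range of
 * supported page sizes.
 */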
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
	resource_size_t size = SZ_1G;

	/*
	 * Allocating the largest possible page size lets us test all page
	 * types. To succeed with both allocations, especially in the case
	 * of Small BAR, try to allocate no more than a quarter of the
	 * mappable memory.
	 */
	if (mr && size > resource_size(&mr->io) / 4)
		size = resource_size(&mr->io) / 4;

	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need that for SZ_2M pages.
	 * Since we randomly offset the start of the vma, we need a 4M object
	 * so that there is a 2M range within it that is suitable for SZ_64K
	 * PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that the TLB invalidate is able to revoke an active
	 * page. We load a page into a spinning COND_BBE loop and then
	 * remap that page to a new physical address. The old address is
	 * retained in the TLB cache until we issue an invalidate, and so
	 * the loop keeps spinning.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanitycheck the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}

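/*
 * Full-GT TLB invalidation callback for mem_tlbinv(). Passing the current
 * TLB seqno with the low bit set requests a seqno that has not yet passed,
 * so intel_gt_invalidate_tlb_full() should perform a real invalidation
 * rather than skipping it as already completed.
 */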
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
	intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}
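
/*
 * Note: like the other live selftests, intel_tlb_live_selftests() is
 * expected to be registered in selftests/i915_live_selftests.h so that it
 * runs with the rest of the i915 live selftest suite (e.g. via the
 * i915.live_selftests module parameter or IGT's i915_selftest tests); the
 * registration itself lives outside this file.
 */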