1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include <linux/dma-fence-array.h> 7 8 #include "xe_pt.h" 9 10 #include "regs/xe_gtt_defs.h" 11 #include "xe_bo.h" 12 #include "xe_device.h" 13 #include "xe_drm_client.h" 14 #include "xe_exec_queue.h" 15 #include "xe_gt.h" 16 #include "xe_gt_tlb_invalidation.h" 17 #include "xe_migrate.h" 18 #include "xe_pt_types.h" 19 #include "xe_pt_walk.h" 20 #include "xe_res_cursor.h" 21 #include "xe_sched_job.h" 22 #include "xe_sync.h" 23 #include "xe_trace.h" 24 #include "xe_ttm_stolen_mgr.h" 25 #include "xe_vm.h" 26 27 struct xe_pt_dir { 28 struct xe_pt pt; 29 /** @children: Array of page-table child nodes */ 30 struct xe_ptw *children[XE_PDES]; 31 /** @staging: Array of page-table staging nodes */ 32 struct xe_ptw *staging[XE_PDES]; 33 }; 34 35 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 36 #define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) 37 #define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) 38 #else 39 #define xe_pt_set_addr(__xe_pt, __addr) 40 #define xe_pt_addr(__xe_pt) 0ull 41 #endif 42 43 static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; 44 static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; 45 46 #define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) 47 48 static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) 49 { 50 return container_of(pt, struct xe_pt_dir, pt); 51 } 52 53 static struct xe_pt * 54 xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) 55 { 56 return container_of(pt_dir->staging[index], struct xe_pt, base); 57 } 58 59 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, 60 unsigned int level) 61 { 62 struct xe_device *xe = tile_to_xe(tile); 63 u16 pat_index = xe->pat.idx[XE_CACHE_WB]; 64 u8 id = tile->id; 65 66 if (!xe_vm_has_scratch(vm)) 67 return 0; 68 69 if (level > MAX_HUGEPTE_LEVEL) 70 return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, 71 0, pat_index); 72 73 return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | 74 XE_PTE_NULL; 75 } 76 77 static void xe_pt_free(struct xe_pt *pt) 78 { 79 if (pt->level) 80 kfree(as_xe_pt_dir(pt)); 81 else 82 kfree(pt); 83 } 84 85 /** 86 * xe_pt_create() - Create a page-table. 87 * @vm: The vm to create for. 88 * @tile: The tile to create for. 89 * @level: The page-table level. 90 * 91 * Allocate and initialize a single struct xe_pt metadata structure. Also 92 * create the corresponding page-table bo, but don't initialize it. If the 93 * level is grater than zero, then it's assumed to be a directory page- 94 * table and the directory structure is also allocated and initialized to 95 * NULL pointers. 96 * 97 * Return: A valid struct xe_pt pointer on success, Pointer error code on 98 * error. 99 */ 100 struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, 101 unsigned int level) 102 { 103 struct xe_pt *pt; 104 struct xe_bo *bo; 105 int err; 106 107 if (level) { 108 struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); 109 110 pt = (dir) ? &dir->pt : NULL; 111 } else { 112 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 113 } 114 if (!pt) 115 return ERR_PTR(-ENOMEM); 116 117 pt->level = level; 118 bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, 119 ttm_bo_type_kernel, 120 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 121 XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | 122 XE_BO_FLAG_PINNED | 123 XE_BO_FLAG_NO_RESV_EVICT | 124 XE_BO_FLAG_PAGETABLE); 125 if (IS_ERR(bo)) { 126 err = PTR_ERR(bo); 127 goto err_kfree; 128 } 129 pt->bo = bo; 130 pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; 131 pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL; 132 133 if (vm->xef) 134 xe_drm_client_add_bo(vm->xef->client, pt->bo); 135 xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); 136 137 return pt; 138 139 err_kfree: 140 xe_pt_free(pt); 141 return ERR_PTR(err); 142 } 143 ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO); 144 145 /** 146 * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero 147 * entries. 148 * @tile: The tile the scratch pagetable of which to use. 149 * @vm: The vm we populate for. 150 * @pt: The pagetable the bo of which to initialize. 151 * 152 * Populate the page-table bo of @pt with entries pointing into the tile's 153 * scratch page-table tree if any. Otherwise populate with zeros. 154 */ 155 void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, 156 struct xe_pt *pt) 157 { 158 struct iosys_map *map = &pt->bo->vmap; 159 u64 empty; 160 int i; 161 162 if (!xe_vm_has_scratch(vm)) { 163 /* 164 * FIXME: Some memory is allocated already allocated to zero? 165 * Find out which memory that is and avoid this memset... 166 */ 167 xe_map_memset(vm->xe, map, 0, 0, SZ_4K); 168 } else { 169 empty = __xe_pt_empty_pte(tile, vm, pt->level); 170 for (i = 0; i < XE_PDES; i++) 171 xe_pt_write(vm->xe, map, i, empty); 172 } 173 } 174 175 /** 176 * xe_pt_shift() - Return the ilog2 value of the size of the address range of 177 * a page-table at a certain level. 178 * @level: The level. 179 * 180 * Return: The ilog2 value of the size of the address range of a page-table 181 * at level @level. 182 */ 183 unsigned int xe_pt_shift(unsigned int level) 184 { 185 return XE_PTE_SHIFT + XE_PDE_SHIFT * level; 186 } 187 188 /** 189 * xe_pt_destroy() - Destroy a page-table tree. 190 * @pt: The root of the page-table tree to destroy. 191 * @flags: vm flags. Currently unused. 192 * @deferred: List head of lockless list for deferred putting. NULL for 193 * immediate putting. 194 * 195 * Puts the page-table bo, recursively calls xe_pt_destroy on all children 196 * and finally frees @pt. TODO: Can we remove the @flags argument? 197 */ 198 void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) 199 { 200 int i; 201 202 if (!pt) 203 return; 204 205 XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); 206 xe_bo_unpin(pt->bo); 207 xe_bo_put_deferred(pt->bo, deferred); 208 209 if (pt->level > 0 && pt->num_live) { 210 struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); 211 212 for (i = 0; i < XE_PDES; i++) { 213 if (xe_pt_entry_staging(pt_dir, i)) 214 xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, 215 deferred); 216 } 217 } 218 xe_pt_free(pt); 219 } 220 221 /** 222 * DOC: Pagetable building 223 * 224 * Below we use the term "page-table" for both page-directories, containing 225 * pointers to lower level page-directories or page-tables, and level 0 226 * page-tables that contain only page-table-entries pointing to memory pages. 227 * 228 * When inserting an address range in an already existing page-table tree 229 * there will typically be a set of page-tables that are shared with other 230 * address ranges, and a set that are private to this address range. 231 * The set of shared page-tables can be at most two per level, 232 * and those can't be updated immediately because the entries of those 233 * page-tables may still be in use by the gpu for other mappings. Therefore 234 * when inserting entries into those, we instead stage those insertions by 235 * adding insertion data into struct xe_vm_pgtable_update structures. This 236 * data, (subtrees for the cpu and page-table-entries for the gpu) is then 237 * added in a separate commit step. CPU-data is committed while still under the 238 * vm lock, the object lock and for userptr, the notifier lock in read mode. 239 * The GPU async data is committed either by the GPU or CPU after fulfilling 240 * relevant dependencies. 241 * For non-shared page-tables (and, in fact, for shared ones that aren't 242 * existing at the time of staging), we add the data in-place without the 243 * special update structures. This private part of the page-table tree will 244 * remain disconnected from the vm page-table tree until data is committed to 245 * the shared page tables of the vm tree in the commit phase. 246 */ 247 248 struct xe_pt_update { 249 /** @update: The update structure we're building for this parent. */ 250 struct xe_vm_pgtable_update *update; 251 /** @parent: The parent. Used to detect a parent change. */ 252 struct xe_pt *parent; 253 /** @preexisting: Whether the parent was pre-existing or allocated */ 254 bool preexisting; 255 }; 256 257 struct xe_pt_stage_bind_walk { 258 /** base: The base class. */ 259 struct xe_pt_walk base; 260 261 /* Input parameters for the walk */ 262 /** @vm: The vm we're building for. */ 263 struct xe_vm *vm; 264 /** @tile: The tile we're building for. */ 265 struct xe_tile *tile; 266 /** @default_pte: PTE flag only template. No address is associated */ 267 u64 default_pte; 268 /** @dma_offset: DMA offset to add to the PTE. */ 269 u64 dma_offset; 270 /** 271 * @needs_64k: This address range enforces 64K alignment and 272 * granularity. 273 */ 274 bool needs_64K; 275 /** 276 * @vma: VMA being mapped 277 */ 278 struct xe_vma *vma; 279 280 /* Also input, but is updated during the walk*/ 281 /** @curs: The DMA address cursor. */ 282 struct xe_res_cursor *curs; 283 /** @va_curs_start: The Virtual address corresponding to @curs->start */ 284 u64 va_curs_start; 285 286 /* Output */ 287 struct xe_walk_update { 288 /** @wupd.entries: Caller provided storage. */ 289 struct xe_vm_pgtable_update *entries; 290 /** @wupd.num_used_entries: Number of update @entries used. */ 291 unsigned int num_used_entries; 292 /** @wupd.updates: Tracks the update entry at a given level */ 293 struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; 294 } wupd; 295 296 /* Walk state */ 297 /** 298 * @l0_end_addr: The end address of the current l0 leaf. Used for 299 * 64K granularity detection. 300 */ 301 u64 l0_end_addr; 302 /** @addr_64K: The start address of the current 64K chunk. */ 303 u64 addr_64K; 304 /** @found_64: Whether @add_64K actually points to a 64K chunk. */ 305 bool found_64K; 306 }; 307 308 static int 309 xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, 310 pgoff_t offset, bool alloc_entries) 311 { 312 struct xe_pt_update *upd = &wupd->updates[parent->level]; 313 struct xe_vm_pgtable_update *entry; 314 315 /* 316 * For *each level*, we could only have one active 317 * struct xt_pt_update at any one time. Once we move on to a 318 * new parent and page-directory, the old one is complete, and 319 * updates are either already stored in the build tree or in 320 * @wupd->entries 321 */ 322 if (likely(upd->parent == parent)) 323 return 0; 324 325 upd->parent = parent; 326 upd->preexisting = true; 327 328 if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) 329 return -EINVAL; 330 331 entry = wupd->entries + wupd->num_used_entries++; 332 upd->update = entry; 333 entry->ofs = offset; 334 entry->pt_bo = parent->bo; 335 entry->pt = parent; 336 entry->flags = 0; 337 entry->qwords = 0; 338 entry->pt_bo->update_index = -1; 339 340 if (alloc_entries) { 341 entry->pt_entries = kmalloc_array(XE_PDES, 342 sizeof(*entry->pt_entries), 343 GFP_KERNEL); 344 if (!entry->pt_entries) 345 return -ENOMEM; 346 } 347 348 return 0; 349 } 350 351 /* 352 * NOTE: This is a very frequently called function so we allow ourselves 353 * to annotate (using branch prediction hints) the fastpath of updating a 354 * non-pre-existing pagetable with leaf ptes. 355 */ 356 static int 357 xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, 358 pgoff_t offset, struct xe_pt *xe_child, u64 pte) 359 { 360 struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; 361 struct xe_pt_update *child_upd = xe_child ? 362 &xe_walk->wupd.updates[xe_child->level] : NULL; 363 int ret; 364 365 ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); 366 if (unlikely(ret)) 367 return ret; 368 369 /* 370 * Register this new pagetable so that it won't be recognized as 371 * a shared pagetable by a subsequent insertion. 372 */ 373 if (unlikely(child_upd)) { 374 child_upd->update = NULL; 375 child_upd->parent = xe_child; 376 child_upd->preexisting = false; 377 } 378 379 if (likely(!upd->preexisting)) { 380 /* Continue building a non-connected subtree. */ 381 struct iosys_map *map = &parent->bo->vmap; 382 383 if (unlikely(xe_child)) { 384 parent->base.children[offset] = &xe_child->base; 385 parent->base.staging[offset] = &xe_child->base; 386 } 387 388 xe_pt_write(xe_walk->vm->xe, map, offset, pte); 389 parent->num_live++; 390 } else { 391 /* Shared pt. Stage update. */ 392 unsigned int idx; 393 struct xe_vm_pgtable_update *entry = upd->update; 394 395 idx = offset - entry->ofs; 396 entry->pt_entries[idx].pt = xe_child; 397 entry->pt_entries[idx].pte = pte; 398 entry->qwords++; 399 } 400 401 return 0; 402 } 403 404 static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, 405 struct xe_pt_stage_bind_walk *xe_walk) 406 { 407 u64 size, dma; 408 409 if (level > MAX_HUGEPTE_LEVEL) 410 return false; 411 412 /* Does the virtual range requested cover a huge pte? */ 413 if (!xe_pt_covers(addr, next, level, &xe_walk->base)) 414 return false; 415 416 /* Does the DMA segment cover the whole pte? */ 417 if (next - xe_walk->va_curs_start > xe_walk->curs->size) 418 return false; 419 420 /* null VMA's do not have dma addresses */ 421 if (xe_vma_is_null(xe_walk->vma)) 422 return true; 423 424 /* Is the DMA address huge PTE size aligned? */ 425 size = next - addr; 426 dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); 427 428 return IS_ALIGNED(dma, size); 429 } 430 431 /* 432 * Scan the requested mapping to check whether it can be done entirely 433 * with 64K PTEs. 434 */ 435 static bool 436 xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) 437 { 438 struct xe_res_cursor curs = *xe_walk->curs; 439 440 if (!IS_ALIGNED(addr, SZ_64K)) 441 return false; 442 443 if (next > xe_walk->l0_end_addr) 444 return false; 445 446 /* null VMA's do not have dma addresses */ 447 if (xe_vma_is_null(xe_walk->vma)) 448 return true; 449 450 xe_res_next(&curs, addr - xe_walk->va_curs_start); 451 for (; addr < next; addr += SZ_64K) { 452 if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) 453 return false; 454 455 xe_res_next(&curs, SZ_64K); 456 } 457 458 return addr == next; 459 } 460 461 /* 462 * For non-compact "normal" 4K level-0 pagetables, we want to try to group 463 * addresses together in 64K-contigous regions to add a 64K TLB hint for the 464 * device to the PTE. 465 * This function determines whether the address is part of such a 466 * segment. For VRAM in normal pagetables, this is strictly necessary on 467 * some devices. 468 */ 469 static bool 470 xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) 471 { 472 /* Address is within an already found 64k region */ 473 if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) 474 return true; 475 476 xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); 477 xe_walk->addr_64K = addr; 478 479 return xe_walk->found_64K; 480 } 481 482 static int 483 xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, 484 unsigned int level, u64 addr, u64 next, 485 struct xe_ptw **child, 486 enum page_walk_action *action, 487 struct xe_pt_walk *walk) 488 { 489 struct xe_pt_stage_bind_walk *xe_walk = 490 container_of(walk, typeof(*xe_walk), base); 491 u16 pat_index = xe_walk->vma->pat_index; 492 struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); 493 struct xe_vm *vm = xe_walk->vm; 494 struct xe_pt *xe_child; 495 bool covers; 496 int ret = 0; 497 u64 pte; 498 499 /* Is this a leaf entry ?*/ 500 if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { 501 struct xe_res_cursor *curs = xe_walk->curs; 502 bool is_null = xe_vma_is_null(xe_walk->vma); 503 504 XE_WARN_ON(xe_walk->va_curs_start != addr); 505 506 pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : 507 xe_res_dma(curs) + xe_walk->dma_offset, 508 xe_walk->vma, pat_index, level); 509 pte |= xe_walk->default_pte; 510 511 /* 512 * Set the XE_PTE_PS64 hint if possible, otherwise if 513 * this device *requires* 64K PTE size for VRAM, fail. 514 */ 515 if (level == 0 && !xe_parent->is_compact) { 516 if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { 517 xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; 518 pte |= XE_PTE_PS64; 519 } else if (XE_WARN_ON(xe_walk->needs_64K)) { 520 return -EINVAL; 521 } 522 } 523 524 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); 525 if (unlikely(ret)) 526 return ret; 527 528 if (!is_null) 529 xe_res_next(curs, next - addr); 530 xe_walk->va_curs_start = next; 531 xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); 532 *action = ACTION_CONTINUE; 533 534 return ret; 535 } 536 537 /* 538 * Descending to lower level. Determine if we need to allocate a 539 * new page table or -directory, which we do if there is no 540 * previous one or there is one we can completely replace. 541 */ 542 if (level == 1) { 543 walk->shifts = xe_normal_pt_shifts; 544 xe_walk->l0_end_addr = next; 545 } 546 547 covers = xe_pt_covers(addr, next, level, &xe_walk->base); 548 if (covers || !*child) { 549 u64 flags = 0; 550 551 xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); 552 if (IS_ERR(xe_child)) 553 return PTR_ERR(xe_child); 554 555 xe_pt_set_addr(xe_child, 556 round_down(addr, 1ull << walk->shifts[level])); 557 558 if (!covers) 559 xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); 560 561 *child = &xe_child->base; 562 563 /* 564 * Prefer the compact pagetable layout for L0 if possible. Only 565 * possible if VMA covers entire 2MB region as compact 64k and 566 * 4k pages cannot be mixed within a 2MB region. 567 * TODO: Suballocate the pt bo to avoid wasting a lot of 568 * memory. 569 */ 570 if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && 571 covers && xe_pt_scan_64K(addr, next, xe_walk)) { 572 walk->shifts = xe_compact_pt_shifts; 573 xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; 574 flags |= XE_PDE_64K; 575 xe_child->is_compact = true; 576 } 577 578 pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; 579 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, 580 pte); 581 } 582 583 *action = ACTION_SUBTREE; 584 return ret; 585 } 586 587 static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { 588 .pt_entry = xe_pt_stage_bind_entry, 589 }; 590 591 /** 592 * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address 593 * range. 594 * @tile: The tile we're building for. 595 * @vma: The vma indicating the address range. 596 * @entries: Storage for the update entries used for connecting the tree to 597 * the main tree at commit time. 598 * @num_entries: On output contains the number of @entries used. 599 * 600 * This function builds a disconnected page-table tree for a given address 601 * range. The tree is connected to the main vm tree for the gpu using 602 * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). 603 * The function builds xe_vm_pgtable_update structures for already existing 604 * shared page-tables, and non-existing shared and non-shared page-tables 605 * are built and populated directly. 606 * 607 * Return 0 on success, negative error code on error. 608 */ 609 static int 610 xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, 611 struct xe_vm_pgtable_update *entries, u32 *num_entries) 612 { 613 struct xe_device *xe = tile_to_xe(tile); 614 struct xe_bo *bo = xe_vma_bo(vma); 615 bool is_devmem = !xe_vma_is_userptr(vma) && bo && 616 (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); 617 struct xe_res_cursor curs; 618 struct xe_pt_stage_bind_walk xe_walk = { 619 .base = { 620 .ops = &xe_pt_stage_bind_ops, 621 .shifts = xe_normal_pt_shifts, 622 .max_level = XE_PT_HIGHEST_LEVEL, 623 .staging = true, 624 }, 625 .vm = xe_vma_vm(vma), 626 .tile = tile, 627 .curs = &curs, 628 .va_curs_start = xe_vma_start(vma), 629 .vma = vma, 630 .wupd.entries = entries, 631 .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, 632 }; 633 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; 634 int ret; 635 636 /** 637 * Default atomic expectations for different allocation scenarios are as follows: 638 * 639 * 1. Traditional API: When the VM is not in LR mode: 640 * - Device atomics are expected to function with all allocations. 641 * 642 * 2. Compute/SVM API: When the VM is in LR mode: 643 * - Device atomics are the default behavior when the bo is placed in a single region. 644 * - In all other cases device atomics will be disabled with AE=0 until an application 645 * request differently using a ioctl like madvise. 646 */ 647 if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { 648 if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { 649 if (bo && xe_bo_has_single_placement(bo)) 650 xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; 651 /** 652 * If a SMEM+LMEM allocation is backed by SMEM, a device 653 * atomics will cause a gpu page fault and which then 654 * gets migrated to LMEM, bind such allocations with 655 * device atomics enabled. 656 */ 657 else if (is_devmem && !xe_bo_has_single_placement(bo)) 658 xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; 659 } else { 660 xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; 661 } 662 663 /** 664 * Unset AE if the platform(PVC) doesn't support it on an 665 * allocation 666 */ 667 if (!xe->info.has_device_atomics_on_smem && !is_devmem) 668 xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; 669 } 670 671 if (is_devmem) { 672 xe_walk.default_pte |= XE_PPGTT_PTE_DM; 673 xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); 674 } 675 676 if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) 677 xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); 678 679 xe_bo_assert_held(bo); 680 681 if (!xe_vma_is_null(vma)) { 682 if (xe_vma_is_userptr(vma)) 683 xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, 684 xe_vma_size(vma), &curs); 685 else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) 686 xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), 687 xe_vma_size(vma), &curs); 688 else 689 xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), 690 xe_vma_size(vma), &curs); 691 } else { 692 curs.size = xe_vma_size(vma); 693 } 694 695 ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), 696 xe_vma_end(vma), &xe_walk.base); 697 698 *num_entries = xe_walk.wupd.num_used_entries; 699 return ret; 700 } 701 702 /** 703 * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a 704 * shared pagetable. 705 * @addr: The start address within the non-shared pagetable. 706 * @end: The end address within the non-shared pagetable. 707 * @level: The level of the non-shared pagetable. 708 * @walk: Walk info. The function adjusts the walk action. 709 * @action: next action to perform (see enum page_walk_action) 710 * @offset: Ignored on input, First non-shared entry on output. 711 * @end_offset: Ignored on input, Last non-shared entry + 1 on output. 712 * 713 * A non-shared page-table has some entries that belong to the address range 714 * and others that don't. This function determines the entries that belong 715 * fully to the address range. Depending on level, some entries may 716 * partially belong to the address range (that can't happen at level 0). 717 * The function detects that and adjust those offsets to not include those 718 * partial entries. Iff it does detect partial entries, we know that there must 719 * be shared page tables also at lower levels, so it adjusts the walk action 720 * accordingly. 721 * 722 * Return: true if there were non-shared entries, false otherwise. 723 */ 724 static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, 725 struct xe_pt_walk *walk, 726 enum page_walk_action *action, 727 pgoff_t *offset, pgoff_t *end_offset) 728 { 729 u64 size = 1ull << walk->shifts[level]; 730 731 *offset = xe_pt_offset(addr, level, walk); 732 *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; 733 734 if (!level) 735 return true; 736 737 /* 738 * If addr or next are not size aligned, there are shared pts at lower 739 * level, so in that case traverse down the subtree 740 */ 741 *action = ACTION_CONTINUE; 742 if (!IS_ALIGNED(addr, size)) { 743 *action = ACTION_SUBTREE; 744 (*offset)++; 745 } 746 747 if (!IS_ALIGNED(end, size)) { 748 *action = ACTION_SUBTREE; 749 (*end_offset)--; 750 } 751 752 return *end_offset > *offset; 753 } 754 755 struct xe_pt_zap_ptes_walk { 756 /** @base: The walk base-class */ 757 struct xe_pt_walk base; 758 759 /* Input parameters for the walk */ 760 /** @tile: The tile we're building for */ 761 struct xe_tile *tile; 762 763 /* Output */ 764 /** @needs_invalidate: Whether we need to invalidate TLB*/ 765 bool needs_invalidate; 766 }; 767 768 static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, 769 unsigned int level, u64 addr, u64 next, 770 struct xe_ptw **child, 771 enum page_walk_action *action, 772 struct xe_pt_walk *walk) 773 { 774 struct xe_pt_zap_ptes_walk *xe_walk = 775 container_of(walk, typeof(*xe_walk), base); 776 struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); 777 pgoff_t end_offset; 778 779 XE_WARN_ON(!*child); 780 XE_WARN_ON(!level); 781 782 /* 783 * Note that we're called from an entry callback, and we're dealing 784 * with the child of that entry rather than the parent, so need to 785 * adjust level down. 786 */ 787 if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, 788 &end_offset)) { 789 xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, 790 offset * sizeof(u64), 0, 791 (end_offset - offset) * sizeof(u64)); 792 xe_walk->needs_invalidate = true; 793 } 794 795 return 0; 796 } 797 798 static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { 799 .pt_entry = xe_pt_zap_ptes_entry, 800 }; 801 802 /** 803 * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range 804 * @tile: The tile we're zapping for. 805 * @vma: GPU VMA detailing address range. 806 * 807 * Eviction and Userptr invalidation needs to be able to zap the 808 * gpu ptes of a given address range in pagefaulting mode. 809 * In order to be able to do that, that function needs access to the shared 810 * page-table entrieaso it can either clear the leaf PTEs or 811 * clear the pointers to lower-level page-tables. The caller is required 812 * to hold the necessary locks to ensure neither the page-table connectivity 813 * nor the page-table entries of the range is updated from under us. 814 * 815 * Return: Whether ptes were actually updated and a TLB invalidation is 816 * required. 817 */ 818 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) 819 { 820 struct xe_pt_zap_ptes_walk xe_walk = { 821 .base = { 822 .ops = &xe_pt_zap_ptes_ops, 823 .shifts = xe_normal_pt_shifts, 824 .max_level = XE_PT_HIGHEST_LEVEL, 825 }, 826 .tile = tile, 827 }; 828 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; 829 u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); 830 831 if (!(pt_mask & BIT(tile->id))) 832 return false; 833 834 (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), 835 xe_vma_end(vma), &xe_walk.base); 836 837 return xe_walk.needs_invalidate; 838 } 839 840 static void 841 xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, 842 struct iosys_map *map, void *data, 843 u32 qword_ofs, u32 num_qwords, 844 const struct xe_vm_pgtable_update *update) 845 { 846 struct xe_pt_entry *ptes = update->pt_entries; 847 u64 *ptr = data; 848 u32 i; 849 850 for (i = 0; i < num_qwords; i++) { 851 if (map) 852 xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * 853 sizeof(u64), u64, ptes[i].pte); 854 else 855 ptr[i] = ptes[i].pte; 856 } 857 } 858 859 static void xe_pt_cancel_bind(struct xe_vma *vma, 860 struct xe_vm_pgtable_update *entries, 861 u32 num_entries) 862 { 863 u32 i, j; 864 865 for (i = 0; i < num_entries; i++) { 866 struct xe_pt *pt = entries[i].pt; 867 868 if (!pt) 869 continue; 870 871 if (pt->level) { 872 for (j = 0; j < entries[i].qwords; j++) 873 xe_pt_destroy(entries[i].pt_entries[j].pt, 874 xe_vma_vm(vma)->flags, NULL); 875 } 876 877 kfree(entries[i].pt_entries); 878 entries[i].pt_entries = NULL; 879 entries[i].qwords = 0; 880 } 881 } 882 883 static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) 884 { 885 struct xe_vm *vm = xe_vma_vm(vma); 886 887 lockdep_assert_held(&vm->lock); 888 889 if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) 890 dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); 891 892 xe_vm_assert_held(vm); 893 } 894 895 static void xe_pt_commit_locks_assert(struct xe_vma *vma) 896 { 897 struct xe_vm *vm = xe_vma_vm(vma); 898 899 xe_pt_commit_prepare_locks_assert(vma); 900 901 if (xe_vma_is_userptr(vma)) 902 lockdep_assert_held_read(&vm->userptr.notifier_lock); 903 } 904 905 static void xe_pt_commit(struct xe_vma *vma, 906 struct xe_vm_pgtable_update *entries, 907 u32 num_entries, struct llist_head *deferred) 908 { 909 u32 i, j; 910 911 xe_pt_commit_locks_assert(vma); 912 913 for (i = 0; i < num_entries; i++) { 914 struct xe_pt *pt = entries[i].pt; 915 struct xe_pt_dir *pt_dir; 916 917 if (!pt->level) 918 continue; 919 920 pt_dir = as_xe_pt_dir(pt); 921 for (j = 0; j < entries[i].qwords; j++) { 922 struct xe_pt *oldpte = entries[i].pt_entries[j].pt; 923 int j_ = j + entries[i].ofs; 924 925 pt_dir->children[j_] = pt_dir->staging[j_]; 926 xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); 927 } 928 } 929 } 930 931 static void xe_pt_abort_bind(struct xe_vma *vma, 932 struct xe_vm_pgtable_update *entries, 933 u32 num_entries, bool rebind) 934 { 935 int i, j; 936 937 xe_pt_commit_prepare_locks_assert(vma); 938 939 for (i = num_entries - 1; i >= 0; --i) { 940 struct xe_pt *pt = entries[i].pt; 941 struct xe_pt_dir *pt_dir; 942 943 if (!rebind) 944 pt->num_live -= entries[i].qwords; 945 946 if (!pt->level) 947 continue; 948 949 pt_dir = as_xe_pt_dir(pt); 950 for (j = 0; j < entries[i].qwords; j++) { 951 u32 j_ = j + entries[i].ofs; 952 struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); 953 struct xe_pt *oldpte = entries[i].pt_entries[j].pt; 954 955 pt_dir->staging[j_] = oldpte ? &oldpte->base : 0; 956 xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); 957 } 958 } 959 } 960 961 static void xe_pt_commit_prepare_bind(struct xe_vma *vma, 962 struct xe_vm_pgtable_update *entries, 963 u32 num_entries, bool rebind) 964 { 965 u32 i, j; 966 967 xe_pt_commit_prepare_locks_assert(vma); 968 969 for (i = 0; i < num_entries; i++) { 970 struct xe_pt *pt = entries[i].pt; 971 struct xe_pt_dir *pt_dir; 972 973 if (!rebind) 974 pt->num_live += entries[i].qwords; 975 976 if (!pt->level) 977 continue; 978 979 pt_dir = as_xe_pt_dir(pt); 980 for (j = 0; j < entries[i].qwords; j++) { 981 u32 j_ = j + entries[i].ofs; 982 struct xe_pt *newpte = entries[i].pt_entries[j].pt; 983 struct xe_pt *oldpte = NULL; 984 985 if (xe_pt_entry_staging(pt_dir, j_)) 986 oldpte = xe_pt_entry_staging(pt_dir, j_); 987 988 pt_dir->staging[j_] = &newpte->base; 989 entries[i].pt_entries[j].pt = oldpte; 990 } 991 } 992 } 993 994 static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, 995 u32 num_entries) 996 { 997 u32 i; 998 999 for (i = 0; i < num_entries; i++) 1000 kfree(entries[i].pt_entries); 1001 } 1002 1003 static int 1004 xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, 1005 struct xe_vm_pgtable_update *entries, u32 *num_entries) 1006 { 1007 int err; 1008 1009 *num_entries = 0; 1010 err = xe_pt_stage_bind(tile, vma, entries, num_entries); 1011 if (!err) 1012 xe_tile_assert(tile, *num_entries); 1013 1014 return err; 1015 } 1016 1017 static void xe_vm_dbg_print_entries(struct xe_device *xe, 1018 const struct xe_vm_pgtable_update *entries, 1019 unsigned int num_entries, bool bind) 1020 #if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) 1021 { 1022 unsigned int i; 1023 1024 vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", 1025 num_entries); 1026 for (i = 0; i < num_entries; i++) { 1027 const struct xe_vm_pgtable_update *entry = &entries[i]; 1028 struct xe_pt *xe_pt = entry->pt; 1029 u64 page_size = 1ull << xe_pt_shift(xe_pt->level); 1030 u64 end; 1031 u64 start; 1032 1033 xe_assert(xe, !entry->pt->is_compact); 1034 start = entry->ofs * page_size; 1035 end = start + page_size * entry->qwords; 1036 vm_dbg(&xe->drm, 1037 "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", 1038 i, xe_pt->level, entry->ofs, entry->qwords, 1039 xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); 1040 } 1041 } 1042 #else 1043 {} 1044 #endif 1045 1046 static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) 1047 { 1048 int i; 1049 1050 for (i = 0; i < num_syncs; i++) { 1051 struct dma_fence *fence = syncs[i].fence; 1052 1053 if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 1054 &fence->flags)) 1055 return false; 1056 } 1057 1058 return true; 1059 } 1060 1061 static int job_test_add_deps(struct xe_sched_job *job, 1062 struct dma_resv *resv, 1063 enum dma_resv_usage usage) 1064 { 1065 if (!job) { 1066 if (!dma_resv_test_signaled(resv, usage)) 1067 return -ETIME; 1068 1069 return 0; 1070 } 1071 1072 return xe_sched_job_add_deps(job, resv, usage); 1073 } 1074 1075 static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) 1076 { 1077 struct xe_bo *bo = xe_vma_bo(vma); 1078 1079 xe_bo_assert_held(bo); 1080 1081 if (bo && !bo->vm) 1082 return job_test_add_deps(job, bo->ttm.base.resv, 1083 DMA_RESV_USAGE_KERNEL); 1084 1085 return 0; 1086 } 1087 1088 static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, 1089 struct xe_sched_job *job) 1090 { 1091 int err = 0; 1092 1093 switch (op->base.op) { 1094 case DRM_GPUVA_OP_MAP: 1095 if (!op->map.immediate && xe_vm_in_fault_mode(vm)) 1096 break; 1097 1098 err = vma_add_deps(op->map.vma, job); 1099 break; 1100 case DRM_GPUVA_OP_REMAP: 1101 if (op->remap.prev) 1102 err = vma_add_deps(op->remap.prev, job); 1103 if (!err && op->remap.next) 1104 err = vma_add_deps(op->remap.next, job); 1105 break; 1106 case DRM_GPUVA_OP_UNMAP: 1107 break; 1108 case DRM_GPUVA_OP_PREFETCH: 1109 err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); 1110 break; 1111 default: 1112 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1113 } 1114 1115 return err; 1116 } 1117 1118 static int xe_pt_vm_dependencies(struct xe_sched_job *job, 1119 struct xe_vm *vm, 1120 struct xe_vma_ops *vops, 1121 struct xe_vm_pgtable_update_ops *pt_update_ops, 1122 struct xe_range_fence_tree *rftree) 1123 { 1124 struct xe_range_fence *rtfence; 1125 struct dma_fence *fence; 1126 struct xe_vma_op *op; 1127 int err = 0, i; 1128 1129 xe_vm_assert_held(vm); 1130 1131 if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) 1132 return -ETIME; 1133 1134 if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) 1135 return -ETIME; 1136 1137 if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { 1138 err = job_test_add_deps(job, xe_vm_resv(vm), 1139 pt_update_ops->wait_vm_bookkeep ? 1140 DMA_RESV_USAGE_BOOKKEEP : 1141 DMA_RESV_USAGE_KERNEL); 1142 if (err) 1143 return err; 1144 } 1145 1146 rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, 1147 pt_update_ops->last); 1148 while (rtfence) { 1149 fence = rtfence->fence; 1150 1151 if (!dma_fence_is_signaled(fence)) { 1152 /* 1153 * Is this a CPU update? GPU is busy updating, so return 1154 * an error 1155 */ 1156 if (!job) 1157 return -ETIME; 1158 1159 dma_fence_get(fence); 1160 err = drm_sched_job_add_dependency(&job->drm, fence); 1161 if (err) 1162 return err; 1163 } 1164 1165 rtfence = xe_range_fence_tree_next(rtfence, 1166 pt_update_ops->start, 1167 pt_update_ops->last); 1168 } 1169 1170 list_for_each_entry(op, &vops->list, link) { 1171 err = op_add_deps(vm, op, job); 1172 if (err) 1173 return err; 1174 } 1175 1176 if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { 1177 if (job) 1178 err = xe_sched_job_last_fence_add_dep(job, vm); 1179 else 1180 err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); 1181 } 1182 1183 for (i = 0; job && !err && i < vops->num_syncs; i++) 1184 err = xe_sync_entry_add_deps(&vops->syncs[i], job); 1185 1186 return err; 1187 } 1188 1189 static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) 1190 { 1191 struct xe_vma_ops *vops = pt_update->vops; 1192 struct xe_vm *vm = vops->vm; 1193 struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; 1194 struct xe_vm_pgtable_update_ops *pt_update_ops = 1195 &vops->pt_update_ops[pt_update->tile_id]; 1196 1197 return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, 1198 pt_update_ops, rftree); 1199 } 1200 1201 #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT 1202 1203 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) 1204 { 1205 u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; 1206 static u32 count; 1207 1208 if (count++ % divisor == divisor - 1) { 1209 uvma->userptr.divisor = divisor << 1; 1210 return true; 1211 } 1212 1213 return false; 1214 } 1215 1216 #else 1217 1218 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) 1219 { 1220 return false; 1221 } 1222 1223 #endif 1224 1225 static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, 1226 struct xe_vm_pgtable_update_ops *pt_update) 1227 { 1228 struct xe_userptr_vma *uvma; 1229 unsigned long notifier_seq; 1230 1231 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1232 1233 if (!xe_vma_is_userptr(vma)) 1234 return 0; 1235 1236 uvma = to_userptr_vma(vma); 1237 if (xe_pt_userptr_inject_eagain(uvma)) 1238 xe_vma_userptr_force_invalidate(uvma); 1239 1240 notifier_seq = uvma->userptr.notifier_seq; 1241 1242 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 1243 notifier_seq)) 1244 return 0; 1245 1246 if (xe_vm_in_fault_mode(vm)) 1247 return -EAGAIN; 1248 1249 /* 1250 * Just continue the operation since exec or rebind worker 1251 * will take care of rebinding. 1252 */ 1253 return 0; 1254 } 1255 1256 static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, 1257 struct xe_vm_pgtable_update_ops *pt_update) 1258 { 1259 int err = 0; 1260 1261 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1262 1263 switch (op->base.op) { 1264 case DRM_GPUVA_OP_MAP: 1265 if (!op->map.immediate && xe_vm_in_fault_mode(vm)) 1266 break; 1267 1268 err = vma_check_userptr(vm, op->map.vma, pt_update); 1269 break; 1270 case DRM_GPUVA_OP_REMAP: 1271 if (op->remap.prev) 1272 err = vma_check_userptr(vm, op->remap.prev, pt_update); 1273 if (!err && op->remap.next) 1274 err = vma_check_userptr(vm, op->remap.next, pt_update); 1275 break; 1276 case DRM_GPUVA_OP_UNMAP: 1277 break; 1278 case DRM_GPUVA_OP_PREFETCH: 1279 err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), 1280 pt_update); 1281 break; 1282 default: 1283 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1284 } 1285 1286 return err; 1287 } 1288 1289 static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) 1290 { 1291 struct xe_vm *vm = pt_update->vops->vm; 1292 struct xe_vma_ops *vops = pt_update->vops; 1293 struct xe_vm_pgtable_update_ops *pt_update_ops = 1294 &vops->pt_update_ops[pt_update->tile_id]; 1295 struct xe_vma_op *op; 1296 int err; 1297 1298 err = xe_pt_pre_commit(pt_update); 1299 if (err) 1300 return err; 1301 1302 down_read(&vm->userptr.notifier_lock); 1303 1304 list_for_each_entry(op, &vops->list, link) { 1305 err = op_check_userptr(vm, op, pt_update_ops); 1306 if (err) { 1307 up_read(&vm->userptr.notifier_lock); 1308 break; 1309 } 1310 } 1311 1312 return err; 1313 } 1314 1315 struct invalidation_fence { 1316 struct xe_gt_tlb_invalidation_fence base; 1317 struct xe_gt *gt; 1318 struct dma_fence *fence; 1319 struct dma_fence_cb cb; 1320 struct work_struct work; 1321 u64 start; 1322 u64 end; 1323 u32 asid; 1324 }; 1325 1326 static void invalidation_fence_cb(struct dma_fence *fence, 1327 struct dma_fence_cb *cb) 1328 { 1329 struct invalidation_fence *ifence = 1330 container_of(cb, struct invalidation_fence, cb); 1331 struct xe_device *xe = gt_to_xe(ifence->gt); 1332 1333 trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); 1334 if (!ifence->fence->error) { 1335 queue_work(system_wq, &ifence->work); 1336 } else { 1337 ifence->base.base.error = ifence->fence->error; 1338 xe_gt_tlb_invalidation_fence_signal(&ifence->base); 1339 } 1340 dma_fence_put(ifence->fence); 1341 } 1342 1343 static void invalidation_fence_work_func(struct work_struct *w) 1344 { 1345 struct invalidation_fence *ifence = 1346 container_of(w, struct invalidation_fence, work); 1347 struct xe_device *xe = gt_to_xe(ifence->gt); 1348 1349 trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); 1350 xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, 1351 ifence->end, ifence->asid); 1352 } 1353 1354 static void invalidation_fence_init(struct xe_gt *gt, 1355 struct invalidation_fence *ifence, 1356 struct dma_fence *fence, 1357 u64 start, u64 end, u32 asid) 1358 { 1359 int ret; 1360 1361 trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); 1362 1363 xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); 1364 1365 ifence->fence = fence; 1366 ifence->gt = gt; 1367 ifence->start = start; 1368 ifence->end = end; 1369 ifence->asid = asid; 1370 1371 INIT_WORK(&ifence->work, invalidation_fence_work_func); 1372 ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); 1373 if (ret == -ENOENT) { 1374 dma_fence_put(ifence->fence); /* Usually dropped in CB */ 1375 invalidation_fence_work_func(&ifence->work); 1376 } else if (ret) { 1377 dma_fence_put(&ifence->base.base); /* Caller ref */ 1378 dma_fence_put(&ifence->base.base); /* Creation ref */ 1379 } 1380 1381 xe_gt_assert(gt, !ret || ret == -ENOENT); 1382 } 1383 1384 struct xe_pt_stage_unbind_walk { 1385 /** @base: The pagewalk base-class. */ 1386 struct xe_pt_walk base; 1387 1388 /* Input parameters for the walk */ 1389 /** @tile: The tile we're unbinding from. */ 1390 struct xe_tile *tile; 1391 1392 /** 1393 * @modified_start: Walk range start, modified to include any 1394 * shared pagetables that we're the only user of and can thus 1395 * treat as private. 1396 */ 1397 u64 modified_start; 1398 /** @modified_end: Walk range start, modified like @modified_start. */ 1399 u64 modified_end; 1400 1401 /* Output */ 1402 /* @wupd: Structure to track the page-table updates we're building */ 1403 struct xe_walk_update wupd; 1404 }; 1405 1406 /* 1407 * Check whether this range is the only one populating this pagetable, 1408 * and in that case, update the walk range checks so that higher levels don't 1409 * view us as a shared pagetable. 1410 */ 1411 static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, 1412 const struct xe_pt *child, 1413 enum page_walk_action *action, 1414 struct xe_pt_walk *walk) 1415 { 1416 struct xe_pt_stage_unbind_walk *xe_walk = 1417 container_of(walk, typeof(*xe_walk), base); 1418 unsigned int shift = walk->shifts[level]; 1419 u64 size = 1ull << shift; 1420 1421 if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && 1422 ((next - addr) >> shift) == child->num_live) { 1423 u64 size = 1ull << walk->shifts[level + 1]; 1424 1425 *action = ACTION_CONTINUE; 1426 1427 if (xe_walk->modified_start >= addr) 1428 xe_walk->modified_start = round_down(addr, size); 1429 if (xe_walk->modified_end <= next) 1430 xe_walk->modified_end = round_up(next, size); 1431 1432 return true; 1433 } 1434 1435 return false; 1436 } 1437 1438 static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, 1439 unsigned int level, u64 addr, u64 next, 1440 struct xe_ptw **child, 1441 enum page_walk_action *action, 1442 struct xe_pt_walk *walk) 1443 { 1444 struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); 1445 1446 XE_WARN_ON(!*child); 1447 XE_WARN_ON(!level); 1448 1449 xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); 1450 1451 return 0; 1452 } 1453 1454 static int 1455 xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, 1456 unsigned int level, u64 addr, u64 next, 1457 struct xe_ptw **child, 1458 enum page_walk_action *action, 1459 struct xe_pt_walk *walk) 1460 { 1461 struct xe_pt_stage_unbind_walk *xe_walk = 1462 container_of(walk, typeof(*xe_walk), base); 1463 struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); 1464 pgoff_t end_offset; 1465 u64 size = 1ull << walk->shifts[--level]; 1466 int err; 1467 1468 if (!IS_ALIGNED(addr, size)) 1469 addr = xe_walk->modified_start; 1470 if (!IS_ALIGNED(next, size)) 1471 next = xe_walk->modified_end; 1472 1473 /* Parent == *child is the root pt. Don't kill it. */ 1474 if (parent != *child && 1475 xe_pt_check_kill(addr, next, level, xe_child, action, walk)) 1476 return 0; 1477 1478 if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, 1479 &end_offset)) 1480 return 0; 1481 1482 err = xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true); 1483 if (err) 1484 return err; 1485 1486 xe_walk->wupd.updates[level].update->qwords = end_offset - offset; 1487 1488 return 0; 1489 } 1490 1491 static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { 1492 .pt_entry = xe_pt_stage_unbind_entry, 1493 .pt_post_descend = xe_pt_stage_unbind_post_descend, 1494 }; 1495 1496 /** 1497 * xe_pt_stage_unbind() - Build page-table update structures for an unbind 1498 * operation 1499 * @tile: The tile we're unbinding for. 1500 * @vma: The vma we're unbinding. 1501 * @entries: Caller-provided storage for the update structures. 1502 * 1503 * Builds page-table update structures for an unbind operation. The function 1504 * will attempt to remove all page-tables that we're the only user 1505 * of, and for that to work, the unbind operation must be committed in the 1506 * same critical section that blocks racing binds to the same page-table tree. 1507 * 1508 * Return: The number of entries used. 1509 */ 1510 static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, 1511 struct xe_vm_pgtable_update *entries) 1512 { 1513 struct xe_pt_stage_unbind_walk xe_walk = { 1514 .base = { 1515 .ops = &xe_pt_stage_unbind_ops, 1516 .shifts = xe_normal_pt_shifts, 1517 .max_level = XE_PT_HIGHEST_LEVEL, 1518 .staging = true, 1519 }, 1520 .tile = tile, 1521 .modified_start = xe_vma_start(vma), 1522 .modified_end = xe_vma_end(vma), 1523 .wupd.entries = entries, 1524 }; 1525 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; 1526 1527 (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), 1528 xe_vma_end(vma), &xe_walk.base); 1529 1530 return xe_walk.wupd.num_used_entries; 1531 } 1532 1533 static void 1534 xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, 1535 struct xe_tile *tile, struct iosys_map *map, 1536 void *ptr, u32 qword_ofs, u32 num_qwords, 1537 const struct xe_vm_pgtable_update *update) 1538 { 1539 struct xe_vm *vm = pt_update->vops->vm; 1540 u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); 1541 int i; 1542 1543 if (map && map->is_iomem) 1544 for (i = 0; i < num_qwords; ++i) 1545 xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * 1546 sizeof(u64), u64, empty); 1547 else if (map) 1548 memset64(map->vaddr + qword_ofs * sizeof(u64), empty, 1549 num_qwords); 1550 else 1551 memset64(ptr, empty, num_qwords); 1552 } 1553 1554 static void xe_pt_abort_unbind(struct xe_vma *vma, 1555 struct xe_vm_pgtable_update *entries, 1556 u32 num_entries) 1557 { 1558 int i, j; 1559 1560 xe_pt_commit_prepare_locks_assert(vma); 1561 1562 for (i = num_entries - 1; i >= 0; --i) { 1563 struct xe_vm_pgtable_update *entry = &entries[i]; 1564 struct xe_pt *pt = entry->pt; 1565 struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); 1566 1567 pt->num_live += entry->qwords; 1568 1569 if (!pt->level) 1570 continue; 1571 1572 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) 1573 pt_dir->staging[j] = 1574 entries[i].pt_entries[j - entry->ofs].pt ? 1575 &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; 1576 } 1577 } 1578 1579 static void 1580 xe_pt_commit_prepare_unbind(struct xe_vma *vma, 1581 struct xe_vm_pgtable_update *entries, 1582 u32 num_entries) 1583 { 1584 int i, j; 1585 1586 xe_pt_commit_prepare_locks_assert(vma); 1587 1588 for (i = 0; i < num_entries; ++i) { 1589 struct xe_vm_pgtable_update *entry = &entries[i]; 1590 struct xe_pt *pt = entry->pt; 1591 struct xe_pt_dir *pt_dir; 1592 1593 pt->num_live -= entry->qwords; 1594 if (!pt->level) 1595 continue; 1596 1597 pt_dir = as_xe_pt_dir(pt); 1598 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { 1599 entry->pt_entries[j - entry->ofs].pt = 1600 xe_pt_entry_staging(pt_dir, j); 1601 pt_dir->staging[j] = NULL; 1602 } 1603 } 1604 } 1605 1606 static void 1607 xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, 1608 struct xe_vma *vma) 1609 { 1610 u32 current_op = pt_update_ops->current_op; 1611 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1612 int i, level = 0; 1613 u64 start, last; 1614 1615 for (i = 0; i < pt_op->num_entries; i++) { 1616 const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; 1617 1618 if (entry->pt->level > level) 1619 level = entry->pt->level; 1620 } 1621 1622 /* Greedy (non-optimal) calculation but simple */ 1623 start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); 1624 last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; 1625 1626 if (start < pt_update_ops->start) 1627 pt_update_ops->start = start; 1628 if (last > pt_update_ops->last) 1629 pt_update_ops->last = last; 1630 } 1631 1632 static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) 1633 { 1634 int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; 1635 1636 if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) 1637 return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1638 xe->info.tile_count << shift); 1639 1640 return 0; 1641 } 1642 1643 static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, 1644 struct xe_vm_pgtable_update_ops *pt_update_ops, 1645 struct xe_vma *vma) 1646 { 1647 u32 current_op = pt_update_ops->current_op; 1648 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1649 int err; 1650 1651 xe_bo_assert_held(xe_vma_bo(vma)); 1652 1653 vm_dbg(&xe_vma_vm(vma)->xe->drm, 1654 "Preparing bind, with range [%llx...%llx)\n", 1655 xe_vma_start(vma), xe_vma_end(vma) - 1); 1656 1657 pt_op->vma = NULL; 1658 pt_op->bind = true; 1659 pt_op->rebind = BIT(tile->id) & vma->tile_present; 1660 1661 err = vma_reserve_fences(tile_to_xe(tile), vma); 1662 if (err) 1663 return err; 1664 1665 err = xe_pt_prepare_bind(tile, vma, pt_op->entries, 1666 &pt_op->num_entries); 1667 if (!err) { 1668 xe_tile_assert(tile, pt_op->num_entries <= 1669 ARRAY_SIZE(pt_op->entries)); 1670 xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, 1671 pt_op->num_entries, true); 1672 1673 xe_pt_update_ops_rfence_interval(pt_update_ops, vma); 1674 ++pt_update_ops->current_op; 1675 pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); 1676 1677 /* 1678 * If rebind, we have to invalidate TLB on !LR vms to invalidate 1679 * cached PTEs point to freed memory. On LR vms this is done 1680 * automatically when the context is re-enabled by the rebind worker, 1681 * or in fault mode it was invalidated on PTE zapping. 1682 * 1683 * If !rebind, and scratch enabled VMs, there is a chance the scratch 1684 * PTE is already cached in the TLB so it needs to be invalidated. 1685 * On !LR VMs this is done in the ring ops preceding a batch, but on 1686 * non-faulting LR, in particular on user-space batch buffer chaining, 1687 * it needs to be done here. 1688 */ 1689 if ((!pt_op->rebind && xe_vm_has_scratch(vm) && 1690 xe_vm_in_preempt_fence_mode(vm))) 1691 pt_update_ops->needs_invalidation = true; 1692 else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) 1693 /* We bump also if batch_invalidate_tlb is true */ 1694 vm->tlb_flush_seqno++; 1695 1696 vma->tile_staged |= BIT(tile->id); 1697 pt_op->vma = vma; 1698 xe_pt_commit_prepare_bind(vma, pt_op->entries, 1699 pt_op->num_entries, pt_op->rebind); 1700 } else { 1701 xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); 1702 } 1703 1704 return err; 1705 } 1706 1707 static int unbind_op_prepare(struct xe_tile *tile, 1708 struct xe_vm_pgtable_update_ops *pt_update_ops, 1709 struct xe_vma *vma) 1710 { 1711 u32 current_op = pt_update_ops->current_op; 1712 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1713 int err; 1714 1715 if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) 1716 return 0; 1717 1718 xe_bo_assert_held(xe_vma_bo(vma)); 1719 1720 vm_dbg(&xe_vma_vm(vma)->xe->drm, 1721 "Preparing unbind, with range [%llx...%llx)\n", 1722 xe_vma_start(vma), xe_vma_end(vma) - 1); 1723 1724 /* 1725 * Wait for invalidation to complete. Can corrupt internal page table 1726 * state if an invalidation is running while preparing an unbind. 1727 */ 1728 if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) 1729 mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); 1730 1731 pt_op->vma = vma; 1732 pt_op->bind = false; 1733 pt_op->rebind = false; 1734 1735 err = vma_reserve_fences(tile_to_xe(tile), vma); 1736 if (err) 1737 return err; 1738 1739 pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); 1740 1741 xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, 1742 pt_op->num_entries, false); 1743 xe_pt_update_ops_rfence_interval(pt_update_ops, vma); 1744 ++pt_update_ops->current_op; 1745 pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); 1746 pt_update_ops->needs_invalidation = true; 1747 1748 xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); 1749 1750 return 0; 1751 } 1752 1753 static int op_prepare(struct xe_vm *vm, 1754 struct xe_tile *tile, 1755 struct xe_vm_pgtable_update_ops *pt_update_ops, 1756 struct xe_vma_op *op) 1757 { 1758 int err = 0; 1759 1760 xe_vm_assert_held(vm); 1761 1762 switch (op->base.op) { 1763 case DRM_GPUVA_OP_MAP: 1764 if (!op->map.immediate && xe_vm_in_fault_mode(vm)) 1765 break; 1766 1767 err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); 1768 pt_update_ops->wait_vm_kernel = true; 1769 break; 1770 case DRM_GPUVA_OP_REMAP: 1771 err = unbind_op_prepare(tile, pt_update_ops, 1772 gpuva_to_vma(op->base.remap.unmap->va)); 1773 1774 if (!err && op->remap.prev) { 1775 err = bind_op_prepare(vm, tile, pt_update_ops, 1776 op->remap.prev); 1777 pt_update_ops->wait_vm_bookkeep = true; 1778 } 1779 if (!err && op->remap.next) { 1780 err = bind_op_prepare(vm, tile, pt_update_ops, 1781 op->remap.next); 1782 pt_update_ops->wait_vm_bookkeep = true; 1783 } 1784 break; 1785 case DRM_GPUVA_OP_UNMAP: 1786 err = unbind_op_prepare(tile, pt_update_ops, 1787 gpuva_to_vma(op->base.unmap.va)); 1788 break; 1789 case DRM_GPUVA_OP_PREFETCH: 1790 err = bind_op_prepare(vm, tile, pt_update_ops, 1791 gpuva_to_vma(op->base.prefetch.va)); 1792 pt_update_ops->wait_vm_kernel = true; 1793 break; 1794 default: 1795 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1796 } 1797 1798 return err; 1799 } 1800 1801 static void 1802 xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) 1803 { 1804 init_llist_head(&pt_update_ops->deferred); 1805 pt_update_ops->start = ~0x0ull; 1806 pt_update_ops->last = 0x0ull; 1807 } 1808 1809 /** 1810 * xe_pt_update_ops_prepare() - Prepare PT update operations 1811 * @tile: Tile of PT update operations 1812 * @vops: VMA operationa 1813 * 1814 * Prepare PT update operations which includes updating internal PT state, 1815 * allocate memory for page tables, populate page table being pruned in, and 1816 * create PT update operations for leaf insertion / removal. 1817 * 1818 * Return: 0 on success, negative error code on error. 1819 */ 1820 int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) 1821 { 1822 struct xe_vm_pgtable_update_ops *pt_update_ops = 1823 &vops->pt_update_ops[tile->id]; 1824 struct xe_vma_op *op; 1825 int shift = tile->media_gt ? 1 : 0; 1826 int err; 1827 1828 lockdep_assert_held(&vops->vm->lock); 1829 xe_vm_assert_held(vops->vm); 1830 1831 xe_pt_update_ops_init(pt_update_ops); 1832 1833 err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), 1834 tile_to_xe(tile)->info.tile_count << shift); 1835 if (err) 1836 return err; 1837 1838 list_for_each_entry(op, &vops->list, link) { 1839 err = op_prepare(vops->vm, tile, pt_update_ops, op); 1840 1841 if (err) 1842 return err; 1843 } 1844 1845 xe_tile_assert(tile, pt_update_ops->current_op <= 1846 pt_update_ops->num_ops); 1847 1848 #ifdef TEST_VM_OPS_ERROR 1849 if (vops->inject_error && 1850 vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) 1851 return -ENOSPC; 1852 #endif 1853 1854 return 0; 1855 } 1856 ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); 1857 1858 static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, 1859 struct xe_vm_pgtable_update_ops *pt_update_ops, 1860 struct xe_vma *vma, struct dma_fence *fence, 1861 struct dma_fence *fence2) 1862 { 1863 if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { 1864 dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, 1865 pt_update_ops->wait_vm_bookkeep ? 1866 DMA_RESV_USAGE_KERNEL : 1867 DMA_RESV_USAGE_BOOKKEEP); 1868 if (fence2) 1869 dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, 1870 pt_update_ops->wait_vm_bookkeep ? 1871 DMA_RESV_USAGE_KERNEL : 1872 DMA_RESV_USAGE_BOOKKEEP); 1873 } 1874 vma->tile_present |= BIT(tile->id); 1875 vma->tile_staged &= ~BIT(tile->id); 1876 if (xe_vma_is_userptr(vma)) { 1877 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1878 to_userptr_vma(vma)->userptr.initial_bind = true; 1879 } 1880 1881 /* 1882 * Kick rebind worker if this bind triggers preempt fences and not in 1883 * the rebind worker 1884 */ 1885 if (pt_update_ops->wait_vm_bookkeep && 1886 xe_vm_in_preempt_fence_mode(vm) && 1887 !current->mm) 1888 xe_vm_queue_rebind_worker(vm); 1889 } 1890 1891 static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, 1892 struct xe_vm_pgtable_update_ops *pt_update_ops, 1893 struct xe_vma *vma, struct dma_fence *fence, 1894 struct dma_fence *fence2) 1895 { 1896 if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { 1897 dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, 1898 pt_update_ops->wait_vm_bookkeep ? 1899 DMA_RESV_USAGE_KERNEL : 1900 DMA_RESV_USAGE_BOOKKEEP); 1901 if (fence2) 1902 dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, 1903 pt_update_ops->wait_vm_bookkeep ? 1904 DMA_RESV_USAGE_KERNEL : 1905 DMA_RESV_USAGE_BOOKKEEP); 1906 } 1907 vma->tile_present &= ~BIT(tile->id); 1908 if (!vma->tile_present) { 1909 list_del_init(&vma->combined_links.rebind); 1910 if (xe_vma_is_userptr(vma)) { 1911 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1912 1913 spin_lock(&vm->userptr.invalidated_lock); 1914 list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); 1915 spin_unlock(&vm->userptr.invalidated_lock); 1916 } 1917 } 1918 } 1919 1920 static void op_commit(struct xe_vm *vm, 1921 struct xe_tile *tile, 1922 struct xe_vm_pgtable_update_ops *pt_update_ops, 1923 struct xe_vma_op *op, struct dma_fence *fence, 1924 struct dma_fence *fence2) 1925 { 1926 xe_vm_assert_held(vm); 1927 1928 switch (op->base.op) { 1929 case DRM_GPUVA_OP_MAP: 1930 if (!op->map.immediate && xe_vm_in_fault_mode(vm)) 1931 break; 1932 1933 bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, 1934 fence2); 1935 break; 1936 case DRM_GPUVA_OP_REMAP: 1937 unbind_op_commit(vm, tile, pt_update_ops, 1938 gpuva_to_vma(op->base.remap.unmap->va), fence, 1939 fence2); 1940 1941 if (op->remap.prev) 1942 bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, 1943 fence, fence2); 1944 if (op->remap.next) 1945 bind_op_commit(vm, tile, pt_update_ops, op->remap.next, 1946 fence, fence2); 1947 break; 1948 case DRM_GPUVA_OP_UNMAP: 1949 unbind_op_commit(vm, tile, pt_update_ops, 1950 gpuva_to_vma(op->base.unmap.va), fence, fence2); 1951 break; 1952 case DRM_GPUVA_OP_PREFETCH: 1953 bind_op_commit(vm, tile, pt_update_ops, 1954 gpuva_to_vma(op->base.prefetch.va), fence, fence2); 1955 break; 1956 default: 1957 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1958 } 1959 } 1960 1961 static const struct xe_migrate_pt_update_ops migrate_ops = { 1962 .populate = xe_vm_populate_pgtable, 1963 .clear = xe_migrate_clear_pgtable_callback, 1964 .pre_commit = xe_pt_pre_commit, 1965 }; 1966 1967 static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { 1968 .populate = xe_vm_populate_pgtable, 1969 .clear = xe_migrate_clear_pgtable_callback, 1970 .pre_commit = xe_pt_userptr_pre_commit, 1971 }; 1972 1973 /** 1974 * xe_pt_update_ops_run() - Run PT update operations 1975 * @tile: Tile of PT update operations 1976 * @vops: VMA operationa 1977 * 1978 * Run PT update operations which includes committing internal PT state changes, 1979 * creating job for PT update operations for leaf insertion / removal, and 1980 * installing job fence in various places. 1981 * 1982 * Return: fence on success, negative ERR_PTR on error. 1983 */ 1984 struct dma_fence * 1985 xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) 1986 { 1987 struct xe_vm *vm = vops->vm; 1988 struct xe_vm_pgtable_update_ops *pt_update_ops = 1989 &vops->pt_update_ops[tile->id]; 1990 struct dma_fence *fence; 1991 struct invalidation_fence *ifence = NULL, *mfence = NULL; 1992 struct dma_fence **fences = NULL; 1993 struct dma_fence_array *cf = NULL; 1994 struct xe_range_fence *rfence; 1995 struct xe_vma_op *op; 1996 int err = 0, i; 1997 struct xe_migrate_pt_update update = { 1998 .ops = pt_update_ops->needs_userptr_lock ? 1999 &userptr_migrate_ops : 2000 &migrate_ops, 2001 .vops = vops, 2002 .tile_id = tile->id, 2003 }; 2004 2005 lockdep_assert_held(&vm->lock); 2006 xe_vm_assert_held(vm); 2007 2008 if (!pt_update_ops->current_op) { 2009 xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); 2010 2011 return dma_fence_get_stub(); 2012 } 2013 2014 #ifdef TEST_VM_OPS_ERROR 2015 if (vops->inject_error && 2016 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) 2017 return ERR_PTR(-ENOSPC); 2018 #endif 2019 2020 if (pt_update_ops->needs_invalidation) { 2021 ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); 2022 if (!ifence) { 2023 err = -ENOMEM; 2024 goto kill_vm_tile1; 2025 } 2026 if (tile->media_gt) { 2027 mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); 2028 if (!mfence) { 2029 err = -ENOMEM; 2030 goto free_ifence; 2031 } 2032 fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); 2033 if (!fences) { 2034 err = -ENOMEM; 2035 goto free_ifence; 2036 } 2037 cf = dma_fence_array_alloc(2); 2038 if (!cf) { 2039 err = -ENOMEM; 2040 goto free_ifence; 2041 } 2042 } 2043 } 2044 2045 rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); 2046 if (!rfence) { 2047 err = -ENOMEM; 2048 goto free_ifence; 2049 } 2050 2051 fence = xe_migrate_update_pgtables(tile->migrate, &update); 2052 if (IS_ERR(fence)) { 2053 err = PTR_ERR(fence); 2054 goto free_rfence; 2055 } 2056 2057 /* Point of no return - VM killed if failure after this */ 2058 for (i = 0; i < pt_update_ops->current_op; ++i) { 2059 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; 2060 2061 xe_pt_commit(pt_op->vma, pt_op->entries, 2062 pt_op->num_entries, &pt_update_ops->deferred); 2063 pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ 2064 } 2065 2066 if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, 2067 &xe_range_fence_kfree_ops, 2068 pt_update_ops->start, 2069 pt_update_ops->last, fence)) 2070 dma_fence_wait(fence, false); 2071 2072 /* tlb invalidation must be done before signaling rebind */ 2073 if (ifence) { 2074 if (mfence) 2075 dma_fence_get(fence); 2076 invalidation_fence_init(tile->primary_gt, ifence, fence, 2077 pt_update_ops->start, 2078 pt_update_ops->last, vm->usm.asid); 2079 if (mfence) { 2080 invalidation_fence_init(tile->media_gt, mfence, fence, 2081 pt_update_ops->start, 2082 pt_update_ops->last, vm->usm.asid); 2083 fences[0] = &ifence->base.base; 2084 fences[1] = &mfence->base.base; 2085 dma_fence_array_init(cf, 2, fences, 2086 vm->composite_fence_ctx, 2087 vm->composite_fence_seqno++, 2088 false); 2089 fence = &cf->base; 2090 } else { 2091 fence = &ifence->base.base; 2092 } 2093 } 2094 2095 if (!mfence) { 2096 dma_resv_add_fence(xe_vm_resv(vm), fence, 2097 pt_update_ops->wait_vm_bookkeep ? 2098 DMA_RESV_USAGE_KERNEL : 2099 DMA_RESV_USAGE_BOOKKEEP); 2100 2101 list_for_each_entry(op, &vops->list, link) 2102 op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); 2103 } else { 2104 dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, 2105 pt_update_ops->wait_vm_bookkeep ? 2106 DMA_RESV_USAGE_KERNEL : 2107 DMA_RESV_USAGE_BOOKKEEP); 2108 2109 dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, 2110 pt_update_ops->wait_vm_bookkeep ? 2111 DMA_RESV_USAGE_KERNEL : 2112 DMA_RESV_USAGE_BOOKKEEP); 2113 2114 list_for_each_entry(op, &vops->list, link) 2115 op_commit(vops->vm, tile, pt_update_ops, op, 2116 &ifence->base.base, &mfence->base.base); 2117 } 2118 2119 if (pt_update_ops->needs_userptr_lock) 2120 up_read(&vm->userptr.notifier_lock); 2121 2122 return fence; 2123 2124 free_rfence: 2125 kfree(rfence); 2126 free_ifence: 2127 kfree(cf); 2128 kfree(fences); 2129 kfree(mfence); 2130 kfree(ifence); 2131 kill_vm_tile1: 2132 if (err != -EAGAIN && tile->id) 2133 xe_vm_kill(vops->vm, false); 2134 2135 return ERR_PTR(err); 2136 } 2137 ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO); 2138 2139 /** 2140 * xe_pt_update_ops_fini() - Finish PT update operations 2141 * @tile: Tile of PT update operations 2142 * @vops: VMA operations 2143 * 2144 * Finish PT update operations by committing to destroy page table memory 2145 */ 2146 void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) 2147 { 2148 struct xe_vm_pgtable_update_ops *pt_update_ops = 2149 &vops->pt_update_ops[tile->id]; 2150 int i; 2151 2152 lockdep_assert_held(&vops->vm->lock); 2153 xe_vm_assert_held(vops->vm); 2154 2155 for (i = 0; i < pt_update_ops->current_op; ++i) { 2156 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; 2157 2158 xe_pt_free_bind(pt_op->entries, pt_op->num_entries); 2159 } 2160 xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); 2161 } 2162 2163 /** 2164 * xe_pt_update_ops_abort() - Abort PT update operations 2165 * @tile: Tile of PT update operations 2166 * @vops: VMA operationa 2167 * 2168 * Abort PT update operations by unwinding internal PT state 2169 */ 2170 void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) 2171 { 2172 struct xe_vm_pgtable_update_ops *pt_update_ops = 2173 &vops->pt_update_ops[tile->id]; 2174 int i; 2175 2176 lockdep_assert_held(&vops->vm->lock); 2177 xe_vm_assert_held(vops->vm); 2178 2179 for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { 2180 struct xe_vm_pgtable_update_op *pt_op = 2181 &pt_update_ops->ops[i]; 2182 2183 if (!pt_op->vma || i >= pt_update_ops->current_op) 2184 continue; 2185 2186 if (pt_op->bind) 2187 xe_pt_abort_bind(pt_op->vma, pt_op->entries, 2188 pt_op->num_entries, 2189 pt_op->rebind); 2190 else 2191 xe_pt_abort_unbind(pt_op->vma, pt_op->entries, 2192 pt_op->num_entries); 2193 } 2194 2195 xe_pt_update_ops_fini(tile, vops); 2196 } 2197