// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <linux/dma-fence-array.h>

#include "xe_pt.h"

#include "regs/xe_gtt_defs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_svm.h"
#include "xe_tlb_inval_job.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"

struct xe_pt_dir {
	struct xe_pt pt;
	/** @children: Array of page-table child nodes */
	struct xe_ptw *children[XE_PDES];
	/** @staging: Array of page-table staging nodes */
	struct xe_ptw *staging[XE_PDES];
};

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
#else
#define xe_pt_set_addr(__xe_pt, __addr)
#define xe_pt_addr(__xe_pt) 0ull
#endif

static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};

#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)

static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
{
	return container_of(pt, struct xe_pt_dir, pt);
}

static struct xe_pt *
xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index)
{
	return container_of(pt_dir->staging[index], struct xe_pt, base);
}

static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
			     unsigned int level)
{
	struct xe_device *xe = tile_to_xe(tile);
	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
	u8 id = tile->id;

	if (!xe_vm_has_scratch(vm))
		return 0;

	if (level > MAX_HUGEPTE_LEVEL)
		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
						 0);

	return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
		XE_PTE_NULL;
}

static void xe_pt_free(struct xe_pt *pt)
{
	if (pt->level)
		kfree(as_xe_pt_dir(pt));
	else
		kfree(pt);
}

/**
 * xe_pt_create() - Create a page-table.
 * @vm: The vm to create for.
 * @tile: The tile to create for.
 * @level: The page-table level.
 *
 * Allocate and initialize a single struct xe_pt metadata structure. Also
 * create the corresponding page-table bo, but don't initialize it. If the
 * level is greater than zero, then it's assumed to be a directory page-
 * table and the directory structure is also allocated and initialized to
 * NULL pointers.
 *
 * Return: A valid struct xe_pt pointer on success, an error pointer on
 * error.
 */
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
			   unsigned int level)
{
	struct xe_pt *pt;
	struct xe_bo *bo;
	u32 bo_flags;
	int err;

	if (level) {
		struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);

		pt = (dir) ? &dir->pt : NULL;
	} else {
		pt = kzalloc(sizeof(*pt), GFP_KERNEL);
	}
	if (!pt)
		return ERR_PTR(-ENOMEM);

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) |
		   XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
		   XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE;
	if (vm->xef) /* userspace */
		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;

	pt->level = level;
	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
				  ttm_bo_type_kernel,
				  bo_flags);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto err_kfree;
	}
	pt->bo = bo;
	pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL;
	pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL;

	if (vm->xef)
		xe_drm_client_add_bo(vm->xef->client, pt->bo);
	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);

	return pt;

err_kfree:
	xe_pt_free(pt);
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO);
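
/*
 * Illustrative sketch only (not taken verbatim from any caller), mirroring
 * what xe_pt_stage_bind_entry() further down in this file does: the typical
 * create-then-initialize sequence. The level chosen is hypothetical;
 * xe_pt_create() only allocates the metadata and the backing bo, while
 * xe_pt_populate_empty() fills the bo with scratch or zero entries:
 *
 *	struct xe_pt *pt = xe_pt_create(vm, tile, level);
 *
 *	if (IS_ERR(pt))
 *		return PTR_ERR(pt);
 *	xe_pt_populate_empty(tile, vm, pt);
 *	...
 *	xe_pt_destroy(pt, vm->flags, NULL);
 */
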
/**
 * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
 * entries.
 * @tile: The tile whose scratch pagetable to use.
 * @vm: The vm we populate for.
 * @pt: The pagetable whose bo to initialize.
 *
 * Populate the page-table bo of @pt with entries pointing into the tile's
 * scratch page-table tree if any. Otherwise populate with zeros.
 */
void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
			  struct xe_pt *pt)
{
	struct iosys_map *map = &pt->bo->vmap;
	u64 empty;
	int i;

	if (!xe_vm_has_scratch(vm)) {
		/*
		 * FIXME: Some memory is already allocated to zero?
		 * Find out which memory that is and avoid this memset...
		 */
		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
	} else {
		empty = __xe_pt_empty_pte(tile, vm, pt->level);
		for (i = 0; i < XE_PDES; i++)
			xe_pt_write(vm->xe, map, i, empty);
	}
}

/**
 * xe_pt_shift() - Return the ilog2 value of the size of the address range of
 * a page-table at a certain level.
 * @level: The level.
 *
 * Return: The ilog2 value of the size of the address range of a page-table
 * at level @level.
 */
unsigned int xe_pt_shift(unsigned int level)
{
	return XE_PTE_SHIFT + XE_PDE_SHIFT * level;
}

/**
 * xe_pt_destroy() - Destroy a page-table tree.
 * @pt: The root of the page-table tree to destroy.
 * @flags: vm flags. Currently unused.
 * @deferred: List head of lockless list for deferred putting. NULL for
 * immediate putting.
 *
 * Puts the page-table bo, recursively calls xe_pt_destroy on all children
 * and finally frees @pt. TODO: Can we remove the @flags argument?
 */
void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
{
	int i;

	if (!pt)
		return;

	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
	xe_bo_unpin(pt->bo);
	xe_bo_put_deferred(pt->bo, deferred);

	if (pt->level > 0 && pt->num_live) {
		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);

		for (i = 0; i < XE_PDES; i++) {
			if (xe_pt_entry_staging(pt_dir, i))
				xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags,
					      deferred);
		}
	}
	xe_pt_free(pt);
}

/**
 * xe_pt_clear() - Clear a page-table.
 * @xe: xe device.
 * @pt: The page-table.
 *
 * Clears page-table by setting to zero.
232 */ 233 void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt) 234 { 235 struct iosys_map *map = &pt->bo->vmap; 236 237 xe_map_memset(xe, map, 0, 0, SZ_4K); 238 } 239 240 /** 241 * DOC: Pagetable building 242 * 243 * Below we use the term "page-table" for both page-directories, containing 244 * pointers to lower level page-directories or page-tables, and level 0 245 * page-tables that contain only page-table-entries pointing to memory pages. 246 * 247 * When inserting an address range in an already existing page-table tree 248 * there will typically be a set of page-tables that are shared with other 249 * address ranges, and a set that are private to this address range. 250 * The set of shared page-tables can be at most two per level, 251 * and those can't be updated immediately because the entries of those 252 * page-tables may still be in use by the gpu for other mappings. Therefore 253 * when inserting entries into those, we instead stage those insertions by 254 * adding insertion data into struct xe_vm_pgtable_update structures. This 255 * data, (subtrees for the cpu and page-table-entries for the gpu) is then 256 * added in a separate commit step. CPU-data is committed while still under the 257 * vm lock, the object lock and for userptr, the notifier lock in read mode. 258 * The GPU async data is committed either by the GPU or CPU after fulfilling 259 * relevant dependencies. 260 * For non-shared page-tables (and, in fact, for shared ones that aren't 261 * existing at the time of staging), we add the data in-place without the 262 * special update structures. This private part of the page-table tree will 263 * remain disconnected from the vm page-table tree until data is committed to 264 * the shared page tables of the vm tree in the commit phase. 265 */ 266 267 struct xe_pt_update { 268 /** @update: The update structure we're building for this parent. */ 269 struct xe_vm_pgtable_update *update; 270 /** @parent: The parent. Used to detect a parent change. */ 271 struct xe_pt *parent; 272 /** @preexisting: Whether the parent was pre-existing or allocated */ 273 bool preexisting; 274 }; 275 276 /** 277 * struct xe_pt_stage_bind_walk - Walk state for the stage_bind walk. 278 */ 279 struct xe_pt_stage_bind_walk { 280 /** @base: The base class. */ 281 struct xe_pt_walk base; 282 283 /* Input parameters for the walk */ 284 /** @vm: The vm we're building for. */ 285 struct xe_vm *vm; 286 /** @tile: The tile we're building for. */ 287 struct xe_tile *tile; 288 /** @default_vram_pte: PTE flag only template for VRAM. No address is associated */ 289 u64 default_vram_pte; 290 /** @default_system_pte: PTE flag only template for System. No address is associated */ 291 u64 default_system_pte; 292 /** @dma_offset: DMA offset to add to the PTE. */ 293 u64 dma_offset; 294 /** 295 * @needs_64K: This address range enforces 64K alignment and 296 * granularity on VRAM. 297 */ 298 bool needs_64K; 299 /** @clear_pt: clear page table entries during the bind walk */ 300 bool clear_pt; 301 /** 302 * @vma: VMA being mapped 303 */ 304 struct xe_vma *vma; 305 306 /* Also input, but is updated during the walk*/ 307 /** @curs: The DMA address cursor. */ 308 struct xe_res_cursor *curs; 309 /** @va_curs_start: The Virtual address corresponding to @curs->start */ 310 u64 va_curs_start; 311 312 /* Output */ 313 /** @wupd: Walk output data for page-table updates. */ 314 struct xe_walk_update { 315 /** @wupd.entries: Caller provided storage. 
		 */
		struct xe_vm_pgtable_update *entries;
		/** @wupd.num_used_entries: Number of update @entries used. */
		unsigned int num_used_entries;
		/** @wupd.updates: Tracks the update entry at a given level */
		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
	} wupd;

	/* Walk state */
	/**
	 * @l0_end_addr: The end address of the current l0 leaf. Used for
	 * 64K granularity detection.
	 */
	u64 l0_end_addr;
	/** @addr_64K: The start address of the current 64K chunk. */
	u64 addr_64K;
	/** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
	bool found_64K;
};

static int
xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
		 pgoff_t offset, bool alloc_entries)
{
	struct xe_pt_update *upd = &wupd->updates[parent->level];
	struct xe_vm_pgtable_update *entry;

	/*
	 * For *each level*, we could only have one active
	 * struct xe_pt_update at any one time. Once we move on to a
	 * new parent and page-directory, the old one is complete, and
	 * updates are either already stored in the build tree or in
	 * @wupd->entries
	 */
	if (likely(upd->parent == parent))
		return 0;

	upd->parent = parent;
	upd->preexisting = true;

	if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
		return -EINVAL;

	entry = wupd->entries + wupd->num_used_entries++;
	upd->update = entry;
	entry->ofs = offset;
	entry->pt_bo = parent->bo;
	entry->pt = parent;
	entry->flags = 0;
	entry->qwords = 0;
	entry->pt_bo->update_index = -1;

	if (alloc_entries) {
		entry->pt_entries = kmalloc_array(XE_PDES,
						  sizeof(*entry->pt_entries),
						  GFP_KERNEL);
		if (!entry->pt_entries)
			return -ENOMEM;
	}

	return 0;
}

/*
 * NOTE: This is a very frequently called function so we allow ourselves
 * to annotate (using branch prediction hints) the fastpath of updating a
 * non-pre-existing pagetable with leaf ptes.
 */
static int
xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
{
	struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
	struct xe_pt_update *child_upd = xe_child ?
		&xe_walk->wupd.updates[xe_child->level] : NULL;
	int ret;

	ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
	if (unlikely(ret))
		return ret;

	/*
	 * Register this new pagetable so that it won't be recognized as
	 * a shared pagetable by a subsequent insertion.
	 */
	if (unlikely(child_upd)) {
		child_upd->update = NULL;
		child_upd->parent = xe_child;
		child_upd->preexisting = false;
	}

	if (likely(!upd->preexisting)) {
		/* Continue building a non-connected subtree. */
		struct iosys_map *map = &parent->bo->vmap;

		if (unlikely(xe_child)) {
			parent->base.children[offset] = &xe_child->base;
			parent->base.staging[offset] = &xe_child->base;
		}

		xe_pt_write(xe_walk->vm->xe, map, offset, pte);
		parent->num_live++;
	} else {
		/* Shared pt. Stage update. */
		unsigned int idx;
		struct xe_vm_pgtable_update *entry = upd->update;

		idx = offset - entry->ofs;
		entry->pt_entries[idx].pt = xe_child;
		entry->pt_entries[idx].pte = pte;
		entry->qwords++;
	}

	return 0;
}
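
/*
 * Illustrative sketch only, not part of the driver flow: once its
 * dependencies are resolved, a staged entry produced above for a
 * pre-existing (shared) page-table could be applied to the parent bo with a
 * loop like the hypothetical helper below. The real driver does this either
 * from the GPU via xe_migrate_update_pgtables() or from the CPU via the
 * populate/commit callbacks further down in this file.
 *
 *	static void apply_staged_entry(struct xe_vm *vm,
 *				       const struct xe_vm_pgtable_update *entry)
 *	{
 *		u32 i;
 *
 *		for (i = 0; i < entry->qwords; i++)
 *			xe_pt_write(vm->xe, &entry->pt_bo->vmap,
 *				    entry->ofs + i, entry->pt_entries[i].pte);
 *	}
 */
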
static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
				   struct xe_pt_stage_bind_walk *xe_walk)
{
	u64 size, dma;

	if (level > MAX_HUGEPTE_LEVEL)
		return false;

	/* Does the virtual range requested cover a huge pte? */
	if (!xe_pt_covers(addr, next, level, &xe_walk->base))
		return false;

	/* Does the DMA segment cover the whole pte? */
	if (next - xe_walk->va_curs_start > xe_walk->curs->size)
		return false;

	/* null VMA's do not have dma addresses */
	if (xe_vma_is_null(xe_walk->vma))
		return true;

	/* if we are clearing page table, no dma addresses */
	if (xe_walk->clear_pt)
		return true;

	/* Is the DMA address huge PTE size aligned? */
	size = next - addr;
	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);

	return IS_ALIGNED(dma, size);
}

/*
 * Scan the requested mapping to check whether it can be done entirely
 * with 64K PTEs.
 */
static bool
xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
{
	struct xe_res_cursor curs = *xe_walk->curs;

	if (!IS_ALIGNED(addr, SZ_64K))
		return false;

	if (next > xe_walk->l0_end_addr)
		return false;

	/* null VMA's do not have dma addresses */
	if (xe_vma_is_null(xe_walk->vma))
		return true;

	xe_res_next(&curs, addr - xe_walk->va_curs_start);
	for (; addr < next; addr += SZ_64K) {
		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
			return false;

		xe_res_next(&curs, SZ_64K);
	}

	return addr == next;
}

/*
 * For non-compact "normal" 4K level-0 pagetables, we want to try to group
 * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
 * device to the PTE.
 * This function determines whether the address is part of such a
 * segment. For VRAM in normal pagetables, this is strictly necessary on
 * some devices.
 */
static bool
xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
{
	/* Address is within an already found 64k region */
	if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
		return true;

	xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
	xe_walk->addr_64K = addr;

	return xe_walk->found_64K;
}

static int
xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
		       unsigned int level, u64 addr, u64 next,
		       struct xe_ptw **child,
		       enum page_walk_action *action,
		       struct xe_pt_walk *walk)
{
	struct xe_pt_stage_bind_walk *xe_walk =
		container_of(walk, typeof(*xe_walk), base);
	u16 pat_index = xe_walk->vma->attr.pat_index;
	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
	struct xe_vm *vm = xe_walk->vm;
	struct xe_pt *xe_child;
	bool covers;
	int ret = 0;
	u64 pte;

	/* Is this a leaf entry? */
	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
		struct xe_res_cursor *curs = xe_walk->curs;
		bool is_null = xe_vma_is_null(xe_walk->vma);
		bool is_vram = is_null ?
false : xe_res_is_vram(curs); 535 536 XE_WARN_ON(xe_walk->va_curs_start != addr); 537 538 if (xe_walk->clear_pt) { 539 pte = 0; 540 } else { 541 pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : 542 xe_res_dma(curs) + 543 xe_walk->dma_offset, 544 xe_walk->vma, 545 pat_index, level); 546 if (!is_null) 547 pte |= is_vram ? xe_walk->default_vram_pte : 548 xe_walk->default_system_pte; 549 550 /* 551 * Set the XE_PTE_PS64 hint if possible, otherwise if 552 * this device *requires* 64K PTE size for VRAM, fail. 553 */ 554 if (level == 0 && !xe_parent->is_compact) { 555 if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { 556 xe_walk->vma->gpuva.flags |= 557 XE_VMA_PTE_64K; 558 pte |= XE_PTE_PS64; 559 } else if (XE_WARN_ON(xe_walk->needs_64K && 560 is_vram)) { 561 return -EINVAL; 562 } 563 } 564 } 565 566 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); 567 if (unlikely(ret)) 568 return ret; 569 570 if (!is_null && !xe_walk->clear_pt) 571 xe_res_next(curs, next - addr); 572 xe_walk->va_curs_start = next; 573 xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); 574 *action = ACTION_CONTINUE; 575 576 return ret; 577 } 578 579 /* 580 * Descending to lower level. Determine if we need to allocate a 581 * new page table or -directory, which we do if there is no 582 * previous one or there is one we can completely replace. 583 */ 584 if (level == 1) { 585 walk->shifts = xe_normal_pt_shifts; 586 xe_walk->l0_end_addr = next; 587 } 588 589 covers = xe_pt_covers(addr, next, level, &xe_walk->base); 590 if (covers || !*child) { 591 u64 flags = 0; 592 593 xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); 594 if (IS_ERR(xe_child)) 595 return PTR_ERR(xe_child); 596 597 xe_pt_set_addr(xe_child, 598 round_down(addr, 1ull << walk->shifts[level])); 599 600 if (!covers) 601 xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); 602 603 *child = &xe_child->base; 604 605 /* 606 * Prefer the compact pagetable layout for L0 if possible. Only 607 * possible if VMA covers entire 2MB region as compact 64k and 608 * 4k pages cannot be mixed within a 2MB region. 609 * TODO: Suballocate the pt bo to avoid wasting a lot of 610 * memory. 611 */ 612 if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && 613 covers && xe_pt_scan_64K(addr, next, xe_walk)) { 614 walk->shifts = xe_compact_pt_shifts; 615 xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; 616 flags |= XE_PDE_64K; 617 xe_child->is_compact = true; 618 } 619 620 pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags; 621 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, 622 pte); 623 } 624 625 *action = ACTION_SUBTREE; 626 return ret; 627 } 628 629 static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { 630 .pt_entry = xe_pt_stage_bind_entry, 631 }; 632 633 /* 634 * Default atomic expectations for different allocation scenarios are as follows: 635 * 636 * 1. Traditional API: When the VM is not in LR mode: 637 * - Device atomics are expected to function with all allocations. 638 * 639 * 2. Compute/SVM API: When the VM is in LR mode: 640 * - Device atomics are the default behavior when the bo is placed in a single region. 641 * - In all other cases device atomics will be disabled with AE=0 until an application 642 * request differently using a ioctl like madvise. 
 */
static bool xe_atomic_for_vram(struct xe_vm *vm, struct xe_vma *vma)
{
	if (vma->attr.atomic_access == DRM_XE_ATOMIC_CPU)
		return false;

	return true;
}

static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_vma *vma)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo = xe_vma_bo(vma);

	if (!xe->info.has_device_atomics_on_smem ||
	    vma->attr.atomic_access == DRM_XE_ATOMIC_CPU)
		return false;

	if (vma->attr.atomic_access == DRM_XE_ATOMIC_DEVICE)
		return true;

	/*
	 * If a SMEM+LMEM allocation is backed by SMEM, a device atomic
	 * will cause a gpu page fault and the allocation then gets
	 * migrated to LMEM, so bind such allocations with device atomics
	 * enabled.
	 */
	return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) ||
				 (bo && xe_bo_has_single_placement(bo))));
}
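
/*
 * Worked example (illustrative, assuming a device with
 * has_device_atomics_on_smem set and a VMA with XE_VMA_ATOMIC_PTE_BIT, see
 * xe_pt_stage_bind() below) of what the two helpers above yield for the AE
 * bit in the default PTE templates:
 *
 *	DRM_XE_ATOMIC_CPU:	AE in neither the VRAM nor the system template
 *	DRM_XE_ATOMIC_DEVICE:	AE in both templates
 *	otherwise:		AE in the VRAM template; AE in the system
 *				template only on igfx, on a !LR VM or for a
 *				single-placement bo
 */
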
/**
 * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
 * range.
 * @tile: The tile we're building for.
 * @vma: The vma indicating the address range.
 * @range: The range indicating the address range.
 * @entries: Storage for the update entries used for connecting the tree to
 * the main tree at commit time.
 * @num_entries: On output contains the number of @entries used.
 * @clear_pt: Clear the page table entries.
 *
 * This function builds a disconnected page-table tree for a given address
 * range. The tree is connected to the main vm tree for the gpu using
 * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
 * The function builds xe_vm_pgtable_update structures for already existing
 * shared page-tables, and non-existing shared and non-shared page-tables
 * are built and populated directly.
 *
 * Return: 0 on success, negative error code on error.
 */
static int
xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
		 struct xe_svm_range *range,
		 struct xe_vm_pgtable_update *entries,
		 u32 *num_entries, bool clear_pt)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_res_cursor curs;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_pt_stage_bind_walk xe_walk = {
		.base = {
			.ops = &xe_pt_stage_bind_ops,
			.shifts = xe_normal_pt_shifts,
			.max_level = XE_PT_HIGHEST_LEVEL,
			.staging = true,
		},
		.vm = vm,
		.tile = tile,
		.curs = &curs,
		.va_curs_start = range ? range->base.itree.start :
			xe_vma_start(vma),
		.vma = vma,
		.wupd.entries = entries,
		.clear_pt = clear_pt,
	};
	struct xe_pt *pt = vm->pt_root[tile->id];
	int ret;

	if (range) {
		/* Move this entire thing to xe_svm.c? */
		xe_svm_notifier_lock(vm);
		if (!xe_svm_range_pages_valid(range)) {
			xe_svm_range_debug(range, "BIND PREPARE - RETRY");
			xe_svm_notifier_unlock(vm);
			return -EAGAIN;
		}
		if (xe_svm_range_has_dma_mapping(range)) {
			xe_res_first_dma(range->base.dma_addr, 0,
					 range->base.itree.last + 1 - range->base.itree.start,
					 &curs);
			xe_svm_range_debug(range, "BIND PREPARE - MIXED");
		} else {
			xe_assert(xe, false);
		}
		/*
		 * Note, when unlocking the resource cursor dma addresses may become
		 * stale, but the bind will be aborted anyway at commit time.
		 */
		xe_svm_notifier_unlock(vm);
	}

	xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
	if (clear_pt)
		goto walk_pt;

	if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
		xe_walk.default_vram_pte = xe_atomic_for_vram(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0;
		xe_walk.default_system_pte = xe_atomic_for_system(vm, vma) ?
			XE_USM_PPGTT_PTE_AE : 0;
	}

	xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
	xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
	if (!range)
		xe_bo_assert_held(bo);

	if (!xe_vma_is_null(vma) && !range) {
		if (xe_vma_is_userptr(vma))
			xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
					xe_vma_size(vma), &curs);
		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
				     xe_vma_size(vma), &curs);
		else
			xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
					xe_vma_size(vma), &curs);
	} else if (!range) {
		curs.size = xe_vma_size(vma);
	}

walk_pt:
	ret = xe_pt_walk_range(&pt->base, pt->level,
			       range ? range->base.itree.start : xe_vma_start(vma),
			       range ? range->base.itree.last + 1 : xe_vma_end(vma),
			       &xe_walk.base);

	*num_entries = xe_walk.wupd.num_used_entries;
	return ret;
}

/**
 * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
 * shared pagetable.
 * @addr: The start address within the non-shared pagetable.
 * @end: The end address within the non-shared pagetable.
 * @level: The level of the non-shared pagetable.
 * @walk: Walk info. The function adjusts the walk action.
 * @action: next action to perform (see enum page_walk_action)
 * @offset: Ignored on input, First non-shared entry on output.
 * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
 *
 * A non-shared page-table has some entries that belong to the address range
 * and others that don't. This function determines the entries that belong
 * fully to the address range. Depending on level, some entries may
 * partially belong to the address range (that can't happen at level 0).
 * The function detects that and adjusts those offsets to not include those
 * partial entries. Iff it does detect partial entries, we know that there must
 * be shared page tables also at lower levels, so it adjusts the walk action
 * accordingly.
 *
 * Return: true if there were non-shared entries, false otherwise.
 */
static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
				    struct xe_pt_walk *walk,
				    enum page_walk_action *action,
				    pgoff_t *offset, pgoff_t *end_offset)
{
	u64 size = 1ull << walk->shifts[level];

	*offset = xe_pt_offset(addr, level, walk);
	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;

	if (!level)
		return true;

	/*
	 * If addr or next are not size aligned, there are shared pts at lower
	 * level, so in that case traverse down the subtree
	 */
	*action = ACTION_CONTINUE;
	if (!IS_ALIGNED(addr, size)) {
		*action = ACTION_SUBTREE;
		(*offset)++;
	}

	if (!IS_ALIGNED(end, size)) {
		*action = ACTION_SUBTREE;
		(*end_offset)--;
	}

	return *end_offset > *offset;
}
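
/*
 * Worked example (illustrative): with the normal layout a level-1 entry
 * covers 2MB. For addr = 3MB and end = 8MB the level-1 entry spanning
 * [2MB, 4MB) is only partially covered, so *offset is bumped past it and
 * *action becomes ACTION_SUBTREE so that the shared level-0 table below it
 * is visited, while the entries covering [4MB, 8MB) are reported as
 * non-shared.
 */
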
struct xe_pt_zap_ptes_walk {
	/** @base: The walk base-class */
	struct xe_pt_walk base;

	/* Input parameters for the walk */
	/** @tile: The tile we're building for */
	struct xe_tile *tile;

	/* Output */
	/** @needs_invalidate: Whether we need to invalidate TLB */
	bool needs_invalidate;
};

static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
				unsigned int level, u64 addr, u64 next,
				struct xe_ptw **child,
				enum page_walk_action *action,
				struct xe_pt_walk *walk)
{
	struct xe_pt_zap_ptes_walk *xe_walk =
		container_of(walk, typeof(*xe_walk), base);
	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
	pgoff_t end_offset;

	XE_WARN_ON(!*child);
	XE_WARN_ON(!level);

	/*
	 * Note that we're called from an entry callback, and we're dealing
	 * with the child of that entry rather than the parent, so need to
	 * adjust level down.
	 */
	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
				    &end_offset)) {
		xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap,
			      offset * sizeof(u64), 0,
			      (end_offset - offset) * sizeof(u64));
		xe_walk->needs_invalidate = true;
	}

	return 0;
}

static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
	.pt_entry = xe_pt_zap_ptes_entry,
};

/**
 * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
 * @tile: The tile we're zapping for.
 * @vma: GPU VMA detailing address range.
 *
 * Eviction and Userptr invalidation need to be able to zap the
 * gpu ptes of a given address range in pagefaulting mode.
 * In order to be able to do that, that function needs access to the shared
 * page-table entries so it can either clear the leaf PTEs or
 * clear the pointers to lower-level page-tables. The caller is required
 * to hold the necessary locks to ensure neither the page-table connectivity
 * nor the page-table entries of the range are updated from under us.
 *
 * Return: Whether ptes were actually updated and a TLB invalidation is
 * required.
900 */ 901 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) 902 { 903 struct xe_pt_zap_ptes_walk xe_walk = { 904 .base = { 905 .ops = &xe_pt_zap_ptes_ops, 906 .shifts = xe_normal_pt_shifts, 907 .max_level = XE_PT_HIGHEST_LEVEL, 908 }, 909 .tile = tile, 910 }; 911 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; 912 u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); 913 914 if (xe_vma_bo(vma)) 915 xe_bo_assert_held(xe_vma_bo(vma)); 916 else if (xe_vma_is_userptr(vma)) 917 lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock); 918 919 if (!(pt_mask & BIT(tile->id))) 920 return false; 921 922 (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), 923 xe_vma_end(vma), &xe_walk.base); 924 925 return xe_walk.needs_invalidate; 926 } 927 928 /** 929 * xe_pt_zap_ptes_range() - Zap (zero) gpu ptes of a SVM range 930 * @tile: The tile we're zapping for. 931 * @vm: The VM we're zapping for. 932 * @range: The SVM range we're zapping for. 933 * 934 * SVM invalidation needs to be able to zap the gpu ptes of a given address 935 * range. In order to be able to do that, that function needs access to the 936 * shared page-table entries so it can either clear the leaf PTEs or 937 * clear the pointers to lower-level page-tables. The caller is required 938 * to hold the SVM notifier lock. 939 * 940 * Return: Whether ptes were actually updated and a TLB invalidation is 941 * required. 942 */ 943 bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm, 944 struct xe_svm_range *range) 945 { 946 struct xe_pt_zap_ptes_walk xe_walk = { 947 .base = { 948 .ops = &xe_pt_zap_ptes_ops, 949 .shifts = xe_normal_pt_shifts, 950 .max_level = XE_PT_HIGHEST_LEVEL, 951 }, 952 .tile = tile, 953 }; 954 struct xe_pt *pt = vm->pt_root[tile->id]; 955 u8 pt_mask = (range->tile_present & ~range->tile_invalidated); 956 957 /* 958 * Locking rules: 959 * 960 * - notifier_lock (write): full protection against page table changes 961 * and MMU notifier invalidations. 962 * 963 * - notifier_lock (read) + vm_lock (write): combined protection against 964 * invalidations and concurrent page table modifications. 
(e.g., madvise) 965 * 966 */ 967 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 968 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 969 lockdep_is_held_type(&vm->lock, 0))); 970 971 if (!(pt_mask & BIT(tile->id))) 972 return false; 973 974 (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start, 975 range->base.itree.last + 1, &xe_walk.base); 976 977 return xe_walk.needs_invalidate; 978 } 979 980 static void 981 xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, 982 struct iosys_map *map, void *data, 983 u32 qword_ofs, u32 num_qwords, 984 const struct xe_vm_pgtable_update *update) 985 { 986 struct xe_pt_entry *ptes = update->pt_entries; 987 u64 *ptr = data; 988 u32 i; 989 990 for (i = 0; i < num_qwords; i++) { 991 if (map) 992 xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * 993 sizeof(u64), u64, ptes[i].pte); 994 else 995 ptr[i] = ptes[i].pte; 996 } 997 } 998 999 static void xe_pt_cancel_bind(struct xe_vma *vma, 1000 struct xe_vm_pgtable_update *entries, 1001 u32 num_entries) 1002 { 1003 u32 i, j; 1004 1005 for (i = 0; i < num_entries; i++) { 1006 struct xe_pt *pt = entries[i].pt; 1007 1008 if (!pt) 1009 continue; 1010 1011 if (pt->level) { 1012 for (j = 0; j < entries[i].qwords; j++) 1013 xe_pt_destroy(entries[i].pt_entries[j].pt, 1014 xe_vma_vm(vma)->flags, NULL); 1015 } 1016 1017 kfree(entries[i].pt_entries); 1018 entries[i].pt_entries = NULL; 1019 entries[i].qwords = 0; 1020 } 1021 } 1022 1023 #define XE_INVALID_VMA ((struct xe_vma *)(0xdeaddeadull)) 1024 1025 static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) 1026 { 1027 struct xe_vm *vm; 1028 1029 if (vma == XE_INVALID_VMA) 1030 return; 1031 1032 vm = xe_vma_vm(vma); 1033 lockdep_assert_held(&vm->lock); 1034 1035 if (!xe_vma_has_no_bo(vma)) 1036 dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); 1037 1038 xe_vm_assert_held(vm); 1039 } 1040 1041 static void xe_pt_commit_locks_assert(struct xe_vma *vma) 1042 { 1043 struct xe_vm *vm; 1044 1045 if (vma == XE_INVALID_VMA) 1046 return; 1047 1048 vm = xe_vma_vm(vma); 1049 xe_pt_commit_prepare_locks_assert(vma); 1050 1051 if (xe_vma_is_userptr(vma)) 1052 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1053 } 1054 1055 static void xe_pt_commit(struct xe_vma *vma, 1056 struct xe_vm_pgtable_update *entries, 1057 u32 num_entries, struct llist_head *deferred) 1058 { 1059 u32 i, j; 1060 1061 xe_pt_commit_locks_assert(vma); 1062 1063 for (i = 0; i < num_entries; i++) { 1064 struct xe_pt *pt = entries[i].pt; 1065 struct xe_pt_dir *pt_dir; 1066 1067 if (!pt->level) 1068 continue; 1069 1070 pt_dir = as_xe_pt_dir(pt); 1071 for (j = 0; j < entries[i].qwords; j++) { 1072 struct xe_pt *oldpte = entries[i].pt_entries[j].pt; 1073 int j_ = j + entries[i].ofs; 1074 1075 pt_dir->children[j_] = pt_dir->staging[j_]; 1076 xe_pt_destroy(oldpte, (vma == XE_INVALID_VMA) ? 
0 : 1077 xe_vma_vm(vma)->flags, deferred); 1078 } 1079 } 1080 } 1081 1082 static void xe_pt_abort_bind(struct xe_vma *vma, 1083 struct xe_vm_pgtable_update *entries, 1084 u32 num_entries, bool rebind) 1085 { 1086 int i, j; 1087 1088 xe_pt_commit_prepare_locks_assert(vma); 1089 1090 for (i = num_entries - 1; i >= 0; --i) { 1091 struct xe_pt *pt = entries[i].pt; 1092 struct xe_pt_dir *pt_dir; 1093 1094 if (!rebind) 1095 pt->num_live -= entries[i].qwords; 1096 1097 if (!pt->level) 1098 continue; 1099 1100 pt_dir = as_xe_pt_dir(pt); 1101 for (j = 0; j < entries[i].qwords; j++) { 1102 u32 j_ = j + entries[i].ofs; 1103 struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); 1104 struct xe_pt *oldpte = entries[i].pt_entries[j].pt; 1105 1106 pt_dir->staging[j_] = oldpte ? &oldpte->base : 0; 1107 xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); 1108 } 1109 } 1110 } 1111 1112 static void xe_pt_commit_prepare_bind(struct xe_vma *vma, 1113 struct xe_vm_pgtable_update *entries, 1114 u32 num_entries, bool rebind) 1115 { 1116 u32 i, j; 1117 1118 xe_pt_commit_prepare_locks_assert(vma); 1119 1120 for (i = 0; i < num_entries; i++) { 1121 struct xe_pt *pt = entries[i].pt; 1122 struct xe_pt_dir *pt_dir; 1123 1124 if (!rebind) 1125 pt->num_live += entries[i].qwords; 1126 1127 if (!pt->level) 1128 continue; 1129 1130 pt_dir = as_xe_pt_dir(pt); 1131 for (j = 0; j < entries[i].qwords; j++) { 1132 u32 j_ = j + entries[i].ofs; 1133 struct xe_pt *newpte = entries[i].pt_entries[j].pt; 1134 struct xe_pt *oldpte = NULL; 1135 1136 if (xe_pt_entry_staging(pt_dir, j_)) 1137 oldpte = xe_pt_entry_staging(pt_dir, j_); 1138 1139 pt_dir->staging[j_] = &newpte->base; 1140 entries[i].pt_entries[j].pt = oldpte; 1141 } 1142 } 1143 } 1144 1145 static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, 1146 u32 num_entries) 1147 { 1148 u32 i; 1149 1150 for (i = 0; i < num_entries; i++) 1151 kfree(entries[i].pt_entries); 1152 } 1153 1154 static int 1155 xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, 1156 struct xe_svm_range *range, 1157 struct xe_vm_pgtable_update *entries, 1158 u32 *num_entries, bool invalidate_on_bind) 1159 { 1160 int err; 1161 1162 *num_entries = 0; 1163 err = xe_pt_stage_bind(tile, vma, range, entries, num_entries, 1164 invalidate_on_bind); 1165 if (!err) 1166 xe_tile_assert(tile, *num_entries); 1167 1168 return err; 1169 } 1170 1171 static void xe_vm_dbg_print_entries(struct xe_device *xe, 1172 const struct xe_vm_pgtable_update *entries, 1173 unsigned int num_entries, bool bind) 1174 #if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) 1175 { 1176 unsigned int i; 1177 1178 vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? 
"bind" : "unbind", 1179 num_entries); 1180 for (i = 0; i < num_entries; i++) { 1181 const struct xe_vm_pgtable_update *entry = &entries[i]; 1182 struct xe_pt *xe_pt = entry->pt; 1183 u64 page_size = 1ull << xe_pt_shift(xe_pt->level); 1184 u64 end; 1185 u64 start; 1186 1187 xe_assert(xe, !entry->pt->is_compact); 1188 start = entry->ofs * page_size; 1189 end = start + page_size * entry->qwords; 1190 vm_dbg(&xe->drm, 1191 "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", 1192 i, xe_pt->level, entry->ofs, entry->qwords, 1193 xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); 1194 } 1195 } 1196 #else 1197 {} 1198 #endif 1199 1200 static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) 1201 { 1202 int i; 1203 1204 for (i = 0; i < num_syncs; i++) { 1205 struct dma_fence *fence = syncs[i].fence; 1206 1207 if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 1208 &fence->flags)) 1209 return false; 1210 } 1211 1212 return true; 1213 } 1214 1215 static int job_test_add_deps(struct xe_sched_job *job, 1216 struct dma_resv *resv, 1217 enum dma_resv_usage usage) 1218 { 1219 if (!job) { 1220 if (!dma_resv_test_signaled(resv, usage)) 1221 return -ETIME; 1222 1223 return 0; 1224 } 1225 1226 return xe_sched_job_add_deps(job, resv, usage); 1227 } 1228 1229 static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) 1230 { 1231 struct xe_bo *bo = xe_vma_bo(vma); 1232 1233 xe_bo_assert_held(bo); 1234 1235 if (bo && !bo->vm) 1236 return job_test_add_deps(job, bo->ttm.base.resv, 1237 DMA_RESV_USAGE_KERNEL); 1238 1239 return 0; 1240 } 1241 1242 static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, 1243 struct xe_sched_job *job) 1244 { 1245 int err = 0; 1246 1247 /* 1248 * No need to check for is_cpu_addr_mirror here as vma_add_deps is a 1249 * NOP if VMA is_cpu_addr_mirror 1250 */ 1251 1252 switch (op->base.op) { 1253 case DRM_GPUVA_OP_MAP: 1254 if (!op->map.immediate && xe_vm_in_fault_mode(vm)) 1255 break; 1256 1257 err = vma_add_deps(op->map.vma, job); 1258 break; 1259 case DRM_GPUVA_OP_REMAP: 1260 if (op->remap.prev) 1261 err = vma_add_deps(op->remap.prev, job); 1262 if (!err && op->remap.next) 1263 err = vma_add_deps(op->remap.next, job); 1264 break; 1265 case DRM_GPUVA_OP_UNMAP: 1266 break; 1267 case DRM_GPUVA_OP_PREFETCH: 1268 err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); 1269 break; 1270 case DRM_GPUVA_OP_DRIVER: 1271 break; 1272 default: 1273 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1274 } 1275 1276 return err; 1277 } 1278 1279 static int xe_pt_vm_dependencies(struct xe_sched_job *job, 1280 struct xe_tlb_inval_job *ijob, 1281 struct xe_tlb_inval_job *mjob, 1282 struct xe_vm *vm, 1283 struct xe_vma_ops *vops, 1284 struct xe_vm_pgtable_update_ops *pt_update_ops, 1285 struct xe_range_fence_tree *rftree) 1286 { 1287 struct xe_range_fence *rtfence; 1288 struct dma_fence *fence; 1289 struct xe_vma_op *op; 1290 int err = 0, i; 1291 1292 xe_vm_assert_held(vm); 1293 1294 if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) 1295 return -ETIME; 1296 1297 if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) 1298 return -ETIME; 1299 1300 if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { 1301 err = job_test_add_deps(job, xe_vm_resv(vm), 1302 pt_update_ops->wait_vm_bookkeep ? 
1303 DMA_RESV_USAGE_BOOKKEEP : 1304 DMA_RESV_USAGE_KERNEL); 1305 if (err) 1306 return err; 1307 } 1308 1309 rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, 1310 pt_update_ops->last); 1311 while (rtfence) { 1312 fence = rtfence->fence; 1313 1314 if (!dma_fence_is_signaled(fence)) { 1315 /* 1316 * Is this a CPU update? GPU is busy updating, so return 1317 * an error 1318 */ 1319 if (!job) 1320 return -ETIME; 1321 1322 dma_fence_get(fence); 1323 err = drm_sched_job_add_dependency(&job->drm, fence); 1324 if (err) 1325 return err; 1326 } 1327 1328 rtfence = xe_range_fence_tree_next(rtfence, 1329 pt_update_ops->start, 1330 pt_update_ops->last); 1331 } 1332 1333 list_for_each_entry(op, &vops->list, link) { 1334 err = op_add_deps(vm, op, job); 1335 if (err) 1336 return err; 1337 } 1338 1339 if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { 1340 if (job) 1341 err = xe_sched_job_last_fence_add_dep(job, vm); 1342 else 1343 err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); 1344 } 1345 1346 for (i = 0; job && !err && i < vops->num_syncs; i++) 1347 err = xe_sync_entry_add_deps(&vops->syncs[i], job); 1348 1349 if (job) { 1350 if (ijob) { 1351 err = xe_tlb_inval_job_alloc_dep(ijob); 1352 if (err) 1353 return err; 1354 } 1355 1356 if (mjob) { 1357 err = xe_tlb_inval_job_alloc_dep(mjob); 1358 if (err) 1359 return err; 1360 } 1361 } 1362 1363 return err; 1364 } 1365 1366 static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) 1367 { 1368 struct xe_vma_ops *vops = pt_update->vops; 1369 struct xe_vm *vm = vops->vm; 1370 struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; 1371 struct xe_vm_pgtable_update_ops *pt_update_ops = 1372 &vops->pt_update_ops[pt_update->tile_id]; 1373 1374 return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob, 1375 pt_update->mjob, vm, pt_update->vops, 1376 pt_update_ops, rftree); 1377 } 1378 1379 #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT 1380 1381 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) 1382 { 1383 u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; 1384 static u32 count; 1385 1386 if (count++ % divisor == divisor - 1) { 1387 uvma->userptr.divisor = divisor << 1; 1388 return true; 1389 } 1390 1391 return false; 1392 } 1393 1394 #else 1395 1396 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) 1397 { 1398 return false; 1399 } 1400 1401 #endif 1402 1403 static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, 1404 struct xe_vm_pgtable_update_ops *pt_update) 1405 { 1406 struct xe_userptr_vma *uvma; 1407 unsigned long notifier_seq; 1408 1409 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1410 1411 if (!xe_vma_is_userptr(vma)) 1412 return 0; 1413 1414 uvma = to_userptr_vma(vma); 1415 if (xe_pt_userptr_inject_eagain(uvma)) 1416 xe_vma_userptr_force_invalidate(uvma); 1417 1418 notifier_seq = uvma->userptr.notifier_seq; 1419 1420 if (!mmu_interval_read_retry(&uvma->userptr.notifier, 1421 notifier_seq)) 1422 return 0; 1423 1424 if (xe_vm_in_fault_mode(vm)) 1425 return -EAGAIN; 1426 1427 /* 1428 * Just continue the operation since exec or rebind worker 1429 * will take care of rebinding. 
1430 */ 1431 return 0; 1432 } 1433 1434 static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, 1435 struct xe_vm_pgtable_update_ops *pt_update) 1436 { 1437 int err = 0; 1438 1439 lockdep_assert_held_read(&vm->userptr.notifier_lock); 1440 1441 switch (op->base.op) { 1442 case DRM_GPUVA_OP_MAP: 1443 if (!op->map.immediate && xe_vm_in_fault_mode(vm)) 1444 break; 1445 1446 err = vma_check_userptr(vm, op->map.vma, pt_update); 1447 break; 1448 case DRM_GPUVA_OP_REMAP: 1449 if (op->remap.prev) 1450 err = vma_check_userptr(vm, op->remap.prev, pt_update); 1451 if (!err && op->remap.next) 1452 err = vma_check_userptr(vm, op->remap.next, pt_update); 1453 break; 1454 case DRM_GPUVA_OP_UNMAP: 1455 break; 1456 case DRM_GPUVA_OP_PREFETCH: 1457 err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), 1458 pt_update); 1459 break; 1460 default: 1461 drm_warn(&vm->xe->drm, "NOT POSSIBLE"); 1462 } 1463 1464 return err; 1465 } 1466 1467 static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) 1468 { 1469 struct xe_vm *vm = pt_update->vops->vm; 1470 struct xe_vma_ops *vops = pt_update->vops; 1471 struct xe_vm_pgtable_update_ops *pt_update_ops = 1472 &vops->pt_update_ops[pt_update->tile_id]; 1473 struct xe_vma_op *op; 1474 int err; 1475 1476 err = xe_pt_pre_commit(pt_update); 1477 if (err) 1478 return err; 1479 1480 down_read(&vm->userptr.notifier_lock); 1481 1482 list_for_each_entry(op, &vops->list, link) { 1483 err = op_check_userptr(vm, op, pt_update_ops); 1484 if (err) { 1485 up_read(&vm->userptr.notifier_lock); 1486 break; 1487 } 1488 } 1489 1490 return err; 1491 } 1492 1493 #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) 1494 static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) 1495 { 1496 struct xe_vm *vm = pt_update->vops->vm; 1497 struct xe_vma_ops *vops = pt_update->vops; 1498 struct xe_vma_op *op; 1499 unsigned long i; 1500 int err; 1501 1502 err = xe_pt_pre_commit(pt_update); 1503 if (err) 1504 return err; 1505 1506 xe_svm_notifier_lock(vm); 1507 1508 list_for_each_entry(op, &vops->list, link) { 1509 struct xe_svm_range *range = NULL; 1510 1511 if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) 1512 continue; 1513 1514 if (op->base.op == DRM_GPUVA_OP_PREFETCH) { 1515 xe_assert(vm->xe, 1516 xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); 1517 xa_for_each(&op->prefetch_range.range, i, range) { 1518 xe_svm_range_debug(range, "PRE-COMMIT"); 1519 1520 if (!xe_svm_range_pages_valid(range)) { 1521 xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); 1522 xe_svm_notifier_unlock(vm); 1523 return -ENODATA; 1524 } 1525 } 1526 } else { 1527 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); 1528 xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); 1529 range = op->map_range.range; 1530 1531 xe_svm_range_debug(range, "PRE-COMMIT"); 1532 1533 if (!xe_svm_range_pages_valid(range)) { 1534 xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); 1535 xe_svm_notifier_unlock(vm); 1536 return -EAGAIN; 1537 } 1538 } 1539 } 1540 1541 return 0; 1542 } 1543 #endif 1544 1545 struct xe_pt_stage_unbind_walk { 1546 /** @base: The pagewalk base-class. */ 1547 struct xe_pt_walk base; 1548 1549 /* Input parameters for the walk */ 1550 /** @tile: The tile we're unbinding from. */ 1551 struct xe_tile *tile; 1552 1553 /** 1554 * @modified_start: Walk range start, modified to include any 1555 * shared pagetables that we're the only user of and can thus 1556 * treat as private. 
	 */
	u64 modified_start;
	/** @modified_end: Walk range end, modified like @modified_start. */
	u64 modified_end;

	/* Output */
	/** @wupd: Structure to track the page-table updates we're building */
	struct xe_walk_update wupd;
};

/*
 * Check whether this range is the only one populating this pagetable,
 * and in that case, update the walk range checks so that higher levels don't
 * view us as a shared pagetable.
 */
static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
			     const struct xe_pt *child,
			     enum page_walk_action *action,
			     struct xe_pt_walk *walk)
{
	struct xe_pt_stage_unbind_walk *xe_walk =
		container_of(walk, typeof(*xe_walk), base);
	unsigned int shift = walk->shifts[level];
	u64 size = 1ull << shift;

	if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
	    ((next - addr) >> shift) == child->num_live) {
		u64 size = 1ull << walk->shifts[level + 1];

		*action = ACTION_CONTINUE;

		if (xe_walk->modified_start >= addr)
			xe_walk->modified_start = round_down(addr, size);
		if (xe_walk->modified_end <= next)
			xe_walk->modified_end = round_up(next, size);

		return true;
	}

	return false;
}

static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
				    unsigned int level, u64 addr, u64 next,
				    struct xe_ptw **child,
				    enum page_walk_action *action,
				    struct xe_pt_walk *walk)
{
	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);

	XE_WARN_ON(!*child);
	XE_WARN_ON(!level);

	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);

	return 0;
}

static int
xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
				unsigned int level, u64 addr, u64 next,
				struct xe_ptw **child,
				enum page_walk_action *action,
				struct xe_pt_walk *walk)
{
	struct xe_pt_stage_unbind_walk *xe_walk =
		container_of(walk, typeof(*xe_walk), base);
	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
	pgoff_t end_offset;
	u64 size = 1ull << walk->shifts[--level];
	int err;

	if (!IS_ALIGNED(addr, size))
		addr = xe_walk->modified_start;
	if (!IS_ALIGNED(next, size))
		next = xe_walk->modified_end;

	/* Parent == *child is the root pt. Don't kill it. */
	if (parent != *child &&
	    xe_pt_check_kill(addr, next, level, xe_child, action, walk))
		return 0;

	if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
				     &end_offset))
		return 0;

	err = xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true);
	if (err)
		return err;

	xe_walk->wupd.updates[level].update->qwords = end_offset - offset;

	return 0;
}

static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
	.pt_entry = xe_pt_stage_unbind_entry,
	.pt_post_descend = xe_pt_stage_unbind_post_descend,
};

/**
 * xe_pt_stage_unbind() - Build page-table update structures for an unbind
 * operation
 * @tile: The tile we're unbinding for.
 * @vm: The vm
 * @vma: The vma we're unbinding.
 * @range: The range we're unbinding.
 * @entries: Caller-provided storage for the update structures.
 *
 * Builds page-table update structures for an unbind operation.
The function 1667 * will attempt to remove all page-tables that we're the only user 1668 * of, and for that to work, the unbind operation must be committed in the 1669 * same critical section that blocks racing binds to the same page-table tree. 1670 * 1671 * Return: The number of entries used. 1672 */ 1673 static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, 1674 struct xe_vm *vm, 1675 struct xe_vma *vma, 1676 struct xe_svm_range *range, 1677 struct xe_vm_pgtable_update *entries) 1678 { 1679 u64 start = range ? range->base.itree.start : xe_vma_start(vma); 1680 u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma); 1681 struct xe_pt_stage_unbind_walk xe_walk = { 1682 .base = { 1683 .ops = &xe_pt_stage_unbind_ops, 1684 .shifts = xe_normal_pt_shifts, 1685 .max_level = XE_PT_HIGHEST_LEVEL, 1686 .staging = true, 1687 }, 1688 .tile = tile, 1689 .modified_start = start, 1690 .modified_end = end, 1691 .wupd.entries = entries, 1692 }; 1693 struct xe_pt *pt = vm->pt_root[tile->id]; 1694 1695 (void)xe_pt_walk_shared(&pt->base, pt->level, start, end, 1696 &xe_walk.base); 1697 1698 return xe_walk.wupd.num_used_entries; 1699 } 1700 1701 static void 1702 xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, 1703 struct xe_tile *tile, struct iosys_map *map, 1704 void *ptr, u32 qword_ofs, u32 num_qwords, 1705 const struct xe_vm_pgtable_update *update) 1706 { 1707 struct xe_vm *vm = pt_update->vops->vm; 1708 u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); 1709 int i; 1710 1711 if (map && map->is_iomem) 1712 for (i = 0; i < num_qwords; ++i) 1713 xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * 1714 sizeof(u64), u64, empty); 1715 else if (map) 1716 memset64(map->vaddr + qword_ofs * sizeof(u64), empty, 1717 num_qwords); 1718 else 1719 memset64(ptr, empty, num_qwords); 1720 } 1721 1722 static void xe_pt_abort_unbind(struct xe_vma *vma, 1723 struct xe_vm_pgtable_update *entries, 1724 u32 num_entries) 1725 { 1726 int i, j; 1727 1728 xe_pt_commit_prepare_locks_assert(vma); 1729 1730 for (i = num_entries - 1; i >= 0; --i) { 1731 struct xe_vm_pgtable_update *entry = &entries[i]; 1732 struct xe_pt *pt = entry->pt; 1733 struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); 1734 1735 pt->num_live += entry->qwords; 1736 1737 if (!pt->level) 1738 continue; 1739 1740 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) 1741 pt_dir->staging[j] = 1742 entries[i].pt_entries[j - entry->ofs].pt ? 
1743 &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; 1744 } 1745 } 1746 1747 static void 1748 xe_pt_commit_prepare_unbind(struct xe_vma *vma, 1749 struct xe_vm_pgtable_update *entries, 1750 u32 num_entries) 1751 { 1752 int i, j; 1753 1754 xe_pt_commit_prepare_locks_assert(vma); 1755 1756 for (i = 0; i < num_entries; ++i) { 1757 struct xe_vm_pgtable_update *entry = &entries[i]; 1758 struct xe_pt *pt = entry->pt; 1759 struct xe_pt_dir *pt_dir; 1760 1761 pt->num_live -= entry->qwords; 1762 if (!pt->level) 1763 continue; 1764 1765 pt_dir = as_xe_pt_dir(pt); 1766 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { 1767 entry->pt_entries[j - entry->ofs].pt = 1768 xe_pt_entry_staging(pt_dir, j); 1769 pt_dir->staging[j] = NULL; 1770 } 1771 } 1772 } 1773 1774 static void 1775 xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, 1776 u64 start, u64 end) 1777 { 1778 u64 last; 1779 u32 current_op = pt_update_ops->current_op; 1780 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1781 int i, level = 0; 1782 1783 for (i = 0; i < pt_op->num_entries; i++) { 1784 const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; 1785 1786 if (entry->pt->level > level) 1787 level = entry->pt->level; 1788 } 1789 1790 /* Greedy (non-optimal) calculation but simple */ 1791 start = ALIGN_DOWN(start, 0x1ull << xe_pt_shift(level)); 1792 last = ALIGN(end, 0x1ull << xe_pt_shift(level)) - 1; 1793 1794 if (start < pt_update_ops->start) 1795 pt_update_ops->start = start; 1796 if (last > pt_update_ops->last) 1797 pt_update_ops->last = last; 1798 } 1799 1800 static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) 1801 { 1802 int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; 1803 1804 if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) 1805 return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1806 xe->info.tile_count << shift); 1807 1808 return 0; 1809 } 1810 1811 static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, 1812 struct xe_vm_pgtable_update_ops *pt_update_ops, 1813 struct xe_vma *vma, bool invalidate_on_bind) 1814 { 1815 u32 current_op = pt_update_ops->current_op; 1816 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1817 int err; 1818 1819 xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma)); 1820 xe_bo_assert_held(xe_vma_bo(vma)); 1821 1822 vm_dbg(&xe_vma_vm(vma)->xe->drm, 1823 "Preparing bind, with range [%llx...%llx)\n", 1824 xe_vma_start(vma), xe_vma_end(vma) - 1); 1825 1826 pt_op->vma = NULL; 1827 pt_op->bind = true; 1828 pt_op->rebind = BIT(tile->id) & vma->tile_present; 1829 1830 err = vma_reserve_fences(tile_to_xe(tile), vma); 1831 if (err) 1832 return err; 1833 1834 err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries, 1835 &pt_op->num_entries, invalidate_on_bind); 1836 if (!err) { 1837 xe_tile_assert(tile, pt_op->num_entries <= 1838 ARRAY_SIZE(pt_op->entries)); 1839 xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, 1840 pt_op->num_entries, true); 1841 1842 xe_pt_update_ops_rfence_interval(pt_update_ops, 1843 xe_vma_start(vma), 1844 xe_vma_end(vma)); 1845 ++pt_update_ops->current_op; 1846 pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); 1847 1848 /* 1849 * If rebind, we have to invalidate TLB on !LR vms to invalidate 1850 * cached PTEs point to freed memory. On LR vms this is done 1851 * automatically when the context is re-enabled by the rebind worker, 1852 * or in fault mode it was invalidated on PTE zapping. 
1853 * 1854 * If !rebind, and scratch enabled VMs, there is a chance the scratch 1855 * PTE is already cached in the TLB so it needs to be invalidated. 1856 * On !LR VMs this is done in the ring ops preceding a batch, but on 1857 * LR, in particular on user-space batch buffer chaining, it needs to 1858 * be done here. 1859 */ 1860 if ((!pt_op->rebind && xe_vm_has_scratch(vm) && 1861 xe_vm_in_lr_mode(vm))) 1862 pt_update_ops->needs_invalidation = true; 1863 else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) 1864 /* We bump also if batch_invalidate_tlb is true */ 1865 vm->tlb_flush_seqno++; 1866 1867 vma->tile_staged |= BIT(tile->id); 1868 pt_op->vma = vma; 1869 xe_pt_commit_prepare_bind(vma, pt_op->entries, 1870 pt_op->num_entries, pt_op->rebind); 1871 } else { 1872 xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); 1873 } 1874 1875 return err; 1876 } 1877 1878 static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, 1879 struct xe_vm_pgtable_update_ops *pt_update_ops, 1880 struct xe_vma *vma, struct xe_svm_range *range) 1881 { 1882 u32 current_op = pt_update_ops->current_op; 1883 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1884 int err; 1885 1886 xe_tile_assert(tile, xe_vma_is_cpu_addr_mirror(vma)); 1887 1888 vm_dbg(&xe_vma_vm(vma)->xe->drm, 1889 "Preparing bind, with range [%lx...%lx)\n", 1890 range->base.itree.start, range->base.itree.last); 1891 1892 pt_op->vma = NULL; 1893 pt_op->bind = true; 1894 pt_op->rebind = BIT(tile->id) & range->tile_present; 1895 1896 err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries, 1897 &pt_op->num_entries, false); 1898 if (!err) { 1899 xe_tile_assert(tile, pt_op->num_entries <= 1900 ARRAY_SIZE(pt_op->entries)); 1901 xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, 1902 pt_op->num_entries, true); 1903 1904 xe_pt_update_ops_rfence_interval(pt_update_ops, 1905 range->base.itree.start, 1906 range->base.itree.last + 1); 1907 ++pt_update_ops->current_op; 1908 pt_update_ops->needs_svm_lock = true; 1909 1910 pt_op->vma = vma; 1911 xe_pt_commit_prepare_bind(vma, pt_op->entries, 1912 pt_op->num_entries, pt_op->rebind); 1913 } else { 1914 xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); 1915 } 1916 1917 return err; 1918 } 1919 1920 static int unbind_op_prepare(struct xe_tile *tile, 1921 struct xe_vm_pgtable_update_ops *pt_update_ops, 1922 struct xe_vma *vma) 1923 { 1924 u32 current_op = pt_update_ops->current_op; 1925 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; 1926 int err; 1927 1928 if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) 1929 return 0; 1930 1931 xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma)); 1932 xe_bo_assert_held(xe_vma_bo(vma)); 1933 1934 vm_dbg(&xe_vma_vm(vma)->xe->drm, 1935 "Preparing unbind, with range [%llx...%llx)\n", 1936 xe_vma_start(vma), xe_vma_end(vma) - 1); 1937 1938 pt_op->vma = vma; 1939 pt_op->bind = false; 1940 pt_op->rebind = false; 1941 1942 err = vma_reserve_fences(tile_to_xe(tile), vma); 1943 if (err) 1944 return err; 1945 1946 pt_op->num_entries = xe_pt_stage_unbind(tile, xe_vma_vm(vma), 1947 vma, NULL, pt_op->entries); 1948 1949 xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, 1950 pt_op->num_entries, false); 1951 xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma), 1952 xe_vma_end(vma)); 1953 ++pt_update_ops->current_op; 1954 pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); 1955 pt_update_ops->needs_invalidation = true; 1956 1957 xe_pt_commit_prepare_unbind(vma, 
static bool
xe_pt_op_check_range_skip_invalidation(struct xe_vm_pgtable_update_op *pt_op,
				       struct xe_svm_range *range)
{
	struct xe_vm_pgtable_update *update = pt_op->entries;

	XE_WARN_ON(!pt_op->num_entries);

	/*
	 * We can't skip the invalidation if we are removing PTEs that span
	 * more than the range, so do some checks to ensure the PTEs being
	 * removed only cover the range.
	 */

	if (pt_op->num_entries > 1)
		return false;

	if (update->pt->level == 0)
		return true;

	if (update->pt->level == 1)
		return xe_svm_range_size(range) >= SZ_2M;

	return false;
}

static int unbind_range_prepare(struct xe_vm *vm,
				struct xe_tile *tile,
				struct xe_vm_pgtable_update_ops *pt_update_ops,
				struct xe_svm_range *range)
{
	u32 current_op = pt_update_ops->current_op;
	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];

	if (!(range->tile_present & BIT(tile->id)))
		return 0;

	vm_dbg(&vm->xe->drm,
	       "Preparing unbind, with range [%lx...%lx)\n",
	       range->base.itree.start, range->base.itree.last);

	pt_op->vma = XE_INVALID_VMA;
	pt_op->bind = false;
	pt_op->rebind = false;

	pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
						pt_op->entries);

	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
				pt_op->num_entries, false);
	xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start,
					 range->base.itree.last + 1);
	++pt_update_ops->current_op;
	pt_update_ops->needs_svm_lock = true;
	pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) ||
		xe_vm_has_valid_gpu_mapping(tile, range->tile_present,
					    range->tile_invalidated) ||
		!xe_pt_op_check_range_skip_invalidation(pt_op, range);

	xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries,
				    pt_op->num_entries);

	return 0;
}

static int op_prepare(struct xe_vm *vm,
		      struct xe_tile *tile,
		      struct xe_vm_pgtable_update_ops *pt_update_ops,
		      struct xe_vma_op *op)
{
	int err = 0;

	xe_vm_assert_held(vm);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if ((!op->map.immediate && xe_vm_in_fault_mode(vm) &&
		     !op->map.invalidate_on_bind) ||
		    op->map.is_cpu_addr_mirror)
			break;

		err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma,
				      op->map.invalidate_on_bind);
		pt_update_ops->wait_vm_kernel = true;
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);

		if (xe_vma_is_cpu_addr_mirror(old))
			break;

		err = unbind_op_prepare(tile, pt_update_ops, old);

		if (!err && op->remap.prev) {
			err = bind_op_prepare(vm, tile, pt_update_ops,
					      op->remap.prev, false);
			pt_update_ops->wait_vm_bookkeep = true;
		}
		if (!err && op->remap.next) {
			err = bind_op_prepare(vm, tile, pt_update_ops,
					      op->remap.next, false);
			pt_update_ops->wait_vm_bookkeep = true;
		}
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (xe_vma_is_cpu_addr_mirror(vma))
			break;

		err = unbind_op_prepare(tile, pt_update_ops, vma);
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			struct xe_svm_range *range;
			unsigned long i;

			xa_for_each(&op->prefetch_range.range, i, range) {
				err = bind_range_prepare(vm, tile, pt_update_ops,
							 vma, range);
				if (err)
					return err;
			}
		} else {
			err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
			pt_update_ops->wait_vm_kernel = true;
		}
		break;
	}
	case DRM_GPUVA_OP_DRIVER:
		if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));

			err = bind_range_prepare(vm, tile, pt_update_ops,
						 op->map_range.vma,
						 op->map_range.range);
		} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
			err = unbind_range_prepare(vm, tile, pt_update_ops,
						   op->unmap_range.range);
		}
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}

	return err;
}
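
/*
 * Reset the per-tile update state: empty the deferred free list and set the
 * start/last interval to an empty range (start = ~0ull, last = 0) so that
 * xe_pt_update_ops_rfence_interval() can grow it as operations are prepared.
 */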
static void
xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
{
	init_llist_head(&pt_update_ops->deferred);
	pt_update_ops->start = ~0x0ull;
	pt_update_ops->last = 0x0ull;
}

/**
 * xe_pt_update_ops_prepare() - Prepare PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Prepare PT update operations, which includes updating internal PT state,
 * allocating memory for page tables, populating the page tables being pruned
 * in, and creating PT update operations for leaf insertion / removal.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	struct xe_vma_op *op;
	int shift = tile->media_gt ? 1 : 0;
	int err;

	lockdep_assert_held(&vops->vm->lock);
	xe_vm_assert_held(vops->vm);

	xe_pt_update_ops_init(pt_update_ops);

	err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
				      tile_to_xe(tile)->info.tile_count << shift);
	if (err)
		return err;

	list_for_each_entry(op, &vops->list, link) {
		err = op_prepare(vops->vm, tile, pt_update_ops, op);

		if (err)
			return err;
	}

	xe_tile_assert(tile, pt_update_ops->current_op <=
		       pt_update_ops->num_ops);

#ifdef TEST_VM_OPS_ERROR
	if (vops->inject_error &&
	    vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE)
		return -ENOSPC;
#endif

	return 0;
}
ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
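
/*
 * Commit a prepared bind for @vma once the PT update fence(s) exist: add the
 * fence(s) to the reservation object of an external BO, update the VMA's
 * tile_present / tile_invalidated / tile_staged state, and kick the rebind
 * worker when required.
 */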
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
			   struct xe_vm_pgtable_update_ops *pt_update_ops,
			   struct xe_vma *vma, struct dma_fence *fence,
			   struct dma_fence *fence2, bool invalidate_on_bind)
{
	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));

	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);
		if (fence2)
			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
					   pt_update_ops->wait_vm_bookkeep ?
					   DMA_RESV_USAGE_KERNEL :
					   DMA_RESV_USAGE_BOOKKEEP);
	}
	/* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id));
	if (invalidate_on_bind)
		WRITE_ONCE(vma->tile_invalidated,
			   vma->tile_invalidated | BIT(tile->id));
	else
		WRITE_ONCE(vma->tile_invalidated,
			   vma->tile_invalidated & ~BIT(tile->id));
	vma->tile_staged &= ~BIT(tile->id);
	if (xe_vma_is_userptr(vma)) {
		lockdep_assert_held_read(&vm->userptr.notifier_lock);
		to_userptr_vma(vma)->userptr.initial_bind = true;
	}

	/*
	 * Kick the rebind worker if this bind triggers preempt fences and we
	 * are not in the rebind worker.
	 */
	if (pt_update_ops->wait_vm_bookkeep &&
	    xe_vm_in_preempt_fence_mode(vm) &&
	    !current->mm)
		xe_vm_queue_rebind_worker(vm);
}

static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_vm_pgtable_update_ops *pt_update_ops,
			     struct xe_vma *vma, struct dma_fence *fence,
			     struct dma_fence *fence2)
{
	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));

	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);
		if (fence2)
			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
					   pt_update_ops->wait_vm_bookkeep ?
					   DMA_RESV_USAGE_KERNEL :
					   DMA_RESV_USAGE_BOOKKEEP);
	}
	vma->tile_present &= ~BIT(tile->id);
	if (!vma->tile_present) {
		list_del_init(&vma->combined_links.rebind);
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert_held_read(&vm->userptr.notifier_lock);

			spin_lock(&vm->userptr.invalidated_lock);
			list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
			spin_unlock(&vm->userptr.invalidated_lock);
		}
	}
}

static void range_present_and_invalidated_tile(struct xe_vm *vm,
					       struct xe_svm_range *range,
					       u8 tile_id)
{
	/* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */

	lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);

	WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id));
	WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id));
}
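
/*
 * Commit a single GPUVA operation once the PT update fence(s) exist:
 * dispatch to bind_op_commit() / unbind_op_commit() or update the SVM range
 * state, depending on the operation type.
 */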
static void op_commit(struct xe_vm *vm,
		      struct xe_tile *tile,
		      struct xe_vm_pgtable_update_ops *pt_update_ops,
		      struct xe_vma_op *op, struct dma_fence *fence,
		      struct dma_fence *fence2)
{
	xe_vm_assert_held(vm);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
		    op->map.is_cpu_addr_mirror)
			break;

		bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
			       fence2, op->map.invalidate_on_bind);
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);

		if (xe_vma_is_cpu_addr_mirror(old))
			break;

		unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);

		if (op->remap.prev)
			bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
				       fence, fence2, false);
		if (op->remap.next)
			bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
				       fence, fence2, false);
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (!xe_vma_is_cpu_addr_mirror(vma))
			unbind_op_commit(vm, tile, pt_update_ops, vma, fence,
					 fence2);
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			struct xe_svm_range *range = NULL;
			unsigned long i;

			xa_for_each(&op->prefetch_range.range, i, range)
				range_present_and_invalidated_tile(vm, range, tile->id);
		} else {
			bind_op_commit(vm, tile, pt_update_ops, vma, fence,
				       fence2, false);
		}
		break;
	}
	case DRM_GPUVA_OP_DRIVER:
	{
		/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
		if (op->subop == XE_VMA_SUBOP_MAP_RANGE)
			range_present_and_invalidated_tile(vm, op->map_range.range, tile->id);
		else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
			WRITE_ONCE(op->unmap_range.range->tile_present,
				   op->unmap_range.range->tile_present &
				   ~BIT(tile->id));

		break;
	}
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}
}

static const struct xe_migrate_pt_update_ops migrate_ops = {
	.populate = xe_vm_populate_pgtable,
	.clear = xe_migrate_clear_pgtable_callback,
	.pre_commit = xe_pt_pre_commit,
};

static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
	.populate = xe_vm_populate_pgtable,
	.clear = xe_migrate_clear_pgtable_callback,
	.pre_commit = xe_pt_userptr_pre_commit,
};

#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
	.populate = xe_vm_populate_pgtable,
	.clear = xe_migrate_clear_pgtable_callback,
	.pre_commit = xe_pt_svm_pre_commit,
};
#else
static const struct xe_migrate_pt_update_ops svm_migrate_ops;
#endif
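
/*
 * Select the TLB invalidation dependency scheduler of @q matching @gt
 * (media vs primary GT).
 */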
static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
						 struct xe_gt *gt)
{
	if (xe_gt_is_media_type(gt))
		return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler;

	return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler;
}

/**
 * xe_pt_update_ops_run() - Run PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Run PT update operations, which includes committing internal PT state
 * changes, creating a job for the PT update operations for leaf insertion /
 * removal, and installing the job fence in various places.
 *
 * Return: fence on success, negative error pointer on error.
 */
struct dma_fence *
xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm *vm = vops->vm;
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	struct dma_fence *fence, *ifence, *mfence;
	struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	struct xe_range_fence *rfence;
	struct xe_vma_op *op;
	int err = 0, i;
	struct xe_migrate_pt_update update = {
		.ops = pt_update_ops->needs_svm_lock ?
			&svm_migrate_ops :
			pt_update_ops->needs_userptr_lock ?
			&userptr_migrate_ops :
			&migrate_ops,
		.vops = vops,
		.tile_id = tile->id,
	};

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (!pt_update_ops->current_op) {
		xe_tile_assert(tile, xe_vm_in_fault_mode(vm));

		return dma_fence_get_stub();
	}

#ifdef TEST_VM_OPS_ERROR
	if (vops->inject_error &&
	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN)
		return ERR_PTR(-ENOSPC);
#endif

	if (pt_update_ops->needs_invalidation) {
		struct xe_exec_queue *q = pt_update_ops->q;
		struct xe_dep_scheduler *dep_scheduler =
			to_dep_scheduler(q, tile->primary_gt);

		ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
					       dep_scheduler,
					       pt_update_ops->start,
					       pt_update_ops->last,
					       vm->usm.asid);
		if (IS_ERR(ijob)) {
			err = PTR_ERR(ijob);
			goto kill_vm_tile1;
		}
		update.ijob = ijob;

		if (tile->media_gt) {
			dep_scheduler = to_dep_scheduler(q, tile->media_gt);

			mjob = xe_tlb_inval_job_create(q,
						       &tile->media_gt->tlb_inval,
						       dep_scheduler,
						       pt_update_ops->start,
						       pt_update_ops->last,
						       vm->usm.asid);
			if (IS_ERR(mjob)) {
				err = PTR_ERR(mjob);
				goto free_ijob;
			}
			update.mjob = mjob;

			fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
			if (!fences) {
				err = -ENOMEM;
				goto free_ijob;
			}
			cf = dma_fence_array_alloc(2);
			if (!cf) {
				err = -ENOMEM;
				goto free_ijob;
			}
		}
	}

	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
	if (!rfence) {
		err = -ENOMEM;
		goto free_ijob;
	}

	fence = xe_migrate_update_pgtables(tile->migrate, &update);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto free_rfence;
	}

	/* Point of no return - VM killed if failure after this */
	for (i = 0; i < pt_update_ops->current_op; ++i) {
		struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];

		xe_pt_commit(pt_op->vma, pt_op->entries,
			     pt_op->num_entries, &pt_update_ops->deferred);
		pt_op->vma = NULL;	/* skip in xe_pt_update_ops_abort */
	}

	if (xe_range_fence_insert(&vm->rftree[tile->id], rfence,
				  &xe_range_fence_kfree_ops,
				  pt_update_ops->start,
				  pt_update_ops->last, fence))
		dma_fence_wait(fence, false);

	/* tlb invalidation must be done before signaling unbind/rebind */
	if (ijob) {
		struct dma_fence *__fence;

		ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence);
		__fence = ifence;

		if (mjob) {
			fences[0] = ifence;
			mfence = xe_tlb_inval_job_push(mjob, tile->migrate,
						       fence);
			fences[1] = mfence;

			dma_fence_array_init(cf, 2, fences,
					     vm->composite_fence_ctx,
					     vm->composite_fence_seqno++,
					     false);
			__fence = &cf->base;
		}

		dma_fence_put(fence);
		fence = __fence;
	}

	if (!mjob) {
		dma_resv_add_fence(xe_vm_resv(vm), fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		list_for_each_entry(op, &vops->list, link)
			op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
	} else {
		dma_resv_add_fence(xe_vm_resv(vm), ifence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		dma_resv_add_fence(xe_vm_resv(vm), mfence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		list_for_each_entry(op, &vops->list, link)
			op_commit(vops->vm, tile, pt_update_ops, op, ifence,
				  mfence);
	}

	if (pt_update_ops->needs_svm_lock)
		xe_svm_notifier_unlock(vm);
	if (pt_update_ops->needs_userptr_lock)
		up_read(&vm->userptr.notifier_lock);

	xe_tlb_inval_job_put(mjob);
	xe_tlb_inval_job_put(ijob);

	return fence;

free_rfence:
	kfree(rfence);
free_ijob:
	kfree(cf);
	kfree(fences);
	xe_tlb_inval_job_put(mjob);
	xe_tlb_inval_job_put(ijob);
kill_vm_tile1:
	if (err != -EAGAIN && err != -ENODATA && tile->id)
		xe_vm_kill(vops->vm, false);

	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);

/**
 * xe_pt_update_ops_fini() - Finish PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Finish PT update operations by committing the destruction of page-table
 * memory.
 */
void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	int i;

	lockdep_assert_held(&vops->vm->lock);
	xe_vm_assert_held(vops->vm);

	for (i = 0; i < pt_update_ops->current_op; ++i) {
		struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];

		xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
	}
	xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
}

/**
 * xe_pt_update_ops_abort() - Abort PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Abort PT update operations by unwinding internal PT state.
 */
void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	int i;

	lockdep_assert_held(&vops->vm->lock);
	xe_vm_assert_held(vops->vm);

	for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
		struct xe_vm_pgtable_update_op *pt_op =
			&pt_update_ops->ops[i];

		if (!pt_op->vma || i >= pt_update_ops->current_op)
			continue;

		if (pt_op->bind)
			xe_pt_abort_bind(pt_op->vma, pt_op->entries,
					 pt_op->num_entries,
					 pt_op->rebind);
		else
			xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
					   pt_op->num_entries);
	}

	xe_pt_update_ops_fini(tile, vops);
}
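
/*
 * Illustrative sketch only - a hypothetical helper, not part of the driver
 * and not an actual call site: it shows the expected ordering of the entry
 * points above for a single tile, i.e. prepare, then run, then fini on
 * success, with xe_pt_update_ops_abort() unwinding the prepared state on
 * failure. The real callers (see xe_vm.c) drive this across all tiles of the
 * device and manage the returned fence differently.
 */
static int __maybe_unused xe_pt_update_ops_example(struct xe_tile *tile,
						   struct xe_vma_ops *vops)
{
	struct dma_fence *fence;
	int err;

	err = xe_pt_update_ops_prepare(tile, vops);
	if (err) {
		/* Unwind any internal PT state already staged */
		xe_pt_update_ops_abort(tile, vops);
		return err;
	}

	fence = xe_pt_update_ops_run(tile, vops);
	if (IS_ERR(fence)) {
		xe_pt_update_ops_abort(tile, vops);
		return PTR_ERR(fence);
	}

	/* Commit destruction of page-table memory made unreachable above */
	xe_pt_update_ops_fini(tile, vops);
	dma_fence_put(fence);

	return 0;
}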