1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include <linux/dma-fence-array.h>
7
8 #include "xe_pt.h"
9
10 #include "regs/xe_gtt_defs.h"
11 #include "xe_bo.h"
12 #include "xe_device.h"
13 #include "xe_drm_client.h"
14 #include "xe_exec_queue.h"
15 #include "xe_gt.h"
16 #include "xe_gt_tlb_invalidation.h"
17 #include "xe_migrate.h"
18 #include "xe_pt_types.h"
19 #include "xe_pt_walk.h"
20 #include "xe_res_cursor.h"
21 #include "xe_sched_job.h"
22 #include "xe_sync.h"
23 #include "xe_svm.h"
24 #include "xe_trace.h"
25 #include "xe_ttm_stolen_mgr.h"
26 #include "xe_vm.h"
27
28 struct xe_pt_dir {
29 struct xe_pt pt;
30 /** @children: Array of page-table child nodes */
31 struct xe_ptw *children[XE_PDES];
32 /** @staging: Array of page-table staging nodes */
33 struct xe_ptw *staging[XE_PDES];
34 };
35
36 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
37 #define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
38 #define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
39 #else
40 #define xe_pt_set_addr(__xe_pt, __addr)
41 #define xe_pt_addr(__xe_pt) 0ull
42 #endif
43
44 static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
45 static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
46
47 #define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
48
49 static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
50 {
51 return container_of(pt, struct xe_pt_dir, pt);
52 }
53
54 static struct xe_pt *
55 xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index)
56 {
57 return container_of(pt_dir->staging[index], struct xe_pt, base);
58 }
59
60 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
61 unsigned int level)
62 {
63 struct xe_device *xe = tile_to_xe(tile);
64 u16 pat_index = xe->pat.idx[XE_CACHE_WB];
65 u8 id = tile->id;
66
67 if (!xe_vm_has_scratch(vm))
68 return 0;
69
70 if (level > MAX_HUGEPTE_LEVEL)
71 return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
72 0, pat_index);
73
74 return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
75 XE_PTE_NULL;
76 }
77
78 static void xe_pt_free(struct xe_pt *pt)
79 {
80 if (pt->level)
81 kfree(as_xe_pt_dir(pt));
82 else
83 kfree(pt);
84 }
85
86 /**
87 * xe_pt_create() - Create a page-table.
88 * @vm: The vm to create for.
89 * @tile: The tile to create for.
90 * @level: The page-table level.
91 *
92 * Allocate and initialize a single struct xe_pt metadata structure. Also
93 * create the corresponding page-table bo, but don't initialize it. If the
94 * level is greater than zero, then it's assumed to be a directory page-
95 * table and the directory structure is also allocated and initialized to
96 * NULL pointers.
97 *
98 * Return: A valid struct xe_pt pointer on success, an error pointer on
99 * error.
100 */
101 struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
102 unsigned int level)
103 {
104 struct xe_pt *pt;
105 struct xe_bo *bo;
106 int err;
107
108 if (level) {
109 struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);
110
111 pt = (dir) ? &dir->pt : NULL;
112 } else {
113 pt = kzalloc(sizeof(*pt), GFP_KERNEL);
114 }
115 if (!pt)
116 return ERR_PTR(-ENOMEM);
117
118 pt->level = level;
119 bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
120 ttm_bo_type_kernel,
121 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
122 XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
123 XE_BO_FLAG_PINNED |
124 XE_BO_FLAG_NO_RESV_EVICT |
125 XE_BO_FLAG_PAGETABLE);
126 if (IS_ERR(bo)) {
127 err = PTR_ERR(bo);
128 goto err_kfree;
129 }
130 pt->bo = bo;
131 pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL;
132 pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL;
133
134 if (vm->xef)
135 xe_drm_client_add_bo(vm->xef->client, pt->bo);
136 xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
137
138 return pt;
139
140 err_kfree:
141 xe_pt_free(pt);
142 return ERR_PTR(err);
143 }
144 ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO);
145
146 /**
147 * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
148 * entries.
149 * @tile: The tile whose scratch pagetable to use.
150 * @vm: The vm we populate for.
151 * @pt: The pagetable whose bo to initialize.
152 *
153 * Populate the page-table bo of @pt with entries pointing into the tile's
154 * scratch page-table tree if any. Otherwise populate with zeros.
155 */
156 void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
157 struct xe_pt *pt)
158 {
159 struct iosys_map *map = &pt->bo->vmap;
160 u64 empty;
161 int i;
162
163 if (!xe_vm_has_scratch(vm)) {
164 /*
165 * FIXME: Some memory is already allocated to zero?
166 * Find out which memory that is and avoid this memset...
167 */
168 xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
169 } else {
170 empty = __xe_pt_empty_pte(tile, vm, pt->level);
171 for (i = 0; i < XE_PDES; i++)
172 xe_pt_write(vm->xe, map, i, empty);
173 }
174 }
175
176 /**
177 * xe_pt_shift() - Return the ilog2 value of the size of the address range of
178 * a page-table at a certain level.
179 * @level: The level.
180 *
181 * Return: The ilog2 value of the size of the address range of a page-table
182 * at level @level.
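 *
 * As a worked example (assuming the usual 4K-page, 512-entry layout, i.e.
 * XE_PTE_SHIFT == 12 and XE_PDE_SHIFT == 9, which matches
 * xe_normal_pt_shifts[] below): xe_pt_shift(0) == 12 and
 * xe_pt_shift(1) == 21, so 1ull << xe_pt_shift(level) gives the 4K and 2M
 * granularities respectively.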
183 */
184 unsigned int xe_pt_shift(unsigned int level)
185 {
186 return XE_PTE_SHIFT + XE_PDE_SHIFT * level;
187 }
188
189 /**
190 * xe_pt_destroy() - Destroy a page-table tree.
191 * @pt: The root of the page-table tree to destroy.
192 * @flags: vm flags. Currently unused.
193 * @deferred: List head of lockless list for deferred putting. NULL for
194 * immediate putting.
195 *
196 * Puts the page-table bo, recursively calls xe_pt_destroy on all children
197 * and finally frees @pt. TODO: Can we remove the @flags argument?
198 */
199 void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
200 {
201 int i;
202
203 if (!pt)
204 return;
205
206 XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
207 xe_bo_unpin(pt->bo);
208 xe_bo_put_deferred(pt->bo, deferred);
209
210 if (pt->level > 0 && pt->num_live) {
211 struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
212
213 for (i = 0; i < XE_PDES; i++) {
214 if (xe_pt_entry_staging(pt_dir, i))
215 xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags,
216 deferred);
217 }
218 }
219 xe_pt_free(pt);
220 }
221
222 /**
223 * xe_pt_clear() - Clear a page-table.
224 * @xe: xe device.
225 * @pt: The page-table.
226 *
227 * Clears page-table by setting to zero.
228 */
229 void xe_pt_clear(struct xe_device *xe, struct xe_pt *pt)
230 {
231 struct iosys_map *map = &pt->bo->vmap;
232
233 xe_map_memset(xe, map, 0, 0, SZ_4K);
234 }
235
236 /**
237 * DOC: Pagetable building
238 *
239 * Below we use the term "page-table" for both page-directories, containing
240 * pointers to lower level page-directories or page-tables, and level 0
241 * page-tables that contain only page-table-entries pointing to memory pages.
242 *
243 * When inserting an address range in an already existing page-table tree
244 * there will typically be a set of page-tables that are shared with other
245 * address ranges, and a set that are private to this address range.
246 * The set of shared page-tables can be at most two per level,
247 * and those can't be updated immediately because the entries of those
248 * page-tables may still be in use by the gpu for other mappings. Therefore
249 * when inserting entries into those, we instead stage those insertions by
250 * adding insertion data into struct xe_vm_pgtable_update structures. This
251 * data (subtrees for the cpu and page-table-entries for the gpu) is then
252 * added in a separate commit step. CPU-data is committed while still under the
253 * vm lock, the object lock and for userptr, the notifier lock in read mode.
254 * The GPU async data is committed either by the GPU or CPU after fulfilling
255 * relevant dependencies.
256 * For non-shared page-tables (and, in fact, for shared ones that aren't
257 * existing at the time of staging), we add the data in-place without the
258 * special update structures. This private part of the page-table tree will
259 * remain disconnected from the vm page-table tree until data is committed to
260 * the shared page tables of the vm tree in the commit phase.
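 *
 * As a rough sketch only (not a literal call sequence; the helpers named
 * below are the static functions in this file and the real flow is driven
 * by the pt_update_ops machinery further down), a bind conceptually
 * proceeds as:
 *
 *	xe_pt_stage_bind(tile, vma, range, entries, &num_entries);
 *	xe_pt_commit_prepare_bind(vma, entries, num_entries, rebind);
 *	// ...the staged PTEs are written into the shared tables by GPU or CPU...
 *	xe_pt_commit(vma, entries, num_entries, &deferred);
 *	// or, if the update could not be run:
 *	xe_pt_abort_bind(vma, entries, num_entries, rebind);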
261 */
262
263 struct xe_pt_update {
264 /** @update: The update structure we're building for this parent. */
265 struct xe_vm_pgtable_update *update;
266 /** @parent: The parent. Used to detect a parent change. */
267 struct xe_pt *parent;
268 /** @preexisting: Whether the parent was pre-existing or allocated */
269 bool preexisting;
270 };
271
272 struct xe_pt_stage_bind_walk {
273 /** @base: The base class. */
274 struct xe_pt_walk base;
275
276 /* Input parameters for the walk */
277 /** @vm: The vm we're building for. */
278 struct xe_vm *vm;
279 /** @tile: The tile we're building for. */
280 struct xe_tile *tile;
281 /** @default_pte: PTE flag only template. No address is associated */
282 u64 default_pte;
283 /** @dma_offset: DMA offset to add to the PTE. */
284 u64 dma_offset;
285 /**
286 * @needs_64K: This address range enforces 64K alignment and
287 * granularity.
288 */
289 bool needs_64K;
290 /**
291 * @vma: VMA being mapped
292 */
293 struct xe_vma *vma;
294
295 /* Also input, but is updated during the walk */
296 /** @curs: The DMA address cursor. */
297 struct xe_res_cursor *curs;
298 /** @va_curs_start: The Virtual address corresponding to @curs->start */
299 u64 va_curs_start;
300
301 /* Output */
302 struct xe_walk_update {
303 /** @wupd.entries: Caller provided storage. */
304 struct xe_vm_pgtable_update *entries;
305 /** @wupd.num_used_entries: Number of update @entries used. */
306 unsigned int num_used_entries;
307 /** @wupd.updates: Tracks the update entry at a given level */
308 struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
309 } wupd;
310
311 /* Walk state */
312 /**
313 * @l0_end_addr: The end address of the current l0 leaf. Used for
314 * 64K granularity detection.
315 */
316 u64 l0_end_addr;
317 /** @addr_64K: The start address of the current 64K chunk. */
318 u64 addr_64K;
319 /** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
320 bool found_64K;
321 };
322
323 static int
324 xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
325 pgoff_t offset, bool alloc_entries)
326 {
327 struct xe_pt_update *upd = &wupd->updates[parent->level];
328 struct xe_vm_pgtable_update *entry;
329
330 /*
331 * For *each level*, we can only have one active
332 * struct xe_pt_update at any one time. Once we move on to a
333 * new parent and page-directory, the old one is complete, and
334 * updates are either already stored in the build tree or in
335 * @wupd->entries
336 */
337 if (likely(upd->parent == parent))
338 return 0;
339
340 upd->parent = parent;
341 upd->preexisting = true;
342
343 if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
344 return -EINVAL;
345
346 entry = wupd->entries + wupd->num_used_entries++;
347 upd->update = entry;
348 entry->ofs = offset;
349 entry->pt_bo = parent->bo;
350 entry->pt = parent;
351 entry->flags = 0;
352 entry->qwords = 0;
353 entry->pt_bo->update_index = -1;
354
355 if (alloc_entries) {
356 entry->pt_entries = kmalloc_array(XE_PDES,
357 sizeof(*entry->pt_entries),
358 GFP_KERNEL);
359 if (!entry->pt_entries)
360 return -ENOMEM;
361 }
362
363 return 0;
364 }
365
366 /*
367 * NOTE: This is a very frequently called function so we allow ourselves
368 * to annotate (using branch prediction hints) the fastpath of updating a
369 * non-pre-existing pagetable with leaf ptes.
370 */
371 static int
372 xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
373 pgoff_t offset, struct xe_pt *xe_child, u64 pte)
374 {
375 struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
376 struct xe_pt_update *child_upd = xe_child ?
377 &xe_walk->wupd.updates[xe_child->level] : NULL;
378 int ret;
379
380 ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
381 if (unlikely(ret))
382 return ret;
383
384 /*
385 * Register this new pagetable so that it won't be recognized as
386 * a shared pagetable by a subsequent insertion.
387 */
388 if (unlikely(child_upd)) {
389 child_upd->update = NULL;
390 child_upd->parent = xe_child;
391 child_upd->preexisting = false;
392 }
393
394 if (likely(!upd->preexisting)) {
395 /* Continue building a non-connected subtree. */
396 struct iosys_map *map = &parent->bo->vmap;
397
398 if (unlikely(xe_child)) {
399 parent->base.children[offset] = &xe_child->base;
400 parent->base.staging[offset] = &xe_child->base;
401 }
402
403 xe_pt_write(xe_walk->vm->xe, map, offset, pte);
404 parent->num_live++;
405 } else {
406 /* Shared pt. Stage update. */
407 unsigned int idx;
408 struct xe_vm_pgtable_update *entry = upd->update;
409
410 idx = offset - entry->ofs;
411 entry->pt_entries[idx].pt = xe_child;
412 entry->pt_entries[idx].pte = pte;
413 entry->qwords++;
414 }
415
416 return 0;
417 }
418
419 static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
420 struct xe_pt_stage_bind_walk *xe_walk)
421 {
422 u64 size, dma;
423
424 if (level > MAX_HUGEPTE_LEVEL)
425 return false;
426
427 /* Does the virtual range requested cover a huge pte? */
428 if (!xe_pt_covers(addr, next, level, &xe_walk->base))
429 return false;
430
431 /* Does the DMA segment cover the whole pte? */
432 if (next - xe_walk->va_curs_start > xe_walk->curs->size)
433 return false;
434
435 /* NULL VMAs do not have dma addresses */
436 if (xe_vma_is_null(xe_walk->vma))
437 return true;
438
439 /* Is the DMA address huge PTE size aligned? */
440 size = next - addr;
441 dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
442
443 return IS_ALIGNED(dma, size);
444 }
445
446 /*
447 * Scan the requested mapping to check whether it can be done entirely
448 * with 64K PTEs.
449 */
450 static bool
451 xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
452 {
453 struct xe_res_cursor curs = *xe_walk->curs;
454
455 if (!IS_ALIGNED(addr, SZ_64K))
456 return false;
457
458 if (next > xe_walk->l0_end_addr)
459 return false;
460
461 /* NULL VMAs do not have dma addresses */
462 if (xe_vma_is_null(xe_walk->vma))
463 return true;
464
465 xe_res_next(&curs, addr - xe_walk->va_curs_start);
466 for (; addr < next; addr += SZ_64K) {
467 if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
468 return false;
469
470 xe_res_next(&curs, SZ_64K);
471 }
472
473 return addr == next;
474 }
475
476 /*
477 * For non-compact "normal" 4K level-0 pagetables, we want to try to group
478 * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
479 * device to the PTE.
480 * This function determines whether the address is part of such a
481 * segment. For VRAM in normal pagetables, this is strictly necessary on
482 * some devices.
483 */
484 static bool
485 xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
486 {
487 /* Address is within an already found 64k region */
488 if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
489 return true;
490
491 xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
492 xe_walk->addr_64K = addr;
493
494 return xe_walk->found_64K;
495 }
496
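/*
 * Page-walk callback for binds: either emits a leaf PTE (4K, 64K-hinted or
 * huge) for the current address range, or allocates a child page-table to
 * descend into, staging all updates via xe_pt_insert_entry().
 */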
497 static int
498 xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
499 unsigned int level, u64 addr, u64 next,
500 struct xe_ptw **child,
501 enum page_walk_action *action,
502 struct xe_pt_walk *walk)
503 {
504 struct xe_pt_stage_bind_walk *xe_walk =
505 container_of(walk, typeof(*xe_walk), base);
506 u16 pat_index = xe_walk->vma->pat_index;
507 struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
508 struct xe_vm *vm = xe_walk->vm;
509 struct xe_pt *xe_child;
510 bool covers;
511 int ret = 0;
512 u64 pte;
513
514 /* Is this a leaf entry? */
515 if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
516 struct xe_res_cursor *curs = xe_walk->curs;
517 bool is_null = xe_vma_is_null(xe_walk->vma);
518
519 XE_WARN_ON(xe_walk->va_curs_start != addr);
520
521 pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
522 xe_res_dma(curs) + xe_walk->dma_offset,
523 xe_walk->vma, pat_index, level);
524 pte |= xe_walk->default_pte;
525
526 /*
527 * Set the XE_PTE_PS64 hint if possible, otherwise if
528 * this device *requires* 64K PTE size for VRAM, fail.
529 */
530 if (level == 0 && !xe_parent->is_compact) {
531 if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
532 xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
533 pte |= XE_PTE_PS64;
534 } else if (XE_WARN_ON(xe_walk->needs_64K)) {
535 return -EINVAL;
536 }
537 }
538
539 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
540 if (unlikely(ret))
541 return ret;
542
543 if (!is_null)
544 xe_res_next(curs, next - addr);
545 xe_walk->va_curs_start = next;
546 xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
547 *action = ACTION_CONTINUE;
548
549 return ret;
550 }
551
552 /*
553 * Descending to lower level. Determine if we need to allocate a
554 * new page table or -directory, which we do if there is no
555 * previous one or there is one we can completely replace.
556 */
557 if (level == 1) {
558 walk->shifts = xe_normal_pt_shifts;
559 xe_walk->l0_end_addr = next;
560 }
561
562 covers = xe_pt_covers(addr, next, level, &xe_walk->base);
563 if (covers || !*child) {
564 u64 flags = 0;
565
566 xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
567 if (IS_ERR(xe_child))
568 return PTR_ERR(xe_child);
569
570 xe_pt_set_addr(xe_child,
571 round_down(addr, 1ull << walk->shifts[level]));
572
573 if (!covers)
574 xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child);
575
576 *child = &xe_child->base;
577
578 /*
579 * Prefer the compact pagetable layout for L0 if possible. Only
580 * possible if VMA covers entire 2MB region as compact 64k and
581 * 4k pages cannot be mixed within a 2MB region.
582 * TODO: Suballocate the pt bo to avoid wasting a lot of
583 * memory.
584 */
585 if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
586 covers && xe_pt_scan_64K(addr, next, xe_walk)) {
587 walk->shifts = xe_compact_pt_shifts;
588 xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT;
589 flags |= XE_PDE_64K;
590 xe_child->is_compact = true;
591 }
592
593 pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
594 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
595 pte);
596 }
597
598 *action = ACTION_SUBTREE;
599 return ret;
600 }
601
602 static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
603 .pt_entry = xe_pt_stage_bind_entry,
604 };
605
606 /**
607 * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
608 * range.
609 * @tile: The tile we're building for.
610 * @vma: The vma indicating the address range.
611 * @range: The range indicating the address range.
612 * @entries: Storage for the update entries used for connecting the tree to
613 * the main tree at commit time.
614 * @num_entries: On output contains the number of @entries used.
615 *
616 * This function builds a disconnected page-table tree for a given address
617 * range. The tree is connected to the main vm tree for the gpu using
618 * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
619 * The function builds xe_vm_pgtable_update structures for already existing
620 * shared page-tables, and non-existing shared and non-shared page-tables
621 * are built and populated directly.
622 *
623 * Return 0 on success, negative error code on error.
624 */
625 static int
626 xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
627 struct xe_svm_range *range,
628 struct xe_vm_pgtable_update *entries, u32 *num_entries)
629 {
630 struct xe_device *xe = tile_to_xe(tile);
631 struct xe_bo *bo = xe_vma_bo(vma);
632 bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
633 (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
634 struct xe_res_cursor curs;
635 struct xe_pt_stage_bind_walk xe_walk = {
636 .base = {
637 .ops = &xe_pt_stage_bind_ops,
638 .shifts = xe_normal_pt_shifts,
639 .max_level = XE_PT_HIGHEST_LEVEL,
640 .staging = true,
641 },
642 .vm = xe_vma_vm(vma),
643 .tile = tile,
644 .curs = &curs,
645 .va_curs_start = range ? range->base.itree.start :
646 xe_vma_start(vma),
647 .vma = vma,
648 .wupd.entries = entries,
649 };
650 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
651 int ret;
652
653 if (range) {
654 /* Move this entire thing to xe_svm.c? */
655 xe_svm_notifier_lock(xe_vma_vm(vma));
656 if (!xe_svm_range_pages_valid(range)) {
657 xe_svm_range_debug(range, "BIND PREPARE - RETRY");
658 xe_svm_notifier_unlock(xe_vma_vm(vma));
659 return -EAGAIN;
660 }
661 if (xe_svm_range_has_dma_mapping(range)) {
662 xe_res_first_dma(range->base.dma_addr, 0,
663 range->base.itree.last + 1 - range->base.itree.start,
664 &curs);
665 is_devmem = xe_res_is_vram(&curs);
666 if (is_devmem)
667 xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
668 else
669 xe_svm_range_debug(range, "BIND PREPARE - DMA");
670 } else {
671 xe_assert(xe, false);
672 }
673 /*
674 * Note: after unlocking, the resource cursor dma addresses may become
675 * stale, but the bind will be aborted anyway at commit time.
676 */
677 xe_svm_notifier_unlock(xe_vma_vm(vma));
678 }
679
680 xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
681
682 /**
683 * Default atomic expectations for different allocation scenarios are as follows:
684 *
685 * 1. Traditional API: When the VM is not in LR mode:
686 * - Device atomics are expected to function with all allocations.
687 *
688 * 2. Compute/SVM API: When the VM is in LR mode:
689 * - Device atomics are the default behavior when the bo is placed in a single region.
690 * - In all other cases device atomics will be disabled with AE=0 until an application
691 * request differently using a ioctl like madvise.
692 */
693 if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
694 if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
695 if (bo && xe_bo_has_single_placement(bo))
696 xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
697 /**
698 * If a SMEM+LMEM allocation is backed by SMEM, device
699 * atomics will cause a gpu page fault and the allocation
700 * will then get migrated to LMEM, so bind such allocations with
701 * device atomics enabled.
702 */
703 else if (is_devmem)
704 xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
705 } else {
706 xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
707 }
708
709 /**
710 * Unset AE if the platform (PVC) doesn't support it on an
711 * allocation
712 */
713 if (!xe->info.has_device_atomics_on_smem && !is_devmem)
714 xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
715 }
716
717 if (is_devmem) {
718 xe_walk.default_pte |= XE_PPGTT_PTE_DM;
719 xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
720 }
721
722 if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
723 xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
724
725 if (!range)
726 xe_bo_assert_held(bo);
727
728 if (!xe_vma_is_null(vma) && !range) {
729 if (xe_vma_is_userptr(vma))
730 xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
731 xe_vma_size(vma), &curs);
732 else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
733 xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
734 xe_vma_size(vma), &curs);
735 else
736 xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
737 xe_vma_size(vma), &curs);
738 } else if (!range) {
739 curs.size = xe_vma_size(vma);
740 }
741
742 ret = xe_pt_walk_range(&pt->base, pt->level,
743 range ? range->base.itree.start : xe_vma_start(vma),
744 range ? range->base.itree.last + 1 : xe_vma_end(vma),
745 &xe_walk.base);
746
747 *num_entries = xe_walk.wupd.num_used_entries;
748 return ret;
749 }
750
751 /**
752 * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
753 * shared pagetable.
754 * @addr: The start address within the non-shared pagetable.
755 * @end: The end address within the non-shared pagetable.
756 * @level: The level of the non-shared pagetable.
757 * @walk: Walk info. The function adjusts the walk action.
758 * @action: next action to perform (see enum page_walk_action)
759 * @offset: Ignored on input, First non-shared entry on output.
760 * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
761 *
762 * A non-shared page-table has some entries that belong to the address range
763 * and others that don't. This function determines the entries that belong
764 * fully to the address range. Depending on level, some entries may
765 * partially belong to the address range (that can't happen at level 0).
766 * The function detects that and adjusts those offsets to not include those
767 * partial entries. Iff it does detect partial entries, we know that there must
768 * be shared page tables also at lower levels, so it adjusts the walk action
769 * accordingly.
770 *
771 * Return: true if there were non-shared entries, false otherwise.
772 */
773 static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
774 struct xe_pt_walk *walk,
775 enum page_walk_action *action,
776 pgoff_t *offset, pgoff_t *end_offset)
777 {
778 u64 size = 1ull << walk->shifts[level];
779
780 *offset = xe_pt_offset(addr, level, walk);
781 *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
782
783 if (!level)
784 return true;
785
786 /*
787 * If addr or next are not size aligned, there are shared pts at lower
788 * level, so in that case traverse down the subtree
789 */
790 *action = ACTION_CONTINUE;
791 if (!IS_ALIGNED(addr, size)) {
792 *action = ACTION_SUBTREE;
793 (*offset)++;
794 }
795
796 if (!IS_ALIGNED(end, size)) {
797 *action = ACTION_SUBTREE;
798 (*end_offset)--;
799 }
800
801 return *end_offset > *offset;
802 }
803
804 struct xe_pt_zap_ptes_walk {
805 /** @base: The walk base-class */
806 struct xe_pt_walk base;
807
808 /* Input parameters for the walk */
809 /** @tile: The tile we're building for */
810 struct xe_tile *tile;
811
812 /* Output */
813 /** @needs_invalidate: Whether we need to invalidate TLB */
814 bool needs_invalidate;
815 };
816
817 static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
818 unsigned int level, u64 addr, u64 next,
819 struct xe_ptw **child,
820 enum page_walk_action *action,
821 struct xe_pt_walk *walk)
822 {
823 struct xe_pt_zap_ptes_walk *xe_walk =
824 container_of(walk, typeof(*xe_walk), base);
825 struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
826 pgoff_t end_offset;
827
828 XE_WARN_ON(!*child);
829 XE_WARN_ON(!level);
830
831 /*
832 * Note that we're called from an entry callback, and we're dealing
833 * with the child of that entry rather than the parent, so need to
834 * adjust level down.
835 */
836 if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
837 &end_offset)) {
838 xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap,
839 offset * sizeof(u64), 0,
840 (end_offset - offset) * sizeof(u64));
841 xe_walk->needs_invalidate = true;
842 }
843
844 return 0;
845 }
846
847 static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
848 .pt_entry = xe_pt_zap_ptes_entry,
849 };
850
851 /**
852 * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
853 * @tile: The tile we're zapping for.
854 * @vma: GPU VMA detailing address range.
855 *
856 * Eviction and userptr invalidation need to be able to zap the
857 * gpu ptes of a given address range in pagefaulting mode.
858 * In order to be able to do that, this function needs access to the shared
859 * page-table entries so it can either clear the leaf PTEs or
860 * clear the pointers to lower-level page-tables. The caller is required
861 * to hold the necessary locks to ensure neither the page-table connectivity
862 * nor the page-table entries of the range are updated from under us.
863 *
864 * Return: Whether ptes were actually updated and a TLB invalidation is
865 * required.
866 */
867 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
868 {
869 struct xe_pt_zap_ptes_walk xe_walk = {
870 .base = {
871 .ops = &xe_pt_zap_ptes_ops,
872 .shifts = xe_normal_pt_shifts,
873 .max_level = XE_PT_HIGHEST_LEVEL,
874 },
875 .tile = tile,
876 };
877 struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
878 u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated);
879
880 if (!(pt_mask & BIT(tile->id)))
881 return false;
882
883 (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
884 xe_vma_end(vma), &xe_walk.base);
885
886 return xe_walk.needs_invalidate;
887 }
888
889 /**
890 * xe_pt_zap_ptes_range() - Zap (zero) gpu ptes of a SVM range
891 * @tile: The tile we're zapping for.
892 * @vm: The VM we're zapping for.
893 * @range: The SVM range we're zapping for.
894 *
895 * SVM invalidation needs to be able to zap the gpu ptes of a given address
896 * range. In order to be able to do that, that function needs access to the
897 * shared page-table entries so it can either clear the leaf PTEs or
898 * clear the pointers to lower-level page-tables. The caller is required
899 * to hold the SVM notifier lock.
900 *
901 * Return: Whether ptes were actually updated and a TLB invalidation is
902 * required.
903 */
904 bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
905 struct xe_svm_range *range)
906 {
907 struct xe_pt_zap_ptes_walk xe_walk = {
908 .base = {
909 .ops = &xe_pt_zap_ptes_ops,
910 .shifts = xe_normal_pt_shifts,
911 .max_level = XE_PT_HIGHEST_LEVEL,
912 },
913 .tile = tile,
914 };
915 struct xe_pt *pt = vm->pt_root[tile->id];
916 u8 pt_mask = (range->tile_present & ~range->tile_invalidated);
917
918 xe_svm_assert_in_notifier(vm);
919
920 if (!(pt_mask & BIT(tile->id)))
921 return false;
922
923 (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start,
924 range->base.itree.last + 1, &xe_walk.base);
925
926 return xe_walk.needs_invalidate;
927 }
928
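/*
 * Populate callback for page-table updates: copies the staged PTE values
 * of one xe_vm_pgtable_update either into the mapped page-table bo (@map)
 * or into a CPU-side buffer (@data), depending on how the update is applied.
 */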
929 static void
930 xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
931 struct iosys_map *map, void *data,
932 u32 qword_ofs, u32 num_qwords,
933 const struct xe_vm_pgtable_update *update)
934 {
935 struct xe_pt_entry *ptes = update->pt_entries;
936 u64 *ptr = data;
937 u32 i;
938
939 for (i = 0; i < num_qwords; i++) {
940 if (map)
941 xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
942 sizeof(u64), u64, ptes[i].pte);
943 else
944 ptr[i] = ptes[i].pte;
945 }
946 }
947
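/*
 * Undo a staged bind before it has been prepared for commit: destroy any
 * page-tables created for the staged entries and free the pt_entries
 * arrays allocated by xe_pt_stage_bind().
 */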
948 static void xe_pt_cancel_bind(struct xe_vma *vma,
949 struct xe_vm_pgtable_update *entries,
950 u32 num_entries)
951 {
952 u32 i, j;
953
954 for (i = 0; i < num_entries; i++) {
955 struct xe_pt *pt = entries[i].pt;
956
957 if (!pt)
958 continue;
959
960 if (pt->level) {
961 for (j = 0; j < entries[i].qwords; j++)
962 xe_pt_destroy(entries[i].pt_entries[j].pt,
963 xe_vma_vm(vma)->flags, NULL);
964 }
965
966 kfree(entries[i].pt_entries);
967 entries[i].pt_entries = NULL;
968 entries[i].qwords = 0;
969 }
970 }
971
972 #define XE_INVALID_VMA ((struct xe_vma *)(0xdeaddeadull))
973
974 static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma)
975 {
976 struct xe_vm *vm;
977
978 if (vma == XE_INVALID_VMA)
979 return;
980
981 vm = xe_vma_vm(vma);
982 lockdep_assert_held(&vm->lock);
983
984 if (!xe_vma_has_no_bo(vma))
985 dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
986
987 xe_vm_assert_held(vm);
988 }
989
990 static void xe_pt_commit_locks_assert(struct xe_vma *vma)
991 {
992 struct xe_vm *vm;
993
994 if (vma == XE_INVALID_VMA)
995 return;
996
997 vm = xe_vma_vm(vma);
998 xe_pt_commit_prepare_locks_assert(vma);
999
1000 if (xe_vma_is_userptr(vma))
1001 lockdep_assert_held_read(&vm->userptr.notifier_lock);
1002 }
1003
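/*
 * Commit staged page-table updates to the live tree: for each directory
 * entry, copy the staging pointer into the live @children array and
 * (deferred-)destroy any page-table that was replaced.
 */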
1004 static void xe_pt_commit(struct xe_vma *vma,
1005 struct xe_vm_pgtable_update *entries,
1006 u32 num_entries, struct llist_head *deferred)
1007 {
1008 u32 i, j;
1009
1010 xe_pt_commit_locks_assert(vma);
1011
1012 for (i = 0; i < num_entries; i++) {
1013 struct xe_pt *pt = entries[i].pt;
1014 struct xe_pt_dir *pt_dir;
1015
1016 if (!pt->level)
1017 continue;
1018
1019 pt_dir = as_xe_pt_dir(pt);
1020 for (j = 0; j < entries[i].qwords; j++) {
1021 struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
1022 int j_ = j + entries[i].ofs;
1023
1024 pt_dir->children[j_] = pt_dir->staging[j_];
1025 xe_pt_destroy(oldpte, (vma == XE_INVALID_VMA) ? 0 :
1026 xe_vma_vm(vma)->flags, deferred);
1027 }
1028 }
1029 }
1030
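/*
 * Abort a bind after xe_pt_commit_prepare_bind() but before the updates
 * were committed: restore the staging pointers to the previous page-tables,
 * destroy the newly created ones and roll back the num_live accounting.
 */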
1031 static void xe_pt_abort_bind(struct xe_vma *vma,
1032 struct xe_vm_pgtable_update *entries,
1033 u32 num_entries, bool rebind)
1034 {
1035 int i, j;
1036
1037 xe_pt_commit_prepare_locks_assert(vma);
1038
1039 for (i = num_entries - 1; i >= 0; --i) {
1040 struct xe_pt *pt = entries[i].pt;
1041 struct xe_pt_dir *pt_dir;
1042
1043 if (!rebind)
1044 pt->num_live -= entries[i].qwords;
1045
1046 if (!pt->level)
1047 continue;
1048
1049 pt_dir = as_xe_pt_dir(pt);
1050 for (j = 0; j < entries[i].qwords; j++) {
1051 u32 j_ = j + entries[i].ofs;
1052 struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_);
1053 struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
1054
1055 pt_dir->staging[j_] = oldpte ? &oldpte->base : 0;
1056 xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL);
1057 }
1058 }
1059 }
1060
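/*
 * CPU-side prepare step for committing a bind: bump num_live, install the
 * newly built page-tables in the staging array and stash the page-tables
 * they replace back in @entries so that commit can destroy them or abort
 * can restore them.
 */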
1061 static void xe_pt_commit_prepare_bind(struct xe_vma *vma,
1062 struct xe_vm_pgtable_update *entries,
1063 u32 num_entries, bool rebind)
1064 {
1065 u32 i, j;
1066
1067 xe_pt_commit_prepare_locks_assert(vma);
1068
1069 for (i = 0; i < num_entries; i++) {
1070 struct xe_pt *pt = entries[i].pt;
1071 struct xe_pt_dir *pt_dir;
1072
1073 if (!rebind)
1074 pt->num_live += entries[i].qwords;
1075
1076 if (!pt->level)
1077 continue;
1078
1079 pt_dir = as_xe_pt_dir(pt);
1080 for (j = 0; j < entries[i].qwords; j++) {
1081 u32 j_ = j + entries[i].ofs;
1082 struct xe_pt *newpte = entries[i].pt_entries[j].pt;
1083 struct xe_pt *oldpte = NULL;
1084
1085 if (xe_pt_entry_staging(pt_dir, j_))
1086 oldpte = xe_pt_entry_staging(pt_dir, j_);
1087
1088 pt_dir->staging[j_] = &newpte->base;
1089 entries[i].pt_entries[j].pt = oldpte;
1090 }
1091 }
1092 }
1093
1094 static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries,
1095 u32 num_entries)
1096 {
1097 u32 i;
1098
1099 for (i = 0; i < num_entries; i++)
1100 kfree(entries[i].pt_entries);
1101 }
1102
1103 static int
1104 xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
1105 struct xe_svm_range *range,
1106 struct xe_vm_pgtable_update *entries, u32 *num_entries)
1107 {
1108 int err;
1109
1110 *num_entries = 0;
1111 err = xe_pt_stage_bind(tile, vma, range, entries, num_entries);
1112 if (!err)
1113 xe_tile_assert(tile, *num_entries);
1114
1115 return err;
1116 }
1117
1118 static void xe_vm_dbg_print_entries(struct xe_device *xe,
1119 const struct xe_vm_pgtable_update *entries,
1120 unsigned int num_entries, bool bind)
1121 #if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
1122 {
1123 unsigned int i;
1124
1125 vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind",
1126 num_entries);
1127 for (i = 0; i < num_entries; i++) {
1128 const struct xe_vm_pgtable_update *entry = &entries[i];
1129 struct xe_pt *xe_pt = entry->pt;
1130 u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
1131 u64 end;
1132 u64 start;
1133
1134 xe_assert(xe, !entry->pt->is_compact);
1135 start = entry->ofs * page_size;
1136 end = start + page_size * entry->qwords;
1137 vm_dbg(&xe->drm,
1138 "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
1139 i, xe_pt->level, entry->ofs, entry->qwords,
1140 xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
1141 }
1142 }
1143 #else
1144 {}
1145 #endif
1146
1147 static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
1148 {
1149 int i;
1150
1151 for (i = 0; i < num_syncs; i++) {
1152 struct dma_fence *fence = syncs[i].fence;
1153
1154 if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1155 &fence->flags))
1156 return false;
1157 }
1158
1159 return true;
1160 }
1161
1162 static int job_test_add_deps(struct xe_sched_job *job,
1163 struct dma_resv *resv,
1164 enum dma_resv_usage usage)
1165 {
1166 if (!job) {
1167 if (!dma_resv_test_signaled(resv, usage))
1168 return -ETIME;
1169
1170 return 0;
1171 }
1172
1173 return xe_sched_job_add_deps(job, resv, usage);
1174 }
1175
1176 static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job)
1177 {
1178 struct xe_bo *bo = xe_vma_bo(vma);
1179
1180 xe_bo_assert_held(bo);
1181
1182 if (bo && !bo->vm)
1183 return job_test_add_deps(job, bo->ttm.base.resv,
1184 DMA_RESV_USAGE_KERNEL);
1185
1186 return 0;
1187 }
1188
1189 static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
1190 struct xe_sched_job *job)
1191 {
1192 int err = 0;
1193
1194 /*
1195 * No need to check for is_cpu_addr_mirror here as vma_add_deps is a
1196 * NOP if VMA is_cpu_addr_mirror
1197 */
1198
1199 switch (op->base.op) {
1200 case DRM_GPUVA_OP_MAP:
1201 if (!op->map.immediate && xe_vm_in_fault_mode(vm))
1202 break;
1203
1204 err = vma_add_deps(op->map.vma, job);
1205 break;
1206 case DRM_GPUVA_OP_REMAP:
1207 if (op->remap.prev)
1208 err = vma_add_deps(op->remap.prev, job);
1209 if (!err && op->remap.next)
1210 err = vma_add_deps(op->remap.next, job);
1211 break;
1212 case DRM_GPUVA_OP_UNMAP:
1213 break;
1214 case DRM_GPUVA_OP_PREFETCH:
1215 err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job);
1216 break;
1217 case DRM_GPUVA_OP_DRIVER:
1218 break;
1219 default:
1220 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
1221 }
1222
1223 return err;
1224 }
1225
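/*
 * Collect the dependencies a page-table update must wait for: in-syncs,
 * the VM's reservation object, any overlapping range fences and the BOs
 * touched by the ops. With a @job the dependencies are added to the job;
 * without one (CPU update) unsignalled dependencies result in -ETIME.
 */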
1226 static int xe_pt_vm_dependencies(struct xe_sched_job *job,
1227 struct xe_vm *vm,
1228 struct xe_vma_ops *vops,
1229 struct xe_vm_pgtable_update_ops *pt_update_ops,
1230 struct xe_range_fence_tree *rftree)
1231 {
1232 struct xe_range_fence *rtfence;
1233 struct dma_fence *fence;
1234 struct xe_vma_op *op;
1235 int err = 0, i;
1236
1237 xe_vm_assert_held(vm);
1238
1239 if (!job && !no_in_syncs(vops->syncs, vops->num_syncs))
1240 return -ETIME;
1241
1242 if (!job && !xe_exec_queue_is_idle(pt_update_ops->q))
1243 return -ETIME;
1244
1245 if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) {
1246 err = job_test_add_deps(job, xe_vm_resv(vm),
1247 pt_update_ops->wait_vm_bookkeep ?
1248 DMA_RESV_USAGE_BOOKKEEP :
1249 DMA_RESV_USAGE_KERNEL);
1250 if (err)
1251 return err;
1252 }
1253
1254 rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start,
1255 pt_update_ops->last);
1256 while (rtfence) {
1257 fence = rtfence->fence;
1258
1259 if (!dma_fence_is_signaled(fence)) {
1260 /*
1261 * Is this a CPU update? GPU is busy updating, so return
1262 * an error
1263 */
1264 if (!job)
1265 return -ETIME;
1266
1267 dma_fence_get(fence);
1268 err = drm_sched_job_add_dependency(&job->drm, fence);
1269 if (err)
1270 return err;
1271 }
1272
1273 rtfence = xe_range_fence_tree_next(rtfence,
1274 pt_update_ops->start,
1275 pt_update_ops->last);
1276 }
1277
1278 list_for_each_entry(op, &vops->list, link) {
1279 err = op_add_deps(vm, op, job);
1280 if (err)
1281 return err;
1282 }
1283
1284 if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
1285 if (job)
1286 err = xe_sched_job_last_fence_add_dep(job, vm);
1287 else
1288 err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
1289 }
1290
1291 for (i = 0; job && !err && i < vops->num_syncs; i++)
1292 err = xe_sync_entry_add_deps(&vops->syncs[i], job);
1293
1294 return err;
1295 }
1296
1297 static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
1298 {
1299 struct xe_vma_ops *vops = pt_update->vops;
1300 struct xe_vm *vm = vops->vm;
1301 struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id];
1302 struct xe_vm_pgtable_update_ops *pt_update_ops =
1303 &vops->pt_update_ops[pt_update->tile_id];
1304
1305 return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
1306 pt_update_ops, rftree);
1307 }
1308
1309 #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
1310
1311 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
1312 {
1313 u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2;
1314 static u32 count;
1315
1316 if (count++ % divisor == divisor - 1) {
1317 uvma->userptr.divisor = divisor << 1;
1318 return true;
1319 }
1320
1321 return false;
1322 }
1323
1324 #else
1325
1326 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
1327 {
1328 return false;
1329 }
1330
1331 #endif
1332
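/*
 * Re-check a userptr VMA under the notifier lock just before committing:
 * if the MMU notifier sequence has moved on, return -EAGAIN in fault mode
 * so the caller retries; otherwise continue and let the exec or rebind
 * worker take care of rebinding later.
 */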
1333 static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
1334 struct xe_vm_pgtable_update_ops *pt_update)
1335 {
1336 struct xe_userptr_vma *uvma;
1337 unsigned long notifier_seq;
1338
1339 lockdep_assert_held_read(&vm->userptr.notifier_lock);
1340
1341 if (!xe_vma_is_userptr(vma))
1342 return 0;
1343
1344 uvma = to_userptr_vma(vma);
1345 if (xe_pt_userptr_inject_eagain(uvma))
1346 xe_vma_userptr_force_invalidate(uvma);
1347
1348 notifier_seq = uvma->userptr.notifier_seq;
1349
1350 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
1351 notifier_seq))
1352 return 0;
1353
1354 if (xe_vm_in_fault_mode(vm))
1355 return -EAGAIN;
1356
1357 /*
1358 * Just continue the operation since exec or rebind worker
1359 * will take care of rebinding.
1360 */
1361 return 0;
1362 }
1363
1364 static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
1365 struct xe_vm_pgtable_update_ops *pt_update)
1366 {
1367 int err = 0;
1368
1369 lockdep_assert_held_read(&vm->userptr.notifier_lock);
1370
1371 switch (op->base.op) {
1372 case DRM_GPUVA_OP_MAP:
1373 if (!op->map.immediate && xe_vm_in_fault_mode(vm))
1374 break;
1375
1376 err = vma_check_userptr(vm, op->map.vma, pt_update);
1377 break;
1378 case DRM_GPUVA_OP_REMAP:
1379 if (op->remap.prev)
1380 err = vma_check_userptr(vm, op->remap.prev, pt_update);
1381 if (!err && op->remap.next)
1382 err = vma_check_userptr(vm, op->remap.next, pt_update);
1383 break;
1384 case DRM_GPUVA_OP_UNMAP:
1385 break;
1386 case DRM_GPUVA_OP_PREFETCH:
1387 err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
1388 pt_update);
1389 break;
1390 default:
1391 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
1392 }
1393
1394 return err;
1395 }
1396
1397 static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
1398 {
1399 struct xe_vm *vm = pt_update->vops->vm;
1400 struct xe_vma_ops *vops = pt_update->vops;
1401 struct xe_vm_pgtable_update_ops *pt_update_ops =
1402 &vops->pt_update_ops[pt_update->tile_id];
1403 struct xe_vma_op *op;
1404 int err;
1405
1406 err = xe_pt_pre_commit(pt_update);
1407 if (err)
1408 return err;
1409
1410 down_read(&vm->userptr.notifier_lock);
1411
1412 list_for_each_entry(op, &vops->list, link) {
1413 err = op_check_userptr(vm, op, pt_update_ops);
1414 if (err) {
1415 up_read(&vm->userptr.notifier_lock);
1416 break;
1417 }
1418 }
1419
1420 return err;
1421 }
1422
1423 static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
1424 {
1425 struct xe_vm *vm = pt_update->vops->vm;
1426 struct xe_vma_ops *vops = pt_update->vops;
1427 struct xe_vma_op *op;
1428 int err;
1429
1430 err = xe_pt_pre_commit(pt_update);
1431 if (err)
1432 return err;
1433
1434 xe_svm_notifier_lock(vm);
1435
1436 list_for_each_entry(op, &vops->list, link) {
1437 struct xe_svm_range *range = op->map_range.range;
1438
1439 if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
1440 continue;
1441
1442 xe_svm_range_debug(range, "PRE-COMMIT");
1443
1444 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
1445 xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
1446
1447 if (!xe_svm_range_pages_valid(range)) {
1448 xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
1449 xe_svm_notifier_unlock(vm);
1450 return -EAGAIN;
1451 }
1452 }
1453
1454 return 0;
1455 }
1456
1457 struct invalidation_fence {
1458 struct xe_gt_tlb_invalidation_fence base;
1459 struct xe_gt *gt;
1460 struct dma_fence *fence;
1461 struct dma_fence_cb cb;
1462 struct work_struct work;
1463 u64 start;
1464 u64 end;
1465 u32 asid;
1466 };
1467
1468 static void invalidation_fence_cb(struct dma_fence *fence,
1469 struct dma_fence_cb *cb)
1470 {
1471 struct invalidation_fence *ifence =
1472 container_of(cb, struct invalidation_fence, cb);
1473 struct xe_device *xe = gt_to_xe(ifence->gt);
1474
1475 trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
1476 if (!ifence->fence->error) {
1477 queue_work(system_wq, &ifence->work);
1478 } else {
1479 ifence->base.base.error = ifence->fence->error;
1480 xe_gt_tlb_invalidation_fence_signal(&ifence->base);
1481 }
1482 dma_fence_put(ifence->fence);
1483 }
1484
1485 static void invalidation_fence_work_func(struct work_struct *w)
1486 {
1487 struct invalidation_fence *ifence =
1488 container_of(w, struct invalidation_fence, work);
1489 struct xe_device *xe = gt_to_xe(ifence->gt);
1490
1491 trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
1492 xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
1493 ifence->end, ifence->asid);
1494 }
1495
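/*
 * Arm a TLB invalidation fence: once @fence signals, a ranged TLB
 * invalidation for the given start/end and @asid is issued from a worker.
 * If @fence has already signalled, the invalidation is issued immediately;
 * on any other callback-registration failure the fence references are
 * dropped.
 */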
1496 static void invalidation_fence_init(struct xe_gt *gt,
1497 struct invalidation_fence *ifence,
1498 struct dma_fence *fence,
1499 u64 start, u64 end, u32 asid)
1500 {
1501 int ret;
1502
1503 trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
1504
1505 xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
1506
1507 ifence->fence = fence;
1508 ifence->gt = gt;
1509 ifence->start = start;
1510 ifence->end = end;
1511 ifence->asid = asid;
1512
1513 INIT_WORK(&ifence->work, invalidation_fence_work_func);
1514 ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
1515 if (ret == -ENOENT) {
1516 dma_fence_put(ifence->fence); /* Usually dropped in CB */
1517 invalidation_fence_work_func(&ifence->work);
1518 } else if (ret) {
1519 dma_fence_put(&ifence->base.base); /* Caller ref */
1520 dma_fence_put(&ifence->base.base); /* Creation ref */
1521 }
1522
1523 xe_gt_assert(gt, !ret || ret == -ENOENT);
1524 }
1525
1526 struct xe_pt_stage_unbind_walk {
1527 /** @base: The pagewalk base-class. */
1528 struct xe_pt_walk base;
1529
1530 /* Input parameters for the walk */
1531 /** @tile: The tile we're unbinding from. */
1532 struct xe_tile *tile;
1533
1534 /**
1535 * @modified_start: Walk range start, modified to include any
1536 * shared pagetables that we're the only user of and can thus
1537 * treat as private.
1538 */
1539 u64 modified_start;
1540 /** @modified_end: Walk range end, modified like @modified_start. */
1541 u64 modified_end;
1542
1543 /* Output */
1544 /** @wupd: Structure to track the page-table updates we're building */
1545 struct xe_walk_update wupd;
1546 };
1547
1548 /*
1549 * Check whether this range is the only one populating this pagetable,
1550 * and in that case, update the walk range checks so that higher levels don't
1551 * view us as a shared pagetable.
1552 */
1553 static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
1554 const struct xe_pt *child,
1555 enum page_walk_action *action,
1556 struct xe_pt_walk *walk)
1557 {
1558 struct xe_pt_stage_unbind_walk *xe_walk =
1559 container_of(walk, typeof(*xe_walk), base);
1560 unsigned int shift = walk->shifts[level];
1561 u64 size = 1ull << shift;
1562
1563 if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
1564 ((next - addr) >> shift) == child->num_live) {
1565 u64 size = 1ull << walk->shifts[level + 1];
1566
1567 *action = ACTION_CONTINUE;
1568
1569 if (xe_walk->modified_start >= addr)
1570 xe_walk->modified_start = round_down(addr, size);
1571 if (xe_walk->modified_end <= next)
1572 xe_walk->modified_end = round_up(next, size);
1573
1574 return true;
1575 }
1576
1577 return false;
1578 }
1579
1580 static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
1581 unsigned int level, u64 addr, u64 next,
1582 struct xe_ptw **child,
1583 enum page_walk_action *action,
1584 struct xe_pt_walk *walk)
1585 {
1586 struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
1587
1588 XE_WARN_ON(!*child);
1589 XE_WARN_ON(!level);
1590
1591 xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
1592
1593 return 0;
1594 }
1595
1596 static int
1597 xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
1598 unsigned int level, u64 addr, u64 next,
1599 struct xe_ptw **child,
1600 enum page_walk_action *action,
1601 struct xe_pt_walk *walk)
1602 {
1603 struct xe_pt_stage_unbind_walk *xe_walk =
1604 container_of(walk, typeof(*xe_walk), base);
1605 struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
1606 pgoff_t end_offset;
1607 u64 size = 1ull << walk->shifts[--level];
1608 int err;
1609
1610 if (!IS_ALIGNED(addr, size))
1611 addr = xe_walk->modified_start;
1612 if (!IS_ALIGNED(next, size))
1613 next = xe_walk->modified_end;
1614
1615 /* Parent == *child is the root pt. Don't kill it. */
1616 if (parent != *child &&
1617 xe_pt_check_kill(addr, next, level, xe_child, action, walk))
1618 return 0;
1619
1620 if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
1621 &end_offset))
1622 return 0;
1623
1624 err = xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true);
1625 if (err)
1626 return err;
1627
1628 xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
1629
1630 return 0;
1631 }
1632
1633 static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
1634 .pt_entry = xe_pt_stage_unbind_entry,
1635 .pt_post_descend = xe_pt_stage_unbind_post_descend,
1636 };
1637
1638 /**
1639 * xe_pt_stage_unbind() - Build page-table update structures for an unbind
1640 * operation
1641 * @tile: The tile we're unbinding for.
1642 * @vm: The vm
1643 * @vma: The vma we're unbinding.
1644 * @range: The range we're unbinding.
1645 * @entries: Caller-provided storage for the update structures.
1646 *
1647 * Builds page-table update structures for an unbind operation. The function
1648 * will attempt to remove all page-tables that we're the only user
1649 * of, and for that to work, the unbind operation must be committed in the
1650 * same critical section that blocks racing binds to the same page-table tree.
1651 *
1652 * Return: The number of entries used.
1653 */
1654 static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
1655 struct xe_vm *vm,
1656 struct xe_vma *vma,
1657 struct xe_svm_range *range,
1658 struct xe_vm_pgtable_update *entries)
1659 {
1660 u64 start = range ? range->base.itree.start : xe_vma_start(vma);
1661 u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma);
1662 struct xe_pt_stage_unbind_walk xe_walk = {
1663 .base = {
1664 .ops = &xe_pt_stage_unbind_ops,
1665 .shifts = xe_normal_pt_shifts,
1666 .max_level = XE_PT_HIGHEST_LEVEL,
1667 .staging = true,
1668 },
1669 .tile = tile,
1670 .modified_start = start,
1671 .modified_end = end,
1672 .wupd.entries = entries,
1673 };
1674 struct xe_pt *pt = vm->pt_root[tile->id];
1675
1676 (void)xe_pt_walk_shared(&pt->base, pt->level, start, end,
1677 &xe_walk.base);
1678
1679 return xe_walk.wupd.num_used_entries;
1680 }
1681
1682 static void
1683 xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
1684 struct xe_tile *tile, struct iosys_map *map,
1685 void *ptr, u32 qword_ofs, u32 num_qwords,
1686 const struct xe_vm_pgtable_update *update)
1687 {
1688 struct xe_vm *vm = pt_update->vops->vm;
1689 u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level);
1690 int i;
1691
1692 if (map && map->is_iomem)
1693 for (i = 0; i < num_qwords; ++i)
1694 xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
1695 sizeof(u64), u64, empty);
1696 else if (map)
1697 memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
1698 num_qwords);
1699 else
1700 memset64(ptr, empty, num_qwords);
1701 }
1702
1703 static void xe_pt_abort_unbind(struct xe_vma *vma,
1704 struct xe_vm_pgtable_update *entries,
1705 u32 num_entries)
1706 {
1707 int i, j;
1708
1709 xe_pt_commit_prepare_locks_assert(vma);
1710
1711 for (i = num_entries - 1; i >= 0; --i) {
1712 struct xe_vm_pgtable_update *entry = &entries[i];
1713 struct xe_pt *pt = entry->pt;
1714 struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
1715
1716 pt->num_live += entry->qwords;
1717
1718 if (!pt->level)
1719 continue;
1720
1721 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++)
1722 pt_dir->staging[j] =
1723 entries[i].pt_entries[j - entry->ofs].pt ?
1724 &entries[i].pt_entries[j - entry->ofs].pt->base : NULL;
1725 }
1726 }
1727
1728 static void
xe_pt_commit_prepare_unbind(struct xe_vma * vma,struct xe_vm_pgtable_update * entries,u32 num_entries)1729 xe_pt_commit_prepare_unbind(struct xe_vma *vma,
1730 struct xe_vm_pgtable_update *entries,
1731 u32 num_entries)
1732 {
1733 int i, j;
1734
1735 xe_pt_commit_prepare_locks_assert(vma);
1736
1737 for (i = 0; i < num_entries; ++i) {
1738 struct xe_vm_pgtable_update *entry = &entries[i];
1739 struct xe_pt *pt = entry->pt;
1740 struct xe_pt_dir *pt_dir;
1741
1742 pt->num_live -= entry->qwords;
1743 if (!pt->level)
1744 continue;
1745
1746 pt_dir = as_xe_pt_dir(pt);
1747 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) {
1748 entry->pt_entries[j - entry->ofs].pt =
1749 xe_pt_entry_staging(pt_dir, j);
1750 pt_dir->staging[j] = NULL;
1751 }
1752 }
1753 }
1754
1755 static void
xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops * pt_update_ops,u64 start,u64 end)1756 xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
1757 u64 start, u64 end)
1758 {
1759 u64 last;
1760 u32 current_op = pt_update_ops->current_op;
1761 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
1762 int i, level = 0;
1763
1764 for (i = 0; i < pt_op->num_entries; i++) {
1765 const struct xe_vm_pgtable_update *entry = &pt_op->entries[i];
1766
1767 if (entry->pt->level > level)
1768 level = entry->pt->level;
1769 }
1770
1771 /* Greedy (non-optimal) calculation but simple */
1772 start = ALIGN_DOWN(start, 0x1ull << xe_pt_shift(level));
1773 last = ALIGN(end, 0x1ull << xe_pt_shift(level)) - 1;
1774
1775 if (start < pt_update_ops->start)
1776 pt_update_ops->start = start;
1777 if (last > pt_update_ops->last)
1778 pt_update_ops->last = last;
1779 }
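
/*
 * Worked example of the greedy expansion above (hypothetical numbers,
 * assuming the highest level touched is 1 and xe_pt_shift(1) == 21, i.e.
 * 2 MiB granularity): a modified range of [0x201000, 0x3ff000) expands to
 *
 *	start = ALIGN_DOWN(0x201000, SZ_2M);	== 0x200000
 *	last  = ALIGN(0x3ff000, SZ_2M) - 1;	== 0x3fffff
 *
 * so the range fence ends up covering every 2 MiB block the update
 * overlaps, accumulated over all ops via pt_update_ops->start / ->last.
 */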

static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
{
	int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0;

	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
		return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv,
					       xe->info.tile_count << shift);

	return 0;
}

static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
			   struct xe_vm_pgtable_update_ops *pt_update_ops,
			   struct xe_vma *vma)
{
	u32 current_op = pt_update_ops->current_op;
	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
	int err;

	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
	xe_bo_assert_held(xe_vma_bo(vma));

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "Preparing bind, with range [%llx...%llx)\n",
	       xe_vma_start(vma), xe_vma_end(vma) - 1);

	pt_op->vma = NULL;
	pt_op->bind = true;
	pt_op->rebind = BIT(tile->id) & vma->tile_present;

	err = vma_reserve_fences(tile_to_xe(tile), vma);
	if (err)
		return err;

	err = xe_pt_prepare_bind(tile, vma, NULL, pt_op->entries,
				 &pt_op->num_entries);
	if (!err) {
		xe_tile_assert(tile, pt_op->num_entries <=
			       ARRAY_SIZE(pt_op->entries));
		xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
					pt_op->num_entries, true);

		xe_pt_update_ops_rfence_interval(pt_update_ops,
						 xe_vma_start(vma),
						 xe_vma_end(vma));
		++pt_update_ops->current_op;
		pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);

		/*
		 * If rebind, we have to invalidate the TLB on !LR vms to
		 * invalidate cached PTEs pointing to freed memory. On LR vms
		 * this is done automatically when the context is re-enabled
		 * by the rebind worker, or in fault mode it was invalidated
		 * on PTE zapping.
		 *
		 * If !rebind, and on scratch-enabled VMs, there is a chance
		 * the scratch PTE is already cached in the TLB so it needs to
		 * be invalidated. On !LR VMs this is done in the ring ops
		 * preceding a batch, but on non-faulting LR, in particular on
		 * user-space batch buffer chaining, it needs to be done here.
		 * A summary of the cases is sketched after this function.
		 */
		if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
		     xe_vm_in_preempt_fence_mode(vm)))
			pt_update_ops->needs_invalidation = true;
		else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
			/* We also bump if batch_invalidate_tlb is true */
			vm->tlb_flush_seqno++;

		vma->tile_staged |= BIT(tile->id);
		pt_op->vma = vma;
		xe_pt_commit_prepare_bind(vma, pt_op->entries,
					  pt_op->num_entries, pt_op->rebind);
	} else {
		xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
	}

	return err;
}
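
/*
 * Informal summary of the TLB handling chosen in bind_op_prepare() above
 * (derived from the checks there, not an authoritative table):
 *
 *	!rebind && scratch enabled && preempt-fence mode -> needs_invalidation
 *	rebind && !LR mode                               -> vm->tlb_flush_seqno++
 *	everything else                                  -> handled elsewhere
 *	                                                    (ring ops, rebind
 *	                                                    worker or PTE zap)
 */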

static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
			      struct xe_vm_pgtable_update_ops *pt_update_ops,
			      struct xe_vma *vma, struct xe_svm_range *range)
{
	u32 current_op = pt_update_ops->current_op;
	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
	int err;

	xe_tile_assert(tile, xe_vma_is_cpu_addr_mirror(vma));

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "Preparing bind, with range [%lx...%lx)\n",
	       range->base.itree.start, range->base.itree.last);

	pt_op->vma = NULL;
	pt_op->bind = true;
	pt_op->rebind = BIT(tile->id) & range->tile_present;

	err = xe_pt_prepare_bind(tile, vma, range, pt_op->entries,
				 &pt_op->num_entries);
	if (!err) {
		xe_tile_assert(tile, pt_op->num_entries <=
			       ARRAY_SIZE(pt_op->entries));
		xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
					pt_op->num_entries, true);

		xe_pt_update_ops_rfence_interval(pt_update_ops,
						 range->base.itree.start,
						 range->base.itree.last + 1);
		++pt_update_ops->current_op;
		pt_update_ops->needs_svm_lock = true;

		pt_op->vma = vma;
		xe_pt_commit_prepare_bind(vma, pt_op->entries,
					  pt_op->num_entries, pt_op->rebind);
	} else {
		xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
	}

	return err;
}

static int unbind_op_prepare(struct xe_tile *tile,
			     struct xe_vm_pgtable_update_ops *pt_update_ops,
			     struct xe_vma *vma)
{
	u32 current_op = pt_update_ops->current_op;
	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
	int err;

	if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
		return 0;

	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
	xe_bo_assert_held(xe_vma_bo(vma));

	vm_dbg(&xe_vma_vm(vma)->xe->drm,
	       "Preparing unbind, with range [%llx...%llx)\n",
	       xe_vma_start(vma), xe_vma_end(vma) - 1);

	pt_op->vma = vma;
	pt_op->bind = false;
	pt_op->rebind = false;

	err = vma_reserve_fences(tile_to_xe(tile), vma);
	if (err)
		return err;

	pt_op->num_entries = xe_pt_stage_unbind(tile, xe_vma_vm(vma),
						vma, NULL, pt_op->entries);

	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
				pt_op->num_entries, false);
	xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
					 xe_vma_end(vma));
	++pt_update_ops->current_op;
	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
	pt_update_ops->needs_invalidation = true;

	xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);

	return 0;
}

static int unbind_range_prepare(struct xe_vm *vm,
				struct xe_tile *tile,
				struct xe_vm_pgtable_update_ops *pt_update_ops,
				struct xe_svm_range *range)
{
	u32 current_op = pt_update_ops->current_op;
	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];

	if (!(range->tile_present & BIT(tile->id)))
		return 0;

	vm_dbg(&vm->xe->drm,
	       "Preparing unbind, with range [%lx...%lx)\n",
	       range->base.itree.start, range->base.itree.last);

	pt_op->vma = XE_INVALID_VMA;
	pt_op->bind = false;
	pt_op->rebind = false;

	pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
						pt_op->entries);

	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
				pt_op->num_entries, false);
	xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start,
					 range->base.itree.last + 1);
	++pt_update_ops->current_op;
	pt_update_ops->needs_svm_lock = true;
	pt_update_ops->needs_invalidation = true;

	xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries,
				    pt_op->num_entries);

	return 0;
}

static int op_prepare(struct xe_vm *vm,
		      struct xe_tile *tile,
		      struct xe_vm_pgtable_update_ops *pt_update_ops,
		      struct xe_vma_op *op)
{
	int err = 0;

	xe_vm_assert_held(vm);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
		    op->map.is_cpu_addr_mirror)
			break;

		err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
		pt_update_ops->wait_vm_kernel = true;
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);

		if (xe_vma_is_cpu_addr_mirror(old))
			break;

		err = unbind_op_prepare(tile, pt_update_ops, old);

		if (!err && op->remap.prev) {
			err = bind_op_prepare(vm, tile, pt_update_ops,
					      op->remap.prev);
			pt_update_ops->wait_vm_bookkeep = true;
		}
		if (!err && op->remap.next) {
			err = bind_op_prepare(vm, tile, pt_update_ops,
					      op->remap.next);
			pt_update_ops->wait_vm_bookkeep = true;
		}
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (xe_vma_is_cpu_addr_mirror(vma))
			break;

		err = unbind_op_prepare(tile, pt_update_ops, vma);
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);

		if (xe_vma_is_cpu_addr_mirror(vma))
			break;

		err = bind_op_prepare(vm, tile, pt_update_ops, vma);
		pt_update_ops->wait_vm_kernel = true;
		break;
	}
	case DRM_GPUVA_OP_DRIVER:
		if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));

			err = bind_range_prepare(vm, tile, pt_update_ops,
						 op->map_range.vma,
						 op->map_range.range);
		} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
			err = unbind_range_prepare(vm, tile, pt_update_ops,
						   op->unmap_range.range);
		}
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}

	return err;
}

static void
xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
{
	init_llist_head(&pt_update_ops->deferred);
	pt_update_ops->start = ~0x0ull;
	pt_update_ops->last = 0x0ull;
}

/**
 * xe_pt_update_ops_prepare() - Prepare PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Prepare PT update operations, which includes updating internal PT state,
 * allocating memory for page tables, populating the newly allocated page
 * tables, and creating PT update operations for leaf insertion / removal.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	struct xe_vma_op *op;
	int shift = tile->media_gt ? 1 : 0;
	int err;

	lockdep_assert_held(&vops->vm->lock);
	xe_vm_assert_held(vops->vm);

	xe_pt_update_ops_init(pt_update_ops);

	err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
				      tile_to_xe(tile)->info.tile_count << shift);
	if (err)
		return err;

	list_for_each_entry(op, &vops->list, link) {
		err = op_prepare(vops->vm, tile, pt_update_ops, op);

		if (err)
			return err;
	}

	xe_tile_assert(tile, pt_update_ops->current_op <=
		       pt_update_ops->num_ops);

#ifdef TEST_VM_OPS_ERROR
	if (vops->inject_error &&
	    vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE)
		return -ENOSPC;
#endif

	return 0;
}
ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
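
/*
 * Rough caller-side sketch of the prepare/run/fini/abort lifecycle
 * (simplified and single-tile; the real callers in xe_vm.c iterate over
 * tiles and also handle syncs and fence bookkeeping):
 *
 *	err = xe_pt_update_ops_prepare(tile, vops);
 *	if (err)
 *		return err;
 *
 *	fence = xe_pt_update_ops_run(tile, vops);
 *	if (IS_ERR(fence)) {
 *		xe_pt_update_ops_abort(tile, vops);
 *		return PTR_ERR(fence);
 *	}
 *
 *	xe_pt_update_ops_fini(tile, vops);
 */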

static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
			   struct xe_vm_pgtable_update_ops *pt_update_ops,
			   struct xe_vma *vma, struct dma_fence *fence,
			   struct dma_fence *fence2)
{
	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));

	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);
		if (fence2)
			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
					   pt_update_ops->wait_vm_bookkeep ?
					   DMA_RESV_USAGE_KERNEL :
					   DMA_RESV_USAGE_BOOKKEEP);
	}
	vma->tile_present |= BIT(tile->id);
	vma->tile_staged &= ~BIT(tile->id);
	if (xe_vma_is_userptr(vma)) {
		lockdep_assert_held_read(&vm->userptr.notifier_lock);
		to_userptr_vma(vma)->userptr.initial_bind = true;
	}

	/*
	 * Kick rebind worker if this bind triggers preempt fences and not in
	 * the rebind worker
	 */
	if (pt_update_ops->wait_vm_bookkeep &&
	    xe_vm_in_preempt_fence_mode(vm) &&
	    !current->mm)
		xe_vm_queue_rebind_worker(vm);
}

static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_vm_pgtable_update_ops *pt_update_ops,
			     struct xe_vma *vma, struct dma_fence *fence,
			     struct dma_fence *fence2)
{
	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));

	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);
		if (fence2)
			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
					   pt_update_ops->wait_vm_bookkeep ?
					   DMA_RESV_USAGE_KERNEL :
					   DMA_RESV_USAGE_BOOKKEEP);
	}
	vma->tile_present &= ~BIT(tile->id);
	if (!vma->tile_present) {
		list_del_init(&vma->combined_links.rebind);
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert_held_read(&vm->userptr.notifier_lock);

			spin_lock(&vm->userptr.invalidated_lock);
			list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
			spin_unlock(&vm->userptr.invalidated_lock);
		}
	}
}

static void op_commit(struct xe_vm *vm,
		      struct xe_tile *tile,
		      struct xe_vm_pgtable_update_ops *pt_update_ops,
		      struct xe_vma_op *op, struct dma_fence *fence,
		      struct dma_fence *fence2)
{
	xe_vm_assert_held(vm);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
		    op->map.is_cpu_addr_mirror)
			break;

		bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
			       fence2);
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *old = gpuva_to_vma(op->base.remap.unmap->va);

		if (xe_vma_is_cpu_addr_mirror(old))
			break;

		unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);

		if (op->remap.prev)
			bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
				       fence, fence2);
		if (op->remap.next)
			bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
				       fence, fence2);
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (!xe_vma_is_cpu_addr_mirror(vma))
			unbind_op_commit(vm, tile, pt_update_ops, vma, fence,
					 fence2);
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);

		if (!xe_vma_is_cpu_addr_mirror(vma))
			bind_op_commit(vm, tile, pt_update_ops, vma, fence,
				       fence2);
		break;
	}
	case DRM_GPUVA_OP_DRIVER:
	{
		if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
			op->map_range.range->tile_present |= BIT(tile->id);
			op->map_range.range->tile_invalidated &= ~BIT(tile->id);
		} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
			op->unmap_range.range->tile_present &= ~BIT(tile->id);
		}
		break;
	}
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}
}

static const struct xe_migrate_pt_update_ops migrate_ops = {
	.populate = xe_vm_populate_pgtable,
	.clear = xe_migrate_clear_pgtable_callback,
	.pre_commit = xe_pt_pre_commit,
};

static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
	.populate = xe_vm_populate_pgtable,
	.clear = xe_migrate_clear_pgtable_callback,
	.pre_commit = xe_pt_userptr_pre_commit,
};

static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
	.populate = xe_vm_populate_pgtable,
	.clear = xe_migrate_clear_pgtable_callback,
	.pre_commit = xe_pt_svm_pre_commit,
};

/**
 * xe_pt_update_ops_run() - Run PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Run PT update operations, which includes committing internal PT state
 * changes, creating a job for the PT update operations (leaf insertion /
 * removal), and installing the job fence in various places.
 *
 * Return: fence on success, ERR_PTR encoding a negative error code on error.
 */
struct dma_fence *
xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm *vm = vops->vm;
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	struct dma_fence *fence;
	struct invalidation_fence *ifence = NULL, *mfence = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	struct xe_range_fence *rfence;
	struct xe_vma_op *op;
	int err = 0, i;
	struct xe_migrate_pt_update update = {
		.ops = pt_update_ops->needs_svm_lock ?
			&svm_migrate_ops :
			pt_update_ops->needs_userptr_lock ?
			&userptr_migrate_ops :
			&migrate_ops,
		.vops = vops,
		.tile_id = tile->id,
	};

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	if (!pt_update_ops->current_op) {
		xe_tile_assert(tile, xe_vm_in_fault_mode(vm));

		return dma_fence_get_stub();
	}

#ifdef TEST_VM_OPS_ERROR
	if (vops->inject_error &&
	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN)
		return ERR_PTR(-ENOSPC);
#endif

	if (pt_update_ops->needs_invalidation) {
		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
		if (!ifence) {
			err = -ENOMEM;
			goto kill_vm_tile1;
		}
		if (tile->media_gt) {
			mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
			if (!mfence) {
				err = -ENOMEM;
				goto free_ifence;
			}
			fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
			if (!fences) {
				err = -ENOMEM;
				goto free_ifence;
			}
			cf = dma_fence_array_alloc(2);
			if (!cf) {
				err = -ENOMEM;
				goto free_ifence;
			}
		}
	}

	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
	if (!rfence) {
		err = -ENOMEM;
		goto free_ifence;
	}

	fence = xe_migrate_update_pgtables(tile->migrate, &update);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto free_rfence;
	}

	/* Point of no return - VM killed if failure after this */
	for (i = 0; i < pt_update_ops->current_op; ++i) {
		struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];

		xe_pt_commit(pt_op->vma, pt_op->entries,
			     pt_op->num_entries, &pt_update_ops->deferred);
		pt_op->vma = NULL;	/* skip in xe_pt_update_ops_abort */
	}

	if (xe_range_fence_insert(&vm->rftree[tile->id], rfence,
				  &xe_range_fence_kfree_ops,
				  pt_update_ops->start,
				  pt_update_ops->last, fence))
		dma_fence_wait(fence, false);

	/* tlb invalidation must be done before signaling rebind */
	if (ifence) {
		if (mfence)
			dma_fence_get(fence);
		invalidation_fence_init(tile->primary_gt, ifence, fence,
					pt_update_ops->start,
					pt_update_ops->last, vm->usm.asid);
		if (mfence) {
			invalidation_fence_init(tile->media_gt, mfence, fence,
						pt_update_ops->start,
						pt_update_ops->last, vm->usm.asid);
			fences[0] = &ifence->base.base;
			fences[1] = &mfence->base.base;
			dma_fence_array_init(cf, 2, fences,
					     vm->composite_fence_ctx,
					     vm->composite_fence_seqno++,
					     false);
			fence = &cf->base;
		} else {
			fence = &ifence->base.base;
		}
	}

	if (!mfence) {
		dma_resv_add_fence(xe_vm_resv(vm), fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		list_for_each_entry(op, &vops->list, link)
			op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
	} else {
		dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		list_for_each_entry(op, &vops->list, link)
			op_commit(vops->vm, tile, pt_update_ops, op,
				  &ifence->base.base, &mfence->base.base);
	}

	if (pt_update_ops->needs_svm_lock)
		xe_svm_notifier_unlock(vm);
	if (pt_update_ops->needs_userptr_lock)
		up_read(&vm->userptr.notifier_lock);

	return fence;

free_rfence:
	kfree(rfence);
free_ifence:
	kfree(cf);
	kfree(fences);
	kfree(mfence);
	kfree(ifence);
kill_vm_tile1:
	if (err != -EAGAIN && tile->id)
		xe_vm_kill(vops->vm, false);

	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
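
/*
 * Note on the fence returned by xe_pt_update_ops_run() (informal summary of
 * the code above): with no TLB invalidation needed it is the migration job
 * fence itself; with invalidation on the primary GT only it is that GT's
 * invalidation fence; and when a media GT is also present the primary and
 * media invalidation fences are combined into a dma_fence_array for the
 * return value, while the VM's dma-resv and op_commit() get the two
 * invalidation fences individually.
 */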

/**
 * xe_pt_update_ops_fini() - Finish PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Finish PT update operations by committing the destruction of page-table
 * memory.
 */
void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	int i;

	lockdep_assert_held(&vops->vm->lock);
	xe_vm_assert_held(vops->vm);

	for (i = 0; i < pt_update_ops->current_op; ++i) {
		struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];

		xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
	}
	xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
}

/**
 * xe_pt_update_ops_abort() - Abort PT update operations
 * @tile: Tile of PT update operations
 * @vops: VMA operations
 *
 * Abort PT update operations by unwinding internal PT state.
 */
void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
{
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	int i;

	lockdep_assert_held(&vops->vm->lock);
	xe_vm_assert_held(vops->vm);

	for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
		struct xe_vm_pgtable_update_op *pt_op =
			&pt_update_ops->ops[i];

		if (!pt_op->vma || i >= pt_update_ops->current_op)
			continue;

		if (pt_op->bind)
			xe_pt_abort_bind(pt_op->vma, pt_op->entries,
					 pt_op->num_entries,
					 pt_op->rebind);
		else
			xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
					   pt_op->num_entries);
	}

	xe_pt_update_ops_fini(tile, vops);
}