1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_migrate.h"
31 #include "xe_pat.h"
32 #include "xe_pm.h"
33 #include "xe_preempt_fence.h"
34 #include "xe_pt.h"
35 #include "xe_pxp.h"
36 #include "xe_sriov_vf.h"
37 #include "xe_svm.h"
38 #include "xe_sync.h"
39 #include "xe_tile.h"
40 #include "xe_tlb_inval.h"
41 #include "xe_trace_bo.h"
42 #include "xe_wa.h"
43
xe_vm_obj(struct xe_vm * vm)44 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
45 {
46 return vm->gpuvm.r_obj;
47 }
48
/**
 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
 * @vm: The vm whose resv is to be locked.
 * @exec: The drm_exec transaction.
 *
 * Locks the GEM object returned by xe_vm_obj() for @vm as part of the
 * drm_exec transaction @exec.
 *
 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
	struct drm_gem_object *resv_obj = xe_vm_obj(vm);

	return drm_exec_lock_obj(exec, resv_obj);
}
62
preempt_fences_waiting(struct xe_vm * vm)63 static bool preempt_fences_waiting(struct xe_vm *vm)
64 {
65 struct xe_exec_queue *q;
66
67 lockdep_assert_held(&vm->lock);
68 xe_vm_assert_held(vm);
69
70 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
71 if (!q->lr.pfence ||
72 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
73 &q->lr.pfence->flags)) {
74 return true;
75 }
76 }
77
78 return false;
79 }
80
free_preempt_fences(struct list_head * list)81 static void free_preempt_fences(struct list_head *list)
82 {
83 struct list_head *link, *next;
84
85 list_for_each_safe(link, next, list)
86 xe_preempt_fence_free(to_preempt_fence_from_link(link));
87 }
88
alloc_preempt_fences(struct xe_vm * vm,struct list_head * list,unsigned int * count)89 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
90 unsigned int *count)
91 {
92 lockdep_assert_held(&vm->lock);
93 xe_vm_assert_held(vm);
94
95 if (*count >= vm->preempt.num_exec_queues)
96 return 0;
97
98 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
99 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
100
101 if (IS_ERR(pfence))
102 return PTR_ERR(pfence);
103
104 list_move_tail(xe_preempt_fence_link(pfence), list);
105 }
106
107 return 0;
108 }
109
wait_for_existing_preempt_fences(struct xe_vm * vm)110 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
111 {
112 struct xe_exec_queue *q;
113 bool vf_migration = IS_SRIOV_VF(vm->xe) &&
114 xe_sriov_vf_migration_supported(vm->xe);
115 signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
116
117 xe_vm_assert_held(vm);
118
119 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
120 if (q->lr.pfence) {
121 long timeout;
122
123 timeout = dma_fence_wait_timeout(q->lr.pfence, false,
124 wait_time);
125 if (!timeout) {
126 xe_assert(vm->xe, vf_migration);
127 return -EAGAIN;
128 }
129
130 /* Only -ETIME on fence indicates VM needs to be killed */
131 if (timeout < 0 || q->lr.pfence->error == -ETIME)
132 return -ETIME;
133
134 dma_fence_put(q->lr.pfence);
135 q->lr.pfence = NULL;
136 }
137 }
138
139 return 0;
140 }
141
xe_vm_is_idle(struct xe_vm * vm)142 static bool xe_vm_is_idle(struct xe_vm *vm)
143 {
144 struct xe_exec_queue *q;
145
146 xe_vm_assert_held(vm);
147 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
148 if (!xe_exec_queue_is_idle(q))
149 return false;
150 }
151
152 return true;
153 }
154
arm_preempt_fences(struct xe_vm * vm,struct list_head * list)155 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
156 {
157 struct list_head *link;
158 struct xe_exec_queue *q;
159
160 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
161 struct dma_fence *fence;
162
163 link = list->next;
164 xe_assert(vm->xe, link != list);
165
166 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
167 q, q->lr.context,
168 ++q->lr.seqno);
169 dma_fence_put(q->lr.pfence);
170 q->lr.pfence = fence;
171 }
172 }
173
add_preempt_fences(struct xe_vm * vm,struct xe_bo * bo)174 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
175 {
176 struct xe_exec_queue *q;
177 int err;
178
179 xe_bo_assert_held(bo);
180
181 if (!vm->preempt.num_exec_queues)
182 return 0;
183
184 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
185 if (err)
186 return err;
187
188 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
189 if (q->lr.pfence) {
190 dma_resv_add_fence(bo->ttm.base.resv,
191 q->lr.pfence,
192 DMA_RESV_USAGE_BOOKKEEP);
193 }
194
195 return 0;
196 }
197
resume_and_reinstall_preempt_fences(struct xe_vm * vm,struct drm_exec * exec)198 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
199 struct drm_exec *exec)
200 {
201 struct xe_exec_queue *q;
202
203 lockdep_assert_held(&vm->lock);
204 xe_vm_assert_held(vm);
205
206 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
207 q->ops->resume(q);
208
209 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
210 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
211 }
212 }
213
xe_vm_add_compute_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)214 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
215 {
216 struct drm_gpuvm_exec vm_exec = {
217 .vm = &vm->gpuvm,
218 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
219 .num_fences = 1,
220 };
221 struct drm_exec *exec = &vm_exec.exec;
222 struct xe_validation_ctx ctx;
223 struct dma_fence *pfence;
224 int err;
225 bool wait;
226
227 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
228
229 down_write(&vm->lock);
230 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
231 if (err)
232 goto out_up_write;
233
234 pfence = xe_preempt_fence_create(q, q->lr.context,
235 ++q->lr.seqno);
236 if (IS_ERR(pfence)) {
237 err = PTR_ERR(pfence);
238 goto out_fini;
239 }
240
241 list_add(&q->lr.link, &vm->preempt.exec_queues);
242 ++vm->preempt.num_exec_queues;
243 q->lr.pfence = pfence;
244
245 xe_svm_notifier_lock(vm);
246
247 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
248 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
249
250 /*
251 * Check to see if a preemption on VM is in flight or userptr
252 * invalidation, if so trigger this preempt fence to sync state with
253 * other preempt fences on the VM.
254 */
255 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
256 if (wait)
257 dma_fence_enable_sw_signaling(pfence);
258
259 xe_svm_notifier_unlock(vm);
260
261 out_fini:
262 xe_validation_ctx_fini(&ctx);
263 out_up_write:
264 up_write(&vm->lock);
265
266 return err;
267 }
268 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
269
270 /**
271 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
272 * @vm: The VM.
273 * @q: The exec_queue
274 *
275 * Note that this function might be called multiple times on the same queue.
276 */
xe_vm_remove_compute_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)277 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
278 {
279 if (!xe_vm_in_preempt_fence_mode(vm))
280 return;
281
282 down_write(&vm->lock);
283 if (!list_empty(&q->lr.link)) {
284 list_del_init(&q->lr.link);
285 --vm->preempt.num_exec_queues;
286 }
287 if (q->lr.pfence) {
288 dma_fence_enable_sw_signaling(q->lr.pfence);
289 dma_fence_put(q->lr.pfence);
290 q->lr.pfence = NULL;
291 }
292 up_write(&vm->lock);
293 }
294
/*
 * NOTE(review): no user of this macro is visible in this part of the file.
 * Going by the name it is presumably a rebind retry timeout in
 * milliseconds - confirm against its users before changing it.
 */
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
296
297 /**
298 * xe_vm_kill() - VM Kill
299 * @vm: The VM.
300 * @unlocked: Flag indicates the VM's dma-resv is not held
301 *
302 * Kill the VM by setting banned flag indicated VM is no longer available for
303 * use. If in preempt fence mode, also kill all exec queue attached to the VM.
304 */
xe_vm_kill(struct xe_vm * vm,bool unlocked)305 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
306 {
307 struct xe_exec_queue *q;
308
309 lockdep_assert_held(&vm->lock);
310
311 if (unlocked)
312 xe_vm_lock(vm, false);
313
314 vm->flags |= XE_VM_FLAG_BANNED;
315 trace_xe_vm_kill(vm);
316
317 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
318 q->ops->kill(q);
319
320 if (unlocked)
321 xe_vm_unlock(vm);
322
323 /* TODO: Inform user the VM is banned */
324 }
325
xe_gpuvm_validate(struct drm_gpuvm_bo * vm_bo,struct drm_exec * exec)326 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
327 {
328 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
329 struct drm_gpuva *gpuva;
330 int ret;
331
332 lockdep_assert_held(&vm->lock);
333 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
334 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
335 &vm->rebind_list);
336
337 if (!try_wait_for_completion(&vm->xe->pm_block))
338 return -EAGAIN;
339
340 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
341 if (ret)
342 return ret;
343
344 vm_bo->evicted = false;
345 return 0;
346 }
347
348 /**
349 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
350 * @vm: The vm for which we are rebinding.
351 * @exec: The struct drm_exec with the locked GEM objects.
352 * @num_fences: The number of fences to reserve for the operation, not
353 * including rebinds and validations.
354 *
355 * Validates all evicted gem objects and rebinds their vmas. Note that
356 * rebindings may cause evictions and hence the validation-rebind
357 * sequence is rerun until there are no more objects to validate.
358 *
359 * Return: 0 on success, negative error code on error. In particular,
360 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
361 * the drm_exec transaction needs to be restarted.
362 */
xe_vm_validate_rebind(struct xe_vm * vm,struct drm_exec * exec,unsigned int num_fences)363 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
364 unsigned int num_fences)
365 {
366 struct drm_gem_object *obj;
367 unsigned long index;
368 int ret;
369
370 do {
371 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
372 if (ret)
373 return ret;
374
375 ret = xe_vm_rebind(vm, false);
376 if (ret)
377 return ret;
378 } while (!list_empty(&vm->gpuvm.evict.list));
379
380 drm_exec_for_each_locked_object(exec, index, obj) {
381 ret = dma_resv_reserve_fences(obj->resv, num_fences);
382 if (ret)
383 return ret;
384 }
385
386 return 0;
387 }
388
/*
 * Locking-loop body of the preempt rebind worker.
 *
 * Prepares the VM in @exec and decides whether any rebind work is needed.
 * *@done is set to true, and 0 returned, when the VM is idle (in which case
 * vm->preempt.rebind_deactivated is also set) or when no preempt fence is
 * waiting. Otherwise the VM's objects are prepared, the existing preempt
 * fences are waited on, and validation + rebind is run.
 *
 * *@done is only ever written with true; the caller initialises it.
 *
 * Return: 0 on success, negative error code from any of the steps.
 */
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		vm->preempt.rebind_deactivated = true;
		goto nothing_to_do;
	}

	if (!preempt_fences_waiting(vm))
		goto nothing_to_do;

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);

nothing_to_do:
	*done = true;
	return 0;
}
425
vm_suspend_rebind_worker(struct xe_vm * vm)426 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
427 {
428 struct xe_device *xe = vm->xe;
429 bool ret = false;
430
431 mutex_lock(&xe->rebind_resume_lock);
432 if (!try_wait_for_completion(&vm->xe->pm_block)) {
433 ret = true;
434 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
435 }
436 mutex_unlock(&xe->rebind_resume_lock);
437
438 return ret;
439 }
440
441 /**
442 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
443 * @vm: The vm whose preempt worker to resume.
444 *
445 * Resume a preempt worker that was previously suspended by
446 * vm_suspend_rebind_worker().
447 */
xe_vm_resume_rebind_worker(struct xe_vm * vm)448 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
449 {
450 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
451 }
452
preempt_rebind_work_func(struct work_struct * w)453 static void preempt_rebind_work_func(struct work_struct *w)
454 {
455 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
456 struct xe_validation_ctx ctx;
457 struct drm_exec exec;
458 unsigned int fence_count = 0;
459 LIST_HEAD(preempt_fences);
460 int err = 0;
461 long wait;
462 int __maybe_unused tries = 0;
463
464 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
465 trace_xe_vm_rebind_worker_enter(vm);
466
467 down_write(&vm->lock);
468
469 if (xe_vm_is_closed_or_banned(vm)) {
470 up_write(&vm->lock);
471 trace_xe_vm_rebind_worker_exit(vm);
472 return;
473 }
474
475 retry:
476 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
477 up_write(&vm->lock);
478 /* We don't actually block but don't make progress. */
479 xe_pm_might_block_on_suspend();
480 return;
481 }
482
483 if (xe_vm_userptr_check_repin(vm)) {
484 err = xe_vm_userptr_pin(vm);
485 if (err)
486 goto out_unlock_outer;
487 }
488
489 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
490 (struct xe_val_flags) {.interruptible = true});
491 if (err)
492 goto out_unlock_outer;
493
494 drm_exec_until_all_locked(&exec) {
495 bool done = false;
496
497 err = xe_preempt_work_begin(&exec, vm, &done);
498 drm_exec_retry_on_contention(&exec);
499 xe_validation_retry_on_oom(&ctx, &err);
500 if (err || done) {
501 xe_validation_ctx_fini(&ctx);
502 goto out_unlock_outer;
503 }
504 }
505
506 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
507 if (err)
508 goto out_unlock;
509
510 xe_vm_set_validation_exec(vm, &exec);
511 err = xe_vm_rebind(vm, true);
512 xe_vm_set_validation_exec(vm, NULL);
513 if (err)
514 goto out_unlock;
515
516 /* Wait on rebinds and munmap style VM unbinds */
517 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
518 DMA_RESV_USAGE_KERNEL,
519 false, MAX_SCHEDULE_TIMEOUT);
520 if (wait <= 0) {
521 err = -ETIME;
522 goto out_unlock;
523 }
524
525 #define retry_required(__tries, __vm) \
526 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
527 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
528 __xe_vm_userptr_needs_repin(__vm))
529
530 xe_svm_notifier_lock(vm);
531 if (retry_required(tries, vm)) {
532 xe_svm_notifier_unlock(vm);
533 err = -EAGAIN;
534 goto out_unlock;
535 }
536
537 #undef retry_required
538
539 spin_lock(&vm->xe->ttm.lru_lock);
540 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
541 spin_unlock(&vm->xe->ttm.lru_lock);
542
543 /* Point of no return. */
544 arm_preempt_fences(vm, &preempt_fences);
545 resume_and_reinstall_preempt_fences(vm, &exec);
546 xe_svm_notifier_unlock(vm);
547
548 out_unlock:
549 xe_validation_ctx_fini(&ctx);
550 out_unlock_outer:
551 if (err == -EAGAIN) {
552 trace_xe_vm_rebind_worker_retry(vm);
553
554 /*
555 * We can't block in workers on a VF which supports migration
556 * given this can block the VF post-migration workers from
557 * getting scheduled.
558 */
559 if (IS_SRIOV_VF(vm->xe) &&
560 xe_sriov_vf_migration_supported(vm->xe)) {
561 up_write(&vm->lock);
562 xe_vm_queue_rebind_worker(vm);
563 return;
564 }
565
566 goto retry;
567 }
568
569 if (err) {
570 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
571 xe_vm_kill(vm, true);
572 }
573 up_write(&vm->lock);
574
575 free_preempt_fences(&preempt_fences);
576
577 trace_xe_vm_rebind_worker_exit(vm);
578 }
579
/*
 * Allocate the page-table update op arrays of @vops.
 *
 * For every tile slot whose num_ops is non-zero, an array of num_ops
 * entries is allocated into pt_update_ops[].ops. Slots with num_ops == 0
 * are left untouched.
 *
 * Return: 0 on success; on allocation failure -ENOBUFS when
 * @array_of_binds is true, -ENOMEM otherwise.
 */
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
	int tile_idx;

	for (tile_idx = 0; tile_idx < XE_MAX_TILES_PER_DEVICE; ++tile_idx) {
		if (vops->pt_update_ops[tile_idx].num_ops) {
			vops->pt_update_ops[tile_idx].ops =
				kmalloc_objs(*vops->pt_update_ops[tile_idx].ops,
					     vops->pt_update_ops[tile_idx].num_ops,
					     gfp);
			if (!vops->pt_update_ops[tile_idx].ops)
				return array_of_binds ? -ENOBUFS : -ENOMEM;
		}
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
599
xe_vma_svm_prefetch_op_fini(struct xe_vma_op * op)600 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
601 {
602 struct xe_vma *vma;
603
604 vma = gpuva_to_vma(op->base.prefetch.va);
605
606 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
607 xa_destroy(&op->prefetch_range.range);
608 }
609
xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops * vops)610 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
611 {
612 struct xe_vma_op *op;
613
614 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
615 return;
616
617 list_for_each_entry(op, &vops->list, link)
618 xe_vma_svm_prefetch_op_fini(op);
619 }
620
xe_vma_ops_fini(struct xe_vma_ops * vops)621 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
622 {
623 int i;
624
625 xe_vma_svm_prefetch_ops_fini(vops);
626
627 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
628 kfree(vops->pt_update_ops[i].ops);
629 }
630
/*
 * Add @inc_val to pt_update_ops[].num_ops for each tile index whose bit is
 * set in @tile_mask. A zero @inc_val is a no-op.
 */
static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
	int tile_idx;

	if (!inc_val)
		return;

	for (tile_idx = 0; tile_idx < XE_MAX_TILES_PER_DEVICE; ++tile_idx) {
		if (!(tile_mask & BIT(tile_idx)))
			continue;

		vops->pt_update_ops[tile_idx].num_ops += inc_val;
	}
}
642
/*
 * gpuva flag bits that xe_vm_populate_rebind() carries over from an
 * existing VMA (vma->gpuva.flags) into op->map.vma_flags of a rebind map
 * operation.
 */
#define XE_VMA_CREATE_MASK ( \
	XE_VMA_READ_ONLY | \
	XE_VMA_DUMPABLE | \
	XE_VMA_SYSTEM_ALLOCATOR | \
	DRM_GPUVA_SPARSE | \
	XE_VMA_MADV_AUTORESET)
649
/*
 * Fill @op in as an immediate DRM_GPUVA_OP_MAP that re-creates the mapping
 * of @vma on the tiles in @tile_mask: address, range, GEM object and GEM
 * offset are copied from the VMA's gpuva, and the VMA flags are filtered
 * through XE_VMA_CREATE_MASK.
 */
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
{
	struct drm_gpuva *gpuva = &vma->gpuva;

	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;

	op->base.op = DRM_GPUVA_OP_MAP;
	op->base.map.va.addr = gpuva->va.addr;
	op->base.map.va.range = gpuva->va.range;
	op->base.map.gem.obj = gpuva->gem.obj;
	op->base.map.gem.offset = gpuva->gem.offset;

	op->map.vma = vma;
	op->map.immediate = true;
	op->map.vma_flags = gpuva->flags & XE_VMA_CREATE_MASK;
}
664
/*
 * Allocate a rebind op for @vma, append it to @vops and account one
 * page-table update op for each tile in @tile_mask.
 *
 * Return: 0 on success, -ENOMEM if the op cannot be allocated.
 */
static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
{
	struct xe_vma_op *rebind_op = kzalloc_obj(*rebind_op);

	if (!rebind_op)
		return -ENOMEM;

	xe_vm_populate_rebind(rebind_op, vma, tile_mask);
	list_add_tail(&rebind_op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}
680
/*
 * Forward declarations: both helpers are static and are used by the rebind
 * paths below before their definitions appear.
 */
static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops);
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs);
686
/*
 * xe_vm_rebind() - Rebind every VMA on the VM's rebind list.
 * @vm: The VM.
 * @rebind_worker: True when called from the preempt rebind worker.
 *
 * Does nothing when the rebind list is empty, or when the VM is in LR mode
 * and the caller is not the rebind worker. Otherwise one map op per listed
 * VMA (targeting the VMA's tile_present mask) is built and executed; on
 * success all VMAs are removed from the rebind list.
 *
 * The caller must hold vm->lock.
 *
 * Return: 0 on success or when there is nothing to do, negative error code
 * otherwise.
 */
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct xe_vma_op *op, *tmp_op;
	struct xe_vma *vma, *tmp_vma;
	struct dma_fence *fence;
	struct xe_vma_ops vops;
	int tile_idx;
	int err;

	lockdep_assert_held(&vm->lock);

	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
		return 0;

	if (list_empty(&vm->rebind_list))
		return 0;

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for (tile_idx = 0; tile_idx < XE_MAX_TILES_PER_DEVICE; ++tile_idx)
		vops.pt_update_ops[tile_idx].wait_vm_bookkeep = true;

	xe_vm_assert_held(vm);
	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
		xe_assert(vm->xe, vma->tile_present);

		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);

		err = xe_vm_ops_add_rebind(&vops, vma, vma->tile_present);
		if (err)
			goto free_ops;
	}

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto free_ops;

	fence = ops_execute(vm, &vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto free_ops;
	}

	/* Only the completion of submission matters here, not the fence. */
	dma_fence_put(fence);
	list_for_each_entry_safe(vma, tmp_vma, &vm->rebind_list,
				 combined_links.rebind)
		list_del_init(&vma->combined_links.rebind);

free_ops:
	list_for_each_entry_safe(op, tmp_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return err;
}
741
/*
 * xe_vma_rebind() - Rebind a single VMA of a fault-mode VM.
 * @vm: The VM, asserted to be in fault mode.
 * @vma: The VMA to rebind.
 * @tile_mask: Tiles to rebind the VMA on.
 *
 * Builds a one-op xe_vma_ops (flagged XE_VMA_OPS_FLAG_SKIP_TLB_WAIT, using
 * each tile's migrate exec queue) and executes it.
 *
 * The caller must hold vm->lock and the VM (xe_vm_assert_held()).
 *
 * Return: the fence returned by ops_execute(), or an ERR_PTR on failure to
 * build the op or allocate the update ops.
 */
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
{
	struct xe_vma_op *op, *tmp_op;
	struct dma_fence *fence;
	struct xe_vma_ops vops;
	struct xe_tile *tile;
	int err;
	u8 id;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		fence = ERR_PTR(err);
	else
		fence = ops_execute(vm, &vops);

	/* Common cleanup for both the success and the allocation-failure path. */
	list_for_each_entry_safe(op, tmp_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}
784
/*
 * Fill @op in as a driver op (DRM_GPUVA_OP_DRIVER) with sub-op
 * XE_VMA_SUBOP_MAP_RANGE that maps @range of @vma on the tiles in
 * @tile_mask.
 */
static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);

	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_MAP_RANGE;
	op->tile_mask = tile_mask;

	op->map_range.range = range;
	op->map_range.vma = vma;
}
797
798 static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops * vops,struct xe_vma * vma,struct xe_svm_range * range,u8 tile_mask)799 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
800 struct xe_vma *vma,
801 struct xe_svm_range *range,
802 u8 tile_mask)
803 {
804 struct xe_vma_op *op;
805
806 op = kzalloc_obj(*op);
807 if (!op)
808 return -ENOMEM;
809
810 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
811 list_add_tail(&op->link, &vops->list);
812 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
813
814 return 0;
815 }
816
817 /**
818 * xe_vm_range_rebind() - VM range (re)bind
819 * @vm: The VM which the range belongs to.
820 * @vma: The VMA which the range belongs to.
821 * @range: SVM range to rebind.
822 * @tile_mask: Tile mask to bind the range to.
823 *
824 * (re)bind SVM range setting up GPU page tables for the range.
825 *
826 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
827 * failure
828 */
xe_vm_range_rebind(struct xe_vm * vm,struct xe_vma * vma,struct xe_svm_range * range,u8 tile_mask)829 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
830 struct xe_vma *vma,
831 struct xe_svm_range *range,
832 u8 tile_mask)
833 {
834 struct dma_fence *fence = NULL;
835 struct xe_vma_ops vops;
836 struct xe_vma_op *op, *next_op;
837 struct xe_tile *tile;
838 u8 id;
839 int err;
840
841 lockdep_assert_held(&vm->lock);
842 xe_vm_assert_held(vm);
843 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
844 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
845
846 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
847 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
848 for_each_tile(tile, vm->xe, id) {
849 vops.pt_update_ops[id].wait_vm_bookkeep = true;
850 vops.pt_update_ops[tile->id].q =
851 xe_migrate_exec_queue(tile->migrate);
852 }
853
854 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
855 if (err)
856 return ERR_PTR(err);
857
858 err = xe_vma_ops_alloc(&vops, false);
859 if (err) {
860 fence = ERR_PTR(err);
861 goto free_ops;
862 }
863
864 fence = ops_execute(vm, &vops);
865
866 free_ops:
867 list_for_each_entry_safe(op, next_op, &vops.list, link) {
868 list_del(&op->link);
869 kfree(op);
870 }
871 xe_vma_ops_fini(&vops);
872
873 return fence;
874 }
875
xe_vm_populate_range_unbind(struct xe_vma_op * op,struct xe_svm_range * range)876 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
877 struct xe_svm_range *range)
878 {
879 INIT_LIST_HEAD(&op->link);
880 op->tile_mask = range->tile_present;
881 op->base.op = DRM_GPUVA_OP_DRIVER;
882 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
883 op->unmap_range.range = range;
884 }
885
886 static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops * vops,struct xe_svm_range * range)887 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
888 struct xe_svm_range *range)
889 {
890 struct xe_vma_op *op;
891
892 op = kzalloc_obj(*op);
893 if (!op)
894 return -ENOMEM;
895
896 xe_vm_populate_range_unbind(op, range);
897 list_add_tail(&op->link, &vops->list);
898 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
899
900 return 0;
901 }
902
903 /**
904 * xe_vm_range_unbind() - VM range unbind
905 * @vm: The VM which the range belongs to.
906 * @range: SVM range to rebind.
907 *
908 * Unbind SVM range removing the GPU page tables for the range.
909 *
910 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
911 * failure
912 */
xe_vm_range_unbind(struct xe_vm * vm,struct xe_svm_range * range)913 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
914 struct xe_svm_range *range)
915 {
916 struct dma_fence *fence = NULL;
917 struct xe_vma_ops vops;
918 struct xe_vma_op *op, *next_op;
919 struct xe_tile *tile;
920 u8 id;
921 int err;
922
923 lockdep_assert_held(&vm->lock);
924 xe_vm_assert_held(vm);
925 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
926
927 if (!range->tile_present)
928 return dma_fence_get_stub();
929
930 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
931 for_each_tile(tile, vm->xe, id) {
932 vops.pt_update_ops[id].wait_vm_bookkeep = true;
933 vops.pt_update_ops[tile->id].q =
934 xe_migrate_exec_queue(tile->migrate);
935 }
936
937 err = xe_vm_ops_add_range_unbind(&vops, range);
938 if (err)
939 return ERR_PTR(err);
940
941 err = xe_vma_ops_alloc(&vops, false);
942 if (err) {
943 fence = ERR_PTR(err);
944 goto free_ops;
945 }
946
947 fence = ops_execute(vm, &vops);
948
949 free_ops:
950 list_for_each_entry_safe(op, next_op, &vops.list, link) {
951 list_del(&op->link);
952 kfree(op);
953 }
954 xe_vma_ops_fini(&vops);
955
956 return fence;
957 }
958
xe_vma_mem_attr_fini(struct xe_vma_mem_attr * attr)959 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
960 {
961 drm_pagemap_put(attr->preferred_loc.dpagemap);
962 }
963
xe_vma_free(struct xe_vma * vma)964 static void xe_vma_free(struct xe_vma *vma)
965 {
966 xe_vma_mem_attr_fini(&vma->attr);
967
968 if (xe_vma_is_userptr(vma))
969 kfree(to_userptr_vma(vma));
970 else
971 kfree(vma);
972 }
973
974 /**
975 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
976 * @to: Destination.
977 * @from: Source.
978 *
979 * Copies an xe_vma_mem_attr structure taking care to get reference
980 * counting of individual members right.
981 */
xe_vma_mem_attr_copy(struct xe_vma_mem_attr * to,struct xe_vma_mem_attr * from)982 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
983 {
984 xe_vma_mem_attr_fini(to);
985 *to = *from;
986 if (to->preferred_loc.dpagemap)
987 drm_pagemap_get(to->preferred_loc.dpagemap);
988 }
989
/*
 * xe_vma_create() - Allocate and initialise a VMA.
 * @vm: The VM the VMA belongs to.
 * @bo: Backing buffer object, or NULL for userptr, sparse (null) and
 *      CPU-address-mirror VMAs.
 * @bo_offset_or_userptr: Stored as the gpuva GEM offset for BO-backed and
 *                        userptr VMAs; unused for the other kinds.
 * @start: First address covered by the VMA.
 * @end: Last address covered by the VMA (inclusive: the range is
 *       end - start + 1).
 * @attr: Memory attributes copied into the new VMA.
 * @flags: Initial gpuva flags.
 *
 * BO-backed VMAs take a GEM reference on @bo and are linked to its
 * drm_gpuvm_bo; all other VMAs take a reference on @vm instead.
 *
 * Return: the new VMA, or an ERR_PTR: -ENOMEM on allocation failure, or the
 * error from drm_gpuvm_bo_obtain_locked() / xe_userptr_setup().
 */
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool is_null = (flags & DRM_GPUVA_SPARSE);
	bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
	 */
	if (!bo && !is_null && !is_cpu_addr_mirror) {
		/* Userptr: the VMA is embedded in a larger xe_userptr_vma. */
		struct xe_userptr_vma *uvma = kzalloc_obj(*uvma);

		if (!uvma)
			return ERR_PTR(-ENOMEM);

		vma = &uvma->vma;
	} else {
		vma = kzalloc_obj(*vma);
		if (!vma)
			return ERR_PTR(-ENOMEM);

		if (bo)
			vma->gpuva.gem.obj = &bo->ttm.base;
	}

	INIT_LIST_HEAD(&vma->combined_links.rebind);

	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
	vma->gpuva.vm = &vm->gpuvm;
	vma->gpuva.va.addr = start;
	/* @end is inclusive, hence the + 1. */
	vma->gpuva.va.range = end - start + 1;
	vma->gpuva.flags = flags;

	/* Set one tile_mask bit per tile visited by for_each_tile(). */
	for_each_tile(tile, vm->xe, id)
		vma->tile_mask |= 0x1 << id;

	if (vm->xe->info.has_atomic_enable_pte_bit)
		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

	xe_vma_mem_attr_copy(&vma->attr, attr);
	if (bo) {
		struct drm_gpuvm_bo *vm_bo;

		xe_bo_assert_held(bo);

		vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
		if (IS_ERR(vm_bo)) {
			xe_vma_free(vma);
			return ERR_CAST(vm_bo);
		}

		drm_gpuvm_bo_extobj_add(vm_bo);
		/* GEM reference held by the VMA; xe_vma_destroy_late() does the xe_bo_put(). */
		drm_gem_object_get(&bo->ttm.base);
		vma->gpuva.gem.offset = bo_offset_or_userptr;
		drm_gpuva_link(&vma->gpuva, vm_bo);
		/* Drop the local vm_bo reference obtained above. */
		drm_gpuvm_bo_put(vm_bo);
	} else /* userptr or null */ {
		if (!is_null && !is_cpu_addr_mirror) {
			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
			u64 size = end - start + 1;
			int err;

			vma->gpuva.gem.offset = bo_offset_or_userptr;

			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
			if (err) {
				xe_vma_free(vma);
				return ERR_PTR(err);
			}
		}

		/* VM reference held by the VMA; xe_vma_destroy_late() does the xe_vm_put(). */
		xe_vm_get(vm);
	}

	return vma;
}
1077
xe_vma_destroy_late(struct xe_vma * vma)1078 static void xe_vma_destroy_late(struct xe_vma *vma)
1079 {
1080 struct xe_vm *vm = xe_vma_vm(vma);
1081
1082 if (vma->ufence) {
1083 xe_sync_ufence_put(vma->ufence);
1084 vma->ufence = NULL;
1085 }
1086
1087 if (xe_vma_is_userptr(vma)) {
1088 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1089
1090 xe_userptr_remove(uvma);
1091 xe_vm_put(vm);
1092 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1093 xe_vm_put(vm);
1094 } else {
1095 xe_bo_put(xe_vma_bo(vma));
1096 }
1097
1098 xe_vma_free(vma);
1099 }
1100
vma_destroy_work_func(struct work_struct * w)1101 static void vma_destroy_work_func(struct work_struct *w)
1102 {
1103 struct xe_vma *vma =
1104 container_of(w, struct xe_vma, destroy_work);
1105
1106 xe_vma_destroy_late(vma);
1107 }
1108
vma_destroy_cb(struct dma_fence * fence,struct dma_fence_cb * cb)1109 static void vma_destroy_cb(struct dma_fence *fence,
1110 struct dma_fence_cb *cb)
1111 {
1112 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1113
1114 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1115 queue_work(system_unbound_wq, &vma->destroy_work);
1116 }
1117
xe_vma_destroy(struct xe_vma * vma,struct dma_fence * fence)1118 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1119 {
1120 struct xe_vm *vm = xe_vma_vm(vma);
1121
1122 lockdep_assert_held_write(&vm->lock);
1123 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1124
1125 if (xe_vma_is_userptr(vma)) {
1126 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1127 xe_userptr_destroy(to_userptr_vma(vma));
1128 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1129 xe_bo_assert_held(xe_vma_bo(vma));
1130
1131 drm_gpuva_unlink(&vma->gpuva);
1132 }
1133
1134 xe_vm_assert_held(vm);
1135 if (fence) {
1136 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1137 vma_destroy_cb);
1138
1139 if (ret) {
1140 XE_WARN_ON(ret != -ENOENT);
1141 xe_vma_destroy_late(vma);
1142 }
1143 } else {
1144 xe_vma_destroy_late(vma);
1145 }
1146 }
1147
1148 /**
1149 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1150 * @exec: The drm_exec object we're currently locking for.
1151 * @vma: The vma for witch we want to lock the vm resv and any attached
1152 * object's resv.
1153 *
1154 * Return: 0 on success, negative error code on error. In particular
1155 * may return -EDEADLK on WW transaction contention and -EINTR if
1156 * an interruptible wait is terminated by a signal.
1157 */
xe_vm_lock_vma(struct drm_exec * exec,struct xe_vma * vma)1158 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1159 {
1160 struct xe_vm *vm = xe_vma_vm(vma);
1161 struct xe_bo *bo = xe_vma_bo(vma);
1162 int err;
1163
1164 XE_WARN_ON(!vm);
1165
1166 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1167 if (!err && bo && !bo->vm)
1168 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1169
1170 return err;
1171 }
1172
/*
 * Destroy a vma whose vm resv / bo resv are not yet locked by the caller:
 * takes the locks via xe_vm_lock_vma() inside a validation guard and then
 * destroys the vma immediately (no fence). A locking failure is not
 * expected; it triggers a warning and the vma is left untouched.
 */
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		/* Restart the locking transaction on ww contention */
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}
1189
1190 struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm * vm,u64 start,u64 range)1191 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1192 {
1193 struct drm_gpuva *gpuva;
1194
1195 lockdep_assert_held(&vm->lock);
1196
1197 if (xe_vm_is_closed_or_banned(vm))
1198 return NULL;
1199
1200 xe_assert(vm->xe, start + range <= vm->size);
1201
1202 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1203
1204 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1205 }
1206
xe_vm_insert_vma(struct xe_vm * vm,struct xe_vma * vma)1207 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1208 {
1209 int err;
1210
1211 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1212 lockdep_assert_held(&vm->lock);
1213
1214 mutex_lock(&vm->snap_mutex);
1215 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1216 mutex_unlock(&vm->snap_mutex);
1217 XE_WARN_ON(err); /* Shouldn't be possible */
1218
1219 return err;
1220 }
1221
xe_vm_remove_vma(struct xe_vm * vm,struct xe_vma * vma)1222 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1223 {
1224 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1225 lockdep_assert_held(&vm->lock);
1226
1227 mutex_lock(&vm->snap_mutex);
1228 drm_gpuva_remove(&vma->gpuva);
1229 mutex_unlock(&vm->snap_mutex);
1230 if (vm->usm.last_fault_vma == vma)
1231 vm->usm.last_fault_vma = NULL;
1232 }
1233
xe_vm_op_alloc(void)1234 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1235 {
1236 struct xe_vma_op *op;
1237
1238 op = kzalloc_obj(*op);
1239
1240 if (unlikely(!op))
1241 return NULL;
1242
1243 return &op->base;
1244 }
1245
/* Defined further down in this file; declared here for the ops table. */
static void xe_vm_free(struct drm_gpuvm *gpuvm);

/* drm_gpuvm callbacks registered with drm_gpuvm_init() in xe_vm_create(). */
static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};
1253
pde_encode_pat_index(u16 pat_index)1254 static u64 pde_encode_pat_index(u16 pat_index)
1255 {
1256 u64 pte = 0;
1257
1258 if (pat_index & BIT(0))
1259 pte |= XE_PPGTT_PTE_PAT0;
1260
1261 if (pat_index & BIT(1))
1262 pte |= XE_PPGTT_PTE_PAT1;
1263
1264 return pte;
1265 }
1266
pte_encode_pat_index(u16 pat_index,u32 pt_level)1267 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1268 {
1269 u64 pte = 0;
1270
1271 if (pat_index & BIT(0))
1272 pte |= XE_PPGTT_PTE_PAT0;
1273
1274 if (pat_index & BIT(1))
1275 pte |= XE_PPGTT_PTE_PAT1;
1276
1277 if (pat_index & BIT(2)) {
1278 if (pt_level)
1279 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1280 else
1281 pte |= XE_PPGTT_PTE_PAT2;
1282 }
1283
1284 if (pat_index & BIT(3))
1285 pte |= XELPG_PPGTT_PTE_PAT3;
1286
1287 if (pat_index & (BIT(4)))
1288 pte |= XE2_PPGTT_PTE_PAT4;
1289
1290 return pte;
1291 }
1292
pte_encode_ps(u32 pt_level)1293 static u64 pte_encode_ps(u32 pt_level)
1294 {
1295 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1296
1297 if (pt_level == 1)
1298 return XE_PDE_PS_2M;
1299 else if (pt_level == 2)
1300 return XE_PDPE_PS_1G;
1301
1302 return 0;
1303 }
1304
pde_pat_index(struct xe_bo * bo)1305 static u16 pde_pat_index(struct xe_bo *bo)
1306 {
1307 struct xe_device *xe = xe_bo_device(bo);
1308 u16 pat_index;
1309
1310 /*
1311 * We only have two bits to encode the PAT index in non-leaf nodes, but
1312 * these only point to other paging structures so we only need a minimal
1313 * selection of options. The user PAT index is only for encoding leaf
1314 * nodes, where we have use of more bits to do the encoding. The
1315 * non-leaf nodes are instead under driver control so the chosen index
1316 * here should be distinct from the user PAT index. Also the
1317 * corresponding coherency of the PAT index should be tied to the
1318 * allocation type of the page table (or at least we should pick
1319 * something which is always safe).
1320 */
1321 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1322 pat_index = xe->pat.idx[XE_CACHE_WB];
1323 else
1324 pat_index = xe->pat.idx[XE_CACHE_NONE];
1325
1326 xe_assert(xe, pat_index <= 3);
1327
1328 return pat_index;
1329 }
1330
/*
 * Build a page-directory entry pointing at @bo_offset inside page-table
 * @bo: address, present + read/write bits and the driver-chosen PAT bits.
 */
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
	u64 entry = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);

	entry |= XE_PAGE_PRESENT | XE_PAGE_RW;
	entry |= pde_encode_pat_index(pde_pat_index(bo));

	return entry;
}
1341
/*
 * Build a page-table entry mapping @bo_offset of @bo at @pt_level with the
 * given @pat_index. The entry is always present and writable; the device
 * memory bit is set for VRAM and stolen device memory bos.
 */
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
{
	u64 entry = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);

	entry |= XE_PAGE_PRESENT | XE_PAGE_RW;
	entry |= pte_encode_pat_index(pat_index, pt_level);
	entry |= pte_encode_ps(pt_level);

	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen_devmem(bo))
		return entry;

	return entry | XE_PPGTT_PTE_DM;
}
1357
/*
 * Add the vma-derived bits to an already address-carrying @pte: present,
 * read/write unless the vma is read-only, PAT and page-size bits for
 * @pt_level, and the NULL bit for null vmas.
 */
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
{
	u64 entry = pte | XE_PAGE_PRESENT;

	if (likely(!xe_vma_read_only(vma)))
		entry |= XE_PAGE_RW;

	entry |= pte_encode_pat_index(pat_index, pt_level);
	entry |= pte_encode_ps(pt_level);

	if (unlikely(xe_vma_is_null(vma)))
		entry |= XE_PTE_NULL;

	return entry;
}
1374
/*
 * Build a present, writable page-table entry for a raw @addr at @pt_level.
 * @devmem selects the device memory bit; @flags may only contain
 * XE_PTE_PS64 (asserted) and is OR-ed into the result.
 */
static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
{
	u64 entry;

	/* Avoid passing random bits directly as flags */
	xe_assert(xe, !(flags & ~XE_PTE_PS64));

	entry = addr | XE_PAGE_PRESENT | XE_PAGE_RW;
	entry |= pte_encode_pat_index(pat_index, pt_level);
	entry |= pte_encode_ps(pt_level);
	entry |= devmem ? XE_PPGTT_PTE_DM : 0;

	return entry | flags;
}
1396
/* Page-table entry encoders; installed as vm->pt_ops in xe_vm_create(). */
static const struct xe_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_pte_encode_bo,
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};

/* Defined further down in this file; xe_vm_create() queues it via INIT_WORK(). */
static void vm_destroy_work_func(struct work_struct *w);
1405
1406 /**
1407 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1408 * given tile and vm.
1409 * @xe: xe device.
1410 * @tile: tile to set up for.
1411 * @vm: vm to set up for.
1412 * @exec: The struct drm_exec object used to lock the vm resv.
1413 *
1414 * Sets up a pagetable tree with one page-table per level and a single
1415 * leaf PTE. All pagetable entries point to the single page-table or,
1416 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1417 * writes become NOPs.
1418 *
1419 * Return: 0 on success, negative error code on error.
1420 */
xe_vm_create_scratch(struct xe_device * xe,struct xe_tile * tile,struct xe_vm * vm,struct drm_exec * exec)1421 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1422 struct xe_vm *vm, struct drm_exec *exec)
1423 {
1424 u8 id = tile->id;
1425 int i;
1426
1427 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1428 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1429 if (IS_ERR(vm->scratch_pt[id][i])) {
1430 int err = PTR_ERR(vm->scratch_pt[id][i]);
1431
1432 vm->scratch_pt[id][i] = NULL;
1433 return err;
1434 }
1435 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1436 }
1437
1438 return 0;
1439 }
1440 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1441
xe_vm_free_scratch(struct xe_vm * vm)1442 static void xe_vm_free_scratch(struct xe_vm *vm)
1443 {
1444 struct xe_tile *tile;
1445 u8 id;
1446
1447 if (!xe_vm_has_scratch(vm))
1448 return;
1449
1450 for_each_tile(tile, vm->xe, id) {
1451 u32 i;
1452
1453 if (!vm->pt_root[id])
1454 continue;
1455
1456 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1457 if (vm->scratch_pt[id][i])
1458 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1459 }
1460 }
1461
xe_vm_pt_destroy(struct xe_vm * vm)1462 static void xe_vm_pt_destroy(struct xe_vm *vm)
1463 {
1464 struct xe_tile *tile;
1465 u8 id;
1466
1467 xe_vm_assert_held(vm);
1468
1469 for_each_tile(tile, vm->xe, id) {
1470 if (vm->pt_root[id]) {
1471 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1472 vm->pt_root[id] = NULL;
1473 }
1474 }
1475 }
1476
/**
 * xe_vm_create() - Allocate and initialise a vm
 * @xe: The xe device the vm is created on.
 * @flags: XE_VM_FLAG_* creation flags.
 * @xef: The file the vm is created for, or NULL. A reference is taken on it
 *       when non-NULL.
 *
 * Initialises locks, lists and work items, the svm state and the gpuvm with
 * its resv object. Then creates one root page table per tile (for a
 * migration vm only on the tile selected by @flags), scratch page tables if
 * the vm has scratch, a bind exec queue per tile (not for migration vms)
 * and, when @xef is given and the device has ASIDs, allocates an ASID.
 *
 * Return: The new vm on success, an ERR_PTR() encoded error on failure.
 */
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/*
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	/* Fault-mode and long-running vms use different device-wide run periods */
	if (flags & XE_VM_FLAG_FAULT_MODE)
		vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
	else
		vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler references.
	 * By design, run_job for long-running workloads returns NULL and the
	 * scheduler drops all the references of it, hence protecting the VM
	 * for this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	/* Drop the local reference; presumably the gpuvm holds its own - TODO confirm */
	drm_gem_object_put(vm_resv_obj);

	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		/* Root page tables: a migration vm only gets one on its own tile */
		for_each_tile(tile, xe, id) {
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				/* Undo the roots created so far before retrying or bailing */
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					/* Undo scratch and roots before retrying or bailing */
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		/* Long-running vms never batch TLB invalidations, scratch or not */
		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
		}
	}

	/* Only vms created on behalf of a file get an ASID */
	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

	/* The gpuvm is initialised: tear down through the regular close path */
err_close:
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

	/* Failures before drm_gpuvm_init(): unwind by hand */
err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	/* Balances the xe_pm_runtime_get_noresume() taken for LR mode above */
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}
1689
xe_vm_close(struct xe_vm * vm)1690 static void xe_vm_close(struct xe_vm *vm)
1691 {
1692 struct xe_device *xe = vm->xe;
1693 bool bound;
1694 int idx;
1695
1696 bound = drm_dev_enter(&xe->drm, &idx);
1697
1698 down_write(&vm->lock);
1699 if (xe_vm_in_fault_mode(vm))
1700 xe_svm_notifier_lock(vm);
1701
1702 vm->size = 0;
1703
1704 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1705 struct xe_tile *tile;
1706 struct xe_gt *gt;
1707 u8 id;
1708
1709 /* Wait for pending binds */
1710 dma_resv_wait_timeout(xe_vm_resv(vm),
1711 DMA_RESV_USAGE_BOOKKEEP,
1712 false, MAX_SCHEDULE_TIMEOUT);
1713
1714 if (bound) {
1715 for_each_tile(tile, xe, id)
1716 if (vm->pt_root[id])
1717 xe_pt_clear(xe, vm->pt_root[id]);
1718
1719 for_each_gt(gt, xe, id)
1720 xe_tlb_inval_vm(>->tlb_inval, vm);
1721 }
1722 }
1723
1724 if (xe_vm_in_fault_mode(vm))
1725 xe_svm_notifier_unlock(vm);
1726 up_write(&vm->lock);
1727
1728 if (bound)
1729 drm_dev_exit(idx);
1730 }
1731
/**
 * xe_vm_close_and_put() - Close a vm, tear down its contents and drop a
 * reference
 * @vm: The vm.
 *
 * Closes the vm, flushes the rebind worker for preempt-fence mode vms,
 * closes svm for fault-mode vms, kills and releases the per-tile bind exec
 * queues, destroys all vmas, frees the scratch and root page tables,
 * releases the ASID (if one was allocated) and finally puts a vm reference.
 *
 * The vm must not have any preempt exec queues left (asserted).
 */
void xe_vm_close_and_put(struct xe_vm *vm)
{
	/* vmas whose bo needs separate locking, destroyed after the vm resv is dropped */
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	/* Drop the last-fence references of the bind queues under vm->lock */
	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			int i;

			xe_exec_queue_last_fence_put(vm->q[id], vm);
			for_each_tlb_inval(i)
				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
		}
	}
	up_write(&vm->lock);

	/* Kill and release the bind queues with vm->lock dropped */
	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case, remove from VMA? */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		/* bo not tied to a vm: defer until its resv can be locked separately */
		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	/* Release the ASID, if one was allocated in xe_vm_create() */
	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}
1840
/*
 * Work handler queued by xe_vm_free(): releases what is left of a vm and
 * frees it. The vm is expected to have been closed already (size 0) and to
 * have no root page tables left; both conditions are checked.
 */
static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	/* Balances the xe_pm_runtime_get_noresume() taken in xe_vm_create() */
	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	/* Root page tables should already be gone at this point */
	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}
1872
xe_vm_free(struct drm_gpuvm * gpuvm)1873 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1874 {
1875 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1876
1877 /* To destroy the VM we need to be able to sleep */
1878 queue_work(system_unbound_wq, &vm->destroy_work);
1879 }
1880
/**
 * xe_vm_lookup() - Look up a vm by its per-file id
 * @xef: The file whose vm table is searched.
 * @id: The vm id.
 *
 * The lookup and the reference grab are done under xef->vm.lock.
 *
 * Return: The vm with an extra reference taken, or NULL if @id is not
 * present in the table.
 */
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *found;

	mutex_lock(&xef->vm.lock);

	found = xa_load(&xef->vm.xa, id);
	if (found != NULL)
		xe_vm_get(found);

	mutex_unlock(&xef->vm.lock);

	return found;
}
1893
xe_vm_pdp4_descriptor(struct xe_vm * vm,struct xe_tile * tile)1894 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1895 {
1896 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
1897 }
1898
1899 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)1900 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1901 {
1902 return q ? q : vm->q[0];
1903 }
1904
1905 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)1906 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1907 {
1908 unsigned int i;
1909
1910 for (i = 0; i < num_syncs; i++) {
1911 struct xe_sync_entry *e = &syncs[i];
1912
1913 if (xe_sync_is_ufence(e))
1914 return xe_sync_ufence_get(e);
1915 }
1916
1917 return NULL;
1918 }
1919
1920 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1921 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1922 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1923
xe_vm_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1924 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1925 struct drm_file *file)
1926 {
1927 struct xe_device *xe = to_xe_device(dev);
1928 struct xe_file *xef = to_xe_file(file);
1929 struct drm_xe_vm_create *args = data;
1930 struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
1931 struct xe_vm *vm;
1932 u32 id;
1933 int err;
1934 u32 flags = 0;
1935
1936 if (XE_IOCTL_DBG(xe, args->extensions))
1937 return -EINVAL;
1938
1939 if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
1940 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1941
1942 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1943 !xe->info.has_usm))
1944 return -EINVAL;
1945
1946 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1947 return -EINVAL;
1948
1949 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1950 return -EINVAL;
1951
1952 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1953 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1954 !xe->info.needs_scratch))
1955 return -EINVAL;
1956
1957 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1958 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1959 return -EINVAL;
1960
1961 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1962 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1963 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1964 flags |= XE_VM_FLAG_LR_MODE;
1965 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1966 flags |= XE_VM_FLAG_FAULT_MODE;
1967
1968 vm = xe_vm_create(xe, flags, xef);
1969 if (IS_ERR(vm))
1970 return PTR_ERR(vm);
1971
1972 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1973 /* Warning: Security issue - never enable by default */
1974 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1975 #endif
1976
1977 /* user id alloc must always be last in ioctl to prevent UAF */
1978 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1979 if (err)
1980 goto err_close_and_put;
1981
1982 args->vm_id = id;
1983
1984 return 0;
1985
1986 err_close_and_put:
1987 xe_vm_close_and_put(vm);
1988
1989 return err;
1990 }
1991
xe_vm_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1992 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1993 struct drm_file *file)
1994 {
1995 struct xe_device *xe = to_xe_device(dev);
1996 struct xe_file *xef = to_xe_file(file);
1997 struct drm_xe_vm_destroy *args = data;
1998 struct xe_vm *vm;
1999 int err = 0;
2000
2001 if (XE_IOCTL_DBG(xe, args->pad) ||
2002 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2003 return -EINVAL;
2004
2005 mutex_lock(&xef->vm.lock);
2006 vm = xa_load(&xef->vm.xa, args->vm_id);
2007 if (XE_IOCTL_DBG(xe, !vm))
2008 err = -ENOENT;
2009 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2010 err = -EBUSY;
2011 else
2012 xa_erase(&xef->vm.xa, args->vm_id);
2013 mutex_unlock(&xef->vm.lock);
2014
2015 if (!err)
2016 xe_vm_close_and_put(vm);
2017
2018 return err;
2019 }
2020
/*
 * Count the gpuvas of @vm found by iterating the range @start..@end.
 * vm->lock must be held by the caller.
 */
static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	u32 count = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		count++;
	}

	return count;
}
2032
/*
 * Fill @attrs with the memory attributes of every vma found by iterating
 * the range @start..@end of @vm.
 *
 * On entry *@num_vmas is the capacity of @attrs; on success it is updated
 * to the number of entries written. vm->lock must be held by the caller.
 *
 * Return: 0 on success, -ENOSPC if the range holds more vmas than @attrs
 * has room for (in which case *@num_vmas is left untouched).
 */
static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int filled = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct drm_xe_mem_range_attr *out;

		if (filled == *num_vmas)
			return -ENOSPC;

		out = &attrs[filled++];

		out->start = xe_vma_start(vma);
		out->end = xe_vma_end(vma);
		out->atomic.val = vma->attr.atomic_access;
		out->pat_index.val = vma->attr.pat_index;
		out->preferred_mem_loc.devmem_fd =
			vma->attr.preferred_loc.devmem_fd;
		out->preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;
	}

	*num_vmas = filled;

	return 0;
}
2061
xe_vm_query_vmas_attrs_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2062 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2063 {
2064 struct xe_device *xe = to_xe_device(dev);
2065 struct xe_file *xef = to_xe_file(file);
2066 struct drm_xe_mem_range_attr *mem_attrs;
2067 struct drm_xe_vm_query_mem_range_attr *args = data;
2068 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2069 struct xe_vm *vm;
2070 int err = 0;
2071
2072 if (XE_IOCTL_DBG(xe,
2073 ((args->num_mem_ranges == 0 &&
2074 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2075 (args->num_mem_ranges > 0 &&
2076 (!attrs_user ||
2077 args->sizeof_mem_range_attr !=
2078 sizeof(struct drm_xe_mem_range_attr))))))
2079 return -EINVAL;
2080
2081 vm = xe_vm_lookup(xef, args->vm_id);
2082 if (XE_IOCTL_DBG(xe, !vm))
2083 return -EINVAL;
2084
2085 err = down_read_interruptible(&vm->lock);
2086 if (err)
2087 goto put_vm;
2088
2089 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2090
2091 if (args->num_mem_ranges == 0 && !attrs_user) {
2092 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2093 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2094 goto unlock_vm;
2095 }
2096
2097 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2098 GFP_KERNEL | __GFP_ACCOUNT |
2099 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2100 if (!mem_attrs) {
2101 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2102 goto unlock_vm;
2103 }
2104
2105 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2106 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2107 args->start + args->range, mem_attrs);
2108 if (err)
2109 goto free_mem_attrs;
2110
2111 err = copy_to_user(attrs_user, mem_attrs,
2112 args->sizeof_mem_range_attr * args->num_mem_ranges);
2113 if (err)
2114 err = -EFAULT;
2115
2116 free_mem_attrs:
2117 kvfree(mem_attrs);
2118 unlock_vm:
2119 up_read(&vm->lock);
2120 put_vm:
2121 xe_vm_put(vm);
2122 return err;
2123 }
2124
vma_matches(struct xe_vma * vma,u64 page_addr)2125 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2126 {
2127 if (page_addr > xe_vma_end(vma) - 1 ||
2128 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2129 return false;
2130
2131 return true;
2132 }
2133
2134 /**
2135 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2136 *
2137 * @vm: the xe_vm the vma belongs to
2138 * @page_addr: address to look up
2139 */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2140 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2141 {
2142 struct xe_vma *vma = NULL;
2143
2144 if (vm->usm.last_fault_vma) { /* Fast lookup */
2145 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2146 vma = vm->usm.last_fault_vma;
2147 }
2148 if (!vma)
2149 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2150
2151 return vma;
2152 }
2153
/*
 * Lookup table from a region index to an XE_PL_* memory type: index 0 is
 * TT, indices 1 and 2 are VRAM0 and VRAM1.
 * NOTE(review): presumably indexed by the prefetch region of a bind op -
 * confirm against the users further down in the file.
 */
static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};
2159
/*
 * Flag @vma as destroyed (under the svm notifier lock) and, if the
 * operation was already committed (@post_commit), also remove it from the
 * vm.
 */
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);

	if (!post_commit)
		return;

	xe_vm_remove_vma(vm, vma);
}
2169
2170 #undef ULL
2171 #define ULL unsigned long long
2172
2173 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2174 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2175 {
2176 struct xe_vma *vma;
2177
2178 switch (op->op) {
2179 case DRM_GPUVA_OP_MAP:
2180 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2181 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2182 break;
2183 case DRM_GPUVA_OP_REMAP:
2184 vma = gpuva_to_vma(op->remap.unmap->va);
2185 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2186 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2187 op->remap.unmap->keep ? 1 : 0);
2188 if (op->remap.prev)
2189 vm_dbg(&xe->drm,
2190 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2191 (ULL)op->remap.prev->va.addr,
2192 (ULL)op->remap.prev->va.range);
2193 if (op->remap.next)
2194 vm_dbg(&xe->drm,
2195 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2196 (ULL)op->remap.next->va.addr,
2197 (ULL)op->remap.next->va.range);
2198 break;
2199 case DRM_GPUVA_OP_UNMAP:
2200 vma = gpuva_to_vma(op->unmap.va);
2201 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2202 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2203 op->unmap.keep ? 1 : 0);
2204 break;
2205 case DRM_GPUVA_OP_PREFETCH:
2206 vma = gpuva_to_vma(op->prefetch.va);
2207 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2208 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2209 break;
2210 default:
2211 drm_warn(&xe->drm, "NOT POSSIBLE\n");
2212 }
2213 }
2214 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2215 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2216 {
2217 }
2218 #endif
2219
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2220 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2221 {
2222 if (!xe_vm_in_fault_mode(vm))
2223 return false;
2224
2225 if (!xe_vm_has_scratch(vm))
2226 return false;
2227
2228 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2229 return false;
2230
2231 return true;
2232 }
2233
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2234 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2235 {
2236 struct drm_gpuva_op *__op;
2237
2238 drm_gpuva_for_each_op(__op, ops) {
2239 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2240
2241 xe_vma_svm_prefetch_op_fini(op);
2242 }
2243 }
2244
/*
 * Create operations list from IOCTL arguments, setup operations fields so parse
 * and commit steps are decoupled from IOCTL arguments. This step can fail.
 *
 * The switch on @operation builds the drm_gpuva_ops list; the loop after it
 * copies the bind flags, PAT index and prefetch parameters into each
 * xe_vma_op. For a prefetch that targets a CPU-address-mirror VMA the loop
 * additionally collects the SVM ranges that need prefetching into
 * op->prefetch_range.range.
 *
 * Must be called with vm->lock held for write.
 *
 * Return: the ops list on success, an ERR_PTR() on failure.
 */
static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
			 struct xe_bo *bo, u64 bo_offset_or_userptr,
			 u64 addr, u64 range,
			 u32 operation, u32 flags,
			 u32 prefetch_region, u16 pat_index)
{
	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
	struct drm_gpuva_ops *ops;
	struct drm_gpuva_op *__op;
	struct drm_gpuvm_bo *vm_bo;
	u64 range_start = addr;
	u64 range_end = addr + range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm,
	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
	       operation, (ULL)addr, (ULL)range,
	       (ULL)bo_offset_or_userptr);

	switch (operation) {
	case DRM_XE_VM_BIND_OP_MAP:
		if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
			/*
			 * The helper receives range_start/range_end by
			 * pointer, so the range actually mapped below may
			 * differ from [addr, addr + range).
			 */
			xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
			vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
		}

		fallthrough;
	case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
		struct drm_gpuvm_map_req map_req = {
			.map.va.addr = range_start,
			.map.va.range = range_end - range_start,
			.map.gem.obj = obj,
			.map.gem.offset = bo_offset_or_userptr,
		};

		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
		break;
	}
	case DRM_XE_VM_BIND_OP_UNMAP:
		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
		break;
	case DRM_XE_VM_BIND_OP_PREFETCH:
		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
		break;
	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
		xe_assert(vm->xe, bo);

		/* The BO stays locked while its vm_bo is looked up and the ops are built */
		err = xe_bo_lock(bo, true);
		if (err)
			return ERR_PTR(err);

		vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj);
		if (IS_ERR(vm_bo)) {
			xe_bo_unlock(bo);
			return ERR_CAST(vm_bo);
		}

		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
		drm_gpuvm_bo_put(vm_bo);
		xe_bo_unlock(bo);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
		ops = ERR_PTR(-EINVAL);
	}
	if (IS_ERR(ops))
		return ops;

	/* Second pass: fill in the xe-specific fields of every generated op */
	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		if (__op->op == DRM_GPUVA_OP_MAP) {
			op->map.immediate =
				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
			if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
				op->map.vma_flags |= XE_VMA_READ_ONLY;
			if (flags & DRM_XE_VM_BIND_FLAG_NULL)
				op->map.vma_flags |= DRM_GPUVA_SPARSE;
			if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
				op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
			if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
				op->map.vma_flags |= XE_VMA_DUMPABLE;
			if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
				op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
			op->map.pat_index = pat_index;
			op->map.invalidate_on_bind =
				__xe_vm_needs_clear_scratch_pages(vm, flags);
		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
			struct xe_tile *tile;
			struct xe_svm_range *svm_range;
			struct drm_gpusvm_ctx ctx = {};
			struct drm_pagemap *dpagemap = NULL;
			u8 id, tile_mask = 0;
			u32 i;

			if (!xe_vma_is_cpu_addr_mirror(vma)) {
				op->prefetch.region = prefetch_region;
				/*
				 * NOTE(review): there is no switch around this
				 * statement, so assuming
				 * drm_gpuva_for_each_op() expands to a plain
				 * loop this `break` ends the whole op walk:
				 * print_op() is skipped for this op and any
				 * later ops in the list are not visited.
				 * Confirm that this is intended.
				 */
				break;
			}

			ctx.read_only = xe_vma_read_only(vma);
			ctx.devmem_possible = IS_DGFX(vm->xe) &&
					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);

			/* One bit per tile of the device */
			for_each_tile(tile, vm->xe, id)
				tile_mask |= 0x1 << id;

			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
			op->prefetch_range.ranges_count = 0;

			/*
			 * Pick the target pagemap: consult the VMA when asked
			 * to, otherwise use the tile selected by a non-zero
			 * @prefetch_region. A zero region leaves it NULL.
			 */
			if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
				dpagemap = xe_vma_resolve_pagemap(vma,
								  xe_device_get_root_tile(vm->xe));
			} else if (prefetch_region) {
				tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
						      XE_PL_VRAM0];
				dpagemap = xe_tile_local_pagemap(tile);
			}

			op->prefetch_range.dpagemap = dpagemap;
alloc_next_range:
			/* Walk the requested range one SVM range at a time, starting at addr */
			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);

			if (PTR_ERR(svm_range) == -ENOENT) {
				/*
				 * Nothing usable at addr: ask for the next
				 * start address inside the range, and stop
				 * once none is left (ULONG_MAX or 0).
				 */
				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);

				addr = ret == ULONG_MAX ? 0 : ret;
				if (addr)
					goto alloc_next_range;
				else
					goto print_op_label;
			}

			if (IS_ERR(svm_range)) {
				err = PTR_ERR(svm_range);
				goto unwind_prefetch_ops;
			}

			/* Ranges that validate successfully are not queued for prefetch */
			if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
				goto check_next_range;
			}

			err = xa_alloc(&op->prefetch_range.range,
				       &i, svm_range, xa_limit_32b,
				       GFP_KERNEL);

			if (err)
				goto unwind_prefetch_ops;

			op->prefetch_range.ranges_count++;
			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
check_next_range:
			/* Continue while the SVM range ends before both range_end and the VMA end */
			if (range_end > xe_svm_range_end(svm_range) &&
			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
				addr = xe_svm_range_end(svm_range);
				goto alloc_next_range;
			}
		}
print_op_label:
		print_op(vm->xe, __op);
	}

	return ops;

unwind_prefetch_ops:
	/* Release the per-op prefetch state, then the ops list itself */
	xe_svm_prefetch_gpuva_ops_fini(ops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return ERR_PTR(err);
}

ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2426
/*
 * Create the VMA described by the GPUVA map operation @op.
 *
 * With a backing BO, the VMA is created inside a validation/drm_exec
 * transaction that locks the BO (and, when bo->vm is not set, the VM's resv
 * object first); in that same bo->vm == NULL case add_preempt_fences() is
 * called for the BO. Without a BO, the VMA is created directly and, if it is
 * a userptr VMA, its pages are pinned.
 *
 * Must be called with vm->lock held for write.
 *
 * Return: the new VMA, or an ERR_PTR() on failure.
 */
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
			      struct xe_vma_mem_attr *attr, unsigned int flags)
{
	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vma *vma;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	if (bo) {
		err = 0;
		xe_validation_guard(&ctx, &vm->xe->val, &exec,
				    (struct xe_val_flags) {.interruptible = true}, err) {
			/* Lock the VM's resv object only when the BO has no bo->vm set */
			if (!bo->vm) {
				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
				drm_exec_retry_on_contention(&exec);
			}
			if (!err) {
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
			}
			if (err)
				return ERR_PTR(err);

			/* xe_vma_create() takes an inclusive end address, hence the - 1 */
			vma = xe_vma_create(vm, bo, op->gem.offset,
					    op->va.addr, op->va.addr +
					    op->va.range - 1, attr, flags);
			if (IS_ERR(vma))
				return vma;

			if (!bo->vm) {
				err = add_preempt_fences(vm, bo);
				if (err) {
					/* Not inserted into the VM yet, so no removal is needed */
					prep_vma_destroy(vm, vma, false);
					xe_vma_destroy(vma, NULL);
				}
			}
		}
		/* On this path the VMA was already destroyed above */
		if (err)
			return ERR_PTR(err);
	} else {
		vma = xe_vma_create(vm, NULL, op->gem.offset,
				    op->va.addr, op->va.addr +
				    op->va.range - 1, attr, flags);
		if (IS_ERR(vma))
			return vma;

		if (xe_vma_is_userptr(vma)) {
			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
			/*
			 * -EBUSY has dedicated meaning that a user fence
			 * attached to the VMA is busy, in practice
			 * xe_vma_userptr_pin_pages can only fail with -EBUSY if
			 * we are low on memory so convert this to -ENOMEM.
			 */
			if (err == -EBUSY)
				err = -ENOMEM;
		}
	}
	/* Only the no-BO path can reach here with err set (pin failure) */
	if (err) {
		prep_vma_destroy(vm, vma, false);
		xe_vma_destroy_unlocked(vma);
		vma = ERR_PTR(err);
	}

	return vma;
}
2496
xe_vma_max_pte_size(struct xe_vma * vma)2497 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2498 {
2499 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2500 return SZ_1G;
2501 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2502 return SZ_2M;
2503 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2504 return SZ_64K;
2505 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2506 return SZ_4K;
2507
2508 return SZ_1G; /* Uninitialized, used max size */
2509 }
2510
/*
 * Record @size in @vma by setting the matching XE_VMA_PTE_* flag. Sizes other
 * than 1G, 2M, 64K and 4K leave the flags untouched.
 */
static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
{
	if (size == SZ_1G)
		vma->gpuva.flags |= XE_VMA_PTE_1G;
	else if (size == SZ_2M)
		vma->gpuva.flags |= XE_VMA_PTE_2M;
	else if (size == SZ_64K)
		vma->gpuva.flags |= XE_VMA_PTE_64K;
	else if (size == SZ_4K)
		vma->gpuva.flags |= XE_VMA_PTE_4K;
}
2528
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2529 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2530 {
2531 int err = 0;
2532
2533 lockdep_assert_held_write(&vm->lock);
2534
2535 switch (op->base.op) {
2536 case DRM_GPUVA_OP_MAP:
2537 err |= xe_vm_insert_vma(vm, op->map.vma);
2538 if (!err)
2539 op->flags |= XE_VMA_OP_COMMITTED;
2540 break;
2541 case DRM_GPUVA_OP_REMAP:
2542 {
2543 u8 tile_present =
2544 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2545
2546 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2547 true);
2548 op->flags |= XE_VMA_OP_COMMITTED;
2549
2550 if (op->remap.prev) {
2551 err |= xe_vm_insert_vma(vm, op->remap.prev);
2552 if (!err)
2553 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2554 if (!err && op->remap.skip_prev) {
2555 op->remap.prev->tile_present =
2556 tile_present;
2557 }
2558 }
2559 if (op->remap.next) {
2560 err |= xe_vm_insert_vma(vm, op->remap.next);
2561 if (!err)
2562 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2563 if (!err && op->remap.skip_next) {
2564 op->remap.next->tile_present =
2565 tile_present;
2566 }
2567 }
2568
2569 /*
2570 * Adjust for partial unbind after removing VMA from VM. In case
2571 * of unwind we might need to undo this later.
2572 */
2573 if (!err) {
2574 op->base.remap.unmap->va->va.addr = op->remap.start;
2575 op->base.remap.unmap->va->va.range = op->remap.range;
2576 }
2577 break;
2578 }
2579 case DRM_GPUVA_OP_UNMAP:
2580 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2581 op->flags |= XE_VMA_OP_COMMITTED;
2582 break;
2583 case DRM_GPUVA_OP_PREFETCH:
2584 op->flags |= XE_VMA_OP_COMMITTED;
2585 break;
2586 default:
2587 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2588 }
2589
2590 return err;
2591 }
2592
2593 /**
2594 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2595 * @vma: Pointer to the xe_vma structure to check
2596 *
2597 * This function determines whether the given VMA (Virtual Memory Area)
2598 * has its memory attributes set to their default values. Specifically,
2599 * it checks the following conditions:
2600 *
2601 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED`
2602 * - `pat_index` is equal to `default_pat_index`
2603 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2604 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2605 *
2606 * Return: true if all attributes are at their default values, false otherwise.
2607 */
xe_vma_has_default_mem_attrs(struct xe_vma * vma)2608 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2609 {
2610 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2611 vma->attr.pat_index == vma->attr.default_pat_index &&
2612 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2613 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2614 }
2615
/*
 * Parse a list of GPUVA operations: queue every op on vops->list, create the
 * VMAs that MAP/REMAP ops need, account the page-table update ops per tile
 * through xe_vma_ops_incr_pt_update_ops(), and commit each op with
 * xe_vma_op_commit().
 *
 * Must be called with vm->lock held for write.
 *
 * Return: 0 on success, negative error code on failure. Nothing is unwound
 * here on failure; the op->flags *_COMMITTED bits record how far each op got
 * (presumably for the caller to unwind via vm_bind_ioctl_ops_unwind() -
 * confirm against the caller).
 */
static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
				   struct xe_vma_ops *vops)
{
	struct xe_device *xe = vm->xe;
	struct drm_gpuva_op *__op;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	/* One bit per tile of the device; every op targets all of them */
	for_each_tile(tile, vm->xe, id)
		tile_mask |= 0x1 << id;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;
		unsigned int flags = 0;

		INIT_LIST_HEAD(&op->link);
		list_add_tail(&op->link, &vops->list);
		op->tile_mask = tile_mask;

		switch (op->base.op) {
		case DRM_GPUVA_OP_MAP:
		{
			/* Fresh mappings start from default attributes with the requested PAT index */
			struct xe_vma_mem_attr default_attr = {
				.preferred_loc = {
					.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
					.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
				},
				.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
				.default_pat_index = op->map.pat_index,
				.pat_index = op->map.pat_index,
			};

			flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;

			vma = new_vma(vm, &op->base.map, &default_attr,
				      flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);

			op->map.vma = vma;
			/*
			 * A page-table update is counted when the bind is
			 * immediate or the VM is not in fault mode (and the
			 * VMA is not a system-allocator mapping), or when the
			 * bind has to invalidate on bind.
			 */
			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
			     !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
			    op->map.invalidate_on_bind)
				xe_vma_ops_incr_pt_update_ops(vops,
							      op->tile_mask, 1);
			break;
		}
		case DRM_GPUVA_OP_REMAP:
		{
			struct xe_vma *old =
				gpuva_to_vma(op->base.remap.unmap->va);
			bool skip = xe_vma_is_cpu_addr_mirror(old);
			u64 start = xe_vma_start(old), end = xe_vma_end(old);
			int num_remap_ops = 0;

			/* [start, end) is the part of the old VMA not covered by prev/next */
			if (op->base.remap.prev)
				start = op->base.remap.prev->va.addr +
					op->base.remap.prev->va.range;
			if (op->base.remap.next)
				end = op->base.remap.next->va.addr;

			/*
			 * An existing SVM mapping in that range is only torn
			 * down for madvise-driven ops; otherwise the remap is
			 * refused.
			 */
			if (xe_vma_is_cpu_addr_mirror(old) &&
			    xe_svm_has_mapping(vm, start, end)) {
				if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
					xe_svm_unmap_address_range(vm, start, end);
				else
					return -EBUSY;
			}

			/* old_* keep the original extent so an unwind can restore it */
			op->remap.start = xe_vma_start(old);
			op->remap.range = xe_vma_size(old);
			op->remap.old_start = op->remap.start;
			op->remap.old_range = op->remap.range;

			flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
			if (op->base.remap.prev) {
				vma = new_vma(vm, op->base.remap.prev,
					      &old->attr, flags);
				if (IS_ERR(vma))
					return PTR_ERR(vma);

				op->remap.prev = vma;

				/*
				 * Userptr creates a new SG mapping so
				 * we must also rebind.
				 */
				op->remap.skip_prev = skip ||
					(!xe_vma_is_userptr(old) &&
					IS_ALIGNED(xe_vma_end(vma),
						   xe_vma_max_pte_size(old)));
				if (op->remap.skip_prev) {
					/* Shrink the range to unbind by the part prev keeps */
					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
					op->remap.range -=
						xe_vma_end(vma) -
						xe_vma_start(old);
					op->remap.start = xe_vma_end(vma);
					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
					       (ULL)op->remap.start,
					       (ULL)op->remap.range);
				} else {
					num_remap_ops++;
				}
			}

			if (op->base.remap.next) {
				vma = new_vma(vm, op->base.remap.next,
					      &old->attr, flags);
				if (IS_ERR(vma))
					return PTR_ERR(vma);

				op->remap.next = vma;

				/*
				 * Userptr creates a new SG mapping so
				 * we must also rebind.
				 */
				op->remap.skip_next = skip ||
					(!xe_vma_is_userptr(old) &&
					IS_ALIGNED(xe_vma_start(vma),
						   xe_vma_max_pte_size(old)));
				if (op->remap.skip_next) {
					/* Shrink the range to unbind by the part next keeps */
					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
					op->remap.range -=
						xe_vma_end(old) -
						xe_vma_start(vma);
					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
					       (ULL)op->remap.start,
					       (ULL)op->remap.range);
				} else {
					num_remap_ops++;
				}
			}
			/* Plus one for the old VMA itself, unless it is a CPU address mirror */
			if (!skip)
				num_remap_ops++;

			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
			break;
		}
		case DRM_GPUVA_OP_UNMAP:
			vma = gpuva_to_vma(op->base.unmap.va);

			/* Mirror VMAs with live SVM mappings may only go away when explicitly allowed */
			if (xe_vma_is_cpu_addr_mirror(vma) &&
			    xe_svm_has_mapping(vm, xe_vma_start(vma),
					       xe_vma_end(vma)) &&
			    !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP))
				return -EBUSY;

			if (!xe_vma_is_cpu_addr_mirror(vma))
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
			break;
		case DRM_GPUVA_OP_PREFETCH:
			vma = gpuva_to_vma(op->base.prefetch.va);

			if (xe_vma_is_userptr(vma)) {
				err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
				if (err)
					return err;
			}

			/* Mirror VMAs count one update per collected SVM range */
			if (xe_vma_is_cpu_addr_mirror(vma))
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
							      op->prefetch_range.ranges_count);
			else
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);

			break;
		default:
			drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
		}

		err = xe_vma_op_commit(vm, op);
		if (err)
			return err;
	}

	return 0;
}
2798
/*
 * Undo the effect of parsing/committing a single operation.
 *
 * @post_commit, @prev_post_commit and @next_post_commit tell whether the op
 * itself, its REMAP prev VMA and its REMAP next VMA were committed (see the
 * XE_VMA_OP_*COMMITTED flags passed in by vm_bind_ioctl_ops_unwind()). VMAs
 * created for the op are destroyed; VMAs the op marked as destroyed get the
 * XE_VMA_DESTROYED flag cleared and, if the op was committed, are inserted
 * back into the VM.
 *
 * Must be called with vm->lock held for write.
 */
static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
			     bool post_commit, bool prev_post_commit,
			     bool next_post_commit)
{
	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		/* op->map.vma is NULL when VMA creation never succeeded */
		if (op->map.vma) {
			prep_vma_destroy(vm, op->map.vma, post_commit);
			xe_vma_destroy_unlocked(op->map.vma);
		}
		break;
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (vma) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
			if (post_commit)
				xe_vm_insert_vma(vm, vma);
		}
		break;
	}
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);

		/* Drop the replacement VMAs before the original is re-inserted below */
		if (op->remap.prev) {
			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
			xe_vma_destroy_unlocked(op->remap.prev);
		}
		if (op->remap.next) {
			prep_vma_destroy(vm, op->remap.next, next_post_commit);
			xe_vma_destroy_unlocked(op->remap.next);
		}
		if (vma) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
			if (post_commit) {
				/*
				 * Restore the old va range, in case of the
				 * prev/next skip optimisation. Otherwise what
				 * we re-insert here could be smaller than the
				 * original range.
				 */
				op->base.remap.unmap->va->va.addr =
					op->remap.old_start;
				op->base.remap.unmap->va->va.range =
					op->remap.old_range;
				xe_vm_insert_vma(vm, vma);
			}
		}
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
		/* Nothing to do */
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}
}
2864
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2865 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2866 struct drm_gpuva_ops **ops,
2867 int num_ops_list)
2868 {
2869 int i;
2870
2871 for (i = num_ops_list - 1; i >= 0; --i) {
2872 struct drm_gpuva_ops *__ops = ops[i];
2873 struct drm_gpuva_op *__op;
2874
2875 if (!__ops)
2876 continue;
2877
2878 drm_gpuva_for_each_op_reverse(__op, __ops) {
2879 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2880
2881 xe_vma_op_unwind(vm, op,
2882 op->flags & XE_VMA_OP_COMMITTED,
2883 op->flags & XE_VMA_OP_PREV_COMMITTED,
2884 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2885 }
2886 }
2887 }
2888
/*
 * Lock the BO backing @vma as part of @exec (only when bo->vm is not set) and,
 * if @validate is true, validate it. Eviction is permitted for the validation
 * only when @res_evict is set and the VM is not in preempt-fence mode.
 *
 * Return: 0 when there is no BO or on success, negative error code otherwise.
 */
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
				 bool res_evict, bool validate)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	int err;

	if (!bo)
		return 0;

	if (!bo->vm) {
		err = drm_exec_lock_obj(exec, &bo->ttm.base);
		if (err)
			return err;
	}

	if (!validate)
		return 0;

	return xe_bo_validate(bo, vm,
			      !xe_vm_in_preempt_fence_mode(vm) && res_evict,
			      exec);
}
2907
check_ufence(struct xe_vma * vma)2908 static int check_ufence(struct xe_vma *vma)
2909 {
2910 if (vma->ufence) {
2911 struct xe_user_fence * const f = vma->ufence;
2912
2913 if (!xe_sync_ufence_get_status(f))
2914 return -EBUSY;
2915
2916 vma->ufence = NULL;
2917 xe_sync_ufence_put(f);
2918 }
2919
2920 return 0;
2921 }
2922
prefetch_ranges(struct xe_vm * vm,struct xe_vma_op * op)2923 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2924 {
2925 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2926 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2927 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
2928 int err = 0;
2929
2930 struct xe_svm_range *svm_range;
2931 struct drm_gpusvm_ctx ctx = {};
2932 unsigned long i;
2933
2934 if (!xe_vma_is_cpu_addr_mirror(vma))
2935 return 0;
2936
2937 ctx.read_only = xe_vma_read_only(vma);
2938 ctx.devmem_possible = devmem_possible;
2939 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2940 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
2941
2942 /* TODO: Threading the migration */
2943 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2944 if (!dpagemap)
2945 xe_svm_range_migrate_to_smem(vm, svm_range);
2946
2947 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
2948 drm_dbg(&vm->xe->drm,
2949 "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
2950 dpagemap ? dpagemap->drm->unique : "system",
2951 xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
2952 }
2953
2954 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
2955 err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
2956 if (err) {
2957 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2958 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2959 return -ENODATA;
2960 }
2961 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2962 }
2963
2964 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2965 if (err) {
2966 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2967 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2968 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2969 err = -ENODATA;
2970 return err;
2971 }
2972 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2973 }
2974
2975 return err;
2976 }
2977
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_vma_op * op)2978 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2979 struct xe_vma_ops *vops, struct xe_vma_op *op)
2980 {
2981 int err = 0;
2982 bool res_evict;
2983
2984 /*
2985 * We only allow evicting a BO within the VM if it is not part of an
2986 * array of binds, as an array of binds can evict another BO within the
2987 * bind.
2988 */
2989 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
2990
2991 switch (op->base.op) {
2992 case DRM_GPUVA_OP_MAP:
2993 if (!op->map.invalidate_on_bind)
2994 err = vma_lock_and_validate(exec, op->map.vma,
2995 res_evict,
2996 !xe_vm_in_fault_mode(vm) ||
2997 op->map.immediate);
2998 break;
2999 case DRM_GPUVA_OP_REMAP:
3000 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
3001 if (err)
3002 break;
3003
3004 err = vma_lock_and_validate(exec,
3005 gpuva_to_vma(op->base.remap.unmap->va),
3006 res_evict, false);
3007 if (!err && op->remap.prev)
3008 err = vma_lock_and_validate(exec, op->remap.prev,
3009 res_evict, true);
3010 if (!err && op->remap.next)
3011 err = vma_lock_and_validate(exec, op->remap.next,
3012 res_evict, true);
3013 break;
3014 case DRM_GPUVA_OP_UNMAP:
3015 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
3016 if (err)
3017 break;
3018
3019 err = vma_lock_and_validate(exec,
3020 gpuva_to_vma(op->base.unmap.va),
3021 res_evict, false);
3022 break;
3023 case DRM_GPUVA_OP_PREFETCH:
3024 {
3025 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3026 u32 region;
3027
3028 if (!xe_vma_is_cpu_addr_mirror(vma)) {
3029 region = op->prefetch.region;
3030 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
3031 region <= ARRAY_SIZE(region_to_mem_type));
3032 }
3033
3034 err = vma_lock_and_validate(exec,
3035 gpuva_to_vma(op->base.prefetch.va),
3036 res_evict, false);
3037 if (!err && !xe_vma_has_no_bo(vma))
3038 err = xe_bo_migrate(xe_vma_bo(vma),
3039 region_to_mem_type[region],
3040 NULL,
3041 exec);
3042 break;
3043 }
3044 default:
3045 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3046 }
3047
3048 return err;
3049 }
3050
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)3051 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3052 {
3053 struct xe_vma_op *op;
3054 int err;
3055
3056 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3057 return 0;
3058
3059 list_for_each_entry(op, &vops->list, link) {
3060 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3061 err = prefetch_ranges(vm, op);
3062 if (err)
3063 return err;
3064 }
3065 }
3066
3067 return 0;
3068 }
3069
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)3070 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3071 struct xe_vm *vm,
3072 struct xe_vma_ops *vops)
3073 {
3074 struct xe_vma_op *op;
3075 int err;
3076
3077 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3078 if (err)
3079 return err;
3080
3081 list_for_each_entry(op, &vops->list, link) {
3082 err = op_lock_and_prep(exec, vm, vops, op);
3083 if (err)
3084 return err;
3085 }
3086
3087 #ifdef TEST_VM_OPS_ERROR
3088 if (vops->inject_error &&
3089 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3090 return -ENOSPC;
3091 #endif
3092
3093 return 0;
3094 }
3095
op_trace(struct xe_vma_op * op)3096 static void op_trace(struct xe_vma_op *op)
3097 {
3098 switch (op->base.op) {
3099 case DRM_GPUVA_OP_MAP:
3100 trace_xe_vma_bind(op->map.vma);
3101 break;
3102 case DRM_GPUVA_OP_REMAP:
3103 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3104 if (op->remap.prev)
3105 trace_xe_vma_bind(op->remap.prev);
3106 if (op->remap.next)
3107 trace_xe_vma_bind(op->remap.next);
3108 break;
3109 case DRM_GPUVA_OP_UNMAP:
3110 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3111 break;
3112 case DRM_GPUVA_OP_PREFETCH:
3113 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3114 break;
3115 case DRM_GPUVA_OP_DRIVER:
3116 break;
3117 default:
3118 XE_WARN_ON("NOT POSSIBLE");
3119 }
3120 }
3121
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3122 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3123 {
3124 struct xe_vma_op *op;
3125
3126 list_for_each_entry(op, &vops->list, link)
3127 op_trace(op);
3128 }
3129
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3130 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3131 {
3132 struct xe_exec_queue *q = vops->q;
3133 struct xe_tile *tile;
3134 int number_tiles = 0;
3135 u8 id;
3136
3137 for_each_tile(tile, vm->xe, id) {
3138 if (vops->pt_update_ops[id].num_ops)
3139 ++number_tiles;
3140
3141 if (vops->pt_update_ops[id].q)
3142 continue;
3143
3144 if (q) {
3145 vops->pt_update_ops[id].q = q;
3146 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3147 q = list_next_entry(q, multi_gt_list);
3148 } else {
3149 vops->pt_update_ops[id].q = vm->q[id];
3150 }
3151 }
3152
3153 return number_tiles;
3154 }
3155
/*
 * Execute the page-table updates of all operations in @vops on every tile
 * that has updates pending, and bundle the resulting fences into a single
 * dma_fence_array.
 *
 * Return: the composite fence on success; ERR_PTR(-ENODATA) when no tile has
 * any update to run, ERR_PTR(-ENOMEM) on allocation failure, or the error
 * reported by the prepare/run step of a tile.
 */
static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops)
{
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
	u8 id;

	number_tiles = vm_ops_setup_tile_args(vm, vops);
	if (number_tiles == 0)
		return ERR_PTR(-ENODATA);

	/*
	 * Size the fence array: one slot per tile, plus one per
	 * for_each_tlb_inval() iteration unless TLB waits are skipped.
	 */
	for_each_tile(tile, vm->xe, id) {
		++n_fence;

		if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
			for_each_tlb_inval(i)
				++n_fence;
	}

	fences = kmalloc_objs(*fences, n_fence);
	if (!fences) {
		fence = ERR_PTR(-ENOMEM);
		goto err_trace;
	}

	cf = dma_fence_array_alloc(n_fence);
	if (!cf) {
		fence = ERR_PTR(-ENOMEM);
		goto err_out;
	}

	/* Prepare all tiles before running any of them */
	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		err = xe_pt_update_ops_prepare(tile, vops);
		if (err) {
			fence = ERR_PTR(err);
			goto err_out;
		}
	}

	trace_xe_vm_ops_execute(vops);

	for_each_tile(tile, vm->xe, id) {
		struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;

		fence = NULL;
		if (!vops->pt_update_ops[id].num_ops)
			goto collect_fences;

		fence = xe_pt_update_ops_run(tile, vops);
		if (IS_ERR(fence))
			goto err_out;

collect_fences:
		/* Tiles without updates contribute a stub fence to keep the slot count fixed */
		fences[current_fence++] = fence ?: dma_fence_get_stub();
		if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
			continue;

		xe_migrate_job_lock(tile->migrate, q);
		for_each_tlb_inval(i)
			fences[current_fence++] =
				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
		xe_migrate_job_unlock(tile->migrate, q);
	}

	xe_assert(vm->xe, current_fence == n_fence);
	dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
			     1, false);
	fence = &cf->base;

	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		xe_pt_update_ops_fini(tile, vops);
	}

	return fence;

err_out:
	/* Abort every tile with pending ops and drop the fences collected so far */
	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		xe_pt_update_ops_abort(tile, vops);
	}
	while (current_fence)
		dma_fence_put(fences[--current_fence]);
	kfree(fences);
	kfree(cf);

err_trace:
	trace_xe_vm_ops_fail(vm);
	return fence;
}
3256
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3257 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3258 {
3259 if (vma->ufence)
3260 xe_sync_ufence_put(vma->ufence);
3261 vma->ufence = __xe_sync_ufence_get(ufence);
3262 }
3263
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3264 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3265 struct xe_user_fence *ufence)
3266 {
3267 switch (op->base.op) {
3268 case DRM_GPUVA_OP_MAP:
3269 if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
3270 vma_add_ufence(op->map.vma, ufence);
3271 break;
3272 case DRM_GPUVA_OP_REMAP:
3273 if (op->remap.prev)
3274 vma_add_ufence(op->remap.prev, ufence);
3275 if (op->remap.next)
3276 vma_add_ufence(op->remap.next, ufence);
3277 break;
3278 case DRM_GPUVA_OP_UNMAP:
3279 break;
3280 case DRM_GPUVA_OP_PREFETCH:
3281 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3282 break;
3283 default:
3284 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3285 }
3286 }
3287
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3288 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3289 struct dma_fence *fence)
3290 {
3291 struct xe_user_fence *ufence;
3292 struct xe_vma_op *op;
3293 int i;
3294
3295 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3296 list_for_each_entry(op, &vops->list, link) {
3297 if (ufence)
3298 op_add_ufence(vm, op, ufence);
3299
3300 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3301 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3302 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3303 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3304 fence);
3305 }
3306 if (ufence)
3307 xe_sync_ufence_put(ufence);
3308 if (fence) {
3309 for (i = 0; i < vops->num_syncs; i++)
3310 xe_sync_entry_signal(vops->syncs + i, fence);
3311 }
3312 }
3313
/*
 * Lock and prepare everything @vops needs, execute the operations and run
 * vm_bind_ioctl_ops_fini() on the outcome.
 *
 * The caller must hold vm->lock for write. The work is done inside an
 * xe_validation_guard() block set up as interruptible and ignoring duplicate
 * locks.
 *
 * Return: the fence returned by ops_execute() on success, an ERR_PTR()
 * otherwise.
 */
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
						   struct xe_vma_ops *vops)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	xe_validation_guard(&ctx, &vm->xe->val, &exec,
			    ((struct xe_val_flags) {
				    .interruptible = true,
				    .exec_ignore_duplicates = true,
			    }), err) {
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
		/*
		 * NOTE(review): both helpers presumably restart the guarded
		 * block (on lock contention / on OOM) - confirm against their
		 * definitions.
		 */
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			return ERR_PTR(err);

		/* The exec context is only published for the ops_execute() call. */
		xe_vm_set_validation_exec(vm, &exec);
		fence = ops_execute(vm, vops);
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			/*
			 * -ENODATA still gets the ops finalized, just without
			 * a fence; every other error is returned untouched.
			 */
			if (PTR_ERR(fence) == -ENODATA)
				vm_bind_ioctl_ops_fini(vm, vops, NULL);
			return fence;
		}

		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}

	return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3350
/* Bind-op flags accepted by vm_bind_ioctl_check_args() in every build. */
#define SUPPORTED_FLAGS_STUB  \
	(DRM_XE_VM_BIND_FLAG_READONLY | \
	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
	 DRM_XE_VM_BIND_FLAG_NULL | \
	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
	 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)

/* Builds with TEST_VM_OPS_ERROR additionally accept FORCE_OP_ERROR. */
#ifdef TEST_VM_OPS_ERROR
#define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
#else
#define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
#endif

/* Low 16 bits of an address/size; non-zero means not 64K aligned. */
#define XE_64K_PAGE_MASK 0xffffull
/* Mask of the DRM_XE_SYNCS_FLAG_* bits listed here (currently one). */
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3368
/*
 * Validate the VM_BIND ioctl arguments and obtain the array of bind ops.
 *
 * With more than one bind the ops are copied from user space into a newly
 * allocated array; otherwise *@bind_ops points at the op embedded in @args.
 * Each op is then checked for a valid PAT index, supported op/flag
 * combinations and page alignment. The PAT index is sanitized with
 * array_index_nospec() and written back into the op.
 *
 * On failure the allocated array (if any) is freed and *@bind_ops is set to
 * NULL. On success the caller owns the array when args->num_binds > 1.
 *
 * Return: 0 on success, -EINVAL for invalid arguments, -ENOBUFS if the op
 * array cannot be allocated, -EFAULT if it cannot be copied from user space.
 */
static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
{
	int err;
	int i;

	/* Padding, reserved fields and extensions must all be zero. */
	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
		return -EINVAL;

	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);

		*bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
					  args->num_binds,
					  GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		/* num_binds > 1 always holds in this branch, so this yields -ENOBUFS. */
		if (!*bind_ops)
			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;

		err = copy_from_user(*bind_ops, bind_user,
				     sizeof(struct drm_xe_vm_bind_op) *
				     args->num_binds);
		if (XE_IOCTL_DBG(xe, err)) {
			err = -EFAULT;
			goto free_bind_ops;
		}
	} else {
		/* Zero or one bind: use the op embedded in the ioctl args. */
		*bind_ops = &args->bind;
	}

	for (i = 0; i < args->num_binds; ++i) {
		u64 range = (*bind_ops)[i].range;
		u64 addr = (*bind_ops)[i].addr;
		u32 op = (*bind_ops)[i].op;
		u32 flags = (*bind_ops)[i].flags;
		u32 obj = (*bind_ops)[i].obj;
		u64 obj_offset = (*bind_ops)[i].obj_offset;
		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
		bool is_cpu_addr_mirror = flags &
			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
		u16 pat_index = (*bind_ops)[i].pat_index;
		u16 coh_mode;
		bool comp_en;

		/* CPU address mirror needs a fault-mode VM and GPUSVM support. */
		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
				 (!xe_vm_in_fault_mode(vm) ||
				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Store the sanitized index back so later users see the same value. */
		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
		(*bind_ops)[i].pat_index = pat_index;
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		comp_en = xe_pat_index_get_comp_en(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/*
		 * Reject unknown ops/flags and every op/flag/object
		 * combination that is not allowed.
		 */
		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
						      is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
				 (is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_MAP &&
				 !is_null && !is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, addr &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, range &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, comp_en &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, prefetch_region &&
				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
				      /* Guard against undefined shift in BIT(prefetch_region) */
				      (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
				      !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/*
		 * Offsets, addresses and ranges must be page aligned; only
		 * UNMAP_ALL may have a zero range.
		 */
		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	/* Only the multi-bind case allocated the array. */
	if (args->num_binds > 1)
		kvfree(*bind_ops);
	*bind_ops = NULL;
	return err;
}
3506
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3507 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3508 struct xe_exec_queue *q,
3509 struct xe_sync_entry *syncs,
3510 int num_syncs)
3511 {
3512 struct dma_fence *fence = NULL;
3513 int i, err = 0;
3514
3515 if (num_syncs) {
3516 fence = xe_sync_in_fence_get(syncs, num_syncs,
3517 to_wait_exec_queue(vm, q), vm);
3518 if (IS_ERR(fence))
3519 return PTR_ERR(fence);
3520
3521 for (i = 0; i < num_syncs; i++)
3522 xe_sync_entry_signal(&syncs[i], fence);
3523 }
3524
3525 dma_fence_put(fence);
3526
3527 return err;
3528 }
3529
/*
 * Reset @vops to an empty operation set bound to @vm, @q and the given sync
 * entries. All remaining fields, including the flags, start out zeroed.
 */
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	/* Zeroes every field, so flags need no explicit reset. */
	memset(vops, 0, sizeof(*vops));

	vops->vm = vm;
	vops->q = q;
	vops->syncs = syncs;
	vops->num_syncs = num_syncs;
	INIT_LIST_HEAD(&vops->list);
}
3542
/*
 * Check that @bo can be used by a single bind op with the given parameters.
 *
 * Verified, in order: compression vs. XE_BO_FLAG_NO_COMPRESSION, that
 * @obj_offset/@range fit inside the BO, 64K alignment where the platform
 * requires it, coherency mode vs. CPU caching, no compression on imported
 * dma-bufs and, when requested through DRM_XE_VM_BIND_FLAG_CHECK_PXP, the PXP
 * key of a protected BO.
 *
 * Return: 0 if the BO may be bound, -EINVAL on a parameter mismatch,
 * -ENOEXEC if the PXP key check fails.
 */
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;
	bool comp_en;
	bool unmapping;

	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	/* The bound window must lie entirely within the BO. */
	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range))
		return -EINVAL;

	/*
	 * Platforms with XE_VRAM_FLAGS_NEED64K require 64k VM_BIND alignment.
	 *
	 * Elsewhere a BO may have 64k physical placement yet still be mapped
	 * at 4k offsets, so the alignment is only enforced when both the BO
	 * and the platform ask for it.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK))
			return -EINVAL;

		if (XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK))
			return -EINVAL;

		if (XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK))
			return -EINVAL;
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (!bo->cpu_caching) {
		/*
		 * A dma-buf imported from another device needs 1way or 2way
		 * coherency: how it is mapped on the CPU is unknown, so
		 * assume it may be cached there.
		 */
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE))
			return -EINVAL;
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
		return -EINVAL;
	}

	/* Imported dma-bufs must not use a PAT index that enables compression. */
	comp_en = xe_pat_index_get_comp_en(xe, pat_index);
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
		return -EINVAL;

	/* A protected BO can only be mapped while its key is still valid. */
	unmapping = op == DRM_XE_VM_BIND_OP_UNMAP ||
		op == DRM_XE_VM_BIND_OP_UNMAP_ALL;
	if (!unmapping && (bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) &&
	    xe_bo_is_protected(bo)) {
		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
			return -ENOEXEC;
	}

	return 0;
}
3610
/*
 * VM_BIND ioctl entry point.
 *
 * Flow: look up the VM, validate the arguments, optionally look up the exec
 * queue, take vm->lock for write, range-check every op against the VM size,
 * look up and validate the BOs, parse the syncs, create and parse the GPUVA
 * ops, then execute them. Resources are released in reverse order through
 * the label ladder at the end of the function.
 *
 * -ENODATA is used internally for "nothing to do": in that case the syncs
 * are signalled through vm_bind_ioctl_signal_fences() and its result is
 * returned instead.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops = NULL;
	struct xe_vma_ops vops;
	struct dma_fence *fence;
	int err;
	int i;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	/* On failure check_args has already freed and cleared bind_ops. */
	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
	if (err)
		goto put_vm;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	/* A user-supplied queue must belong to the VM being bound. */
	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	/* Ensure all UNMAPs visible */
	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_exec_queue;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	/* Every [addr, addr + range) must fit in the VM; written to avoid overflow. */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	if (args->num_binds) {
		bos = kvzalloc_objs(*bos, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvzalloc_objs(*ops, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!ops) {
			err = -ENOMEM;
			goto free_bos;
		}
	}

	/* Resolve and validate the BO of every op that names one. */
	for (i = 0; i < args->num_binds; ++i) {
		struct drm_gem_object *gem_obj;
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 obj = bind_ops[i].obj;
		u64 obj_offset = bind_ops[i].obj_offset;
		u16 pat_index = bind_ops[i].pat_index;
		u32 op = bind_ops[i].op;
		u32 bind_flags = bind_ops[i].flags;

		if (!obj)
			continue;

		gem_obj = drm_gem_object_lookup(file, obj);
		if (XE_IOCTL_DBG(xe, !gem_obj)) {
			err = -ENOENT;
			goto put_obj;
		}
		bos[i] = gem_to_xe_bo(gem_obj);

		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
						   obj_offset, pat_index, op,
						   bind_flags);
		if (err)
			goto put_obj;
	}

	if (args->num_syncs) {
		syncs = kzalloc_objs(*syncs, args->num_syncs);
		if (!syncs) {
			err = -ENOMEM;
			goto put_obj;
		}
	}

	/*
	 * num_syncs counts the successfully parsed entries, which is what the
	 * cleanup loop at free_syncs relies on.
	 */
	syncs_user = u64_to_user_ptr(args->syncs);
	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
		struct xe_exec_queue *__q = q ?: vm->q[0];

		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
					  &syncs_user[num_syncs],
					  __q->ufence_syncobj,
					  ++__q->ufence_timeline_value,
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0) |
					  (!args->num_binds ?
					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
		if (err)
			goto free_syncs;

		if (xe_sync_is_ufence(&syncs[num_syncs]))
			num_ufence++;
	}

	/* At most one user fence per bind ioctl. */
	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto free_syncs;
	}

	/* No binds: only the syncs get signalled, at free_syncs. */
	if (!args->num_binds) {
		err = -ENODATA;
		goto free_syncs;
	}

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	if (args->num_binds > 1)
		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 op = bind_ops[i].op;
		u32 flags = bind_ops[i].flags;
		u64 obj_offset = bind_ops[i].obj_offset;
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
			err = PTR_ERR(ops[i]);
			/* Keep the unwind/free loops from touching an error pointer. */
			ops[i] = NULL;
			goto unwind_ops;
		}

		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
		if (err)
			goto unwind_ops;

#ifdef TEST_VM_OPS_ERROR
		if (flags & FORCE_OP_ERROR) {
			vops.inject_error = true;
			vm->xe->vm_inject_error_position =
				(vm->xe->vm_inject_error_position + 1) %
				FORCE_OP_ERROR_COUNT;
		}
#endif
	}

	/* Nothing to do */
	if (list_empty(&vops.list)) {
		err = -ENODATA;
		goto unwind_ops;
	}

	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
	if (err)
		goto unwind_ops;

	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);
	else
		dma_fence_put(fence);

unwind_ops:
	/* -ENODATA is not a failure, so there is nothing to roll back for it. */
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
	xe_vma_ops_fini(&vops);
	for (i = args->num_binds - 1; i >= 0; --i)
		if (ops[i])
			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
	/* "Nothing to do" still signals the syncs; err becomes that result. */
	if (err == -ENODATA)
		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
	while (num_syncs--)
		xe_sync_entry_cleanup(&syncs[num_syncs]);

	kfree(syncs);
put_obj:
	for (i = 0; i < args->num_binds; ++i)
		xe_bo_put(bos[i]);

	kvfree(ops);
free_bos:
	kvfree(bos);
release_vm_lock:
	up_write(&vm->lock);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_bind_ops:
	/* A single bind uses the op embedded in args, which is not allocated. */
	if (args->num_binds > 1)
		kvfree(bind_ops);
put_vm:
	xe_vm_put(vm);
	return err;
}
3846
3847 /**
3848 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3849 * @vm: VM to bind the BO to
3850 * @bo: BO to bind
3851 * @q: exec queue to use for the bind (optional)
3852 * @addr: address at which to bind the BO
3853 * @cache_lvl: PAT cache level to use
3854 *
3855 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3856 * kernel-owned VM.
3857 *
3858 * Returns a dma_fence to track the binding completion if the job to do so was
3859 * successfully submitted, an error pointer otherwise.
3860 */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)3861 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3862 struct xe_exec_queue *q, u64 addr,
3863 enum xe_cache_level cache_lvl)
3864 {
3865 struct xe_vma_ops vops;
3866 struct drm_gpuva_ops *ops = NULL;
3867 struct dma_fence *fence;
3868 int err;
3869
3870 xe_bo_get(bo);
3871 xe_vm_get(vm);
3872 if (q)
3873 xe_exec_queue_get(q);
3874
3875 down_write(&vm->lock);
3876
3877 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3878
3879 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3880 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3881 vm->xe->pat.idx[cache_lvl]);
3882 if (IS_ERR(ops)) {
3883 err = PTR_ERR(ops);
3884 goto release_vm_lock;
3885 }
3886
3887 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3888 if (err)
3889 goto release_vm_lock;
3890
3891 xe_assert(vm->xe, !list_empty(&vops.list));
3892
3893 err = xe_vma_ops_alloc(&vops, false);
3894 if (err)
3895 goto unwind_ops;
3896
3897 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3898 if (IS_ERR(fence))
3899 err = PTR_ERR(fence);
3900
3901 unwind_ops:
3902 if (err && err != -ENODATA)
3903 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3904
3905 xe_vma_ops_fini(&vops);
3906 drm_gpuva_ops_free(&vm->gpuvm, ops);
3907
3908 release_vm_lock:
3909 up_write(&vm->lock);
3910
3911 if (q)
3912 xe_exec_queue_put(q);
3913 xe_vm_put(vm);
3914 xe_bo_put(bo);
3915
3916 if (err)
3917 fence = ERR_PTR(err);
3918
3919 return fence;
3920 }
3921
3922 /**
3923 * xe_vm_lock() - Lock the vm's dma_resv object
3924 * @vm: The struct xe_vm whose lock is to be locked
3925 * @intr: Whether to perform any wait interruptible
3926 *
3927 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3928 * contended lock was interrupted. If @intr is false, the function
3929 * always returns 0.
3930 */
xe_vm_lock(struct xe_vm * vm,bool intr)3931 int xe_vm_lock(struct xe_vm *vm, bool intr)
3932 {
3933 int ret;
3934
3935 if (intr)
3936 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3937 else
3938 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
3939
3940 return ret;
3941 }
3942
/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose dma_resv is to be unlocked.
 *
 * Release the vm's dma_resv lock that was taken with xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}
3953
3954 /**
3955 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
3956 * address range
3957 * @vm: The VM
3958 * @start: start address
3959 * @end: end address
3960 * @tile_mask: mask for which gt's issue tlb invalidation
3961 *
3962 * Issue a range based TLB invalidation for gt's in tilemask
3963 *
3964 * Returns 0 for success, negative error code otherwise.
3965 */
xe_vm_range_tilemask_tlb_inval(struct xe_vm * vm,u64 start,u64 end,u8 tile_mask)3966 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3967 u64 end, u8 tile_mask)
3968 {
3969 struct xe_tlb_inval_fence
3970 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3971 struct xe_tile *tile;
3972 u32 fence_id = 0;
3973 u8 id;
3974 int err;
3975
3976 if (!tile_mask)
3977 return 0;
3978
3979 for_each_tile(tile, vm->xe, id) {
3980 if (!(tile_mask & BIT(id)))
3981 continue;
3982
3983 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3984 &fence[fence_id], true);
3985
3986 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3987 &fence[fence_id], start, end,
3988 vm->usm.asid, NULL);
3989 if (err)
3990 goto wait;
3991 ++fence_id;
3992
3993 if (!tile->media_gt)
3994 continue;
3995
3996 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3997 &fence[fence_id], true);
3998
3999 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
4000 &fence[fence_id], start, end,
4001 vm->usm.asid, NULL);
4002 if (err)
4003 goto wait;
4004 ++fence_id;
4005 }
4006
4007 wait:
4008 for (id = 0; id < fence_id; ++id)
4009 xe_tlb_inval_fence_wait(&fence[id]);
4010
4011 return err;
4012 }
4013
4014 /**
4015 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
4016 * @vma: VMA to invalidate
4017 *
4018 * Walks a list of page tables leaves which it memset the entries owned by this
4019 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
4020 * complete.
4021 *
4022 * Returns 0 for success, negative error code otherwise.
4023 */
xe_vm_invalidate_vma(struct xe_vma * vma)4024 int xe_vm_invalidate_vma(struct xe_vma *vma)
4025 {
4026 struct xe_device *xe = xe_vma_vm(vma)->xe;
4027 struct xe_vm *vm = xe_vma_vm(vma);
4028 struct xe_tile *tile;
4029 u8 tile_mask = 0;
4030 int ret = 0;
4031 u8 id;
4032
4033 xe_assert(xe, !xe_vma_is_null(vma));
4034 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
4035 trace_xe_vma_invalidate(vma);
4036
4037 vm_dbg(&vm->xe->drm,
4038 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
4039 xe_vma_start(vma), xe_vma_size(vma));
4040
4041 /*
4042 * Check that we don't race with page-table updates, tile_invalidated
4043 * update is safe
4044 */
4045 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
4046 if (xe_vma_is_userptr(vma)) {
4047 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
4048 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
4049 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
4050
4051 WARN_ON_ONCE(!mmu_interval_check_retry
4052 (&to_userptr_vma(vma)->userptr.notifier,
4053 to_userptr_vma(vma)->userptr.pages.notifier_seq));
4054 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
4055 DMA_RESV_USAGE_BOOKKEEP));
4056
4057 } else {
4058 xe_bo_assert_held(xe_vma_bo(vma));
4059 }
4060 }
4061
4062 for_each_tile(tile, xe, id)
4063 if (xe_pt_zap_ptes(tile, vma))
4064 tile_mask |= BIT(id);
4065
4066 xe_device_wmb(xe);
4067
4068 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
4069 xe_vma_end(vma), tile_mask);
4070
4071 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
4072 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
4073
4074 return ret;
4075 }
4076
xe_vm_validate_protected(struct xe_vm * vm)4077 int xe_vm_validate_protected(struct xe_vm *vm)
4078 {
4079 struct drm_gpuva *gpuva;
4080 int err = 0;
4081
4082 if (!vm)
4083 return -ENODEV;
4084
4085 mutex_lock(&vm->snap_mutex);
4086
4087 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4088 struct xe_vma *vma = gpuva_to_vma(gpuva);
4089 struct xe_bo *bo = vma->gpuva.gem.obj ?
4090 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4091
4092 if (!bo)
4093 continue;
4094
4095 if (xe_bo_is_protected(bo)) {
4096 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
4097 if (err)
4098 break;
4099 }
4100 }
4101
4102 mutex_unlock(&vm->snap_mutex);
4103 return err;
4104 }
4105
/*
 * Snapshot of the dumpable VMAs of a VM.
 *
 * Filled in two steps: xe_vm_snapshot_capture() records the metadata and
 * takes references, xe_vm_snapshot_capture_delayed() copies the contents.
 */
struct xe_vm_snapshot {
	/* DRM_XE_VM_CREATE_FLAG_* bits derived from the VM flags */
	int uapi_flags;
	/* Number of entries in snap[] */
	unsigned long num_snaps;
	struct {
		/* ofs: VMA start address; bo_ofs: offset into the BO, or the userptr address */
		u64 ofs, bo_ofs;
		/* Size of the VMA */
		unsigned long len;
#define XE_VM_SNAP_FLAG_USERPTR		BIT(0)
#define XE_VM_SNAP_FLAG_READ_ONLY	BIT(1)
#define XE_VM_SNAP_FLAG_IS_NULL		BIT(2)
		/* XE_VM_SNAP_FLAG_* bits */
		unsigned long flags;
		/* Memory region index as printed in the dump, or -1 if there is none */
		int uapi_mem_region;
		/* PAT index of the VMA */
		int pat_index;
		/* cpu_caching value of the BO (stays 0 for entries without a BO) */
		int cpu_caching;
		/* Referenced BO until the contents have been captured, else NULL */
		struct xe_bo *bo;
		/* Captured contents, or an ERR_PTR() describing why there are none */
		void *data;
		/* Referenced mm of a userptr VMA until the contents have been captured */
		struct mm_struct *mm;
	} snap[];
};
4124
xe_vm_snapshot_capture(struct xe_vm * vm)4125 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
4126 {
4127 unsigned long num_snaps = 0, i;
4128 struct xe_vm_snapshot *snap = NULL;
4129 struct drm_gpuva *gpuva;
4130
4131 if (!vm)
4132 return NULL;
4133
4134 mutex_lock(&vm->snap_mutex);
4135 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4136 if (gpuva->flags & XE_VMA_DUMPABLE)
4137 num_snaps++;
4138 }
4139
4140 if (num_snaps)
4141 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4142 if (!snap) {
4143 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4144 goto out_unlock;
4145 }
4146
4147 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
4148 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
4149 if (vm->flags & XE_VM_FLAG_LR_MODE)
4150 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
4151 if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
4152 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
4153
4154 snap->num_snaps = num_snaps;
4155 i = 0;
4156 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4157 struct xe_vma *vma = gpuva_to_vma(gpuva);
4158 struct xe_bo *bo = vma->gpuva.gem.obj ?
4159 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4160
4161 if (!(gpuva->flags & XE_VMA_DUMPABLE))
4162 continue;
4163
4164 snap->snap[i].ofs = xe_vma_start(vma);
4165 snap->snap[i].len = xe_vma_size(vma);
4166 snap->snap[i].flags = xe_vma_read_only(vma) ?
4167 XE_VM_SNAP_FLAG_READ_ONLY : 0;
4168 snap->snap[i].pat_index = vma->attr.pat_index;
4169 if (bo) {
4170 snap->snap[i].cpu_caching = bo->cpu_caching;
4171 snap->snap[i].bo = xe_bo_get(bo);
4172 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4173 switch (bo->ttm.resource->mem_type) {
4174 case XE_PL_SYSTEM:
4175 case XE_PL_TT:
4176 snap->snap[i].uapi_mem_region = 0;
4177 break;
4178 case XE_PL_VRAM0:
4179 snap->snap[i].uapi_mem_region = 1;
4180 break;
4181 case XE_PL_VRAM1:
4182 snap->snap[i].uapi_mem_region = 2;
4183 break;
4184 }
4185 } else if (xe_vma_is_userptr(vma)) {
4186 struct mm_struct *mm =
4187 to_userptr_vma(vma)->userptr.notifier.mm;
4188
4189 if (mmget_not_zero(mm))
4190 snap->snap[i].mm = mm;
4191 else
4192 snap->snap[i].data = ERR_PTR(-EFAULT);
4193
4194 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4195 snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
4196 snap->snap[i].uapi_mem_region = 0;
4197 } else if (xe_vma_is_null(vma)) {
4198 snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
4199 snap->snap[i].uapi_mem_region = -1;
4200 } else {
4201 snap->snap[i].data = ERR_PTR(-ENOENT);
4202 snap->snap[i].uapi_mem_region = -1;
4203 }
4204 i++;
4205 }
4206
4207 out_unlock:
4208 mutex_unlock(&vm->snap_mutex);
4209 return snap;
4210 }
4211
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)4212 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4213 {
4214 if (IS_ERR_OR_NULL(snap))
4215 return;
4216
4217 for (int i = 0; i < snap->num_snaps; i++) {
4218 struct xe_bo *bo = snap->snap[i].bo;
4219 int err;
4220
4221 if (IS_ERR(snap->snap[i].data) ||
4222 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4223 continue;
4224
4225 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4226 if (!snap->snap[i].data) {
4227 snap->snap[i].data = ERR_PTR(-ENOMEM);
4228 goto cleanup_bo;
4229 }
4230
4231 if (bo) {
4232 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4233 snap->snap[i].data, snap->snap[i].len);
4234 } else {
4235 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4236
4237 kthread_use_mm(snap->snap[i].mm);
4238 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4239 err = 0;
4240 else
4241 err = -EFAULT;
4242 kthread_unuse_mm(snap->snap[i].mm);
4243
4244 mmput(snap->snap[i].mm);
4245 snap->snap[i].mm = NULL;
4246 }
4247
4248 if (err) {
4249 kvfree(snap->snap[i].data);
4250 snap->snap[i].data = ERR_PTR(err);
4251 }
4252
4253 cleanup_bo:
4254 xe_bo_put(bo);
4255 snap->snap[i].bo = NULL;
4256 }
4257 }
4258
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)4259 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4260 {
4261 unsigned long i, j;
4262
4263 if (IS_ERR_OR_NULL(snap)) {
4264 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4265 return;
4266 }
4267
4268 drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
4269 for (i = 0; i < snap->num_snaps; i++) {
4270 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4271
4272 drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
4273 snap->snap[i].ofs,
4274 snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
4275 "read_only" : "read_write",
4276 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
4277 "null_sparse" :
4278 snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
4279 "userptr" : "bo",
4280 snap->snap[i].uapi_mem_region == -1 ? 0 :
4281 BIT(snap->snap[i].uapi_mem_region),
4282 snap->snap[i].pat_index,
4283 snap->snap[i].cpu_caching);
4284
4285 if (IS_ERR(snap->snap[i].data)) {
4286 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4287 PTR_ERR(snap->snap[i].data));
4288 continue;
4289 }
4290
4291 if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4292 continue;
4293
4294 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4295
4296 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4297 u32 *val = snap->snap[i].data + j;
4298 char dumped[ASCII85_BUFSZ];
4299
4300 drm_puts(p, ascii85_encode(*val, dumped));
4301 }
4302
4303 drm_puts(p, "\n");
4304
4305 if (drm_coredump_printer_is_full(p))
4306 return;
4307 }
4308 }
4309
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)4310 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4311 {
4312 unsigned long i;
4313
4314 if (IS_ERR_OR_NULL(snap))
4315 return;
4316
4317 for (i = 0; i < snap->num_snaps; i++) {
4318 if (!IS_ERR(snap->snap[i].data))
4319 kvfree(snap->snap[i].data);
4320 xe_bo_put(snap->snap[i].bo);
4321 if (snap->snap[i].mm)
4322 mmput(snap->snap[i].mm);
4323 }
4324 kvfree(snap);
4325 }
4326
4327 /**
4328 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4329 * @xe: Pointer to the Xe device structure
4330 * @vma: Pointer to the virtual memory area (VMA) structure
4331 * @is_atomic: In pagefault path and atomic operation
4332 *
4333 * This function determines whether the given VMA needs to be migrated to
4334 * VRAM in order to do atomic GPU operation.
4335 *
4336 * Return:
4337 * 1 - Migration to VRAM is required
4338 * 0 - Migration is not required
4339 * -EACCES - Invalid access for atomic memory attr
4340 *
4341 */
xe_vma_need_vram_for_atomic(struct xe_device * xe,struct xe_vma * vma,bool is_atomic)4342 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4343 {
4344 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4345 vma->attr.atomic_access;
4346
4347 if (!IS_DGFX(xe) || !is_atomic)
4348 return false;
4349
4350 /*
4351 * NOTE: The checks implemented here are platform-specific. For
4352 * instance, on a device supporting CXL atomics, these would ideally
4353 * work universally without additional handling.
4354 */
4355 switch (atomic_access) {
4356 case DRM_XE_ATOMIC_DEVICE:
4357 return !xe->info.has_device_atomics_on_smem;
4358
4359 case DRM_XE_ATOMIC_CPU:
4360 return -EACCES;
4361
4362 case DRM_XE_ATOMIC_UNDEFINED:
4363 case DRM_XE_ATOMIC_GLOBAL:
4364 default:
4365 return 1;
4366 }
4367 }
4368
/*
 * xe_vm_alloc_vma() - Create and apply the gpuva ops for a map request
 * @vm: The vm to operate on; vm->lock must be held in write mode.
 * @map_req: The request describing the range.
 * @is_madvise: Build the ops with drm_gpuvm_madvise_ops_create() instead of
 *              drm_gpuvm_sm_map_ops_create().
 *
 * The function runs in three steps:
 *  1) walk the generated ops and propagate flags (and, for the non-madvise
 *     case, the default PAT index) from the VMAs being unmapped/remapped to
 *     the following MAP op,
 *  2) hand the ops to vm_bind_ioctl_ops_parse(),
 *  3) under the vm lock, destroy the old VMAs and, for madvise, carry the
 *     memory attributes of the remapped VMA over to the newly mapped one.
 *
 * Return: 0 on success (including when no ops were generated), or a negative
 * error code from ops creation or from vm_bind_ioctl_ops_parse().
 */
static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_mem_attr tmp_attr = {};
	struct drm_gpuva_ops *ops;
	struct drm_gpuva_op *gpuva_op;
	struct xe_vma_ops vops;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	/*
	 * NOTE(review): only written by non-madvise UNMAP/REMAP ops; a MAP op
	 * with no such op before it would read it uninitialized - confirm
	 * callers guarantee the range is always covered by existing VMAs.
	 */
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	ops = is_madvise ? drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req) :
			   drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	/* Nothing to do for this range. */
	if (list_empty(&ops->list)) {
		err = 0;
		goto out_free;
	}

	/* Pass 1: propagate state from the old VMAs to the MAP op. */
	drm_gpuva_for_each_op(gpuva_op, ops) {
		struct xe_vma_op *vop = gpuva_op_to_vma_op(gpuva_op);
		struct xe_vma *vma;

		switch (gpuva_op->op) {
		case DRM_GPUVA_OP_UNMAP:
			/* UNMAP is only inspected in the non-madvise case. */
			if (is_madvise)
				break;

			vma = gpuva_to_vma(vop->base.unmap.va);
			XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
			default_pat = vma->attr.default_pat_index;
			vma_flags = vma->gpuva.flags;
			break;

		case DRM_GPUVA_OP_REMAP:
			vma = gpuva_to_vma(vop->base.remap.unmap->va);
			if (is_madvise) {
				/* Expect exactly one REMAP before each MAP. */
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
			} else {
				default_pat = vma->attr.default_pat_index;
			}
			vma_flags = vma->gpuva.flags;
			break;

		case DRM_GPUVA_OP_MAP:
			if (is_madvise) {
				/*
				 * For madvise ops DRM_GPUVA_OP_MAP always
				 * comes after DRM_GPUVA_OP_REMAP, so the flags
				 * of the vma we're unmapping are available
				 * here.
				 */
				xe_assert(vm->xe, remap_op);
				remap_op = false;
			} else {
				vop->map.pat_index = default_pat;
			}
			vop->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			break;

		default:
			break;
		}

		print_op(vm->xe, gpuva_op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= is_madvise ? XE_VMA_OPS_FLAG_MADVISE :
				   XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto out_unwind;

	xe_vm_lock(vm, false);

	/* Pass 2: tear down the old VMAs and fix up the new one. */
	drm_gpuva_for_each_op(gpuva_op, ops) {
		struct xe_vma_op *vop = gpuva_op_to_vma_op(gpuva_op);
		struct xe_vma *vma;

		switch (gpuva_op->op) {
		case DRM_GPUVA_OP_UNMAP:
			vma = gpuva_to_vma(vop->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
			break;

		case DRM_GPUVA_OP_REMAP:
			vma = gpuva_to_vma(vop->base.remap.unmap->va);
			/*
			 * For madvise, save the attributes of the VMA that the
			 * REMAP unmaps so they can be assigned to the VMA
			 * created by the MAP op that comes after it.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);

			xe_vma_destroy(vma, NULL);
			break;

		case DRM_GPUVA_OP_MAP:
			vma = vop->map.vma;
			/*
			 * For madvise the MAP op comes after the REMAP op
			 * (asserted in pass 1), so tmp_attr holds the saved
			 * attributes and can be copied to the new vma.
			 */
			if (is_madvise)
				xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
			break;

		default:
			break;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	xe_vma_mem_attr_fini(&tmp_attr);
	return 0;

out_unwind:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
out_free:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}
4498
4499 /**
4500 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
4501 * @vm: Pointer to the xe_vm structure
4502 * @start: Starting input address
4503 * @range: Size of the input range
4504 *
4505 * This function splits existing vma to create new vma for user provided input range
4506 *
4507 * Return: 0 if success
4508 */
xe_vm_alloc_madvise_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4509 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4510 {
4511 struct drm_gpuvm_map_req map_req = {
4512 .map.va.addr = start,
4513 .map.va.range = range,
4514 };
4515
4516 lockdep_assert_held_write(&vm->lock);
4517
4518 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4519
4520 return xe_vm_alloc_vma(vm, &map_req, true);
4521 }
4522
is_cpu_addr_vma_with_default_attr(struct xe_vma * vma)4523 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
4524 {
4525 return vma && xe_vma_is_cpu_addr_mirror(vma) &&
4526 xe_vma_has_default_mem_attrs(vma);
4527 }
4528
4529 /**
4530 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
4531 * @vm: VM to search within
4532 * @start: Input/output pointer to the starting address of the range
4533 * @end: Input/output pointer to the end address of the range
4534 *
4535 * Given a range defined by @start and @range, this function checks the VMAs
4536 * immediately before and after the range. If those neighboring VMAs are
4537 * CPU-address-mirrored and have default memory attributes, the function
4538 * updates @start and @range to include them. This extended range can then
4539 * be used for merging or other operations that require a unified VMA.
4540 *
4541 * The function does not perform the merge itself; it only computes the
4542 * mergeable boundaries.
4543 */
xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm * vm,u64 * start,u64 * end)4544 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
4545 {
4546 struct xe_vma *prev, *next;
4547
4548 lockdep_assert_held(&vm->lock);
4549
4550 if (*start >= SZ_4K) {
4551 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
4552 if (is_cpu_addr_vma_with_default_attr(prev))
4553 *start = xe_vma_start(prev);
4554 }
4555
4556 if (*end < vm->size) {
4557 next = xe_vm_find_vma_by_addr(vm, *end + 1);
4558 if (is_cpu_addr_vma_with_default_attr(next))
4559 *end = xe_vma_end(next);
4560 }
4561 }
4562
4563 /**
4564 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4565 * @vm: Pointer to the xe_vm structure
4566 * @start: Starting input address
4567 * @range: Size of the input range
4568 *
4569 * This function splits/merges existing vma to create new vma for user provided input range
4570 *
4571 * Return: 0 if success
4572 */
xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4573 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4574 {
4575 struct drm_gpuvm_map_req map_req = {
4576 .map.va.addr = start,
4577 .map.va.range = range,
4578 };
4579
4580 lockdep_assert_held_write(&vm->lock);
4581
4582 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4583 start, range);
4584
4585 return xe_vm_alloc_vma(vm, &map_req, false);
4586 }
4587
4588