1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_sriov_vf.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_vm_madvise.h"
44 #include "xe_wa.h"
45
46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
47 {
48 return vm->gpuvm.r_obj;
49 }
50
51 /**
52 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
53 * @vm: The vm whose resv is to be locked.
54 * @exec: The drm_exec transaction.
55 *
56 * Helper to lock the vm's resv as part of a drm_exec transaction.
57 *
58 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
59 */
60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
61 {
62 return drm_exec_lock_obj(exec, xe_vm_obj(vm));
63 }
64
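/*
 * Returns true if any exec queue on the VM either has no preempt fence
 * installed or has a fence whose signaling has already been enabled, i.e. a
 * preemption is pending or in flight.
 */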
65 static bool preempt_fences_waiting(struct xe_vm *vm)
66 {
67 struct xe_exec_queue *q;
68
69 lockdep_assert_held(&vm->lock);
70 xe_vm_assert_held(vm);
71
72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
73 if (!q->lr.pfence ||
74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
75 &q->lr.pfence->flags)) {
76 return true;
77 }
78 }
79
80 return false;
81 }
82
83 static void free_preempt_fences(struct list_head *list)
84 {
85 struct list_head *link, *next;
86
87 list_for_each_safe(link, next, list)
88 xe_preempt_fence_free(to_preempt_fence_from_link(link));
89 }
90
91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
92 unsigned int *count)
93 {
94 lockdep_assert_held(&vm->lock);
95 xe_vm_assert_held(vm);
96
97 if (*count >= vm->preempt.num_exec_queues)
98 return 0;
99
100 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
101 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
102
103 if (IS_ERR(pfence))
104 return PTR_ERR(pfence);
105
106 list_move_tail(xe_preempt_fence_link(pfence), list);
107 }
108
109 return 0;
110 }
111
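/*
 * Wait for the currently installed preempt fences to signal and drop them.
 * On a VF that supports migration the wait is bounded (HZ / 5) so the caller
 * can back off with -EAGAIN instead of blocking post-migration workers.
 */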
112 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
113 {
114 struct xe_exec_queue *q;
115 bool vf_migration = IS_SRIOV_VF(vm->xe) &&
116 xe_sriov_vf_migration_supported(vm->xe);
117 signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
118
119 xe_vm_assert_held(vm);
120
121 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
122 if (q->lr.pfence) {
123 long timeout;
124
125 timeout = dma_fence_wait_timeout(q->lr.pfence, false,
126 wait_time);
127 if (!timeout) {
128 xe_assert(vm->xe, vf_migration);
129 return -EAGAIN;
130 }
131
132 /* Only -ETIME on fence indicates VM needs to be killed */
133 if (timeout < 0 || q->lr.pfence->error == -ETIME)
134 return -ETIME;
135
136 dma_fence_put(q->lr.pfence);
137 q->lr.pfence = NULL;
138 }
139 }
140
141 return 0;
142 }
143
144 static bool xe_vm_is_idle(struct xe_vm *vm)
145 {
146 struct xe_exec_queue *q;
147
148 xe_vm_assert_held(vm);
149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
150 if (!xe_exec_queue_is_idle(q))
151 return false;
152 }
153
154 return true;
155 }
156
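/*
 * Take one pre-allocated fence from @list per exec queue, arm it with the
 * queue's fence context and next seqno, and install it as the queue's new
 * preempt fence, dropping the previous one.
 */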
157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
158 {
159 struct list_head *link;
160 struct xe_exec_queue *q;
161
162 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
163 struct dma_fence *fence;
164
165 link = list->next;
166 xe_assert(vm->xe, link != list);
167
168 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
169 q, q->lr.context,
170 ++q->lr.seqno);
171 dma_fence_put(q->lr.pfence);
172 q->lr.pfence = fence;
173 }
174 }
175
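/*
 * Reserve fence slots on @bo's reservation object and add each exec queue's
 * current preempt fence to it with BOOKKEEP usage.
 */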
176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
177 {
178 struct xe_exec_queue *q;
179 int err;
180
181 xe_bo_assert_held(bo);
182
183 if (!vm->preempt.num_exec_queues)
184 return 0;
185
186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
187 if (err)
188 return err;
189
190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
191 if (q->lr.pfence) {
192 dma_resv_add_fence(bo->ttm.base.resv,
193 q->lr.pfence,
194 DMA_RESV_USAGE_BOOKKEEP);
195 }
196
197 return 0;
198 }
199
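/*
 * Resume each exec queue and re-install its preempt fence, with BOOKKEEP
 * usage, on the VM's reservation object and the objects locked by @exec.
 */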
200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
201 struct drm_exec *exec)
202 {
203 struct xe_exec_queue *q;
204
205 lockdep_assert_held(&vm->lock);
206 xe_vm_assert_held(vm);
207
208 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
209 q->ops->resume(q);
210
211 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
212 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
213 }
214 }
215
216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
217 {
218 struct drm_gpuvm_exec vm_exec = {
219 .vm = &vm->gpuvm,
220 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
221 .num_fences = 1,
222 };
223 struct drm_exec *exec = &vm_exec.exec;
224 struct xe_validation_ctx ctx;
225 struct dma_fence *pfence;
226 int err;
227 bool wait;
228
229 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
230
231 down_write(&vm->lock);
232 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
233 if (err)
234 goto out_up_write;
235
236 pfence = xe_preempt_fence_create(q, q->lr.context,
237 ++q->lr.seqno);
238 if (IS_ERR(pfence)) {
239 err = PTR_ERR(pfence);
240 goto out_fini;
241 }
242
243 list_add(&q->lr.link, &vm->preempt.exec_queues);
244 ++vm->preempt.num_exec_queues;
245 q->lr.pfence = pfence;
246
247 xe_svm_notifier_lock(vm);
248
249 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
250 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
251
252 /*
253 * Check to see if a preemption on the VM or a userptr invalidation is in
254 * flight; if so, trigger this preempt fence to sync state with the other
255 * preempt fences on the VM.
256 */
257 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
258 if (wait)
259 dma_fence_enable_sw_signaling(pfence);
260
261 xe_svm_notifier_unlock(vm);
262
263 out_fini:
264 xe_validation_ctx_fini(&ctx);
265 out_up_write:
266 up_write(&vm->lock);
267
268 return err;
269 }
270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
271
272 /**
273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
274 * @vm: The VM.
275 * @q: The exec_queue
276 *
277 * Note that this function might be called multiple times on the same queue.
278 */
279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
280 {
281 if (!xe_vm_in_preempt_fence_mode(vm))
282 return;
283
284 down_write(&vm->lock);
285 if (!list_empty(&q->lr.link)) {
286 list_del_init(&q->lr.link);
287 --vm->preempt.num_exec_queues;
288 }
289 if (q->lr.pfence) {
290 dma_fence_enable_sw_signaling(q->lr.pfence);
291 dma_fence_put(q->lr.pfence);
292 q->lr.pfence = NULL;
293 }
294 up_write(&vm->lock);
295 }
296
297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
298
299 /**
300 * xe_vm_kill() - VM Kill
301 * @vm: The VM.
302 * @unlocked: Flag indicating the VM's dma-resv is not held
303 *
304 * Kill the VM by setting the banned flag, indicating the VM is no longer available
305 * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
306 */
307 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
308 {
309 struct xe_exec_queue *q;
310
311 lockdep_assert_held(&vm->lock);
312
313 if (unlocked)
314 xe_vm_lock(vm, false);
315
316 vm->flags |= XE_VM_FLAG_BANNED;
317 trace_xe_vm_kill(vm);
318
319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
320 q->ops->kill(q);
321
322 if (unlocked)
323 xe_vm_unlock(vm);
324
325 /* TODO: Inform user the VM is banned */
326 }
327
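/*
 * drm_gpuvm vm_bo_validate() callback: queue all VMAs of the evicted vm_bo
 * for rebind, skip re-populating purged BOs, bail out with -EAGAIN while a
 * suspend is pending, and otherwise validate the backing BO.
 */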
328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
329 {
330 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
331 struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj);
332 struct drm_gpuva *gpuva;
333 int ret;
334
335 lockdep_assert_held(&vm->lock);
336 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
337 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
338 &vm->rebind_list);
339
340 /* Skip re-populating purged BOs, rebind maps scratch pages. */
341 if (xe_bo_is_purged(bo)) {
342 vm_bo->evicted = false;
343 return 0;
344 }
345
346 if (!try_wait_for_completion(&vm->xe->pm_block))
347 return -EAGAIN;
348
349 ret = xe_bo_validate(bo, vm, false, exec);
350 if (ret)
351 return ret;
352
353 vm_bo->evicted = false;
354 return 0;
355 }
356
357 /**
358 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
359 * @vm: The vm for which we are rebinding.
360 * @exec: The struct drm_exec with the locked GEM objects.
361 * @num_fences: The number of fences to reserve for the operation, not
362 * including rebinds and validations.
363 *
364 * Validates all evicted gem objects and rebinds their vmas. Note that
365 * rebindings may cause evictions and hence the validation-rebind
366 * sequence is rerun until there are no more objects to validate.
367 *
368 * Return: 0 on success, negative error code on error. In particular,
369 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
370 * the drm_exec transaction needs to be restarted.
371 */
372 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
373 unsigned int num_fences)
374 {
375 struct drm_gem_object *obj;
376 unsigned long index;
377 int ret;
378
379 do {
380 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
381 if (ret)
382 return ret;
383
384 ret = xe_vm_rebind(vm, false);
385 if (ret)
386 return ret;
387 } while (!list_empty(&vm->gpuvm.evict.list));
388
389 drm_exec_for_each_locked_object(exec, index, obj) {
390 ret = dma_resv_reserve_fences(obj->resv, num_fences);
391 if (ret)
392 return ret;
393 }
394
395 return 0;
396 }
397
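/*
 * Locking-loop body of the rebind worker: lock the VM and its external
 * objects, exit early (*done = true) if the VM is idle or no preemption is
 * pending, otherwise wait on the existing preempt fences and run the
 * validate/rebind sequence.
 */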
398 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
399 bool *done)
400 {
401 int err;
402
403 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
404 if (err)
405 return err;
406
407 if (xe_vm_is_idle(vm)) {
408 vm->preempt.rebind_deactivated = true;
409 *done = true;
410 return 0;
411 }
412
413 if (!preempt_fences_waiting(vm)) {
414 *done = true;
415 return 0;
416 }
417
418 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
419 if (err)
420 return err;
421
422 err = wait_for_existing_preempt_fences(vm);
423 if (err)
424 return err;
425
426 /*
427 * Add validation and rebinding to the locking loop since both can
428 * cause evictions which may require blocking dma_resv locks.
429 * The fence reservation here is intended for the new preempt fences
430 * we attach at the end of the rebind work.
431 */
432 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
433 }
434
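/*
 * If a suspend is pending, register the VM on the device's rebind resume
 * list so the worker can be requeued by xe_vm_resume_rebind_worker() after
 * resume. Returns true if the worker should bail out.
 */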
435 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
436 {
437 struct xe_device *xe = vm->xe;
438 bool ret = false;
439
440 mutex_lock(&xe->rebind_resume_lock);
441 if (!try_wait_for_completion(&vm->xe->pm_block)) {
442 ret = true;
443 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
444 }
445 mutex_unlock(&xe->rebind_resume_lock);
446
447 return ret;
448 }
449
450 /**
451 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
452 * @vm: The vm whose preempt worker to resume.
453 *
454 * Resume a preempt worker that was previously suspended by
455 * vm_suspend_rebind_worker().
456 */
457 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
458 {
459 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
460 }
461
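/*
 * Rebind worker for preempt-fence mode VMs: repin userptrs, validate and
 * rebind evicted BOs, wait for the binds to complete, then arm and reinstall
 * fresh preempt fences before resuming the exec queues.
 */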
462 static void preempt_rebind_work_func(struct work_struct *w)
463 {
464 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
465 struct xe_validation_ctx ctx;
466 struct drm_exec exec;
467 unsigned int fence_count = 0;
468 LIST_HEAD(preempt_fences);
469 int err = 0;
470 long wait;
471 int __maybe_unused tries = 0;
472
473 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
474 trace_xe_vm_rebind_worker_enter(vm);
475
476 down_write(&vm->lock);
477
478 if (xe_vm_is_closed_or_banned(vm)) {
479 up_write(&vm->lock);
480 trace_xe_vm_rebind_worker_exit(vm);
481 return;
482 }
483
484 retry:
485 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
486 up_write(&vm->lock);
487 /* We don't actually block but don't make progress. */
488 xe_pm_might_block_on_suspend();
489 return;
490 }
491
492 if (xe_vm_userptr_check_repin(vm)) {
493 err = xe_vm_userptr_pin(vm);
494 if (err)
495 goto out_unlock_outer;
496 }
497
498 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
499 (struct xe_val_flags) {.interruptible = true});
500 if (err)
501 goto out_unlock_outer;
502
503 drm_exec_until_all_locked(&exec) {
504 bool done = false;
505
506 err = xe_preempt_work_begin(&exec, vm, &done);
507 drm_exec_retry_on_contention(&exec);
508 xe_validation_retry_on_oom(&ctx, &err);
509 if (err || done) {
510 xe_validation_ctx_fini(&ctx);
511 goto out_unlock_outer;
512 }
513 }
514
515 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
516 if (err)
517 goto out_unlock;
518
519 xe_vm_set_validation_exec(vm, &exec);
520 err = xe_vm_rebind(vm, true);
521 xe_vm_set_validation_exec(vm, NULL);
522 if (err)
523 goto out_unlock;
524
525 /* Wait on rebinds and munmap style VM unbinds */
526 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
527 DMA_RESV_USAGE_KERNEL,
528 false, MAX_SCHEDULE_TIMEOUT);
529 if (wait <= 0) {
530 err = -ETIME;
531 goto out_unlock;
532 }
533
534 #define retry_required(__tries, __vm) \
535 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
536 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
537 __xe_vm_userptr_needs_repin(__vm))
538
539 xe_svm_notifier_lock(vm);
540 if (retry_required(tries, vm)) {
541 xe_svm_notifier_unlock(vm);
542 err = -EAGAIN;
543 goto out_unlock;
544 }
545
546 #undef retry_required
547
548 spin_lock(&vm->xe->ttm.lru_lock);
549 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
550 spin_unlock(&vm->xe->ttm.lru_lock);
551
552 /* Point of no return. */
553 arm_preempt_fences(vm, &preempt_fences);
554 resume_and_reinstall_preempt_fences(vm, &exec);
555 xe_svm_notifier_unlock(vm);
556
557 out_unlock:
558 xe_validation_ctx_fini(&ctx);
559 out_unlock_outer:
560 if (err == -EAGAIN) {
561 trace_xe_vm_rebind_worker_retry(vm);
562
563 /*
564 * We can't block in workers on a VF which supports migration
565 * given this can block the VF post-migration workers from
566 * getting scheduled.
567 */
568 if (IS_SRIOV_VF(vm->xe) &&
569 xe_sriov_vf_migration_supported(vm->xe)) {
570 up_write(&vm->lock);
571 xe_vm_queue_rebind_worker(vm);
572 return;
573 }
574
575 goto retry;
576 }
577
578 if (err) {
579 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
580 xe_vm_kill(vm, true);
581 }
582 up_write(&vm->lock);
583
584 free_preempt_fences(&preempt_fences);
585
586 trace_xe_vm_rebind_worker_exit(vm);
587 }
588
589 /**
590 * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
591 * @vm: The VM.
592 * @pf: The pagefault.
593 *
594 * This function takes the data from the pagefault @pf and saves it to @vm->faults.list.
595 *
596 * The function exits silently if the list is full, and reports a warning if
597 * memory for the fault entry could not be allocated.
598 */
599 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf)
600 {
601 struct xe_vm_fault_entry *e;
602 struct xe_hw_engine *hwe;
603
604 /* Do not report faults on reserved engines */
605 hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class,
606 pf->consumer.engine_instance, false);
607 if (!hwe || xe_hw_engine_is_reserved(hwe))
608 return;
609
610 e = kzalloc_obj(*e);
611 if (!e) {
612 drm_warn(&vm->xe->drm,
613 "Could not allocate memory for fault!\n");
614 return;
615 }
616
617 guard(spinlock)(&vm->faults.lock);
618
619 /*
620 * Limit the number of faults in the fault list to prevent
621 * memory overuse.
622 */
623 if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) {
624 kfree(e);
625 return;
626 }
627
628 e->address = pf->consumer.page_addr;
629 /*
630 * TODO:
631 * Address precision is currently always SZ_4K, but this may change
632 * in the future.
633 */
634 e->address_precision = SZ_4K;
635 e->access_type = pf->consumer.access_type;
636 e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK,
637 pf->consumer.fault_type_level),
638 e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK,
639 pf->consumer.fault_type_level),
640
641 list_add_tail(&e->list, &vm->faults.list);
642 vm->faults.len++;
643 }
644
645 static void xe_vm_clear_fault_entries(struct xe_vm *vm)
646 {
647 struct xe_vm_fault_entry *e, *tmp;
648
649 guard(spinlock)(&vm->faults.lock);
650 list_for_each_entry_safe(e, tmp, &vm->faults.list, list) {
651 list_del(&e->list);
652 kfree(e);
653 }
654 vm->faults.len = 0;
655 }
656
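/*
 * Allocate the per-tile pt_update_ops arrays sized by the op counts gathered
 * earlier. When allocating for an array of binds, failure is reported as
 * -ENOBUFS rather than -ENOMEM.
 */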
657 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
658 {
659 int i;
660
661 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
662 if (!vops->pt_update_ops[i].num_ops)
663 continue;
664
665 vops->pt_update_ops[i].ops =
666 kmalloc_objs(*vops->pt_update_ops[i].ops,
667 vops->pt_update_ops[i].num_ops,
668 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
669 if (!vops->pt_update_ops[i].ops)
670 return array_of_binds ? -ENOBUFS : -ENOMEM;
671 }
672
673 return 0;
674 }
675 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
676
677 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
678 {
679 struct xe_vma *vma;
680
681 vma = gpuva_to_vma(op->base.prefetch.va);
682
683 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
684 xa_destroy(&op->prefetch_range.range);
685 }
686
687 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
688 {
689 struct xe_vma_op *op;
690
691 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
692 return;
693
694 list_for_each_entry(op, &vops->list, link)
695 xe_vma_svm_prefetch_op_fini(op);
696 }
697
698 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
699 {
700 int i;
701
702 xe_vma_svm_prefetch_ops_fini(vops);
703
704 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
705 kfree(vops->pt_update_ops[i].ops);
706 }
707
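/* Bump the pending page-table update op count for each tile in @tile_mask. */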
708 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
709 {
710 int i;
711
712 if (!inc_val)
713 return;
714
715 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
716 if (BIT(i) & tile_mask)
717 vops->pt_update_ops[i].num_ops += inc_val;
718 }
719
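/* gpuva flags that are carried over when a VMA is (re)created, e.g. on rebind */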
720 #define XE_VMA_CREATE_MASK ( \
721 XE_VMA_READ_ONLY | \
722 XE_VMA_DUMPABLE | \
723 XE_VMA_SYSTEM_ALLOCATOR | \
724 DRM_GPUVA_SPARSE | \
725 XE_VMA_MADV_AUTORESET)
726
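/* Fill a map op that re-creates the GPU mappings of an already existing VMA. */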
727 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
728 u8 tile_mask)
729 {
730 INIT_LIST_HEAD(&op->link);
731 op->tile_mask = tile_mask;
732 op->base.op = DRM_GPUVA_OP_MAP;
733 op->base.map.va.addr = vma->gpuva.va.addr;
734 op->base.map.va.range = vma->gpuva.va.range;
735 op->base.map.gem.obj = vma->gpuva.gem.obj;
736 op->base.map.gem.offset = vma->gpuva.gem.offset;
737 op->map.vma = vma;
738 op->map.immediate = true;
739 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
740 }
741
742 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
743 u8 tile_mask)
744 {
745 struct xe_vma_op *op;
746
747 op = kzalloc_obj(*op);
748 if (!op)
749 return -ENOMEM;
750
751 xe_vm_populate_rebind(op, vma, tile_mask);
752 list_add_tail(&op->link, &vops->list);
753 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
754
755 return 0;
756 }
757
758 static struct dma_fence *ops_execute(struct xe_vm *vm,
759 struct xe_vma_ops *vops);
760 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
761 struct xe_exec_queue *q,
762 struct xe_sync_entry *syncs, u32 num_syncs);
763
764 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
765 {
766 struct dma_fence *fence;
767 struct xe_vma *vma, *next;
768 struct xe_vma_ops vops;
769 struct xe_vma_op *op, *next_op;
770 int err, i;
771
772 lockdep_assert_held(&vm->lock);
773 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
774 list_empty(&vm->rebind_list))
775 return 0;
776
777 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
778 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
779 vops.pt_update_ops[i].wait_vm_bookkeep = true;
780
781 xe_vm_assert_held(vm);
782 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
783 xe_assert(vm->xe, vma->tile_present);
784
785 if (rebind_worker)
786 trace_xe_vma_rebind_worker(vma);
787 else
788 trace_xe_vma_rebind_exec(vma);
789
790 err = xe_vm_ops_add_rebind(&vops, vma,
791 vma->tile_present);
792 if (err)
793 goto free_ops;
794 }
795
796 err = xe_vma_ops_alloc(&vops, false);
797 if (err)
798 goto free_ops;
799
800 fence = ops_execute(vm, &vops);
801 if (IS_ERR(fence)) {
802 err = PTR_ERR(fence);
803 } else {
804 dma_fence_put(fence);
805 list_for_each_entry_safe(vma, next, &vm->rebind_list,
806 combined_links.rebind)
807 list_del_init(&vma->combined_links.rebind);
808 }
809 free_ops:
810 list_for_each_entry_safe(op, next_op, &vops.list, link) {
811 list_del(&op->link);
812 kfree(op);
813 }
814 xe_vma_ops_fini(&vops);
815
816 return err;
817 }
818
819 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
820 {
821 struct dma_fence *fence = NULL;
822 struct xe_vma_ops vops;
823 struct xe_vma_op *op, *next_op;
824 struct xe_tile *tile;
825 u8 id;
826 int err;
827
828 lockdep_assert_held(&vm->lock);
829 xe_vm_assert_held(vm);
830 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
831
832 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
833 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
834 for_each_tile(tile, vm->xe, id) {
835 vops.pt_update_ops[id].wait_vm_bookkeep = true;
836 vops.pt_update_ops[tile->id].q =
837 xe_migrate_exec_queue(tile->migrate);
838 }
839
840 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
841 if (err)
842 return ERR_PTR(err);
843
844 err = xe_vma_ops_alloc(&vops, false);
845 if (err) {
846 fence = ERR_PTR(err);
847 goto free_ops;
848 }
849
850 fence = ops_execute(vm, &vops);
851
852 free_ops:
853 list_for_each_entry_safe(op, next_op, &vops.list, link) {
854 list_del(&op->link);
855 kfree(op);
856 }
857 xe_vma_ops_fini(&vops);
858
859 return fence;
860 }
861
862 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
863 struct xe_vma *vma,
864 struct xe_svm_range *range,
865 u8 tile_mask)
866 {
867 INIT_LIST_HEAD(&op->link);
868 op->tile_mask = tile_mask;
869 op->base.op = DRM_GPUVA_OP_DRIVER;
870 op->subop = XE_VMA_SUBOP_MAP_RANGE;
871 op->map_range.vma = vma;
872 op->map_range.range = range;
873 }
874
875 static int
876 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
877 struct xe_vma *vma,
878 struct xe_svm_range *range,
879 u8 tile_mask)
880 {
881 struct xe_vma_op *op;
882
883 op = kzalloc_obj(*op);
884 if (!op)
885 return -ENOMEM;
886
887 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
888 list_add_tail(&op->link, &vops->list);
889 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
890
891 return 0;
892 }
893
894 /**
895 * xe_vm_range_rebind() - VM range (re)bind
896 * @vm: The VM which the range belongs to.
897 * @vma: The VMA which the range belongs to.
898 * @range: SVM range to rebind.
899 * @tile_mask: Tile mask to bind the range to.
900 *
901 * (re)bind SVM range setting up GPU page tables for the range.
902 *
903 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
904 * failure
905 */
906 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
907 struct xe_vma *vma,
908 struct xe_svm_range *range,
909 u8 tile_mask)
910 {
911 struct dma_fence *fence = NULL;
912 struct xe_vma_ops vops;
913 struct xe_vma_op *op, *next_op;
914 struct xe_tile *tile;
915 u8 id;
916 int err;
917
918 lockdep_assert_held(&vm->lock);
919 xe_vm_assert_held(vm);
920 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
921 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
922
923 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
924 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
925 for_each_tile(tile, vm->xe, id) {
926 vops.pt_update_ops[id].wait_vm_bookkeep = true;
927 vops.pt_update_ops[tile->id].q =
928 xe_migrate_exec_queue(tile->migrate);
929 }
930
931 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
932 if (err)
933 return ERR_PTR(err);
934
935 err = xe_vma_ops_alloc(&vops, false);
936 if (err) {
937 fence = ERR_PTR(err);
938 goto free_ops;
939 }
940
941 fence = ops_execute(vm, &vops);
942
943 free_ops:
944 list_for_each_entry_safe(op, next_op, &vops.list, link) {
945 list_del(&op->link);
946 kfree(op);
947 }
948 xe_vma_ops_fini(&vops);
949
950 return fence;
951 }
952
953 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
954 struct xe_svm_range *range)
955 {
956 INIT_LIST_HEAD(&op->link);
957 op->tile_mask = range->tile_present;
958 op->base.op = DRM_GPUVA_OP_DRIVER;
959 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
960 op->unmap_range.range = range;
961 }
962
963 static int
964 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
965 struct xe_svm_range *range)
966 {
967 struct xe_vma_op *op;
968
969 op = kzalloc_obj(*op);
970 if (!op)
971 return -ENOMEM;
972
973 xe_vm_populate_range_unbind(op, range);
974 list_add_tail(&op->link, &vops->list);
975 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
976
977 return 0;
978 }
979
980 /**
981 * xe_vm_range_unbind() - VM range unbind
982 * @vm: The VM which the range belongs to.
983 * @range: SVM range to unbind.
984 *
985 * Unbind SVM range removing the GPU page tables for the range.
986 *
987 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
988 * failure
989 */
990 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
991 struct xe_svm_range *range)
992 {
993 struct dma_fence *fence = NULL;
994 struct xe_vma_ops vops;
995 struct xe_vma_op *op, *next_op;
996 struct xe_tile *tile;
997 u8 id;
998 int err;
999
1000 lockdep_assert_held(&vm->lock);
1001 xe_vm_assert_held(vm);
1002 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1003
1004 if (!range->tile_present)
1005 return dma_fence_get_stub();
1006
1007 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1008 for_each_tile(tile, vm->xe, id) {
1009 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1010 vops.pt_update_ops[tile->id].q =
1011 xe_migrate_exec_queue(tile->migrate);
1012 }
1013
1014 err = xe_vm_ops_add_range_unbind(&vops, range);
1015 if (err)
1016 return ERR_PTR(err);
1017
1018 err = xe_vma_ops_alloc(&vops, false);
1019 if (err) {
1020 fence = ERR_PTR(err);
1021 goto free_ops;
1022 }
1023
1024 fence = ops_execute(vm, &vops);
1025
1026 free_ops:
1027 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1028 list_del(&op->link);
1029 kfree(op);
1030 }
1031 xe_vma_ops_fini(&vops);
1032
1033 return fence;
1034 }
1035
1036 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
1037 {
1038 drm_pagemap_put(attr->preferred_loc.dpagemap);
1039 }
1040
1041 static void xe_vma_free(struct xe_vma *vma)
1042 {
1043 xe_vma_mem_attr_fini(&vma->attr);
1044
1045 if (xe_vma_is_userptr(vma))
1046 kfree(to_userptr_vma(vma));
1047 else
1048 kfree(vma);
1049 }
1050
1051 /**
1052 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
1053 * @to: Destination.
1054 * @from: Source.
1055 *
1056 * Copies an xe_vma_mem_attr structure taking care to get reference
1057 * counting of individual members right.
1058 */
1059 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
1060 {
1061 xe_vma_mem_attr_fini(to);
1062 *to = *from;
1063 if (to->preferred_loc.dpagemap)
1064 drm_pagemap_get(to->preferred_loc.dpagemap);
1065 }
1066
1067 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
1068 struct xe_bo *bo,
1069 u64 bo_offset_or_userptr,
1070 u64 start, u64 end,
1071 struct xe_vma_mem_attr *attr,
1072 unsigned int flags)
1073 {
1074 struct xe_vma *vma;
1075 struct xe_tile *tile;
1076 u8 id;
1077 bool is_null = (flags & DRM_GPUVA_SPARSE);
1078 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
1079
1080 xe_assert(vm->xe, start < end);
1081 xe_assert(vm->xe, end < vm->size);
1082
1083 /*
1084 * Allocate and ensure that the xe_vma_is_userptr() return
1085 * matches what was allocated.
1086 */
1087 if (!bo && !is_null && !is_cpu_addr_mirror) {
1088 struct xe_userptr_vma *uvma = kzalloc_obj(*uvma);
1089
1090 if (!uvma)
1091 return ERR_PTR(-ENOMEM);
1092
1093 vma = &uvma->vma;
1094 } else {
1095 vma = kzalloc_obj(*vma);
1096 if (!vma)
1097 return ERR_PTR(-ENOMEM);
1098
1099 if (bo)
1100 vma->gpuva.gem.obj = &bo->ttm.base;
1101 }
1102
1103 INIT_LIST_HEAD(&vma->combined_links.rebind);
1104
1105 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1106 vma->gpuva.vm = &vm->gpuvm;
1107 vma->gpuva.va.addr = start;
1108 vma->gpuva.va.range = end - start + 1;
1109 vma->gpuva.flags = flags;
1110
1111 for_each_tile(tile, vm->xe, id)
1112 vma->tile_mask |= 0x1 << id;
1113
1114 if (vm->xe->info.has_atomic_enable_pte_bit)
1115 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1116
1117 xe_vma_mem_attr_copy(&vma->attr, attr);
1118 if (bo) {
1119 struct drm_gpuvm_bo *vm_bo;
1120
1121 xe_bo_assert_held(bo);
1122
1123 /*
1124 * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This
1125 * gates new vm_bind ioctls (user supplies WILLNEED) while
1126 * still allowing partial-unbind / remap splits whose new VMAs
1127 * inherit the parent's DONTNEED attr. It must also run before
1128 * xe_bo_willneed_get_locked() below so a 0->1 holder bump
1129 * cannot silently promote DONTNEED back to WILLNEED.
1130 */
1131 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
1132 if (xe_bo_madv_is_dontneed(bo)) {
1133 xe_vma_free(vma);
1134 return ERR_PTR(-EBUSY);
1135 }
1136 if (xe_bo_is_purged(bo)) {
1137 xe_vma_free(vma);
1138 return ERR_PTR(-EINVAL);
1139 }
1140 }
1141
1142 vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
1143 if (IS_ERR(vm_bo)) {
1144 xe_vma_free(vma);
1145 return ERR_CAST(vm_bo);
1146 }
1147
1148 drm_gpuvm_bo_extobj_add(vm_bo);
1149 drm_gem_object_get(&bo->ttm.base);
1150 vma->gpuva.gem.offset = bo_offset_or_userptr;
1151 drm_gpuva_link(&vma->gpuva, vm_bo);
1152 drm_gpuvm_bo_put(vm_bo);
1153
1154 xe_bo_vma_count_inc_locked(bo);
1155 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
1156 xe_bo_willneed_get_locked(bo);
1157 } else /* userptr or null */ {
1158 if (!is_null && !is_cpu_addr_mirror) {
1159 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1160 u64 size = end - start + 1;
1161 int err;
1162
1163 vma->gpuva.gem.offset = bo_offset_or_userptr;
1164
1165 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1166 if (err) {
1167 xe_vma_free(vma);
1168 return ERR_PTR(err);
1169 }
1170 }
1171
1172 xe_vm_get(vm);
1173 }
1174
1175 return vma;
1176 }
1177
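/*
 * Final, sleepable part of VMA destruction: drop the user fence, release the
 * userptr, VM or BO reference depending on the VMA type, and free the VMA.
 */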
1178 static void xe_vma_destroy_late(struct xe_vma *vma)
1179 {
1180 struct xe_vm *vm = xe_vma_vm(vma);
1181 struct xe_bo *bo = xe_vma_bo(vma);
1182
1183 if (vma->ufence) {
1184 xe_sync_ufence_put(vma->ufence);
1185 vma->ufence = NULL;
1186 }
1187
1188 if (xe_vma_is_userptr(vma)) {
1189 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1190
1191 xe_userptr_remove(uvma);
1192 xe_vm_put(vm);
1193 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1194 xe_vm_put(vm);
1195 } else {
1196 xe_bo_put(bo);
1197 }
1198
1199 xe_vma_free(vma);
1200 }
1201
1202 static void vma_destroy_work_func(struct work_struct *w)
1203 {
1204 struct xe_vma *vma =
1205 container_of(w, struct xe_vma, destroy_work);
1206
1207 xe_vma_destroy_late(vma);
1208 }
1209
1210 static void vma_destroy_cb(struct dma_fence *fence,
1211 struct dma_fence_cb *cb)
1212 {
1213 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1214
1215 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1216 queue_work(system_dfl_wq, &vma->destroy_work);
1217 }
1218
1219 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1220 {
1221 struct xe_vm *vm = xe_vma_vm(vma);
1222 struct xe_bo *bo = xe_vma_bo(vma);
1223
1224 lockdep_assert_held_write(&vm->lock);
1225 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1226
1227 if (xe_vma_is_userptr(vma)) {
1228 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1229 xe_userptr_destroy(to_userptr_vma(vma));
1230 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1231 xe_bo_assert_held(bo);
1232
1233 drm_gpuva_unlink(&vma->gpuva);
1234
1235 xe_bo_vma_count_dec_locked(bo);
1236 if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
1237 xe_bo_willneed_put_locked(bo);
1238 }
1239
1240 xe_vm_assert_held(vm);
1241 if (fence) {
1242 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1243 vma_destroy_cb);
1244
1245 if (ret) {
1246 XE_WARN_ON(ret != -ENOENT);
1247 xe_vma_destroy_late(vma);
1248 }
1249 } else {
1250 xe_vma_destroy_late(vma);
1251 }
1252 }
1253
1254 /**
1255 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1256 * @exec: The drm_exec object we're currently locking for.
1257 * @vma: The vma for which we want to lock the vm resv and any attached
1258 * object's resv.
1259 *
1260 * Return: 0 on success, negative error code on error. In particular
1261 * may return -EDEADLK on WW transaction contention and -EINTR if
1262 * an interruptible wait is terminated by a signal.
1263 */
1264 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1265 {
1266 struct xe_vm *vm = xe_vma_vm(vma);
1267 struct xe_bo *bo = xe_vma_bo(vma);
1268 int err;
1269
1270 XE_WARN_ON(!vm);
1271
1272 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1273 if (!err && bo && !bo->vm)
1274 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1275
1276 return err;
1277 }
1278
1279 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1280 {
1281 struct xe_device *xe = xe_vma_vm(vma)->xe;
1282 struct xe_validation_ctx ctx;
1283 struct drm_exec exec;
1284 int err = 0;
1285
1286 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1287 err = xe_vm_lock_vma(&exec, vma);
1288 drm_exec_retry_on_contention(&exec);
1289 if (XE_WARN_ON(err))
1290 break;
1291 xe_vma_destroy(vma, NULL);
1292 }
1293 xe_assert(xe, !err);
1294 }
1295
1296 struct xe_vma *
1297 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1298 {
1299 struct drm_gpuva *gpuva;
1300
1301 lockdep_assert_held(&vm->lock);
1302
1303 if (xe_vm_is_closed_or_banned(vm))
1304 return NULL;
1305
1306 xe_assert(vm->xe, start + range <= vm->size);
1307
1308 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1309
1310 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1311 }
1312
1313 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1314 {
1315 int err;
1316
1317 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1318 lockdep_assert_held(&vm->lock);
1319
1320 mutex_lock(&vm->snap_mutex);
1321 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1322 mutex_unlock(&vm->snap_mutex);
1323 XE_WARN_ON(err); /* Shouldn't be possible */
1324
1325 return err;
1326 }
1327
1328 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1329 {
1330 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1331 lockdep_assert_held(&vm->lock);
1332
1333 mutex_lock(&vm->snap_mutex);
1334 drm_gpuva_remove(&vma->gpuva);
1335 mutex_unlock(&vm->snap_mutex);
1336 if (vm->usm.last_fault_vma == vma)
1337 vm->usm.last_fault_vma = NULL;
1338 }
1339
1340 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1341 {
1342 struct xe_vma_op *op;
1343
1344 op = kzalloc_obj(*op);
1345
1346 if (unlikely(!op))
1347 return NULL;
1348
1349 return &op->base;
1350 }
1351
1352 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1353
1354 static const struct drm_gpuvm_ops gpuvm_ops = {
1355 .op_alloc = xe_vm_op_alloc,
1356 .vm_bo_validate = xe_gpuvm_validate,
1357 .vm_free = xe_vm_free,
1358 };
1359
1360 static u64 pde_encode_pat_index(u16 pat_index)
1361 {
1362 u64 pte = 0;
1363
1364 if (pat_index & BIT(0))
1365 pte |= XE_PPGTT_PTE_PAT0;
1366
1367 if (pat_index & BIT(1))
1368 pte |= XE_PPGTT_PTE_PAT1;
1369
1370 return pte;
1371 }
1372
1373 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1374 {
1375 u64 pte = 0;
1376
1377 if (pat_index & BIT(0))
1378 pte |= XE_PPGTT_PTE_PAT0;
1379
1380 if (pat_index & BIT(1))
1381 pte |= XE_PPGTT_PTE_PAT1;
1382
1383 if (pat_index & BIT(2)) {
1384 if (pt_level)
1385 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1386 else
1387 pte |= XE_PPGTT_PTE_PAT2;
1388 }
1389
1390 if (pat_index & BIT(3))
1391 pte |= XELPG_PPGTT_PTE_PAT3;
1392
1393 if (pat_index & (BIT(4)))
1394 pte |= XE2_PPGTT_PTE_PAT4;
1395
1396 return pte;
1397 }
1398
1399 static u64 pte_encode_ps(u32 pt_level)
1400 {
1401 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1402
1403 if (pt_level == 1)
1404 return XE_PDE_PS_2M;
1405 else if (pt_level == 2)
1406 return XE_PDPE_PS_1G;
1407
1408 return 0;
1409 }
1410
1411 static u16 pde_pat_index(struct xe_bo *bo)
1412 {
1413 struct xe_device *xe = xe_bo_device(bo);
1414 u16 pat_index;
1415
1416 /*
1417 * We only have two bits to encode the PAT index in non-leaf nodes, but
1418 * these only point to other paging structures so we only need a minimal
1419 * selection of options. The user PAT index is only for encoding leaf
1420 * nodes, where we have use of more bits to do the encoding. The
1421 * non-leaf nodes are instead under driver control so the chosen index
1422 * here should be distinct from the user PAT index. Also the
1423 * corresponding coherency of the PAT index should be tied to the
1424 * allocation type of the page table (or at least we should pick
1425 * something which is always safe).
1426 */
1427 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1428 pat_index = xe->pat.idx[XE_CACHE_WB];
1429 else
1430 pat_index = xe->pat.idx[XE_CACHE_NONE];
1431
1432 xe_assert(xe, pat_index <= 3);
1433
1434 return pat_index;
1435 }
1436
1437 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1438 {
1439 u64 pde;
1440
1441 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1442 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1443 pde |= pde_encode_pat_index(pde_pat_index(bo));
1444
1445 return pde;
1446 }
1447
1448 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1449 u16 pat_index, u32 pt_level)
1450 {
1451 u64 pte;
1452
1453 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1454 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1455 pte |= pte_encode_pat_index(pat_index, pt_level);
1456 pte |= pte_encode_ps(pt_level);
1457
1458 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1459 pte |= XE_PPGTT_PTE_DM;
1460
1461 return pte;
1462 }
1463
1464 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1465 u16 pat_index, u32 pt_level)
1466 {
1467 struct xe_bo *bo = xe_vma_bo(vma);
1468 struct xe_vm *vm = xe_vma_vm(vma);
1469
1470 pte |= XE_PAGE_PRESENT;
1471
1472 if (likely(!xe_vma_read_only(vma)))
1473 pte |= XE_PAGE_RW;
1474
1475 pte |= pte_encode_pat_index(pat_index, pt_level);
1476 pte |= pte_encode_ps(pt_level);
1477
1478 /*
1479 * NULL PTEs redirect to scratch page (return zeros on read).
1480 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs.
1481 * Never set NULL flag without scratch page - causes undefined behavior.
1482 */
1483 if (unlikely(xe_vma_is_null(vma) ||
1484 (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm))))
1485 pte |= XE_PTE_NULL;
1486
1487 return pte;
1488 }
1489
1490 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1491 u16 pat_index,
1492 u32 pt_level, bool devmem, u64 flags)
1493 {
1494 u64 pte;
1495
1496 /* Avoid passing random bits directly as flags */
1497 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1498
1499 pte = addr;
1500 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1501 pte |= pte_encode_pat_index(pat_index, pt_level);
1502 pte |= pte_encode_ps(pt_level);
1503
1504 if (devmem)
1505 pte |= XE_PPGTT_PTE_DM;
1506
1507 pte |= flags;
1508
1509 return pte;
1510 }
1511
1512 static const struct xe_pt_ops xelp_pt_ops = {
1513 .pte_encode_bo = xelp_pte_encode_bo,
1514 .pte_encode_vma = xelp_pte_encode_vma,
1515 .pte_encode_addr = xelp_pte_encode_addr,
1516 .pde_encode_bo = xelp_pde_encode_bo,
1517 };
1518
1519 static void vm_destroy_work_func(struct work_struct *w);
1520
1521 /**
1522 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1523 * given tile and vm.
1524 * @xe: xe device.
1525 * @tile: tile to set up for.
1526 * @vm: vm to set up for.
1527 * @exec: The struct drm_exec object used to lock the vm resv.
1528 *
1529 * Sets up a pagetable tree with one page-table per level and a single
1530 * leaf PTE. All pagetable entries point to the single page-table or,
1531 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1532 * turns writes into NOPs.
1533 *
1534 * Return: 0 on success, negative error code on error.
1535 */
1536 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1537 struct xe_vm *vm, struct drm_exec *exec)
1538 {
1539 u8 id = tile->id;
1540 int i;
1541
1542 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1543 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1544 if (IS_ERR(vm->scratch_pt[id][i])) {
1545 int err = PTR_ERR(vm->scratch_pt[id][i]);
1546
1547 vm->scratch_pt[id][i] = NULL;
1548 return err;
1549 }
1550 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1551 }
1552
1553 return 0;
1554 }
1555 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1556
1557 static void xe_vm_free_scratch(struct xe_vm *vm)
1558 {
1559 struct xe_tile *tile;
1560 u8 id;
1561
1562 if (!xe_vm_has_scratch(vm))
1563 return;
1564
1565 for_each_tile(tile, vm->xe, id) {
1566 u32 i;
1567
1568 if (!vm->pt_root[id])
1569 continue;
1570
1571 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1572 if (vm->scratch_pt[id][i])
1573 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1574 }
1575 }
1576
1577 static void xe_vm_pt_destroy(struct xe_vm *vm)
1578 {
1579 struct xe_tile *tile;
1580 u8 id;
1581
1582 xe_vm_assert_held(vm);
1583
1584 for_each_tile(tile, vm->xe, id) {
1585 if (vm->pt_root[id]) {
1586 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1587 vm->pt_root[id] = NULL;
1588 }
1589 }
1590 }
1591
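/*
 * Prime lockdep with the expected ordering: the exec_queues lock may be taken
 * under memory reclaim, and the GuC CT lock may be taken while holding the
 * exec_queues lock.
 */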
1592 static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm)
1593 {
1594 if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
1595 return;
1596
1597 fs_reclaim_acquire(GFP_KERNEL);
1598 might_lock(&vm->exec_queues.lock);
1599 fs_reclaim_release(GFP_KERNEL);
1600
1601 down_read(&vm->exec_queues.lock);
1602 might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock);
1603 up_read(&vm->exec_queues.lock);
1604 }
1605
1606 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1607 {
1608 struct drm_gem_object *vm_resv_obj;
1609 struct xe_validation_ctx ctx;
1610 struct drm_exec exec;
1611 struct xe_vm *vm;
1612 int err;
1613 struct xe_tile *tile;
1614 u8 id;
1615
1616 /*
1617 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1618 * ever be in faulting mode.
1619 */
1620 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1621
1622 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1623 if (!vm)
1624 return ERR_PTR(-ENOMEM);
1625
1626 vm->xe = xe;
1627
1628 vm->size = 1ull << xe->info.va_bits;
1629 vm->flags = flags;
1630
1631 if (xef)
1632 vm->xef = xe_file_get(xef);
1633 /**
1634 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1635 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1636 * under a user-VM lock when the PXP session is started at exec_queue
1637 * creation time. Those are different VMs and therefore there is no risk
1638 * of deadlock, but we need to tell lockdep that this is the case or it
1639 * will print a warning.
1640 */
1641 if (flags & XE_VM_FLAG_GSC) {
1642 static struct lock_class_key gsc_vm_key;
1643
1644 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1645 } else {
1646 init_rwsem(&vm->lock);
1647 }
1648 mutex_init(&vm->snap_mutex);
1649
1650 INIT_LIST_HEAD(&vm->rebind_list);
1651
1652 INIT_LIST_HEAD(&vm->userptr.repin_list);
1653 INIT_LIST_HEAD(&vm->userptr.invalidated);
1654 spin_lock_init(&vm->userptr.invalidated_lock);
1655
1656 INIT_LIST_HEAD(&vm->faults.list);
1657 spin_lock_init(&vm->faults.lock);
1658
1659 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1660
1661 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1662
1663 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1664 for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id)
1665 INIT_LIST_HEAD(&vm->exec_queues.list[id]);
1666 if (flags & XE_VM_FLAG_FAULT_MODE)
1667 vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
1668 else
1669 vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;
1670
1671 init_rwsem(&vm->exec_queues.lock);
1672 xe_vm_init_prove_locking(xe, vm);
1673
1674 for_each_tile(tile, xe, id)
1675 xe_range_fence_tree_init(&vm->rftree[id]);
1676
1677 vm->pt_ops = &xelp_pt_ops;
1678
1679 /*
1680 * Long-running workloads are not protected by the scheduler references.
1681 * By design, run_job for long-running workloads returns NULL and the
1682 * scheduler drops all the references of it, hence protecting the VM
1683 * for this case is necessary.
1684 */
1685 if (flags & XE_VM_FLAG_LR_MODE) {
1686 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1687 xe_pm_runtime_get_noresume(xe);
1688 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1689 }
1690
1691 err = xe_svm_init(vm);
1692 if (err)
1693 goto err_no_resv;
1694
1695 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1696 if (!vm_resv_obj) {
1697 err = -ENOMEM;
1698 goto err_svm_fini;
1699 }
1700
1701 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1702 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1703
1704 drm_gem_object_put(vm_resv_obj);
1705
1706 err = 0;
1707 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1708 err) {
1709 err = xe_vm_drm_exec_lock(vm, &exec);
1710 drm_exec_retry_on_contention(&exec);
1711
1712 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1713 vm->flags |= XE_VM_FLAG_64K;
1714
1715 for_each_tile(tile, xe, id) {
1716 if (flags & XE_VM_FLAG_MIGRATION &&
1717 tile->id != XE_VM_FLAG_TILE_ID(flags))
1718 continue;
1719
1720 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1721 &exec);
1722 if (IS_ERR(vm->pt_root[id])) {
1723 err = PTR_ERR(vm->pt_root[id]);
1724 vm->pt_root[id] = NULL;
1725 xe_vm_pt_destroy(vm);
1726 drm_exec_retry_on_contention(&exec);
1727 xe_validation_retry_on_oom(&ctx, &err);
1728 break;
1729 }
1730 }
1731 if (err)
1732 break;
1733
1734 if (xe_vm_has_scratch(vm)) {
1735 for_each_tile(tile, xe, id) {
1736 if (!vm->pt_root[id])
1737 continue;
1738
1739 err = xe_vm_create_scratch(xe, tile, vm, &exec);
1740 if (err) {
1741 xe_vm_free_scratch(vm);
1742 xe_vm_pt_destroy(vm);
1743 drm_exec_retry_on_contention(&exec);
1744 xe_validation_retry_on_oom(&ctx, &err);
1745 break;
1746 }
1747 }
1748 if (err)
1749 break;
1750 vm->batch_invalidate_tlb = true;
1751 }
1752
1753 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1754 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1755 vm->batch_invalidate_tlb = false;
1756 }
1757
1758 /* Fill pt_root after allocating scratch tables */
1759 for_each_tile(tile, xe, id) {
1760 if (!vm->pt_root[id])
1761 continue;
1762
1763 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1764 }
1765 }
1766 if (err)
1767 goto err_close;
1768
1769 /* Kernel migration VM shouldn't have a circular loop.. */
1770 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1771 for_each_tile(tile, xe, id) {
1772 struct xe_exec_queue *q;
1773 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1774
1775 if (!vm->pt_root[id])
1776 continue;
1777
1778 if (!xef) /* Not from userspace */
1779 create_flags |= EXEC_QUEUE_FLAG_KERNEL;
1780
1781 q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
1782 if (IS_ERR(q)) {
1783 err = PTR_ERR(q);
1784 goto err_close;
1785 }
1786 vm->q[id] = q;
1787 }
1788 }
1789
1790 if (xef && xe->info.has_asid) {
1791 u32 asid;
1792
1793 down_write(&xe->usm.lock);
1794 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1795 XA_LIMIT(1, XE_MAX_ASID - 1),
1796 &xe->usm.next_asid, GFP_NOWAIT);
1797 up_write(&xe->usm.lock);
1798 if (err < 0)
1799 goto err_close;
1800
1801 vm->usm.asid = asid;
1802 }
1803
1804 trace_xe_vm_create(vm);
1805
1806 return vm;
1807
1808 err_close:
1809 xe_vm_close_and_put(vm);
1810 return ERR_PTR(err);
1811
1812 err_svm_fini:
1813 if (flags & XE_VM_FLAG_FAULT_MODE) {
1814 vm->size = 0; /* close the vm */
1815 xe_svm_fini(vm);
1816 }
1817 err_no_resv:
1818 mutex_destroy(&vm->snap_mutex);
1819 for_each_tile(tile, xe, id)
1820 xe_range_fence_tree_fini(&vm->rftree[id]);
1821 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1822 if (vm->xef)
1823 xe_file_put(vm->xef);
1824 kfree(vm);
1825 if (flags & XE_VM_FLAG_LR_MODE)
1826 xe_pm_runtime_put(xe);
1827 return ERR_PTR(err);
1828 }
1829
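/*
 * Mark the VM closed (size = 0) and, for non-migration VMs, wait for pending
 * binds, then clear the page-table roots and invalidate the TLBs if the
 * device is still present.
 */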
1830 static void xe_vm_close(struct xe_vm *vm)
1831 {
1832 struct xe_device *xe = vm->xe;
1833 bool bound;
1834 int idx;
1835
1836 bound = drm_dev_enter(&xe->drm, &idx);
1837
1838 down_write(&vm->lock);
1839 if (xe_vm_in_fault_mode(vm))
1840 xe_svm_notifier_lock(vm);
1841
1842 vm->size = 0;
1843
1844 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1845 struct xe_tile *tile;
1846 struct xe_gt *gt;
1847 u8 id;
1848
1849 /* Wait for pending binds */
1850 dma_resv_wait_timeout(xe_vm_resv(vm),
1851 DMA_RESV_USAGE_BOOKKEEP,
1852 false, MAX_SCHEDULE_TIMEOUT);
1853
1854 if (bound) {
1855 for_each_tile(tile, xe, id)
1856 if (vm->pt_root[id])
1857 xe_pt_clear(xe, vm->pt_root[id]);
1858
1859 for_each_gt(gt, xe, id)
1860 xe_tlb_inval_vm(&gt->tlb_inval, vm);
1861 }
1862 }
1863
1864 if (xe_vm_in_fault_mode(vm))
1865 xe_svm_notifier_unlock(vm);
1866 up_write(&vm->lock);
1867
1868 if (bound)
1869 drm_dev_exit(idx);
1870 }
1871
1872 void xe_vm_close_and_put(struct xe_vm *vm)
1873 {
1874 LIST_HEAD(contested);
1875 struct xe_device *xe = vm->xe;
1876 struct xe_tile *tile;
1877 struct xe_vma *vma, *next_vma;
1878 struct drm_gpuva *gpuva, *next;
1879 u8 id;
1880
1881 xe_assert(xe, !vm->preempt.num_exec_queues);
1882
1883 xe_vm_close(vm);
1884 if (xe_vm_in_preempt_fence_mode(vm)) {
1885 mutex_lock(&xe->rebind_resume_lock);
1886 list_del_init(&vm->preempt.pm_activate_link);
1887 mutex_unlock(&xe->rebind_resume_lock);
1888 flush_work(&vm->preempt.rebind_work);
1889 }
1890 if (xe_vm_in_fault_mode(vm))
1891 xe_svm_close(vm);
1892
1893 down_write(&vm->lock);
1894 for_each_tile(tile, xe, id) {
1895 if (vm->q[id]) {
1896 int i;
1897
1898 xe_exec_queue_last_fence_put(vm->q[id], vm);
1899 for_each_tlb_inval(i)
1900 xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
1901 }
1902 }
1903 up_write(&vm->lock);
1904
1905 for_each_tile(tile, xe, id) {
1906 if (vm->q[id]) {
1907 xe_exec_queue_kill(vm->q[id]);
1908 xe_exec_queue_put(vm->q[id]);
1909 vm->q[id] = NULL;
1910 }
1911 }
1912
1913 down_write(&vm->lock);
1914 xe_vm_lock(vm, false);
1915 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1916 vma = gpuva_to_vma(gpuva);
1917
1918 if (xe_vma_has_no_bo(vma)) {
1919 xe_svm_notifier_lock(vm);
1920 vma->gpuva.flags |= XE_VMA_DESTROYED;
1921 xe_svm_notifier_unlock(vm);
1922 }
1923
1924 xe_vm_remove_vma(vm, vma);
1925
1926 /* easy case, remove from VMA? */
1927 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1928 list_del_init(&vma->combined_links.rebind);
1929 xe_vma_destroy(vma, NULL);
1930 continue;
1931 }
1932
1933 list_move_tail(&vma->combined_links.destroy, &contested);
1934 vma->gpuva.flags |= XE_VMA_DESTROYED;
1935 }
1936
1937 /*
1938 * All vm operations will add shared fences to resv.
1939 * The only exception is eviction for a shared object,
1940 * but even so, the unbind when evicted would still
1941 * install a fence to resv. Hence it's safe to
1942 * destroy the pagetables immediately.
1943 */
1944 xe_vm_free_scratch(vm);
1945 xe_vm_pt_destroy(vm);
1946 xe_vm_unlock(vm);
1947
1948 /*
1949 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1950 * Since we hold a refcount to the bo, we can remove and free
1951 * the members safely without locking.
1952 */
1953 list_for_each_entry_safe(vma, next_vma, &contested,
1954 combined_links.destroy) {
1955 list_del_init(&vma->combined_links.destroy);
1956 xe_vma_destroy_unlocked(vma);
1957 }
1958
1959 xe_svm_fini(vm);
1960
1961 up_write(&vm->lock);
1962
1963 down_write(&xe->usm.lock);
1964 if (vm->usm.asid) {
1965 void *lookup;
1966
1967 xe_assert(xe, xe->info.has_asid);
1968 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1969
1970 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1971 xe_assert(xe, lookup == vm);
1972 }
1973 up_write(&xe->usm.lock);
1974
1975 xe_vm_clear_fault_entries(vm);
1976
1977 for_each_tile(tile, xe, id)
1978 xe_range_fence_tree_fini(&vm->rftree[id]);
1979
1980 xe_vm_put(vm);
1981 }
1982
1983 static void vm_destroy_work_func(struct work_struct *w)
1984 {
1985 struct xe_vm *vm =
1986 container_of(w, struct xe_vm, destroy_work);
1987 struct xe_device *xe = vm->xe;
1988 struct xe_tile *tile;
1989 u8 id;
1990
1991 /* xe_vm_close_and_put was not called? */
1992 xe_assert(xe, !vm->size);
1993
1994 if (xe_vm_in_preempt_fence_mode(vm))
1995 flush_work(&vm->preempt.rebind_work);
1996
1997 mutex_destroy(&vm->snap_mutex);
1998
1999 if (vm->flags & XE_VM_FLAG_LR_MODE)
2000 xe_pm_runtime_put(xe);
2001
2002 for_each_tile(tile, xe, id)
2003 XE_WARN_ON(vm->pt_root[id]);
2004
2005 trace_xe_vm_free(vm);
2006
2007 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
2008
2009 if (vm->xef)
2010 xe_file_put(vm->xef);
2011
2012 kfree(vm);
2013 }
2014
2015 static void xe_vm_free(struct drm_gpuvm *gpuvm)
2016 {
2017 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
2018
2019 /* To destroy the VM we need to be able to sleep */
2020 queue_work(system_dfl_wq, &vm->destroy_work);
2021 }
2022
2023 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
2024 {
2025 struct xe_vm *vm;
2026
2027 mutex_lock(&xef->vm.lock);
2028 vm = xa_load(&xef->vm.xa, id);
2029 if (vm)
2030 xe_vm_get(vm);
2031 mutex_unlock(&xef->vm.lock);
2032
2033 return vm;
2034 }
2035
2036 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
2037 {
2038 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
2039 }
2040
2041 static struct xe_exec_queue *
2042 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2043 {
2044 return q ? q : vm->q[0];
2045 }
2046
2047 static struct xe_user_fence *
2048 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2049 {
2050 unsigned int i;
2051
2052 for (i = 0; i < num_syncs; i++) {
2053 struct xe_sync_entry *e = &syncs[i];
2054
2055 if (xe_sync_is_ufence(e))
2056 return xe_sync_ufence_get(e);
2057 }
2058
2059 return NULL;
2060 }
2061
2062 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2063 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2064 DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
2065 DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2066
2067 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2068 struct drm_file *file)
2069 {
2070 struct xe_device *xe = to_xe_device(dev);
2071 struct xe_file *xef = to_xe_file(file);
2072 struct drm_xe_vm_create *args = data;
2073 struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
2074 struct xe_vm *vm;
2075 u32 id;
2076 int err;
2077 u32 flags = 0;
2078
2079 if (XE_IOCTL_DBG(xe, args->extensions))
2080 return -EINVAL;
2081
2082 if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
2083 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2084
2085 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2086 !xe->info.has_usm))
2087 return -EINVAL;
2088
2089 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2090 return -EINVAL;
2091
2092 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2093 return -EINVAL;
2094
2095 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2096 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2097 !xe->info.needs_scratch))
2098 return -EINVAL;
2099
2100 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2101 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2102 return -EINVAL;
2103
2104 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
2105 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
2106 return -EINVAL;
2107
2108 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2109 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2110 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2111 flags |= XE_VM_FLAG_LR_MODE;
2112 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2113 flags |= XE_VM_FLAG_FAULT_MODE;
2114 if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2115 flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;
2116
2117 vm = xe_vm_create(xe, flags, xef);
2118 if (IS_ERR(vm))
2119 return PTR_ERR(vm);
2120
2121 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2122 /* Warning: Security issue - never enable by default */
2123 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2124 #endif
2125
2126 /* user id alloc must always be last in ioctl to prevent UAF */
2127 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2128 if (err)
2129 goto err_close_and_put;
2130
2131 args->vm_id = id;
2132
2133 return 0;
2134
2135 err_close_and_put:
2136 xe_vm_close_and_put(vm);
2137
2138 return err;
2139 }
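
/*
 * Illustrative userspace sketch (not part of the driver): creating a VM
 * through this ioctl. The file descriptor, flag choice and error handling
 * are assumptions for the example only.
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
 *			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
 *	};
 *
 *	if (ioctl(xe_fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		return -errno;
 *	vm_id = create.vm_id;
 *
 * As checked above, FAULT_MODE requires LR_MODE and a device with USM
 * support; unknown flags or non-zero extensions/reserved fields are
 * rejected with -EINVAL.
 */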
2140
2141 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2142 struct drm_file *file)
2143 {
2144 struct xe_device *xe = to_xe_device(dev);
2145 struct xe_file *xef = to_xe_file(file);
2146 struct drm_xe_vm_destroy *args = data;
2147 struct xe_vm *vm;
2148 int err = 0;
2149
2150 if (XE_IOCTL_DBG(xe, args->pad) ||
2151 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2152 return -EINVAL;
2153
2154 mutex_lock(&xef->vm.lock);
2155 vm = xa_load(&xef->vm.xa, args->vm_id);
2156 if (XE_IOCTL_DBG(xe, !vm))
2157 err = -ENOENT;
2158 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2159 err = -EBUSY;
2160 else
2161 xa_erase(&xef->vm.xa, args->vm_id);
2162 mutex_unlock(&xef->vm.lock);
2163
2164 if (!err)
2165 xe_vm_close_and_put(vm);
2166
2167 return err;
2168 }
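
/*
 * Userspace counterpart (illustrative sketch only): destroying the VM by id.
 *
 *	struct drm_xe_vm_destroy destroy = { .vm_id = vm_id };
 *
 *	ioctl(xe_fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
 *
 * The ioctl fails with -ENOENT for an unknown id and with -EBUSY while
 * exec queues in preempt-fence mode still reference the VM, as checked above.
 */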
2169
2170 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
2171 {
2172 struct drm_gpuva *gpuva;
2173 u32 num_vmas = 0;
2174
2175 lockdep_assert_held(&vm->lock);
2176 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
2177 num_vmas++;
2178
2179 return num_vmas;
2180 }
2181
2182 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
2183 u64 end, struct drm_xe_mem_range_attr *attrs)
2184 {
2185 struct drm_gpuva *gpuva;
2186 int i = 0;
2187
2188 lockdep_assert_held(&vm->lock);
2189
2190 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
2191 struct xe_vma *vma = gpuva_to_vma(gpuva);
2192
2193 if (i == *num_vmas)
2194 return -ENOSPC;
2195
2196 attrs[i].start = xe_vma_start(vma);
2197 attrs[i].end = xe_vma_end(vma);
2198 attrs[i].atomic.val = vma->attr.atomic_access;
2199 attrs[i].pat_index.val = vma->attr.pat_index;
2200 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
2201 attrs[i].preferred_mem_loc.migration_policy =
2202 vma->attr.preferred_loc.migration_policy;
2203
2204 i++;
2205 }
2206
2207 *num_vmas = i;
2208 return 0;
2209 }
2210
2211 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2212 {
2213 struct xe_device *xe = to_xe_device(dev);
2214 struct xe_file *xef = to_xe_file(file);
2215 struct drm_xe_mem_range_attr *mem_attrs;
2216 struct drm_xe_vm_query_mem_range_attr *args = data;
2217 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2218 struct xe_vm *vm;
2219 int err = 0;
2220
2221 if (XE_IOCTL_DBG(xe,
2222 ((args->num_mem_ranges == 0 &&
2223 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2224 (args->num_mem_ranges > 0 &&
2225 (!attrs_user ||
2226 args->sizeof_mem_range_attr !=
2227 sizeof(struct drm_xe_mem_range_attr))))))
2228 return -EINVAL;
2229
2230 vm = xe_vm_lookup(xef, args->vm_id);
2231 if (XE_IOCTL_DBG(xe, !vm))
2232 return -EINVAL;
2233
2234 err = down_read_interruptible(&vm->lock);
2235 if (err)
2236 goto put_vm;
2237
2238 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2239
2240 if (args->num_mem_ranges == 0 && !attrs_user) {
2241 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2242 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2243 goto unlock_vm;
2244 }
2245
2246 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2247 GFP_KERNEL | __GFP_ACCOUNT |
2248 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2249 if (!mem_attrs) {
2250 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2251 goto unlock_vm;
2252 }
2253
2254 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2255 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2256 args->start + args->range, mem_attrs);
2257 if (err)
2258 goto free_mem_attrs;
2259
2260 err = copy_to_user(attrs_user, mem_attrs,
2261 args->sizeof_mem_range_attr * args->num_mem_ranges);
2262 if (err)
2263 err = -EFAULT;
2264
2265 free_mem_attrs:
2266 kvfree(mem_attrs);
2267 unlock_vm:
2268 up_read(&vm->lock);
2269 put_vm:
2270 xe_vm_put(vm);
2271 return err;
2272 }
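
/*
 * Illustrative two-pass usage of the query above (userspace sketch; the
 * ioctl request macro and allocation strategy are assumptions for the
 * example):
 *
 *	struct drm_xe_vm_query_mem_range_attr query = {
 *		.vm_id = vm_id,
 *		.start = addr,
 *		.range = size,
 *	};
 *
 *	// Pass 1: num_mem_ranges == 0 and no vector pointer asks the kernel
 *	// for the number of ranges and the per-entry size it expects.
 *	ioctl(xe_fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &query);
 *
 *	// Pass 2: supply a buffer of exactly that many entries.
 *	attrs = calloc(query.num_mem_ranges, query.sizeof_mem_range_attr);
 *	query.vector_of_mem_attr = (uintptr_t)attrs;
 *	ioctl(xe_fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &query);
 */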
2273
2274 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2275 {
2276 if (page_addr > xe_vma_end(vma) - 1 ||
2277 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2278 return false;
2279
2280 return true;
2281 }
2282
2283 /**
2284 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2285 *
2286 * @vm: the xe_vm the vma belongs to
2287 * @page_addr: address to look up
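 *
 * Return: the xe_vma covering @page_addr, or NULL if no VMA overlaps it.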
2288 */
2289 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2290 {
2291 struct xe_vma *vma = NULL;
2292
2293 if (vm->usm.last_fault_vma) { /* Fast lookup */
2294 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2295 vma = vm->usm.last_fault_vma;
2296 }
2297 if (!vma)
2298 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2299
2300 return vma;
2301 }
2302
2303 static const u32 region_to_mem_type[] = {
2304 XE_PL_TT,
2305 XE_PL_VRAM0,
2306 XE_PL_VRAM1,
2307 };
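
/*
 * A prefetch's mem_region_instance indexes this table: 0 selects system
 * memory (TT), 1 and 2 select VRAM0/VRAM1. The special value
 * DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC bypasses the table and resolves the
 * placement from the madvise preferred location instead (see
 * vm_bind_ioctl_ops_create() below).
 */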
2308
2309 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2310 bool post_commit)
2311 {
2312 xe_svm_notifier_lock(vm);
2313 vma->gpuva.flags |= XE_VMA_DESTROYED;
2314 xe_svm_notifier_unlock(vm);
2315 if (post_commit)
2316 xe_vm_remove_vma(vm, vma);
2317 }
2318
2319 #undef ULL
2320 #define ULL unsigned long long
2321
2322 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2323 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2324 {
2325 struct xe_vma *vma;
2326
2327 switch (op->op) {
2328 case DRM_GPUVA_OP_MAP:
2329 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2330 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2331 break;
2332 case DRM_GPUVA_OP_REMAP:
2333 vma = gpuva_to_vma(op->remap.unmap->va);
2334 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2335 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2336 op->remap.unmap->keep ? 1 : 0);
2337 if (op->remap.prev)
2338 vm_dbg(&xe->drm,
2339 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2340 (ULL)op->remap.prev->va.addr,
2341 (ULL)op->remap.prev->va.range);
2342 if (op->remap.next)
2343 vm_dbg(&xe->drm,
2344 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2345 (ULL)op->remap.next->va.addr,
2346 (ULL)op->remap.next->va.range);
2347 break;
2348 case DRM_GPUVA_OP_UNMAP:
2349 vma = gpuva_to_vma(op->unmap.va);
2350 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2351 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2352 op->unmap.keep ? 1 : 0);
2353 break;
2354 case DRM_GPUVA_OP_PREFETCH:
2355 vma = gpuva_to_vma(op->prefetch.va);
2356 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2357 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2358 break;
2359 default:
2360 drm_warn(&xe->drm, "NOT POSSIBLE\n");
2361 }
2362 }
2363 #else
2364 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2365 {
2366 }
2367 #endif
2368
2369 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2370 {
2371 if (!xe_vm_in_fault_mode(vm))
2372 return false;
2373
2374 if (!xe_vm_has_scratch(vm))
2375 return false;
2376
2377 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2378 return false;
2379
2380 return true;
2381 }
2382
2383 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2384 {
2385 struct drm_gpuva_op *__op;
2386
2387 drm_gpuva_for_each_op(__op, ops) {
2388 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2389
2390 xe_vma_svm_prefetch_op_fini(op);
2391 }
2392 }
2393
2394 /*
2395 * Create the operations list from the IOCTL arguments and set up operation fields
2396 * so the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
2397 */
2398 static struct drm_gpuva_ops *
2399 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2400 struct xe_bo *bo, u64 bo_offset_or_userptr,
2401 u64 addr, u64 range,
2402 u32 operation, u32 flags,
2403 u32 prefetch_region, u16 pat_index)
2404 {
2405 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2406 struct drm_gpuva_ops *ops;
2407 struct drm_gpuva_op *__op;
2408 struct drm_gpuvm_bo *vm_bo;
2409 u64 range_start = addr;
2410 u64 range_end = addr + range;
2411 int err;
2412
2413 lockdep_assert_held_write(&vm->lock);
2414
2415 vm_dbg(&vm->xe->drm,
2416 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2417 operation, (ULL)addr, (ULL)range,
2418 (ULL)bo_offset_or_userptr);
2419
2420 switch (operation) {
2421 case DRM_XE_VM_BIND_OP_MAP:
2422 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
2423 xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
2424 vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
2425 }
2426
2427 fallthrough;
2428 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2429 struct drm_gpuvm_map_req map_req = {
2430 .map.va.addr = range_start,
2431 .map.va.range = range_end - range_start,
2432 .map.gem.obj = obj,
2433 .map.gem.offset = bo_offset_or_userptr,
2434 };
2435
2436 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2437 break;
2438 }
2439 case DRM_XE_VM_BIND_OP_UNMAP:
2440 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2441 break;
2442 case DRM_XE_VM_BIND_OP_PREFETCH:
2443 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2444 break;
2445 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2446 xe_assert(vm->xe, bo);
2447
2448 err = xe_bo_lock(bo, true);
2449 if (err)
2450 return ERR_PTR(err);
2451
2452 vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj);
2453 if (IS_ERR(vm_bo)) {
2454 xe_bo_unlock(bo);
2455 return ERR_CAST(vm_bo);
2456 }
2457
2458 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2459 drm_gpuvm_bo_put(vm_bo);
2460 xe_bo_unlock(bo);
2461 break;
2462 default:
2463 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2464 ops = ERR_PTR(-EINVAL);
2465 }
2466 if (IS_ERR(ops))
2467 return ops;
2468
2469 drm_gpuva_for_each_op(__op, ops) {
2470 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2471
2472 if (__op->op == DRM_GPUVA_OP_MAP) {
2473 op->map.immediate =
2474 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2475 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2476 op->map.vma_flags |= XE_VMA_READ_ONLY;
2477 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2478 op->map.vma_flags |= DRM_GPUVA_SPARSE;
2479 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2480 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2481 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2482 op->map.vma_flags |= XE_VMA_DUMPABLE;
2483 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2484 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2485 op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
2486 op->map.pat_index = pat_index;
2487 op->map.invalidate_on_bind =
2488 __xe_vm_needs_clear_scratch_pages(vm, flags);
2489 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2490 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2491 struct xe_tile *tile;
2492 struct xe_svm_range *svm_range;
2493 struct drm_gpusvm_ctx ctx = {};
2494 struct drm_pagemap *dpagemap = NULL;
2495 u8 id, tile_mask = 0;
2496 u32 i;
2497
2498 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2499 op->prefetch.region = prefetch_region;
2500 break;
2501 }
2502
2503 ctx.read_only = xe_vma_read_only(vma);
2504 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2505 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2506
2507 for_each_tile(tile, vm->xe, id)
2508 tile_mask |= 0x1 << id;
2509
2510 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2511 op->prefetch_range.ranges_count = 0;
2512
2513 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2514 dpagemap = xe_vma_resolve_pagemap(vma,
2515 xe_device_get_root_tile(vm->xe));
2516 } else if (prefetch_region) {
2517 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2518 XE_PL_VRAM0];
2519 dpagemap = xe_tile_local_pagemap(tile);
2520 }
2521
2522 op->prefetch_range.dpagemap = dpagemap;
2523 alloc_next_range:
2524 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2525
2526 if (PTR_ERR(svm_range) == -ENOENT) {
2527 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2528
2529 addr = ret == ULONG_MAX ? 0 : ret;
2530 if (addr)
2531 goto alloc_next_range;
2532 else
2533 goto print_op_label;
2534 }
2535
2536 if (IS_ERR(svm_range)) {
2537 err = PTR_ERR(svm_range);
2538 goto unwind_prefetch_ops;
2539 }
2540
2541 if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
2542 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2543 goto check_next_range;
2544 }
2545
2546 err = xa_alloc(&op->prefetch_range.range,
2547 &i, svm_range, xa_limit_32b,
2548 GFP_KERNEL);
2549
2550 if (err)
2551 goto unwind_prefetch_ops;
2552
2553 op->prefetch_range.ranges_count++;
2554 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2555 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2556 check_next_range:
2557 if (range_end > xe_svm_range_end(svm_range) &&
2558 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2559 addr = xe_svm_range_end(svm_range);
2560 goto alloc_next_range;
2561 }
2562 }
2563 print_op_label:
2564 print_op(vm->xe, __op);
2565 }
2566
2567 return ops;
2568
2569 unwind_prefetch_ops:
2570 xe_svm_prefetch_gpuva_ops_fini(ops);
2571 drm_gpuva_ops_free(&vm->gpuvm, ops);
2572 return ERR_PTR(err);
2573 }
2574
2575 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
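
/*
 * For reference, a single DRM_XE_VM_BIND_OP_MAP submitted through the bind
 * ioctl reaches vm_bind_ioctl_ops_create() as one (addr, range, bo, offset)
 * tuple and is expanded into drm_gpuvm MAP/REMAP/UNMAP ops. Illustrative
 * userspace sketch (handle, addresses and pat_index are assumptions for the
 * example):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = 0x1a0000,
 *			.range = 0x10000,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.flags = DRM_XE_VM_BIND_FLAG_IMMEDIATE,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	ioctl(xe_fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */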
2576
2577 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2578 struct xe_vma_mem_attr *attr, unsigned int flags)
2579 {
2580 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2581 struct xe_validation_ctx ctx;
2582 struct drm_exec exec;
2583 struct xe_vma *vma;
2584 int err = 0;
2585
2586 lockdep_assert_held_write(&vm->lock);
2587
2588 if (bo) {
2589 err = 0;
2590 xe_validation_guard(&ctx, &vm->xe->val, &exec,
2591 (struct xe_val_flags) {.interruptible = true}, err) {
2592 if (!bo->vm) {
2593 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2594 drm_exec_retry_on_contention(&exec);
2595 }
2596 if (!err) {
2597 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2598 drm_exec_retry_on_contention(&exec);
2599 }
2600 if (err)
2601 return ERR_PTR(err);
2602
2603 vma = xe_vma_create(vm, bo, op->gem.offset,
2604 op->va.addr, op->va.addr +
2605 op->va.range - 1, attr, flags);
2606 if (IS_ERR(vma))
2607 return vma;
2608
2609 if (!bo->vm) {
2610 err = add_preempt_fences(vm, bo);
2611 if (err) {
2612 prep_vma_destroy(vm, vma, false);
2613 xe_vma_destroy(vma, NULL);
2614 }
2615 }
2616 }
2617 if (err)
2618 return ERR_PTR(err);
2619 } else {
2620 vma = xe_vma_create(vm, NULL, op->gem.offset,
2621 op->va.addr, op->va.addr +
2622 op->va.range - 1, attr, flags);
2623 if (IS_ERR(vma))
2624 return vma;
2625
2626 if (xe_vma_is_userptr(vma)) {
2627 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2628 /*
2629 * -EBUSY has a dedicated meaning here: a user fence
2630 * attached to the VMA is busy. In practice,
2631 * xe_vma_userptr_pin_pages() can only fail with -EBUSY
2632 * when we are low on memory, so convert it to -ENOMEM.
2633 */
2634 if (err == -EBUSY)
2635 err = -ENOMEM;
2636 }
2637 }
2638 if (err) {
2639 prep_vma_destroy(vm, vma, false);
2640 xe_vma_destroy_unlocked(vma);
2641 vma = ERR_PTR(err);
2642 }
2643
2644 return vma;
2645 }
2646
2647 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2648 {
2649 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2650 return SZ_1G;
2651 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2652 return SZ_2M;
2653 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2654 return SZ_64K;
2655 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2656 return SZ_4K;
2657
2658 return SZ_1G; /* Uninitialized, used max size */
2659 }
2660
2661 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2662 {
2663 switch (size) {
2664 case SZ_1G:
2665 vma->gpuva.flags |= XE_VMA_PTE_1G;
2666 break;
2667 case SZ_2M:
2668 vma->gpuva.flags |= XE_VMA_PTE_2M;
2669 break;
2670 case SZ_64K:
2671 vma->gpuva.flags |= XE_VMA_PTE_64K;
2672 break;
2673 case SZ_4K:
2674 vma->gpuva.flags |= XE_VMA_PTE_4K;
2675 break;
2676 }
2677 }
2678
2679 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2680 {
2681 int err = 0;
2682
2683 lockdep_assert_held_write(&vm->lock);
2684
2685 switch (op->base.op) {
2686 case DRM_GPUVA_OP_MAP:
2687 err |= xe_vm_insert_vma(vm, op->map.vma);
2688 if (!err)
2689 op->flags |= XE_VMA_OP_COMMITTED;
2690 break;
2691 case DRM_GPUVA_OP_REMAP:
2692 {
2693 u8 tile_present =
2694 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2695
2696 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2697 true);
2698 op->flags |= XE_VMA_OP_COMMITTED;
2699
2700 if (op->remap.prev) {
2701 err |= xe_vm_insert_vma(vm, op->remap.prev);
2702 if (!err)
2703 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2704 if (!err && op->remap.skip_prev) {
2705 op->remap.prev->tile_present =
2706 tile_present;
2707 }
2708 }
2709 if (op->remap.next) {
2710 err |= xe_vm_insert_vma(vm, op->remap.next);
2711 if (!err)
2712 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2713 if (!err && op->remap.skip_next) {
2714 op->remap.next->tile_present =
2715 tile_present;
2716 }
2717 }
2718
2719 /*
2720 * Adjust for partial unbind after removing VMA from VM. In case
2721 * of unwind we might need to undo this later.
2722 */
2723 if (!err) {
2724 op->base.remap.unmap->va->va.addr = op->remap.start;
2725 op->base.remap.unmap->va->va.range = op->remap.range;
2726 }
2727 break;
2728 }
2729 case DRM_GPUVA_OP_UNMAP:
2730 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2731 op->flags |= XE_VMA_OP_COMMITTED;
2732 break;
2733 case DRM_GPUVA_OP_PREFETCH:
2734 op->flags |= XE_VMA_OP_COMMITTED;
2735 break;
2736 default:
2737 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2738 }
2739
2740 return err;
2741 }
2742
2743 /**
2744 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2745 * @vma: Pointer to the xe_vma structure to check
2746 *
2747 * This function determines whether the given VMA (Virtual Memory Area)
2748 * has its memory attributes set to their default values. Specifically,
2749 * it checks the following conditions:
2750 *
2751 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED`
2752 * - `pat_index` is equal to `default_pat_index`
2753 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2754 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2755 *
2756 * Return: true if all attributes are at their default values, false otherwise.
2757 */
2758 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2759 {
2760 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2761 vma->attr.pat_index == vma->attr.default_pat_index &&
2762 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2763 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2764 }
2765
2766 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2767 struct xe_vma_ops *vops)
2768 {
2769 struct xe_device *xe = vm->xe;
2770 struct drm_gpuva_op *__op;
2771 struct xe_tile *tile;
2772 u8 id, tile_mask = 0;
2773 int err = 0;
2774
2775 lockdep_assert_held_write(&vm->lock);
2776
2777 for_each_tile(tile, vm->xe, id)
2778 tile_mask |= 0x1 << id;
2779
2780 drm_gpuva_for_each_op(__op, ops) {
2781 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2782 struct xe_vma *vma;
2783 unsigned int flags = 0;
2784
2785 INIT_LIST_HEAD(&op->link);
2786 list_add_tail(&op->link, &vops->list);
2787 op->tile_mask = tile_mask;
2788
2789 switch (op->base.op) {
2790 case DRM_GPUVA_OP_MAP:
2791 {
2792 struct xe_vma_mem_attr default_attr = {
2793 .preferred_loc = {
2794 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2795 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2796 },
2797 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2798 .default_pat_index = op->map.pat_index,
2799 .pat_index = op->map.pat_index,
2800 .purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
2801 };
2802
2803 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
2804
2805 vma = new_vma(vm, &op->base.map, &default_attr,
2806 flags);
2807 if (IS_ERR(vma))
2808 return PTR_ERR(vma);
2809
2810 op->map.vma = vma;
2811 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2812 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
2813 op->map.invalidate_on_bind)
2814 xe_vma_ops_incr_pt_update_ops(vops,
2815 op->tile_mask, 1);
2816 break;
2817 }
2818 case DRM_GPUVA_OP_REMAP:
2819 {
2820 struct xe_vma *old =
2821 gpuva_to_vma(op->base.remap.unmap->va);
2822 bool skip = xe_vma_is_cpu_addr_mirror(old);
2823 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2824 int num_remap_ops = 0;
2825
2826 if (op->base.remap.prev)
2827 start = op->base.remap.prev->va.addr +
2828 op->base.remap.prev->va.range;
2829 if (op->base.remap.next)
2830 end = op->base.remap.next->va.addr;
2831
2832 if (xe_vma_is_cpu_addr_mirror(old) &&
2833 xe_svm_has_mapping(vm, start, end)) {
2834 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2835 xe_svm_unmap_address_range(vm, start, end);
2836 else
2837 return -EBUSY;
2838 }
2839
2840 op->remap.start = xe_vma_start(old);
2841 op->remap.range = xe_vma_size(old);
2842 op->remap.old_start = op->remap.start;
2843 op->remap.old_range = op->remap.range;
2844
2845 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
2846 if (op->base.remap.prev) {
2847 vma = new_vma(vm, op->base.remap.prev,
2848 &old->attr, flags);
2849 if (IS_ERR(vma))
2850 return PTR_ERR(vma);
2851
2852 op->remap.prev = vma;
2853
2854 /*
2855 * Userptr creates a new SG mapping so
2856 * we must also rebind.
2857 */
2858 op->remap.skip_prev = skip ||
2859 (!xe_vma_is_userptr(old) &&
2860 IS_ALIGNED(xe_vma_end(vma),
2861 xe_vma_max_pte_size(old)));
2862 if (op->remap.skip_prev) {
2863 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2864 op->remap.range -=
2865 xe_vma_end(vma) -
2866 xe_vma_start(old);
2867 op->remap.start = xe_vma_end(vma);
2868 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2869 (ULL)op->remap.start,
2870 (ULL)op->remap.range);
2871 } else {
2872 num_remap_ops++;
2873 }
2874 }
2875
2876 if (op->base.remap.next) {
2877 vma = new_vma(vm, op->base.remap.next,
2878 &old->attr, flags);
2879 if (IS_ERR(vma))
2880 return PTR_ERR(vma);
2881
2882 op->remap.next = vma;
2883
2884 /*
2885 * Userptr creates a new SG mapping so
2886 * we must also rebind.
2887 */
2888 op->remap.skip_next = skip ||
2889 (!xe_vma_is_userptr(old) &&
2890 IS_ALIGNED(xe_vma_start(vma),
2891 xe_vma_max_pte_size(old)));
2892 if (op->remap.skip_next) {
2893 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2894 op->remap.range -=
2895 xe_vma_end(old) -
2896 xe_vma_start(vma);
2897 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2898 (ULL)op->remap.start,
2899 (ULL)op->remap.range);
2900 } else {
2901 num_remap_ops++;
2902 }
2903 }
2904 if (!skip)
2905 num_remap_ops++;
2906
2907 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2908 break;
2909 }
2910 case DRM_GPUVA_OP_UNMAP:
2911 vma = gpuva_to_vma(op->base.unmap.va);
2912
2913 if (xe_vma_is_cpu_addr_mirror(vma) &&
2914 xe_svm_has_mapping(vm, xe_vma_start(vma),
2915 xe_vma_end(vma)) &&
2916 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP))
2917 return -EBUSY;
2918
2919 if (!xe_vma_is_cpu_addr_mirror(vma))
2920 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2921 break;
2922 case DRM_GPUVA_OP_PREFETCH:
2923 vma = gpuva_to_vma(op->base.prefetch.va);
2924
2925 if (xe_vma_is_userptr(vma)) {
2926 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2927 if (err)
2928 return err;
2929 }
2930
2931 if (xe_vma_is_cpu_addr_mirror(vma))
2932 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2933 op->prefetch_range.ranges_count);
2934 else
2935 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2936
2937 break;
2938 default:
2939 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2940 }
2941
2942 err = xe_vma_op_commit(vm, op);
2943 if (err)
2944 return err;
2945 }
2946
2947 return 0;
2948 }
2949
2950 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2951 bool post_commit, bool prev_post_commit,
2952 bool next_post_commit)
2953 {
2954 lockdep_assert_held_write(&vm->lock);
2955
2956 switch (op->base.op) {
2957 case DRM_GPUVA_OP_MAP:
2958 if (op->map.vma) {
2959 prep_vma_destroy(vm, op->map.vma, post_commit);
2960 xe_vma_destroy_unlocked(op->map.vma);
2961 }
2962 break;
2963 case DRM_GPUVA_OP_UNMAP:
2964 {
2965 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2966
2967 if (vma) {
2968 xe_svm_notifier_lock(vm);
2969 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2970 xe_svm_notifier_unlock(vm);
2971 if (post_commit)
2972 xe_vm_insert_vma(vm, vma);
2973 }
2974 break;
2975 }
2976 case DRM_GPUVA_OP_REMAP:
2977 {
2978 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2979
2980 if (op->remap.prev) {
2981 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2982 xe_vma_destroy_unlocked(op->remap.prev);
2983 }
2984 if (op->remap.next) {
2985 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2986 xe_vma_destroy_unlocked(op->remap.next);
2987 }
2988 if (vma) {
2989 xe_svm_notifier_lock(vm);
2990 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2991 xe_svm_notifier_unlock(vm);
2992 if (post_commit) {
2993 /*
2994 * Restore the old va range, in case of the
2995 * prev/next skip optimisation. Otherwise what
2996 * we re-insert here could be smaller than the
2997 * original range.
2998 */
2999 op->base.remap.unmap->va->va.addr =
3000 op->remap.old_start;
3001 op->base.remap.unmap->va->va.range =
3002 op->remap.old_range;
3003 xe_vm_insert_vma(vm, vma);
3004 }
3005 }
3006 break;
3007 }
3008 case DRM_GPUVA_OP_PREFETCH:
3009 /* Nothing to do */
3010 break;
3011 default:
3012 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3013 }
3014 }
3015
3016 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
3017 struct drm_gpuva_ops **ops,
3018 int num_ops_list)
3019 {
3020 int i;
3021
3022 for (i = num_ops_list - 1; i >= 0; --i) {
3023 struct drm_gpuva_ops *__ops = ops[i];
3024 struct drm_gpuva_op *__op;
3025
3026 if (!__ops)
3027 continue;
3028
3029 drm_gpuva_for_each_op_reverse(__op, __ops) {
3030 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3031
3032 xe_vma_op_unwind(vm, op,
3033 op->flags & XE_VMA_OP_COMMITTED,
3034 op->flags & XE_VMA_OP_PREV_COMMITTED,
3035 op->flags & XE_VMA_OP_NEXT_COMMITTED);
3036 }
3037 }
3038 }
3039
3040 /**
3041 * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate()
3042 * @res_evict: Allow evicting resources during validation
3043 * @validate: Perform BO validation
3044 * @request_decompress: Request BO decompression
3045 * @check_purged: Reject operation if BO is DONTNEED or PURGED
3046 */
3047 struct xe_vma_lock_and_validate_flags {
3048 u32 res_evict : 1;
3049 u32 validate : 1;
3050 u32 request_decompress : 1;
3051 u32 check_purged : 1;
3052 };
3053
3054 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
3055 struct xe_vma_lock_and_validate_flags flags)
3056 {
3057 struct xe_bo *bo = xe_vma_bo(vma);
3058 struct xe_vm *vm = xe_vma_vm(vma);
3059 bool validate_bo = flags.validate;
3060 int err = 0;
3061
3062 if (bo) {
3063 if (!bo->vm)
3064 err = drm_exec_lock_obj(exec, &bo->ttm.base);
3065
3066 /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */
3067 if (!err && flags.check_purged) {
3068 if (xe_bo_madv_is_dontneed(bo))
3069 err = -EBUSY; /* BO marked purgeable */
3070 else if (xe_bo_is_purged(bo))
3071 err = -EINVAL; /* BO already purged */
3072 }
3073
3074 /* Don't validate the BO for DONTNEED/PURGED remap remnants. */
3075 if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED)
3076 validate_bo = false;
3077
3078 if (!err && validate_bo)
3079 err = xe_bo_validate(bo, vm,
3080 xe_vm_allow_vm_eviction(vm) &&
3081 flags.res_evict, exec);
3082
3083 if (err)
3084 return err;
3085
3086 if (flags.request_decompress)
3087 err = xe_bo_decompress(bo);
3088 }
3089
3090 return err;
3091 }
3092
3093 static int check_ufence(struct xe_vma *vma)
3094 {
3095 if (vma->ufence) {
3096 struct xe_user_fence * const f = vma->ufence;
3097
3098 if (!xe_sync_ufence_get_status(f))
3099 return -EBUSY;
3100
3101 vma->ufence = NULL;
3102 xe_sync_ufence_put(f);
3103 }
3104
3105 return 0;
3106 }
3107
3108 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
3109 {
3110 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
3111 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3112 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
3113 int err = 0;
3114
3115 struct xe_svm_range *svm_range;
3116 struct drm_gpusvm_ctx ctx = {};
3117 unsigned long i;
3118
3119 if (!xe_vma_is_cpu_addr_mirror(vma))
3120 return 0;
3121
3122 ctx.read_only = xe_vma_read_only(vma);
3123 ctx.devmem_possible = devmem_possible;
3124 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
3125 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
3126
3127 /* TODO: Threading the migration */
3128 xa_for_each(&op->prefetch_range.range, i, svm_range) {
3129 if (!dpagemap)
3130 xe_svm_range_migrate_to_smem(vm, svm_range);
3131
3132 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
3133 drm_dbg(&vm->xe->drm,
3134 "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
3135 dpagemap ? dpagemap->drm->unique : "system",
3136 xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
3137 }
3138
3139 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
3140 err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
3141 if (err) {
3142 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
3143 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
3144 return -ENODATA;
3145 }
3146 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
3147 }
3148
3149 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
3150 if (err) {
3151 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
3152 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
3153 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
3154 err = -ENODATA;
3155 return err;
3156 }
3157 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
3158 }
3159
3160 return err;
3161 }
3162
3163 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
3164 struct xe_vma_ops *vops, struct xe_vma_op *op)
3165 {
3166 int err = 0;
3167 bool res_evict;
3168
3169 /*
3170 * We only allow evicting a BO within the VM if it is not part of an
3171 * array of binds, as an array of binds can evict another BO within the
3172 * bind.
3173 */
3174 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
3175
3176 switch (op->base.op) {
3177 case DRM_GPUVA_OP_MAP:
3178 if (!op->map.invalidate_on_bind)
3179 err = vma_lock_and_validate(exec, op->map.vma,
3180 (struct xe_vma_lock_and_validate_flags) {
3181 .res_evict = res_evict,
3182 .validate = !xe_vm_in_fault_mode(vm) ||
3183 op->map.immediate,
3184 .request_decompress =
3185 op->map.request_decompress,
3186 .check_purged = false,
3187 });
3188 break;
3189 case DRM_GPUVA_OP_REMAP:
3190 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
3191 if (err)
3192 break;
3193
3194 err = vma_lock_and_validate(exec,
3195 gpuva_to_vma(op->base.remap.unmap->va),
3196 (struct xe_vma_lock_and_validate_flags) {
3197 .res_evict = res_evict,
3198 .validate = false,
3199 .request_decompress = false,
3200 .check_purged = false,
3201 });
3202 if (!err && op->remap.prev)
3203 err = vma_lock_and_validate(exec, op->remap.prev,
3204 (struct xe_vma_lock_and_validate_flags) {
3205 .res_evict = res_evict,
3206 .validate = true,
3207 .request_decompress = false,
3208 .check_purged = false,
3209 });
3210 if (!err && op->remap.next)
3211 err = vma_lock_and_validate(exec, op->remap.next,
3212 (struct xe_vma_lock_and_validate_flags) {
3213 .res_evict = res_evict,
3214 .validate = true,
3215 .request_decompress = false,
3216 .check_purged = false,
3217 });
3218 break;
3219 case DRM_GPUVA_OP_UNMAP:
3220 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
3221 if (err)
3222 break;
3223
3224 err = vma_lock_and_validate(exec,
3225 gpuva_to_vma(op->base.unmap.va),
3226 (struct xe_vma_lock_and_validate_flags) {
3227 .res_evict = res_evict,
3228 .validate = false,
3229 .request_decompress = false,
3230 .check_purged = false,
3231 });
3232 break;
3233 case DRM_GPUVA_OP_PREFETCH:
3234 {
3235 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3236 u32 region;
3237
3238 if (!xe_vma_is_cpu_addr_mirror(vma)) {
3239 region = op->prefetch.region;
3240 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
3241 region <= ARRAY_SIZE(region_to_mem_type));
3242 }
3243
3244 /*
3245 * PREFETCH is the only op that still gates on BO purge state.
3246 * MAP/REMAP handle this inside xe_vma_create() so partial
3247 * unbind on a DONTNEED BO still works. PREFETCH skips
3248 * xe_vma_create() and would migrate a BO with no backing
3249 * store, so reject DONTNEED/PURGED here.
3250 */
3251 err = vma_lock_and_validate(exec,
3252 gpuva_to_vma(op->base.prefetch.va),
3253 (struct xe_vma_lock_and_validate_flags) {
3254 .res_evict = res_evict,
3255 .validate = false,
3256 .request_decompress = false,
3257 .check_purged = true,
3258 });
3259 if (!err && !xe_vma_has_no_bo(vma))
3260 err = xe_bo_migrate(xe_vma_bo(vma),
3261 region_to_mem_type[region],
3262 NULL,
3263 exec);
3264 break;
3265 }
3266 default:
3267 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3268 }
3269
3270 return err;
3271 }
3272
3273 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3274 {
3275 struct xe_vma_op *op;
3276 int err;
3277
3278 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3279 return 0;
3280
3281 list_for_each_entry(op, &vops->list, link) {
3282 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3283 err = prefetch_ranges(vm, op);
3284 if (err)
3285 return err;
3286 }
3287 }
3288
3289 return 0;
3290 }
3291
3292 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3293 struct xe_vm *vm,
3294 struct xe_vma_ops *vops)
3295 {
3296 struct xe_vma_op *op;
3297 int err;
3298
3299 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3300 if (err)
3301 return err;
3302
3303 list_for_each_entry(op, &vops->list, link) {
3304 err = op_lock_and_prep(exec, vm, vops, op);
3305 if (err)
3306 return err;
3307 }
3308
3309 #ifdef TEST_VM_OPS_ERROR
3310 if (vops->inject_error &&
3311 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3312 return -ENOSPC;
3313 #endif
3314
3315 return 0;
3316 }
3317
3318 static void op_trace(struct xe_vma_op *op)
3319 {
3320 switch (op->base.op) {
3321 case DRM_GPUVA_OP_MAP:
3322 trace_xe_vma_bind(op->map.vma);
3323 break;
3324 case DRM_GPUVA_OP_REMAP:
3325 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3326 if (op->remap.prev)
3327 trace_xe_vma_bind(op->remap.prev);
3328 if (op->remap.next)
3329 trace_xe_vma_bind(op->remap.next);
3330 break;
3331 case DRM_GPUVA_OP_UNMAP:
3332 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3333 break;
3334 case DRM_GPUVA_OP_PREFETCH:
3335 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3336 break;
3337 case DRM_GPUVA_OP_DRIVER:
3338 break;
3339 default:
3340 XE_WARN_ON("NOT POSSIBLE");
3341 }
3342 }
3343
3344 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3345 {
3346 struct xe_vma_op *op;
3347
3348 list_for_each_entry(op, &vops->list, link)
3349 op_trace(op);
3350 }
3351
3352 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3353 {
3354 struct xe_exec_queue *q = vops->q;
3355 struct xe_tile *tile;
3356 int number_tiles = 0;
3357 u8 id;
3358
3359 for_each_tile(tile, vm->xe, id) {
3360 if (vops->pt_update_ops[id].num_ops)
3361 ++number_tiles;
3362
3363 if (vops->pt_update_ops[id].q)
3364 continue;
3365
3366 if (q) {
3367 vops->pt_update_ops[id].q = q;
3368 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3369 q = list_next_entry(q, multi_gt_list);
3370 } else {
3371 vops->pt_update_ops[id].q = vm->q[id];
3372 }
3373 }
3374
3375 return number_tiles;
3376 }
3377
3378 static struct dma_fence *ops_execute(struct xe_vm *vm,
3379 struct xe_vma_ops *vops)
3380 {
3381 struct xe_tile *tile;
3382 struct dma_fence *fence = NULL;
3383 struct dma_fence **fences = NULL;
3384 struct dma_fence_array *cf = NULL;
3385 int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
3386 u8 id;
3387
3388 number_tiles = vm_ops_setup_tile_args(vm, vops);
3389 if (number_tiles == 0)
3390 return ERR_PTR(-ENODATA);
3391
3392 for_each_tile(tile, vm->xe, id) {
3393 ++n_fence;
3394
3395 if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
3396 for_each_tlb_inval(i)
3397 ++n_fence;
3398 }
3399
3400 fences = kmalloc_objs(*fences, n_fence);
3401 if (!fences) {
3402 fence = ERR_PTR(-ENOMEM);
3403 goto err_trace;
3404 }
3405
3406 cf = dma_fence_array_alloc(n_fence);
3407 if (!cf) {
3408 fence = ERR_PTR(-ENOMEM);
3409 goto err_out;
3410 }
3411
3412 for_each_tile(tile, vm->xe, id) {
3413 if (!vops->pt_update_ops[id].num_ops)
3414 continue;
3415
3416 err = xe_pt_update_ops_prepare(tile, vops);
3417 if (err) {
3418 fence = ERR_PTR(err);
3419 goto err_out;
3420 }
3421 }
3422
3423 trace_xe_vm_ops_execute(vops);
3424
3425 for_each_tile(tile, vm->xe, id) {
3426 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
3427
3428 fence = NULL;
3429 if (!vops->pt_update_ops[id].num_ops)
3430 goto collect_fences;
3431
3432 fence = xe_pt_update_ops_run(tile, vops);
3433 if (IS_ERR(fence))
3434 goto err_out;
3435
3436 collect_fences:
3437 fences[current_fence++] = fence ?: dma_fence_get_stub();
3438 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
3439 continue;
3440
3441 xe_migrate_job_lock(tile->migrate, q);
3442 for_each_tlb_inval(i)
3443 fences[current_fence++] =
3444 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
3445 xe_migrate_job_unlock(tile->migrate, q);
3446 }
3447
3448 xe_assert(vm->xe, current_fence == n_fence);
3449 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
3450 1, false);
3451 fence = &cf->base;
3452
3453 for_each_tile(tile, vm->xe, id) {
3454 if (!vops->pt_update_ops[id].num_ops)
3455 continue;
3456
3457 xe_pt_update_ops_fini(tile, vops);
3458 }
3459
3460 return fence;
3461
3462 err_out:
3463 for_each_tile(tile, vm->xe, id) {
3464 if (!vops->pt_update_ops[id].num_ops)
3465 continue;
3466
3467 xe_pt_update_ops_abort(tile, vops);
3468 }
3469 while (current_fence)
3470 dma_fence_put(fences[--current_fence]);
3471 kfree(fences);
3472 kfree(cf);
3473
3474 err_trace:
3475 trace_xe_vm_ops_fail(vm);
3476 return fence;
3477 }
3478
3479 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3480 {
3481 if (vma->ufence)
3482 xe_sync_ufence_put(vma->ufence);
3483 vma->ufence = __xe_sync_ufence_get(ufence);
3484 }
3485
3486 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3487 struct xe_user_fence *ufence)
3488 {
3489 switch (op->base.op) {
3490 case DRM_GPUVA_OP_MAP:
3491 if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
3492 vma_add_ufence(op->map.vma, ufence);
3493 break;
3494 case DRM_GPUVA_OP_REMAP:
3495 if (op->remap.prev)
3496 vma_add_ufence(op->remap.prev, ufence);
3497 if (op->remap.next)
3498 vma_add_ufence(op->remap.next, ufence);
3499 break;
3500 case DRM_GPUVA_OP_UNMAP:
3501 break;
3502 case DRM_GPUVA_OP_PREFETCH:
3503 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3504 break;
3505 default:
3506 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3507 }
3508 }
3509
3510 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3511 struct dma_fence *fence)
3512 {
3513 struct xe_user_fence *ufence;
3514 struct xe_vma_op *op;
3515 int i;
3516
3517 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3518 list_for_each_entry(op, &vops->list, link) {
3519 if (ufence)
3520 op_add_ufence(vm, op, ufence);
3521
3522 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3523 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3524 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3525 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3526 fence);
3527 }
3528 if (ufence)
3529 xe_sync_ufence_put(ufence);
3530 if (fence) {
3531 for (i = 0; i < vops->num_syncs; i++)
3532 xe_sync_entry_signal(vops->syncs + i, fence);
3533 }
3534 }
3535
3536 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3537 struct xe_vma_ops *vops)
3538 {
3539 struct xe_validation_ctx ctx;
3540 struct drm_exec exec;
3541 struct dma_fence *fence;
3542 int err = 0;
3543
3544 lockdep_assert_held_write(&vm->lock);
3545
3546 xe_validation_guard(&ctx, &vm->xe->val, &exec,
3547 ((struct xe_val_flags) {
3548 .interruptible = true,
3549 .exec_ignore_duplicates = true,
3550 }), err) {
3551 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3552 drm_exec_retry_on_contention(&exec);
3553 xe_validation_retry_on_oom(&ctx, &err);
3554 if (err)
3555 return ERR_PTR(err);
3556
3557 xe_vm_set_validation_exec(vm, &exec);
3558 fence = ops_execute(vm, vops);
3559 xe_vm_set_validation_exec(vm, NULL);
3560 if (IS_ERR(fence)) {
3561 if (PTR_ERR(fence) == -ENODATA)
3562 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3563 return fence;
3564 }
3565
3566 vm_bind_ioctl_ops_fini(vm, vops, fence);
3567 }
3568
3569 return err ? ERR_PTR(err) : fence;
3570 }
3571 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3572
3573 #define SUPPORTED_FLAGS_STUB \
3574 (DRM_XE_VM_BIND_FLAG_READONLY | \
3575 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3576 DRM_XE_VM_BIND_FLAG_NULL | \
3577 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3578 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3579 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3580 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
3581 DRM_XE_VM_BIND_FLAG_DECOMPRESS)
3582
3583 #ifdef TEST_VM_OPS_ERROR
3584 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3585 #else
3586 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3587 #endif
3588
3589 #define XE_64K_PAGE_MASK 0xffffull
3590 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3591
3592 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3593 struct drm_xe_vm_bind *args,
3594 struct drm_xe_vm_bind_op **bind_ops)
3595 {
3596 int err;
3597 int i;
3598
3599 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3600 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3601 return -EINVAL;
3602
3603 if (XE_IOCTL_DBG(xe, args->extensions))
3604 return -EINVAL;
3605
3606 if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
3607 return -EINVAL;
3608
3609 if (args->num_binds > 1) {
3610 u64 __user *bind_user =
3611 u64_to_user_ptr(args->vector_of_binds);
3612
3613 *bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
3614 args->num_binds,
3615 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3616 if (!*bind_ops)
3617 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3618
3619 err = copy_from_user(*bind_ops, bind_user,
3620 sizeof(struct drm_xe_vm_bind_op) *
3621 args->num_binds);
3622 if (XE_IOCTL_DBG(xe, err)) {
3623 err = -EFAULT;
3624 goto free_bind_ops;
3625 }
3626 } else {
3627 *bind_ops = &args->bind;
3628 }
3629
3630 for (i = 0; i < args->num_binds; ++i) {
3631 u64 range = (*bind_ops)[i].range;
3632 u64 addr = (*bind_ops)[i].addr;
3633 u32 op = (*bind_ops)[i].op;
3634 u32 flags = (*bind_ops)[i].flags;
3635 u32 obj = (*bind_ops)[i].obj;
3636 u64 obj_offset = (*bind_ops)[i].obj_offset;
3637 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3638 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3639 bool is_cpu_addr_mirror = flags &
3640 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3641 bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
3642 u16 pat_index = (*bind_ops)[i].pat_index;
3643 u16 coh_mode;
3644 bool comp_en;
3645
3646 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3647 (!xe_vm_in_fault_mode(vm) ||
3648 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3649 err = -EINVAL;
3650 goto free_bind_ops;
3651 }
3652
3653 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3654 err = -EINVAL;
3655 goto free_bind_ops;
3656 }
3657
3658 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3659 (*bind_ops)[i].pat_index = pat_index;
3660 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3661 comp_en = xe_pat_index_get_comp_en(xe, pat_index);
3662 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3663 err = -EINVAL;
3664 goto free_bind_ops;
3665 }
3666
3667 if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
3668 err = -EINVAL;
3669 goto free_bind_ops;
3670 }
3671
3672 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3673 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3674 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3675 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3676 is_cpu_addr_mirror)) ||
3677 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3678 (is_decompress || is_null || is_cpu_addr_mirror)) ||
3679 XE_IOCTL_DBG(xe, is_decompress &&
3680 xe_pat_index_get_comp_en(xe, pat_index)) ||
3681 XE_IOCTL_DBG(xe, !obj &&
3682 op == DRM_XE_VM_BIND_OP_MAP &&
3683 !is_null && !is_cpu_addr_mirror) ||
3684 XE_IOCTL_DBG(xe, !obj &&
3685 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3686 XE_IOCTL_DBG(xe, addr &&
3687 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3688 XE_IOCTL_DBG(xe, range &&
3689 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3690 XE_IOCTL_DBG(xe, obj &&
3691 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3692 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3693 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3694 XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
3695 is_cpu_addr_mirror) ||
3696 XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
3697 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
3698 is_cpu_addr_mirror) &&
3699 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
3700 XE_IOCTL_DBG(xe, comp_en &&
3701 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3702 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3703 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3704 XE_IOCTL_DBG(xe, obj &&
3705 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3706 XE_IOCTL_DBG(xe, prefetch_region &&
3707 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3708 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3709 /* Guard against undefined shift in BIT(prefetch_region) */
3710 (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
3711 !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
3712 XE_IOCTL_DBG(xe, obj &&
3713 op == DRM_XE_VM_BIND_OP_UNMAP) ||
3714 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
3715 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
3716 err = -EINVAL;
3717 goto free_bind_ops;
3718 }
3719
3720 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3721 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3722 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3723 XE_IOCTL_DBG(xe, !range &&
3724 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3725 err = -EINVAL;
3726 goto free_bind_ops;
3727 }
3728
3729 if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
3730 XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
3731 XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
3732 err = -EOPNOTSUPP;
3733 goto free_bind_ops;
3734 }
3735 }
3736
3737 return 0;
3738
3739 free_bind_ops:
3740 if (args->num_binds > 1)
3741 kvfree(*bind_ops);
3742 *bind_ops = NULL;
3743 return err;
3744 }
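
/*
 * When num_binds > 1, userspace passes the operations as an array rather
 * than inline: vector_of_binds points at num_binds consecutive
 * struct drm_xe_vm_bind_op entries, which the function above copies in and
 * validates one by one. Illustrative sketch (buffer contents assumed):
 *
 *	struct drm_xe_vm_bind_op ops[2] = { ... };
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = ARRAY_SIZE(ops),
 *		.vector_of_binds = (uintptr_t)ops,
 *	};
 */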
3745
3746 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3747 struct xe_exec_queue *q,
3748 struct xe_sync_entry *syncs,
3749 int num_syncs)
3750 {
3751 struct dma_fence *fence = NULL;
3752 int i, err = 0;
3753
3754 if (num_syncs) {
3755 fence = xe_sync_in_fence_get(syncs, num_syncs,
3756 to_wait_exec_queue(vm, q), vm);
3757 if (IS_ERR(fence))
3758 return PTR_ERR(fence);
3759
3760 for (i = 0; i < num_syncs; i++)
3761 xe_sync_entry_signal(&syncs[i], fence);
3762 }
3763
3764 dma_fence_put(fence);
3765
3766 return err;
3767 }
3768
3769 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3770 struct xe_exec_queue *q,
3771 struct xe_sync_entry *syncs, u32 num_syncs)
3772 {
3773 memset(vops, 0, sizeof(*vops));
3774 INIT_LIST_HEAD(&vops->list);
3775 vops->vm = vm;
3776 vops->q = q;
3777 vops->syncs = syncs;
3778 vops->num_syncs = num_syncs;
3779 vops->flags = 0;
3780 }
3781
3782 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3783 u64 addr, u64 range, u64 obj_offset,
3784 u16 pat_index, u32 op, u32 bind_flags)
3785 {
3786 u16 coh_mode;
3787 bool comp_en;
3788
3789 if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
3790 xe_pat_index_get_comp_en(xe, pat_index)))
3791 return -EINVAL;
3792
3793 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3794 XE_IOCTL_DBG(xe, obj_offset >
3795 xe_bo_size(bo) - range)) {
3796 return -EINVAL;
3797 }
3798
3799 /*
3800 * Some platforms require 64k VM_BIND alignment,
3801 * specifically those with XE_VRAM_FLAGS_NEED64K.
3802 *
3803 * Other platforms may have BO's set to 64k physical placement,
3804 * but can be mapped at 4k offsets anyway. This check is only
3805 * there for the former case.
3806 */
3807 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3808 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3809 if (XE_IOCTL_DBG(xe, obj_offset &
3810 XE_64K_PAGE_MASK) ||
3811 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3812 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3813 return -EINVAL;
3814 }
3815 }
3816
3817 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3818 if (bo->cpu_caching) {
3819 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3820 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3821 return -EINVAL;
3822 }
3823 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3824 /*
3825 * An imported dma-buf from a different device should
3826 * require 1-way or 2-way coherency since we don't know
3827 * how it was mapped on the CPU. Just assume it is
3828 * potentially cached on the CPU side.
3829 */
3830 return -EINVAL;
3831 }
3832
3833 /*
3834 * Ensures that imported buffer objects (dma-bufs) are not mapped
3835 * with a PAT index that enables compression.
3836 */
3837 comp_en = xe_pat_index_get_comp_en(xe, pat_index);
3838 if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
3839 return -EINVAL;
3840
3841 if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
3842 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
3843 return -EINVAL;
3844
3845 /* If a BO is protected it can only be mapped if the key is still valid */
3846 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3847 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3848 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3849 return -ENOEXEC;
3850
3851 return 0;
3852 }
3853
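/*
 * Illustrative sketch (not part of the driver): the cpu_caching/coherency
 * rule enforced above, written as a caller-side pre-check. The helper name
 * "bind_args_coherent" and its arguments are hypothetical; the PAT and
 * caching constants are the ones used in xe_vm_bind_ioctl_validate_bo().
 *
 *	static bool bind_args_coherent(struct xe_device *xe, u16 pat_index,
 *				       u16 cpu_caching)
 *	{
 *		u16 coh = xe_pat_index_get_coh_mode(xe, pat_index);
 *
 *		// A CPU write-back cached BO needs at least 1-way coherency.
 *		return !(cpu_caching == DRM_XE_GEM_CPU_CACHING_WB &&
 *			 coh == XE_COH_NONE);
 *	}
 */
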
3854 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3855 {
3856 struct xe_device *xe = to_xe_device(dev);
3857 struct xe_file *xef = to_xe_file(file);
3858 struct drm_xe_vm_bind *args = data;
3859 struct drm_xe_sync __user *syncs_user;
3860 struct xe_bo **bos = NULL;
3861 struct drm_gpuva_ops **ops = NULL;
3862 struct xe_vm *vm;
3863 struct xe_exec_queue *q = NULL;
3864 u32 num_syncs, num_ufence = 0;
3865 struct xe_sync_entry *syncs = NULL;
3866 struct drm_xe_vm_bind_op *bind_ops = NULL;
3867 struct xe_vma_ops vops;
3868 struct dma_fence *fence;
3869 int err;
3870 int i;
3871
3872 vm = xe_vm_lookup(xef, args->vm_id);
3873 if (XE_IOCTL_DBG(xe, !vm))
3874 return -EINVAL;
3875
3876 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3877 if (err)
3878 goto put_vm;
3879
3880 if (args->exec_queue_id) {
3881 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3882 if (XE_IOCTL_DBG(xe, !q)) {
3883 err = -ENOENT;
3884 goto free_bind_ops;
3885 }
3886
3887 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3888 err = -EINVAL;
3889 goto put_exec_queue;
3890 }
3891 }
3892
3893 if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
3894 err = -EINVAL;
3895 goto put_exec_queue;
3896 }
3897
3898 /* Ensure all UNMAPs visible */
3899 xe_svm_flush(vm);
3900
3901 err = down_write_killable(&vm->lock);
3902 if (err)
3903 goto put_exec_queue;
3904
3905 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3906 err = -ENOENT;
3907 goto release_vm_lock;
3908 }
3909
3910 for (i = 0; i < args->num_binds; ++i) {
3911 u64 range = bind_ops[i].range;
3912 u64 addr = bind_ops[i].addr;
3913
3914 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3915 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3916 err = -EINVAL;
3917 goto release_vm_lock;
3918 }
3919 }
3920
3921 if (args->num_binds) {
3922 bos = kvzalloc_objs(*bos, args->num_binds,
3923 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3924 if (!bos) {
3925 err = -ENOMEM;
3926 goto release_vm_lock;
3927 }
3928
3929 ops = kvzalloc_objs(*ops, args->num_binds,
3930 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3931 if (!ops) {
3932 err = -ENOMEM;
3933 goto free_bos;
3934 }
3935 }
3936
3937 for (i = 0; i < args->num_binds; ++i) {
3938 struct drm_gem_object *gem_obj;
3939 u64 range = bind_ops[i].range;
3940 u64 addr = bind_ops[i].addr;
3941 u32 obj = bind_ops[i].obj;
3942 u64 obj_offset = bind_ops[i].obj_offset;
3943 u16 pat_index = bind_ops[i].pat_index;
3944 u32 op = bind_ops[i].op;
3945 u32 bind_flags = bind_ops[i].flags;
3946
3947 if (!obj)
3948 continue;
3949
3950 gem_obj = drm_gem_object_lookup(file, obj);
3951 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3952 err = -ENOENT;
3953 goto put_obj;
3954 }
3955 bos[i] = gem_to_xe_bo(gem_obj);
3956
3957 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3958 obj_offset, pat_index, op,
3959 bind_flags);
3960 if (err)
3961 goto put_obj;
3962 }
3963
3964 if (args->num_syncs) {
3965 syncs = kzalloc_objs(*syncs, args->num_syncs);
3966 if (!syncs) {
3967 err = -ENOMEM;
3968 goto put_obj;
3969 }
3970 }
3971
3972 syncs_user = u64_to_user_ptr(args->syncs);
3973 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3974 struct xe_exec_queue *__q = q ?: vm->q[0];
3975
3976 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3977 &syncs_user[num_syncs],
3978 __q->ufence_syncobj,
3979 ++__q->ufence_timeline_value,
3980 (xe_vm_in_lr_mode(vm) ?
3981 SYNC_PARSE_FLAG_LR_MODE : 0) |
3982 (!args->num_binds ?
3983 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3984 if (err)
3985 goto free_syncs;
3986
3987 if (xe_sync_is_ufence(&syncs[num_syncs]))
3988 num_ufence++;
3989 }
3990
3991 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3992 err = -EINVAL;
3993 goto free_syncs;
3994 }
3995
3996 if (!args->num_binds) {
3997 err = -ENODATA;
3998 goto free_syncs;
3999 }
4000
4001 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
4002 if (args->num_binds > 1)
4003 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
4004 for (i = 0; i < args->num_binds; ++i) {
4005 u64 range = bind_ops[i].range;
4006 u64 addr = bind_ops[i].addr;
4007 u32 op = bind_ops[i].op;
4008 u32 flags = bind_ops[i].flags;
4009 u64 obj_offset = bind_ops[i].obj_offset;
4010 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
4011 u16 pat_index = bind_ops[i].pat_index;
4012
4013 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
4014 addr, range, op, flags,
4015 prefetch_region, pat_index);
4016 if (IS_ERR(ops[i])) {
4017 err = PTR_ERR(ops[i]);
4018 ops[i] = NULL;
4019 goto unwind_ops;
4020 }
4021
4022 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
4023 if (err)
4024 goto unwind_ops;
4025
4026 #ifdef TEST_VM_OPS_ERROR
4027 if (flags & FORCE_OP_ERROR) {
4028 vops.inject_error = true;
4029 vm->xe->vm_inject_error_position =
4030 (vm->xe->vm_inject_error_position + 1) %
4031 FORCE_OP_ERROR_COUNT;
4032 }
4033 #endif
4034 }
4035
4036 /* Nothing to do */
4037 if (list_empty(&vops.list)) {
4038 err = -ENODATA;
4039 goto unwind_ops;
4040 }
4041
4042 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
4043 if (err)
4044 goto unwind_ops;
4045
4046 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
4047 if (err)
4048 goto unwind_ops;
4049
4050 fence = vm_bind_ioctl_ops_execute(vm, &vops);
4051 if (IS_ERR(fence))
4052 err = PTR_ERR(fence);
4053 else
4054 dma_fence_put(fence);
4055
4056 unwind_ops:
4057 if (err && err != -ENODATA)
4058 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
4059 xe_vma_ops_fini(&vops);
4060 for (i = args->num_binds - 1; i >= 0; --i)
4061 if (ops[i])
4062 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
4063 free_syncs:
4064 if (err == -ENODATA)
4065 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
4066 while (num_syncs--)
4067 xe_sync_entry_cleanup(&syncs[num_syncs]);
4068
4069 kfree(syncs);
4070 put_obj:
4071 for (i = 0; i < args->num_binds; ++i)
4072 xe_bo_put(bos[i]);
4073
4074 kvfree(ops);
4075 free_bos:
4076 kvfree(bos);
4077 release_vm_lock:
4078 up_write(&vm->lock);
4079 put_exec_queue:
4080 if (q)
4081 xe_exec_queue_put(q);
4082 free_bind_ops:
4083 if (args->num_binds > 1)
4084 kvfree(bind_ops);
4085 put_vm:
4086 xe_vm_put(vm);
4087 return err;
4088 }
4089
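/*
 * Illustrative sketch (not part of the driver): a userspace-style view of a
 * single MAP bind as consumed by the ioctl above. Field names mirror the
 * bind_ops[] accesses in xe_vm_bind_ioctl(); how the op is embedded in
 * struct drm_xe_vm_bind (inline for one bind, a user pointer for arrays) is
 * defined by the uAPI header and only assumed here. "bo_handle", "bo_size"
 * and "pat_wb" are hypothetical values.
 *
 *	struct drm_xe_vm_bind_op op = {
 *		.obj		= bo_handle,	// GEM handle, 0 for sparse/NULL binds
 *		.obj_offset	= 0,		// page aligned
 *		.range		= bo_size,	// page aligned
 *		.addr		= 0x1a0000,	// page-aligned GPU virtual address
 *		.op		= DRM_XE_VM_BIND_OP_MAP,
 *		.pat_index	= pat_wb,	// platform-specific PAT index
 *	};
 */
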
4090 /*
4091 * Map access type, fault type, and fault level from the current bspec
4092 * definitions to the user-facing abstraction. The mapping is
4093 * approximately 1-to-1, with access type being the only notable
4094 * exception as it carries additional prefetch-status data that
4095 * needs to be masked out.
4096 */
4097 static u8 xe_to_user_access_type(u8 access_type)
4098 {
4099 return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK;
4100 }
4101
4102 static u8 xe_to_user_fault_type(u8 fault_type)
4103 {
4104 return fault_type;
4105 }
4106
4107 static u8 xe_to_user_fault_level(u8 fault_level)
4108 {
4109 return fault_level;
4110 }
4111
4112 static int fill_faults(struct xe_vm *vm,
4113 struct drm_xe_vm_get_property *args)
4114 {
4115 struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data);
4116 struct xe_vm_fault *fault_list, fault_entry = { 0 };
4117 struct xe_vm_fault_entry *entry;
4118 int ret = 0, i = 0, count, entry_size;
4119
4120 entry_size = sizeof(struct xe_vm_fault);
4121 count = args->size / entry_size;
4122
4123 fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL);
4124 if (!fault_list)
4125 return -ENOMEM;
4126
4127 spin_lock(&vm->faults.lock);
4128 list_for_each_entry(entry, &vm->faults.list, list) {
4129 if (i == count)
4130 break;
4131
4132 fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address);
4133 fault_entry.address_precision = entry->address_precision;
4134
4135 fault_entry.access_type = xe_to_user_access_type(entry->access_type);
4136 fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type);
4137 fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level);
4138
4139 memcpy(&fault_list[i], &fault_entry, entry_size);
4140
4141 i++;
4142 }
4143 spin_unlock(&vm->faults.lock);
4144
4145 ret = copy_to_user(usr_ptr, fault_list, args->size);
4146
4147 kfree(fault_list);
4148 return ret ? -EFAULT : 0;
4149 }
4150
4151 static int xe_vm_get_property_helper(struct xe_vm *vm,
4152 struct drm_xe_vm_get_property *args)
4153 {
4154 size_t size;
4155
4156 switch (args->property) {
4157 case DRM_XE_VM_GET_PROPERTY_FAULTS:
4158 spin_lock(&vm->faults.lock);
4159 size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len);
4160 spin_unlock(&vm->faults.lock);
4161
4162 if (!args->size) {
4163 args->size = size;
4164 return 0;
4165 }
4166
4167 /*
4168 * The number of faults may increase between calls to
4169 * xe_vm_get_property_ioctl, so just report the number of
4170 * faults the user requests, provided it does not exceed
4171 * the number of faults currently in the VM fault array.
4172 *
4173 * Also require that args->size is a multiple of the
4174 * xe_vm_fault struct size.
4175 */
4176 if (args->size > size || args->size % sizeof(struct xe_vm_fault))
4177 return -EINVAL;
4178
4179 return fill_faults(vm, args);
4180 }
4181 return -EINVAL;
4182 }
4183
4184 int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
4185 struct drm_file *file)
4186 {
4187 struct xe_device *xe = to_xe_device(drm);
4188 struct xe_file *xef = to_xe_file(file);
4189 struct drm_xe_vm_get_property *args = data;
4190 struct xe_vm *vm;
4191 int ret = 0;
4192
4193 if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
4194 args->reserved[2] || args->extensions ||
4195 args->pad)))
4196 return -EINVAL;
4197
4198 vm = xe_vm_lookup(xef, args->vm_id);
4199 if (XE_IOCTL_DBG(xe, !vm))
4200 return -ENOENT;
4201
4202 ret = xe_vm_get_property_helper(vm, args);
4203
4204 xe_vm_put(vm);
4205 return ret;
4206 }
4207
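/*
 * Illustrative sketch (not part of the driver): the two-call pattern the
 * size handling in xe_vm_get_property_helper() is designed for. The ioctl
 * plumbing lives in userspace and is only assumed here.
 *
 *	struct drm_xe_vm_get_property args = {
 *		.vm_id		= vm_id,
 *		.property	= DRM_XE_VM_GET_PROPERTY_FAULTS,
 *		.size		= 0,	// first call: ask for the required size
 *	};
 *
 *	// 1) issue the get-property ioctl; args.size now holds
 *	//    nr_faults * sizeof(struct xe_vm_fault)
 *	// 2) allocate a buffer of args.size bytes (or a smaller multiple of
 *	//    sizeof(struct xe_vm_fault)), point args.data at it and issue
 *	//    the ioctl again to have fill_faults() copy the entries out
 */
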
4208 /**
4209 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
4210 * @vm: VM to bind the BO to
4211 * @bo: BO to bind
4212 * @q: exec queue to use for the bind (optional)
4213 * @addr: address at which to bind the BO
4214 * @cache_lvl: PAT cache level to use
4215 *
4216 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
4217 * kernel-owned VM.
4218 *
4219 * Returns a dma_fence to track the binding completion if the job to do so was
4220 * successfully submitted, an error pointer otherwise.
4221 */
4222 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
4223 struct xe_exec_queue *q, u64 addr,
4224 enum xe_cache_level cache_lvl)
4225 {
4226 struct xe_vma_ops vops;
4227 struct drm_gpuva_ops *ops = NULL;
4228 struct dma_fence *fence;
4229 int err;
4230
4231 xe_bo_get(bo);
4232 xe_vm_get(vm);
4233 if (q)
4234 xe_exec_queue_get(q);
4235
4236 down_write(&vm->lock);
4237
4238 xe_vma_ops_init(&vops, vm, q, NULL, 0);
4239
4240 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
4241 DRM_XE_VM_BIND_OP_MAP, 0, 0,
4242 vm->xe->pat.idx[cache_lvl]);
4243 if (IS_ERR(ops)) {
4244 err = PTR_ERR(ops);
4245 goto release_vm_lock;
4246 }
4247
4248 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4249 if (err)
4250 goto release_vm_lock;
4251
4252 xe_assert(vm->xe, !list_empty(&vops.list));
4253
4254 err = xe_vma_ops_alloc(&vops, false);
4255 if (err)
4256 goto unwind_ops;
4257
4258 fence = vm_bind_ioctl_ops_execute(vm, &vops);
4259 if (IS_ERR(fence))
4260 err = PTR_ERR(fence);
4261
4262 unwind_ops:
4263 if (err && err != -ENODATA)
4264 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4265
4266 xe_vma_ops_fini(&vops);
4267 drm_gpuva_ops_free(&vm->gpuvm, ops);
4268
4269 release_vm_lock:
4270 up_write(&vm->lock);
4271
4272 if (q)
4273 xe_exec_queue_put(q);
4274 xe_vm_put(vm);
4275 xe_bo_put(bo);
4276
4277 if (err)
4278 fence = ERR_PTR(err);
4279
4280 return fence;
4281 }
4282
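/*
 * Illustrative sketch (not part of the driver): a synchronous caller of
 * xe_vm_bind_kernel_bo(). "kernel_vm", "kernel_bo" and "vm_addr" are
 * hypothetical; XE_CACHE_WB is assumed to be a valid xe_cache_level here.
 *
 *	struct dma_fence *fence;
 *
 *	fence = xe_vm_bind_kernel_bo(kernel_vm, kernel_bo, NULL, vm_addr,
 *				     XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *
 *	dma_fence_wait(fence, false);	// block until the bind job completes
 *	dma_fence_put(fence);
 */
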
4283 /**
4284 * xe_vm_lock() - Lock the vm's dma_resv object
4285 * @vm: The struct xe_vm whose lock is to be locked
4286 * @intr: Whether to perform the wait interruptibly
4287 *
4288 * Return: 0 on success, -EINTR if @intr is true and the wait for a
4289 * contended lock was interrupted. If @intr is false, the function
4290 * always returns 0.
4291 */
4292 int xe_vm_lock(struct xe_vm *vm, bool intr)
4293 {
4294 int ret;
4295
4296 if (intr)
4297 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
4298 else
4299 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
4300
4301 return ret;
4302 }
4303
4304 /**
4305 * xe_vm_unlock() - Unlock the vm's dma_resv object
4306 * @vm: The struct xe_vm whose lock is to be released.
4307 *
4308 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
4309 */
4310 void xe_vm_unlock(struct xe_vm *vm)
4311 {
4312 dma_resv_unlock(xe_vm_resv(vm));
4313 }
4314
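/*
 * Illustrative sketch (not part of the driver): the basic interruptible
 * lock/unlock pattern around code that needs the vm's reservation lock.
 *
 *	int err = xe_vm_lock(vm, true);
 *
 *	if (err)
 *		return err;	// -EINTR, the wait was interrupted
 *
 *	// ... touch state protected by xe_vm_resv(vm) ...
 *
 *	xe_vm_unlock(vm);
 */
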
4315 /**
4316 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
4317 * VMA.
4318 * @vma: VMA to invalidate
4319 * @batch: TLB invalidation batch to populate; caller must later call
4320 * xe_tlb_inval_batch_wait() on it to wait for completion
4321 *
4322 * Walks the page-table leaves and memsets the entries owned by this VMA to
4323 * zero, then issues the TLB invalidations. It does not block waiting for the
4324 * TLB flush to complete; instead it populates @batch, which can be waited on
4325 * using xe_tlb_inval_batch_wait().
4326 *
4327 * Returns 0 for success, negative error code otherwise.
4328 */
4329 int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
4330 {
4331 struct xe_device *xe = xe_vma_vm(vma)->xe;
4332 struct xe_vm *vm = xe_vma_vm(vma);
4333 struct xe_tile *tile;
4334 u8 tile_mask = 0;
4335 int ret = 0;
4336 u8 id;
4337
4338 xe_assert(xe, !xe_vma_is_null(vma));
4339 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
4340 trace_xe_vma_invalidate(vma);
4341
4342 vm_dbg(&vm->xe->drm,
4343 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
4344 xe_vma_start(vma), xe_vma_size(vma));
4345
4346 /*
4347 * Check that we don't race with page-table updates; the
4348 * tile_invalidated update is safe.
4349 */
4350 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
4351 if (xe_vma_is_userptr(vma)) {
4352 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
4353 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
4354 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
4355
4356 WARN_ON_ONCE(!mmu_interval_check_retry
4357 (&to_userptr_vma(vma)->userptr.notifier,
4358 to_userptr_vma(vma)->userptr.pages.notifier_seq));
4359 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
4360 DMA_RESV_USAGE_BOOKKEEP));
4361
4362 } else {
4363 xe_bo_assert_held(xe_vma_bo(vma));
4364 }
4365 }
4366
4367 for_each_tile(tile, xe, id)
4368 if (xe_pt_zap_ptes(tile, vma))
4369 tile_mask |= BIT(id);
4370
4371 xe_device_wmb(xe);
4372
4373 ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
4374 xe_vma_start(vma), xe_vma_end(vma),
4375 tile_mask, batch);
4376
4377 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
4378 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
4379 return ret;
4380 }
4381
4382 /**
4383 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
4384 * @vma: VMA to invalidate
4385 *
4386 * Walks the page-table leaves and memsets the entries owned by this VMA to
4387 * zero, invalidates the TLBs, and blocks until the TLB invalidation is
4388 * complete.
4389 *
4390 * Returns 0 for success, negative error code otherwise.
4391 */
4392 int xe_vm_invalidate_vma(struct xe_vma *vma)
4393 {
4394 struct xe_tlb_inval_batch batch;
4395 int ret;
4396
4397 ret = xe_vm_invalidate_vma_submit(vma, &batch);
4398 if (ret)
4399 return ret;
4400
4401 xe_tlb_inval_batch_wait(&batch);
4402 return ret;
4403 }
4404
4405 int xe_vm_validate_protected(struct xe_vm *vm)
4406 {
4407 struct drm_gpuva *gpuva;
4408 int err = 0;
4409
4410 if (!vm)
4411 return -ENODEV;
4412
4413 mutex_lock(&vm->snap_mutex);
4414
4415 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4416 struct xe_vma *vma = gpuva_to_vma(gpuva);
4417 struct xe_bo *bo = vma->gpuva.gem.obj ?
4418 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4419
4420 if (!bo)
4421 continue;
4422
4423 if (xe_bo_is_protected(bo)) {
4424 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
4425 if (err)
4426 break;
4427 }
4428 }
4429
4430 mutex_unlock(&vm->snap_mutex);
4431 return err;
4432 }
4433
4434 struct xe_vm_snapshot {
4435 int uapi_flags;
4436 unsigned long num_snaps;
4437 struct {
4438 u64 ofs, bo_ofs;
4439 unsigned long len;
4440 #define XE_VM_SNAP_FLAG_USERPTR BIT(0)
4441 #define XE_VM_SNAP_FLAG_READ_ONLY BIT(1)
4442 #define XE_VM_SNAP_FLAG_IS_NULL BIT(2)
4443 unsigned long flags;
4444 int uapi_mem_region;
4445 int pat_index;
4446 int cpu_caching;
4447 struct xe_bo *bo;
4448 void *data;
4449 struct mm_struct *mm;
4450 } snap[];
4451 };
4452
4453 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
4454 {
4455 unsigned long num_snaps = 0, i;
4456 struct xe_vm_snapshot *snap = NULL;
4457 struct drm_gpuva *gpuva;
4458
4459 if (!vm)
4460 return NULL;
4461
4462 mutex_lock(&vm->snap_mutex);
4463 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4464 if (gpuva->flags & XE_VMA_DUMPABLE)
4465 num_snaps++;
4466 }
4467
4468 if (num_snaps)
4469 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4470 if (!snap) {
4471 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4472 goto out_unlock;
4473 }
4474
4475 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
4476 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
4477 if (vm->flags & XE_VM_FLAG_LR_MODE)
4478 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
4479 if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
4480 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
4481
4482 snap->num_snaps = num_snaps;
4483 i = 0;
4484 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4485 struct xe_vma *vma = gpuva_to_vma(gpuva);
4486 struct xe_bo *bo = vma->gpuva.gem.obj ?
4487 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4488
4489 if (!(gpuva->flags & XE_VMA_DUMPABLE))
4490 continue;
4491
4492 snap->snap[i].ofs = xe_vma_start(vma);
4493 snap->snap[i].len = xe_vma_size(vma);
4494 snap->snap[i].flags = xe_vma_read_only(vma) ?
4495 XE_VM_SNAP_FLAG_READ_ONLY : 0;
4496 snap->snap[i].pat_index = vma->attr.pat_index;
4497 if (bo) {
4498 snap->snap[i].cpu_caching = bo->cpu_caching;
4499 snap->snap[i].bo = xe_bo_get(bo);
4500 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4501 switch (bo->ttm.resource->mem_type) {
4502 case XE_PL_SYSTEM:
4503 case XE_PL_TT:
4504 snap->snap[i].uapi_mem_region = 0;
4505 break;
4506 case XE_PL_VRAM0:
4507 snap->snap[i].uapi_mem_region = 1;
4508 break;
4509 case XE_PL_VRAM1:
4510 snap->snap[i].uapi_mem_region = 2;
4511 break;
4512 }
4513 } else if (xe_vma_is_userptr(vma)) {
4514 struct mm_struct *mm =
4515 to_userptr_vma(vma)->userptr.notifier.mm;
4516
4517 if (mmget_not_zero(mm))
4518 snap->snap[i].mm = mm;
4519 else
4520 snap->snap[i].data = ERR_PTR(-EFAULT);
4521
4522 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4523 snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
4524 snap->snap[i].uapi_mem_region = 0;
4525 } else if (xe_vma_is_null(vma)) {
4526 snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
4527 snap->snap[i].uapi_mem_region = -1;
4528 } else {
4529 snap->snap[i].data = ERR_PTR(-ENOENT);
4530 snap->snap[i].uapi_mem_region = -1;
4531 }
4532 i++;
4533 }
4534
4535 out_unlock:
4536 mutex_unlock(&vm->snap_mutex);
4537 return snap;
4538 }
4539
4540 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4541 {
4542 if (IS_ERR_OR_NULL(snap))
4543 return;
4544
4545 for (int i = 0; i < snap->num_snaps; i++) {
4546 struct xe_bo *bo = snap->snap[i].bo;
4547 int err;
4548
4549 if (IS_ERR(snap->snap[i].data) ||
4550 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4551 continue;
4552
4553 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4554 if (!snap->snap[i].data) {
4555 snap->snap[i].data = ERR_PTR(-ENOMEM);
4556 goto cleanup_bo;
4557 }
4558
4559 if (bo) {
4560 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4561 snap->snap[i].data, snap->snap[i].len);
4562 } else {
4563 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4564
4565 kthread_use_mm(snap->snap[i].mm);
4566 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4567 err = 0;
4568 else
4569 err = -EFAULT;
4570 kthread_unuse_mm(snap->snap[i].mm);
4571
4572 mmput(snap->snap[i].mm);
4573 snap->snap[i].mm = NULL;
4574 }
4575
4576 if (err) {
4577 kvfree(snap->snap[i].data);
4578 snap->snap[i].data = ERR_PTR(err);
4579 }
4580
4581 cleanup_bo:
4582 xe_bo_put(bo);
4583 snap->snap[i].bo = NULL;
4584 }
4585 }
4586
4587 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4588 {
4589 unsigned long i, j;
4590
4591 if (IS_ERR_OR_NULL(snap)) {
4592 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4593 return;
4594 }
4595
4596 drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
4597 for (i = 0; i < snap->num_snaps; i++) {
4598 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4599
4600 drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
4601 snap->snap[i].ofs,
4602 snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
4603 "read_only" : "read_write",
4604 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
4605 "null_sparse" :
4606 snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
4607 "userptr" : "bo",
4608 snap->snap[i].uapi_mem_region == -1 ? 0 :
4609 BIT(snap->snap[i].uapi_mem_region),
4610 snap->snap[i].pat_index,
4611 snap->snap[i].cpu_caching);
4612
4613 if (IS_ERR(snap->snap[i].data)) {
4614 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4615 PTR_ERR(snap->snap[i].data));
4616 continue;
4617 }
4618
4619 if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4620 continue;
4621
4622 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4623
4624 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4625 u32 *val = snap->snap[i].data + j;
4626 char dumped[ASCII85_BUFSZ];
4627
4628 drm_puts(p, ascii85_encode(*val, dumped));
4629 }
4630
4631 drm_puts(p, "\n");
4632
4633 if (drm_coredump_printer_is_full(p))
4634 return;
4635 }
4636 }
4637
4638 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4639 {
4640 unsigned long i;
4641
4642 if (IS_ERR_OR_NULL(snap))
4643 return;
4644
4645 for (i = 0; i < snap->num_snaps; i++) {
4646 if (!IS_ERR(snap->snap[i].data))
4647 kvfree(snap->snap[i].data);
4648 xe_bo_put(snap->snap[i].bo);
4649 if (snap->snap[i].mm)
4650 mmput(snap->snap[i].mm);
4651 }
4652 kvfree(snap);
4653 }
4654
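/*
 * Illustrative sketch (not part of the driver): the intended snapshot
 * lifecycle, split between the capture context and a later context that is
 * allowed to sleep. "p" is a caller-provided drm_printer.
 *
 *	struct xe_vm_snapshot *snap;
 *
 *	snap = xe_vm_snapshot_capture(vm);	// lightweight, GFP_NOWAIT
 *
 *	// later, from a context that may sleep and fault in user memory:
 *	xe_vm_snapshot_capture_delayed(snap);	// copies the VMA contents
 *	xe_vm_snapshot_print(snap, p);
 *	xe_vm_snapshot_free(snap);
 */
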
4655 /**
4656 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4657 * @xe: Pointer to the Xe device structure
4658 * @vma: Pointer to the virtual memory area (VMA) structure
4659 * @is_atomic: True when called from the page-fault path for an atomic operation
4660 *
4661 * This function determines whether the given VMA needs to be migrated to
4662 * VRAM in order to perform an atomic GPU operation.
4663 *
4664 * Return:
4665 * 1 - Migration to VRAM is required
4666 * 0 - Migration is not required
4667 * -EACCES - Invalid access for atomic memory attr
4668 *
4669 */
4670 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4671 {
4672 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4673 vma->attr.atomic_access;
4674
4675 if (!IS_DGFX(xe) || !is_atomic)
4676 return false;
4677
4678 /*
4679 * NOTE: The checks implemented here are platform-specific. For
4680 * instance, on a device supporting CXL atomics, these would ideally
4681 * work universally without additional handling.
4682 */
4683 switch (atomic_access) {
4684 case DRM_XE_ATOMIC_DEVICE:
4685 return !xe->info.has_device_atomics_on_smem;
4686
4687 case DRM_XE_ATOMIC_CPU:
4688 return -EACCES;
4689
4690 case DRM_XE_ATOMIC_UNDEFINED:
4691 case DRM_XE_ATOMIC_GLOBAL:
4692 default:
4693 return 1;
4694 }
4695 }
4696
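/*
 * Illustrative sketch (not part of the driver): how a page-fault handler
 * might consume the tri-state return value documented above.
 * "migrate_to_vram" is hypothetical.
 *
 *	int need = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);
 *
 *	if (need < 0)
 *		return need;		// -EACCES: CPU-only atomics, reject the fault
 *	if (need)
 *		err = migrate_to_vram(vma);	// atomic access requires VRAM
 *	// need == 0: service the fault in the current placement
 */
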
4697 static int xe_vm_alloc_vma(struct xe_vm *vm,
4698 struct drm_gpuvm_map_req *map_req,
4699 bool is_madvise)
4700 {
4701 struct xe_vma_ops vops;
4702 struct drm_gpuva_ops *ops = NULL;
4703 struct drm_gpuva_op *__op;
4704 unsigned int vma_flags = 0;
4705 bool remap_op = false;
4706 struct xe_vma_mem_attr tmp_attr = {};
4707 u16 default_pat;
4708 int err;
4709
4710 lockdep_assert_held_write(&vm->lock);
4711
4712 if (is_madvise)
4713 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4714 else
4715 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4716
4717 if (IS_ERR(ops))
4718 return PTR_ERR(ops);
4719
4720 if (list_empty(&ops->list)) {
4721 err = 0;
4722 goto free_ops;
4723 }
4724
4725 drm_gpuva_for_each_op(__op, ops) {
4726 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4727 struct xe_vma *vma = NULL;
4728
4729 if (!is_madvise) {
4730 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4731 vma = gpuva_to_vma(op->base.unmap.va);
4732 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4733 default_pat = vma->attr.default_pat_index;
4734 vma_flags = vma->gpuva.flags;
4735 }
4736
4737 if (__op->op == DRM_GPUVA_OP_REMAP) {
4738 vma = gpuva_to_vma(op->base.remap.unmap->va);
4739 default_pat = vma->attr.default_pat_index;
4740 vma_flags = vma->gpuva.flags;
4741 }
4742
4743 if (__op->op == DRM_GPUVA_OP_MAP) {
4744 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4745 op->map.pat_index = default_pat;
4746 }
4747 } else {
4748 if (__op->op == DRM_GPUVA_OP_REMAP) {
4749 vma = gpuva_to_vma(op->base.remap.unmap->va);
4750 xe_assert(vm->xe, !remap_op);
4751 xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4752 remap_op = true;
4753 vma_flags = vma->gpuva.flags;
4754 }
4755
4756 if (__op->op == DRM_GPUVA_OP_MAP) {
4757 xe_assert(vm->xe, remap_op);
4758 remap_op = false;
4759 /*
4760 * For madvise ops, DRM_GPUVA_OP_MAP is
4761 * always preceded by DRM_GPUVA_OP_REMAP, so
4762 * propagate the flags from the VMA we are
4763 * unmapping.
4764 */
4765 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4766 }
4767 }
4768 print_op(vm->xe, __op);
4769 }
4770
4771 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4772
4773 if (is_madvise)
4774 vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4775 else
4776 vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
4777
4778 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4779 if (err)
4780 goto unwind_ops;
4781
4782 xe_vm_lock(vm, false);
4783
4784 drm_gpuva_for_each_op(__op, ops) {
4785 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4786 struct xe_vma *vma;
4787
4788 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4789 vma = gpuva_to_vma(op->base.unmap.va);
4790 /* There should be no unmap for madvise */
4791 if (is_madvise)
4792 XE_WARN_ON("UNEXPECTED UNMAP");
4793
4794 xe_vma_destroy(vma, NULL);
4795 } else if (__op->op == DRM_GPUVA_OP_REMAP) {
4796 vma = gpuva_to_vma(op->base.remap.unmap->va);
4797 /* For madvise ops, store the attributes of the VMA unmapped by
4798 * the REMAP so they can be assigned to the newly created MAP VMA.
4799 */
4800 if (is_madvise)
4801 xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
4802
4803 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4804 } else if (__op->op == DRM_GPUVA_OP_MAP) {
4805 vma = op->map.vma;
4806 /* For a madvise call, a MAP is always preceded by a REMAP, so
4807 * tmp_attr will already hold valid values, making it safe to
4808 * copy them to the new vma.
4809 */
4810 if (is_madvise)
4811 xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
4812 }
4813 }
4814
4815 xe_vm_unlock(vm);
4816 drm_gpuva_ops_free(&vm->gpuvm, ops);
4817 xe_vma_mem_attr_fini(&tmp_attr);
4818 return 0;
4819
4820 unwind_ops:
4821 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4822 free_ops:
4823 drm_gpuva_ops_free(&vm->gpuvm, ops);
4824 return err;
4825 }
4826
4827 /**
4828 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
4829 * @vm: Pointer to the xe_vm structure
4830 * @start: Starting input address
4831 * @range: Size of the input range
4832 *
4833 * This function splits existing VMAs to create new VMAs for the user-provided input range.
4834 *
4835 * Return: 0 on success, negative error code on failure
4836 */
4837 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4838 {
4839 struct drm_gpuvm_map_req map_req = {
4840 .map.va.addr = start,
4841 .map.va.range = range,
4842 };
4843
4844 lockdep_assert_held_write(&vm->lock);
4845
4846 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4847
4848 return xe_vm_alloc_vma(vm, &map_req, true);
4849 }
4850
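/*
 * Illustrative sketch (not part of the driver): how a madvise-style caller
 * might carve out VMAs for the advised range before updating attributes.
 * Assumes vm->lock is held for write; "apply_madvise_attrs" is hypothetical.
 *
 *	err = xe_vm_alloc_madvise_vma(vm, start, range);
 *	if (err)
 *		return err;
 *
 *	// [start, start + range) is now covered by VMAs split exactly at the
 *	// range boundaries, so attributes can be applied per VMA:
 *	// apply_madvise_attrs(vm, start, range);
 */
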
4851 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
4852 {
4853 return vma && xe_vma_is_cpu_addr_mirror(vma) &&
4854 xe_vma_has_default_mem_attrs(vma);
4855 }
4856
4857 /**
4858 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
4859 * @vm: VM to search within
4860 * @start: Input/output pointer to the starting address of the range
4861 * @end: Input/output pointer to the end address of the range
4862 *
4863 * Given a range defined by @start and @end, this function checks the VMAs
4864 * immediately before and after the range. If those neighboring VMAs are
4865 * CPU-address-mirrored and have default memory attributes, the function
4866 * updates @start and @end to include them. This extended range can then
4867 * be used for merging or other operations that require a unified VMA.
4868 *
4869 * The function does not perform the merge itself; it only computes the
4870 * mergeable boundaries.
4871 */
4872 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
4873 {
4874 struct xe_vma *prev, *next;
4875
4876 lockdep_assert_held(&vm->lock);
4877
4878 if (*start >= SZ_4K) {
4879 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
4880 if (is_cpu_addr_vma_with_default_attr(prev))
4881 *start = xe_vma_start(prev);
4882 }
4883
4884 if (*end < vm->size) {
4885 next = xe_vm_find_vma_by_addr(vm, *end + 1);
4886 if (is_cpu_addr_vma_with_default_attr(next))
4887 *end = xe_vma_end(next);
4888 }
4889 }
4890
4891 /**
4892 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4893 * @vm: Pointer to the xe_vm structure
4894 * @start: Starting input address
4895 * @range: Size of the input range
4896 *
4897 * This function splits/merges existing VMAs to create a new VMA for the user-provided input range.
4898 *
4899 * Return: 0 on success, negative error code on failure
4900 */
4901 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4902 {
4903 struct drm_gpuvm_map_req map_req = {
4904 .map.va.addr = start,
4905 .map.va.range = range,
4906 };
4907
4908 lockdep_assert_held_write(&vm->lock);
4909
4910 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4911 start, range);
4912
4913 return xe_vm_alloc_vma(vm, &map_req, false);
4914 }
4915
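/*
 * Illustrative sketch (not part of the driver): combining the two helpers
 * above so that a new CPU-address-mirror mapping is merged with mergeable
 * neighbours. Assumes the caller holds vm->lock for write.
 *
 *	u64 start = addr, end = addr + range;
 *
 *	xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);
 *	err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
 */
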
4916 /**
4917 * xe_vm_add_exec_queue() - Add exec queue to VM
4918 * @vm: The VM.
4919 * @q: The exec_queue
4920 *
4921 * Add exec queue to VM, skipped if the device does not have context based TLB
4922 * invalidations.
4923 */
4924 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4925 {
4926 struct xe_device *xe = vm->xe;
4927
4928 /* User VMs and queues only */
4929 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
4930 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
4931 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
4932 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE));
4933 xe_assert(xe, vm->xef);
4934 xe_assert(xe, vm == q->vm);
4935
4936 if (!xe->info.has_ctx_tlb_inval)
4937 return;
4938
4939 down_write(&vm->exec_queues.lock);
4940 list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]);
4941 ++vm->exec_queues.count[q->gt->info.id];
4942 up_write(&vm->exec_queues.lock);
4943 }
4944
4945 /**
4946 * xe_vm_remove_exec_queue() - Remove exec queue from VM
4947 * @vm: The VM.
4948 * @q: The exec_queue
4949 *
4950 * Remove exec queue from VM, skipped if the device does not have context based
4951 * TLB invalidations.
4952 */
4953 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4954 {
4955 if (!vm->xe->info.has_ctx_tlb_inval)
4956 return;
4957
4958 down_write(&vm->exec_queues.lock);
4959 if (!list_empty(&q->vm_exec_queue_link)) {
4960 list_del(&q->vm_exec_queue_link);
4961 --vm->exec_queues.count[q->gt->info.id];
4962 }
4963 up_write(&vm->exec_queues.lock);
4964 }
4965