1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_sriov_vf.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_vm_madvise.h"
44 #include "xe_wa.h"
45
xe_vm_obj(struct xe_vm * vm)46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
47 {
48 return vm->gpuvm.r_obj;
49 }
50
/**
 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
 * @vm: The vm whose resv is to be locked.
 * @exec: The drm_exec transaction.
 *
 * Helper locking the GEM object backing the vm's resv as part of a
 * drm_exec transaction.
 *
 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
	struct drm_gem_object *vm_obj = xe_vm_obj(vm);

	return drm_exec_lock_obj(exec, vm_obj);
}
64
preempt_fences_waiting(struct xe_vm * vm)65 static bool preempt_fences_waiting(struct xe_vm *vm)
66 {
67 struct xe_exec_queue *q;
68
69 lockdep_assert_held(&vm->lock);
70 xe_vm_assert_held(vm);
71
72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
73 if (!q->lr.pfence ||
74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
75 &q->lr.pfence->flags)) {
76 return true;
77 }
78 }
79
80 return false;
81 }
82
free_preempt_fences(struct list_head * list)83 static void free_preempt_fences(struct list_head *list)
84 {
85 struct list_head *link, *next;
86
87 list_for_each_safe(link, next, list)
88 xe_preempt_fence_free(to_preempt_fence_from_link(link));
89 }
90
/*
 * Top up @list with enough unarmed preempt fences so that there is one
 * per exec queue on the VM. @count tracks how many fences are already
 * on the list across caller retries; on success it equals
 * vm->preempt.num_exec_queues.
 *
 * Return: 0 on success, negative error code if a fence allocation
 * fails. Fences already allocated remain on @list for the caller to
 * free.
 */
static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
{
	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	/* Already have one fence per exec queue. */
	if (*count >= vm->preempt.num_exec_queues)
		return 0;

	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

		if (IS_ERR(pfence))
			return PTR_ERR(pfence);

		/* Move the freshly allocated fence onto the caller's list. */
		list_move_tail(xe_preempt_fence_link(pfence), list);
	}

	return 0;
}
111
/*
 * Wait for all currently installed preempt fences on the VM's exec
 * queues to signal, dropping each fence reference once signaled.
 *
 * On a VF that supports migration the wait is bounded (HZ / 5) so this
 * does not starve post-migration workers; on timeout -EAGAIN is
 * returned so the caller can retry.
 *
 * Return: 0 on success, -EAGAIN on a bounded-wait timeout (VF
 * migration only), -ETIME if the wait failed or a fence signaled with
 * -ETIME, in which case the VM needs to be killed.
 */
static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
	struct xe_exec_queue *q;
	bool vf_migration = IS_SRIOV_VF(vm->xe) &&
		xe_sriov_vf_migration_supported(vm->xe);
	signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;

	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		if (q->lr.pfence) {
			long timeout;

			timeout = dma_fence_wait_timeout(q->lr.pfence, false,
							 wait_time);
			if (!timeout) {
				/* A zero (timed-out) wait only happens with the bounded VF wait. */
				xe_assert(vm->xe, vf_migration);
				return -EAGAIN;
			}

			/* Only -ETIME on fence indicates VM needs to be killed */
			if (timeout < 0 || q->lr.pfence->error == -ETIME)
				return -ETIME;

			dma_fence_put(q->lr.pfence);
			q->lr.pfence = NULL;
		}
	}

	return 0;
}
143
xe_vm_is_idle(struct xe_vm * vm)144 static bool xe_vm_is_idle(struct xe_vm *vm)
145 {
146 struct xe_exec_queue *q;
147
148 xe_vm_assert_held(vm);
149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
150 if (!xe_exec_queue_is_idle(q))
151 return false;
152 }
153
154 return true;
155 }
156
/*
 * Pair each exec queue with one pre-allocated fence taken from the
 * head of @list, arming it on the queue's timeline with the next
 * seqno, and replace the queue's previous preempt fence with it. The
 * caller must have put at least one fence on @list per exec queue
 * (see alloc_preempt_fences()).
 */
static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
	struct list_head *link;
	struct xe_exec_queue *q;

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		struct dma_fence *fence;

		/* Consume the next unarmed fence; list must not run dry. */
		link = list->next;
		xe_assert(vm->xe, link != list);

		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
					     q, q->lr.context,
					     ++q->lr.seqno);
		/* Drop the reference to the old fence, install the new one. */
		dma_fence_put(q->lr.pfence);
		q->lr.pfence = fence;
	}
}
175
add_preempt_fences(struct xe_vm * vm,struct xe_bo * bo)176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
177 {
178 struct xe_exec_queue *q;
179 int err;
180
181 xe_bo_assert_held(bo);
182
183 if (!vm->preempt.num_exec_queues)
184 return 0;
185
186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
187 if (err)
188 return err;
189
190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
191 if (q->lr.pfence) {
192 dma_resv_add_fence(bo->ttm.base.resv,
193 q->lr.pfence,
194 DMA_RESV_USAGE_BOOKKEEP);
195 }
196
197 return 0;
198 }
199
/*
 * Resume every exec queue on the VM and re-add its preempt fence to
 * the VM's shared dma-resv with BOOKKEEP usage. Called at the end of
 * the rebind sequence, past the point of no return.
 */
static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
		q->ops->resume(q);

		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
	}
}
215
/**
 * xe_vm_add_compute_exec_queue() - Add a compute exec queue to the VM
 * @vm: The VM, must be in preempt fence mode.
 * @q: The exec queue to add.
 *
 * Under the VM lock and a drm_exec transaction on the VM resv: create
 * a preempt fence for @q, link the queue onto the VM's preempt queue
 * list, and add the fence to the VM's resv with BOOKKEEP usage. If a
 * preemption or userptr invalidation is already in flight, software
 * signaling is enabled on the new fence so it syncs up with the other
 * preempt fences on the VM.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct drm_exec *exec = &vm_exec.exec;
	struct xe_validation_ctx ctx;
	struct dma_fence *pfence;
	int err;
	bool wait;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));

	down_write(&vm->lock);
	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
	if (err)
		goto out_up_write;

	pfence = xe_preempt_fence_create(q, q->lr.context,
					 ++q->lr.seqno);
	if (IS_ERR(pfence)) {
		err = PTR_ERR(pfence);
		goto out_fini;
	}

	list_add(&q->lr.link, &vm->preempt.exec_queues);
	++vm->preempt.num_exec_queues;
	q->lr.pfence = pfence;

	xe_svm_notifier_lock(vm);

	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);

	/*
	 * Check to see if a preemption on VM is in flight or userptr
	 * invalidation, if so trigger this preempt fence to sync state with
	 * other preempt fences on the VM.
	 */
	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
	if (wait)
		dma_fence_enable_sw_signaling(pfence);

	xe_svm_notifier_unlock(vm);

out_fini:
	xe_validation_ctx_fini(&ctx);
out_up_write:
	up_write(&vm->lock);

	return err;
}
ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
271
272 /**
273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
274 * @vm: The VM.
275 * @q: The exec_queue
276 *
277 * Note that this function might be called multiple times on the same queue.
278 */
xe_vm_remove_compute_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
280 {
281 if (!xe_vm_in_preempt_fence_mode(vm))
282 return;
283
284 down_write(&vm->lock);
285 if (!list_empty(&q->lr.link)) {
286 list_del_init(&q->lr.link);
287 --vm->preempt.num_exec_queues;
288 }
289 if (q->lr.pfence) {
290 dma_fence_enable_sw_signaling(q->lr.pfence);
291 dma_fence_put(q->lr.pfence);
292 q->lr.pfence = NULL;
293 }
294 up_write(&vm->lock);
295 }
296
297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
298
299 /**
300 * xe_vm_kill() - VM Kill
301 * @vm: The VM.
302 * @unlocked: Flag indicates the VM's dma-resv is not held
303 *
304 * Kill the VM by setting banned flag indicated VM is no longer available for
305 * use. If in preempt fence mode, also kill all exec queue attached to the VM.
306 */
xe_vm_kill(struct xe_vm * vm,bool unlocked)307 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
308 {
309 struct xe_exec_queue *q;
310
311 lockdep_assert_held(&vm->lock);
312
313 if (unlocked)
314 xe_vm_lock(vm, false);
315
316 vm->flags |= XE_VM_FLAG_BANNED;
317 trace_xe_vm_kill(vm);
318
319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
320 q->ops->kill(q);
321
322 if (unlocked)
323 xe_vm_unlock(vm);
324
325 /* TODO: Inform user the VM is banned */
326 }
327
/*
 * drm_gpuvm validation callback: move all VMAs of an evicted vm_bo
 * onto the VM's rebind list and revalidate the backing BO so it is
 * resident again. Invoked via drm_gpuvm_validate() with the BO locked.
 *
 * Return: 0 on success or when revalidation is intentionally skipped
 * (purged BO), -EAGAIN while a PM suspend blocks validation, or the
 * error from xe_bo_validate().
 */
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
	struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj);
	struct drm_gpuva *gpuva;
	int ret;

	lockdep_assert_held(&vm->lock);
	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
			       &vm->rebind_list);

	/* Skip re-populating purged BOs, rebind maps scratch pages. */
	if (xe_bo_is_purged(bo)) {
		vm_bo->evicted = false;
		return 0;
	}

	/* Don't validate while a PM suspend is in progress. */
	if (!try_wait_for_completion(&vm->xe->pm_block))
		return -EAGAIN;

	ret = xe_bo_validate(bo, vm, false, exec);
	if (ret)
		return ret;

	vm_bo->evicted = false;
	return 0;
}
356
/**
 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
 * @vm: The vm for which we are rebinding.
 * @exec: The struct drm_exec with the locked GEM objects.
 * @num_fences: The number of fences to reserve for the operation, not
 * including rebinds and validations.
 *
 * Validates all evicted gem objects and rebinds their vmas. Note that
 * rebindings may cause evictions and hence the validation-rebind
 * sequence is rerun until there are no more objects to validate.
 *
 * Return: 0 on success, negative error code on error. In particular,
 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
 * the drm_exec transaction needs to be restarted.
 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
{
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	/*
	 * Rebinding can evict other BOs, repopulating the evict list, so
	 * loop until a validation pass leaves the list empty.
	 */
	do {
		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
		if (ret)
			return ret;

		ret = xe_vm_rebind(vm, false);
		if (ret)
			return ret;
	} while (!list_empty(&vm->gpuvm.evict.list));

	/* Reserve the caller-requested fence slots on every locked object. */
	drm_exec_for_each_locked_object(exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}
397
/*
 * Locking-loop body for the preempt rebind worker: prepare (lock) the
 * VM resv and external objects, bail out early (*done = true) when the
 * VM is idle or no preempt fence needs rearming, wait out the existing
 * preempt fences, then validate and rebind everything.
 *
 * Return: 0 on success (caller must check *done), otherwise a negative
 * error code, including -EDEADLK contention errors that the enclosing
 * drm_exec loop handles.
 */
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
{
	int err;

	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	if (xe_vm_is_idle(vm)) {
		/* Nothing is running on the VM; rebinding can wait. */
		vm->preempt.rebind_deactivated = true;
		*done = true;
		return 0;
	}

	if (!preempt_fences_waiting(vm)) {
		*done = true;
		return 0;
	}

	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
	if (err)
		return err;

	err = wait_for_existing_preempt_fences(vm);
	if (err)
		return err;

	/*
	 * Add validation and rebinding to the locking loop since both can
	 * cause evictions which may require blocking dma_resv locks.
	 * The fence reservation here is intended for the new preempt fences
	 * we attach at the end of the rebind work.
	 */
	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
434
vm_suspend_rebind_worker(struct xe_vm * vm)435 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
436 {
437 struct xe_device *xe = vm->xe;
438 bool ret = false;
439
440 mutex_lock(&xe->rebind_resume_lock);
441 if (!try_wait_for_completion(&vm->xe->pm_block)) {
442 ret = true;
443 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
444 }
445 mutex_unlock(&xe->rebind_resume_lock);
446
447 return ret;
448 }
449
/**
 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
 * @vm: The vm whose preempt worker to resume.
 *
 * Resume a preempt worker that was previously suspended by
 * vm_suspend_rebind_worker().
 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
{
	/* Requeue on the device's ordered workqueue. */
	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
}
461
/*
 * Preempt rebind worker. Runs after a preemption or userptr
 * invalidation: repins userptrs, revalidates evicted BOs, rebinds
 * VMAs, waits for those binds to complete, then arms fresh preempt
 * fences and resumes the exec queues. The whole sequence retries on
 * -EAGAIN (contention, userptr invalidation racing in); any other
 * error kills the VM.
 */
static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err = 0;
	long wait;
	int __maybe_unused tries = 0;

	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	down_write(&vm->lock);

	/* Nothing to do for a VM that is going or has gone away. */
	if (xe_vm_is_closed_or_banned(vm)) {
		up_write(&vm->lock);
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

retry:
	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
		up_write(&vm->lock);
		/* We don't actually block but don't make progress. */
		xe_pm_might_block_on_suspend();
		return;
	}

	/* Repin userptrs that were invalidated since the last rebind. */
	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
				     (struct xe_val_flags) {.interruptible = true});
	if (err)
		goto out_unlock_outer;

	/* Lock, validate and rebind; restarts transparently on contention. */
	drm_exec_until_all_locked(&exec) {
		bool done = false;

		err = xe_preempt_work_begin(&exec, vm, &done);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err || done) {
			xe_validation_ctx_fini(&ctx);
			goto out_unlock_outer;
		}
	}

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	xe_vm_set_validation_exec(vm, &exec);
	err = xe_vm_rebind(vm, true);
	xe_vm_set_validation_exec(vm, NULL);
	if (err)
		goto out_unlock;

	/* Wait on rebinds and munmap style VM unbinds */
	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

/*
 * With CONFIG_DRM_XE_USERPTR_INVAL_INJECT the first pass always
 * retries, to exercise the retry path; otherwise retry only when a
 * userptr was invalidated while we were rebinding.
 */
#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	xe_svm_notifier_lock(vm);
	if (retry_required(tries, vm)) {
		xe_svm_notifier_unlock(vm);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	spin_lock(&vm->xe->ttm.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&vm->xe->ttm.lru_lock);

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm, &exec);
	xe_svm_notifier_unlock(vm);

out_unlock:
	xe_validation_ctx_fini(&ctx);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);

		/*
		 * We can't block in workers on a VF which supports migration
		 * given this can block the VF post-migration workers from
		 * getting scheduled.
		 */
		if (IS_SRIOV_VF(vm->xe) &&
		    xe_sriov_vf_migration_supported(vm->xe)) {
			up_write(&vm->lock);
			xe_vm_queue_rebind_worker(vm);
			return;
		}

		goto retry;
	}

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

	/* Free any fences we allocated but did not arm. */
	free_preempt_fences(&preempt_fences);

	trace_xe_vm_rebind_worker_exit(vm);
}
588
589 /**
590 * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
591 * @vm: The VM.
592 * @pf: The pagefault.
593 *
594 * This function takes the data from the pagefault @pf and saves it to @vm->faults.list.
595 *
596 * The function exits silently if the list is full, and reports a warning if the pagefault
597 * could not be saved to the list.
598 */
xe_vm_add_fault_entry_pf(struct xe_vm * vm,struct xe_pagefault * pf)599 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf)
600 {
601 struct xe_vm_fault_entry *e;
602 struct xe_hw_engine *hwe;
603
604 /* Do not report faults on reserved engines */
605 hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class,
606 pf->consumer.engine_instance, false);
607 if (!hwe || xe_hw_engine_is_reserved(hwe))
608 return;
609
610 e = kzalloc_obj(*e);
611 if (!e) {
612 drm_warn(&vm->xe->drm,
613 "Could not allocate memory for fault!\n");
614 return;
615 }
616
617 guard(spinlock)(&vm->faults.lock);
618
619 /*
620 * Limit the number of faults in the fault list to prevent
621 * memory overuse.
622 */
623 if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) {
624 kfree(e);
625 return;
626 }
627
628 e->address = pf->consumer.page_addr;
629 /*
630 * TODO:
631 * Address precision is currently always SZ_4K, but this may change
632 * in the future.
633 */
634 e->address_precision = SZ_4K;
635 e->access_type = pf->consumer.access_type;
636 e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK,
637 pf->consumer.fault_type_level),
638 e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK,
639 pf->consumer.fault_type_level),
640
641 list_add_tail(&e->list, &vm->faults.list);
642 vm->faults.len++;
643 }
644
xe_vm_clear_fault_entries(struct xe_vm * vm)645 static void xe_vm_clear_fault_entries(struct xe_vm *vm)
646 {
647 struct xe_vm_fault_entry *e, *tmp;
648
649 guard(spinlock)(&vm->faults.lock);
650 list_for_each_entry_safe(e, tmp, &vm->faults.list, list) {
651 list_del(&e->list);
652 kfree(e);
653 }
654 vm->faults.len = 0;
655 }
656
/*
 * Allocate the per-tile arrays of page-table update ops, sized by the
 * num_ops counts accumulated on @vops. Tiles with no pending ops are
 * skipped.
 *
 * Return: 0 on success. On allocation failure returns -ENOBUFS when
 * @array_of_binds is set (signalling the caller/userspace to split the
 * array of binds), otherwise -ENOMEM.
 */
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
	int i;

	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
		if (!vops->pt_update_ops[i].num_ops)
			continue;

		vops->pt_update_ops[i].ops =
			kmalloc_objs(*vops->pt_update_ops[i].ops,
				     vops->pt_update_ops[i].num_ops,
				     GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!vops->pt_update_ops[i].ops)
			return array_of_binds ? -ENOBUFS : -ENOMEM;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
676
xe_vma_svm_prefetch_op_fini(struct xe_vma_op * op)677 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
678 {
679 struct xe_vma *vma;
680
681 vma = gpuva_to_vma(op->base.prefetch.va);
682
683 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
684 xa_destroy(&op->prefetch_range.range);
685 }
686
xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops * vops)687 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
688 {
689 struct xe_vma_op *op;
690
691 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
692 return;
693
694 list_for_each_entry(op, &vops->list, link)
695 xe_vma_svm_prefetch_op_fini(op);
696 }
697
xe_vma_ops_fini(struct xe_vma_ops * vops)698 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
699 {
700 int i;
701
702 xe_vma_svm_prefetch_ops_fini(vops);
703
704 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
705 kfree(vops->pt_update_ops[i].ops);
706 }
707
/*
 * Bump the pending PT-update op count by @inc_val for every tile set
 * in @tile_mask.
 */
static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
	int tile;

	if (!inc_val)
		return;

	for (tile = 0; tile < XE_MAX_TILES_PER_DEVICE; ++tile) {
		if (tile_mask & BIT(tile))
			vops->pt_update_ops[tile].num_ops += inc_val;
	}
}
719
720 #define XE_VMA_CREATE_MASK ( \
721 XE_VMA_READ_ONLY | \
722 XE_VMA_DUMPABLE | \
723 XE_VMA_SYSTEM_ALLOCATOR | \
724 DRM_GPUVA_SPARSE | \
725 XE_VMA_MADV_AUTORESET)
726
/*
 * Fill in @op as an immediate MAP (rebind) of @vma, mirroring the
 * VMA's current GPU VA range, backing object and creation-time flags.
 */
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_MAP;
	op->base.map.va.addr = vma->gpuva.va.addr;
	op->base.map.va.range = vma->gpuva.va.range;
	op->base.map.gem.obj = vma->gpuva.gem.obj;
	op->base.map.gem.offset = vma->gpuva.gem.offset;
	op->map.vma = vma;
	/* Rebinds are always performed immediately, not deferred. */
	op->map.immediate = true;
	op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
}
741
/*
 * Allocate a rebind (MAP) op for @vma, queue it on @vops and account
 * for the PT updates it will need on the tiles in @tile_mask.
 */
static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
{
	struct xe_vma_op *op = kzalloc_obj(*op);

	if (!op)
		return -ENOMEM;

	xe_vm_populate_rebind(op, vma, tile_mask);
	list_add_tail(&op->link, &vops->list);
	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);

	return 0;
}
757
758 static struct dma_fence *ops_execute(struct xe_vm *vm,
759 struct xe_vma_ops *vops);
760 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
761 struct xe_exec_queue *q,
762 struct xe_sync_entry *syncs, u32 num_syncs);
763
/**
 * xe_vm_rebind() - Rebind all VMAs on the VM's rebind list
 * @vm: The VM.
 * @rebind_worker: True when called from the preempt rebind worker.
 *
 * Builds a MAP op for every VMA on the rebind list and executes them,
 * emptying the list on success. A no-op when the list is empty, or in
 * long-running mode unless invoked from the rebind worker.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct dma_fence *fence;
	struct xe_vma *vma, *next;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	int err, i;

	lockdep_assert_held(&vm->lock);
	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
	    list_empty(&vm->rebind_list))
		return 0;

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
		vops.pt_update_ops[i].wait_vm_bookkeep = true;

	xe_vm_assert_held(vm);
	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
		xe_assert(vm->xe, vma->tile_present);

		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);

		err = xe_vm_ops_add_rebind(&vops, vma,
					   vma->tile_present);
		if (err)
			goto free_ops;
	}

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto free_ops;

	fence = ops_execute(vm, &vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
	} else {
		dma_fence_put(fence);
		/* All rebinds submitted; the list can be emptied. */
		list_for_each_entry_safe(vma, next, &vm->rebind_list,
					 combined_links.rebind)
			list_del_init(&vma->combined_links.rebind);
	}
free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return err;
}
818
/**
 * xe_vma_rebind() - Rebind a single VMA
 * @vm: The VM the VMA belongs to; must be in fault mode.
 * @vma: The VMA to rebind.
 * @tile_mask: Tile mask to rebind the VMA on.
 *
 * Builds and executes a single MAP op for @vma using each tile's
 * migration exec queue, waiting on VM bookkeep fences.
 *
 * Return: dma fence for the rebind to signal completion on success,
 * ERR_PTR on failure.
 */
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}
861
/*
 * Fill in @op as a driver-specific MAP_RANGE sub-op binding SVM
 * @range (belonging to @vma) on the tiles in @tile_mask.
 */
static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = tile_mask;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_MAP_RANGE;
	op->map_range.vma = vma;
	op->map_range.range = range;
}
874
875 static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops * vops,struct xe_vma * vma,struct xe_svm_range * range,u8 tile_mask)876 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
877 struct xe_vma *vma,
878 struct xe_svm_range *range,
879 u8 tile_mask)
880 {
881 struct xe_vma_op *op;
882
883 op = kzalloc_obj(*op);
884 if (!op)
885 return -ENOMEM;
886
887 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
888 list_add_tail(&op->link, &vops->list);
889 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
890
891 return 0;
892 }
893
/**
 * xe_vm_range_rebind() - VM range (re)bind
 * @vm: The VM which the range belongs to.
 * @vma: The VMA which the range belongs to.
 * @range: SVM range to rebind.
 * @tile_mask: Tile mask to bind the range to.
 *
 * (re)bind SVM range setting up GPU page tables for the range.
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	/* Execute on each tile's migration queue, waiting on VM bookkeep. */
	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}
952
/*
 * Fill in @op as a driver-specific UNMAP_RANGE sub-op removing SVM
 * @range from the tiles it is currently bound on (tile_present).
 */
static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
{
	INIT_LIST_HEAD(&op->link);
	op->tile_mask = range->tile_present;
	op->base.op = DRM_GPUVA_OP_DRIVER;
	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
	op->unmap_range.range = range;
}
962
963 static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops * vops,struct xe_svm_range * range)964 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
965 struct xe_svm_range *range)
966 {
967 struct xe_vma_op *op;
968
969 op = kzalloc_obj(*op);
970 if (!op)
971 return -ENOMEM;
972
973 xe_vm_populate_range_unbind(op, range);
974 list_add_tail(&op->link, &vops->list);
975 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
976
977 return 0;
978 }
979
/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind SVM range removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
{
	struct dma_fence *fence = NULL;
	struct xe_vma_ops vops;
	struct xe_vma_op *op, *next_op;
	struct xe_tile *tile;
	u8 id;
	int err;

	lockdep_assert_held(&vm->lock);
	xe_vm_assert_held(vm);
	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

	/* Nothing bound anywhere: report immediate completion. */
	if (!range->tile_present)
		return dma_fence_get_stub();

	/* Execute on each tile's migration queue, waiting on VM bookkeep. */
	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
	for_each_tile(tile, vm->xe, id) {
		vops.pt_update_ops[id].wait_vm_bookkeep = true;
		vops.pt_update_ops[tile->id].q =
			xe_migrate_exec_queue(tile->migrate);
	}

	err = xe_vm_ops_add_range_unbind(&vops, range);
	if (err)
		return ERR_PTR(err);

	err = xe_vma_ops_alloc(&vops, false);
	if (err) {
		fence = ERR_PTR(err);
		goto free_ops;
	}

	fence = ops_execute(vm, &vops);

free_ops:
	list_for_each_entry_safe(op, next_op, &vops.list, link) {
		list_del(&op->link);
		kfree(op);
	}
	xe_vma_ops_fini(&vops);

	return fence;
}
1035
/* Release references held by a VMA's memory attributes. */
static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
{
	drm_pagemap_put(attr->preferred_loc.dpagemap);
}
1040
xe_vma_free(struct xe_vma * vma)1041 static void xe_vma_free(struct xe_vma *vma)
1042 {
1043 xe_vma_mem_attr_fini(&vma->attr);
1044
1045 if (xe_vma_is_userptr(vma))
1046 kfree(to_userptr_vma(vma));
1047 else
1048 kfree(vma);
1049 }
1050
/**
 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
 * @to: Destination. Any references it currently holds are dropped.
 * @from: Source.
 *
 * Copies an xe_vma_mem_attr structure taking care to get reference
 * counting of individual members right.
 */
void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
{
	/* Drop whatever @to referenced before overwriting it. */
	xe_vma_mem_attr_fini(to);
	*to = *from;
	/* The struct copy shared the pagemap pointer; take our own reference. */
	if (to->preferred_loc.dpagemap)
		drm_pagemap_get(to->preferred_loc.dpagemap);
}
1066
/*
 * Allocate and initialize a VMA covering [start, end] (inclusive).
 * Depending on @bo and @flags the result is a BO-backed, sparse
 * (null), CPU-address-mirror, or userptr VMA. @bo, if supplied, must
 * be held. BO-less VMAs take a reference on @vm, dropped at destroy
 * time.
 *
 * Return: the new VMA, or an ERR_PTR on failure.
 */
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    struct xe_vma_mem_attr *attr,
				    unsigned int flags)
{
	struct xe_vma *vma;
	struct xe_tile *tile;
	u8 id;
	bool is_null = (flags & DRM_GPUVA_SPARSE);
	bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);

	xe_assert(vm->xe, start < end);
	xe_assert(vm->xe, end < vm->size);

	/*
	 * Allocate and ensure that the xe_vma_is_userptr() return
	 * matches what was allocated.
	 */
	if (!bo && !is_null && !is_cpu_addr_mirror) {
		/* No BO and not sparse/mirror: this is a userptr VMA. */
		struct xe_userptr_vma *uvma = kzalloc_obj(*uvma);

		if (!uvma)
			return ERR_PTR(-ENOMEM);

		vma = &uvma->vma;
	} else {
		vma = kzalloc_obj(*vma);
		if (!vma)
			return ERR_PTR(-ENOMEM);

		if (bo)
			vma->gpuva.gem.obj = &bo->ttm.base;
	}

	INIT_LIST_HEAD(&vma->combined_links.rebind);

	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
	vma->gpuva.vm = &vm->gpuvm;
	vma->gpuva.va.addr = start;
	vma->gpuva.va.range = end - start + 1;
	vma->gpuva.flags = flags;

	/* New VMAs target every tile on the device. */
	for_each_tile(tile, vm->xe, id)
		vma->tile_mask |= 0x1 << id;

	if (vm->xe->info.has_atomic_enable_pte_bit)
		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

	xe_vma_mem_attr_copy(&vma->attr, attr);
	if (bo) {
		struct drm_gpuvm_bo *vm_bo;

		xe_bo_assert_held(bo);

		vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
		if (IS_ERR(vm_bo)) {
			xe_vma_free(vma);
			return ERR_CAST(vm_bo);
		}

		drm_gpuvm_bo_extobj_add(vm_bo);
		/* The VMA keeps its own reference on the BO. */
		drm_gem_object_get(&bo->ttm.base);
		vma->gpuva.gem.offset = bo_offset_or_userptr;
		drm_gpuva_link(&vma->gpuva, vm_bo);
		drm_gpuvm_bo_put(vm_bo);
	} else /* userptr or null */ {
		if (!is_null && !is_cpu_addr_mirror) {
			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
			u64 size = end - start + 1;
			int err;

			vma->gpuva.gem.offset = bo_offset_or_userptr;

			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
			if (err) {
				xe_vma_free(vma);
				return ERR_PTR(err);
			}
		}

		/* BO-less VMAs pin the VM; released in xe_vma_destroy_late(). */
		xe_vm_get(vm);
	}

	return vma;
}
1154
/*
 * xe_vma_destroy_late() - Final, sleepable stage of VMA destruction
 * @vma: The vma to free.
 *
 * Drops the attached user fence, tears down userptr state, releases the
 * VM or BO reference taken in xe_vma_create(), and frees the vma memory.
 * May sleep; deferred to a workqueue when triggered from a fence callback
 * (see vma_destroy_cb()).
 */
static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);

	if (vma->ufence) {
		xe_sync_ufence_put(vma->ufence);
		vma->ufence = NULL;
	}

	if (xe_vma_is_userptr(vma)) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);

		xe_userptr_remove(uvma);
		/* userptr/null/mirror vmas hold a VM ref, BO-backed a BO ref */
		xe_vm_put(vm);
	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
		xe_vm_put(vm);
	} else {
		xe_bo_put(bo);
	}

	xe_vma_free(vma);
}
1178
vma_destroy_work_func(struct work_struct * w)1179 static void vma_destroy_work_func(struct work_struct *w)
1180 {
1181 struct xe_vma *vma =
1182 container_of(w, struct xe_vma, destroy_work);
1183
1184 xe_vma_destroy_late(vma);
1185 }
1186
/*
 * vma_destroy_cb() - dma-fence callback kicking off deferred VMA teardown
 * @fence: the signaled fence (unused).
 * @cb: embedded callback, used to recover the vma.
 *
 * Fence callbacks must not sleep, while xe_vma_destroy_late() may; punt
 * the actual destruction to a workqueue.
 */
static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_dfl_wq, &vma->destroy_work);
}
1195
/*
 * xe_vma_destroy() - Begin destruction of a VMA
 * @vma: The vma to destroy.
 * @fence: Optional fence gating the final teardown.
 *
 * Unlinks the vma from its tracking structures. If @fence is supplied,
 * the sleepable final teardown is deferred until the fence signals (or
 * runs immediately if it already has); otherwise it runs right away.
 * Requires the vm lock held in write mode and the vm resv held.
 */
static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_bo *bo = xe_vma_bo(vma);

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));

	if (xe_vma_is_userptr(vma)) {
		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
		xe_userptr_destroy(to_userptr_vma(vma));
	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
		xe_bo_assert_held(bo);

		drm_gpuva_unlink(&vma->gpuva);
		xe_bo_recompute_purgeable_state(bo);
	}

	xe_vm_assert_held(vm);
	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			/* -ENOENT means the fence already signaled */
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}
1227
1228 /**
1229 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1230 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
1232 * object's resv.
1233 *
1234 * Return: 0 on success, negative error code on error. In particular
1235 * may return -EDEADLK on WW transaction contention and -EINTR if
1236 * an interruptible wait is terminated by a signal.
1237 */
xe_vm_lock_vma(struct drm_exec * exec,struct xe_vma * vma)1238 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1239 {
1240 struct xe_vm *vm = xe_vma_vm(vma);
1241 struct xe_bo *bo = xe_vma_bo(vma);
1242 int err;
1243
1244 XE_WARN_ON(!vm);
1245
1246 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1247 if (!err && bo && !bo->vm)
1248 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1249
1250 return err;
1251 }
1252
/*
 * xe_vma_destroy_unlocked() - Destroy a VMA, taking the needed locks
 * @vma: The vma to destroy.
 *
 * Opens a validation transaction to lock the vm resv (and any external BO
 * resv) before destroying the vma, retrying on WW contention. Locking is
 * not expected to fail here, hence the warn + assert.
 */
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_lock_vma(&exec, vma);
		drm_exec_retry_on_contention(&exec);
		if (XE_WARN_ON(err))
			break;
		xe_vma_destroy(vma, NULL);
	}
	xe_assert(xe, !err);
}
1269
1270 struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm * vm,u64 start,u64 range)1271 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1272 {
1273 struct drm_gpuva *gpuva;
1274
1275 lockdep_assert_held(&vm->lock);
1276
1277 if (xe_vm_is_closed_or_banned(vm))
1278 return NULL;
1279
1280 xe_assert(vm->xe, start + range <= vm->size);
1281
1282 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1283
1284 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1285 }
1286
xe_vm_insert_vma(struct xe_vm * vm,struct xe_vma * vma)1287 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1288 {
1289 int err;
1290
1291 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1292 lockdep_assert_held(&vm->lock);
1293
1294 mutex_lock(&vm->snap_mutex);
1295 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1296 mutex_unlock(&vm->snap_mutex);
1297 XE_WARN_ON(err); /* Shouldn't be possible */
1298
1299 return err;
1300 }
1301
/*
 * xe_vm_remove_vma() - Remove a vma from the VM's GPUVA tree
 * @vm: The VM to remove from.
 * @vma: The vma to remove.
 *
 * Also drops the last-fault cache entry if it points at @vma.
 * Caller must hold vm->lock; snap_mutex serializes against snapshots.
 */
static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
	lockdep_assert_held(&vm->lock);

	mutex_lock(&vm->snap_mutex);
	drm_gpuva_remove(&vma->gpuva);
	mutex_unlock(&vm->snap_mutex);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}
1313
xe_vm_op_alloc(void)1314 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1315 {
1316 struct xe_vma_op *op;
1317
1318 op = kzalloc_obj(*op);
1319
1320 if (unlikely(!op))
1321 return NULL;
1322
1323 return &op->base;
1324 }
1325
static void xe_vm_free(struct drm_gpuvm *gpuvm);

/* drm_gpuvm callbacks: Xe op allocation, BO validation and final VM free. */
static const struct drm_gpuvm_ops gpuvm_ops = {
	.op_alloc = xe_vm_op_alloc,
	.vm_bo_validate = xe_gpuvm_validate,
	.vm_free = xe_vm_free,
};
1333
pde_encode_pat_index(u16 pat_index)1334 static u64 pde_encode_pat_index(u16 pat_index)
1335 {
1336 u64 pte = 0;
1337
1338 if (pat_index & BIT(0))
1339 pte |= XE_PPGTT_PTE_PAT0;
1340
1341 if (pat_index & BIT(1))
1342 pte |= XE_PPGTT_PTE_PAT1;
1343
1344 return pte;
1345 }
1346
pte_encode_pat_index(u16 pat_index,u32 pt_level)1347 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1348 {
1349 u64 pte = 0;
1350
1351 if (pat_index & BIT(0))
1352 pte |= XE_PPGTT_PTE_PAT0;
1353
1354 if (pat_index & BIT(1))
1355 pte |= XE_PPGTT_PTE_PAT1;
1356
1357 if (pat_index & BIT(2)) {
1358 if (pt_level)
1359 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1360 else
1361 pte |= XE_PPGTT_PTE_PAT2;
1362 }
1363
1364 if (pat_index & BIT(3))
1365 pte |= XELPG_PPGTT_PTE_PAT3;
1366
1367 if (pat_index & (BIT(4)))
1368 pte |= XE2_PPGTT_PTE_PAT4;
1369
1370 return pte;
1371 }
1372
pte_encode_ps(u32 pt_level)1373 static u64 pte_encode_ps(u32 pt_level)
1374 {
1375 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1376
1377 if (pt_level == 1)
1378 return XE_PDE_PS_2M;
1379 else if (pt_level == 2)
1380 return XE_PDPE_PS_1G;
1381
1382 return 0;
1383 }
1384
pde_pat_index(struct xe_bo * bo)1385 static u16 pde_pat_index(struct xe_bo *bo)
1386 {
1387 struct xe_device *xe = xe_bo_device(bo);
1388 u16 pat_index;
1389
1390 /*
1391 * We only have two bits to encode the PAT index in non-leaf nodes, but
1392 * these only point to other paging structures so we only need a minimal
1393 * selection of options. The user PAT index is only for encoding leaf
1394 * nodes, where we have use of more bits to do the encoding. The
1395 * non-leaf nodes are instead under driver control so the chosen index
1396 * here should be distinct from the user PAT index. Also the
1397 * corresponding coherency of the PAT index should be tied to the
1398 * allocation type of the page table (or at least we should pick
1399 * something which is always safe).
1400 */
1401 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1402 pat_index = xe->pat.idx[XE_CACHE_WB];
1403 else
1404 pat_index = xe->pat.idx[XE_CACHE_NONE];
1405
1406 xe_assert(xe, pat_index <= 3);
1407
1408 return pat_index;
1409 }
1410
/* Build a present, writable PDE pointing at @bo_offset within @bo. */
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
	return xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE) |
	       XE_PAGE_PRESENT | XE_PAGE_RW |
	       pde_encode_pat_index(pde_pat_index(bo));
}
1421
/* Build a present, writable PTE for @bo_offset within @bo. */
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
{
	u64 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE) |
		  XE_PAGE_PRESENT | XE_PAGE_RW |
		  pte_encode_pat_index(pat_index, pt_level) |
		  pte_encode_ps(pt_level);

	/* Device-memory backed pages need the DM bit. */
	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_PPGTT_PTE_DM;

	return pte;
}
1437
/* Add vma-derived flags (RW, PAT, page size, NULL redirection) to @pte. */
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	bool make_null;

	pte |= XE_PAGE_PRESENT | pte_encode_pat_index(pat_index, pt_level) |
	       pte_encode_ps(pt_level);

	if (likely(!xe_vma_read_only(vma)))
		pte |= XE_PAGE_RW;

	/*
	 * NULL PTEs redirect to scratch page (return zeros on read).
	 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs.
	 * Never set NULL flag without scratch page - causes undefined behavior.
	 */
	make_null = xe_vma_is_null(vma) ||
		    (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm));
	if (unlikely(make_null))
		pte |= XE_PTE_NULL;

	return pte;
}
1463
/* Build a present, writable PTE for a raw address (no BO/vma involved). */
static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
{
	u64 pte = addr | XE_PAGE_PRESENT | XE_PAGE_RW;

	/* Avoid passing random bits directly as flags */
	xe_assert(xe, !(flags & ~XE_PTE_PS64));

	pte |= pte_encode_pat_index(pat_index, pt_level);
	pte |= pte_encode_ps(pt_level);

	if (devmem)
		pte |= XE_PPGTT_PTE_DM;

	return pte | flags;
}
1485
/* PTE/PDE encoding hooks used for XeLP and later platforms. */
static const struct xe_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_pte_encode_bo,
	.pte_encode_vma = xelp_pte_encode_vma,
	.pte_encode_addr = xelp_pte_encode_addr,
	.pde_encode_bo = xelp_pde_encode_bo,
};
1492
1493 static void vm_destroy_work_func(struct work_struct *w);
1494
/**
 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
 * given tile and vm.
 * @xe: xe device.
 * @tile: tile to set up for.
 * @vm: vm to set up for.
 * @exec: The struct drm_exec object used to lock the vm resv.
 *
 * Sets up a pagetable tree with one page-table per level and a single
 * leaf PTE. All pagetable entries point to the single page-table or,
 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
 * writes become NOPs.
 *
 * On failure, tables created by earlier iterations are left in place;
 * the caller is expected to clean up via xe_vm_free_scratch().
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm, struct drm_exec *exec)
{
	u8 id = tile->id;
	int i;

	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
		if (IS_ERR(vm->scratch_pt[id][i])) {
			int err = PTR_ERR(vm->scratch_pt[id][i]);

			/* NULL out so xe_vm_free_scratch() can skip it */
			vm->scratch_pt[id][i] = NULL;
			return err;
		}
		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1530
/*
 * xe_vm_free_scratch() - Free all scratch page-tables of a VM
 * @vm: The VM.
 *
 * No-op for VMs without scratch pages. Scratch tables exist only for
 * levels [MAX_HUGEPTE_LEVEL, root level); individual entries may be NULL
 * when xe_vm_create_scratch() failed part-way.
 */
static void xe_vm_free_scratch(struct xe_vm *vm)
{
	struct xe_tile *tile;
	u8 id;

	if (!xe_vm_has_scratch(vm))
		return;

	for_each_tile(tile, vm->xe, id) {
		u32 i;

		if (!vm->pt_root[id])
			continue;

		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
			if (vm->scratch_pt[id][i])
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
	}
}
1550
xe_vm_pt_destroy(struct xe_vm * vm)1551 static void xe_vm_pt_destroy(struct xe_vm *vm)
1552 {
1553 struct xe_tile *tile;
1554 u8 id;
1555
1556 xe_vm_assert_held(vm);
1557
1558 for_each_tile(tile, vm->xe, id) {
1559 if (vm->pt_root[id]) {
1560 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1561 vm->pt_root[id] = NULL;
1562 }
1563 }
1564 }
1565
/*
 * xe_vm_init_prove_locking() - Prime lockdep with the VM's lock ordering
 * @xe: xe device.
 * @vm: freshly initialized VM.
 *
 * Teaches lockdep at creation time the ordering rules that must hold
 * later: the exec_queues lock may be taken under memory reclaim, and the
 * GuC CT lock nests inside the exec_queues lock. Compiled out unless
 * CONFIG_PROVE_LOCKING is enabled.
 */
static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm)
{
	if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
		return;

	/* exec_queues.lock must be reclaim-safe */
	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&vm->exec_queues.lock);
	fs_reclaim_release(GFP_KERNEL);

	/* CT lock nests inside exec_queues.lock */
	down_read(&vm->exec_queues.lock);
	might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock);
	up_read(&vm->exec_queues.lock);
}
1579
/**
 * xe_vm_create() - Create and initialize a VM
 * @xe: xe device.
 * @flags: XE_VM_FLAG_* flags selecting VM modes (LR, fault, migration, ...).
 * @xef: xe file creating the VM, or NULL for kernel-internal VMs.
 *
 * Allocates the VM, sets up per-tile page-table roots (and scratch tables
 * when requested) under a validation transaction, creates the default bind
 * exec queues for non-migration VMs, and allocates an ASID for user VMs on
 * devices with ASID support.
 *
 * Return: the new VM on success, ERR_PTR() on failure.
 */
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
	struct drm_gem_object *vm_resv_obj;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	int err;
	struct xe_tile *tile;
	u8 id;

	/*
	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
	 * ever be in faulting mode.
	 */
	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;

	vm->size = 1ull << xe->info.va_bits;
	vm->flags = flags;

	if (xef)
		vm->xef = xe_file_get(xef);
	/*
	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
	 * under a user-VM lock when the PXP session is started at exec_queue
	 * creation time. Those are different VMs and therefore there is no risk
	 * of deadlock, but we need to tell lockdep that this is the case or it
	 * will print a warning.
	 */
	if (flags & XE_VM_FLAG_GSC) {
		static struct lock_class_key gsc_vm_key;

		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
	} else {
		init_rwsem(&vm->lock);
	}
	mutex_init(&vm->snap_mutex);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	spin_lock_init(&vm->userptr.invalidated_lock);

	INIT_LIST_HEAD(&vm->faults.list);
	spin_lock_init(&vm->faults.lock);

	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.exec_queues);
	for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id)
		INIT_LIST_HEAD(&vm->exec_queues.list[id]);
	if (flags & XE_VM_FLAG_FAULT_MODE)
		vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
	else
		vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;

	init_rwsem(&vm->exec_queues.lock);
	xe_vm_init_prove_locking(xe, vm);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_init(&vm->rftree[id]);

	vm->pt_ops = &xelp_pt_ops;

	/*
	 * Long-running workloads are not protected by the scheduler references.
	 * By design, run_job for long-running workloads returns NULL and the
	 * scheduler drops all the references of it, hence protecting the VM
	 * for this case is necessary.
	 */
	if (flags & XE_VM_FLAG_LR_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		xe_pm_runtime_get_noresume(xe);
		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
	}

	err = xe_svm_init(vm);
	if (err)
		goto err_no_resv;

	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
	if (!vm_resv_obj) {
		err = -ENOMEM;
		goto err_svm_fini;
	}

	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);

	/* gpuvm holds its own reference to the resv object now */
	drm_gem_object_put(vm_resv_obj);

	/* Build the page-table trees under a validation transaction. */
	err = 0;
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
			vm->flags |= XE_VM_FLAG_64K;

		for_each_tile(tile, xe, id) {
			/* Migration VMs only serve a single tile */
			if (flags & XE_VM_FLAG_MIGRATION &&
			    tile->id != XE_VM_FLAG_TILE_ID(flags))
				continue;

			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
						       &exec);
			if (IS_ERR(vm->pt_root[id])) {
				err = PTR_ERR(vm->pt_root[id]);
				vm->pt_root[id] = NULL;
				xe_vm_pt_destroy(vm);
				drm_exec_retry_on_contention(&exec);
				xe_validation_retry_on_oom(&ctx, &err);
				break;
			}
		}
		if (err)
			break;

		if (xe_vm_has_scratch(vm)) {
			for_each_tile(tile, xe, id) {
				if (!vm->pt_root[id])
					continue;

				err = xe_vm_create_scratch(xe, tile, vm, &exec);
				if (err) {
					xe_vm_free_scratch(vm);
					xe_vm_pt_destroy(vm);
					drm_exec_retry_on_contention(&exec);
					xe_validation_retry_on_oom(&ctx, &err);
					break;
				}
			}
			if (err)
				break;
			vm->batch_invalidate_tlb = true;
		}

		if (vm->flags & XE_VM_FLAG_LR_MODE) {
			/*
			 * NOTE(review): rebind_work was already initialized
			 * above when XE_VM_FLAG_LR_MODE is set - this second
			 * INIT_WORK looks redundant; confirm.
			 */
			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
			vm->batch_invalidate_tlb = false;
		}

		/* Fill pt_root after allocating scratch tables */
		for_each_tile(tile, xe, id) {
			if (!vm->pt_root[id])
				continue;

			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
		}
	}
	if (err)
		goto err_close;

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *q;
			u32 create_flags = EXEC_QUEUE_FLAG_VM;

			if (!vm->pt_root[id])
				continue;

			if (!xef) /* Not from userspace */
				create_flags |= EXEC_QUEUE_FLAG_KERNEL;

			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
			if (IS_ERR(q)) {
				err = PTR_ERR(q);
				goto err_close;
			}
			vm->q[id] = q;
		}
	}

	if (xef && xe->info.has_asid) {
		u32 asid;

		down_write(&xe->usm.lock);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
				      XA_LIMIT(1, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_NOWAIT);
		up_write(&xe->usm.lock);
		if (err < 0)
			goto err_close;

		vm->usm.asid = asid;
	}

	trace_xe_vm_create(vm);

	return vm;

err_close:
	/* Full teardown: partially constructed state is handled there */
	xe_vm_close_and_put(vm);
	return ERR_PTR(err);

err_svm_fini:
	if (flags & XE_VM_FLAG_FAULT_MODE) {
		vm->size = 0; /* close the vm */
		xe_svm_fini(vm);
	}
err_no_resv:
	mutex_destroy(&vm->snap_mutex);
	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);
	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
	if (vm->xef)
		xe_file_put(vm->xef);
	kfree(vm);
	if (flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);
	return ERR_PTR(err);
}
1803
/*
 * xe_vm_close() - Mark a VM closed and scrub its page-tables
 * @vm: The VM to close.
 *
 * Setting vm->size to 0 makes the VM appear closed/banned to lookups.
 * For non-migration VMs this waits for pending binds and, while the
 * device is still present, clears the page-table roots and invalidates
 * the TLBs so the hardware can no longer reach stale mappings.
 */
static void xe_vm_close(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	down_write(&vm->lock);
	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_lock(vm);

	vm->size = 0;

	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
		struct xe_tile *tile;
		struct xe_gt *gt;
		u8 id;

		/* Wait for pending binds */
		dma_resv_wait_timeout(xe_vm_resv(vm),
				      DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);

		/* Only touch hardware while the device is present */
		if (bound) {
			for_each_tile(tile, xe, id)
				if (vm->pt_root[id])
					xe_pt_clear(xe, vm->pt_root[id]);

			for_each_gt(gt, xe, id)
				xe_tlb_inval_vm(&gt->tlb_inval, vm);
		}
	}

	if (xe_vm_in_fault_mode(vm))
		xe_svm_notifier_unlock(vm);
	up_write(&vm->lock);

	if (bound)
		drm_dev_exit(idx);
}
1845
/**
 * xe_vm_close_and_put() - Close a VM and drop the creation reference
 * @vm: The VM.
 *
 * Closes the VM, flushes preempt-rebind work, kills and releases the bind
 * exec queues, destroys all vmas and page-tables, finalizes SVM state,
 * releases the ASID and finally drops the VM reference taken at creation.
 * Must not be called with preempt exec queues still attached.
 */
void xe_vm_close_and_put(struct xe_vm *vm)
{
	LIST_HEAD(contested);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct xe_vma *vma, *next_vma;
	struct drm_gpuva *gpuva, *next;
	u8 id;

	xe_assert(xe, !vm->preempt.num_exec_queues);

	xe_vm_close(vm);
	if (xe_vm_in_preempt_fence_mode(vm)) {
		mutex_lock(&xe->rebind_resume_lock);
		list_del_init(&vm->preempt.pm_activate_link);
		mutex_unlock(&xe->rebind_resume_lock);
		flush_work(&vm->preempt.rebind_work);
	}
	if (xe_vm_in_fault_mode(vm))
		xe_svm_close(vm);

	/* Drop last fences before killing the bind queues */
	down_write(&vm->lock);
	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			int i;

			xe_exec_queue_last_fence_put(vm->q[id], vm);
			for_each_tlb_inval(i)
				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
		}
	}
	up_write(&vm->lock);

	for_each_tile(tile, xe, id) {
		if (vm->q[id]) {
			xe_exec_queue_kill(vm->q[id]);
			xe_exec_queue_put(vm->q[id]);
			vm->q[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, false);
	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
		vma = gpuva_to_vma(gpuva);

		if (xe_vma_has_no_bo(vma)) {
			/* Flag under the notifier lock for userptr paths */
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags |= XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
		}

		xe_vm_remove_vma(vm, vma);

		/* easy case, remove from VMA? */
		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
			list_del_init(&vma->combined_links.rebind);
			xe_vma_destroy(vma, NULL);
			continue;
		}

		/* External BOs need their own resv locked; defer */
		list_move_tail(&vma->combined_links.destroy, &contested);
		vma->gpuva.flags |= XE_VMA_DESTROYED;
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	xe_vm_free_scratch(vm);
	xe_vm_pt_destroy(vm);
	xe_vm_unlock(vm);

	/*
	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
	 * Since we hold a refcount to the bo, we can remove and free
	 * the members safely without locking.
	 */
	list_for_each_entry_safe(vma, next_vma, &contested,
				 combined_links.destroy) {
		list_del_init(&vma->combined_links.destroy);
		xe_vma_destroy_unlocked(vma);
	}

	xe_svm_fini(vm);

	up_write(&vm->lock);

	down_write(&xe->usm.lock);
	if (vm->usm.asid) {
		void *lookup;

		xe_assert(xe, xe->info.has_asid);
		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));

		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		xe_assert(xe, lookup == vm);
	}
	up_write(&xe->usm.lock);

	xe_vm_clear_fault_entries(vm);

	for_each_tile(tile, xe, id)
		xe_range_fence_tree_fini(&vm->rftree[id]);

	xe_vm_put(vm);
}
1956
/*
 * vm_destroy_work_func() - Deferred final VM teardown
 * @w: embedded work struct.
 *
 * Queued from xe_vm_free() when the last VM reference is dropped, so the
 * sleepable teardown never runs in the ref-dropping caller's context.
 */
static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 id;

	/* xe_vm_close_and_put was not called? */
	xe_assert(xe, !vm->size);

	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	mutex_destroy(&vm->snap_mutex);

	/* Release the runtime-pm ref taken for LR VMs at creation */
	if (vm->flags & XE_VM_FLAG_LR_MODE)
		xe_pm_runtime_put(xe);

	for_each_tile(tile, xe, id)
		XE_WARN_ON(vm->pt_root[id]);

	trace_xe_vm_free(vm);

	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);

	if (vm->xef)
		xe_file_put(vm->xef);

	kfree(vm);
}
1988
/*
 * xe_vm_free() - drm_gpuvm free callback, invoked on last reference drop
 * @gpuvm: embedded drm_gpuvm of the VM being freed.
 */
static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_dfl_wq, &vm->destroy_work);
}
1996
/**
 * xe_vm_lookup() - Look up a VM by user-visible id
 * @xef: xe file the VM belongs to.
 * @id: VM id as returned by the VM_CREATE ioctl.
 *
 * Return: referenced VM on success, NULL if no VM with @id exists.
 * The caller must drop the reference with xe_vm_put().
 */
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	if (vm)
		xe_vm_get(vm);	/* take the ref under the lock */
	mutex_unlock(&xef->vm.lock);

	return vm;
}
2009
/**
 * xe_vm_pdp4_descriptor() - Encode the descriptor for a tile's PT root
 * @vm: The VM.
 * @tile: Tile selecting which per-tile page-table root to encode.
 *
 * Return: PDE-encoded address of the VM's root page-table BO for @tile.
 */
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}
2014
2015 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)2016 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2017 {
2018 return q ? q : vm->q[0];
2019 }
2020
2021 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)2022 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2023 {
2024 unsigned int i;
2025
2026 for (i = 0; i < num_syncs; i++) {
2027 struct xe_sync_entry *e = &syncs[i];
2028
2029 if (xe_sync_is_ufence(e))
2030 return xe_sync_ufence_get(e);
2031 }
2032
2033 return NULL;
2034 }
2035
/* All flags accepted by the DRM_IOCTL_XE_VM_CREATE uAPI. */
#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
				    DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2040
/**
 * xe_vm_create_ioctl() - DRM_IOCTL_XE_VM_CREATE handler
 * @dev: DRM device.
 * @data: struct drm_xe_vm_create in/out arguments.
 * @file: DRM file.
 *
 * Validates the uAPI flag combinations, creates the VM and publishes its
 * user-visible id. The id allocation is deliberately last, since a
 * concurrent VM_DESTROY could otherwise race against the setup.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_create_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_create *args = data;
	struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
	struct xe_vm *vm;
	u32 id;
	int err;
	u32 flags = 0;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	/* Workaround 22014953428: force scratch pages on affected GTs */
	if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.has_usm))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
			 !xe->info.needs_scratch))
		return -EINVAL;

	/* Fault mode implies LR mode */
	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
		return -EINVAL;

	/* No-overcommit only makes sense together with fault mode */
	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
			 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
		return -EINVAL;

	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
		flags |= XE_VM_FLAG_SCRATCH_PAGE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
		flags |= XE_VM_FLAG_LR_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
		flags |= XE_VM_FLAG_FAULT_MODE;
	if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
		flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;

	vm = xe_vm_create(xe, flags, xef);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
	/* Warning: Security issue - never enable by default */
	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
#endif

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_close_and_put;

	args->vm_id = id;

	return 0;

err_close_and_put:
	xe_vm_close_and_put(vm);

	return err;
}
2114
/**
 * xe_vm_destroy_ioctl() - DRM_IOCTL_XE_VM_DESTROY handler
 * @dev: DRM device.
 * @data: struct drm_xe_vm_destroy arguments.
 * @file: DRM file.
 *
 * Erases the user-visible id under the xef lock (so concurrent destroys
 * can't both succeed), then closes the VM and drops its creation
 * reference outside the lock.
 *
 * Return: 0 on success, -ENOENT if the id is unknown, -EBUSY if preempt
 * exec queues still reference the VM, -EINVAL on malformed arguments.
 */
int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_destroy *args = data;
	struct xe_vm *vm;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		err = -ENOENT;
	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
		err = -EBUSY;
	else
		xa_erase(&xef->vm.xa, args->vm_id);
	mutex_unlock(&xef->vm.lock);

	if (!err)
		xe_vm_close_and_put(vm);

	return err;
}
2143
/* Count the vmas intersecting [start, end) in @vm's GPUVA tree.
 * Caller must hold vm->lock. */
static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	int count = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
		count++;

	return count;
}
2155
/*
 * Fill @attrs with the memory attributes of each vma intersecting
 * [start, end). @num_vmas is the array capacity on input and the number
 * of entries written on output. Returns -ENOSPC if the array is too
 * small. Caller must hold vm->lock.
 */
static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
			 u64 end, struct drm_xe_mem_range_attr *attrs)
{
	struct drm_gpuva *gpuva;
	int count = 0;

	lockdep_assert_held(&vm->lock);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct drm_xe_mem_range_attr *out;

		if (count == *num_vmas)
			return -ENOSPC;

		out = &attrs[count++];
		out->start = xe_vma_start(vma);
		out->end = xe_vma_end(vma);
		out->atomic.val = vma->attr.atomic_access;
		out->pat_index.val = vma->attr.pat_index;
		out->preferred_mem_loc.devmem_fd =
			vma->attr.preferred_loc.devmem_fd;
		out->preferred_mem_loc.migration_policy =
			vma->attr.preferred_loc.migration_policy;
	}

	*num_vmas = count;
	return 0;
}
2184
xe_vm_query_vmas_attrs_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2185 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2186 {
2187 struct xe_device *xe = to_xe_device(dev);
2188 struct xe_file *xef = to_xe_file(file);
2189 struct drm_xe_mem_range_attr *mem_attrs;
2190 struct drm_xe_vm_query_mem_range_attr *args = data;
2191 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2192 struct xe_vm *vm;
2193 int err = 0;
2194
2195 if (XE_IOCTL_DBG(xe,
2196 ((args->num_mem_ranges == 0 &&
2197 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2198 (args->num_mem_ranges > 0 &&
2199 (!attrs_user ||
2200 args->sizeof_mem_range_attr !=
2201 sizeof(struct drm_xe_mem_range_attr))))))
2202 return -EINVAL;
2203
2204 vm = xe_vm_lookup(xef, args->vm_id);
2205 if (XE_IOCTL_DBG(xe, !vm))
2206 return -EINVAL;
2207
2208 err = down_read_interruptible(&vm->lock);
2209 if (err)
2210 goto put_vm;
2211
2212 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2213
2214 if (args->num_mem_ranges == 0 && !attrs_user) {
2215 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2216 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2217 goto unlock_vm;
2218 }
2219
2220 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2221 GFP_KERNEL | __GFP_ACCOUNT |
2222 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2223 if (!mem_attrs) {
2224 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2225 goto unlock_vm;
2226 }
2227
2228 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2229 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2230 args->start + args->range, mem_attrs);
2231 if (err)
2232 goto free_mem_attrs;
2233
2234 err = copy_to_user(attrs_user, mem_attrs,
2235 args->sizeof_mem_range_attr * args->num_mem_ranges);
2236 if (err)
2237 err = -EFAULT;
2238
2239 free_mem_attrs:
2240 kvfree(mem_attrs);
2241 unlock_vm:
2242 up_read(&vm->lock);
2243 put_vm:
2244 xe_vm_put(vm);
2245 return err;
2246 }
2247
vma_matches(struct xe_vma * vma,u64 page_addr)2248 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2249 {
2250 if (page_addr > xe_vma_end(vma) - 1 ||
2251 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2252 return false;
2253
2254 return true;
2255 }
2256
2257 /**
2258 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2259 *
2260 * @vm: the xe_vm the vma belongs to
2261 * @page_addr: address to look up
2262 */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2263 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2264 {
2265 struct xe_vma *vma = NULL;
2266
2267 if (vm->usm.last_fault_vma) { /* Fast lookup */
2268 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2269 vma = vm->usm.last_fault_vma;
2270 }
2271 if (!vma)
2272 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2273
2274 return vma;
2275 }
2276
/*
 * Maps a prefetch region index onto a TTM placement id:
 * index 0 -> XE_PL_TT (system), 1 -> XE_PL_VRAM0, 2 -> XE_PL_VRAM1.
 */
static const u32 region_to_mem_type[] = {
	XE_PL_TT,
	XE_PL_VRAM0,
	XE_PL_VRAM1,
};
2282
/*
 * Mark @vma as destroyed under the SVM notifier lock and, if it was already
 * committed to the VM (@post_commit), remove it from the VM's VA tree.
 */
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
			     bool post_commit)
{
	xe_svm_notifier_lock(vm);
	vma->gpuva.flags |= XE_VMA_DESTROYED;
	xe_svm_notifier_unlock(vm);
	if (post_commit)
		xe_vm_remove_vma(vm, vma);
}
2292
/* ULL: printf-cast helper for u64 values in the debug output below. */
#undef ULL
#define ULL	unsigned long long

#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
/* Dump a single GPU VA operation to the kernel log (debug builds only). */
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
	struct xe_vma *vma;

	switch (op->op) {
	case DRM_GPUVA_OP_MAP:
		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
		break;
	case DRM_GPUVA_OP_REMAP:
		vma = gpuva_to_vma(op->remap.unmap->va);
		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->remap.unmap->keep ? 1 : 0);
		if (op->remap.prev)
			vm_dbg(&xe->drm,
			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.prev->va.addr,
			       (ULL)op->remap.prev->va.range);
		if (op->remap.next)
			vm_dbg(&xe->drm,
			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
			       (ULL)op->remap.next->va.addr,
			       (ULL)op->remap.next->va.range);
		break;
	case DRM_GPUVA_OP_UNMAP:
		vma = gpuva_to_vma(op->unmap.va);
		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
		       op->unmap.keep ? 1 : 0);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		vma = gpuva_to_vma(op->prefetch.va);
		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
		break;
	default:
		drm_warn(&xe->drm, "NOT POSSIBLE\n");
	}
}
#else
/* No-op stub when VM debugging is compiled out. */
static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
{
}
#endif
2342
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2343 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2344 {
2345 if (!xe_vm_in_fault_mode(vm))
2346 return false;
2347
2348 if (!xe_vm_has_scratch(vm))
2349 return false;
2350
2351 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2352 return false;
2353
2354 return true;
2355 }
2356
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2357 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2358 {
2359 struct drm_gpuva_op *__op;
2360
2361 drm_gpuva_for_each_op(__op, ops) {
2362 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2363
2364 xe_vma_svm_prefetch_op_fini(op);
2365 }
2366 }
2367
2368 /*
2369 * Create operations list from IOCTL arguments, setup operations fields so parse
2370 * and commit steps are decoupled from IOCTL arguments. This step can fail.
2371 */
2372 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2373 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2374 struct xe_bo *bo, u64 bo_offset_or_userptr,
2375 u64 addr, u64 range,
2376 u32 operation, u32 flags,
2377 u32 prefetch_region, u16 pat_index)
2378 {
2379 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2380 struct drm_gpuva_ops *ops;
2381 struct drm_gpuva_op *__op;
2382 struct drm_gpuvm_bo *vm_bo;
2383 u64 range_start = addr;
2384 u64 range_end = addr + range;
2385 int err;
2386
2387 lockdep_assert_held_write(&vm->lock);
2388
2389 vm_dbg(&vm->xe->drm,
2390 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2391 operation, (ULL)addr, (ULL)range,
2392 (ULL)bo_offset_or_userptr);
2393
2394 switch (operation) {
2395 case DRM_XE_VM_BIND_OP_MAP:
2396 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
2397 xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
2398 vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
2399 }
2400
2401 fallthrough;
2402 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2403 struct drm_gpuvm_map_req map_req = {
2404 .map.va.addr = range_start,
2405 .map.va.range = range_end - range_start,
2406 .map.gem.obj = obj,
2407 .map.gem.offset = bo_offset_or_userptr,
2408 };
2409
2410 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2411 break;
2412 }
2413 case DRM_XE_VM_BIND_OP_UNMAP:
2414 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2415 break;
2416 case DRM_XE_VM_BIND_OP_PREFETCH:
2417 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2418 break;
2419 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2420 xe_assert(vm->xe, bo);
2421
2422 err = xe_bo_lock(bo, true);
2423 if (err)
2424 return ERR_PTR(err);
2425
2426 vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj);
2427 if (IS_ERR(vm_bo)) {
2428 xe_bo_unlock(bo);
2429 return ERR_CAST(vm_bo);
2430 }
2431
2432 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2433 drm_gpuvm_bo_put(vm_bo);
2434 xe_bo_unlock(bo);
2435 break;
2436 default:
2437 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2438 ops = ERR_PTR(-EINVAL);
2439 }
2440 if (IS_ERR(ops))
2441 return ops;
2442
2443 drm_gpuva_for_each_op(__op, ops) {
2444 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2445
2446 if (__op->op == DRM_GPUVA_OP_MAP) {
2447 op->map.immediate =
2448 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2449 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2450 op->map.vma_flags |= XE_VMA_READ_ONLY;
2451 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2452 op->map.vma_flags |= DRM_GPUVA_SPARSE;
2453 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2454 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2455 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2456 op->map.vma_flags |= XE_VMA_DUMPABLE;
2457 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2458 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2459 op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
2460 op->map.pat_index = pat_index;
2461 op->map.invalidate_on_bind =
2462 __xe_vm_needs_clear_scratch_pages(vm, flags);
2463 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2464 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2465 struct xe_tile *tile;
2466 struct xe_svm_range *svm_range;
2467 struct drm_gpusvm_ctx ctx = {};
2468 struct drm_pagemap *dpagemap = NULL;
2469 u8 id, tile_mask = 0;
2470 u32 i;
2471
2472 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2473 op->prefetch.region = prefetch_region;
2474 break;
2475 }
2476
2477 ctx.read_only = xe_vma_read_only(vma);
2478 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2479 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2480
2481 for_each_tile(tile, vm->xe, id)
2482 tile_mask |= 0x1 << id;
2483
2484 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2485 op->prefetch_range.ranges_count = 0;
2486
2487 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2488 dpagemap = xe_vma_resolve_pagemap(vma,
2489 xe_device_get_root_tile(vm->xe));
2490 } else if (prefetch_region) {
2491 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2492 XE_PL_VRAM0];
2493 dpagemap = xe_tile_local_pagemap(tile);
2494 }
2495
2496 op->prefetch_range.dpagemap = dpagemap;
2497 alloc_next_range:
2498 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2499
2500 if (PTR_ERR(svm_range) == -ENOENT) {
2501 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2502
2503 addr = ret == ULONG_MAX ? 0 : ret;
2504 if (addr)
2505 goto alloc_next_range;
2506 else
2507 goto print_op_label;
2508 }
2509
2510 if (IS_ERR(svm_range)) {
2511 err = PTR_ERR(svm_range);
2512 goto unwind_prefetch_ops;
2513 }
2514
2515 if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
2516 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2517 goto check_next_range;
2518 }
2519
2520 err = xa_alloc(&op->prefetch_range.range,
2521 &i, svm_range, xa_limit_32b,
2522 GFP_KERNEL);
2523
2524 if (err)
2525 goto unwind_prefetch_ops;
2526
2527 op->prefetch_range.ranges_count++;
2528 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2529 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2530 check_next_range:
2531 if (range_end > xe_svm_range_end(svm_range) &&
2532 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2533 addr = xe_svm_range_end(svm_range);
2534 goto alloc_next_range;
2535 }
2536 }
2537 print_op_label:
2538 print_op(vm->xe, __op);
2539 }
2540
2541 return ops;
2542
2543 unwind_prefetch_ops:
2544 xe_svm_prefetch_gpuva_ops_fini(ops);
2545 drm_gpuva_ops_free(&vm->gpuvm, ops);
2546 return ERR_PTR(err);
2547 }
2548
2549 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2550
/*
 * Allocate and initialize an xe_vma for a MAP op.
 *
 * For BO-backed VMAs, the VM resv (for external BOs) and the BO resv are
 * locked inside a validation transaction and preempt fences are added for
 * external BOs. For userptr VMAs the backing pages are pinned up front.
 *
 * Return: the new VMA or an ERR_PTR().
 */
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
			      struct xe_vma_mem_attr *attr, unsigned int flags)
{
	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vma *vma;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	if (bo) {
		err = 0;
		xe_validation_guard(&ctx, &vm->xe->val, &exec,
				    (struct xe_val_flags) {.interruptible = true}, err) {
			/* External BO: the VM resv must be locked as well. */
			if (!bo->vm) {
				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
				drm_exec_retry_on_contention(&exec);
			}
			if (!err) {
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
			}
			if (err)
				return ERR_PTR(err);

			vma = xe_vma_create(vm, bo, op->gem.offset,
					    op->va.addr, op->va.addr +
					    op->va.range - 1, attr, flags);
			if (IS_ERR(vma))
				return vma;

			if (!bo->vm) {
				err = add_preempt_fences(vm, bo);
				if (err) {
					/* Destroy while locks are still held. */
					prep_vma_destroy(vm, vma, false);
					xe_vma_destroy(vma, NULL);
				}
			}
		}
		if (err)
			return ERR_PTR(err);
	} else {
		vma = xe_vma_create(vm, NULL, op->gem.offset,
				    op->va.addr, op->va.addr +
				    op->va.range - 1, attr, flags);
		if (IS_ERR(vma))
			return vma;

		if (xe_vma_is_userptr(vma)) {
			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
			/*
			 * -EBUSY has dedicated meaning that a user fence
			 * attached to the VMA is busy, in practice
			 * xe_vma_userptr_pin_pages can only fail with -EBUSY if
			 * we are low on memory so convert this to -ENOMEM.
			 */
			if (err == -EBUSY)
				err = -ENOMEM;
		}
	}
	/* Only reachable with err set from the userptr pin path above. */
	if (err) {
		prep_vma_destroy(vm, vma, false);
		xe_vma_destroy_unlocked(vma);
		vma = ERR_PTR(err);
	}

	return vma;
}
2620
xe_vma_max_pte_size(struct xe_vma * vma)2621 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2622 {
2623 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2624 return SZ_1G;
2625 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2626 return SZ_2M;
2627 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2628 return SZ_64K;
2629 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2630 return SZ_4K;
2631
2632 return SZ_1G; /* Uninitialized, used max size */
2633 }
2634
/* Record a page-table entry size on @vma as an XE_VMA_PTE_* flag. */
static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
{
	if (size == SZ_1G)
		vma->gpuva.flags |= XE_VMA_PTE_1G;
	else if (size == SZ_2M)
		vma->gpuva.flags |= XE_VMA_PTE_2M;
	else if (size == SZ_64K)
		vma->gpuva.flags |= XE_VMA_PTE_64K;
	else if (size == SZ_4K)
		vma->gpuva.flags |= XE_VMA_PTE_4K;
}
2652
/*
 * Commit a parsed VMA operation into the VM's VA tree, tagging each stage
 * with XE_VMA_OP_*_COMMITTED flags so a failure can later be unwound
 * precisely by xe_vma_op_unwind().
 */
static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
{
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		err |= xe_vm_insert_vma(vm, op->map.vma);
		if (!err)
			op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_REMAP:
	{
		/* Capture before the old VMA is removed below. */
		u8 tile_present =
			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;

		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
				 true);
		op->flags |= XE_VMA_OP_COMMITTED;

		if (op->remap.prev) {
			err |= xe_vm_insert_vma(vm, op->remap.prev);
			if (!err)
				op->flags |= XE_VMA_OP_PREV_COMMITTED;
			if (!err && op->remap.skip_prev) {
				/* Rebind skipped: inherit PT presence. */
				op->remap.prev->tile_present =
					tile_present;
			}
		}
		if (op->remap.next) {
			err |= xe_vm_insert_vma(vm, op->remap.next);
			if (!err)
				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
			if (!err && op->remap.skip_next) {
				/* Rebind skipped: inherit PT presence. */
				op->remap.next->tile_present =
					tile_present;
			}
		}

		/*
		 * Adjust for partial unbind after removing VMA from VM. In case
		 * of unwind we might need to undo this later.
		 */
		if (!err) {
			op->base.remap.unmap->va->va.addr = op->remap.start;
			op->base.remap.unmap->va->va.range = op->remap.range;
		}
		break;
	}
	case DRM_GPUVA_OP_UNMAP:
		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
		op->flags |= XE_VMA_OP_COMMITTED;
		break;
	case DRM_GPUVA_OP_PREFETCH:
		/* Prefetch touches no VA-tree state; just mark committed. */
		op->flags |= XE_VMA_OP_COMMITTED;
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}

	return err;
}
2716
2717 /**
2718 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2719 * @vma: Pointer to the xe_vma structure to check
2720 *
2721 * This function determines whether the given VMA (Virtual Memory Area)
2722 * has its memory attributes set to their default values. Specifically,
2723 * it checks the following conditions:
2724 *
2725 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED`
2726 * - `pat_index` is equal to `default_pat_index`
2727 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2728 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2729 *
2730 * Return: true if all attributes are at their default values, false otherwise.
2731 */
xe_vma_has_default_mem_attrs(struct xe_vma * vma)2732 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2733 {
2734 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2735 vma->attr.pat_index == vma->attr.default_pat_index &&
2736 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2737 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2738 }
2739
/*
 * Parse a drm_gpuva_ops list into xe_vma_ops: create the VMAs each op needs,
 * account the number of page-table update ops per tile, and commit each op
 * into the VM's VA tree.
 *
 * Return: 0 on success, negative errno on failure (already-committed ops are
 * expected to be unwound by the caller via vm_bind_ioctl_ops_unwind()).
 */
static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
				   struct xe_vma_ops *vops)
{
	struct xe_device *xe = vm->xe;
	struct drm_gpuva_op *__op;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	for_each_tile(tile, vm->xe, id)
		tile_mask |= 0x1 << id;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;
		unsigned int flags = 0;

		INIT_LIST_HEAD(&op->link);
		list_add_tail(&op->link, &vops->list);
		op->tile_mask = tile_mask;

		switch (op->base.op) {
		case DRM_GPUVA_OP_MAP:
		{
			struct xe_vma_mem_attr default_attr = {
				.preferred_loc = {
					.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
					.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
				},
				.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
				.default_pat_index = op->map.pat_index,
				.pat_index = op->map.pat_index,
				.purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
			};

			flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;

			vma = new_vma(vm, &op->base.map, &default_attr,
				      flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);

			op->map.vma = vma;
			/*
			 * System-allocator (SVM) VMAs bind lazily; all others
			 * need a PT update now or on invalidate-on-bind.
			 */
			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
			     !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
			    op->map.invalidate_on_bind)
				xe_vma_ops_incr_pt_update_ops(vops,
							      op->tile_mask, 1);
			break;
		}
		case DRM_GPUVA_OP_REMAP:
		{
			struct xe_vma *old =
				gpuva_to_vma(op->base.remap.unmap->va);
			bool skip = xe_vma_is_cpu_addr_mirror(old);
			u64 start = xe_vma_start(old), end = xe_vma_end(old);
			int num_remap_ops = 0;

			/* Narrow [start, end) to the part actually unmapped. */
			if (op->base.remap.prev)
				start = op->base.remap.prev->va.addr +
					op->base.remap.prev->va.range;
			if (op->base.remap.next)
				end = op->base.remap.next->va.addr;

			if (xe_vma_is_cpu_addr_mirror(old) &&
			    xe_svm_has_mapping(vm, start, end)) {
				/* Only madvise may tear down SVM mappings. */
				if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
					xe_svm_unmap_address_range(vm, start, end);
				else
					return -EBUSY;
			}

			op->remap.start = xe_vma_start(old);
			op->remap.range = xe_vma_size(old);
			op->remap.old_start = op->remap.start;
			op->remap.old_range = op->remap.range;

			flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
			if (op->base.remap.prev) {
				vma = new_vma(vm, op->base.remap.prev,
					      &old->attr, flags);
				if (IS_ERR(vma))
					return PTR_ERR(vma);

				op->remap.prev = vma;

				/*
				 * Userptr creates a new SG mapping so
				 * we must also rebind.
				 */
				op->remap.skip_prev = skip ||
					(!xe_vma_is_userptr(old) &&
					IS_ALIGNED(xe_vma_end(vma),
						   xe_vma_max_pte_size(old)));
				if (op->remap.skip_prev) {
					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
					/* Shrink the unmap to exclude prev. */
					op->remap.range -=
						xe_vma_end(vma) -
						xe_vma_start(old);
					op->remap.start = xe_vma_end(vma);
					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
					       (ULL)op->remap.start,
					       (ULL)op->remap.range);
				} else {
					num_remap_ops++;
				}
			}

			if (op->base.remap.next) {
				vma = new_vma(vm, op->base.remap.next,
					      &old->attr, flags);
				if (IS_ERR(vma))
					return PTR_ERR(vma);

				op->remap.next = vma;

				/*
				 * Userptr creates a new SG mapping so
				 * we must also rebind.
				 */
				op->remap.skip_next = skip ||
					(!xe_vma_is_userptr(old) &&
					IS_ALIGNED(xe_vma_start(vma),
						   xe_vma_max_pte_size(old)));
				if (op->remap.skip_next) {
					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
					/* Shrink the unmap to exclude next. */
					op->remap.range -=
						xe_vma_end(old) -
						xe_vma_start(vma);
					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
					       (ULL)op->remap.start,
					       (ULL)op->remap.range);
				} else {
					num_remap_ops++;
				}
			}
			if (!skip)
				num_remap_ops++;

			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
			break;
		}
		case DRM_GPUVA_OP_UNMAP:
			vma = gpuva_to_vma(op->base.unmap.va);

			if (xe_vma_is_cpu_addr_mirror(vma) &&
			    xe_svm_has_mapping(vm, xe_vma_start(vma),
					       xe_vma_end(vma)) &&
			    !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP))
				return -EBUSY;

			if (!xe_vma_is_cpu_addr_mirror(vma))
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
			break;
		case DRM_GPUVA_OP_PREFETCH:
			vma = gpuva_to_vma(op->base.prefetch.va);

			if (xe_vma_is_userptr(vma)) {
				err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
				if (err)
					return err;
			}

			/* One PT update per SVM range, else one per VMA. */
			if (xe_vma_is_cpu_addr_mirror(vma))
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
							      op->prefetch_range.ranges_count);
			else
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);

			break;
		default:
			drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
		}

		err = xe_vma_op_commit(vm, op);
		if (err)
			return err;
	}

	return 0;
}
2923
/*
 * Undo the VA-tree changes xe_vma_op_commit() made for a single op. The
 * *_post_commit flags indicate which stages actually committed and therefore
 * need reversing.
 */
static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
			     bool post_commit, bool prev_post_commit,
			     bool next_post_commit)
{
	lockdep_assert_held_write(&vm->lock);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (op->map.vma) {
			prep_vma_destroy(vm, op->map.vma, post_commit);
			xe_vma_destroy_unlocked(op->map.vma);
		}
		break;
	case DRM_GPUVA_OP_UNMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);

		if (vma) {
			/* Clear the destroyed mark and re-insert. */
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
			if (post_commit)
				xe_vm_insert_vma(vm, vma);
		}
		break;
	}
	case DRM_GPUVA_OP_REMAP:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);

		if (op->remap.prev) {
			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
			xe_vma_destroy_unlocked(op->remap.prev);
		}
		if (op->remap.next) {
			prep_vma_destroy(vm, op->remap.next, next_post_commit);
			xe_vma_destroy_unlocked(op->remap.next);
		}
		if (vma) {
			xe_svm_notifier_lock(vm);
			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
			xe_svm_notifier_unlock(vm);
			if (post_commit) {
				/*
				 * Restore the old va range, in case of the
				 * prev/next skip optimisation. Otherwise what
				 * we re-insert here could be smaller than the
				 * original range.
				 */
				op->base.remap.unmap->va->va.addr =
					op->remap.old_start;
				op->base.remap.unmap->va->va.range =
					op->remap.old_range;
				xe_vm_insert_vma(vm, vma);
			}
		}
		break;
	}
	case DRM_GPUVA_OP_PREFETCH:
		/* Nothing to do */
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}
}
2989
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2990 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2991 struct drm_gpuva_ops **ops,
2992 int num_ops_list)
2993 {
2994 int i;
2995
2996 for (i = num_ops_list - 1; i >= 0; --i) {
2997 struct drm_gpuva_ops *__ops = ops[i];
2998 struct drm_gpuva_op *__op;
2999
3000 if (!__ops)
3001 continue;
3002
3003 drm_gpuva_for_each_op_reverse(__op, __ops) {
3004 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3005
3006 xe_vma_op_unwind(vm, op,
3007 op->flags & XE_VMA_OP_COMMITTED,
3008 op->flags & XE_VMA_OP_PREV_COMMITTED,
3009 op->flags & XE_VMA_OP_NEXT_COMMITTED);
3010 }
3011 }
3012 }
3013
3014 /**
3015 * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate()
3016 * @res_evict: Allow evicting resources during validation
3017 * @validate: Perform BO validation
3018 * @request_decompress: Request BO decompression
3019 * @check_purged: Reject operation if BO is purged
3020 */
3021 struct xe_vma_lock_and_validate_flags {
3022 u32 res_evict : 1;
3023 u32 validate : 1;
3024 u32 request_decompress : 1;
3025 u32 check_purged : 1;
3026 };
3027
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,struct xe_vma_lock_and_validate_flags flags)3028 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
3029 struct xe_vma_lock_and_validate_flags flags)
3030 {
3031 struct xe_bo *bo = xe_vma_bo(vma);
3032 struct xe_vm *vm = xe_vma_vm(vma);
3033 int err = 0;
3034
3035 if (bo) {
3036 if (!bo->vm)
3037 err = drm_exec_lock_obj(exec, &bo->ttm.base);
3038
3039 /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */
3040 if (!err && flags.check_purged) {
3041 if (xe_bo_madv_is_dontneed(bo))
3042 err = -EBUSY; /* BO marked purgeable */
3043 else if (xe_bo_is_purged(bo))
3044 err = -EINVAL; /* BO already purged */
3045 }
3046
3047 if (!err && flags.validate)
3048 err = xe_bo_validate(bo, vm,
3049 xe_vm_allow_vm_eviction(vm) &&
3050 flags.res_evict, exec);
3051
3052 if (err)
3053 return err;
3054
3055 if (flags.request_decompress)
3056 err = xe_bo_decompress(bo);
3057 }
3058
3059 return err;
3060 }
3061
check_ufence(struct xe_vma * vma)3062 static int check_ufence(struct xe_vma *vma)
3063 {
3064 if (vma->ufence) {
3065 struct xe_user_fence * const f = vma->ufence;
3066
3067 if (!xe_sync_ufence_get_status(f))
3068 return -EBUSY;
3069
3070 vma->ufence = NULL;
3071 xe_sync_ufence_put(f);
3072 }
3073
3074 return 0;
3075 }
3076
/*
 * Migrate (when needed) and populate pages for every SVM range attached to a
 * prefetch op. No-op for VMAs that are not CPU-address-mirrors.
 *
 * Return: 0 on success; -ENODATA for failures userspace is expected to retry
 * from, otherwise a negative errno.
 */
static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
{
	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
	struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
	int err = 0;

	struct xe_svm_range *svm_range;
	struct drm_gpusvm_ctx ctx = {};
	unsigned long i;

	if (!xe_vma_is_cpu_addr_mirror(vma))
		return 0;

	ctx.read_only = xe_vma_read_only(vma);
	ctx.devmem_possible = devmem_possible;
	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
	ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);

	/* TODO: Threading the migration */
	xa_for_each(&op->prefetch_range.range, i, svm_range) {
		/* No destination pagemap: prefetch target is system memory. */
		if (!dpagemap)
			xe_svm_range_migrate_to_smem(vm, svm_range);

		if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
			drm_dbg(&vm->xe->drm,
				"Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
				dpagemap ? dpagemap->drm->unique : "system",
				xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
		}

		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
			err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
			if (err) {
				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
				return -ENODATA;
			}
			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
		}

		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
		if (err) {
			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
			/* These map to retryable conditions for userspace. */
			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
				err = -ENODATA;
			return err;
		}
		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
	}

	return err;
}
3131
/*
 * Lock and prepare the object(s) a single VMA op touches ahead of page-table
 * updates: check user fences, lock/validate the BOs involved, and migrate
 * non-SVM prefetch targets to their destination memory type.
 */
static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
			    struct xe_vma_ops *vops, struct xe_vma_op *op)
{
	int err = 0;
	bool res_evict;

	/*
	 * We only allow evicting a BO within the VM if it is not part of an
	 * array of binds, as an array of binds can evict another BO within the
	 * bind.
	 */
	res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		if (!op->map.invalidate_on_bind)
			err = vma_lock_and_validate(exec, op->map.vma,
						    (struct xe_vma_lock_and_validate_flags) {
						    .res_evict = res_evict,
						    .validate = !xe_vm_in_fault_mode(vm) ||
								 op->map.immediate,
						    .request_decompress =
							op->map.request_decompress,
						    .check_purged = true,
						    });
		break;
	case DRM_GPUVA_OP_REMAP:
		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
		if (err)
			break;

		/* Old VMA: lock only, no validation needed for an unmap. */
		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.remap.unmap->va),
					    (struct xe_vma_lock_and_validate_flags) {
					    .res_evict = res_evict,
					    .validate = false,
					    .request_decompress = false,
					    .check_purged = false,
					    });
		if (!err && op->remap.prev)
			err = vma_lock_and_validate(exec, op->remap.prev,
						    (struct xe_vma_lock_and_validate_flags) {
						    .res_evict = res_evict,
						    .validate = true,
						    .request_decompress = false,
						    .check_purged = true,
						    });
		if (!err && op->remap.next)
			err = vma_lock_and_validate(exec, op->remap.next,
						    (struct xe_vma_lock_and_validate_flags) {
						    .res_evict = res_evict,
						    .validate = true,
						    .request_decompress = false,
						    .check_purged = true,
						    });
		break;
	case DRM_GPUVA_OP_UNMAP:
		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
		if (err)
			break;

		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.unmap.va),
					    (struct xe_vma_lock_and_validate_flags) {
					    .res_evict = res_evict,
					    .validate = false,
					    .request_decompress = false,
					    .check_purged = false,
					    });
		break;
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
		u32 region;

		if (!xe_vma_is_cpu_addr_mirror(vma)) {
			region = op->prefetch.region;
			/*
			 * NOTE(review): "region <= ARRAY_SIZE(...)" permits
			 * region == ARRAY_SIZE, which would index one past
			 * region_to_mem_type in the migrate call below -
			 * confirm the intended valid range.
			 */
			xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
				  region <= ARRAY_SIZE(region_to_mem_type));
		}

		/*
		 * Prefetch attempts to migrate BO's backing store without
		 * repopulating it first. Purged BOs have no backing store
		 * to migrate, so reject the operation.
		 */
		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.prefetch.va),
					    (struct xe_vma_lock_and_validate_flags) {
					    .res_evict = res_evict,
					    .validate = false,
					    .request_decompress = false,
					    .check_purged = true,
					    });
		if (!err && !xe_vma_has_no_bo(vma))
			err = xe_bo_migrate(xe_vma_bo(vma),
					    region_to_mem_type[region],
					    NULL,
					    exec);
		break;
	}
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
	}

	return err;
}
3239
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)3240 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3241 {
3242 struct xe_vma_op *op;
3243 int err;
3244
3245 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3246 return 0;
3247
3248 list_for_each_entry(op, &vops->list, link) {
3249 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3250 err = prefetch_ranges(vm, op);
3251 if (err)
3252 return err;
3253 }
3254 }
3255
3256 return 0;
3257 }
3258
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)3259 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3260 struct xe_vm *vm,
3261 struct xe_vma_ops *vops)
3262 {
3263 struct xe_vma_op *op;
3264 int err;
3265
3266 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3267 if (err)
3268 return err;
3269
3270 list_for_each_entry(op, &vops->list, link) {
3271 err = op_lock_and_prep(exec, vm, vops, op);
3272 if (err)
3273 return err;
3274 }
3275
3276 #ifdef TEST_VM_OPS_ERROR
3277 if (vops->inject_error &&
3278 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3279 return -ENOSPC;
3280 #endif
3281
3282 return 0;
3283 }
3284
op_trace(struct xe_vma_op * op)3285 static void op_trace(struct xe_vma_op *op)
3286 {
3287 switch (op->base.op) {
3288 case DRM_GPUVA_OP_MAP:
3289 trace_xe_vma_bind(op->map.vma);
3290 break;
3291 case DRM_GPUVA_OP_REMAP:
3292 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3293 if (op->remap.prev)
3294 trace_xe_vma_bind(op->remap.prev);
3295 if (op->remap.next)
3296 trace_xe_vma_bind(op->remap.next);
3297 break;
3298 case DRM_GPUVA_OP_UNMAP:
3299 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3300 break;
3301 case DRM_GPUVA_OP_PREFETCH:
3302 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3303 break;
3304 case DRM_GPUVA_OP_DRIVER:
3305 break;
3306 default:
3307 XE_WARN_ON("NOT POSSIBLE");
3308 }
3309 }
3310
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3311 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3312 {
3313 struct xe_vma_op *op;
3314
3315 list_for_each_entry(op, &vops->list, link)
3316 op_trace(op);
3317 }
3318
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3319 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3320 {
3321 struct xe_exec_queue *q = vops->q;
3322 struct xe_tile *tile;
3323 int number_tiles = 0;
3324 u8 id;
3325
3326 for_each_tile(tile, vm->xe, id) {
3327 if (vops->pt_update_ops[id].num_ops)
3328 ++number_tiles;
3329
3330 if (vops->pt_update_ops[id].q)
3331 continue;
3332
3333 if (q) {
3334 vops->pt_update_ops[id].q = q;
3335 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3336 q = list_next_entry(q, multi_gt_list);
3337 } else {
3338 vops->pt_update_ops[id].q = vm->q[id];
3339 }
3340 }
3341
3342 return number_tiles;
3343 }
3344
/*
 * ops_execute() - Run the prepared VMA ops on all tiles and collect the
 * resulting fences into a single dma_fence_array.
 *
 * Per tile one job fence is collected, plus (unless the caller set
 * XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) one TLB-invalidation fence per
 * invalidation unit. Tiles with no ops contribute stub fences so the
 * fence array stays densely packed.
 *
 * Return: the composite fence on success, ERR_PTR(-ENODATA) if no tile
 * has any work, or another ERR_PTR on allocation/prepare/run failure.
 */
static struct dma_fence *ops_execute(struct xe_vm *vm,
				     struct xe_vma_ops *vops)
{
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
	u8 id;

	number_tiles = vm_ops_setup_tile_args(vm, vops);
	if (number_tiles == 0)
		return ERR_PTR(-ENODATA);

	/* Size the fence array: one slot per tile, plus TLB fences if waited on. */
	for_each_tile(tile, vm->xe, id) {
		++n_fence;

		if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
			for_each_tlb_inval(i)
				++n_fence;
	}

	fences = kmalloc_objs(*fences, n_fence);
	if (!fences) {
		fence = ERR_PTR(-ENOMEM);
		goto err_trace;
	}

	cf = dma_fence_array_alloc(n_fence);
	if (!cf) {
		fence = ERR_PTR(-ENOMEM);
		/*
		 * NOTE(review): jumping to err_out here runs
		 * xe_pt_update_ops_abort() before prepare has been called —
		 * confirm abort is a no-op/safe in that state.
		 */
		goto err_out;
	}

	/* Prepare page-table updates on every tile that has work. */
	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		err = xe_pt_update_ops_prepare(tile, vops);
		if (err) {
			fence = ERR_PTR(err);
			goto err_out;
		}
	}

	trace_xe_vm_ops_execute(vops);

	/* Submit per-tile jobs and collect job + TLB-invalidation fences. */
	for_each_tile(tile, vm->xe, id) {
		struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;

		fence = NULL;
		if (!vops->pt_update_ops[id].num_ops)
			goto collect_fences;	/* idle tile: stub fence below */

		fence = xe_pt_update_ops_run(tile, vops);
		if (IS_ERR(fence))
			goto err_out;

collect_fences:
		fences[current_fence++] = fence ?: dma_fence_get_stub();
		if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
			continue;

		/* Snapshot the last TLB-invalidation fences under the job lock. */
		xe_migrate_job_lock(tile->migrate, q);
		for_each_tlb_inval(i)
			fences[current_fence++] =
				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
		xe_migrate_job_unlock(tile->migrate, q);
	}

	xe_assert(vm->xe, current_fence == n_fence);
	/* cf takes ownership of @fences and the collected references. */
	dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
			     1, false);
	fence = &cf->base;

	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		xe_pt_update_ops_fini(tile, vops);
	}

	return fence;

err_out:
	/* Undo any prepared (and possibly submitted) page-table updates. */
	for_each_tile(tile, vm->xe, id) {
		if (!vops->pt_update_ops[id].num_ops)
			continue;

		xe_pt_update_ops_abort(tile, vops);
	}
	/* Drop fence refs collected so far; cf was never initialized. */
	while (current_fence)
		dma_fence_put(fences[--current_fence]);
	kfree(fences);
	kfree(cf);

err_trace:
	trace_xe_vm_ops_fail(vm);
	return fence;
}
3445
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3446 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3447 {
3448 if (vma->ufence)
3449 xe_sync_ufence_put(vma->ufence);
3450 vma->ufence = __xe_sync_ufence_get(ufence);
3451 }
3452
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3453 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3454 struct xe_user_fence *ufence)
3455 {
3456 switch (op->base.op) {
3457 case DRM_GPUVA_OP_MAP:
3458 if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
3459 vma_add_ufence(op->map.vma, ufence);
3460 break;
3461 case DRM_GPUVA_OP_REMAP:
3462 if (op->remap.prev)
3463 vma_add_ufence(op->remap.prev, ufence);
3464 if (op->remap.next)
3465 vma_add_ufence(op->remap.next, ufence);
3466 break;
3467 case DRM_GPUVA_OP_UNMAP:
3468 break;
3469 case DRM_GPUVA_OP_PREFETCH:
3470 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3471 break;
3472 default:
3473 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3474 }
3475 }
3476
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3477 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3478 struct dma_fence *fence)
3479 {
3480 struct xe_user_fence *ufence;
3481 struct xe_vma_op *op;
3482 int i;
3483
3484 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3485 list_for_each_entry(op, &vops->list, link) {
3486 if (ufence)
3487 op_add_ufence(vm, op, ufence);
3488
3489 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3490 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3491 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3492 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3493 fence);
3494 }
3495 if (ufence)
3496 xe_sync_ufence_put(ufence);
3497 if (fence) {
3498 for (i = 0; i < vops->num_syncs; i++)
3499 xe_sync_entry_signal(vops->syncs + i, fence);
3500 }
3501 }
3502
/*
 * vm_bind_ioctl_ops_execute() - Lock, prepare and execute a set of VMA ops
 * inside a validation/exec transaction.
 *
 * Runs under the VM lock held for write. The xe_validation_guard body may
 * be re-executed on drm_exec contention or validation OOM (the retry
 * macros restart the guarded section), so everything inside must be
 * restartable up to the point ops_execute() is called.
 *
 * Return: the composite bind fence on success, ERR_PTR on failure.
 * -ENODATA from ops_execute() still finalizes the ops with a NULL fence.
 */
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
						   struct xe_vma_ops *vops)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err = 0;

	lockdep_assert_held_write(&vm->lock);

	xe_validation_guard(&ctx, &vm->xe->val, &exec,
			    ((struct xe_val_flags) {
				    .interruptible = true,
				    .exec_ignore_duplicates = true,
			    }), err) {
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
		/* Both macros may restart the guarded section from the top. */
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			return ERR_PTR(err);

		/* Publish the exec context so nested validations can use it. */
		xe_vm_set_validation_exec(vm, &exec);
		fence = ops_execute(vm, vops);
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			/* -ENODATA: no work submitted, finalize without a fence. */
			if (PTR_ERR(fence) == -ENODATA)
				vm_bind_ioctl_ops_fini(vm, vops, NULL);
			return fence;
		}

		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}

	/* err is set by the guard on setup failure; otherwise fence is valid. */
	return err ? ERR_PTR(err) : fence;
}
3538 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3539
3540 #define SUPPORTED_FLAGS_STUB \
3541 (DRM_XE_VM_BIND_FLAG_READONLY | \
3542 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3543 DRM_XE_VM_BIND_FLAG_NULL | \
3544 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3545 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3546 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3547 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
3548 DRM_XE_VM_BIND_FLAG_DECOMPRESS)
3549
3550 #ifdef TEST_VM_OPS_ERROR
3551 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3552 #else
3553 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3554 #endif
3555
3556 #define XE_64K_PAGE_MASK 0xffffull
3557 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3558
/*
 * vm_bind_ioctl_check_args() - Validate the VM_BIND ioctl arguments and
 * fetch the bind-op array from userspace.
 *
 * For a single bind, *bind_ops points at args->bind (no allocation); for
 * multiple binds the array is copied from userspace into a kvmalloc'd
 * buffer which the caller must kvfree (mirrored by the num_binds > 1
 * check at the free_bind_ops label and in the ioctl's cleanup path).
 *
 * Return: 0 on success, negative errno on invalid arguments. On error
 * *bind_ops is NULL and any allocation has been freed.
 */
static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
{
	int err;
	int i;

	/* Reject non-zero padding/reserved fields and unknown extensions. */
	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
		return -EINVAL;

	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);

		*bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
					  args->num_binds,
					  GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!*bind_ops)
			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;

		err = copy_from_user(*bind_ops, bind_user,
				     sizeof(struct drm_xe_vm_bind_op) *
				     args->num_binds);
		if (XE_IOCTL_DBG(xe, err)) {
			err = -EFAULT;
			goto free_bind_ops;
		}
	} else {
		/* Single bind lives inline in the ioctl struct. */
		*bind_ops = &args->bind;
	}

	/* Per-op validation. */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = (*bind_ops)[i].range;
		u64 addr = (*bind_ops)[i].addr;
		u32 op = (*bind_ops)[i].op;
		u32 flags = (*bind_ops)[i].flags;
		u32 obj = (*bind_ops)[i].obj;
		u64 obj_offset = (*bind_ops)[i].obj_offset;
		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
		bool is_cpu_addr_mirror = flags &
			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
		bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
		u16 pat_index = (*bind_ops)[i].pat_index;
		u16 coh_mode;
		bool comp_en;

		/* CPU address mirror (SVM) requires fault mode + GPUSVM support. */
		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
				 (!xe_vm_in_fault_mode(vm) ||
				  !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Sanitize against speculative OOB use of the user index. */
		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
		(*bind_ops)[i].pat_index = pat_index;
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		comp_en = xe_pat_index_get_comp_en(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
			err = -EINVAL;
			goto free_bind_ops;
		}

		if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Cross-field consistency checks between op, flags and object. */
		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
						    is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
				 (is_decompress || is_null || is_cpu_addr_mirror)) ||
		    XE_IOCTL_DBG(xe, is_decompress &&
				 xe_pat_index_get_comp_en(xe, pat_index)) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_MAP &&
				 !is_null && !is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, !obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, addr &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, range &&
				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
				 is_cpu_addr_mirror) ||
		    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
				 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
				  is_cpu_addr_mirror) &&
				 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
		    XE_IOCTL_DBG(xe, comp_en &&
				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, prefetch_region &&
				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
		    XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
				      /* Guard against undefined shift in BIT(prefetch_region) */
				      (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
				       !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
		    XE_IOCTL_DBG(xe, obj &&
				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Page alignment of address, range and object offset. */
		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, !range &&
				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
			err = -EINVAL;
			goto free_bind_ops;
		}

		/* Decompress binds need flat CCS on Xe2+ discrete parts. */
		if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
				      XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
				      XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
			err = -EOPNOTSUPP;
			goto free_bind_ops;
		}
	}

	return 0;

free_bind_ops:
	if (args->num_binds > 1)
		kvfree(*bind_ops);
	*bind_ops = NULL;
	return err;
}
3712
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3713 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3714 struct xe_exec_queue *q,
3715 struct xe_sync_entry *syncs,
3716 int num_syncs)
3717 {
3718 struct dma_fence *fence = NULL;
3719 int i, err = 0;
3720
3721 if (num_syncs) {
3722 fence = xe_sync_in_fence_get(syncs, num_syncs,
3723 to_wait_exec_queue(vm, q), vm);
3724 if (IS_ERR(fence))
3725 return PTR_ERR(fence);
3726
3727 for (i = 0; i < num_syncs; i++)
3728 xe_sync_entry_signal(&syncs[i], fence);
3729 }
3730
3731 dma_fence_put(fence);
3732
3733 return err;
3734 }
3735
/* Zero-initialize @vops and record VM, queue and sync bindings. */
static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
			    struct xe_exec_queue *q,
			    struct xe_sync_entry *syncs, u32 num_syncs)
{
	/* Designated init zeroes all remaining fields, including flags. */
	*vops = (struct xe_vma_ops) {
		.vm = vm,
		.q = q,
		.syncs = syncs,
		.num_syncs = num_syncs,
	};
	INIT_LIST_HEAD(&vops->list);
}
3748
/*
 * xe_vm_bind_ioctl_validate_bo() - BO-specific validation for one bind op.
 *
 * Checks range/offset against the BO size, 64k alignment on platforms
 * that need it, coherency-mode compatibility with the BO's CPU caching,
 * compression restrictions for dma-buf imports, and PXP key validity.
 *
 * Return: 0 if the bind op is valid for @bo, negative errno otherwise
 * (-ENOEXEC specifically for an invalid PXP key).
 */
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
					u64 addr, u64 range, u64 obj_offset,
					u16 pat_index, u32 op, u32 bind_flags)
{
	u16 coh_mode;
	bool comp_en;

	/* BOs opted out of compression must not use a compressing PAT. */
	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
			 xe_pat_index_get_comp_en(xe, pat_index)))
		return -EINVAL;

	/* range/offset must stay inside the BO (overflow-safe form). */
	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}

	/*
	 * Some platforms require 64k VM_BIND alignment,
	 * specifically those with XE_VRAM_FLAGS_NEED64K.
	 *
	 * Other platforms may have BO's set to 64k physical placement,
	 * but can be mapped at 4k offsets anyway. This check is only
	 * there for the former case.
	 */
	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
		if (XE_IOCTL_DBG(xe, obj_offset &
				 XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
			return -EINVAL;
		}
	}

	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
	if (bo->cpu_caching) {
		/* WB-cached BOs need at least 1-way coherency. */
		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
			return -EINVAL;
		}
	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
		/*
		 * Imported dma-buf from a different device should
		 * require 1way or 2way coherency since we don't know
		 * how it was mapped on the CPU. Just assume is it
		 * potentially cached on CPU side.
		 */
		return -EINVAL;
	}

	/*
	 * Ensures that imported buffer objects (dma-bufs) are not mapped
	 * with a PAT index that enables compression.
	 */
	comp_en = xe_pat_index_get_comp_en(xe, pat_index);
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
		return -EINVAL;

	/* Imports on L2-flush-optimized parts must use PAT 19 or 2-way coh. */
	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
			 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
		return -EINVAL;

	/* If a BO is protected it can only be mapped if the key is still valid */
	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
			return -ENOEXEC;

	return 0;
}
3820
/*
 * xe_vm_bind_ioctl() - DRM_IOCTL_XE_VM_BIND entry point.
 *
 * Validates arguments, looks up the VM / optional exec queue / BOs,
 * parses the sync entries, builds and executes the VMA operation list,
 * then unwinds everything in reverse order via the goto cleanup chain.
 *
 * Return: 0 on success, negative errno on failure. A bind with nothing
 * to do (-ENODATA internally) still signals the user's syncs and
 * returns the result of that signaling.
 */
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_vm_bind *args = data;
	struct drm_xe_sync __user *syncs_user;
	struct xe_bo **bos = NULL;
	struct drm_gpuva_ops **ops = NULL;
	struct xe_vm *vm;
	struct xe_exec_queue *q = NULL;
	u32 num_syncs, num_ufence = 0;
	struct xe_sync_entry *syncs = NULL;
	struct drm_xe_vm_bind_op *bind_ops = NULL;
	struct xe_vma_ops vops;
	struct dma_fence *fence;
	int err;
	int i;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	/* May kvmalloc bind_ops for multi-bind; freed at free_bind_ops. */
	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
	if (err)
		goto put_vm;

	if (args->exec_queue_id) {
		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
		if (XE_IOCTL_DBG(xe, !q)) {
			err = -ENOENT;
			goto free_bind_ops;
		}

		/* Only VM-bind-capable queues may be used here. */
		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
			err = -EINVAL;
			goto put_exec_queue;
		}
	}

	/* The queue must belong to this VM. */
	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
		err = -EINVAL;
		goto put_exec_queue;
	}

	/* Ensure all UNMAPs visible */
	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_exec_queue;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto release_vm_lock;
	}

	/* Range checks against the VM's address-space size. */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;

		if (XE_IOCTL_DBG(xe, range > vm->size) ||
		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
			err = -EINVAL;
			goto release_vm_lock;
		}
	}

	/* Per-bind scratch arrays for BO refs and gpuva op lists. */
	if (args->num_binds) {
		bos = kvzalloc_objs(*bos, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!bos) {
			err = -ENOMEM;
			goto release_vm_lock;
		}

		ops = kvzalloc_objs(*ops, args->num_binds,
				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
		if (!ops) {
			err = -ENOMEM;
			goto free_bos;
		}
	}

	/* Look up and validate every referenced BO. */
	for (i = 0; i < args->num_binds; ++i) {
		struct drm_gem_object *gem_obj;
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 obj = bind_ops[i].obj;
		u64 obj_offset = bind_ops[i].obj_offset;
		u16 pat_index = bind_ops[i].pat_index;
		u32 op = bind_ops[i].op;
		u32 bind_flags = bind_ops[i].flags;

		if (!obj)
			continue;

		gem_obj = drm_gem_object_lookup(file, obj);
		if (XE_IOCTL_DBG(xe, !gem_obj)) {
			err = -ENOENT;
			goto put_obj;
		}
		bos[i] = gem_to_xe_bo(gem_obj);

		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
						   obj_offset, pat_index, op,
						   bind_flags);
		if (err)
			goto put_obj;
	}

	if (args->num_syncs) {
		syncs = kzalloc_objs(*syncs, args->num_syncs);
		if (!syncs) {
			err = -ENOMEM;
			goto put_obj;
		}
	}

	/* Parse user syncs; user fences are disallowed without binds. */
	syncs_user = u64_to_user_ptr(args->syncs);
	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
		struct xe_exec_queue *__q = q ?: vm->q[0];

		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
					  &syncs_user[num_syncs],
					  __q->ufence_syncobj,
					  ++__q->ufence_timeline_value,
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0) |
					  (!args->num_binds ?
					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
		if (err)
			goto free_syncs;

		if (xe_sync_is_ufence(&syncs[num_syncs]))
			num_ufence++;
	}

	/* At most one user fence per bind. */
	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto free_syncs;
	}

	/* No binds: -ENODATA signals the syncs at free_syncs and returns. */
	if (!args->num_binds) {
		err = -ENODATA;
		goto free_syncs;
	}

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	if (args->num_binds > 1)
		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
	/* Create and parse the gpuva op list for every bind. */
	for (i = 0; i < args->num_binds; ++i) {
		u64 range = bind_ops[i].range;
		u64 addr = bind_ops[i].addr;
		u32 op = bind_ops[i].op;
		u32 flags = bind_ops[i].flags;
		u64 obj_offset = bind_ops[i].obj_offset;
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
			err = PTR_ERR(ops[i]);
			ops[i] = NULL;
			goto unwind_ops;
		}

		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
		if (err)
			goto unwind_ops;

#ifdef TEST_VM_OPS_ERROR
		if (flags & FORCE_OP_ERROR) {
			vops.inject_error = true;
			vm->xe->vm_inject_error_position =
				(vm->xe->vm_inject_error_position + 1) %
				FORCE_OP_ERROR_COUNT;
		}
#endif
	}

	/* Nothing to do */
	if (list_empty(&vops.list)) {
		err = -ENODATA;
		goto unwind_ops;
	}

	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
	if (err)
		goto unwind_ops;

	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);
	else
		dma_fence_put(fence);

unwind_ops:
	/* -ENODATA is "nothing to do", not a failure to unwind. */
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
	xe_vma_ops_fini(&vops);
	for (i = args->num_binds - 1; i >= 0; --i)
		if (ops[i])
			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
free_syncs:
	if (err == -ENODATA)
		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
	while (num_syncs--)
		xe_sync_entry_cleanup(&syncs[num_syncs]);

	kfree(syncs);
put_obj:
	for (i = 0; i < args->num_binds; ++i)
		xe_bo_put(bos[i]);

	kvfree(ops);
free_bos:
	kvfree(bos);
release_vm_lock:
	up_write(&vm->lock);
put_exec_queue:
	if (q)
		xe_exec_queue_put(q);
free_bind_ops:
	if (args->num_binds > 1)
		kvfree(bind_ops);
put_vm:
	xe_vm_put(vm);
	return err;
}
4056
4057 /*
4058 * Map access type, fault type, and fault level from current bspec
4059 * specification to user spec abstraction. The current mapping is
4060 * approximately 1-to-1, with access type being the only notable
4061 * exception as it carries additional data with respect to prefetch
4062 * status that needs to be masked out.
4063 */
/* Mask out prefetch-status bits; only the access type proper is uAPI. */
static u8 xe_to_user_access_type(u8 access_type)
{
	return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK;
}
4068
/* HW fault type maps 1:1 onto the uAPI value. */
static u8 xe_to_user_fault_type(u8 fault_type)
{
	return fault_type;
}
4073
/* HW fault level maps 1:1 onto the uAPI value. */
static u8 xe_to_user_fault_level(u8 fault_level)
{
	return fault_level;
}
4078
fill_faults(struct xe_vm * vm,struct drm_xe_vm_get_property * args)4079 static int fill_faults(struct xe_vm *vm,
4080 struct drm_xe_vm_get_property *args)
4081 {
4082 struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data);
4083 struct xe_vm_fault *fault_list, fault_entry = { 0 };
4084 struct xe_vm_fault_entry *entry;
4085 int ret = 0, i = 0, count, entry_size;
4086
4087 entry_size = sizeof(struct xe_vm_fault);
4088 count = args->size / entry_size;
4089
4090 fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL);
4091 if (!fault_list)
4092 return -ENOMEM;
4093
4094 spin_lock(&vm->faults.lock);
4095 list_for_each_entry(entry, &vm->faults.list, list) {
4096 if (i == count)
4097 break;
4098
4099 fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address);
4100 fault_entry.address_precision = entry->address_precision;
4101
4102 fault_entry.access_type = xe_to_user_access_type(entry->access_type);
4103 fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type);
4104 fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level);
4105
4106 memcpy(&fault_list[i], &fault_entry, entry_size);
4107
4108 i++;
4109 }
4110 spin_unlock(&vm->faults.lock);
4111
4112 ret = copy_to_user(usr_ptr, fault_list, args->size);
4113
4114 kfree(fault_list);
4115 return ret ? -EFAULT : 0;
4116 }
4117
xe_vm_get_property_helper(struct xe_vm * vm,struct drm_xe_vm_get_property * args)4118 static int xe_vm_get_property_helper(struct xe_vm *vm,
4119 struct drm_xe_vm_get_property *args)
4120 {
4121 size_t size;
4122
4123 switch (args->property) {
4124 case DRM_XE_VM_GET_PROPERTY_FAULTS:
4125 spin_lock(&vm->faults.lock);
4126 size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len);
4127 spin_unlock(&vm->faults.lock);
4128
4129 if (!args->size) {
4130 args->size = size;
4131 return 0;
4132 }
4133
4134 /*
4135 * Number of faults may increase between calls to
4136 * xe_vm_get_property_ioctl, so just report the number of
4137 * faults the user requests if it's less than or equal to
4138 * the number of faults in the VM fault array.
4139 *
4140 * We should also at least assert that the args->size value
4141 * is a multiple of the xe_vm_fault struct size.
4142 */
4143 if (args->size > size || args->size % sizeof(struct xe_vm_fault))
4144 return -EINVAL;
4145
4146 return fill_faults(vm, args);
4147 }
4148 return -EINVAL;
4149 }
4150
xe_vm_get_property_ioctl(struct drm_device * drm,void * data,struct drm_file * file)4151 int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
4152 struct drm_file *file)
4153 {
4154 struct xe_device *xe = to_xe_device(drm);
4155 struct xe_file *xef = to_xe_file(file);
4156 struct drm_xe_vm_get_property *args = data;
4157 struct xe_vm *vm;
4158 int ret = 0;
4159
4160 if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
4161 args->reserved[2] || args->extensions ||
4162 args->pad)))
4163 return -EINVAL;
4164
4165 vm = xe_vm_lookup(xef, args->vm_id);
4166 if (XE_IOCTL_DBG(xe, !vm))
4167 return -ENOENT;
4168
4169 ret = xe_vm_get_property_helper(vm, args);
4170
4171 xe_vm_put(vm);
4172 return ret;
4173 }
4174
4175 /**
4176 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
4177 * @vm: VM to bind the BO to
4178 * @bo: BO to bind
4179 * @q: exec queue to use for the bind (optional)
4180 * @addr: address at which to bind the BO
4181 * @cache_lvl: PAT cache level to use
4182 *
4183 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
4184 * kernel-owned VM.
4185 *
4186 * Returns a dma_fence to track the binding completion if the job to do so was
4187 * successfully submitted, an error pointer otherwise.
4188 */
struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
				       struct xe_exec_queue *q, u64 addr,
				       enum xe_cache_level cache_lvl)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct dma_fence *fence;
	int err;

	/* Hold refs on everything the async bind may outlive. */
	xe_bo_get(bo);
	xe_vm_get(vm);
	if (q)
		xe_exec_queue_get(q);

	down_write(&vm->lock);

	xe_vma_ops_init(&vops, vm, q, NULL, 0);

	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
	if (IS_ERR(ops)) {
		err = PTR_ERR(ops);
		goto release_vm_lock;
	}

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		/*
		 * Route through unwind_ops (matching the ioctl path) so a
		 * failed parse both unwinds partially-created state and
		 * frees @ops; jumping straight to release_vm_lock leaked
		 * the gpuva ops.
		 */
		goto unwind_ops;

	xe_assert(vm->xe, !list_empty(&vops.list));

	err = xe_vma_ops_alloc(&vops, false);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);

unwind_ops:
	/* -ENODATA means "nothing to do", which needs no unwinding. */
	if (err && err != -ENODATA)
		vm_bind_ioctl_ops_unwind(vm, &ops, 1);

	xe_vma_ops_fini(&vops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);

release_vm_lock:
	up_write(&vm->lock);

	if (q)
		xe_exec_queue_put(q);
	xe_vm_put(vm);
	xe_bo_put(bo);

	if (err)
		fence = ERR_PTR(err);

	return fence;
}
4249
4250 /**
4251 * xe_vm_lock() - Lock the vm's dma_resv object
4252 * @vm: The struct xe_vm whose lock is to be locked
4253 * @intr: Whether to perform any wait interruptible
4254 *
4255 * Return: 0 on success, -EINTR if @intr is true and the wait for a
4256 * contended lock was interrupted. If @intr is false, the function
4257 * always returns 0.
4258 */
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
	struct dma_resv *resv = xe_vm_resv(vm);

	/* Interruptible variant may return -EINTR; plain lock cannot fail. */
	return intr ? dma_resv_lock_interruptible(resv, NULL) :
		      dma_resv_lock(resv, NULL);
}
4270
4271 /**
4272 * xe_vm_unlock() - Unlock the vm's dma_resv object
4273 * @vm: The struct xe_vm whose lock is to be released.
4274 *
4275 * Unlock a buffer object lock that was locked by xe_vm_lock().
4276 */
void xe_vm_unlock(struct xe_vm *vm)
{
	/* Drops the resv lock taken by xe_vm_lock(). */
	dma_resv_unlock(xe_vm_resv(vm));
}
4281
4282 /**
4283 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
4284 * VMA.
4285 * @vma: VMA to invalidate
4286 * @batch: TLB invalidation batch to populate; caller must later call
4287 * xe_tlb_inval_batch_wait() on it to wait for completion
4288 *
4289 * Walks a list of page tables leaves which it memset the entries owned by this
4290 * VMA to zero, invalidates the TLBs, but doesn't block waiting for TLB flush
4291 * to complete, but instead populates @batch which can be waited on using
4292 * xe_tlb_inval_batch_wait().
4293 *
4294 * Returns 0 for success, negative error code otherwise.
4295 */
int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	/* NULL and CPU-address-mirror VMAs have no PTEs to invalidate. */
	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			/*
			 * Require either the notifier lock held for write, or
			 * for read together with the vm resv lock.
			 */
			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			/* Only invalidate when the mmu notifier says we must. */
			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	/* Zap PTEs per tile; remember which tiles actually had mappings. */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	/* Make the zapped PTEs visible before issuing the invalidation. */
	xe_device_wmb(xe);

	ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
						 xe_vma_start(vma), xe_vma_end(vma),
						 tile_mask, batch);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
	return ret;
}
4348
4349 /**
4350 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
4351 * @vma: VMA to invalidate
4352 *
4353 * Walks a list of page tables leaves which it memset the entries owned by this
4354 * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
4355 * complete.
4356 *
4357 * Returns 0 for success, negative error code otherwise.
4358 */
xe_vm_invalidate_vma(struct xe_vma * vma)4359 int xe_vm_invalidate_vma(struct xe_vma *vma)
4360 {
4361 struct xe_tlb_inval_batch batch;
4362 int ret;
4363
4364 ret = xe_vm_invalidate_vma_submit(vma, &batch);
4365 if (ret)
4366 return ret;
4367
4368 xe_tlb_inval_batch_wait(&batch);
4369 return ret;
4370 }
4371
xe_vm_validate_protected(struct xe_vm * vm)4372 int xe_vm_validate_protected(struct xe_vm *vm)
4373 {
4374 struct drm_gpuva *gpuva;
4375 int err = 0;
4376
4377 if (!vm)
4378 return -ENODEV;
4379
4380 mutex_lock(&vm->snap_mutex);
4381
4382 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4383 struct xe_vma *vma = gpuva_to_vma(gpuva);
4384 struct xe_bo *bo = vma->gpuva.gem.obj ?
4385 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4386
4387 if (!bo)
4388 continue;
4389
4390 if (xe_bo_is_protected(bo)) {
4391 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
4392 if (err)
4393 break;
4394 }
4395 }
4396
4397 mutex_unlock(&vm->snap_mutex);
4398 return err;
4399 }
4400
/* Snapshot of a VM's dumpable mappings, captured for devcoredump output. */
struct xe_vm_snapshot {
	/* DRM_XE_VM_CREATE_FLAG_* bits reconstructed from the vm's flags */
	int uapi_flags;
	/* number of entries in snap[] */
	unsigned long num_snaps;
	struct {
		/* GPU VA of the mapping; offset into the BO (or userptr address) */
		u64 ofs, bo_ofs;
		/* size of the mapping in bytes */
		unsigned long len;
#define XE_VM_SNAP_FLAG_USERPTR BIT(0)
#define XE_VM_SNAP_FLAG_READ_ONLY BIT(1)
#define XE_VM_SNAP_FLAG_IS_NULL BIT(2)
		unsigned long flags;
		/* uAPI memory region index, or -1 when not applicable */
		int uapi_mem_region;
		int pat_index;
		int cpu_caching;
		/* BO reference held until the delayed capture copies its data */
		struct xe_bo *bo;
		/* copied contents, or an ERR_PTR() describing a capture failure */
		void *data;
		/* mm reference for userptr mappings, dropped after the copy */
		struct mm_struct *mm;
	} snap[];
};
4419
xe_vm_snapshot_capture(struct xe_vm * vm)4420 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
4421 {
4422 unsigned long num_snaps = 0, i;
4423 struct xe_vm_snapshot *snap = NULL;
4424 struct drm_gpuva *gpuva;
4425
4426 if (!vm)
4427 return NULL;
4428
4429 mutex_lock(&vm->snap_mutex);
4430 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4431 if (gpuva->flags & XE_VMA_DUMPABLE)
4432 num_snaps++;
4433 }
4434
4435 if (num_snaps)
4436 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4437 if (!snap) {
4438 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4439 goto out_unlock;
4440 }
4441
4442 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
4443 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
4444 if (vm->flags & XE_VM_FLAG_LR_MODE)
4445 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
4446 if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
4447 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
4448
4449 snap->num_snaps = num_snaps;
4450 i = 0;
4451 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4452 struct xe_vma *vma = gpuva_to_vma(gpuva);
4453 struct xe_bo *bo = vma->gpuva.gem.obj ?
4454 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4455
4456 if (!(gpuva->flags & XE_VMA_DUMPABLE))
4457 continue;
4458
4459 snap->snap[i].ofs = xe_vma_start(vma);
4460 snap->snap[i].len = xe_vma_size(vma);
4461 snap->snap[i].flags = xe_vma_read_only(vma) ?
4462 XE_VM_SNAP_FLAG_READ_ONLY : 0;
4463 snap->snap[i].pat_index = vma->attr.pat_index;
4464 if (bo) {
4465 snap->snap[i].cpu_caching = bo->cpu_caching;
4466 snap->snap[i].bo = xe_bo_get(bo);
4467 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4468 switch (bo->ttm.resource->mem_type) {
4469 case XE_PL_SYSTEM:
4470 case XE_PL_TT:
4471 snap->snap[i].uapi_mem_region = 0;
4472 break;
4473 case XE_PL_VRAM0:
4474 snap->snap[i].uapi_mem_region = 1;
4475 break;
4476 case XE_PL_VRAM1:
4477 snap->snap[i].uapi_mem_region = 2;
4478 break;
4479 }
4480 } else if (xe_vma_is_userptr(vma)) {
4481 struct mm_struct *mm =
4482 to_userptr_vma(vma)->userptr.notifier.mm;
4483
4484 if (mmget_not_zero(mm))
4485 snap->snap[i].mm = mm;
4486 else
4487 snap->snap[i].data = ERR_PTR(-EFAULT);
4488
4489 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4490 snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
4491 snap->snap[i].uapi_mem_region = 0;
4492 } else if (xe_vma_is_null(vma)) {
4493 snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
4494 snap->snap[i].uapi_mem_region = -1;
4495 } else {
4496 snap->snap[i].data = ERR_PTR(-ENOENT);
4497 snap->snap[i].uapi_mem_region = -1;
4498 }
4499 i++;
4500 }
4501
4502 out_unlock:
4503 mutex_unlock(&vm->snap_mutex);
4504 return snap;
4505 }
4506
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)4507 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4508 {
4509 if (IS_ERR_OR_NULL(snap))
4510 return;
4511
4512 for (int i = 0; i < snap->num_snaps; i++) {
4513 struct xe_bo *bo = snap->snap[i].bo;
4514 int err;
4515
4516 if (IS_ERR(snap->snap[i].data) ||
4517 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4518 continue;
4519
4520 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4521 if (!snap->snap[i].data) {
4522 snap->snap[i].data = ERR_PTR(-ENOMEM);
4523 goto cleanup_bo;
4524 }
4525
4526 if (bo) {
4527 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4528 snap->snap[i].data, snap->snap[i].len);
4529 } else {
4530 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4531
4532 kthread_use_mm(snap->snap[i].mm);
4533 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4534 err = 0;
4535 else
4536 err = -EFAULT;
4537 kthread_unuse_mm(snap->snap[i].mm);
4538
4539 mmput(snap->snap[i].mm);
4540 snap->snap[i].mm = NULL;
4541 }
4542
4543 if (err) {
4544 kvfree(snap->snap[i].data);
4545 snap->snap[i].data = ERR_PTR(err);
4546 }
4547
4548 cleanup_bo:
4549 xe_bo_put(bo);
4550 snap->snap[i].bo = NULL;
4551 }
4552 }
4553
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)4554 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4555 {
4556 unsigned long i, j;
4557
4558 if (IS_ERR_OR_NULL(snap)) {
4559 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4560 return;
4561 }
4562
4563 drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
4564 for (i = 0; i < snap->num_snaps; i++) {
4565 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4566
4567 drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
4568 snap->snap[i].ofs,
4569 snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
4570 "read_only" : "read_write",
4571 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
4572 "null_sparse" :
4573 snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
4574 "userptr" : "bo",
4575 snap->snap[i].uapi_mem_region == -1 ? 0 :
4576 BIT(snap->snap[i].uapi_mem_region),
4577 snap->snap[i].pat_index,
4578 snap->snap[i].cpu_caching);
4579
4580 if (IS_ERR(snap->snap[i].data)) {
4581 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4582 PTR_ERR(snap->snap[i].data));
4583 continue;
4584 }
4585
4586 if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4587 continue;
4588
4589 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4590
4591 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4592 u32 *val = snap->snap[i].data + j;
4593 char dumped[ASCII85_BUFSZ];
4594
4595 drm_puts(p, ascii85_encode(*val, dumped));
4596 }
4597
4598 drm_puts(p, "\n");
4599
4600 if (drm_coredump_printer_is_full(p))
4601 return;
4602 }
4603 }
4604
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)4605 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4606 {
4607 unsigned long i;
4608
4609 if (IS_ERR_OR_NULL(snap))
4610 return;
4611
4612 for (i = 0; i < snap->num_snaps; i++) {
4613 if (!IS_ERR(snap->snap[i].data))
4614 kvfree(snap->snap[i].data);
4615 xe_bo_put(snap->snap[i].bo);
4616 if (snap->snap[i].mm)
4617 mmput(snap->snap[i].mm);
4618 }
4619 kvfree(snap);
4620 }
4621
4622 /**
4623 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4624 * @xe: Pointer to the Xe device structure
4625 * @vma: Pointer to the virtual memory area (VMA) structure
4626 * @is_atomic: In pagefault path and atomic operation
4627 *
4628 * This function determines whether the given VMA needs to be migrated to
4629 * VRAM in order to do atomic GPU operation.
4630 *
4631 * Return:
4632 * 1 - Migration to VRAM is required
4633 * 0 - Migration is not required
4634 * -EACCES - Invalid access for atomic memory attr
4635 *
4636 */
xe_vma_need_vram_for_atomic(struct xe_device * xe,struct xe_vma * vma,bool is_atomic)4637 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4638 {
4639 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4640 vma->attr.atomic_access;
4641
4642 if (!IS_DGFX(xe) || !is_atomic)
4643 return false;
4644
4645 /*
4646 * NOTE: The checks implemented here are platform-specific. For
4647 * instance, on a device supporting CXL atomics, these would ideally
4648 * work universally without additional handling.
4649 */
4650 switch (atomic_access) {
4651 case DRM_XE_ATOMIC_DEVICE:
4652 return !xe->info.has_device_atomics_on_smem;
4653
4654 case DRM_XE_ATOMIC_CPU:
4655 return -EACCES;
4656
4657 case DRM_XE_ATOMIC_UNDEFINED:
4658 case DRM_XE_ATOMIC_GLOBAL:
4659 default:
4660 return 1;
4661 }
4662 }
4663
xe_vm_alloc_vma(struct xe_vm * vm,struct drm_gpuvm_map_req * map_req,bool is_madvise)4664 static int xe_vm_alloc_vma(struct xe_vm *vm,
4665 struct drm_gpuvm_map_req *map_req,
4666 bool is_madvise)
4667 {
4668 struct xe_vma_ops vops;
4669 struct drm_gpuva_ops *ops = NULL;
4670 struct drm_gpuva_op *__op;
4671 unsigned int vma_flags = 0;
4672 bool remap_op = false;
4673 struct xe_vma_mem_attr tmp_attr = {};
4674 u16 default_pat;
4675 int err;
4676
4677 lockdep_assert_held_write(&vm->lock);
4678
4679 if (is_madvise)
4680 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4681 else
4682 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4683
4684 if (IS_ERR(ops))
4685 return PTR_ERR(ops);
4686
4687 if (list_empty(&ops->list)) {
4688 err = 0;
4689 goto free_ops;
4690 }
4691
4692 drm_gpuva_for_each_op(__op, ops) {
4693 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4694 struct xe_vma *vma = NULL;
4695
4696 if (!is_madvise) {
4697 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4698 vma = gpuva_to_vma(op->base.unmap.va);
4699 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4700 default_pat = vma->attr.default_pat_index;
4701 vma_flags = vma->gpuva.flags;
4702 }
4703
4704 if (__op->op == DRM_GPUVA_OP_REMAP) {
4705 vma = gpuva_to_vma(op->base.remap.unmap->va);
4706 default_pat = vma->attr.default_pat_index;
4707 vma_flags = vma->gpuva.flags;
4708 }
4709
4710 if (__op->op == DRM_GPUVA_OP_MAP) {
4711 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4712 op->map.pat_index = default_pat;
4713 }
4714 } else {
4715 if (__op->op == DRM_GPUVA_OP_REMAP) {
4716 vma = gpuva_to_vma(op->base.remap.unmap->va);
4717 xe_assert(vm->xe, !remap_op);
4718 xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4719 remap_op = true;
4720 vma_flags = vma->gpuva.flags;
4721 }
4722
4723 if (__op->op == DRM_GPUVA_OP_MAP) {
4724 xe_assert(vm->xe, remap_op);
4725 remap_op = false;
4726 /*
4727 * In case of madvise ops DRM_GPUVA_OP_MAP is
4728 * always after DRM_GPUVA_OP_REMAP, so ensure
4729 * to propagate the flags from the vma we're
4730 * unmapping.
4731 */
4732 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4733 }
4734 }
4735 print_op(vm->xe, __op);
4736 }
4737
4738 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4739
4740 if (is_madvise)
4741 vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4742 else
4743 vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
4744
4745 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4746 if (err)
4747 goto unwind_ops;
4748
4749 xe_vm_lock(vm, false);
4750
4751 drm_gpuva_for_each_op(__op, ops) {
4752 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4753 struct xe_vma *vma;
4754
4755 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4756 vma = gpuva_to_vma(op->base.unmap.va);
4757 /* There should be no unmap for madvise */
4758 if (is_madvise)
4759 XE_WARN_ON("UNEXPECTED UNMAP");
4760
4761 xe_vma_destroy(vma, NULL);
4762 } else if (__op->op == DRM_GPUVA_OP_REMAP) {
4763 vma = gpuva_to_vma(op->base.remap.unmap->va);
4764 /* In case of madvise ops Store attributes for REMAP UNMAPPED
4765 * VMA, so they can be assigned to newly MAP created vma.
4766 */
4767 if (is_madvise)
4768 xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
4769
4770 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4771 } else if (__op->op == DRM_GPUVA_OP_MAP) {
4772 vma = op->map.vma;
4773 /* In case of madvise call, MAP will always be followed by REMAP.
4774 * Therefore temp_attr will always have sane values, making it safe to
4775 * copy them to new vma.
4776 */
4777 if (is_madvise)
4778 xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
4779 }
4780 }
4781
4782 xe_vm_unlock(vm);
4783 drm_gpuva_ops_free(&vm->gpuvm, ops);
4784 xe_vma_mem_attr_fini(&tmp_attr);
4785 return 0;
4786
4787 unwind_ops:
4788 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4789 free_ops:
4790 drm_gpuva_ops_free(&vm->gpuvm, ops);
4791 return err;
4792 }
4793
4794 /**
4795 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
4796 * @vm: Pointer to the xe_vm structure
4797 * @start: Starting input address
4798 * @range: Size of the input range
4799 *
4800 * This function splits existing vma to create new vma for user provided input range
4801 *
4802 * Return: 0 if success
4803 */
xe_vm_alloc_madvise_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4804 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4805 {
4806 struct drm_gpuvm_map_req map_req = {
4807 .map.va.addr = start,
4808 .map.va.range = range,
4809 };
4810
4811 lockdep_assert_held_write(&vm->lock);
4812
4813 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4814
4815 return xe_vm_alloc_vma(vm, &map_req, true);
4816 }
4817
is_cpu_addr_vma_with_default_attr(struct xe_vma * vma)4818 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
4819 {
4820 return vma && xe_vma_is_cpu_addr_mirror(vma) &&
4821 xe_vma_has_default_mem_attrs(vma);
4822 }
4823
4824 /**
4825 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
4826 * @vm: VM to search within
4827 * @start: Input/output pointer to the starting address of the range
4828 * @end: Input/output pointer to the end address of the range
4829 *
4830 * Given a range defined by @start and @range, this function checks the VMAs
4831 * immediately before and after the range. If those neighboring VMAs are
4832 * CPU-address-mirrored and have default memory attributes, the function
4833 * updates @start and @range to include them. This extended range can then
4834 * be used for merging or other operations that require a unified VMA.
4835 *
4836 * The function does not perform the merge itself; it only computes the
4837 * mergeable boundaries.
4838 */
xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm * vm,u64 * start,u64 * end)4839 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
4840 {
4841 struct xe_vma *prev, *next;
4842
4843 lockdep_assert_held(&vm->lock);
4844
4845 if (*start >= SZ_4K) {
4846 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
4847 if (is_cpu_addr_vma_with_default_attr(prev))
4848 *start = xe_vma_start(prev);
4849 }
4850
4851 if (*end < vm->size) {
4852 next = xe_vm_find_vma_by_addr(vm, *end + 1);
4853 if (is_cpu_addr_vma_with_default_attr(next))
4854 *end = xe_vma_end(next);
4855 }
4856 }
4857
4858 /**
4859 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4860 * @vm: Pointer to the xe_vm structure
4861 * @start: Starting input address
4862 * @range: Size of the input range
4863 *
4864 * This function splits/merges existing vma to create new vma for user provided input range
4865 *
4866 * Return: 0 if success
4867 */
xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4868 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4869 {
4870 struct drm_gpuvm_map_req map_req = {
4871 .map.va.addr = start,
4872 .map.va.range = range,
4873 };
4874
4875 lockdep_assert_held_write(&vm->lock);
4876
4877 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4878 start, range);
4879
4880 return xe_vm_alloc_vma(vm, &map_req, false);
4881 }
4882
4883 /**
4884 * xe_vm_add_exec_queue() - Add exec queue to VM
4885 * @vm: The VM.
4886 * @q: The exec_queue
4887 *
4888 * Add exec queue to VM, skipped if the device does not have context based TLB
4889 * invalidations.
4890 */
xe_vm_add_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)4891 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4892 {
4893 struct xe_device *xe = vm->xe;
4894
4895 /* User VMs and queues only */
4896 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
4897 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
4898 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
4899 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE));
4900 xe_assert(xe, vm->xef);
4901 xe_assert(xe, vm == q->vm);
4902
4903 if (!xe->info.has_ctx_tlb_inval)
4904 return;
4905
4906 down_write(&vm->exec_queues.lock);
4907 list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]);
4908 ++vm->exec_queues.count[q->gt->info.id];
4909 up_write(&vm->exec_queues.lock);
4910 }
4911
4912 /**
4913 * xe_vm_remove_exec_queue() - Remove exec queue from VM
4914 * @vm: The VM.
4915 * @q: The exec_queue
4916 *
4917 * Remove exec queue from VM, skipped if the device does not have context based
4918 * TLB invalidations.
4919 */
xe_vm_remove_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)4920 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4921 {
4922 if (!vm->xe->info.has_ctx_tlb_inval)
4923 return;
4924
4925 down_write(&vm->exec_queues.lock);
4926 if (!list_empty(&q->vm_exec_queue_link)) {
4927 list_del(&q->vm_exec_queue_link);
4928 --vm->exec_queues.count[q->gt->info.id];
4929 }
4930 up_write(&vm->exec_queues.lock);
4931 }
4932