1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_sriov_vf.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_vm_madvise.h"
44 #include "xe_wa.h"
45
46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
47 {
48 return vm->gpuvm.r_obj;
49 }
50
51 /**
52 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
53 * @vm: The vm whose resv is to be locked.
54 * @exec: The drm_exec transaction.
55 *
56 * Helper to lock the vm's resv as part of a drm_exec transaction.
57 *
58 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
59 */
60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
61 {
62 return drm_exec_lock_obj(exec, xe_vm_obj(vm));
63 }
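
/*
 * Usage sketch (illustrative only, not a quote of an existing caller):
 * the lock is normally taken inside a drm_exec transaction so that
 * contention is handled by restarting the loop.
 *
 *	struct drm_exec exec;
 *	int err;
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	...
 *	drm_exec_fini(&exec);
 */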
64
65 static bool preempt_fences_waiting(struct xe_vm *vm)
66 {
67 struct xe_exec_queue *q;
68
69 lockdep_assert_held(&vm->lock);
70 xe_vm_assert_held(vm);
71
72 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
73 if (!q->lr.pfence ||
74 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
75 &q->lr.pfence->flags)) {
76 return true;
77 }
78 }
79
80 return false;
81 }
82
83 static void free_preempt_fences(struct list_head *list)
84 {
85 struct list_head *link, *next;
86
87 list_for_each_safe(link, next, list)
88 xe_preempt_fence_free(to_preempt_fence_from_link(link));
89 }
90
91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
92 unsigned int *count)
93 {
94 lockdep_assert_held(&vm->lock);
95 xe_vm_assert_held(vm);
96
97 if (*count >= vm->preempt.num_exec_queues)
98 return 0;
99
100 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
101 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
102
103 if (IS_ERR(pfence))
104 return PTR_ERR(pfence);
105
106 list_move_tail(xe_preempt_fence_link(pfence), list);
107 }
108
109 return 0;
110 }
111
112 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
113 {
114 struct xe_exec_queue *q;
115 bool vf_migration = IS_SRIOV_VF(vm->xe) &&
116 xe_sriov_vf_migration_supported(vm->xe);
117 signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
118
119 xe_vm_assert_held(vm);
120
121 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
122 if (q->lr.pfence) {
123 long timeout;
124
125 timeout = dma_fence_wait_timeout(q->lr.pfence, false,
126 wait_time);
127 if (!timeout) {
128 xe_assert(vm->xe, vf_migration);
129 return -EAGAIN;
130 }
131
132 /* Only -ETIME on fence indicates VM needs to be killed */
133 if (timeout < 0 || q->lr.pfence->error == -ETIME)
134 return -ETIME;
135
136 dma_fence_put(q->lr.pfence);
137 q->lr.pfence = NULL;
138 }
139 }
140
141 return 0;
142 }
143
144 static bool xe_vm_is_idle(struct xe_vm *vm)
145 {
146 struct xe_exec_queue *q;
147
148 xe_vm_assert_held(vm);
149 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
150 if (!xe_exec_queue_is_idle(q))
151 return false;
152 }
153
154 return true;
155 }
156
157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
158 {
159 struct list_head *link;
160 struct xe_exec_queue *q;
161
162 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
163 struct dma_fence *fence;
164
165 link = list->next;
166 xe_assert(vm->xe, link != list);
167
168 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
169 q, q->lr.context,
170 ++q->lr.seqno);
171 dma_fence_put(q->lr.pfence);
172 q->lr.pfence = fence;
173 }
174 }
175
176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
177 {
178 struct xe_exec_queue *q;
179 int err;
180
181 xe_bo_assert_held(bo);
182
183 if (!vm->preempt.num_exec_queues)
184 return 0;
185
186 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
187 if (err)
188 return err;
189
190 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
191 if (q->lr.pfence) {
192 dma_resv_add_fence(bo->ttm.base.resv,
193 q->lr.pfence,
194 DMA_RESV_USAGE_BOOKKEEP);
195 }
196
197 return 0;
198 }
199
200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
201 struct drm_exec *exec)
202 {
203 struct xe_exec_queue *q;
204
205 lockdep_assert_held(&vm->lock);
206 xe_vm_assert_held(vm);
207
208 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
209 q->ops->resume(q);
210
211 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
212 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
213 }
214 }
215
216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
217 {
218 struct drm_gpuvm_exec vm_exec = {
219 .vm = &vm->gpuvm,
220 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
221 .num_fences = 1,
222 };
223 struct drm_exec *exec = &vm_exec.exec;
224 struct xe_validation_ctx ctx;
225 struct dma_fence *pfence;
226 int err;
227 bool wait;
228
229 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
230
231 down_write(&vm->lock);
232 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
233 if (err)
234 goto out_up_write;
235
236 pfence = xe_preempt_fence_create(q, q->lr.context,
237 ++q->lr.seqno);
238 if (IS_ERR(pfence)) {
239 err = PTR_ERR(pfence);
240 goto out_fini;
241 }
242
243 list_add(&q->lr.link, &vm->preempt.exec_queues);
244 ++vm->preempt.num_exec_queues;
245 q->lr.pfence = pfence;
246
247 xe_svm_notifier_lock(vm);
248
249 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
250 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
251
252 /*
253 * Check whether a preemption on the VM or a userptr invalidation is in
254 * flight; if so, trigger this preempt fence to sync state with the
255 * other preempt fences on the VM.
256 */
257 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
258 if (wait)
259 dma_fence_enable_sw_signaling(pfence);
260
261 xe_svm_notifier_unlock(vm);
262
263 out_fini:
264 xe_validation_ctx_fini(&ctx);
265 out_up_write:
266 up_write(&vm->lock);
267
268 return err;
269 }
270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
271
272 /**
273 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
274 * @vm: The VM.
275 * @q: The exec_queue
276 *
277 * Note that this function might be called multiple times on the same queue.
278 */
279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
280 {
281 if (!xe_vm_in_preempt_fence_mode(vm))
282 return;
283
284 down_write(&vm->lock);
285 if (!list_empty(&q->lr.link)) {
286 list_del_init(&q->lr.link);
287 --vm->preempt.num_exec_queues;
288 }
289 if (q->lr.pfence) {
290 dma_fence_enable_sw_signaling(q->lr.pfence);
291 dma_fence_put(q->lr.pfence);
292 q->lr.pfence = NULL;
293 }
294 up_write(&vm->lock);
295 }
296
297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
298
299 /**
300 * xe_vm_kill() - VM Kill
301 * @vm: The VM.
302 * @unlocked: Flag indicating the VM's dma-resv is not held
303 *
304 * Kill the VM by setting the banned flag, indicating the VM is no longer
305 * available for use. If in preempt fence mode, also kill all exec queues attached to the VM.
306 */
307 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
308 {
309 struct xe_exec_queue *q;
310
311 lockdep_assert_held(&vm->lock);
312
313 if (unlocked)
314 xe_vm_lock(vm, false);
315
316 vm->flags |= XE_VM_FLAG_BANNED;
317 trace_xe_vm_kill(vm);
318
319 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
320 q->ops->kill(q);
321
322 if (unlocked)
323 xe_vm_unlock(vm);
324
325 /* TODO: Inform user the VM is banned */
326 }
327
328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
329 {
330 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
331 struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj);
332 struct drm_gpuva *gpuva;
333 int ret;
334
335 lockdep_assert_held(&vm->lock);
336 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
337 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
338 &vm->rebind_list);
339
340 /* Skip re-populating purged BOs; the rebind maps scratch pages. */
341 if (xe_bo_is_purged(bo)) {
342 vm_bo->evicted = false;
343 return 0;
344 }
345
346 if (!try_wait_for_completion(&vm->xe->pm_block))
347 return -EAGAIN;
348
349 ret = xe_bo_validate(bo, vm, false, exec);
350 if (ret)
351 return ret;
352
353 vm_bo->evicted = false;
354 return 0;
355 }
356
357 /**
358 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
359 * @vm: The vm for which we are rebinding.
360 * @exec: The struct drm_exec with the locked GEM objects.
361 * @num_fences: The number of fences to reserve for the operation, not
362 * including rebinds and validations.
363 *
364 * Validates all evicted gem objects and rebinds their vmas. Note that
365 * rebindings may cause evictions and hence the validation-rebind
366 * sequence is rerun until there are no more objects to validate.
367 *
368 * Return: 0 on success, negative error code on error. In particular,
369 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
370 * the drm_exec transaction needs to be restarted.
371 */
372 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
373 unsigned int num_fences)
374 {
375 struct drm_gem_object *obj;
376 unsigned long index;
377 int ret;
378
379 do {
380 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
381 if (ret)
382 return ret;
383
384 ret = xe_vm_rebind(vm, false);
385 if (ret)
386 return ret;
387 } while (!list_empty(&vm->gpuvm.evict.list));
388
389 drm_exec_for_each_locked_object(exec, index, obj) {
390 ret = dma_resv_reserve_fences(obj->resv, num_fences);
391 if (ret)
392 return ret;
393 }
394
395 return 0;
396 }
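
/*
 * Caller sketch (an assumption about typical usage, not an existing caller):
 * the function is meant to run inside a drm_exec transaction with the VM's
 * objects locked, restarting on -EDEADLK via drm_exec_retry_on_contention().
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		drm_exec_retry_on_contention(&exec);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, 1);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 */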
397
398 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
399 bool *done)
400 {
401 int err;
402
403 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
404 if (err)
405 return err;
406
407 if (xe_vm_is_idle(vm)) {
408 vm->preempt.rebind_deactivated = true;
409 *done = true;
410 return 0;
411 }
412
413 if (!preempt_fences_waiting(vm)) {
414 *done = true;
415 return 0;
416 }
417
418 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
419 if (err)
420 return err;
421
422 err = wait_for_existing_preempt_fences(vm);
423 if (err)
424 return err;
425
426 /*
427 * Add validation and rebinding to the locking loop since both can
428 * cause evictions which may require blocking dma_resv locks.
429 * The fence reservation here is intended for the new preempt fences
430 * we attach at the end of the rebind work.
431 */
432 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
433 }
434
435 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
436 {
437 struct xe_device *xe = vm->xe;
438 bool ret = false;
439
440 mutex_lock(&xe->rebind_resume_lock);
441 if (!try_wait_for_completion(&vm->xe->pm_block)) {
442 ret = true;
443 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
444 }
445 mutex_unlock(&xe->rebind_resume_lock);
446
447 return ret;
448 }
449
450 /**
451 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
452 * @vm: The vm whose preempt worker to resume.
453 *
454 * Resume a preempt worker that was previously suspended by
455 * vm_suspend_rebind_worker().
456 */
457 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
458 {
459 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
460 }
461
462 static void preempt_rebind_work_func(struct work_struct *w)
463 {
464 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
465 struct xe_validation_ctx ctx;
466 struct drm_exec exec;
467 unsigned int fence_count = 0;
468 LIST_HEAD(preempt_fences);
469 int err = 0;
470 long wait;
471 int __maybe_unused tries = 0;
472
473 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
474 trace_xe_vm_rebind_worker_enter(vm);
475
476 down_write(&vm->lock);
477
478 if (xe_vm_is_closed_or_banned(vm)) {
479 up_write(&vm->lock);
480 trace_xe_vm_rebind_worker_exit(vm);
481 return;
482 }
483
484 retry:
485 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
486 up_write(&vm->lock);
487 /* We don't actually block, but we don't make progress either. */
488 xe_pm_might_block_on_suspend();
489 return;
490 }
491
492 if (xe_vm_userptr_check_repin(vm)) {
493 err = xe_vm_userptr_pin(vm);
494 if (err)
495 goto out_unlock_outer;
496 }
497
498 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
499 (struct xe_val_flags) {.interruptible = true});
500 if (err)
501 goto out_unlock_outer;
502
503 drm_exec_until_all_locked(&exec) {
504 bool done = false;
505
506 err = xe_preempt_work_begin(&exec, vm, &done);
507 drm_exec_retry_on_contention(&exec);
508 xe_validation_retry_on_oom(&ctx, &err);
509 if (err || done) {
510 xe_validation_ctx_fini(&ctx);
511 goto out_unlock_outer;
512 }
513 }
514
515 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
516 if (err)
517 goto out_unlock;
518
519 xe_vm_set_validation_exec(vm, &exec);
520 err = xe_vm_rebind(vm, true);
521 xe_vm_set_validation_exec(vm, NULL);
522 if (err)
523 goto out_unlock;
524
525 /* Wait on rebinds and munmap style VM unbinds */
526 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
527 DMA_RESV_USAGE_KERNEL,
528 false, MAX_SCHEDULE_TIMEOUT);
529 if (wait <= 0) {
530 err = -ETIME;
531 goto out_unlock;
532 }
533
534 #define retry_required(__tries, __vm) \
535 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
536 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
537 __xe_vm_userptr_needs_repin(__vm))
538
539 xe_svm_notifier_lock(vm);
540 if (retry_required(tries, vm)) {
541 xe_svm_notifier_unlock(vm);
542 err = -EAGAIN;
543 goto out_unlock;
544 }
545
546 #undef retry_required
547
548 spin_lock(&vm->xe->ttm.lru_lock);
549 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
550 spin_unlock(&vm->xe->ttm.lru_lock);
551
552 /* Point of no return. */
553 arm_preempt_fences(vm, &preempt_fences);
554 resume_and_reinstall_preempt_fences(vm, &exec);
555 xe_svm_notifier_unlock(vm);
556
557 out_unlock:
558 xe_validation_ctx_fini(&ctx);
559 out_unlock_outer:
560 if (err == -EAGAIN) {
561 trace_xe_vm_rebind_worker_retry(vm);
562
563 /*
564 * We can't block in workers on a VF which supports migration
565 * given this can block the VF post-migration workers from
566 * getting scheduled.
567 */
568 if (IS_SRIOV_VF(vm->xe) &&
569 xe_sriov_vf_migration_supported(vm->xe)) {
570 up_write(&vm->lock);
571 xe_vm_queue_rebind_worker(vm);
572 return;
573 }
574
575 goto retry;
576 }
577
578 if (err) {
579 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
580 xe_vm_kill(vm, true);
581 }
582 up_write(&vm->lock);
583
584 free_preempt_fences(&preempt_fences);
585
586 trace_xe_vm_rebind_worker_exit(vm);
587 }
588
589 /**
590 * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
591 * @vm: The VM.
592 * @pf: The pagefault.
593 *
594 * This function takes the data from the pagefault @pf and saves it to @vm->faults.list.
595 *
596 * The function reports a warning if memory for the fault entry could not be
597 * allocated, and exits silently if the list is already full.
598 */
599 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf)
600 {
601 struct xe_vm_fault_entry *e;
602 struct xe_hw_engine *hwe;
603
604 /* Do not report faults on reserved engines */
605 hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class,
606 pf->consumer.engine_instance, false);
607 if (!hwe || xe_hw_engine_is_reserved(hwe))
608 return;
609
610 e = kzalloc_obj(*e);
611 if (!e) {
612 drm_warn(&vm->xe->drm,
613 "Could not allocate memory for fault!\n");
614 return;
615 }
616
617 guard(spinlock)(&vm->faults.lock);
618
619 /*
620 * Limit the number of faults in the fault list to prevent
621 * memory overuse.
622 */
623 if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) {
624 kfree(e);
625 return;
626 }
627
628 e->address = pf->consumer.page_addr;
629 /*
630 * TODO:
631 * Address precision is currently always SZ_4K, but this may change
632 * in the future.
633 */
634 e->address_precision = SZ_4K;
635 e->access_type = pf->consumer.access_type;
636 e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK,
637 pf->consumer.fault_type_level);
638 e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK,
639 pf->consumer.fault_type_level);
640
641 list_add_tail(&e->list, &vm->faults.list);
642 vm->faults.len++;
643 }
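
/*
 * Reader-side sketch (hypothetical consumer, not part of this file): entries
 * saved above are meant to be walked under vm->faults.lock, e.g.:
 *
 *	scoped_guard(spinlock, &vm->faults.lock) {
 *		struct xe_vm_fault_entry *e;
 *
 *		list_for_each_entry(e, &vm->faults.list, list)
 *			drm_dbg(&vm->xe->drm, "fault at %#llx\n", e->address);
 *	}
 */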
644
645 static void xe_vm_clear_fault_entries(struct xe_vm *vm)
646 {
647 struct xe_vm_fault_entry *e, *tmp;
648
649 guard(spinlock)(&vm->faults.lock);
650 list_for_each_entry_safe(e, tmp, &vm->faults.list, list) {
651 list_del(&e->list);
652 kfree(e);
653 }
654 vm->faults.len = 0;
655 }
656
657 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
658 {
659 int i;
660
661 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
662 if (!vops->pt_update_ops[i].num_ops)
663 continue;
664
665 vops->pt_update_ops[i].ops =
666 kmalloc_objs(*vops->pt_update_ops[i].ops,
667 vops->pt_update_ops[i].num_ops,
668 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
669 if (!vops->pt_update_ops[i].ops)
670 return array_of_binds ? -ENOBUFS : -ENOMEM;
671 }
672
673 return 0;
674 }
675 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
676
677 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
678 {
679 struct xe_vma *vma;
680
681 vma = gpuva_to_vma(op->base.prefetch.va);
682
683 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
684 xa_destroy(&op->prefetch_range.range);
685 }
686
687 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
688 {
689 struct xe_vma_op *op;
690
691 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
692 return;
693
694 list_for_each_entry(op, &vops->list, link)
695 xe_vma_svm_prefetch_op_fini(op);
696 }
697
698 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
699 {
700 int i;
701
702 xe_vma_svm_prefetch_ops_fini(vops);
703
704 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
705 kfree(vops->pt_update_ops[i].ops);
706 }
707
708 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
709 {
710 int i;
711
712 if (!inc_val)
713 return;
714
715 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
716 if (BIT(i) & tile_mask)
717 vops->pt_update_ops[i].num_ops += inc_val;
718 }
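
/*
 * Illustrative example: with tile_mask == (BIT(0) | BIT(1)) and inc_val == 1,
 * the function above bumps pt_update_ops[0].num_ops and
 * pt_update_ops[1].num_ops by one each, which later sizes the per-tile
 * allocations done in xe_vma_ops_alloc().
 */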
719
720 #define XE_VMA_CREATE_MASK ( \
721 XE_VMA_READ_ONLY | \
722 XE_VMA_DUMPABLE | \
723 XE_VMA_SYSTEM_ALLOCATOR | \
724 DRM_GPUVA_SPARSE | \
725 XE_VMA_MADV_AUTORESET)
726
727 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
728 u8 tile_mask)
729 {
730 INIT_LIST_HEAD(&op->link);
731 op->tile_mask = tile_mask;
732 op->base.op = DRM_GPUVA_OP_MAP;
733 op->base.map.va.addr = vma->gpuva.va.addr;
734 op->base.map.va.range = vma->gpuva.va.range;
735 op->base.map.gem.obj = vma->gpuva.gem.obj;
736 op->base.map.gem.offset = vma->gpuva.gem.offset;
737 op->map.vma = vma;
738 op->map.immediate = true;
739 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
740 }
741
742 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
743 u8 tile_mask)
744 {
745 struct xe_vma_op *op;
746
747 op = kzalloc_obj(*op);
748 if (!op)
749 return -ENOMEM;
750
751 xe_vm_populate_rebind(op, vma, tile_mask);
752 list_add_tail(&op->link, &vops->list);
753 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
754
755 return 0;
756 }
757
758 static struct dma_fence *ops_execute(struct xe_vm *vm,
759 struct xe_vma_ops *vops);
760 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
761 struct xe_exec_queue *q,
762 struct xe_sync_entry *syncs, u32 num_syncs);
763
764 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
765 {
766 struct dma_fence *fence;
767 struct xe_vma *vma, *next;
768 struct xe_vma_ops vops;
769 struct xe_vma_op *op, *next_op;
770 int err, i;
771
772 lockdep_assert_held(&vm->lock);
773 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
774 list_empty(&vm->rebind_list))
775 return 0;
776
777 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
778 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
779 vops.pt_update_ops[i].wait_vm_bookkeep = true;
780
781 xe_vm_assert_held(vm);
782 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
783 xe_assert(vm->xe, vma->tile_present);
784
785 if (rebind_worker)
786 trace_xe_vma_rebind_worker(vma);
787 else
788 trace_xe_vma_rebind_exec(vma);
789
790 err = xe_vm_ops_add_rebind(&vops, vma,
791 vma->tile_present);
792 if (err)
793 goto free_ops;
794 }
795
796 err = xe_vma_ops_alloc(&vops, false);
797 if (err)
798 goto free_ops;
799
800 fence = ops_execute(vm, &vops);
801 if (IS_ERR(fence)) {
802 err = PTR_ERR(fence);
803 } else {
804 dma_fence_put(fence);
805 list_for_each_entry_safe(vma, next, &vm->rebind_list,
806 combined_links.rebind)
807 list_del_init(&vma->combined_links.rebind);
808 }
809 free_ops:
810 list_for_each_entry_safe(op, next_op, &vops.list, link) {
811 list_del(&op->link);
812 kfree(op);
813 }
814 xe_vma_ops_fini(&vops);
815
816 return err;
817 }
818
819 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
820 {
821 struct dma_fence *fence = NULL;
822 struct xe_vma_ops vops;
823 struct xe_vma_op *op, *next_op;
824 struct xe_tile *tile;
825 u8 id;
826 int err;
827
828 lockdep_assert_held(&vm->lock);
829 xe_vm_assert_held(vm);
830 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
831
832 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
833 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
834 for_each_tile(tile, vm->xe, id) {
835 vops.pt_update_ops[id].wait_vm_bookkeep = true;
836 vops.pt_update_ops[tile->id].q =
837 xe_migrate_exec_queue(tile->migrate);
838 }
839
840 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
841 if (err)
842 return ERR_PTR(err);
843
844 err = xe_vma_ops_alloc(&vops, false);
845 if (err) {
846 fence = ERR_PTR(err);
847 goto free_ops;
848 }
849
850 fence = ops_execute(vm, &vops);
851
852 free_ops:
853 list_for_each_entry_safe(op, next_op, &vops.list, link) {
854 list_del(&op->link);
855 kfree(op);
856 }
857 xe_vma_ops_fini(&vops);
858
859 return fence;
860 }
861
862 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
863 struct xe_vma *vma,
864 struct xe_svm_range *range,
865 u8 tile_mask)
866 {
867 INIT_LIST_HEAD(&op->link);
868 op->tile_mask = tile_mask;
869 op->base.op = DRM_GPUVA_OP_DRIVER;
870 op->subop = XE_VMA_SUBOP_MAP_RANGE;
871 op->map_range.vma = vma;
872 op->map_range.range = range;
873 }
874
875 static int
876 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
877 struct xe_vma *vma,
878 struct xe_svm_range *range,
879 u8 tile_mask)
880 {
881 struct xe_vma_op *op;
882
883 op = kzalloc_obj(*op);
884 if (!op)
885 return -ENOMEM;
886
887 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
888 list_add_tail(&op->link, &vops->list);
889 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
890
891 return 0;
892 }
893
894 /**
895 * xe_vm_range_rebind() - VM range (re)bind
896 * @vm: The VM which the range belongs to.
897 * @vma: The VMA which the range belongs to.
898 * @range: SVM range to rebind.
899 * @tile_mask: Tile mask to bind the range to.
900 *
901 * (re)bind SVM range setting up GPU page tables for the range.
902 *
903 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
904 * failure
905 */
906 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
907 struct xe_vma *vma,
908 struct xe_svm_range *range,
909 u8 tile_mask)
910 {
911 struct dma_fence *fence = NULL;
912 struct xe_vma_ops vops;
913 struct xe_vma_op *op, *next_op;
914 struct xe_tile *tile;
915 u8 id;
916 int err;
917
918 lockdep_assert_held(&vm->lock);
919 xe_vm_assert_held(vm);
920 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
921 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
922
923 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
924 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
925 for_each_tile(tile, vm->xe, id) {
926 vops.pt_update_ops[id].wait_vm_bookkeep = true;
927 vops.pt_update_ops[tile->id].q =
928 xe_migrate_exec_queue(tile->migrate);
929 }
930
931 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
932 if (err)
933 return ERR_PTR(err);
934
935 err = xe_vma_ops_alloc(&vops, false);
936 if (err) {
937 fence = ERR_PTR(err);
938 goto free_ops;
939 }
940
941 fence = ops_execute(vm, &vops);
942
943 free_ops:
944 list_for_each_entry_safe(op, next_op, &vops.list, link) {
945 list_del(&op->link);
946 kfree(op);
947 }
948 xe_vma_ops_fini(&vops);
949
950 return fence;
951 }
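
/*
 * Caller sketch (an assumption about usage, e.g. from a fault handler):
 * the returned fence signals when the range is bound and must be put by
 * the caller.
 *
 *	fence = xe_vm_range_rebind(vm, vma, range, tile_mask);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */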
952
953 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
954 struct xe_svm_range *range)
955 {
956 INIT_LIST_HEAD(&op->link);
957 op->tile_mask = range->tile_present;
958 op->base.op = DRM_GPUVA_OP_DRIVER;
959 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
960 op->unmap_range.range = range;
961 }
962
963 static int
964 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
965 struct xe_svm_range *range)
966 {
967 struct xe_vma_op *op;
968
969 op = kzalloc_obj(*op);
970 if (!op)
971 return -ENOMEM;
972
973 xe_vm_populate_range_unbind(op, range);
974 list_add_tail(&op->link, &vops->list);
975 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
976
977 return 0;
978 }
979
980 /**
981 * xe_vm_range_unbind() - VM range unbind
982 * @vm: The VM which the range belongs to.
983 * @range: SVM range to rebind.
984 *
985 * Unbind SVM range removing the GPU page tables for the range.
986 *
987 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
988 * failure
989 */
990 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
991 struct xe_svm_range *range)
992 {
993 struct dma_fence *fence = NULL;
994 struct xe_vma_ops vops;
995 struct xe_vma_op *op, *next_op;
996 struct xe_tile *tile;
997 u8 id;
998 int err;
999
1000 lockdep_assert_held(&vm->lock);
1001 xe_vm_assert_held(vm);
1002 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1003
1004 if (!range->tile_present)
1005 return dma_fence_get_stub();
1006
1007 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1008 for_each_tile(tile, vm->xe, id) {
1009 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1010 vops.pt_update_ops[tile->id].q =
1011 xe_migrate_exec_queue(tile->migrate);
1012 }
1013
1014 err = xe_vm_ops_add_range_unbind(&vops, range);
1015 if (err)
1016 return ERR_PTR(err);
1017
1018 err = xe_vma_ops_alloc(&vops, false);
1019 if (err) {
1020 fence = ERR_PTR(err);
1021 goto free_ops;
1022 }
1023
1024 fence = ops_execute(vm, &vops);
1025
1026 free_ops:
1027 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1028 list_del(&op->link);
1029 kfree(op);
1030 }
1031 xe_vma_ops_fini(&vops);
1032
1033 return fence;
1034 }
1035
1036 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
1037 {
1038 drm_pagemap_put(attr->preferred_loc.dpagemap);
1039 }
1040
1041 static void xe_vma_free(struct xe_vma *vma)
1042 {
1043 xe_vma_mem_attr_fini(&vma->attr);
1044
1045 if (xe_vma_is_userptr(vma))
1046 kfree(to_userptr_vma(vma));
1047 else
1048 kfree(vma);
1049 }
1050
1051 /**
1052 * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
1053 * @to: Destination.
1054 * @from: Source.
1055 *
1056 * Copies an xe_vma_mem_attr structure taking care to get reference
1057 * counting of individual members right.
1058 */
1059 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
1060 {
1061 xe_vma_mem_attr_fini(to);
1062 *to = *from;
1063 if (to->preferred_loc.dpagemap)
1064 drm_pagemap_get(to->preferred_loc.dpagemap);
1065 }
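
/*
 * Usage sketch (illustrative): the destination must hold either zeroed or
 * valid attributes since it is fini'ed first, and the copy takes its own
 * dpagemap reference which must eventually be dropped again.
 *
 *	struct xe_vma_mem_attr tmp = {};
 *
 *	xe_vma_mem_attr_copy(&tmp, &vma->attr);
 *	...
 *	xe_vma_mem_attr_fini(&tmp);
 */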
1066
1067 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
1068 struct xe_bo *bo,
1069 u64 bo_offset_or_userptr,
1070 u64 start, u64 end,
1071 struct xe_vma_mem_attr *attr,
1072 unsigned int flags)
1073 {
1074 struct xe_vma *vma;
1075 struct xe_tile *tile;
1076 u8 id;
1077 bool is_null = (flags & DRM_GPUVA_SPARSE);
1078 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
1079
1080 xe_assert(vm->xe, start < end);
1081 xe_assert(vm->xe, end < vm->size);
1082
1083 /*
1084 * Allocate and ensure that the xe_vma_is_userptr() return
1085 * matches what was allocated.
1086 */
1087 if (!bo && !is_null && !is_cpu_addr_mirror) {
1088 struct xe_userptr_vma *uvma = kzalloc_obj(*uvma);
1089
1090 if (!uvma)
1091 return ERR_PTR(-ENOMEM);
1092
1093 vma = &uvma->vma;
1094 } else {
1095 vma = kzalloc_obj(*vma);
1096 if (!vma)
1097 return ERR_PTR(-ENOMEM);
1098
1099 if (bo)
1100 vma->gpuva.gem.obj = &bo->ttm.base;
1101 }
1102
1103 INIT_LIST_HEAD(&vma->combined_links.rebind);
1104
1105 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1106 vma->gpuva.vm = &vm->gpuvm;
1107 vma->gpuva.va.addr = start;
1108 vma->gpuva.va.range = end - start + 1;
1109 vma->gpuva.flags = flags;
1110
1111 for_each_tile(tile, vm->xe, id)
1112 vma->tile_mask |= 0x1 << id;
1113
1114 if (vm->xe->info.has_atomic_enable_pte_bit)
1115 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1116
1117 xe_vma_mem_attr_copy(&vma->attr, attr);
1118 if (bo) {
1119 struct drm_gpuvm_bo *vm_bo;
1120
1121 xe_bo_assert_held(bo);
1122
1123 vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
1124 if (IS_ERR(vm_bo)) {
1125 xe_vma_free(vma);
1126 return ERR_CAST(vm_bo);
1127 }
1128
1129 drm_gpuvm_bo_extobj_add(vm_bo);
1130 drm_gem_object_get(&bo->ttm.base);
1131 vma->gpuva.gem.offset = bo_offset_or_userptr;
1132 drm_gpuva_link(&vma->gpuva, vm_bo);
1133 drm_gpuvm_bo_put(vm_bo);
1134 } else /* userptr or null */ {
1135 if (!is_null && !is_cpu_addr_mirror) {
1136 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1137 u64 size = end - start + 1;
1138 int err;
1139
1140 vma->gpuva.gem.offset = bo_offset_or_userptr;
1141
1142 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1143 if (err) {
1144 xe_vma_free(vma);
1145 return ERR_PTR(err);
1146 }
1147 }
1148
1149 xe_vm_get(vm);
1150 }
1151
1152 return vma;
1153 }
1154
1155 static void xe_vma_destroy_late(struct xe_vma *vma)
1156 {
1157 struct xe_vm *vm = xe_vma_vm(vma);
1158 struct xe_bo *bo = xe_vma_bo(vma);
1159
1160 if (vma->ufence) {
1161 xe_sync_ufence_put(vma->ufence);
1162 vma->ufence = NULL;
1163 }
1164
1165 if (xe_vma_is_userptr(vma)) {
1166 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1167
1168 xe_userptr_remove(uvma);
1169 xe_vm_put(vm);
1170 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1171 xe_vm_put(vm);
1172 } else {
1173 xe_bo_put(bo);
1174 }
1175
1176 xe_vma_free(vma);
1177 }
1178
1179 static void vma_destroy_work_func(struct work_struct *w)
1180 {
1181 struct xe_vma *vma =
1182 container_of(w, struct xe_vma, destroy_work);
1183
1184 xe_vma_destroy_late(vma);
1185 }
1186
1187 static void vma_destroy_cb(struct dma_fence *fence,
1188 struct dma_fence_cb *cb)
1189 {
1190 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1191
1192 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1193 queue_work(system_dfl_wq, &vma->destroy_work);
1194 }
1195
1196 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1197 {
1198 struct xe_vm *vm = xe_vma_vm(vma);
1199 struct xe_bo *bo = xe_vma_bo(vma);
1200
1201 lockdep_assert_held_write(&vm->lock);
1202 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1203
1204 if (xe_vma_is_userptr(vma)) {
1205 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1206 xe_userptr_destroy(to_userptr_vma(vma));
1207 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1208 xe_bo_assert_held(bo);
1209
1210 drm_gpuva_unlink(&vma->gpuva);
1211 xe_bo_recompute_purgeable_state(bo);
1212 }
1213
1214 xe_vm_assert_held(vm);
1215 if (fence) {
1216 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1217 vma_destroy_cb);
1218
1219 if (ret) {
1220 XE_WARN_ON(ret != -ENOENT);
1221 xe_vma_destroy_late(vma);
1222 }
1223 } else {
1224 xe_vma_destroy_late(vma);
1225 }
1226 }
1227
1228 /**
1229 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1230 * @exec: The drm_exec object we're currently locking for.
1231 * @vma: The vma for which we want to lock the vm resv and any attached
1232 * object's resv.
1233 *
1234 * Return: 0 on success, negative error code on error. In particular
1235 * may return -EDEADLK on WW transaction contention and -EINTR if
1236 * an interruptible wait is terminated by a signal.
1237 */
1238 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1239 {
1240 struct xe_vm *vm = xe_vma_vm(vma);
1241 struct xe_bo *bo = xe_vma_bo(vma);
1242 int err;
1243
1244 XE_WARN_ON(!vm);
1245
1246 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1247 if (!err && bo && !bo->vm)
1248 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1249
1250 return err;
1251 }
1252
1253 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1254 {
1255 struct xe_device *xe = xe_vma_vm(vma)->xe;
1256 struct xe_validation_ctx ctx;
1257 struct drm_exec exec;
1258 int err = 0;
1259
1260 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1261 err = xe_vm_lock_vma(&exec, vma);
1262 drm_exec_retry_on_contention(&exec);
1263 if (XE_WARN_ON(err))
1264 break;
1265 xe_vma_destroy(vma, NULL);
1266 }
1267 xe_assert(xe, !err);
1268 }
1269
1270 struct xe_vma *
1271 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1272 {
1273 struct drm_gpuva *gpuva;
1274
1275 lockdep_assert_held(&vm->lock);
1276
1277 if (xe_vm_is_closed_or_banned(vm))
1278 return NULL;
1279
1280 xe_assert(vm->xe, start + range <= vm->size);
1281
1282 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1283
1284 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1285 }
1286
1287 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1288 {
1289 int err;
1290
1291 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1292 lockdep_assert_held(&vm->lock);
1293
1294 mutex_lock(&vm->snap_mutex);
1295 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1296 mutex_unlock(&vm->snap_mutex);
1297 XE_WARN_ON(err); /* Shouldn't be possible */
1298
1299 return err;
1300 }
1301
1302 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1303 {
1304 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1305 lockdep_assert_held(&vm->lock);
1306
1307 mutex_lock(&vm->snap_mutex);
1308 drm_gpuva_remove(&vma->gpuva);
1309 mutex_unlock(&vm->snap_mutex);
1310 if (vm->usm.last_fault_vma == vma)
1311 vm->usm.last_fault_vma = NULL;
1312 }
1313
1314 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1315 {
1316 struct xe_vma_op *op;
1317
1318 op = kzalloc_obj(*op);
1319
1320 if (unlikely(!op))
1321 return NULL;
1322
1323 return &op->base;
1324 }
1325
1326 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1327
1328 static const struct drm_gpuvm_ops gpuvm_ops = {
1329 .op_alloc = xe_vm_op_alloc,
1330 .vm_bo_validate = xe_gpuvm_validate,
1331 .vm_free = xe_vm_free,
1332 };
1333
1334 static u64 pde_encode_pat_index(u16 pat_index)
1335 {
1336 u64 pte = 0;
1337
1338 if (pat_index & BIT(0))
1339 pte |= XE_PPGTT_PTE_PAT0;
1340
1341 if (pat_index & BIT(1))
1342 pte |= XE_PPGTT_PTE_PAT1;
1343
1344 return pte;
1345 }
1346
1347 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1348 {
1349 u64 pte = 0;
1350
1351 if (pat_index & BIT(0))
1352 pte |= XE_PPGTT_PTE_PAT0;
1353
1354 if (pat_index & BIT(1))
1355 pte |= XE_PPGTT_PTE_PAT1;
1356
1357 if (pat_index & BIT(2)) {
1358 if (pt_level)
1359 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1360 else
1361 pte |= XE_PPGTT_PTE_PAT2;
1362 }
1363
1364 if (pat_index & BIT(3))
1365 pte |= XELPG_PPGTT_PTE_PAT3;
1366
1367 if (pat_index & (BIT(4)))
1368 pte |= XE2_PPGTT_PTE_PAT4;
1369
1370 return pte;
1371 }
1372
1373 static u64 pte_encode_ps(u32 pt_level)
1374 {
1375 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1376
1377 if (pt_level == 1)
1378 return XE_PDE_PS_2M;
1379 else if (pt_level == 2)
1380 return XE_PDPE_PS_1G;
1381
1382 return 0;
1383 }
1384
1385 static u16 pde_pat_index(struct xe_bo *bo)
1386 {
1387 struct xe_device *xe = xe_bo_device(bo);
1388 u16 pat_index;
1389
1390 /*
1391 * We only have two bits to encode the PAT index in non-leaf nodes, but
1392 * these only point to other paging structures so we only need a minimal
1393 * selection of options. The user PAT index is only for encoding leaf
1394 * nodes, where more bits are available to do the encoding. The
1395 * non-leaf nodes are instead under driver control so the chosen index
1396 * here should be distinct from the user PAT index. Also the
1397 * corresponding coherency of the PAT index should be tied to the
1398 * allocation type of the page table (or at least we should pick
1399 * something which is always safe).
1400 */
1401 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1402 pat_index = xe->pat.idx[XE_CACHE_WB];
1403 else
1404 pat_index = xe->pat.idx[XE_CACHE_NONE];
1405
1406 xe_assert(xe, pat_index <= 3);
1407
1408 return pat_index;
1409 }
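
/*
 * Worked example (illustrative): for a leaf PTE, pat_index == 0b00110 makes
 * pte_encode_pat_index() set XE_PPGTT_PTE_PAT1 | XE_PPGTT_PTE_PAT2 (or
 * XE_PPGTT_PDE_PDPE_PAT2 for non-leaf levels). Paging-structure entries can
 * only encode the two low bits via pde_encode_pat_index(), which is why
 * pde_pat_index() above asserts the chosen index is <= 3.
 */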
1410
1411 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1412 {
1413 u64 pde;
1414
1415 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1416 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1417 pde |= pde_encode_pat_index(pde_pat_index(bo));
1418
1419 return pde;
1420 }
1421
1422 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1423 u16 pat_index, u32 pt_level)
1424 {
1425 u64 pte;
1426
1427 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1428 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1429 pte |= pte_encode_pat_index(pat_index, pt_level);
1430 pte |= pte_encode_ps(pt_level);
1431
1432 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1433 pte |= XE_PPGTT_PTE_DM;
1434
1435 return pte;
1436 }
1437
1438 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1439 u16 pat_index, u32 pt_level)
1440 {
1441 struct xe_bo *bo = xe_vma_bo(vma);
1442 struct xe_vm *vm = xe_vma_vm(vma);
1443
1444 pte |= XE_PAGE_PRESENT;
1445
1446 if (likely(!xe_vma_read_only(vma)))
1447 pte |= XE_PAGE_RW;
1448
1449 pte |= pte_encode_pat_index(pat_index, pt_level);
1450 pte |= pte_encode_ps(pt_level);
1451
1452 /*
1453 * NULL PTEs redirect to scratch page (return zeros on read).
1454 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs.
1455 * Never set NULL flag without scratch page - causes undefined behavior.
1456 */
1457 if (unlikely(xe_vma_is_null(vma) ||
1458 (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm))))
1459 pte |= XE_PTE_NULL;
1460
1461 return pte;
1462 }
1463
1464 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1465 u16 pat_index,
1466 u32 pt_level, bool devmem, u64 flags)
1467 {
1468 u64 pte;
1469
1470 /* Avoid passing random bits directly as flags */
1471 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1472
1473 pte = addr;
1474 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1475 pte |= pte_encode_pat_index(pat_index, pt_level);
1476 pte |= pte_encode_ps(pt_level);
1477
1478 if (devmem)
1479 pte |= XE_PPGTT_PTE_DM;
1480
1481 pte |= flags;
1482
1483 return pte;
1484 }
1485
1486 static const struct xe_pt_ops xelp_pt_ops = {
1487 .pte_encode_bo = xelp_pte_encode_bo,
1488 .pte_encode_vma = xelp_pte_encode_vma,
1489 .pte_encode_addr = xelp_pte_encode_addr,
1490 .pde_encode_bo = xelp_pde_encode_bo,
1491 };
1492
1493 static void vm_destroy_work_func(struct work_struct *w);
1494
1495 /**
1496 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1497 * given tile and vm.
1498 * @xe: xe device.
1499 * @tile: tile to set up for.
1500 * @vm: vm to set up for.
1501 * @exec: The struct drm_exec object used to lock the vm resv.
1502 *
1503 * Sets up a pagetable tree with one page-table per level and a single
1504 * leaf PTE. All pagetable entries point to the single page-table or,
1505 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1506 * writes become NOPs.
1507 *
1508 * Return: 0 on success, negative error code on error.
1509 */
1510 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1511 struct xe_vm *vm, struct drm_exec *exec)
1512 {
1513 u8 id = tile->id;
1514 int i;
1515
1516 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1517 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1518 if (IS_ERR(vm->scratch_pt[id][i])) {
1519 int err = PTR_ERR(vm->scratch_pt[id][i]);
1520
1521 vm->scratch_pt[id][i] = NULL;
1522 return err;
1523 }
1524 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1525 }
1526
1527 return 0;
1528 }
1529 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1530
1531 static void xe_vm_free_scratch(struct xe_vm *vm)
1532 {
1533 struct xe_tile *tile;
1534 u8 id;
1535
1536 if (!xe_vm_has_scratch(vm))
1537 return;
1538
1539 for_each_tile(tile, vm->xe, id) {
1540 u32 i;
1541
1542 if (!vm->pt_root[id])
1543 continue;
1544
1545 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1546 if (vm->scratch_pt[id][i])
1547 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1548 }
1549 }
1550
1551 static void xe_vm_pt_destroy(struct xe_vm *vm)
1552 {
1553 struct xe_tile *tile;
1554 u8 id;
1555
1556 xe_vm_assert_held(vm);
1557
1558 for_each_tile(tile, vm->xe, id) {
1559 if (vm->pt_root[id]) {
1560 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1561 vm->pt_root[id] = NULL;
1562 }
1563 }
1564 }
1565
1566 static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm)
1567 {
1568 if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
1569 return;
1570
1571 fs_reclaim_acquire(GFP_KERNEL);
1572 might_lock(&vm->exec_queues.lock);
1573 fs_reclaim_release(GFP_KERNEL);
1574
1575 down_read(&vm->exec_queues.lock);
1576 might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock);
1577 up_read(&vm->exec_queues.lock);
1578 }
1579
1580 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1581 {
1582 struct drm_gem_object *vm_resv_obj;
1583 struct xe_validation_ctx ctx;
1584 struct drm_exec exec;
1585 struct xe_vm *vm;
1586 int err;
1587 struct xe_tile *tile;
1588 u8 id;
1589
1590 /*
1591 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1592 * ever be in faulting mode.
1593 */
1594 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1595
1596 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1597 if (!vm)
1598 return ERR_PTR(-ENOMEM);
1599
1600 vm->xe = xe;
1601
1602 vm->size = 1ull << xe->info.va_bits;
1603 vm->flags = flags;
1604
1605 if (xef)
1606 vm->xef = xe_file_get(xef);
1607 /*
1608 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1609 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1610 * under a user-VM lock when the PXP session is started at exec_queue
1611 * creation time. Those are different VMs and therefore there is no risk
1612 * of deadlock, but we need to tell lockdep that this is the case or it
1613 * will print a warning.
1614 */
1615 if (flags & XE_VM_FLAG_GSC) {
1616 static struct lock_class_key gsc_vm_key;
1617
1618 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1619 } else {
1620 init_rwsem(&vm->lock);
1621 }
1622 mutex_init(&vm->snap_mutex);
1623
1624 INIT_LIST_HEAD(&vm->rebind_list);
1625
1626 INIT_LIST_HEAD(&vm->userptr.repin_list);
1627 INIT_LIST_HEAD(&vm->userptr.invalidated);
1628 spin_lock_init(&vm->userptr.invalidated_lock);
1629
1630 INIT_LIST_HEAD(&vm->faults.list);
1631 spin_lock_init(&vm->faults.lock);
1632
1633 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1634
1635 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1636
1637 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1638 for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id)
1639 INIT_LIST_HEAD(&vm->exec_queues.list[id]);
1640 if (flags & XE_VM_FLAG_FAULT_MODE)
1641 vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
1642 else
1643 vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;
1644
1645 init_rwsem(&vm->exec_queues.lock);
1646 xe_vm_init_prove_locking(xe, vm);
1647
1648 for_each_tile(tile, xe, id)
1649 xe_range_fence_tree_init(&vm->rftree[id]);
1650
1651 vm->pt_ops = &xelp_pt_ops;
1652
1653 /*
1654 * Long-running workloads are not protected by the scheduler references.
1655 * By design, run_job for long-running workloads returns NULL and the
1656 * scheduler drops all the references of it, hence protecting the VM
1657 * for this case is necessary.
1658 */
1659 if (flags & XE_VM_FLAG_LR_MODE) {
1660 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1661 xe_pm_runtime_get_noresume(xe);
1662 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1663 }
1664
1665 err = xe_svm_init(vm);
1666 if (err)
1667 goto err_no_resv;
1668
1669 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1670 if (!vm_resv_obj) {
1671 err = -ENOMEM;
1672 goto err_svm_fini;
1673 }
1674
1675 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1676 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1677
1678 drm_gem_object_put(vm_resv_obj);
1679
1680 err = 0;
1681 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1682 err) {
1683 err = xe_vm_drm_exec_lock(vm, &exec);
1684 drm_exec_retry_on_contention(&exec);
1685
1686 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1687 vm->flags |= XE_VM_FLAG_64K;
1688
1689 for_each_tile(tile, xe, id) {
1690 if (flags & XE_VM_FLAG_MIGRATION &&
1691 tile->id != XE_VM_FLAG_TILE_ID(flags))
1692 continue;
1693
1694 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1695 &exec);
1696 if (IS_ERR(vm->pt_root[id])) {
1697 err = PTR_ERR(vm->pt_root[id]);
1698 vm->pt_root[id] = NULL;
1699 xe_vm_pt_destroy(vm);
1700 drm_exec_retry_on_contention(&exec);
1701 xe_validation_retry_on_oom(&ctx, &err);
1702 break;
1703 }
1704 }
1705 if (err)
1706 break;
1707
1708 if (xe_vm_has_scratch(vm)) {
1709 for_each_tile(tile, xe, id) {
1710 if (!vm->pt_root[id])
1711 continue;
1712
1713 err = xe_vm_create_scratch(xe, tile, vm, &exec);
1714 if (err) {
1715 xe_vm_free_scratch(vm);
1716 xe_vm_pt_destroy(vm);
1717 drm_exec_retry_on_contention(&exec);
1718 xe_validation_retry_on_oom(&ctx, &err);
1719 break;
1720 }
1721 }
1722 if (err)
1723 break;
1724 vm->batch_invalidate_tlb = true;
1725 }
1726
1727 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1728 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1729 vm->batch_invalidate_tlb = false;
1730 }
1731
1732 /* Fill pt_root after allocating scratch tables */
1733 for_each_tile(tile, xe, id) {
1734 if (!vm->pt_root[id])
1735 continue;
1736
1737 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1738 }
1739 }
1740 if (err)
1741 goto err_close;
1742
1743 /* Kernel migration VM shouldn't have a circular loop.. */
1744 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1745 for_each_tile(tile, xe, id) {
1746 struct xe_exec_queue *q;
1747 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1748
1749 if (!vm->pt_root[id])
1750 continue;
1751
1752 if (!xef) /* Not from userspace */
1753 create_flags |= EXEC_QUEUE_FLAG_KERNEL;
1754
1755 q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
1756 if (IS_ERR(q)) {
1757 err = PTR_ERR(q);
1758 goto err_close;
1759 }
1760 vm->q[id] = q;
1761 }
1762 }
1763
1764 if (xef && xe->info.has_asid) {
1765 u32 asid;
1766
1767 down_write(&xe->usm.lock);
1768 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1769 XA_LIMIT(1, XE_MAX_ASID - 1),
1770 &xe->usm.next_asid, GFP_NOWAIT);
1771 up_write(&xe->usm.lock);
1772 if (err < 0)
1773 goto err_close;
1774
1775 vm->usm.asid = asid;
1776 }
1777
1778 trace_xe_vm_create(vm);
1779
1780 return vm;
1781
1782 err_close:
1783 xe_vm_close_and_put(vm);
1784 return ERR_PTR(err);
1785
1786 err_svm_fini:
1787 if (flags & XE_VM_FLAG_FAULT_MODE) {
1788 vm->size = 0; /* close the vm */
1789 xe_svm_fini(vm);
1790 }
1791 err_no_resv:
1792 mutex_destroy(&vm->snap_mutex);
1793 for_each_tile(tile, xe, id)
1794 xe_range_fence_tree_fini(&vm->rftree[id]);
1795 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1796 if (vm->xef)
1797 xe_file_put(vm->xef);
1798 kfree(vm);
1799 if (flags & XE_VM_FLAG_LR_MODE)
1800 xe_pm_runtime_put(xe);
1801 return ERR_PTR(err);
1802 }
1803
1804 static void xe_vm_close(struct xe_vm *vm)
1805 {
1806 struct xe_device *xe = vm->xe;
1807 bool bound;
1808 int idx;
1809
1810 bound = drm_dev_enter(&xe->drm, &idx);
1811
1812 down_write(&vm->lock);
1813 if (xe_vm_in_fault_mode(vm))
1814 xe_svm_notifier_lock(vm);
1815
1816 vm->size = 0;
1817
1818 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1819 struct xe_tile *tile;
1820 struct xe_gt *gt;
1821 u8 id;
1822
1823 /* Wait for pending binds */
1824 dma_resv_wait_timeout(xe_vm_resv(vm),
1825 DMA_RESV_USAGE_BOOKKEEP,
1826 false, MAX_SCHEDULE_TIMEOUT);
1827
1828 if (bound) {
1829 for_each_tile(tile, xe, id)
1830 if (vm->pt_root[id])
1831 xe_pt_clear(xe, vm->pt_root[id]);
1832
1833 for_each_gt(gt, xe, id)
1834 xe_tlb_inval_vm(>->tlb_inval, vm);
1835 }
1836 }
1837
1838 if (xe_vm_in_fault_mode(vm))
1839 xe_svm_notifier_unlock(vm);
1840 up_write(&vm->lock);
1841
1842 if (bound)
1843 drm_dev_exit(idx);
1844 }
1845
1846 void xe_vm_close_and_put(struct xe_vm *vm)
1847 {
1848 LIST_HEAD(contested);
1849 struct xe_device *xe = vm->xe;
1850 struct xe_tile *tile;
1851 struct xe_vma *vma, *next_vma;
1852 struct drm_gpuva *gpuva, *next;
1853 u8 id;
1854
1855 xe_assert(xe, !vm->preempt.num_exec_queues);
1856
1857 xe_vm_close(vm);
1858 if (xe_vm_in_preempt_fence_mode(vm)) {
1859 mutex_lock(&xe->rebind_resume_lock);
1860 list_del_init(&vm->preempt.pm_activate_link);
1861 mutex_unlock(&xe->rebind_resume_lock);
1862 flush_work(&vm->preempt.rebind_work);
1863 }
1864 if (xe_vm_in_fault_mode(vm))
1865 xe_svm_close(vm);
1866
1867 down_write(&vm->lock);
1868 for_each_tile(tile, xe, id) {
1869 if (vm->q[id]) {
1870 int i;
1871
1872 xe_exec_queue_last_fence_put(vm->q[id], vm);
1873 for_each_tlb_inval(i)
1874 xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
1875 }
1876 }
1877 up_write(&vm->lock);
1878
1879 for_each_tile(tile, xe, id) {
1880 if (vm->q[id]) {
1881 xe_exec_queue_kill(vm->q[id]);
1882 xe_exec_queue_put(vm->q[id]);
1883 vm->q[id] = NULL;
1884 }
1885 }
1886
1887 down_write(&vm->lock);
1888 xe_vm_lock(vm, false);
1889 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1890 vma = gpuva_to_vma(gpuva);
1891
1892 if (xe_vma_has_no_bo(vma)) {
1893 xe_svm_notifier_lock(vm);
1894 vma->gpuva.flags |= XE_VMA_DESTROYED;
1895 xe_svm_notifier_unlock(vm);
1896 }
1897
1898 xe_vm_remove_vma(vm, vma);
1899
1900 /* easy case, remove from VMA? */
1901 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1902 list_del_init(&vma->combined_links.rebind);
1903 xe_vma_destroy(vma, NULL);
1904 continue;
1905 }
1906
1907 list_move_tail(&vma->combined_links.destroy, &contested);
1908 vma->gpuva.flags |= XE_VMA_DESTROYED;
1909 }
1910
1911 /*
1912 * All vm operations will add shared fences to resv.
1913 * The only exception is eviction for a shared object,
1914 * but even so, the unbind when evicted would still
1915 * install a fence to resv. Hence it's safe to
1916 * destroy the pagetables immediately.
1917 */
1918 xe_vm_free_scratch(vm);
1919 xe_vm_pt_destroy(vm);
1920 xe_vm_unlock(vm);
1921
1922 /*
1923 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1924 * Since we hold a refcount to the bo, we can remove and free
1925 * the members safely without locking.
1926 */
1927 list_for_each_entry_safe(vma, next_vma, &contested,
1928 combined_links.destroy) {
1929 list_del_init(&vma->combined_links.destroy);
1930 xe_vma_destroy_unlocked(vma);
1931 }
1932
1933 xe_svm_fini(vm);
1934
1935 up_write(&vm->lock);
1936
1937 down_write(&xe->usm.lock);
1938 if (vm->usm.asid) {
1939 void *lookup;
1940
1941 xe_assert(xe, xe->info.has_asid);
1942 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1943
1944 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1945 xe_assert(xe, lookup == vm);
1946 }
1947 up_write(&xe->usm.lock);
1948
1949 xe_vm_clear_fault_entries(vm);
1950
1951 for_each_tile(tile, xe, id)
1952 xe_range_fence_tree_fini(&vm->rftree[id]);
1953
1954 xe_vm_put(vm);
1955 }
1956
1957 static void vm_destroy_work_func(struct work_struct *w)
1958 {
1959 struct xe_vm *vm =
1960 container_of(w, struct xe_vm, destroy_work);
1961 struct xe_device *xe = vm->xe;
1962 struct xe_tile *tile;
1963 u8 id;
1964
1965 /* xe_vm_close_and_put was not called? */
1966 xe_assert(xe, !vm->size);
1967
1968 if (xe_vm_in_preempt_fence_mode(vm))
1969 flush_work(&vm->preempt.rebind_work);
1970
1971 mutex_destroy(&vm->snap_mutex);
1972
1973 if (vm->flags & XE_VM_FLAG_LR_MODE)
1974 xe_pm_runtime_put(xe);
1975
1976 for_each_tile(tile, xe, id)
1977 XE_WARN_ON(vm->pt_root[id]);
1978
1979 trace_xe_vm_free(vm);
1980
1981 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1982
1983 if (vm->xef)
1984 xe_file_put(vm->xef);
1985
1986 kfree(vm);
1987 }
1988
1989 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1990 {
1991 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1992
1993 /* To destroy the VM we need to be able to sleep */
1994 queue_work(system_dfl_wq, &vm->destroy_work);
1995 }
1996
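/**
 * xe_vm_lookup() - Look up a VM by its user-visible id
 * @xef: xe file from which the VM id was allocated
 * @id: the VM id returned by the VM create ioctl
 *
 * Takes a reference on the VM if found; the caller must release it with
 * xe_vm_put().
 *
 * Return: Pointer to the referenced VM, or NULL if no VM matches @id.
 */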
xe_vm_lookup(struct xe_file * xef,u32 id)1997 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1998 {
1999 struct xe_vm *vm;
2000
2001 mutex_lock(&xef->vm.lock);
2002 vm = xa_load(&xef->vm.xa, id);
2003 if (vm)
2004 xe_vm_get(vm);
2005 mutex_unlock(&xef->vm.lock);
2006
2007 return vm;
2008 }
2009
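/**
 * xe_vm_pdp4_descriptor() - PDP4 descriptor for a VM on a tile
 * @vm: the VM
 * @tile: the tile whose root page table to encode
 *
 * Return: The page-directory-entry encoding of the VM's root page-table BO
 * for @tile.
 */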
xe_vm_pdp4_descriptor(struct xe_vm * vm,struct xe_tile * tile)2010 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
2011 {
2012 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
2013 }
2014
2015 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)2016 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2017 {
2018 return q ? q : vm->q[0];
2019 }
2020
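/*
 * Return the first user fence in @syncs with a reference taken, or NULL if
 * none of the sync entries carries a user fence.
 */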
2021 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)2022 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2023 {
2024 unsigned int i;
2025
2026 for (i = 0; i < num_syncs; i++) {
2027 struct xe_sync_entry *e = &syncs[i];
2028
2029 if (xe_sync_is_ufence(e))
2030 return xe_sync_ufence_get(e);
2031 }
2032
2033 return NULL;
2034 }
2035
2036 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2037 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2038 DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
2039 DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2040
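/*
 * VM create ioctl: validate the DRM_XE_VM_CREATE flags, create the VM and
 * only then publish its id to userspace.
 *
 * Illustrative userspace sketch (not authoritative, error handling omitted):
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
 *	};
 *
 *	if (!ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		vm_id = create.vm_id;
 */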
xe_vm_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2041 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2042 struct drm_file *file)
2043 {
2044 struct xe_device *xe = to_xe_device(dev);
2045 struct xe_file *xef = to_xe_file(file);
2046 struct drm_xe_vm_create *args = data;
2047 struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
2048 struct xe_vm *vm;
2049 u32 id;
2050 int err;
2051 u32 flags = 0;
2052
2053 if (XE_IOCTL_DBG(xe, args->extensions))
2054 return -EINVAL;
2055
2056 if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
2057 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2058
2059 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2060 !xe->info.has_usm))
2061 return -EINVAL;
2062
2063 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2064 return -EINVAL;
2065
2066 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2067 return -EINVAL;
2068
2069 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2070 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2071 !xe->info.needs_scratch))
2072 return -EINVAL;
2073
2074 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2075 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2076 return -EINVAL;
2077
2078 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
2079 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
2080 return -EINVAL;
2081
2082 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2083 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2084 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2085 flags |= XE_VM_FLAG_LR_MODE;
2086 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2087 flags |= XE_VM_FLAG_FAULT_MODE;
2088 if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2089 flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;
2090
2091 vm = xe_vm_create(xe, flags, xef);
2092 if (IS_ERR(vm))
2093 return PTR_ERR(vm);
2094
2095 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2096 /* Warning: Security issue - never enable by default */
2097 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2098 #endif
2099
2100 	/* User id allocation must be the last ioctl step to prevent a use-after-free */
2101 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2102 if (err)
2103 goto err_close_and_put;
2104
2105 args->vm_id = id;
2106
2107 return 0;
2108
2109 err_close_and_put:
2110 xe_vm_close_and_put(vm);
2111
2112 return err;
2113 }
2114
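/*
 * VM destroy ioctl: the id is erased under xef->vm.lock so new lookups can't
 * race with teardown, then the VM is closed and its reference dropped outside
 * the lock. A VM that still has preempt-fence exec queues attached is refused
 * with -EBUSY.
 */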
xe_vm_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2115 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2116 struct drm_file *file)
2117 {
2118 struct xe_device *xe = to_xe_device(dev);
2119 struct xe_file *xef = to_xe_file(file);
2120 struct drm_xe_vm_destroy *args = data;
2121 struct xe_vm *vm;
2122 int err = 0;
2123
2124 if (XE_IOCTL_DBG(xe, args->pad) ||
2125 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2126 return -EINVAL;
2127
2128 mutex_lock(&xef->vm.lock);
2129 vm = xa_load(&xef->vm.xa, args->vm_id);
2130 if (XE_IOCTL_DBG(xe, !vm))
2131 err = -ENOENT;
2132 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2133 err = -EBUSY;
2134 else
2135 xa_erase(&xef->vm.xa, args->vm_id);
2136 mutex_unlock(&xef->vm.lock);
2137
2138 if (!err)
2139 xe_vm_close_and_put(vm);
2140
2141 return err;
2142 }
2143
xe_vm_query_vmas(struct xe_vm * vm,u64 start,u64 end)2144 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
2145 {
2146 struct drm_gpuva *gpuva;
2147 u32 num_vmas = 0;
2148
2149 lockdep_assert_held(&vm->lock);
2150 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
2151 num_vmas++;
2152
2153 return num_vmas;
2154 }
2155
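/*
 * Fill @attrs with the memory-range attributes of all VMAs overlapping
 * [start, end). On success *num_vmas is updated to the number of entries
 * written; -ENOSPC is returned if the caller-provided array is too small.
 */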
get_mem_attrs(struct xe_vm * vm,u32 * num_vmas,u64 start,u64 end,struct drm_xe_mem_range_attr * attrs)2156 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
2157 u64 end, struct drm_xe_mem_range_attr *attrs)
2158 {
2159 struct drm_gpuva *gpuva;
2160 int i = 0;
2161
2162 lockdep_assert_held(&vm->lock);
2163
2164 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
2165 struct xe_vma *vma = gpuva_to_vma(gpuva);
2166
2167 if (i == *num_vmas)
2168 return -ENOSPC;
2169
2170 attrs[i].start = xe_vma_start(vma);
2171 attrs[i].end = xe_vma_end(vma);
2172 attrs[i].atomic.val = vma->attr.atomic_access;
2173 attrs[i].pat_index.val = vma->attr.pat_index;
2174 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
2175 attrs[i].preferred_mem_loc.migration_policy =
2176 vma->attr.preferred_loc.migration_policy;
2177
2178 i++;
2179 }
2180
2181 *num_vmas = i;
2182 return 0;
2183 }
2184
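/*
 * Query ioctl for per-range memory attributes. Userspace typically calls it
 * twice: first with num_mem_ranges == 0 to learn the number of ranges and the
 * per-entry size, then again with a buffer large enough to hold them.
 *
 * Illustrative sketch (error handling omitted; the exact ioctl request macro
 * name below is assumed, check uapi/drm/xe_drm.h):
 *
 *	struct drm_xe_vm_query_mem_range_attr query = {
 *		.vm_id = vm_id, .start = addr, .range = size,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &query);
 *	attrs = calloc(query.num_mem_ranges, query.sizeof_mem_range_attr);
 *	query.vector_of_mem_attr = (uintptr_t)attrs;
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &query);
 */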
xe_vm_query_vmas_attrs_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2185 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2186 {
2187 struct xe_device *xe = to_xe_device(dev);
2188 struct xe_file *xef = to_xe_file(file);
2189 struct drm_xe_mem_range_attr *mem_attrs;
2190 struct drm_xe_vm_query_mem_range_attr *args = data;
2191 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2192 struct xe_vm *vm;
2193 int err = 0;
2194
2195 if (XE_IOCTL_DBG(xe,
2196 ((args->num_mem_ranges == 0 &&
2197 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2198 (args->num_mem_ranges > 0 &&
2199 (!attrs_user ||
2200 args->sizeof_mem_range_attr !=
2201 sizeof(struct drm_xe_mem_range_attr))))))
2202 return -EINVAL;
2203
2204 vm = xe_vm_lookup(xef, args->vm_id);
2205 if (XE_IOCTL_DBG(xe, !vm))
2206 return -EINVAL;
2207
2208 err = down_read_interruptible(&vm->lock);
2209 if (err)
2210 goto put_vm;
2211
2212 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2213
2214 if (args->num_mem_ranges == 0 && !attrs_user) {
2215 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2216 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2217 goto unlock_vm;
2218 }
2219
2220 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2221 GFP_KERNEL | __GFP_ACCOUNT |
2222 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2223 if (!mem_attrs) {
2224 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2225 goto unlock_vm;
2226 }
2227
2228 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2229 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2230 args->start + args->range, mem_attrs);
2231 if (err)
2232 goto free_mem_attrs;
2233
2234 err = copy_to_user(attrs_user, mem_attrs,
2235 args->sizeof_mem_range_attr * args->num_mem_ranges);
2236 if (err)
2237 err = -EFAULT;
2238
2239 free_mem_attrs:
2240 kvfree(mem_attrs);
2241 unlock_vm:
2242 up_read(&vm->lock);
2243 put_vm:
2244 xe_vm_put(vm);
2245 return err;
2246 }
2247
vma_matches(struct xe_vma * vma,u64 page_addr)2248 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2249 {
2250 if (page_addr > xe_vma_end(vma) - 1 ||
2251 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2252 return false;
2253
2254 return true;
2255 }
2256
2257 /**
2258 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2259 *
2260 * @vm: the xe_vm the vma belongs to
2261 * @page_addr: address to look up
2262  *
 * Return: Pointer to the VMA containing @page_addr, or NULL if no VMA
 * overlaps that address.
 */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2263 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2264 {
2265 struct xe_vma *vma = NULL;
2266
2267 if (vm->usm.last_fault_vma) { /* Fast lookup */
2268 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2269 vma = vm->usm.last_fault_vma;
2270 }
2271 if (!vma)
2272 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2273
2274 return vma;
2275 }
2276
2277 static const u32 region_to_mem_type[] = {
2278 XE_PL_TT,
2279 XE_PL_VRAM0,
2280 XE_PL_VRAM1,
2281 };
2282
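/*
 * Mark @vma destroyed under the SVM notifier lock so concurrent invalidations
 * observe it, and remove it from the VM if it was already committed.
 */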
prep_vma_destroy(struct xe_vm * vm,struct xe_vma * vma,bool post_commit)2283 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2284 bool post_commit)
2285 {
2286 xe_svm_notifier_lock(vm);
2287 vma->gpuva.flags |= XE_VMA_DESTROYED;
2288 xe_svm_notifier_unlock(vm);
2289 if (post_commit)
2290 xe_vm_remove_vma(vm, vma);
2291 }
2292
2293 #undef ULL
2294 #define ULL unsigned long long
2295
2296 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2297 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2298 {
2299 struct xe_vma *vma;
2300
2301 switch (op->op) {
2302 case DRM_GPUVA_OP_MAP:
2303 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2304 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2305 break;
2306 case DRM_GPUVA_OP_REMAP:
2307 vma = gpuva_to_vma(op->remap.unmap->va);
2308 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2309 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2310 op->remap.unmap->keep ? 1 : 0);
2311 if (op->remap.prev)
2312 vm_dbg(&xe->drm,
2313 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2314 (ULL)op->remap.prev->va.addr,
2315 (ULL)op->remap.prev->va.range);
2316 if (op->remap.next)
2317 vm_dbg(&xe->drm,
2318 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2319 (ULL)op->remap.next->va.addr,
2320 (ULL)op->remap.next->va.range);
2321 break;
2322 case DRM_GPUVA_OP_UNMAP:
2323 vma = gpuva_to_vma(op->unmap.va);
2324 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2325 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2326 op->unmap.keep ? 1 : 0);
2327 break;
2328 case DRM_GPUVA_OP_PREFETCH:
2329 vma = gpuva_to_vma(op->prefetch.va);
2330 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2331 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2332 break;
2333 default:
2334 drm_warn(&xe->drm, "NOT POSSIBLE\n");
2335 }
2336 }
2337 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2338 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2339 {
2340 }
2341 #endif
2342
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2343 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2344 {
2345 if (!xe_vm_in_fault_mode(vm))
2346 return false;
2347
2348 if (!xe_vm_has_scratch(vm))
2349 return false;
2350
2351 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2352 return false;
2353
2354 return true;
2355 }
2356
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2357 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2358 {
2359 struct drm_gpuva_op *__op;
2360
2361 drm_gpuva_for_each_op(__op, ops) {
2362 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2363
2364 xe_vma_svm_prefetch_op_fini(op);
2365 }
2366 }
2367
2368 /*
2369  * Create the operations list from the IOCTL arguments and set up the operation
2370  * fields so that parse and commit are decoupled from the arguments. This step can fail.
2371  */
2372 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2373 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2374 struct xe_bo *bo, u64 bo_offset_or_userptr,
2375 u64 addr, u64 range,
2376 u32 operation, u32 flags,
2377 u32 prefetch_region, u16 pat_index)
2378 {
2379 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2380 struct drm_gpuva_ops *ops;
2381 struct drm_gpuva_op *__op;
2382 struct drm_gpuvm_bo *vm_bo;
2383 u64 range_start = addr;
2384 u64 range_end = addr + range;
2385 int err;
2386
2387 lockdep_assert_held_write(&vm->lock);
2388
2389 vm_dbg(&vm->xe->drm,
2390 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2391 operation, (ULL)addr, (ULL)range,
2392 (ULL)bo_offset_or_userptr);
2393
2394 switch (operation) {
2395 case DRM_XE_VM_BIND_OP_MAP:
2396 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
2397 xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
2398 vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
2399 }
2400
2401 fallthrough;
2402 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2403 struct drm_gpuvm_map_req map_req = {
2404 .map.va.addr = range_start,
2405 .map.va.range = range_end - range_start,
2406 .map.gem.obj = obj,
2407 .map.gem.offset = bo_offset_or_userptr,
2408 };
2409
2410 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2411 break;
2412 }
2413 case DRM_XE_VM_BIND_OP_UNMAP:
2414 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2415 break;
2416 case DRM_XE_VM_BIND_OP_PREFETCH:
2417 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2418 break;
2419 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2420 xe_assert(vm->xe, bo);
2421
2422 err = xe_bo_lock(bo, true);
2423 if (err)
2424 return ERR_PTR(err);
2425
2426 vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj);
2427 if (IS_ERR(vm_bo)) {
2428 xe_bo_unlock(bo);
2429 return ERR_CAST(vm_bo);
2430 }
2431
2432 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2433 drm_gpuvm_bo_put(vm_bo);
2434 xe_bo_unlock(bo);
2435 break;
2436 default:
2437 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2438 ops = ERR_PTR(-EINVAL);
2439 }
2440 if (IS_ERR(ops))
2441 return ops;
2442
2443 drm_gpuva_for_each_op(__op, ops) {
2444 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2445
2446 if (__op->op == DRM_GPUVA_OP_MAP) {
2447 op->map.immediate =
2448 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2449 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2450 op->map.vma_flags |= XE_VMA_READ_ONLY;
2451 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2452 op->map.vma_flags |= DRM_GPUVA_SPARSE;
2453 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2454 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2455 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2456 op->map.vma_flags |= XE_VMA_DUMPABLE;
2457 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2458 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2459 op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
2460 op->map.pat_index = pat_index;
2461 op->map.invalidate_on_bind =
2462 __xe_vm_needs_clear_scratch_pages(vm, flags);
2463 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2464 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2465 struct xe_tile *tile;
2466 struct xe_svm_range *svm_range;
2467 struct drm_gpusvm_ctx ctx = {};
2468 struct drm_pagemap *dpagemap = NULL;
2469 u8 id, tile_mask = 0;
2470 u32 i;
2471
2472 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2473 op->prefetch.region = prefetch_region;
2474 break;
2475 }
2476
2477 ctx.read_only = xe_vma_read_only(vma);
2478 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2479 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2480
2481 for_each_tile(tile, vm->xe, id)
2482 tile_mask |= 0x1 << id;
2483
2484 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2485 op->prefetch_range.ranges_count = 0;
2486
2487 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2488 dpagemap = xe_vma_resolve_pagemap(vma,
2489 xe_device_get_root_tile(vm->xe));
2490 } else if (prefetch_region) {
2491 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2492 XE_PL_VRAM0];
2493 dpagemap = xe_tile_local_pagemap(tile);
2494 }
2495
2496 op->prefetch_range.dpagemap = dpagemap;
2497 alloc_next_range:
2498 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2499
2500 if (PTR_ERR(svm_range) == -ENOENT) {
2501 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2502
2503 addr = ret == ULONG_MAX ? 0 : ret;
2504 if (addr)
2505 goto alloc_next_range;
2506 else
2507 goto print_op_label;
2508 }
2509
2510 if (IS_ERR(svm_range)) {
2511 err = PTR_ERR(svm_range);
2512 goto unwind_prefetch_ops;
2513 }
2514
2515 if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
2516 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2517 goto check_next_range;
2518 }
2519
2520 err = xa_alloc(&op->prefetch_range.range,
2521 &i, svm_range, xa_limit_32b,
2522 GFP_KERNEL);
2523
2524 if (err)
2525 goto unwind_prefetch_ops;
2526
2527 op->prefetch_range.ranges_count++;
2528 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2529 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2530 check_next_range:
2531 if (range_end > xe_svm_range_end(svm_range) &&
2532 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2533 addr = xe_svm_range_end(svm_range);
2534 goto alloc_next_range;
2535 }
2536 }
2537 print_op_label:
2538 print_op(vm->xe, __op);
2539 }
2540
2541 return ops;
2542
2543 unwind_prefetch_ops:
2544 xe_svm_prefetch_gpuva_ops_fini(ops);
2545 drm_gpuva_ops_free(&vm->gpuvm, ops);
2546 return ERR_PTR(err);
2547 }
2548
2549 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2550
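/*
 * Allocate and initialise a VMA for a map operation. For BO-backed VMAs the
 * required reservation objects are locked inside a validation transaction,
 * and external (non VM-private) BOs get the VM's preempt fences attached.
 * Userptr VMAs have their pages pinned immediately.
 */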
new_vma(struct xe_vm * vm,struct drm_gpuva_op_map * op,struct xe_vma_mem_attr * attr,unsigned int flags)2551 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2552 struct xe_vma_mem_attr *attr, unsigned int flags)
2553 {
2554 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2555 struct xe_validation_ctx ctx;
2556 struct drm_exec exec;
2557 struct xe_vma *vma;
2558 int err = 0;
2559
2560 lockdep_assert_held_write(&vm->lock);
2561
2562 if (bo) {
2563 err = 0;
2564 xe_validation_guard(&ctx, &vm->xe->val, &exec,
2565 (struct xe_val_flags) {.interruptible = true}, err) {
2566 if (!bo->vm) {
2567 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2568 drm_exec_retry_on_contention(&exec);
2569 }
2570 if (!err) {
2571 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2572 drm_exec_retry_on_contention(&exec);
2573 }
2574 if (err)
2575 return ERR_PTR(err);
2576
2577 vma = xe_vma_create(vm, bo, op->gem.offset,
2578 op->va.addr, op->va.addr +
2579 op->va.range - 1, attr, flags);
2580 if (IS_ERR(vma))
2581 return vma;
2582
2583 if (!bo->vm) {
2584 err = add_preempt_fences(vm, bo);
2585 if (err) {
2586 prep_vma_destroy(vm, vma, false);
2587 xe_vma_destroy(vma, NULL);
2588 }
2589 }
2590 }
2591 if (err)
2592 return ERR_PTR(err);
2593 } else {
2594 vma = xe_vma_create(vm, NULL, op->gem.offset,
2595 op->va.addr, op->va.addr +
2596 op->va.range - 1, attr, flags);
2597 if (IS_ERR(vma))
2598 return vma;
2599
2600 if (xe_vma_is_userptr(vma)) {
2601 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2602 			/*
2603 			 * -EBUSY has the dedicated meaning that a user fence
2604 			 * attached to the VMA is busy. In practice,
2605 			 * xe_vma_userptr_pin_pages() can only fail with -EBUSY
2606 			 * when we are low on memory, so convert this to -ENOMEM.
2607 			 */
2608 if (err == -EBUSY)
2609 err = -ENOMEM;
2610 }
2611 }
2612 if (err) {
2613 prep_vma_destroy(vm, vma, false);
2614 xe_vma_destroy_unlocked(vma);
2615 vma = ERR_PTR(err);
2616 }
2617
2618 return vma;
2619 }
2620
xe_vma_max_pte_size(struct xe_vma * vma)2621 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2622 {
2623 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2624 return SZ_1G;
2625 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2626 return SZ_2M;
2627 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2628 return SZ_64K;
2629 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2630 return SZ_4K;
2631
2632 	return SZ_1G;	/* Uninitialized, use max size */
2633 }
2634
xe_vma_set_pte_size(struct xe_vma * vma,u64 size)2635 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2636 {
2637 switch (size) {
2638 case SZ_1G:
2639 vma->gpuva.flags |= XE_VMA_PTE_1G;
2640 break;
2641 case SZ_2M:
2642 vma->gpuva.flags |= XE_VMA_PTE_2M;
2643 break;
2644 case SZ_64K:
2645 vma->gpuva.flags |= XE_VMA_PTE_64K;
2646 break;
2647 case SZ_4K:
2648 vma->gpuva.flags |= XE_VMA_PTE_4K;
2649 break;
2650 }
2651 }
2652
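/*
 * Commit a single operation to the VMA tree (inserting new VMAs and marking
 * unmapped ones destroyed) so later steps, and a potential unwind, see a
 * consistent view. The XE_VMA_OP_*_COMMITTED flags record what to undo on
 * error.
 */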
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2653 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2654 {
2655 int err = 0;
2656
2657 lockdep_assert_held_write(&vm->lock);
2658
2659 switch (op->base.op) {
2660 case DRM_GPUVA_OP_MAP:
2661 err |= xe_vm_insert_vma(vm, op->map.vma);
2662 if (!err)
2663 op->flags |= XE_VMA_OP_COMMITTED;
2664 break;
2665 case DRM_GPUVA_OP_REMAP:
2666 {
2667 u8 tile_present =
2668 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2669
2670 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2671 true);
2672 op->flags |= XE_VMA_OP_COMMITTED;
2673
2674 if (op->remap.prev) {
2675 err |= xe_vm_insert_vma(vm, op->remap.prev);
2676 if (!err)
2677 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2678 if (!err && op->remap.skip_prev) {
2679 op->remap.prev->tile_present =
2680 tile_present;
2681 }
2682 }
2683 if (op->remap.next) {
2684 err |= xe_vm_insert_vma(vm, op->remap.next);
2685 if (!err)
2686 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2687 if (!err && op->remap.skip_next) {
2688 op->remap.next->tile_present =
2689 tile_present;
2690 }
2691 }
2692
2693 /*
2694 * Adjust for partial unbind after removing VMA from VM. In case
2695 * of unwind we might need to undo this later.
2696 */
2697 if (!err) {
2698 op->base.remap.unmap->va->va.addr = op->remap.start;
2699 op->base.remap.unmap->va->va.range = op->remap.range;
2700 }
2701 break;
2702 }
2703 case DRM_GPUVA_OP_UNMAP:
2704 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2705 op->flags |= XE_VMA_OP_COMMITTED;
2706 break;
2707 case DRM_GPUVA_OP_PREFETCH:
2708 op->flags |= XE_VMA_OP_COMMITTED;
2709 break;
2710 default:
2711 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2712 }
2713
2714 return err;
2715 }
2716
2717 /**
2718 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2719 * @vma: Pointer to the xe_vma structure to check
2720 *
2721 * This function determines whether the given VMA (Virtual Memory Area)
2722 * has its memory attributes set to their default values. Specifically,
2723 * it checks the following conditions:
2724 *
2725  * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
2726 * - `pat_index` is equal to `default_pat_index`
2727 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2728 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2729 *
2730 * Return: true if all attributes are at their default values, false otherwise.
2731 */
xe_vma_has_default_mem_attrs(struct xe_vma * vma)2732 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2733 {
2734 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2735 vma->attr.pat_index == vma->attr.default_pat_index &&
2736 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2737 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2738 }
2739
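/*
 * Walk the GPUVA operations, create the VMAs they require, account the
 * page-table update work per tile and commit each operation to the VMA tree.
 * On error, already committed operations are expected to be unwound with
 * vm_bind_ioctl_ops_unwind().
 */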
vm_bind_ioctl_ops_parse(struct xe_vm * vm,struct drm_gpuva_ops * ops,struct xe_vma_ops * vops)2740 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2741 struct xe_vma_ops *vops)
2742 {
2743 struct xe_device *xe = vm->xe;
2744 struct drm_gpuva_op *__op;
2745 struct xe_tile *tile;
2746 u8 id, tile_mask = 0;
2747 int err = 0;
2748
2749 lockdep_assert_held_write(&vm->lock);
2750
2751 for_each_tile(tile, vm->xe, id)
2752 tile_mask |= 0x1 << id;
2753
2754 drm_gpuva_for_each_op(__op, ops) {
2755 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2756 struct xe_vma *vma;
2757 unsigned int flags = 0;
2758
2759 INIT_LIST_HEAD(&op->link);
2760 list_add_tail(&op->link, &vops->list);
2761 op->tile_mask = tile_mask;
2762
2763 switch (op->base.op) {
2764 case DRM_GPUVA_OP_MAP:
2765 {
2766 struct xe_vma_mem_attr default_attr = {
2767 .preferred_loc = {
2768 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2769 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2770 },
2771 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2772 .default_pat_index = op->map.pat_index,
2773 .pat_index = op->map.pat_index,
2774 .purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
2775 };
2776
2777 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
2778
2779 vma = new_vma(vm, &op->base.map, &default_attr,
2780 flags);
2781 if (IS_ERR(vma))
2782 return PTR_ERR(vma);
2783
2784 op->map.vma = vma;
2785 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2786 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
2787 op->map.invalidate_on_bind)
2788 xe_vma_ops_incr_pt_update_ops(vops,
2789 op->tile_mask, 1);
2790 break;
2791 }
2792 case DRM_GPUVA_OP_REMAP:
2793 {
2794 struct xe_vma *old =
2795 gpuva_to_vma(op->base.remap.unmap->va);
2796 bool skip = xe_vma_is_cpu_addr_mirror(old);
2797 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2798 int num_remap_ops = 0;
2799
2800 if (op->base.remap.prev)
2801 start = op->base.remap.prev->va.addr +
2802 op->base.remap.prev->va.range;
2803 if (op->base.remap.next)
2804 end = op->base.remap.next->va.addr;
2805
2806 if (xe_vma_is_cpu_addr_mirror(old) &&
2807 xe_svm_has_mapping(vm, start, end)) {
2808 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2809 xe_svm_unmap_address_range(vm, start, end);
2810 else
2811 return -EBUSY;
2812 }
2813
2814 op->remap.start = xe_vma_start(old);
2815 op->remap.range = xe_vma_size(old);
2816 op->remap.old_start = op->remap.start;
2817 op->remap.old_range = op->remap.range;
2818
2819 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
2820 if (op->base.remap.prev) {
2821 vma = new_vma(vm, op->base.remap.prev,
2822 &old->attr, flags);
2823 if (IS_ERR(vma))
2824 return PTR_ERR(vma);
2825
2826 op->remap.prev = vma;
2827
2828 /*
2829 * Userptr creates a new SG mapping so
2830 * we must also rebind.
2831 */
2832 op->remap.skip_prev = skip ||
2833 (!xe_vma_is_userptr(old) &&
2834 IS_ALIGNED(xe_vma_end(vma),
2835 xe_vma_max_pte_size(old)));
2836 if (op->remap.skip_prev) {
2837 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2838 op->remap.range -=
2839 xe_vma_end(vma) -
2840 xe_vma_start(old);
2841 op->remap.start = xe_vma_end(vma);
2842 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2843 (ULL)op->remap.start,
2844 (ULL)op->remap.range);
2845 } else {
2846 num_remap_ops++;
2847 }
2848 }
2849
2850 if (op->base.remap.next) {
2851 vma = new_vma(vm, op->base.remap.next,
2852 &old->attr, flags);
2853 if (IS_ERR(vma))
2854 return PTR_ERR(vma);
2855
2856 op->remap.next = vma;
2857
2858 /*
2859 * Userptr creates a new SG mapping so
2860 * we must also rebind.
2861 */
2862 op->remap.skip_next = skip ||
2863 (!xe_vma_is_userptr(old) &&
2864 IS_ALIGNED(xe_vma_start(vma),
2865 xe_vma_max_pte_size(old)));
2866 if (op->remap.skip_next) {
2867 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2868 op->remap.range -=
2869 xe_vma_end(old) -
2870 xe_vma_start(vma);
2871 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2872 (ULL)op->remap.start,
2873 (ULL)op->remap.range);
2874 } else {
2875 num_remap_ops++;
2876 }
2877 }
2878 if (!skip)
2879 num_remap_ops++;
2880
2881 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2882 break;
2883 }
2884 case DRM_GPUVA_OP_UNMAP:
2885 vma = gpuva_to_vma(op->base.unmap.va);
2886
2887 if (xe_vma_is_cpu_addr_mirror(vma) &&
2888 xe_svm_has_mapping(vm, xe_vma_start(vma),
2889 xe_vma_end(vma)) &&
2890 !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP))
2891 return -EBUSY;
2892
2893 if (!xe_vma_is_cpu_addr_mirror(vma))
2894 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2895 break;
2896 case DRM_GPUVA_OP_PREFETCH:
2897 vma = gpuva_to_vma(op->base.prefetch.va);
2898
2899 if (xe_vma_is_userptr(vma)) {
2900 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2901 if (err)
2902 return err;
2903 }
2904
2905 if (xe_vma_is_cpu_addr_mirror(vma))
2906 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2907 op->prefetch_range.ranges_count);
2908 else
2909 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2910
2911 break;
2912 default:
2913 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2914 }
2915
2916 err = xe_vma_op_commit(vm, op);
2917 if (err)
2918 return err;
2919 }
2920
2921 return 0;
2922 }
2923
xe_vma_op_unwind(struct xe_vm * vm,struct xe_vma_op * op,bool post_commit,bool prev_post_commit,bool next_post_commit)2924 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2925 bool post_commit, bool prev_post_commit,
2926 bool next_post_commit)
2927 {
2928 lockdep_assert_held_write(&vm->lock);
2929
2930 switch (op->base.op) {
2931 case DRM_GPUVA_OP_MAP:
2932 if (op->map.vma) {
2933 prep_vma_destroy(vm, op->map.vma, post_commit);
2934 xe_vma_destroy_unlocked(op->map.vma);
2935 }
2936 break;
2937 case DRM_GPUVA_OP_UNMAP:
2938 {
2939 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2940
2941 if (vma) {
2942 xe_svm_notifier_lock(vm);
2943 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2944 xe_svm_notifier_unlock(vm);
2945 if (post_commit)
2946 xe_vm_insert_vma(vm, vma);
2947 }
2948 break;
2949 }
2950 case DRM_GPUVA_OP_REMAP:
2951 {
2952 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2953
2954 if (op->remap.prev) {
2955 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2956 xe_vma_destroy_unlocked(op->remap.prev);
2957 }
2958 if (op->remap.next) {
2959 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2960 xe_vma_destroy_unlocked(op->remap.next);
2961 }
2962 if (vma) {
2963 xe_svm_notifier_lock(vm);
2964 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2965 xe_svm_notifier_unlock(vm);
2966 if (post_commit) {
2967 /*
2968 * Restore the old va range, in case of the
2969 * prev/next skip optimisation. Otherwise what
2970 * we re-insert here could be smaller than the
2971 * original range.
2972 */
2973 op->base.remap.unmap->va->va.addr =
2974 op->remap.old_start;
2975 op->base.remap.unmap->va->va.range =
2976 op->remap.old_range;
2977 xe_vm_insert_vma(vm, vma);
2978 }
2979 }
2980 break;
2981 }
2982 case DRM_GPUVA_OP_PREFETCH:
2983 /* Nothing to do */
2984 break;
2985 default:
2986 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2987 }
2988 }
2989
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2990 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2991 struct drm_gpuva_ops **ops,
2992 int num_ops_list)
2993 {
2994 int i;
2995
2996 for (i = num_ops_list - 1; i >= 0; --i) {
2997 struct drm_gpuva_ops *__ops = ops[i];
2998 struct drm_gpuva_op *__op;
2999
3000 if (!__ops)
3001 continue;
3002
3003 drm_gpuva_for_each_op_reverse(__op, __ops) {
3004 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3005
3006 xe_vma_op_unwind(vm, op,
3007 op->flags & XE_VMA_OP_COMMITTED,
3008 op->flags & XE_VMA_OP_PREV_COMMITTED,
3009 op->flags & XE_VMA_OP_NEXT_COMMITTED);
3010 }
3011 }
3012 }
3013
3014 /**
3015 * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate()
3016 * @res_evict: Allow evicting resources during validation
3017 * @validate: Perform BO validation
3018 * @request_decompress: Request BO decompression
3019 * @check_purged: Reject operation if BO is purged
3020 */
3021 struct xe_vma_lock_and_validate_flags {
3022 u32 res_evict : 1;
3023 u32 validate : 1;
3024 u32 request_decompress : 1;
3025 u32 check_purged : 1;
3026 };
3027
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,struct xe_vma_lock_and_validate_flags flags)3028 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
3029 struct xe_vma_lock_and_validate_flags flags)
3030 {
3031 struct xe_bo *bo = xe_vma_bo(vma);
3032 struct xe_vm *vm = xe_vma_vm(vma);
3033 int err = 0;
3034
3035 if (bo) {
3036 if (!bo->vm)
3037 err = drm_exec_lock_obj(exec, &bo->ttm.base);
3038
3039 /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */
3040 if (!err && flags.check_purged) {
3041 if (xe_bo_madv_is_dontneed(bo))
3042 err = -EBUSY; /* BO marked purgeable */
3043 else if (xe_bo_is_purged(bo))
3044 err = -EINVAL; /* BO already purged */
3045 }
3046
3047 if (!err && flags.validate)
3048 err = xe_bo_validate(bo, vm,
3049 xe_vm_allow_vm_eviction(vm) &&
3050 flags.res_evict, exec);
3051
3052 if (err)
3053 return err;
3054
3055 if (flags.request_decompress)
3056 err = xe_bo_decompress(bo);
3057 }
3058
3059 return err;
3060 }
3061
check_ufence(struct xe_vma * vma)3062 static int check_ufence(struct xe_vma *vma)
3063 {
3064 if (vma->ufence) {
3065 struct xe_user_fence * const f = vma->ufence;
3066
3067 if (!xe_sync_ufence_get_status(f))
3068 return -EBUSY;
3069
3070 vma->ufence = NULL;
3071 xe_sync_ufence_put(f);
3072 }
3073
3074 return 0;
3075 }
3076
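/*
 * Migrate and populate the SVM ranges attached to a prefetch operation:
 * ranges are moved to the requested device pagemap (or back to system memory
 * when none is requested) and their pages are then acquired.
 */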
prefetch_ranges(struct xe_vm * vm,struct xe_vma_op * op)3077 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
3078 {
3079 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
3080 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3081 struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
3082 int err = 0;
3083
3084 struct xe_svm_range *svm_range;
3085 struct drm_gpusvm_ctx ctx = {};
3086 unsigned long i;
3087
3088 if (!xe_vma_is_cpu_addr_mirror(vma))
3089 return 0;
3090
3091 ctx.read_only = xe_vma_read_only(vma);
3092 ctx.devmem_possible = devmem_possible;
3093 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
3094 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
3095
3096 /* TODO: Threading the migration */
3097 xa_for_each(&op->prefetch_range.range, i, svm_range) {
3098 if (!dpagemap)
3099 xe_svm_range_migrate_to_smem(vm, svm_range);
3100
3101 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
3102 drm_dbg(&vm->xe->drm,
3103 "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
3104 dpagemap ? dpagemap->drm->unique : "system",
3105 xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
3106 }
3107
3108 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
3109 err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
3110 if (err) {
3111 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
3112 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
3113 return -ENODATA;
3114 }
3115 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
3116 }
3117
3118 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
3119 if (err) {
3120 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
3121 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
3122 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
3123 err = -ENODATA;
3124 return err;
3125 }
3126 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
3127 }
3128
3129 return err;
3130 }
3131
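/*
 * Lock and, where required, validate the backing store touched by a single
 * operation as part of the drm_exec transaction, applying the per-op
 * eviction, validation, decompression and purge-check rules.
 */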
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_vma_op * op)3132 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
3133 struct xe_vma_ops *vops, struct xe_vma_op *op)
3134 {
3135 int err = 0;
3136 bool res_evict;
3137
3138 	/*
3139 	 * We only allow evicting a BO within the VM if the operation is not
3140 	 * part of an array of binds, as one bind in the array could otherwise
3141 	 * evict a BO needed by another bind in the same array.
3142 	 */
3143 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
3144
3145 switch (op->base.op) {
3146 case DRM_GPUVA_OP_MAP:
3147 if (!op->map.invalidate_on_bind)
3148 err = vma_lock_and_validate(exec, op->map.vma,
3149 (struct xe_vma_lock_and_validate_flags) {
3150 .res_evict = res_evict,
3151 .validate = !xe_vm_in_fault_mode(vm) ||
3152 op->map.immediate,
3153 .request_decompress =
3154 op->map.request_decompress,
3155 .check_purged = true,
3156 });
3157 break;
3158 case DRM_GPUVA_OP_REMAP:
3159 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
3160 if (err)
3161 break;
3162
3163 err = vma_lock_and_validate(exec,
3164 gpuva_to_vma(op->base.remap.unmap->va),
3165 (struct xe_vma_lock_and_validate_flags) {
3166 .res_evict = res_evict,
3167 .validate = false,
3168 .request_decompress = false,
3169 .check_purged = false,
3170 });
3171 if (!err && op->remap.prev)
3172 err = vma_lock_and_validate(exec, op->remap.prev,
3173 (struct xe_vma_lock_and_validate_flags) {
3174 .res_evict = res_evict,
3175 .validate = true,
3176 .request_decompress = false,
3177 .check_purged = true,
3178 });
3179 if (!err && op->remap.next)
3180 err = vma_lock_and_validate(exec, op->remap.next,
3181 (struct xe_vma_lock_and_validate_flags) {
3182 .res_evict = res_evict,
3183 .validate = true,
3184 .request_decompress = false,
3185 .check_purged = true,
3186 });
3187 break;
3188 case DRM_GPUVA_OP_UNMAP:
3189 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
3190 if (err)
3191 break;
3192
3193 err = vma_lock_and_validate(exec,
3194 gpuva_to_vma(op->base.unmap.va),
3195 (struct xe_vma_lock_and_validate_flags) {
3196 .res_evict = res_evict,
3197 .validate = false,
3198 .request_decompress = false,
3199 .check_purged = false,
3200 });
3201 break;
3202 case DRM_GPUVA_OP_PREFETCH:
3203 {
3204 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3205 u32 region;
3206
3207 if (!xe_vma_is_cpu_addr_mirror(vma)) {
3208 region = op->prefetch.region;
3209 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
3210 region <= ARRAY_SIZE(region_to_mem_type));
3211 }
3212
3213 /*
3214 * Prefetch attempts to migrate BO's backing store without
3215 * repopulating it first. Purged BOs have no backing store
3216 * to migrate, so reject the operation.
3217 */
3218 err = vma_lock_and_validate(exec,
3219 gpuva_to_vma(op->base.prefetch.va),
3220 (struct xe_vma_lock_and_validate_flags) {
3221 .res_evict = res_evict,
3222 .validate = false,
3223 .request_decompress = false,
3224 .check_purged = true,
3225 });
3226 if (!err && !xe_vma_has_no_bo(vma))
3227 err = xe_bo_migrate(xe_vma_bo(vma),
3228 region_to_mem_type[region],
3229 NULL,
3230 exec);
3231 break;
3232 }
3233 default:
3234 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3235 }
3236
3237 return err;
3238 }
3239
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)3240 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3241 {
3242 struct xe_vma_op *op;
3243 int err;
3244
3245 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3246 return 0;
3247
3248 list_for_each_entry(op, &vops->list, link) {
3249 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3250 err = prefetch_ranges(vm, op);
3251 if (err)
3252 return err;
3253 }
3254 }
3255
3256 return 0;
3257 }
3258
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)3259 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3260 struct xe_vm *vm,
3261 struct xe_vma_ops *vops)
3262 {
3263 struct xe_vma_op *op;
3264 int err;
3265
3266 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3267 if (err)
3268 return err;
3269
3270 list_for_each_entry(op, &vops->list, link) {
3271 err = op_lock_and_prep(exec, vm, vops, op);
3272 if (err)
3273 return err;
3274 }
3275
3276 #ifdef TEST_VM_OPS_ERROR
3277 if (vops->inject_error &&
3278 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3279 return -ENOSPC;
3280 #endif
3281
3282 return 0;
3283 }
3284
op_trace(struct xe_vma_op * op)3285 static void op_trace(struct xe_vma_op *op)
3286 {
3287 switch (op->base.op) {
3288 case DRM_GPUVA_OP_MAP:
3289 trace_xe_vma_bind(op->map.vma);
3290 break;
3291 case DRM_GPUVA_OP_REMAP:
3292 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3293 if (op->remap.prev)
3294 trace_xe_vma_bind(op->remap.prev);
3295 if (op->remap.next)
3296 trace_xe_vma_bind(op->remap.next);
3297 break;
3298 case DRM_GPUVA_OP_UNMAP:
3299 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3300 break;
3301 case DRM_GPUVA_OP_PREFETCH:
3302 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3303 break;
3304 case DRM_GPUVA_OP_DRIVER:
3305 break;
3306 default:
3307 XE_WARN_ON("NOT POSSIBLE");
3308 }
3309 }
3310
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3311 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3312 {
3313 struct xe_vma_op *op;
3314
3315 list_for_each_entry(op, &vops->list, link)
3316 op_trace(op);
3317 }
3318
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3319 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3320 {
3321 struct xe_exec_queue *q = vops->q;
3322 struct xe_tile *tile;
3323 int number_tiles = 0;
3324 u8 id;
3325
3326 for_each_tile(tile, vm->xe, id) {
3327 if (vops->pt_update_ops[id].num_ops)
3328 ++number_tiles;
3329
3330 if (vops->pt_update_ops[id].q)
3331 continue;
3332
3333 if (q) {
3334 vops->pt_update_ops[id].q = q;
3335 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3336 q = list_next_entry(q, multi_gt_list);
3337 } else {
3338 vops->pt_update_ops[id].q = vm->q[id];
3339 }
3340 }
3341
3342 return number_tiles;
3343 }
3344
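/*
 * Run the page-table updates for every tile with work to do and collect the
 * resulting fences (plus the TLB-invalidation last fences unless skipped)
 * into a single dma_fence_array for the caller to wait on or install.
 */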
ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3345 static struct dma_fence *ops_execute(struct xe_vm *vm,
3346 struct xe_vma_ops *vops)
3347 {
3348 struct xe_tile *tile;
3349 struct dma_fence *fence = NULL;
3350 struct dma_fence **fences = NULL;
3351 struct dma_fence_array *cf = NULL;
3352 int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
3353 u8 id;
3354
3355 number_tiles = vm_ops_setup_tile_args(vm, vops);
3356 if (number_tiles == 0)
3357 return ERR_PTR(-ENODATA);
3358
3359 for_each_tile(tile, vm->xe, id) {
3360 ++n_fence;
3361
3362 if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
3363 for_each_tlb_inval(i)
3364 ++n_fence;
3365 }
3366
3367 fences = kmalloc_objs(*fences, n_fence);
3368 if (!fences) {
3369 fence = ERR_PTR(-ENOMEM);
3370 goto err_trace;
3371 }
3372
3373 cf = dma_fence_array_alloc(n_fence);
3374 if (!cf) {
3375 fence = ERR_PTR(-ENOMEM);
3376 goto err_out;
3377 }
3378
3379 for_each_tile(tile, vm->xe, id) {
3380 if (!vops->pt_update_ops[id].num_ops)
3381 continue;
3382
3383 err = xe_pt_update_ops_prepare(tile, vops);
3384 if (err) {
3385 fence = ERR_PTR(err);
3386 goto err_out;
3387 }
3388 }
3389
3390 trace_xe_vm_ops_execute(vops);
3391
3392 for_each_tile(tile, vm->xe, id) {
3393 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
3394
3395 fence = NULL;
3396 if (!vops->pt_update_ops[id].num_ops)
3397 goto collect_fences;
3398
3399 fence = xe_pt_update_ops_run(tile, vops);
3400 if (IS_ERR(fence))
3401 goto err_out;
3402
3403 collect_fences:
3404 fences[current_fence++] = fence ?: dma_fence_get_stub();
3405 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
3406 continue;
3407
3408 xe_migrate_job_lock(tile->migrate, q);
3409 for_each_tlb_inval(i)
3410 fences[current_fence++] =
3411 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
3412 xe_migrate_job_unlock(tile->migrate, q);
3413 }
3414
3415 xe_assert(vm->xe, current_fence == n_fence);
3416 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
3417 1, false);
3418 fence = &cf->base;
3419
3420 for_each_tile(tile, vm->xe, id) {
3421 if (!vops->pt_update_ops[id].num_ops)
3422 continue;
3423
3424 xe_pt_update_ops_fini(tile, vops);
3425 }
3426
3427 return fence;
3428
3429 err_out:
3430 for_each_tile(tile, vm->xe, id) {
3431 if (!vops->pt_update_ops[id].num_ops)
3432 continue;
3433
3434 xe_pt_update_ops_abort(tile, vops);
3435 }
3436 while (current_fence)
3437 dma_fence_put(fences[--current_fence]);
3438 kfree(fences);
3439 kfree(cf);
3440
3441 err_trace:
3442 trace_xe_vm_ops_fail(vm);
3443 return fence;
3444 }
3445
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3446 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3447 {
3448 if (vma->ufence)
3449 xe_sync_ufence_put(vma->ufence);
3450 vma->ufence = __xe_sync_ufence_get(ufence);
3451 }
3452
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3453 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3454 struct xe_user_fence *ufence)
3455 {
3456 switch (op->base.op) {
3457 case DRM_GPUVA_OP_MAP:
3458 if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
3459 vma_add_ufence(op->map.vma, ufence);
3460 break;
3461 case DRM_GPUVA_OP_REMAP:
3462 if (op->remap.prev)
3463 vma_add_ufence(op->remap.prev, ufence);
3464 if (op->remap.next)
3465 vma_add_ufence(op->remap.next, ufence);
3466 break;
3467 case DRM_GPUVA_OP_UNMAP:
3468 break;
3469 case DRM_GPUVA_OP_PREFETCH:
3470 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3471 break;
3472 default:
3473 drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3474 }
3475 }
3476
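/*
 * Post-execution fixups: attach the user fence (if any) to the affected VMAs,
 * schedule destruction of unmapped/remapped VMAs against @fence and signal
 * the sync entries. A NULL @fence means no page-table work was executed.
 */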
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3477 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3478 struct dma_fence *fence)
3479 {
3480 struct xe_user_fence *ufence;
3481 struct xe_vma_op *op;
3482 int i;
3483
3484 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3485 list_for_each_entry(op, &vops->list, link) {
3486 if (ufence)
3487 op_add_ufence(vm, op, ufence);
3488
3489 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3490 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3491 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3492 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3493 fence);
3494 }
3495 if (ufence)
3496 xe_sync_ufence_put(ufence);
3497 if (fence) {
3498 for (i = 0; i < vops->num_syncs; i++)
3499 xe_sync_entry_signal(vops->syncs + i, fence);
3500 }
3501 }
3502
vm_bind_ioctl_ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3503 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3504 struct xe_vma_ops *vops)
3505 {
3506 struct xe_validation_ctx ctx;
3507 struct drm_exec exec;
3508 struct dma_fence *fence;
3509 int err = 0;
3510
3511 lockdep_assert_held_write(&vm->lock);
3512
3513 xe_validation_guard(&ctx, &vm->xe->val, &exec,
3514 ((struct xe_val_flags) {
3515 .interruptible = true,
3516 .exec_ignore_duplicates = true,
3517 }), err) {
3518 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3519 drm_exec_retry_on_contention(&exec);
3520 xe_validation_retry_on_oom(&ctx, &err);
3521 if (err)
3522 return ERR_PTR(err);
3523
3524 xe_vm_set_validation_exec(vm, &exec);
3525 fence = ops_execute(vm, vops);
3526 xe_vm_set_validation_exec(vm, NULL);
3527 if (IS_ERR(fence)) {
3528 if (PTR_ERR(fence) == -ENODATA)
3529 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3530 return fence;
3531 }
3532
3533 vm_bind_ioctl_ops_fini(vm, vops, fence);
3534 }
3535
3536 return err ? ERR_PTR(err) : fence;
3537 }
3538 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3539
3540 #define SUPPORTED_FLAGS_STUB \
3541 (DRM_XE_VM_BIND_FLAG_READONLY | \
3542 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3543 DRM_XE_VM_BIND_FLAG_NULL | \
3544 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3545 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3546 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3547 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
3548 DRM_XE_VM_BIND_FLAG_DECOMPRESS)
3549
3550 #ifdef TEST_VM_OPS_ERROR
3551 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3552 #else
3553 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3554 #endif
3555
3556 #define XE_64K_PAGE_MASK 0xffffull
3557 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3558
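/*
 * Validate the bind arguments and, for arrays of binds, copy the bind ops
 * from userspace. On success *bind_ops points either at args->bind or at a
 * kvmalloc'ed array that the caller must kvfree() when args->num_binds > 1.
 */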
vm_bind_ioctl_check_args(struct xe_device * xe,struct xe_vm * vm,struct drm_xe_vm_bind * args,struct drm_xe_vm_bind_op ** bind_ops)3559 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3560 struct drm_xe_vm_bind *args,
3561 struct drm_xe_vm_bind_op **bind_ops)
3562 {
3563 int err;
3564 int i;
3565
3566 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3567 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3568 return -EINVAL;
3569
3570 if (XE_IOCTL_DBG(xe, args->extensions))
3571 return -EINVAL;
3572
3573 if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
3574 return -EINVAL;
3575
3576 if (args->num_binds > 1) {
3577 u64 __user *bind_user =
3578 u64_to_user_ptr(args->vector_of_binds);
3579
3580 *bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
3581 args->num_binds,
3582 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3583 if (!*bind_ops)
3584 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3585
3586 err = copy_from_user(*bind_ops, bind_user,
3587 sizeof(struct drm_xe_vm_bind_op) *
3588 args->num_binds);
3589 if (XE_IOCTL_DBG(xe, err)) {
3590 err = -EFAULT;
3591 goto free_bind_ops;
3592 }
3593 } else {
3594 *bind_ops = &args->bind;
3595 }
3596
3597 for (i = 0; i < args->num_binds; ++i) {
3598 u64 range = (*bind_ops)[i].range;
3599 u64 addr = (*bind_ops)[i].addr;
3600 u32 op = (*bind_ops)[i].op;
3601 u32 flags = (*bind_ops)[i].flags;
3602 u32 obj = (*bind_ops)[i].obj;
3603 u64 obj_offset = (*bind_ops)[i].obj_offset;
3604 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3605 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3606 bool is_cpu_addr_mirror = flags &
3607 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3608 bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
3609 u16 pat_index = (*bind_ops)[i].pat_index;
3610 u16 coh_mode;
3611 bool comp_en;
3612
3613 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3614 (!xe_vm_in_fault_mode(vm) ||
3615 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3616 err = -EINVAL;
3617 goto free_bind_ops;
3618 }
3619
3620 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3621 err = -EINVAL;
3622 goto free_bind_ops;
3623 }
3624
3625 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3626 (*bind_ops)[i].pat_index = pat_index;
3627 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3628 comp_en = xe_pat_index_get_comp_en(xe, pat_index);
3629 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3630 err = -EINVAL;
3631 goto free_bind_ops;
3632 }
3633
3634 if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
3635 err = -EINVAL;
3636 goto free_bind_ops;
3637 }
3638
3639 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3640 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3641 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3642 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3643 is_cpu_addr_mirror)) ||
3644 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3645 (is_decompress || is_null || is_cpu_addr_mirror)) ||
3646 XE_IOCTL_DBG(xe, is_decompress &&
3647 xe_pat_index_get_comp_en(xe, pat_index)) ||
3648 XE_IOCTL_DBG(xe, !obj &&
3649 op == DRM_XE_VM_BIND_OP_MAP &&
3650 !is_null && !is_cpu_addr_mirror) ||
3651 XE_IOCTL_DBG(xe, !obj &&
3652 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3653 XE_IOCTL_DBG(xe, addr &&
3654 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3655 XE_IOCTL_DBG(xe, range &&
3656 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3657 XE_IOCTL_DBG(xe, obj &&
3658 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3659 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3660 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3661 XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
3662 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
3663 is_cpu_addr_mirror) &&
3664 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
3665 XE_IOCTL_DBG(xe, comp_en &&
3666 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3667 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3668 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3669 XE_IOCTL_DBG(xe, obj &&
3670 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3671 XE_IOCTL_DBG(xe, prefetch_region &&
3672 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3673 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3674 /* Guard against undefined shift in BIT(prefetch_region) */
3675 (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
3676 !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
3677 XE_IOCTL_DBG(xe, obj &&
3678 op == DRM_XE_VM_BIND_OP_UNMAP) ||
3679 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
3680 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
3681 err = -EINVAL;
3682 goto free_bind_ops;
3683 }
3684
3685 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3686 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3687 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3688 XE_IOCTL_DBG(xe, !range &&
3689 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3690 err = -EINVAL;
3691 goto free_bind_ops;
3692 }
3693
3694 if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
3695 XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
3696 XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
3697 err = -EOPNOTSUPP;
3698 goto free_bind_ops;
3699 }
3700 }
3701
3702 return 0;
3703
3704 free_bind_ops:
3705 if (args->num_binds > 1)
3706 kvfree(*bind_ops);
3707 *bind_ops = NULL;
3708 return err;
3709 }
3710
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3711 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3712 struct xe_exec_queue *q,
3713 struct xe_sync_entry *syncs,
3714 int num_syncs)
3715 {
3716 struct dma_fence *fence = NULL;
3717 int i, err = 0;
3718
3719 if (num_syncs) {
3720 fence = xe_sync_in_fence_get(syncs, num_syncs,
3721 to_wait_exec_queue(vm, q), vm);
3722 if (IS_ERR(fence))
3723 return PTR_ERR(fence);
3724
3725 for (i = 0; i < num_syncs; i++)
3726 xe_sync_entry_signal(&syncs[i], fence);
3727 }
3728
3729 dma_fence_put(fence);
3730
3731 return err;
3732 }
3733
xe_vma_ops_init(struct xe_vma_ops * vops,struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,u32 num_syncs)3734 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3735 struct xe_exec_queue *q,
3736 struct xe_sync_entry *syncs, u32 num_syncs)
3737 {
3738 memset(vops, 0, sizeof(*vops));
3739 INIT_LIST_HEAD(&vops->list);
3740 vops->vm = vm;
3741 vops->q = q;
3742 vops->syncs = syncs;
3743 vops->num_syncs = num_syncs;
3744 vops->flags = 0;
3745 }
3746
xe_vm_bind_ioctl_validate_bo(struct xe_device * xe,struct xe_bo * bo,u64 addr,u64 range,u64 obj_offset,u16 pat_index,u32 op,u32 bind_flags)3747 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3748 u64 addr, u64 range, u64 obj_offset,
3749 u16 pat_index, u32 op, u32 bind_flags)
3750 {
3751 u16 coh_mode;
3752 bool comp_en;
3753
3754 if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
3755 xe_pat_index_get_comp_en(xe, pat_index)))
3756 return -EINVAL;
3757
3758 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3759 XE_IOCTL_DBG(xe, obj_offset >
3760 xe_bo_size(bo) - range)) {
3761 return -EINVAL;
3762 }
3763
3764 /*
3765 * Some platforms require 64k VM_BIND alignment,
3766 * specifically those with XE_VRAM_FLAGS_NEED64K.
3767 *
3768 	 * Other platforms may have BOs set to 64k physical placement,
3769 * but can be mapped at 4k offsets anyway. This check is only
3770 * there for the former case.
3771 */
3772 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3773 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3774 if (XE_IOCTL_DBG(xe, obj_offset &
3775 XE_64K_PAGE_MASK) ||
3776 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3777 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3778 return -EINVAL;
3779 }
3780 }
3781
3782 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3783 if (bo->cpu_caching) {
3784 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3785 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3786 return -EINVAL;
3787 }
3788 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3789 		/*
3790 		 * An imported dma-buf from a different device should
3791 		 * require 1-way or 2-way coherency since we don't know
3792 		 * how it was mapped on the CPU. Just assume it is
3793 		 * potentially cached on the CPU side.
3794 		 */
3795 return -EINVAL;
3796 }
3797
3798 /*
3799 * Ensures that imported buffer objects (dma-bufs) are not mapped
3800 * with a PAT index that enables compression.
3801 */
3802 comp_en = xe_pat_index_get_comp_en(xe, pat_index);
3803 if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
3804 return -EINVAL;
3805
3806 if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
3807 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
3808 return -EINVAL;
3809
3810 /* If a BO is protected it can only be mapped if the key is still valid */
3811 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3812 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3813 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3814 return -ENOEXEC;
3815
3816 return 0;
3817 }
3818
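/*
 * Illustrative single-bind userspace sketch (not authoritative, error
 * handling omitted; a platform-valid pat_index must be chosen by the caller):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.addr = gpu_addr,
 *			.range = size,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */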
xe_vm_bind_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3819 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3820 {
3821 struct xe_device *xe = to_xe_device(dev);
3822 struct xe_file *xef = to_xe_file(file);
3823 struct drm_xe_vm_bind *args = data;
3824 struct drm_xe_sync __user *syncs_user;
3825 struct xe_bo **bos = NULL;
3826 struct drm_gpuva_ops **ops = NULL;
3827 struct xe_vm *vm;
3828 struct xe_exec_queue *q = NULL;
3829 u32 num_syncs, num_ufence = 0;
3830 struct xe_sync_entry *syncs = NULL;
3831 struct drm_xe_vm_bind_op *bind_ops = NULL;
3832 struct xe_vma_ops vops;
3833 struct dma_fence *fence;
3834 int err;
3835 int i;
3836
3837 vm = xe_vm_lookup(xef, args->vm_id);
3838 if (XE_IOCTL_DBG(xe, !vm))
3839 return -EINVAL;
3840
3841 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3842 if (err)
3843 goto put_vm;
3844
3845 if (args->exec_queue_id) {
3846 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3847 if (XE_IOCTL_DBG(xe, !q)) {
3848 err = -ENOENT;
3849 goto free_bind_ops;
3850 }
3851
3852 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3853 err = -EINVAL;
3854 goto put_exec_queue;
3855 }
3856 }
3857
3858 if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
3859 err = -EINVAL;
3860 goto put_exec_queue;
3861 }
3862
3863 /* Ensure all UNMAPs visible */
3864 xe_svm_flush(vm);
3865
3866 err = down_write_killable(&vm->lock);
3867 if (err)
3868 goto put_exec_queue;
3869
3870 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3871 err = -ENOENT;
3872 goto release_vm_lock;
3873 }
3874
3875 for (i = 0; i < args->num_binds; ++i) {
3876 u64 range = bind_ops[i].range;
3877 u64 addr = bind_ops[i].addr;
3878
3879 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3880 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3881 err = -EINVAL;
3882 goto release_vm_lock;
3883 }
3884 }
3885
3886 if (args->num_binds) {
3887 bos = kvzalloc_objs(*bos, args->num_binds,
3888 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3889 if (!bos) {
3890 err = -ENOMEM;
3891 goto release_vm_lock;
3892 }
3893
3894 ops = kvzalloc_objs(*ops, args->num_binds,
3895 GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3896 if (!ops) {
3897 err = -ENOMEM;
3898 goto free_bos;
3899 }
3900 }
3901
3902 for (i = 0; i < args->num_binds; ++i) {
3903 struct drm_gem_object *gem_obj;
3904 u64 range = bind_ops[i].range;
3905 u64 addr = bind_ops[i].addr;
3906 u32 obj = bind_ops[i].obj;
3907 u64 obj_offset = bind_ops[i].obj_offset;
3908 u16 pat_index = bind_ops[i].pat_index;
3909 u32 op = bind_ops[i].op;
3910 u32 bind_flags = bind_ops[i].flags;
3911
3912 if (!obj)
3913 continue;
3914
3915 gem_obj = drm_gem_object_lookup(file, obj);
3916 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3917 err = -ENOENT;
3918 goto put_obj;
3919 }
3920 bos[i] = gem_to_xe_bo(gem_obj);
3921
3922 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3923 obj_offset, pat_index, op,
3924 bind_flags);
3925 if (err)
3926 goto put_obj;
3927 }
3928
3929 if (args->num_syncs) {
3930 syncs = kzalloc_objs(*syncs, args->num_syncs);
3931 if (!syncs) {
3932 err = -ENOMEM;
3933 goto put_obj;
3934 }
3935 }
3936
3937 syncs_user = u64_to_user_ptr(args->syncs);
3938 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3939 struct xe_exec_queue *__q = q ?: vm->q[0];
3940
3941 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3942 &syncs_user[num_syncs],
3943 __q->ufence_syncobj,
3944 ++__q->ufence_timeline_value,
3945 (xe_vm_in_lr_mode(vm) ?
3946 SYNC_PARSE_FLAG_LR_MODE : 0) |
3947 (!args->num_binds ?
3948 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3949 if (err)
3950 goto free_syncs;
3951
3952 if (xe_sync_is_ufence(&syncs[num_syncs]))
3953 num_ufence++;
3954 }
3955
3956 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3957 err = -EINVAL;
3958 goto free_syncs;
3959 }
3960
3961 if (!args->num_binds) {
3962 err = -ENODATA;
3963 goto free_syncs;
3964 }
3965
3966 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3967 if (args->num_binds > 1)
3968 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
3969 for (i = 0; i < args->num_binds; ++i) {
3970 u64 range = bind_ops[i].range;
3971 u64 addr = bind_ops[i].addr;
3972 u32 op = bind_ops[i].op;
3973 u32 flags = bind_ops[i].flags;
3974 u64 obj_offset = bind_ops[i].obj_offset;
3975 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3976 u16 pat_index = bind_ops[i].pat_index;
3977
3978 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3979 addr, range, op, flags,
3980 prefetch_region, pat_index);
3981 if (IS_ERR(ops[i])) {
3982 err = PTR_ERR(ops[i]);
3983 ops[i] = NULL;
3984 goto unwind_ops;
3985 }
3986
3987 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3988 if (err)
3989 goto unwind_ops;
3990
3991 #ifdef TEST_VM_OPS_ERROR
3992 if (flags & FORCE_OP_ERROR) {
3993 vops.inject_error = true;
3994 vm->xe->vm_inject_error_position =
3995 (vm->xe->vm_inject_error_position + 1) %
3996 FORCE_OP_ERROR_COUNT;
3997 }
3998 #endif
3999 }
4000
4001 /* Nothing to do */
4002 if (list_empty(&vops.list)) {
4003 err = -ENODATA;
4004 goto unwind_ops;
4005 }
4006
4007 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
4008 if (err)
4009 goto unwind_ops;
4010
4011 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
4012 if (err)
4013 goto unwind_ops;
4014
4015 fence = vm_bind_ioctl_ops_execute(vm, &vops);
4016 if (IS_ERR(fence))
4017 err = PTR_ERR(fence);
4018 else
4019 dma_fence_put(fence);
4020
4021 unwind_ops:
4022 if (err && err != -ENODATA)
4023 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
4024 xe_vma_ops_fini(&vops);
4025 for (i = args->num_binds - 1; i >= 0; --i)
4026 if (ops[i])
4027 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
4028 free_syncs:
4029 if (err == -ENODATA)
4030 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
4031 while (num_syncs--)
4032 xe_sync_entry_cleanup(&syncs[num_syncs]);
4033
4034 kfree(syncs);
4035 put_obj:
4036 for (i = 0; i < args->num_binds; ++i)
4037 xe_bo_put(bos[i]);
4038
4039 kvfree(ops);
4040 free_bos:
4041 kvfree(bos);
4042 release_vm_lock:
4043 up_write(&vm->lock);
4044 put_exec_queue:
4045 if (q)
4046 xe_exec_queue_put(q);
4047 free_bind_ops:
4048 if (args->num_binds > 1)
4049 kvfree(bind_ops);
4050 put_vm:
4051 xe_vm_put(vm);
4052 return err;
4053 }
4054
4055 /*
4056 * Map access type, fault type, and fault level from the current bspec
4057 * specification to the uAPI abstraction. The mapping is currently
4058 * close to 1-to-1; access type is the only notable exception, as it
4059 * carries additional prefetch-status data that needs to be masked
4060 * out.
4061 */
xe_to_user_access_type(u8 access_type)4062 static u8 xe_to_user_access_type(u8 access_type)
4063 {
4064 return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK;
4065 }
4066
xe_to_user_fault_type(u8 fault_type)4067 static u8 xe_to_user_fault_type(u8 fault_type)
4068 {
4069 return fault_type;
4070 }
4071
xe_to_user_fault_level(u8 fault_level)4072 static u8 xe_to_user_fault_level(u8 fault_level)
4073 {
4074 return fault_level;
4075 }
4076
fill_faults(struct xe_vm * vm,struct drm_xe_vm_get_property * args)4077 static int fill_faults(struct xe_vm *vm,
4078 struct drm_xe_vm_get_property *args)
4079 {
4080 struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data);
4081 struct xe_vm_fault *fault_list, fault_entry = { 0 };
4082 struct xe_vm_fault_entry *entry;
4083 int ret = 0, i = 0, count, entry_size;
4084
4085 entry_size = sizeof(struct xe_vm_fault);
4086 count = args->size / entry_size;
4087
4088 fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL);
4089 if (!fault_list)
4090 return -ENOMEM;
4091
4092 spin_lock(&vm->faults.lock);
4093 list_for_each_entry(entry, &vm->faults.list, list) {
4094 if (i == count)
4095 break;
4096
4097 fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address);
4098 fault_entry.address_precision = entry->address_precision;
4099
4100 fault_entry.access_type = xe_to_user_access_type(entry->access_type);
4101 fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type);
4102 fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level);
4103
4104 memcpy(&fault_list[i], &fault_entry, entry_size);
4105
4106 i++;
4107 }
4108 spin_unlock(&vm->faults.lock);
4109
4110 ret = copy_to_user(usr_ptr, fault_list, args->size);
4111
4112 kfree(fault_list);
4113 return ret ? -EFAULT : 0;
4114 }
4115
xe_vm_get_property_helper(struct xe_vm * vm,struct drm_xe_vm_get_property * args)4116 static int xe_vm_get_property_helper(struct xe_vm *vm,
4117 struct drm_xe_vm_get_property *args)
4118 {
4119 size_t size;
4120
4121 switch (args->property) {
4122 case DRM_XE_VM_GET_PROPERTY_FAULTS:
4123 spin_lock(&vm->faults.lock);
4124 size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len);
4125 spin_unlock(&vm->faults.lock);
4126
4127 if (!args->size) {
4128 args->size = size;
4129 return 0;
4130 }
4131
4132 /*
4133 * The number of faults may increase between calls to
4134 * xe_vm_get_property_ioctl(), so just report the number of
4135 * faults the user requested, provided it does not exceed the
4136 * number of faults currently in the VM fault array.
4137 *
4138 * Also require that args->size is a multiple of the
4139 * xe_vm_fault struct size.
4140 */
4141 if (args->size > size || args->size % sizeof(struct xe_vm_fault))
4142 return -EINVAL;
4143
4144 return fill_faults(vm, args);
4145 }
4146 return -EINVAL;
4147 }
4148
xe_vm_get_property_ioctl(struct drm_device * drm,void * data,struct drm_file * file)4149 int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
4150 struct drm_file *file)
4151 {
4152 struct xe_device *xe = to_xe_device(drm);
4153 struct xe_file *xef = to_xe_file(file);
4154 struct drm_xe_vm_get_property *args = data;
4155 struct xe_vm *vm;
4156 int ret = 0;
4157
4158 if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
4159 args->reserved[2])))
4160 return -EINVAL;
4161
4162 vm = xe_vm_lookup(xef, args->vm_id);
4163 if (XE_IOCTL_DBG(xe, !vm))
4164 return -ENOENT;
4165
4166 ret = xe_vm_get_property_helper(vm, args);
4167
4168 xe_vm_put(vm);
4169 return ret;
4170 }
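/*
 * Illustrative two-call usage from userspace (sketch only; the exact ioctl
 * macro name is assumed here and not defined in this file):
 *
 *	struct drm_xe_vm_get_property args = {
 *		.vm_id = vm_id,
 *		.property = DRM_XE_VM_GET_PROPERTY_FAULTS,
 *		.size = 0,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &args);	 // query required size
 *	args.data = (uintptr_t)malloc(args.size);
 *	ioctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &args);	 // fill fault entries
 */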
4171
4172 /**
4173 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
4174 * @vm: VM to bind the BO to
4175 * @bo: BO to bind
4176 * @q: exec queue to use for the bind (optional)
4177 * @addr: address at which to bind the BO
4178 * @cache_lvl: PAT cache level to use
4179 *
4180 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
4181 * kernel-owned VM.
4182 *
4183 * Returns a dma_fence to track the binding completion if the job to do so was
4184 * successfully submitted, an error pointer otherwise.
4185 */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)4186 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
4187 struct xe_exec_queue *q, u64 addr,
4188 enum xe_cache_level cache_lvl)
4189 {
4190 struct xe_vma_ops vops;
4191 struct drm_gpuva_ops *ops = NULL;
4192 struct dma_fence *fence;
4193 int err;
4194
4195 xe_bo_get(bo);
4196 xe_vm_get(vm);
4197 if (q)
4198 xe_exec_queue_get(q);
4199
4200 down_write(&vm->lock);
4201
4202 xe_vma_ops_init(&vops, vm, q, NULL, 0);
4203
4204 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
4205 DRM_XE_VM_BIND_OP_MAP, 0, 0,
4206 vm->xe->pat.idx[cache_lvl]);
4207 if (IS_ERR(ops)) {
4208 err = PTR_ERR(ops);
4209 goto release_vm_lock;
4210 }
4211
4212 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4213 if (err)
4214 goto release_vm_lock;
4215
4216 xe_assert(vm->xe, !list_empty(&vops.list));
4217
4218 err = xe_vma_ops_alloc(&vops, false);
4219 if (err)
4220 goto unwind_ops;
4221
4222 fence = vm_bind_ioctl_ops_execute(vm, &vops);
4223 if (IS_ERR(fence))
4224 err = PTR_ERR(fence);
4225
4226 unwind_ops:
4227 if (err && err != -ENODATA)
4228 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4229
4230 xe_vma_ops_fini(&vops);
4231 drm_gpuva_ops_free(&vm->gpuvm, ops);
4232
4233 release_vm_lock:
4234 up_write(&vm->lock);
4235
4236 if (q)
4237 xe_exec_queue_put(q);
4238 xe_vm_put(vm);
4239 xe_bo_put(bo);
4240
4241 if (err)
4242 fence = ERR_PTR(err);
4243
4244 return fence;
4245 }
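/*
 * Minimal usage sketch for a kernel-internal binding (illustrative only; the
 * chosen address and cache level are placeholders):
 *
 *	struct dma_fence *fence;
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */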
4246
4247 /**
4248 * xe_vm_lock() - Lock the vm's dma_resv object
4249 * @vm: The struct xe_vm whose lock is to be locked
4250 * @intr: Whether to perform any waits interruptibly
4251 *
4252 * Return: 0 on success, -EINTR if @intr is true and the wait for a
4253 * contended lock was interrupted. If @intr is false, the function
4254 * always returns 0.
4255 */
xe_vm_lock(struct xe_vm * vm,bool intr)4256 int xe_vm_lock(struct xe_vm *vm, bool intr)
4257 {
4258 int ret;
4259
4260 if (intr)
4261 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
4262 else
4263 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
4264
4265 return ret;
4266 }
4267
4268 /**
4269 * xe_vm_unlock() - Unlock the vm's dma_resv object
4270 * @vm: The struct xe_vm whose lock is to be released.
4271 *
4272 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
4273 */
xe_vm_unlock(struct xe_vm * vm)4274 void xe_vm_unlock(struct xe_vm *vm)
4275 {
4276 dma_resv_unlock(xe_vm_resv(vm));
4277 }
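/*
 * Typical interruptible locking pattern (sketch):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;	// -EINTR if the wait was interrupted
 *	...
 *	xe_vm_unlock(vm);
 */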
4278
4279 /**
4280 * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
4281 * VMA.
4282 * @vma: VMA to invalidate
4283 * @batch: TLB invalidation batch to populate; caller must later call
4284 * xe_tlb_inval_batch_wait() on it to wait for completion
4285 *
4286 * Walks the page-table leaves and zeroes (memsets) the entries owned by this
4287 * VMA, then issues TLB invalidations. It does not block waiting for the TLB
4288 * flush to complete; instead it populates @batch, which can be waited on
4289 * using xe_tlb_inval_batch_wait().
4290 *
4291 * Returns 0 for success, negative error code otherwise.
4292 */
xe_vm_invalidate_vma_submit(struct xe_vma * vma,struct xe_tlb_inval_batch * batch)4293 int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
4294 {
4295 struct xe_device *xe = xe_vma_vm(vma)->xe;
4296 struct xe_vm *vm = xe_vma_vm(vma);
4297 struct xe_tile *tile;
4298 u8 tile_mask = 0;
4299 int ret = 0;
4300 u8 id;
4301
4302 xe_assert(xe, !xe_vma_is_null(vma));
4303 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
4304 trace_xe_vma_invalidate(vma);
4305
4306 vm_dbg(&vm->xe->drm,
4307 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
4308 xe_vma_start(vma), xe_vma_size(vma));
4309
4310 /*
4311 * Check that we don't race with page-table updates; the tile_invalidated
4312 * update itself is safe.
4313 */
4314 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
4315 if (xe_vma_is_userptr(vma)) {
4316 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
4317 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
4318 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
4319
4320 WARN_ON_ONCE(!mmu_interval_check_retry
4321 (&to_userptr_vma(vma)->userptr.notifier,
4322 to_userptr_vma(vma)->userptr.pages.notifier_seq));
4323 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
4324 DMA_RESV_USAGE_BOOKKEEP));
4325
4326 } else {
4327 xe_bo_assert_held(xe_vma_bo(vma));
4328 }
4329 }
4330
4331 for_each_tile(tile, xe, id)
4332 if (xe_pt_zap_ptes(tile, vma))
4333 tile_mask |= BIT(id);
4334
4335 xe_device_wmb(xe);
4336
4337 ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
4338 xe_vma_start(vma), xe_vma_end(vma),
4339 tile_mask, batch);
4340
4341 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
4342 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
4343 return ret;
4344 }
4345
4346 /**
4347 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
4348 * @vma: VMA to invalidate
4349 *
4350 * Walks the page-table leaves and zeroes (memsets) the entries owned by this
4351 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
4352 * complete.
4353 *
4354 * Returns 0 for success, negative error code otherwise.
4355 */
xe_vm_invalidate_vma(struct xe_vma * vma)4356 int xe_vm_invalidate_vma(struct xe_vma *vma)
4357 {
4358 struct xe_tlb_inval_batch batch;
4359 int ret;
4360
4361 ret = xe_vm_invalidate_vma_submit(vma, &batch);
4362 if (ret)
4363 return ret;
4364
4365 xe_tlb_inval_batch_wait(&batch);
4366 return ret;
4367 }
4368
xe_vm_validate_protected(struct xe_vm * vm)4369 int xe_vm_validate_protected(struct xe_vm *vm)
4370 {
4371 struct drm_gpuva *gpuva;
4372 int err = 0;
4373
4374 if (!vm)
4375 return -ENODEV;
4376
4377 mutex_lock(&vm->snap_mutex);
4378
4379 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4380 struct xe_vma *vma = gpuva_to_vma(gpuva);
4381 struct xe_bo *bo = vma->gpuva.gem.obj ?
4382 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4383
4384 if (!bo)
4385 continue;
4386
4387 if (xe_bo_is_protected(bo)) {
4388 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
4389 if (err)
4390 break;
4391 }
4392 }
4393
4394 mutex_unlock(&vm->snap_mutex);
4395 return err;
4396 }
4397
4398 struct xe_vm_snapshot {
4399 int uapi_flags;
4400 unsigned long num_snaps;
4401 struct {
4402 u64 ofs, bo_ofs;
4403 unsigned long len;
4404 #define XE_VM_SNAP_FLAG_USERPTR BIT(0)
4405 #define XE_VM_SNAP_FLAG_READ_ONLY BIT(1)
4406 #define XE_VM_SNAP_FLAG_IS_NULL BIT(2)
4407 unsigned long flags;
4408 int uapi_mem_region;
4409 int pat_index;
4410 int cpu_caching;
4411 struct xe_bo *bo;
4412 void *data;
4413 struct mm_struct *mm;
4414 } snap[];
4415 };
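/*
 * Snapshot lifecycle (as implemented below): xe_vm_snapshot_capture() records
 * VMA metadata under vm->snap_mutex using GFP_NOWAIT allocations so it is
 * usable from the capture path; xe_vm_snapshot_capture_delayed() later copies
 * the actual BO/userptr contents from process context; xe_vm_snapshot_print()
 * emits the data ascii85-encoded into a drm_printer; xe_vm_snapshot_free()
 * drops the data, BO references and mm references.
 */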
4416
xe_vm_snapshot_capture(struct xe_vm * vm)4417 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
4418 {
4419 unsigned long num_snaps = 0, i;
4420 struct xe_vm_snapshot *snap = NULL;
4421 struct drm_gpuva *gpuva;
4422
4423 if (!vm)
4424 return NULL;
4425
4426 mutex_lock(&vm->snap_mutex);
4427 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4428 if (gpuva->flags & XE_VMA_DUMPABLE)
4429 num_snaps++;
4430 }
4431
4432 if (num_snaps)
4433 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4434 if (!snap) {
4435 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4436 goto out_unlock;
4437 }
4438
4439 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
4440 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
4441 if (vm->flags & XE_VM_FLAG_LR_MODE)
4442 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
4443 if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
4444 snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
4445
4446 snap->num_snaps = num_snaps;
4447 i = 0;
4448 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4449 struct xe_vma *vma = gpuva_to_vma(gpuva);
4450 struct xe_bo *bo = vma->gpuva.gem.obj ?
4451 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4452
4453 if (!(gpuva->flags & XE_VMA_DUMPABLE))
4454 continue;
4455
4456 snap->snap[i].ofs = xe_vma_start(vma);
4457 snap->snap[i].len = xe_vma_size(vma);
4458 snap->snap[i].flags = xe_vma_read_only(vma) ?
4459 XE_VM_SNAP_FLAG_READ_ONLY : 0;
4460 snap->snap[i].pat_index = vma->attr.pat_index;
4461 if (bo) {
4462 snap->snap[i].cpu_caching = bo->cpu_caching;
4463 snap->snap[i].bo = xe_bo_get(bo);
4464 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4465 switch (bo->ttm.resource->mem_type) {
4466 case XE_PL_SYSTEM:
4467 case XE_PL_TT:
4468 snap->snap[i].uapi_mem_region = 0;
4469 break;
4470 case XE_PL_VRAM0:
4471 snap->snap[i].uapi_mem_region = 1;
4472 break;
4473 case XE_PL_VRAM1:
4474 snap->snap[i].uapi_mem_region = 2;
4475 break;
4476 }
4477 } else if (xe_vma_is_userptr(vma)) {
4478 struct mm_struct *mm =
4479 to_userptr_vma(vma)->userptr.notifier.mm;
4480
4481 if (mmget_not_zero(mm))
4482 snap->snap[i].mm = mm;
4483 else
4484 snap->snap[i].data = ERR_PTR(-EFAULT);
4485
4486 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4487 snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
4488 snap->snap[i].uapi_mem_region = 0;
4489 } else if (xe_vma_is_null(vma)) {
4490 snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
4491 snap->snap[i].uapi_mem_region = -1;
4492 } else {
4493 snap->snap[i].data = ERR_PTR(-ENOENT);
4494 snap->snap[i].uapi_mem_region = -1;
4495 }
4496 i++;
4497 }
4498
4499 out_unlock:
4500 mutex_unlock(&vm->snap_mutex);
4501 return snap;
4502 }
4503
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)4504 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4505 {
4506 if (IS_ERR_OR_NULL(snap))
4507 return;
4508
4509 for (int i = 0; i < snap->num_snaps; i++) {
4510 struct xe_bo *bo = snap->snap[i].bo;
4511 int err;
4512
4513 if (IS_ERR(snap->snap[i].data) ||
4514 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4515 continue;
4516
4517 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4518 if (!snap->snap[i].data) {
4519 snap->snap[i].data = ERR_PTR(-ENOMEM);
4520 goto cleanup_bo;
4521 }
4522
4523 if (bo) {
4524 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4525 snap->snap[i].data, snap->snap[i].len);
4526 } else {
4527 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4528
4529 kthread_use_mm(snap->snap[i].mm);
4530 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4531 err = 0;
4532 else
4533 err = -EFAULT;
4534 kthread_unuse_mm(snap->snap[i].mm);
4535
4536 mmput(snap->snap[i].mm);
4537 snap->snap[i].mm = NULL;
4538 }
4539
4540 if (err) {
4541 kvfree(snap->snap[i].data);
4542 snap->snap[i].data = ERR_PTR(err);
4543 }
4544
4545 cleanup_bo:
4546 xe_bo_put(bo);
4547 snap->snap[i].bo = NULL;
4548 }
4549 }
4550
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)4551 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4552 {
4553 unsigned long i, j;
4554
4555 if (IS_ERR_OR_NULL(snap)) {
4556 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4557 return;
4558 }
4559
4560 drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
4561 for (i = 0; i < snap->num_snaps; i++) {
4562 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4563
4564 drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
4565 snap->snap[i].ofs,
4566 snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
4567 "read_only" : "read_write",
4568 snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
4569 "null_sparse" :
4570 snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
4571 "userptr" : "bo",
4572 snap->snap[i].uapi_mem_region == -1 ? 0 :
4573 BIT(snap->snap[i].uapi_mem_region),
4574 snap->snap[i].pat_index,
4575 snap->snap[i].cpu_caching);
4576
4577 if (IS_ERR(snap->snap[i].data)) {
4578 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4579 PTR_ERR(snap->snap[i].data));
4580 continue;
4581 }
4582
4583 if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4584 continue;
4585
4586 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4587
4588 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4589 u32 *val = snap->snap[i].data + j;
4590 char dumped[ASCII85_BUFSZ];
4591
4592 drm_puts(p, ascii85_encode(*val, dumped));
4593 }
4594
4595 drm_puts(p, "\n");
4596
4597 if (drm_coredump_printer_is_full(p))
4598 return;
4599 }
4600 }
4601
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)4602 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4603 {
4604 unsigned long i;
4605
4606 if (IS_ERR_OR_NULL(snap))
4607 return;
4608
4609 for (i = 0; i < snap->num_snaps; i++) {
4610 if (!IS_ERR(snap->snap[i].data))
4611 kvfree(snap->snap[i].data);
4612 xe_bo_put(snap->snap[i].bo);
4613 if (snap->snap[i].mm)
4614 mmput(snap->snap[i].mm);
4615 }
4616 kvfree(snap);
4617 }
4618
4619 /**
4620 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4621 * @xe: Pointer to the Xe device structure
4622 * @vma: Pointer to the virtual memory area (VMA) structure
4623 * @is_atomic: True if this is an atomic access made from the pagefault path
4624 *
4625 * This function determines whether the given VMA needs to be migrated to
4626 * VRAM in order to perform an atomic GPU operation.
4627 *
4628 * Return:
4629 * 1 - Migration to VRAM is required
4630 * 0 - Migration is not required
4631 * -EACCES - Invalid access for atomic memory attr
4632 *
4633 */
xe_vma_need_vram_for_atomic(struct xe_device * xe,struct xe_vma * vma,bool is_atomic)4634 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4635 {
4636 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4637 vma->attr.atomic_access;
4638
4639 if (!IS_DGFX(xe) || !is_atomic)
4640 return false;
4641
4642 /*
4643 * NOTE: The checks implemented here are platform-specific. For
4644 * instance, on a device supporting CXL atomics, these would ideally
4645 * work universally without additional handling.
4646 */
4647 switch (atomic_access) {
4648 case DRM_XE_ATOMIC_DEVICE:
4649 return !xe->info.has_device_atomics_on_smem;
4650
4651 case DRM_XE_ATOMIC_CPU:
4652 return -EACCES;
4653
4654 case DRM_XE_ATOMIC_UNDEFINED:
4655 case DRM_XE_ATOMIC_GLOBAL:
4656 default:
4657 return 1;
4658 }
4659 }
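/*
 * Sketch of how a pagefault handler might consume the return value
 * (illustrative; the surrounding handler is not part of this file):
 *
 *	ret = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);
 *	if (ret < 0)
 *		return ret;	// e.g. -EACCES for CPU-only atomics
 *	if (ret)
 *		// migrate the backing store to VRAM before retrying the fault
 */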
4660
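/*
 * xe_vm_alloc_vma() splits (and, in the non-madvise case, merges) existing
 * VMAs so that VMA boundaries exist exactly at the requested range. It builds
 * drm_gpuva ops (madvise ops only split; sm_map ops may also merge), carries
 * over the flags / PAT index of the VMAs being replaced, parses the ops into
 * xe_vma_ops, and finally destroys the replaced VMAs under the vm resv lock.
 */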
xe_vm_alloc_vma(struct xe_vm * vm,struct drm_gpuvm_map_req * map_req,bool is_madvise)4661 static int xe_vm_alloc_vma(struct xe_vm *vm,
4662 struct drm_gpuvm_map_req *map_req,
4663 bool is_madvise)
4664 {
4665 struct xe_vma_ops vops;
4666 struct drm_gpuva_ops *ops = NULL;
4667 struct drm_gpuva_op *__op;
4668 unsigned int vma_flags = 0;
4669 bool remap_op = false;
4670 struct xe_vma_mem_attr tmp_attr = {};
4671 u16 default_pat;
4672 int err;
4673
4674 lockdep_assert_held_write(&vm->lock);
4675
4676 if (is_madvise)
4677 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4678 else
4679 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4680
4681 if (IS_ERR(ops))
4682 return PTR_ERR(ops);
4683
4684 if (list_empty(&ops->list)) {
4685 err = 0;
4686 goto free_ops;
4687 }
4688
4689 drm_gpuva_for_each_op(__op, ops) {
4690 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4691 struct xe_vma *vma = NULL;
4692
4693 if (!is_madvise) {
4694 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4695 vma = gpuva_to_vma(op->base.unmap.va);
4696 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4697 default_pat = vma->attr.default_pat_index;
4698 vma_flags = vma->gpuva.flags;
4699 }
4700
4701 if (__op->op == DRM_GPUVA_OP_REMAP) {
4702 vma = gpuva_to_vma(op->base.remap.unmap->va);
4703 default_pat = vma->attr.default_pat_index;
4704 vma_flags = vma->gpuva.flags;
4705 }
4706
4707 if (__op->op == DRM_GPUVA_OP_MAP) {
4708 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4709 op->map.pat_index = default_pat;
4710 }
4711 } else {
4712 if (__op->op == DRM_GPUVA_OP_REMAP) {
4713 vma = gpuva_to_vma(op->base.remap.unmap->va);
4714 xe_assert(vm->xe, !remap_op);
4715 xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4716 remap_op = true;
4717 vma_flags = vma->gpuva.flags;
4718 }
4719
4720 if (__op->op == DRM_GPUVA_OP_MAP) {
4721 xe_assert(vm->xe, remap_op);
4722 remap_op = false;
4723 /*
4724 * In case of madvise ops DRM_GPUVA_OP_MAP is
4725 * always after DRM_GPUVA_OP_REMAP, so ensure
4726 * to propagate the flags from the vma we're
4727 * unmapping.
4728 */
4729 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4730 }
4731 }
4732 print_op(vm->xe, __op);
4733 }
4734
4735 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4736
4737 if (is_madvise)
4738 vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4739 else
4740 vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
4741
4742 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4743 if (err)
4744 goto unwind_ops;
4745
4746 xe_vm_lock(vm, false);
4747
4748 drm_gpuva_for_each_op(__op, ops) {
4749 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4750 struct xe_vma *vma;
4751
4752 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4753 vma = gpuva_to_vma(op->base.unmap.va);
4754 /* There should be no unmap for madvise */
4755 if (is_madvise)
4756 XE_WARN_ON("UNEXPECTED UNMAP");
4757
4758 xe_vma_destroy(vma, NULL);
4759 } else if (__op->op == DRM_GPUVA_OP_REMAP) {
4760 vma = gpuva_to_vma(op->base.remap.unmap->va);
4761 /* For madvise ops, store the attributes of the VMA being unmapped
4762 * by the REMAP so they can be assigned to the newly created MAP vma.
4763 */
4764 if (is_madvise)
4765 xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
4766
4767 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4768 } else if (__op->op == DRM_GPUVA_OP_MAP) {
4769 vma = op->map.vma;
4770 /* For madvise ops, MAP is always preceded by REMAP, so
4771 * tmp_attr already holds sane values, making it safe to
4772 * copy them to the new vma.
4773 */
4774 if (is_madvise)
4775 xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
4776 }
4777 }
4778
4779 xe_vm_unlock(vm);
4780 drm_gpuva_ops_free(&vm->gpuvm, ops);
4781 xe_vma_mem_attr_fini(&tmp_attr);
4782 return 0;
4783
4784 unwind_ops:
4785 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4786 free_ops:
4787 drm_gpuva_ops_free(&vm->gpuvm, ops);
4788 return err;
4789 }
4790
4791 /**
4792 * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
4793 * @vm: Pointer to the xe_vm structure
4794 * @start: Starting input address
4795 * @range: Size of the input range
4796 *
4797 * This function splits existing VMAs for the user-provided input range.
4798 *
4799 * Return: 0 on success, negative error code on failure.
4800 */
xe_vm_alloc_madvise_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4801 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4802 {
4803 struct drm_gpuvm_map_req map_req = {
4804 .map.va.addr = start,
4805 .map.va.range = range,
4806 };
4807
4808 lockdep_assert_held_write(&vm->lock);
4809
4810 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4811
4812 return xe_vm_alloc_vma(vm, &map_req, true);
4813 }
4814
is_cpu_addr_vma_with_default_attr(struct xe_vma * vma)4815 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
4816 {
4817 return vma && xe_vma_is_cpu_addr_mirror(vma) &&
4818 xe_vma_has_default_mem_attrs(vma);
4819 }
4820
4821 /**
4822 * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
4823 * @vm: VM to search within
4824 * @start: Input/output pointer to the starting address of the range
4825 * @end: Input/output pointer to the end address of the range
4826 *
4827 * Given a range defined by @start and @end, this function checks the VMAs
4828 * immediately before and after the range. If those neighboring VMAs are
4829 * CPU-address-mirrored and have default memory attributes, the function
4830 * updates @start and @end to include them. This extended range can then
4831 * be used for merging or other operations that require a unified VMA.
4832 *
4833 * The function does not perform the merge itself; it only computes the
4834 * mergeable boundaries.
4835 */
xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm * vm,u64 * start,u64 * end)4836 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
4837 {
4838 struct xe_vma *prev, *next;
4839
4840 lockdep_assert_held(&vm->lock);
4841
4842 if (*start >= SZ_4K) {
4843 prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
4844 if (is_cpu_addr_vma_with_default_attr(prev))
4845 *start = xe_vma_start(prev);
4846 }
4847
4848 if (*end < vm->size) {
4849 next = xe_vm_find_vma_by_addr(vm, *end + 1);
4850 if (is_cpu_addr_vma_with_default_attr(next))
4851 *end = xe_vma_end(next);
4852 }
4853 }
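/*
 * Typical usage (sketch): expand the user range to swallow mergeable
 * neighbours before recreating the CPU-address-mirror VMA:
 *
 *	xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);
 *	err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
 */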
4854
4855 /**
4856 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4857 * @vm: Pointer to the xe_vm structure
4858 * @start: Starting input address
4859 * @range: Size of the input range
4860 *
4861 * This function splits/merges existing VMAs for the user-provided input range.
4862 *
4863 * Return: 0 on success, negative error code on failure.
4864 */
xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4865 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4866 {
4867 struct drm_gpuvm_map_req map_req = {
4868 .map.va.addr = start,
4869 .map.va.range = range,
4870 };
4871
4872 lockdep_assert_held_write(&vm->lock);
4873
4874 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4875 start, range);
4876
4877 return xe_vm_alloc_vma(vm, &map_req, false);
4878 }
4879
4880 /**
4881 * xe_vm_add_exec_queue() - Add exec queue to VM
4882 * @vm: The VM.
4883 * @q: The exec_queue
4884 *
4885 * Add exec queue to VM, skipped if the device does not have context-based TLB
4886 * invalidations.
4887 */
xe_vm_add_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)4888 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4889 {
4890 struct xe_device *xe = vm->xe;
4891
4892 /* User VMs and queues only */
4893 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
4894 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
4895 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
4896 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE));
4897 xe_assert(xe, vm->xef);
4898 xe_assert(xe, vm == q->vm);
4899
4900 if (!xe->info.has_ctx_tlb_inval)
4901 return;
4902
4903 down_write(&vm->exec_queues.lock);
4904 list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]);
4905 ++vm->exec_queues.count[q->gt->info.id];
4906 up_write(&vm->exec_queues.lock);
4907 }
4908
4909 /**
4910 * xe_vm_remove_exec_queue() - Remove exec queue from VM
4911 * @vm: The VM.
4912 * @q: The exec_queue
4913 *
4914 * Remove exec queue from VM, skipped if the device does not have context-based
4915 * TLB invalidations.
4916 */
xe_vm_remove_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)4917 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4918 {
4919 if (!vm->xe->info.has_ctx_tlb_inval)
4920 return;
4921
4922 down_write(&vm->exec_queues.lock);
4923 if (!list_empty(&q->vm_exec_queue_link)) {
4924 list_del(&q->vm_exec_queue_link);
4925 --vm->exec_queues.count[q->gt->info.id];
4926 }
4927 up_write(&vm->exec_queues.lock);
4928 }
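/*
 * xe_vm_add_exec_queue() and xe_vm_remove_exec_queue() are expected to be
 * called in pairs over the lifetime of a user exec queue, keeping the
 * per-GT exec_queues.list / exec_queues.count in sync so that context-based
 * TLB invalidation can target only the queues bound to this VM.
 */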
4929