1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_res_cursor.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_wa.h"
44
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
50 /**
51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
52 * @vm: The vm whose resv is to be locked.
53 * @exec: The drm_exec transaction.
54 *
55 * Helper to lock the vm's resv as part of a drm_exec transaction.
56 *
57 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
58 */
59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
60 {
61 return drm_exec_lock_obj(exec, xe_vm_obj(vm));
62 }
63
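/*
 * Return true if any exec queue on the VM has no preempt fence installed or
 * has signaling enabled on its current one, i.e. a preemption is pending.
 */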
64 static bool preempt_fences_waiting(struct xe_vm *vm)
65 {
66 struct xe_exec_queue *q;
67
68 lockdep_assert_held(&vm->lock);
69 xe_vm_assert_held(vm);
70
71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
72 if (!q->lr.pfence ||
73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
74 &q->lr.pfence->flags)) {
75 return true;
76 }
77 }
78
79 return false;
80 }
81
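/* Free all preempt fences remaining on @list. */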
82 static void free_preempt_fences(struct list_head *list)
83 {
84 struct list_head *link, *next;
85
86 list_for_each_safe(link, next, list)
87 xe_preempt_fence_free(to_preempt_fence_from_link(link));
88 }
89
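/*
 * Top up @list with freshly allocated preempt fences until there is one per
 * exec queue on the VM; @count tracks how many have been allocated so far.
 */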
90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
91 unsigned int *count)
92 {
93 lockdep_assert_held(&vm->lock);
94 xe_vm_assert_held(vm);
95
96 if (*count >= vm->preempt.num_exec_queues)
97 return 0;
98
99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
101
102 if (IS_ERR(pfence))
103 return PTR_ERR(pfence);
104
105 list_move_tail(xe_preempt_fence_link(pfence), list);
106 }
107
108 return 0;
109 }
110
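/*
 * Wait for the current preempt fence of each exec queue to signal and drop
 * it. Returns -ETIME if a wait fails or a fence signaled with -ETIME, in
 * which case the VM needs to be killed.
 */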
111 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
112 {
113 struct xe_exec_queue *q;
114
115 xe_vm_assert_held(vm);
116
117 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
118 if (q->lr.pfence) {
119 long timeout = dma_fence_wait(q->lr.pfence, false);
120
121 /* Only -ETIME on fence indicates VM needs to be killed */
122 if (timeout < 0 || q->lr.pfence->error == -ETIME)
123 return -ETIME;
124
125 dma_fence_put(q->lr.pfence);
126 q->lr.pfence = NULL;
127 }
128 }
129
130 return 0;
131 }
132
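/* Return true if all exec queues attached to the VM are idle. */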
133 static bool xe_vm_is_idle(struct xe_vm *vm)
134 {
135 struct xe_exec_queue *q;
136
137 xe_vm_assert_held(vm);
138 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
139 if (!xe_exec_queue_is_idle(q))
140 return false;
141 }
142
143 return true;
144 }
145
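/*
 * Arm one preallocated preempt fence from @list for each exec queue,
 * replacing the queue's previous fence with the newly armed one.
 */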
146 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
147 {
148 struct list_head *link;
149 struct xe_exec_queue *q;
150
151 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
152 struct dma_fence *fence;
153
154 link = list->next;
155 xe_assert(vm->xe, link != list);
156
157 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
158 q, q->lr.context,
159 ++q->lr.seqno);
160 dma_fence_put(q->lr.pfence);
161 q->lr.pfence = fence;
162 }
163 }
164
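/*
 * Add the current preempt fence of every exec queue on the VM to @bo's
 * reservation object as a BOOKKEEP fence.
 */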
165 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
166 {
167 struct xe_exec_queue *q;
168 int err;
169
170 xe_bo_assert_held(bo);
171
172 if (!vm->preempt.num_exec_queues)
173 return 0;
174
175 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
176 if (err)
177 return err;
178
179 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
180 if (q->lr.pfence) {
181 dma_resv_add_fence(bo->ttm.base.resv,
182 q->lr.pfence,
183 DMA_RESV_USAGE_BOOKKEEP);
184 }
185
186 return 0;
187 }
188
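/*
 * Resume all exec queues attached to the VM and re-install their preempt
 * fences into the VM's reservation object.
 */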
189 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
190 struct drm_exec *exec)
191 {
192 struct xe_exec_queue *q;
193
194 lockdep_assert_held(&vm->lock);
195 xe_vm_assert_held(vm);
196
197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
198 q->ops->resume(q);
199
200 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
201 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
202 }
203 }
204
205 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
206 {
207 struct drm_gpuvm_exec vm_exec = {
208 .vm = &vm->gpuvm,
209 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
210 .num_fences = 1,
211 };
212 struct drm_exec *exec = &vm_exec.exec;
213 struct xe_validation_ctx ctx;
214 struct dma_fence *pfence;
215 int err;
216 bool wait;
217
218 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
219
220 down_write(&vm->lock);
221 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
222 if (err)
223 goto out_up_write;
224
225 pfence = xe_preempt_fence_create(q, q->lr.context,
226 ++q->lr.seqno);
227 if (IS_ERR(pfence)) {
228 err = PTR_ERR(pfence);
229 goto out_fini;
230 }
231
232 list_add(&q->lr.link, &vm->preempt.exec_queues);
233 ++vm->preempt.num_exec_queues;
234 q->lr.pfence = pfence;
235
236 xe_svm_notifier_lock(vm);
237
238 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
239 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
240
241 /*
242 * Check to see if a preemption on the VM or a userptr invalidation is in
243 * flight; if so, trigger this preempt fence to sync state with the other
244 * preempt fences on the VM.
245 */
246 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
247 if (wait)
248 dma_fence_enable_sw_signaling(pfence);
249
250 xe_svm_notifier_unlock(vm);
251
252 out_fini:
253 xe_validation_ctx_fini(&ctx);
254 out_up_write:
255 up_write(&vm->lock);
256
257 return err;
258 }
259 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
260
261 /**
262 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
263 * @vm: The VM.
264 * @q: The exec_queue
265 *
266 * Note that this function might be called multiple times on the same queue.
267 */
268 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
269 {
270 if (!xe_vm_in_preempt_fence_mode(vm))
271 return;
272
273 down_write(&vm->lock);
274 if (!list_empty(&q->lr.link)) {
275 list_del_init(&q->lr.link);
276 --vm->preempt.num_exec_queues;
277 }
278 if (q->lr.pfence) {
279 dma_fence_enable_sw_signaling(q->lr.pfence);
280 dma_fence_put(q->lr.pfence);
281 q->lr.pfence = NULL;
282 }
283 up_write(&vm->lock);
284 }
285
286 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
287
288 /**
289 * xe_vm_kill() - VM Kill
290 * @vm: The VM.
291 * @unlocked: Flag indicating the VM's dma-resv is not held
292 *
293 * Kill the VM by setting the banned flag, marking it no longer available for
294 * use. If in preempt fence mode, also kill all exec queues attached to the VM.
295 */
296 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
297 {
298 struct xe_exec_queue *q;
299
300 lockdep_assert_held(&vm->lock);
301
302 if (unlocked)
303 xe_vm_lock(vm, false);
304
305 vm->flags |= XE_VM_FLAG_BANNED;
306 trace_xe_vm_kill(vm);
307
308 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
309 q->ops->kill(q);
310
311 if (unlocked)
312 xe_vm_unlock(vm);
313
314 /* TODO: Inform user the VM is banned */
315 }
316
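/*
 * drm_gpuvm vm_bo_validate() callback: queue the BO's VMAs for rebind and
 * revalidate the evicted BO, returning -EAGAIN if a PM transition currently
 * blocks validation.
 */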
317 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
318 {
319 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
320 struct drm_gpuva *gpuva;
321 int ret;
322
323 lockdep_assert_held(&vm->lock);
324 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
325 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
326 &vm->rebind_list);
327
328 if (!try_wait_for_completion(&vm->xe->pm_block))
329 return -EAGAIN;
330
331 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
332 if (ret)
333 return ret;
334
335 vm_bo->evicted = false;
336 return 0;
337 }
338
339 /**
340 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
341 * @vm: The vm for which we are rebinding.
342 * @exec: The struct drm_exec with the locked GEM objects.
343 * @num_fences: The number of fences to reserve for the operation, not
344 * including rebinds and validations.
345 *
346 * Validates all evicted gem objects and rebinds their vmas. Note that
347 * rebindings may cause evictions and hence the validation-rebind
348 * sequence is rerun until there are no more objects to validate.
349 *
350 * Return: 0 on success, negative error code on error. In particular,
351 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
352 * the drm_exec transaction needs to be restarted.
353 */
354 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
355 unsigned int num_fences)
356 {
357 struct drm_gem_object *obj;
358 unsigned long index;
359 int ret;
360
361 do {
362 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
363 if (ret)
364 return ret;
365
366 ret = xe_vm_rebind(vm, false);
367 if (ret)
368 return ret;
369 } while (!list_empty(&vm->gpuvm.evict.list));
370
371 drm_exec_for_each_locked_object(exec, index, obj) {
372 ret = dma_resv_reserve_fences(obj->resv, num_fences);
373 if (ret)
374 return ret;
375 }
376
377 return 0;
378 }
379
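/*
 * Locking and validation step of the rebind worker: lock the VM and its
 * external objects, bail out early (*done) if the VM is idle or no
 * preemption is pending, otherwise wait out the existing preempt fences and
 * validate/rebind evicted objects.
 */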
380 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
381 bool *done)
382 {
383 int err;
384
385 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
386 if (err)
387 return err;
388
389 if (xe_vm_is_idle(vm)) {
390 vm->preempt.rebind_deactivated = true;
391 *done = true;
392 return 0;
393 }
394
395 if (!preempt_fences_waiting(vm)) {
396 *done = true;
397 return 0;
398 }
399
400 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
401 if (err)
402 return err;
403
404 err = wait_for_existing_preempt_fences(vm);
405 if (err)
406 return err;
407
408 /*
409 * Add validation and rebinding to the locking loop since both can
410 * cause evictions which may require blocking dma_resv locks.
411 * The fence reservation here is intended for the new preempt fences
412 * we attach at the end of the rebind work.
413 */
414 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
415 }
416
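/*
 * If a PM transition is blocking rebinds, put the VM on the rebind resume
 * list and return true so the worker can bail out until
 * xe_vm_resume_rebind_worker() re-queues it.
 */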
417 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
418 {
419 struct xe_device *xe = vm->xe;
420 bool ret = false;
421
422 mutex_lock(&xe->rebind_resume_lock);
423 if (!try_wait_for_completion(&vm->xe->pm_block)) {
424 ret = true;
425 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
426 }
427 mutex_unlock(&xe->rebind_resume_lock);
428
429 return ret;
430 }
431
432 /**
433 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
434 * @vm: The vm whose preempt worker to resume.
435 *
436 * Resume a preempt worker that was previously suspended by
437 * vm_suspend_rebind_worker().
438 */
439 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
440 {
441 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
442 }
443
444 static void preempt_rebind_work_func(struct work_struct *w)
445 {
446 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
447 struct xe_validation_ctx ctx;
448 struct drm_exec exec;
449 unsigned int fence_count = 0;
450 LIST_HEAD(preempt_fences);
451 int err = 0;
452 long wait;
453 int __maybe_unused tries = 0;
454
455 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
456 trace_xe_vm_rebind_worker_enter(vm);
457
458 down_write(&vm->lock);
459
460 if (xe_vm_is_closed_or_banned(vm)) {
461 up_write(&vm->lock);
462 trace_xe_vm_rebind_worker_exit(vm);
463 return;
464 }
465
466 retry:
467 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
468 up_write(&vm->lock);
469 return;
470 }
471
472 if (xe_vm_userptr_check_repin(vm)) {
473 err = xe_vm_userptr_pin(vm);
474 if (err)
475 goto out_unlock_outer;
476 }
477
478 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
479 (struct xe_val_flags) {.interruptible = true});
480 if (err)
481 goto out_unlock_outer;
482
483 drm_exec_until_all_locked(&exec) {
484 bool done = false;
485
486 err = xe_preempt_work_begin(&exec, vm, &done);
487 drm_exec_retry_on_contention(&exec);
488 xe_validation_retry_on_oom(&ctx, &err);
489 if (err || done) {
490 xe_validation_ctx_fini(&ctx);
491 goto out_unlock_outer;
492 }
493 }
494
495 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
496 if (err)
497 goto out_unlock;
498
499 xe_vm_set_validation_exec(vm, &exec);
500 err = xe_vm_rebind(vm, true);
501 xe_vm_set_validation_exec(vm, NULL);
502 if (err)
503 goto out_unlock;
504
505 /* Wait on rebinds and munmap style VM unbinds */
506 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
507 DMA_RESV_USAGE_KERNEL,
508 false, MAX_SCHEDULE_TIMEOUT);
509 if (wait <= 0) {
510 err = -ETIME;
511 goto out_unlock;
512 }
513
514 #define retry_required(__tries, __vm) \
515 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
516 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
517 __xe_vm_userptr_needs_repin(__vm))
518
519 xe_svm_notifier_lock(vm);
520 if (retry_required(tries, vm)) {
521 xe_svm_notifier_unlock(vm);
522 err = -EAGAIN;
523 goto out_unlock;
524 }
525
526 #undef retry_required
527
528 spin_lock(&vm->xe->ttm.lru_lock);
529 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
530 spin_unlock(&vm->xe->ttm.lru_lock);
531
532 /* Point of no return. */
533 arm_preempt_fences(vm, &preempt_fences);
534 resume_and_reinstall_preempt_fences(vm, &exec);
535 xe_svm_notifier_unlock(vm);
536
537 out_unlock:
538 xe_validation_ctx_fini(&ctx);
539 out_unlock_outer:
540 if (err == -EAGAIN) {
541 trace_xe_vm_rebind_worker_retry(vm);
542 goto retry;
543 }
544
545 if (err) {
546 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
547 xe_vm_kill(vm, true);
548 }
549 up_write(&vm->lock);
550
551 free_preempt_fences(&preempt_fences);
552
553 trace_xe_vm_rebind_worker_exit(vm);
554 }
555
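/*
 * Allocate the per-tile arrays of page-table update ops, sized by the number
 * of ops previously accounted for each tile. Allocation failure is reported
 * as -ENOBUFS for an array of binds and -ENOMEM otherwise.
 */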
556 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
557 {
558 int i;
559
560 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
561 if (!vops->pt_update_ops[i].num_ops)
562 continue;
563
564 vops->pt_update_ops[i].ops =
565 kmalloc_array(vops->pt_update_ops[i].num_ops,
566 sizeof(*vops->pt_update_ops[i].ops),
567 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
568 if (!vops->pt_update_ops[i].ops)
569 return array_of_binds ? -ENOBUFS : -ENOMEM;
570 }
571
572 return 0;
573 }
574 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
575
576 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
577 {
578 struct xe_vma *vma;
579
580 vma = gpuva_to_vma(op->base.prefetch.va);
581
582 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
583 xa_destroy(&op->prefetch_range.range);
584 }
585
586 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
587 {
588 struct xe_vma_op *op;
589
590 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
591 return;
592
593 list_for_each_entry(op, &vops->list, link)
594 xe_vma_svm_prefetch_op_fini(op);
595 }
596
597 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
598 {
599 int i;
600
601 xe_vma_svm_prefetch_ops_fini(vops);
602
603 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
604 kfree(vops->pt_update_ops[i].ops);
605 }
606
607 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
608 {
609 int i;
610
611 if (!inc_val)
612 return;
613
614 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
615 if (BIT(i) & tile_mask)
616 vops->pt_update_ops[i].num_ops += inc_val;
617 }
618
619 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
620 u8 tile_mask)
621 {
622 INIT_LIST_HEAD(&op->link);
623 op->tile_mask = tile_mask;
624 op->base.op = DRM_GPUVA_OP_MAP;
625 op->base.map.va.addr = vma->gpuva.va.addr;
626 op->base.map.va.range = vma->gpuva.va.range;
627 op->base.map.gem.obj = vma->gpuva.gem.obj;
628 op->base.map.gem.offset = vma->gpuva.gem.offset;
629 op->map.vma = vma;
630 op->map.immediate = true;
631 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
632 op->map.is_null = xe_vma_is_null(vma);
633 }
634
635 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
636 u8 tile_mask)
637 {
638 struct xe_vma_op *op;
639
640 op = kzalloc(sizeof(*op), GFP_KERNEL);
641 if (!op)
642 return -ENOMEM;
643
644 xe_vm_populate_rebind(op, vma, tile_mask);
645 list_add_tail(&op->link, &vops->list);
646 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
647
648 return 0;
649 }
650
651 static struct dma_fence *ops_execute(struct xe_vm *vm,
652 struct xe_vma_ops *vops);
653 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
654 struct xe_exec_queue *q,
655 struct xe_sync_entry *syncs, u32 num_syncs);
656
657 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
658 {
659 struct dma_fence *fence;
660 struct xe_vma *vma, *next;
661 struct xe_vma_ops vops;
662 struct xe_vma_op *op, *next_op;
663 int err, i;
664
665 lockdep_assert_held(&vm->lock);
666 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
667 list_empty(&vm->rebind_list))
668 return 0;
669
670 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
671 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
672 vops.pt_update_ops[i].wait_vm_bookkeep = true;
673
674 xe_vm_assert_held(vm);
675 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
676 xe_assert(vm->xe, vma->tile_present);
677
678 if (rebind_worker)
679 trace_xe_vma_rebind_worker(vma);
680 else
681 trace_xe_vma_rebind_exec(vma);
682
683 err = xe_vm_ops_add_rebind(&vops, vma,
684 vma->tile_present);
685 if (err)
686 goto free_ops;
687 }
688
689 err = xe_vma_ops_alloc(&vops, false);
690 if (err)
691 goto free_ops;
692
693 fence = ops_execute(vm, &vops);
694 if (IS_ERR(fence)) {
695 err = PTR_ERR(fence);
696 } else {
697 dma_fence_put(fence);
698 list_for_each_entry_safe(vma, next, &vm->rebind_list,
699 combined_links.rebind)
700 list_del_init(&vma->combined_links.rebind);
701 }
702 free_ops:
703 list_for_each_entry_safe(op, next_op, &vops.list, link) {
704 list_del(&op->link);
705 kfree(op);
706 }
707 xe_vma_ops_fini(&vops);
708
709 return err;
710 }
711
712 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
713 {
714 struct dma_fence *fence = NULL;
715 struct xe_vma_ops vops;
716 struct xe_vma_op *op, *next_op;
717 struct xe_tile *tile;
718 u8 id;
719 int err;
720
721 lockdep_assert_held(&vm->lock);
722 xe_vm_assert_held(vm);
723 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
724
725 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
726 for_each_tile(tile, vm->xe, id) {
727 vops.pt_update_ops[id].wait_vm_bookkeep = true;
728 vops.pt_update_ops[tile->id].q =
729 xe_migrate_exec_queue(tile->migrate);
730 }
731
732 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
733 if (err)
734 return ERR_PTR(err);
735
736 err = xe_vma_ops_alloc(&vops, false);
737 if (err) {
738 fence = ERR_PTR(err);
739 goto free_ops;
740 }
741
742 fence = ops_execute(vm, &vops);
743
744 free_ops:
745 list_for_each_entry_safe(op, next_op, &vops.list, link) {
746 list_del(&op->link);
747 kfree(op);
748 }
749 xe_vma_ops_fini(&vops);
750
751 return fence;
752 }
753
754 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
755 struct xe_vma *vma,
756 struct xe_svm_range *range,
757 u8 tile_mask)
758 {
759 INIT_LIST_HEAD(&op->link);
760 op->tile_mask = tile_mask;
761 op->base.op = DRM_GPUVA_OP_DRIVER;
762 op->subop = XE_VMA_SUBOP_MAP_RANGE;
763 op->map_range.vma = vma;
764 op->map_range.range = range;
765 }
766
767 static int
768 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
769 struct xe_vma *vma,
770 struct xe_svm_range *range,
771 u8 tile_mask)
772 {
773 struct xe_vma_op *op;
774
775 op = kzalloc(sizeof(*op), GFP_KERNEL);
776 if (!op)
777 return -ENOMEM;
778
779 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
780 list_add_tail(&op->link, &vops->list);
781 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
782
783 return 0;
784 }
785
786 /**
787 * xe_vm_range_rebind() - VM range (re)bind
788 * @vm: The VM which the range belongs to.
789 * @vma: The VMA which the range belongs to.
790 * @range: SVM range to rebind.
791 * @tile_mask: Tile mask to bind the range to.
792 *
793 * (re)bind SVM range setting up GPU page tables for the range.
794 *
795 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
796 * failure
797 */
798 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
799 struct xe_vma *vma,
800 struct xe_svm_range *range,
801 u8 tile_mask)
802 {
803 struct dma_fence *fence = NULL;
804 struct xe_vma_ops vops;
805 struct xe_vma_op *op, *next_op;
806 struct xe_tile *tile;
807 u8 id;
808 int err;
809
810 lockdep_assert_held(&vm->lock);
811 xe_vm_assert_held(vm);
812 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
813 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
814
815 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
816 for_each_tile(tile, vm->xe, id) {
817 vops.pt_update_ops[id].wait_vm_bookkeep = true;
818 vops.pt_update_ops[tile->id].q =
819 xe_migrate_exec_queue(tile->migrate);
820 }
821
822 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
823 if (err)
824 return ERR_PTR(err);
825
826 err = xe_vma_ops_alloc(&vops, false);
827 if (err) {
828 fence = ERR_PTR(err);
829 goto free_ops;
830 }
831
832 fence = ops_execute(vm, &vops);
833
834 free_ops:
835 list_for_each_entry_safe(op, next_op, &vops.list, link) {
836 list_del(&op->link);
837 kfree(op);
838 }
839 xe_vma_ops_fini(&vops);
840
841 return fence;
842 }
843
844 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
845 struct xe_svm_range *range)
846 {
847 INIT_LIST_HEAD(&op->link);
848 op->tile_mask = range->tile_present;
849 op->base.op = DRM_GPUVA_OP_DRIVER;
850 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
851 op->unmap_range.range = range;
852 }
853
854 static int
855 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
856 struct xe_svm_range *range)
857 {
858 struct xe_vma_op *op;
859
860 op = kzalloc(sizeof(*op), GFP_KERNEL);
861 if (!op)
862 return -ENOMEM;
863
864 xe_vm_populate_range_unbind(op, range);
865 list_add_tail(&op->link, &vops->list);
866 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
867
868 return 0;
869 }
870
871 /**
872 * xe_vm_range_unbind() - VM range unbind
873 * @vm: The VM which the range belongs to.
874 * @range: SVM range to unbind.
875 *
876 * Unbind SVM range removing the GPU page tables for the range.
877 *
878 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
879 * failure
880 */
881 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
882 struct xe_svm_range *range)
883 {
884 struct dma_fence *fence = NULL;
885 struct xe_vma_ops vops;
886 struct xe_vma_op *op, *next_op;
887 struct xe_tile *tile;
888 u8 id;
889 int err;
890
891 lockdep_assert_held(&vm->lock);
892 xe_vm_assert_held(vm);
893 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
894
895 if (!range->tile_present)
896 return dma_fence_get_stub();
897
898 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
899 for_each_tile(tile, vm->xe, id) {
900 vops.pt_update_ops[id].wait_vm_bookkeep = true;
901 vops.pt_update_ops[tile->id].q =
902 xe_migrate_exec_queue(tile->migrate);
903 }
904
905 err = xe_vm_ops_add_range_unbind(&vops, range);
906 if (err)
907 return ERR_PTR(err);
908
909 err = xe_vma_ops_alloc(&vops, false);
910 if (err) {
911 fence = ERR_PTR(err);
912 goto free_ops;
913 }
914
915 fence = ops_execute(vm, &vops);
916
917 free_ops:
918 list_for_each_entry_safe(op, next_op, &vops.list, link) {
919 list_del(&op->link);
920 kfree(op);
921 }
922 xe_vma_ops_fini(&vops);
923
924 return fence;
925 }
926
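/* Free a VMA; userptr VMAs are embedded in the larger struct xe_userptr_vma. */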
927 static void xe_vma_free(struct xe_vma *vma)
928 {
929 if (xe_vma_is_userptr(vma))
930 kfree(to_userptr_vma(vma));
931 else
932 kfree(vma);
933 }
934
935 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
936 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
937 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
938 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3)
939
940 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
941 struct xe_bo *bo,
942 u64 bo_offset_or_userptr,
943 u64 start, u64 end,
944 struct xe_vma_mem_attr *attr,
945 unsigned int flags)
946 {
947 struct xe_vma *vma;
948 struct xe_tile *tile;
949 u8 id;
950 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
951 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
952 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
953 bool is_cpu_addr_mirror =
954 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
955
956 xe_assert(vm->xe, start < end);
957 xe_assert(vm->xe, end < vm->size);
958
959 /*
960 * Allocate and ensure that the xe_vma_is_userptr() return
961 * matches what was allocated.
962 */
963 if (!bo && !is_null && !is_cpu_addr_mirror) {
964 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
965
966 if (!uvma)
967 return ERR_PTR(-ENOMEM);
968
969 vma = &uvma->vma;
970 } else {
971 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
972 if (!vma)
973 return ERR_PTR(-ENOMEM);
974
975 if (is_cpu_addr_mirror)
976 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
977 if (is_null)
978 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
979 if (bo)
980 vma->gpuva.gem.obj = &bo->ttm.base;
981 }
982
983 INIT_LIST_HEAD(&vma->combined_links.rebind);
984
985 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
986 vma->gpuva.vm = &vm->gpuvm;
987 vma->gpuva.va.addr = start;
988 vma->gpuva.va.range = end - start + 1;
989 if (read_only)
990 vma->gpuva.flags |= XE_VMA_READ_ONLY;
991 if (dumpable)
992 vma->gpuva.flags |= XE_VMA_DUMPABLE;
993
994 for_each_tile(tile, vm->xe, id)
995 vma->tile_mask |= 0x1 << id;
996
997 if (vm->xe->info.has_atomic_enable_pte_bit)
998 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
999
1000 vma->attr = *attr;
1001
1002 if (bo) {
1003 struct drm_gpuvm_bo *vm_bo;
1004
1005 xe_bo_assert_held(bo);
1006
1007 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1008 if (IS_ERR(vm_bo)) {
1009 xe_vma_free(vma);
1010 return ERR_CAST(vm_bo);
1011 }
1012
1013 drm_gpuvm_bo_extobj_add(vm_bo);
1014 drm_gem_object_get(&bo->ttm.base);
1015 vma->gpuva.gem.offset = bo_offset_or_userptr;
1016 drm_gpuva_link(&vma->gpuva, vm_bo);
1017 drm_gpuvm_bo_put(vm_bo);
1018 } else /* userptr or null */ {
1019 if (!is_null && !is_cpu_addr_mirror) {
1020 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1021 u64 size = end - start + 1;
1022 int err;
1023
1024 vma->gpuva.gem.offset = bo_offset_or_userptr;
1025
1026 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1027 if (err) {
1028 xe_vma_free(vma);
1029 return ERR_PTR(err);
1030 }
1031 }
1032
1033 xe_vm_get(vm);
1034 }
1035
1036 return vma;
1037 }
1038
1039 static void xe_vma_destroy_late(struct xe_vma *vma)
1040 {
1041 struct xe_vm *vm = xe_vma_vm(vma);
1042
1043 if (vma->ufence) {
1044 xe_sync_ufence_put(vma->ufence);
1045 vma->ufence = NULL;
1046 }
1047
1048 if (xe_vma_is_userptr(vma)) {
1049 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1050
1051 xe_userptr_remove(uvma);
1052 xe_vm_put(vm);
1053 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1054 xe_vm_put(vm);
1055 } else {
1056 xe_bo_put(xe_vma_bo(vma));
1057 }
1058
1059 xe_vma_free(vma);
1060 }
1061
1062 static void vma_destroy_work_func(struct work_struct *w)
1063 {
1064 struct xe_vma *vma =
1065 container_of(w, struct xe_vma, destroy_work);
1066
1067 xe_vma_destroy_late(vma);
1068 }
1069
1070 static void vma_destroy_cb(struct dma_fence *fence,
1071 struct dma_fence_cb *cb)
1072 {
1073 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1074
1075 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1076 queue_work(system_unbound_wq, &vma->destroy_work);
1077 }
1078
1079 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1080 {
1081 struct xe_vm *vm = xe_vma_vm(vma);
1082
1083 lockdep_assert_held_write(&vm->lock);
1084 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1085
1086 if (xe_vma_is_userptr(vma)) {
1087 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1088 xe_userptr_destroy(to_userptr_vma(vma));
1089 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1090 xe_bo_assert_held(xe_vma_bo(vma));
1091
1092 drm_gpuva_unlink(&vma->gpuva);
1093 }
1094
1095 xe_vm_assert_held(vm);
1096 if (fence) {
1097 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1098 vma_destroy_cb);
1099
1100 if (ret) {
1101 XE_WARN_ON(ret != -ENOENT);
1102 xe_vma_destroy_late(vma);
1103 }
1104 } else {
1105 xe_vma_destroy_late(vma);
1106 }
1107 }
1108
1109 /**
1110 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1111 * @exec: The drm_exec object we're currently locking for.
1112 * @vma: The vma for which we want to lock the vm resv and any attached
1113 * object's resv.
1114 *
1115 * Return: 0 on success, negative error code on error. In particular
1116 * may return -EDEADLK on WW transaction contention and -EINTR if
1117 * an interruptible wait is terminated by a signal.
1118 */
1119 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1120 {
1121 struct xe_vm *vm = xe_vma_vm(vma);
1122 struct xe_bo *bo = xe_vma_bo(vma);
1123 int err;
1124
1125 XE_WARN_ON(!vm);
1126
1127 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1128 if (!err && bo && !bo->vm)
1129 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1130
1131 return err;
1132 }
1133
1134 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1135 {
1136 struct xe_device *xe = xe_vma_vm(vma)->xe;
1137 struct xe_validation_ctx ctx;
1138 struct drm_exec exec;
1139 int err = 0;
1140
1141 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1142 err = xe_vm_lock_vma(&exec, vma);
1143 drm_exec_retry_on_contention(&exec);
1144 if (XE_WARN_ON(err))
1145 break;
1146 xe_vma_destroy(vma, NULL);
1147 }
1148 xe_assert(xe, !err);
1149 }
1150
1151 struct xe_vma *
1152 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1153 {
1154 struct drm_gpuva *gpuva;
1155
1156 lockdep_assert_held(&vm->lock);
1157
1158 if (xe_vm_is_closed_or_banned(vm))
1159 return NULL;
1160
1161 xe_assert(vm->xe, start + range <= vm->size);
1162
1163 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1164
1165 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1166 }
1167
1168 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1169 {
1170 int err;
1171
1172 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1173 lockdep_assert_held(&vm->lock);
1174
1175 mutex_lock(&vm->snap_mutex);
1176 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1177 mutex_unlock(&vm->snap_mutex);
1178 XE_WARN_ON(err); /* Shouldn't be possible */
1179
1180 return err;
1181 }
1182
1183 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1184 {
1185 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1186 lockdep_assert_held(&vm->lock);
1187
1188 mutex_lock(&vm->snap_mutex);
1189 drm_gpuva_remove(&vma->gpuva);
1190 mutex_unlock(&vm->snap_mutex);
1191 if (vm->usm.last_fault_vma == vma)
1192 vm->usm.last_fault_vma = NULL;
1193 }
1194
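/* drm_gpuvm op_alloc() callback: allocate a zeroed struct xe_vma_op. */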
1195 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1196 {
1197 struct xe_vma_op *op;
1198
1199 op = kzalloc(sizeof(*op), GFP_KERNEL);
1200
1201 if (unlikely(!op))
1202 return NULL;
1203
1204 return &op->base;
1205 }
1206
1207 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1208
1209 static const struct drm_gpuvm_ops gpuvm_ops = {
1210 .op_alloc = xe_vm_op_alloc,
1211 .vm_bo_validate = xe_gpuvm_validate,
1212 .vm_free = xe_vm_free,
1213 };
1214
1215 static u64 pde_encode_pat_index(u16 pat_index)
1216 {
1217 u64 pte = 0;
1218
1219 if (pat_index & BIT(0))
1220 pte |= XE_PPGTT_PTE_PAT0;
1221
1222 if (pat_index & BIT(1))
1223 pte |= XE_PPGTT_PTE_PAT1;
1224
1225 return pte;
1226 }
1227
1228 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1229 {
1230 u64 pte = 0;
1231
1232 if (pat_index & BIT(0))
1233 pte |= XE_PPGTT_PTE_PAT0;
1234
1235 if (pat_index & BIT(1))
1236 pte |= XE_PPGTT_PTE_PAT1;
1237
1238 if (pat_index & BIT(2)) {
1239 if (pt_level)
1240 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1241 else
1242 pte |= XE_PPGTT_PTE_PAT2;
1243 }
1244
1245 if (pat_index & BIT(3))
1246 pte |= XELPG_PPGTT_PTE_PAT3;
1247
1248 if (pat_index & (BIT(4)))
1249 pte |= XE2_PPGTT_PTE_PAT4;
1250
1251 return pte;
1252 }
1253
1254 static u64 pte_encode_ps(u32 pt_level)
1255 {
1256 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1257
1258 if (pt_level == 1)
1259 return XE_PDE_PS_2M;
1260 else if (pt_level == 2)
1261 return XE_PDPE_PS_1G;
1262
1263 return 0;
1264 }
1265
1266 static u16 pde_pat_index(struct xe_bo *bo)
1267 {
1268 struct xe_device *xe = xe_bo_device(bo);
1269 u16 pat_index;
1270
1271 /*
1272 * We only have two bits to encode the PAT index in non-leaf nodes, but
1273 * these only point to other paging structures so we only need a minimal
1274 * selection of options. The user PAT index is only for encoding leaf
1275 * nodes, where we have use of more bits to do the encoding. The
1276 * non-leaf nodes are instead under driver control so the chosen index
1277 * here should be distinct from the user PAT index. Also the
1278 * corresponding coherency of the PAT index should be tied to the
1279 * allocation type of the page table (or at least we should pick
1280 * something which is always safe).
1281 */
1282 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1283 pat_index = xe->pat.idx[XE_CACHE_WB];
1284 else
1285 pat_index = xe->pat.idx[XE_CACHE_NONE];
1286
1287 xe_assert(xe, pat_index <= 3);
1288
1289 return pat_index;
1290 }
1291
1292 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1293 {
1294 u64 pde;
1295
1296 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1297 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1298 pde |= pde_encode_pat_index(pde_pat_index(bo));
1299
1300 return pde;
1301 }
1302
1303 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1304 u16 pat_index, u32 pt_level)
1305 {
1306 u64 pte;
1307
1308 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1309 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1310 pte |= pte_encode_pat_index(pat_index, pt_level);
1311 pte |= pte_encode_ps(pt_level);
1312
1313 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1314 pte |= XE_PPGTT_PTE_DM;
1315
1316 return pte;
1317 }
1318
1319 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1320 u16 pat_index, u32 pt_level)
1321 {
1322 pte |= XE_PAGE_PRESENT;
1323
1324 if (likely(!xe_vma_read_only(vma)))
1325 pte |= XE_PAGE_RW;
1326
1327 pte |= pte_encode_pat_index(pat_index, pt_level);
1328 pte |= pte_encode_ps(pt_level);
1329
1330 if (unlikely(xe_vma_is_null(vma)))
1331 pte |= XE_PTE_NULL;
1332
1333 return pte;
1334 }
1335
1336 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1337 u16 pat_index,
1338 u32 pt_level, bool devmem, u64 flags)
1339 {
1340 u64 pte;
1341
1342 /* Avoid passing random bits directly as flags */
1343 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1344
1345 pte = addr;
1346 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1347 pte |= pte_encode_pat_index(pat_index, pt_level);
1348 pte |= pte_encode_ps(pt_level);
1349
1350 if (devmem)
1351 pte |= XE_PPGTT_PTE_DM;
1352
1353 pte |= flags;
1354
1355 return pte;
1356 }
1357
1358 static const struct xe_pt_ops xelp_pt_ops = {
1359 .pte_encode_bo = xelp_pte_encode_bo,
1360 .pte_encode_vma = xelp_pte_encode_vma,
1361 .pte_encode_addr = xelp_pte_encode_addr,
1362 .pde_encode_bo = xelp_pde_encode_bo,
1363 };
1364
1365 static void vm_destroy_work_func(struct work_struct *w);
1366
1367 /**
1368 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1369 * given tile and vm.
1370 * @xe: xe device.
1371 * @tile: tile to set up for.
1372 * @vm: vm to set up for.
1373 * @exec: The struct drm_exec object used to lock the vm resv.
1374 *
1375 * Sets up a pagetable tree with one page-table per level and a single
1376 * leaf PTE. All pagetable entries point to the single page-table or,
1377 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1378 * writes become NOPs.
1379 *
1380 * Return: 0 on success, negative error code on error.
1381 */
1382 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1383 struct xe_vm *vm, struct drm_exec *exec)
1384 {
1385 u8 id = tile->id;
1386 int i;
1387
1388 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1389 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1390 if (IS_ERR(vm->scratch_pt[id][i])) {
1391 int err = PTR_ERR(vm->scratch_pt[id][i]);
1392
1393 vm->scratch_pt[id][i] = NULL;
1394 return err;
1395 }
1396 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1397 }
1398
1399 return 0;
1400 }
1401 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1402
1403 static void xe_vm_free_scratch(struct xe_vm *vm)
1404 {
1405 struct xe_tile *tile;
1406 u8 id;
1407
1408 if (!xe_vm_has_scratch(vm))
1409 return;
1410
1411 for_each_tile(tile, vm->xe, id) {
1412 u32 i;
1413
1414 if (!vm->pt_root[id])
1415 continue;
1416
1417 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1418 if (vm->scratch_pt[id][i])
1419 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1420 }
1421 }
1422
1423 static void xe_vm_pt_destroy(struct xe_vm *vm)
1424 {
1425 struct xe_tile *tile;
1426 u8 id;
1427
1428 xe_vm_assert_held(vm);
1429
1430 for_each_tile(tile, vm->xe, id) {
1431 if (vm->pt_root[id]) {
1432 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1433 vm->pt_root[id] = NULL;
1434 }
1435 }
1436 }
1437
1438 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1439 {
1440 struct drm_gem_object *vm_resv_obj;
1441 struct xe_validation_ctx ctx;
1442 struct drm_exec exec;
1443 struct xe_vm *vm;
1444 int err, number_tiles = 0;
1445 struct xe_tile *tile;
1446 u8 id;
1447
1448 /*
1449 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1450 * ever be in faulting mode.
1451 */
1452 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1453
1454 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1455 if (!vm)
1456 return ERR_PTR(-ENOMEM);
1457
1458 vm->xe = xe;
1459
1460 vm->size = 1ull << xe->info.va_bits;
1461 vm->flags = flags;
1462
1463 if (xef)
1464 vm->xef = xe_file_get(xef);
1465 /**
1466 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1467 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1468 * under a user-VM lock when the PXP session is started at exec_queue
1469 * creation time. Those are different VMs and therefore there is no risk
1470 * of deadlock, but we need to tell lockdep that this is the case or it
1471 * will print a warning.
1472 */
1473 if (flags & XE_VM_FLAG_GSC) {
1474 static struct lock_class_key gsc_vm_key;
1475
1476 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1477 } else {
1478 init_rwsem(&vm->lock);
1479 }
1480 mutex_init(&vm->snap_mutex);
1481
1482 INIT_LIST_HEAD(&vm->rebind_list);
1483
1484 INIT_LIST_HEAD(&vm->userptr.repin_list);
1485 INIT_LIST_HEAD(&vm->userptr.invalidated);
1486 spin_lock_init(&vm->userptr.invalidated_lock);
1487
1488 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1489
1490 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1491
1492 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1493 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1494
1495 for_each_tile(tile, xe, id)
1496 xe_range_fence_tree_init(&vm->rftree[id]);
1497
1498 vm->pt_ops = &xelp_pt_ops;
1499
1500 /*
1501 * Long-running workloads are not protected by the scheduler references.
1502 * By design, run_job for long-running workloads returns NULL and the
1503 * scheduler drops all references to it, hence protecting the VM
1504 * for this case is necessary.
1505 */
1506 if (flags & XE_VM_FLAG_LR_MODE) {
1507 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1508 xe_pm_runtime_get_noresume(xe);
1509 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1510 }
1511
1512 err = xe_svm_init(vm);
1513 if (err)
1514 goto err_no_resv;
1515
1516 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1517 if (!vm_resv_obj) {
1518 err = -ENOMEM;
1519 goto err_svm_fini;
1520 }
1521
1522 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1523 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1524
1525 drm_gem_object_put(vm_resv_obj);
1526
1527 err = 0;
1528 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1529 err) {
1530 err = xe_vm_drm_exec_lock(vm, &exec);
1531 drm_exec_retry_on_contention(&exec);
1532
1533 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1534 vm->flags |= XE_VM_FLAG_64K;
1535
1536 for_each_tile(tile, xe, id) {
1537 if (flags & XE_VM_FLAG_MIGRATION &&
1538 tile->id != XE_VM_FLAG_TILE_ID(flags))
1539 continue;
1540
1541 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1542 &exec);
1543 if (IS_ERR(vm->pt_root[id])) {
1544 err = PTR_ERR(vm->pt_root[id]);
1545 vm->pt_root[id] = NULL;
1546 xe_vm_pt_destroy(vm);
1547 drm_exec_retry_on_contention(&exec);
1548 xe_validation_retry_on_oom(&ctx, &err);
1549 break;
1550 }
1551 }
1552 if (err)
1553 break;
1554
1555 if (xe_vm_has_scratch(vm)) {
1556 for_each_tile(tile, xe, id) {
1557 if (!vm->pt_root[id])
1558 continue;
1559
1560 err = xe_vm_create_scratch(xe, tile, vm, &exec);
1561 if (err) {
1562 xe_vm_free_scratch(vm);
1563 xe_vm_pt_destroy(vm);
1564 drm_exec_retry_on_contention(&exec);
1565 xe_validation_retry_on_oom(&ctx, &err);
1566 break;
1567 }
1568 }
1569 if (err)
1570 break;
1571 vm->batch_invalidate_tlb = true;
1572 }
1573
1574 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1575 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1576 vm->batch_invalidate_tlb = false;
1577 }
1578
1579 /* Fill pt_root after allocating scratch tables */
1580 for_each_tile(tile, xe, id) {
1581 if (!vm->pt_root[id])
1582 continue;
1583
1584 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1585 }
1586 }
1587 if (err)
1588 goto err_close;
1589
1590 /* Kernel migration VM shouldn't have a circular loop.. */
1591 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1592 for_each_tile(tile, xe, id) {
1593 struct xe_exec_queue *q;
1594 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1595
1596 if (!vm->pt_root[id])
1597 continue;
1598
1599 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1600 if (IS_ERR(q)) {
1601 err = PTR_ERR(q);
1602 goto err_close;
1603 }
1604 vm->q[id] = q;
1605 number_tiles++;
1606 }
1607 }
1608
1609 if (number_tiles > 1)
1610 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1611
1612 if (xef && xe->info.has_asid) {
1613 u32 asid;
1614
1615 down_write(&xe->usm.lock);
1616 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1617 XA_LIMIT(1, XE_MAX_ASID - 1),
1618 &xe->usm.next_asid, GFP_KERNEL);
1619 up_write(&xe->usm.lock);
1620 if (err < 0)
1621 goto err_close;
1622
1623 vm->usm.asid = asid;
1624 }
1625
1626 trace_xe_vm_create(vm);
1627
1628 return vm;
1629
1630 err_close:
1631 xe_vm_close_and_put(vm);
1632 return ERR_PTR(err);
1633
1634 err_svm_fini:
1635 if (flags & XE_VM_FLAG_FAULT_MODE) {
1636 vm->size = 0; /* close the vm */
1637 xe_svm_fini(vm);
1638 }
1639 err_no_resv:
1640 mutex_destroy(&vm->snap_mutex);
1641 for_each_tile(tile, xe, id)
1642 xe_range_fence_tree_fini(&vm->rftree[id]);
1643 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1644 if (vm->xef)
1645 xe_file_put(vm->xef);
1646 kfree(vm);
1647 if (flags & XE_VM_FLAG_LR_MODE)
1648 xe_pm_runtime_put(xe);
1649 return ERR_PTR(err);
1650 }
1651
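/*
 * Mark the VM as closed (size = 0) and, for non-migration VMs, wait for
 * pending binds, then clear the page-table roots and invalidate the VM's TLB
 * entries while the device is still present.
 */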
1652 static void xe_vm_close(struct xe_vm *vm)
1653 {
1654 struct xe_device *xe = vm->xe;
1655 bool bound;
1656 int idx;
1657
1658 bound = drm_dev_enter(&xe->drm, &idx);
1659
1660 down_write(&vm->lock);
1661 if (xe_vm_in_fault_mode(vm))
1662 xe_svm_notifier_lock(vm);
1663
1664 vm->size = 0;
1665
1666 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1667 struct xe_tile *tile;
1668 struct xe_gt *gt;
1669 u8 id;
1670
1671 /* Wait for pending binds */
1672 dma_resv_wait_timeout(xe_vm_resv(vm),
1673 DMA_RESV_USAGE_BOOKKEEP,
1674 false, MAX_SCHEDULE_TIMEOUT);
1675
1676 if (bound) {
1677 for_each_tile(tile, xe, id)
1678 if (vm->pt_root[id])
1679 xe_pt_clear(xe, vm->pt_root[id]);
1680
1681 for_each_gt(gt, xe, id)
1682 xe_tlb_inval_vm(>->tlb_inval, vm);
1683 }
1684 }
1685
1686 if (xe_vm_in_fault_mode(vm))
1687 xe_svm_notifier_unlock(vm);
1688 up_write(&vm->lock);
1689
1690 if (bound)
1691 drm_dev_exit(idx);
1692 }
1693
1694 void xe_vm_close_and_put(struct xe_vm *vm)
1695 {
1696 LIST_HEAD(contested);
1697 struct xe_device *xe = vm->xe;
1698 struct xe_tile *tile;
1699 struct xe_vma *vma, *next_vma;
1700 struct drm_gpuva *gpuva, *next;
1701 u8 id;
1702
1703 xe_assert(xe, !vm->preempt.num_exec_queues);
1704
1705 xe_vm_close(vm);
1706 if (xe_vm_in_preempt_fence_mode(vm)) {
1707 mutex_lock(&xe->rebind_resume_lock);
1708 list_del_init(&vm->preempt.pm_activate_link);
1709 mutex_unlock(&xe->rebind_resume_lock);
1710 flush_work(&vm->preempt.rebind_work);
1711 }
1712 if (xe_vm_in_fault_mode(vm))
1713 xe_svm_close(vm);
1714
1715 down_write(&vm->lock);
1716 for_each_tile(tile, xe, id) {
1717 if (vm->q[id])
1718 xe_exec_queue_last_fence_put(vm->q[id], vm);
1719 }
1720 up_write(&vm->lock);
1721
1722 for_each_tile(tile, xe, id) {
1723 if (vm->q[id]) {
1724 xe_exec_queue_kill(vm->q[id]);
1725 xe_exec_queue_put(vm->q[id]);
1726 vm->q[id] = NULL;
1727 }
1728 }
1729
1730 down_write(&vm->lock);
1731 xe_vm_lock(vm, false);
1732 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1733 vma = gpuva_to_vma(gpuva);
1734
1735 if (xe_vma_has_no_bo(vma)) {
1736 xe_svm_notifier_lock(vm);
1737 vma->gpuva.flags |= XE_VMA_DESTROYED;
1738 xe_svm_notifier_unlock(vm);
1739 }
1740
1741 xe_vm_remove_vma(vm, vma);
1742
1743 /* easy case, remove from VMA? */
1744 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1745 list_del_init(&vma->combined_links.rebind);
1746 xe_vma_destroy(vma, NULL);
1747 continue;
1748 }
1749
1750 list_move_tail(&vma->combined_links.destroy, &contested);
1751 vma->gpuva.flags |= XE_VMA_DESTROYED;
1752 }
1753
1754 /*
1755 * All vm operations will add shared fences to resv.
1756 * The only exception is eviction for a shared object,
1757 * but even so, the unbind when evicted would still
1758 * install a fence to resv. Hence it's safe to
1759 * destroy the pagetables immediately.
1760 */
1761 xe_vm_free_scratch(vm);
1762 xe_vm_pt_destroy(vm);
1763 xe_vm_unlock(vm);
1764
1765 /*
1766 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1767 * Since we hold a refcount to the bo, we can remove and free
1768 * the members safely without locking.
1769 */
1770 list_for_each_entry_safe(vma, next_vma, &contested,
1771 combined_links.destroy) {
1772 list_del_init(&vma->combined_links.destroy);
1773 xe_vma_destroy_unlocked(vma);
1774 }
1775
1776 xe_svm_fini(vm);
1777
1778 up_write(&vm->lock);
1779
1780 down_write(&xe->usm.lock);
1781 if (vm->usm.asid) {
1782 void *lookup;
1783
1784 xe_assert(xe, xe->info.has_asid);
1785 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1786
1787 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1788 xe_assert(xe, lookup == vm);
1789 }
1790 up_write(&xe->usm.lock);
1791
1792 for_each_tile(tile, xe, id)
1793 xe_range_fence_tree_fini(&vm->rftree[id]);
1794
1795 xe_vm_put(vm);
1796 }
1797
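/*
 * Final, deferred teardown of the VM, run from the destroy worker queued by
 * xe_vm_free(): drop the runtime PM reference for LR VMs, tear down the LRU
 * bulk move and file reference, and free the VM.
 */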
1798 static void vm_destroy_work_func(struct work_struct *w)
1799 {
1800 struct xe_vm *vm =
1801 container_of(w, struct xe_vm, destroy_work);
1802 struct xe_device *xe = vm->xe;
1803 struct xe_tile *tile;
1804 u8 id;
1805
1806 /* xe_vm_close_and_put was not called? */
1807 xe_assert(xe, !vm->size);
1808
1809 if (xe_vm_in_preempt_fence_mode(vm))
1810 flush_work(&vm->preempt.rebind_work);
1811
1812 mutex_destroy(&vm->snap_mutex);
1813
1814 if (vm->flags & XE_VM_FLAG_LR_MODE)
1815 xe_pm_runtime_put(xe);
1816
1817 for_each_tile(tile, xe, id)
1818 XE_WARN_ON(vm->pt_root[id]);
1819
1820 trace_xe_vm_free(vm);
1821
1822 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1823
1824 if (vm->xef)
1825 xe_file_put(vm->xef);
1826
1827 kfree(vm);
1828 }
1829
1830 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1831 {
1832 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1833
1834 /* To destroy the VM we need to be able to sleep */
1835 queue_work(system_unbound_wq, &vm->destroy_work);
1836 }
1837
1838 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1839 {
1840 struct xe_vm *vm;
1841
1842 mutex_lock(&xef->vm.lock);
1843 vm = xa_load(&xef->vm.xa, id);
1844 if (vm)
1845 xe_vm_get(vm);
1846 mutex_unlock(&xef->vm.lock);
1847
1848 return vm;
1849 }
1850
1851 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1852 {
1853 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
1854 }
1855
1856 static struct xe_exec_queue *
1857 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1858 {
1859 return q ? q : vm->q[0];
1860 }
1861
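/* Return the first user fence found in @syncs with a reference taken, or NULL. */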
1862 static struct xe_user_fence *
1863 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1864 {
1865 unsigned int i;
1866
1867 for (i = 0; i < num_syncs; i++) {
1868 struct xe_sync_entry *e = &syncs[i];
1869
1870 if (xe_sync_is_ufence(e))
1871 return xe_sync_ufence_get(e);
1872 }
1873
1874 return NULL;
1875 }
1876
1877 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1878 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1879 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1880
1881 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1882 struct drm_file *file)
1883 {
1884 struct xe_device *xe = to_xe_device(dev);
1885 struct xe_file *xef = to_xe_file(file);
1886 struct drm_xe_vm_create *args = data;
1887 struct xe_vm *vm;
1888 u32 id;
1889 int err;
1890 u32 flags = 0;
1891
1892 if (XE_IOCTL_DBG(xe, args->extensions))
1893 return -EINVAL;
1894
1895 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929))
1896 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1897
1898 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1899 !xe->info.has_usm))
1900 return -EINVAL;
1901
1902 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1903 return -EINVAL;
1904
1905 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1906 return -EINVAL;
1907
1908 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1909 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1910 !xe->info.needs_scratch))
1911 return -EINVAL;
1912
1913 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1914 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1915 return -EINVAL;
1916
1917 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1918 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1919 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1920 flags |= XE_VM_FLAG_LR_MODE;
1921 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1922 flags |= XE_VM_FLAG_FAULT_MODE;
1923
1924 vm = xe_vm_create(xe, flags, xef);
1925 if (IS_ERR(vm))
1926 return PTR_ERR(vm);
1927
1928 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1929 /* Warning: Security issue - never enable by default */
1930 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1931 #endif
1932
1933 /* user id alloc must always be last in ioctl to prevent UAF */
1934 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1935 if (err)
1936 goto err_close_and_put;
1937
1938 args->vm_id = id;
1939
1940 return 0;
1941
1942 err_close_and_put:
1943 xe_vm_close_and_put(vm);
1944
1945 return err;
1946 }
1947
1948 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1949 struct drm_file *file)
1950 {
1951 struct xe_device *xe = to_xe_device(dev);
1952 struct xe_file *xef = to_xe_file(file);
1953 struct drm_xe_vm_destroy *args = data;
1954 struct xe_vm *vm;
1955 int err = 0;
1956
1957 if (XE_IOCTL_DBG(xe, args->pad) ||
1958 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1959 return -EINVAL;
1960
1961 mutex_lock(&xef->vm.lock);
1962 vm = xa_load(&xef->vm.xa, args->vm_id);
1963 if (XE_IOCTL_DBG(xe, !vm))
1964 err = -ENOENT;
1965 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1966 err = -EBUSY;
1967 else
1968 xa_erase(&xef->vm.xa, args->vm_id);
1969 mutex_unlock(&xef->vm.lock);
1970
1971 if (!err)
1972 xe_vm_close_and_put(vm);
1973
1974 return err;
1975 }
1976
1977 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
1978 {
1979 struct drm_gpuva *gpuva;
1980 u32 num_vmas = 0;
1981
1982 lockdep_assert_held(&vm->lock);
1983 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
1984 num_vmas++;
1985
1986 return num_vmas;
1987 }
1988
1989 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
1990 u64 end, struct drm_xe_mem_range_attr *attrs)
1991 {
1992 struct drm_gpuva *gpuva;
1993 int i = 0;
1994
1995 lockdep_assert_held(&vm->lock);
1996
1997 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
1998 struct xe_vma *vma = gpuva_to_vma(gpuva);
1999
2000 if (i == *num_vmas)
2001 return -ENOSPC;
2002
2003 attrs[i].start = xe_vma_start(vma);
2004 attrs[i].end = xe_vma_end(vma);
2005 attrs[i].atomic.val = vma->attr.atomic_access;
2006 attrs[i].pat_index.val = vma->attr.pat_index;
2007 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
2008 attrs[i].preferred_mem_loc.migration_policy =
2009 vma->attr.preferred_loc.migration_policy;
2010
2011 i++;
2012 }
2013
2014 *num_vmas = i;
2015 return 0;
2016 }
2017
2018 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2019 {
2020 struct xe_device *xe = to_xe_device(dev);
2021 struct xe_file *xef = to_xe_file(file);
2022 struct drm_xe_mem_range_attr *mem_attrs;
2023 struct drm_xe_vm_query_mem_range_attr *args = data;
2024 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2025 struct xe_vm *vm;
2026 int err = 0;
2027
2028 if (XE_IOCTL_DBG(xe,
2029 ((args->num_mem_ranges == 0 &&
2030 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2031 (args->num_mem_ranges > 0 &&
2032 (!attrs_user ||
2033 args->sizeof_mem_range_attr !=
2034 sizeof(struct drm_xe_mem_range_attr))))))
2035 return -EINVAL;
2036
2037 vm = xe_vm_lookup(xef, args->vm_id);
2038 if (XE_IOCTL_DBG(xe, !vm))
2039 return -EINVAL;
2040
2041 err = down_read_interruptible(&vm->lock);
2042 if (err)
2043 goto put_vm;
2044
2045 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2046
2047 if (args->num_mem_ranges == 0 && !attrs_user) {
2048 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2049 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2050 goto unlock_vm;
2051 }
2052
2053 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2054 GFP_KERNEL | __GFP_ACCOUNT |
2055 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2056 if (!mem_attrs) {
2057 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2058 goto unlock_vm;
2059 }
2060
2061 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2062 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2063 args->start + args->range, mem_attrs);
2064 if (err)
2065 goto free_mem_attrs;
2066
2067 err = copy_to_user(attrs_user, mem_attrs,
2068 args->sizeof_mem_range_attr * args->num_mem_ranges);
2069 if (err)
2070 err = -EFAULT;
2071
2072 free_mem_attrs:
2073 kvfree(mem_attrs);
2074 unlock_vm:
2075 up_read(&vm->lock);
2076 put_vm:
2077 xe_vm_put(vm);
2078 return err;
2079 }
2080
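/* Return true if the SZ_4K page starting at @page_addr overlaps @vma. */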
2081 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2082 {
2083 if (page_addr > xe_vma_end(vma) - 1 ||
2084 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2085 return false;
2086
2087 return true;
2088 }
2089
2090 /**
2091 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2092 *
2093 * @vm: the xe_vm the vma belongs to
2094 * @page_addr: address to look up
 *
 * Return: the VMA covering @page_addr, or NULL if no VMA overlaps it.
2095 */
2096 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2097 {
2098 struct xe_vma *vma = NULL;
2099
2100 if (vm->usm.last_fault_vma) { /* Fast lookup */
2101 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2102 vma = vm->usm.last_fault_vma;
2103 }
2104 if (!vma)
2105 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2106
2107 return vma;
2108 }
2109
2110 static const u32 region_to_mem_type[] = {
2111 XE_PL_TT,
2112 XE_PL_VRAM0,
2113 XE_PL_VRAM1,
2114 };
2115
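/*
 * Mark @vma as destroyed under the SVM notifier lock and, if it was already
 * committed, remove it from the VM's GPUVA tree.
 */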
2116 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2117 bool post_commit)
2118 {
2119 xe_svm_notifier_lock(vm);
2120 vma->gpuva.flags |= XE_VMA_DESTROYED;
2121 xe_svm_notifier_unlock(vm);
2122 if (post_commit)
2123 xe_vm_remove_vma(vm, vma);
2124 }
2125
2126 #undef ULL
2127 #define ULL unsigned long long
2128
2129 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2130 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2131 {
2132 struct xe_vma *vma;
2133
2134 switch (op->op) {
2135 case DRM_GPUVA_OP_MAP:
2136 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2137 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2138 break;
2139 case DRM_GPUVA_OP_REMAP:
2140 vma = gpuva_to_vma(op->remap.unmap->va);
2141 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2142 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2143 op->remap.unmap->keep ? 1 : 0);
2144 if (op->remap.prev)
2145 vm_dbg(&xe->drm,
2146 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2147 (ULL)op->remap.prev->va.addr,
2148 (ULL)op->remap.prev->va.range);
2149 if (op->remap.next)
2150 vm_dbg(&xe->drm,
2151 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2152 (ULL)op->remap.next->va.addr,
2153 (ULL)op->remap.next->va.range);
2154 break;
2155 case DRM_GPUVA_OP_UNMAP:
2156 vma = gpuva_to_vma(op->unmap.va);
2157 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2158 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2159 op->unmap.keep ? 1 : 0);
2160 break;
2161 case DRM_GPUVA_OP_PREFETCH:
2162 vma = gpuva_to_vma(op->prefetch.va);
2163 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2164 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2165 break;
2166 default:
2167 drm_warn(&xe->drm, "NOT POSSIBLE");
2168 }
2169 }
2170 #else
2171 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2172 {
2173 }
2174 #endif
2175
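/*
 * Scratch PTEs only need to be cleared on bind for fault-mode VMs with
 * scratch pages enabled, and only when the bind is not IMMEDIATE.
 */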
2176 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2177 {
2178 if (!xe_vm_in_fault_mode(vm))
2179 return false;
2180
2181 if (!xe_vm_has_scratch(vm))
2182 return false;
2183
2184 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2185 return false;
2186
2187 return true;
2188 }
2189
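/* Tear down the SVM prefetch state attached to each op in @ops. */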
2190 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2191 {
2192 struct drm_gpuva_op *__op;
2193
2194 drm_gpuva_for_each_op(__op, ops) {
2195 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2196
2197 xe_vma_svm_prefetch_op_fini(op);
2198 }
2199 }
2200
2201 /*
2202 * Create the operations list from the IOCTL arguments and set up operation fields
2203 * so the parse and commit steps are decoupled from them. This step can fail.
2204 */
2205 static struct drm_gpuva_ops *
2206 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2207 struct xe_bo *bo, u64 bo_offset_or_userptr,
2208 u64 addr, u64 range,
2209 u32 operation, u32 flags,
2210 u32 prefetch_region, u16 pat_index)
2211 {
2212 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2213 struct drm_gpuva_ops *ops;
2214 struct drm_gpuva_op *__op;
2215 struct drm_gpuvm_bo *vm_bo;
2216 u64 range_end = addr + range;
2217 int err;
2218
2219 lockdep_assert_held_write(&vm->lock);
2220
2221 vm_dbg(&vm->xe->drm,
2222 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2223 operation, (ULL)addr, (ULL)range,
2224 (ULL)bo_offset_or_userptr);
2225
2226 switch (operation) {
2227 case DRM_XE_VM_BIND_OP_MAP:
2228 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2229 struct drm_gpuvm_map_req map_req = {
2230 .map.va.addr = addr,
2231 .map.va.range = range,
2232 .map.gem.obj = obj,
2233 .map.gem.offset = bo_offset_or_userptr,
2234 };
2235
2236 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2237 break;
2238 }
2239 case DRM_XE_VM_BIND_OP_UNMAP:
2240 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2241 break;
2242 case DRM_XE_VM_BIND_OP_PREFETCH:
2243 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2244 break;
2245 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2246 xe_assert(vm->xe, bo);
2247
2248 err = xe_bo_lock(bo, true);
2249 if (err)
2250 return ERR_PTR(err);
2251
2252 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2253 if (IS_ERR(vm_bo)) {
2254 xe_bo_unlock(bo);
2255 return ERR_CAST(vm_bo);
2256 }
2257
2258 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2259 drm_gpuvm_bo_put(vm_bo);
2260 xe_bo_unlock(bo);
2261 break;
2262 default:
2263 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2264 ops = ERR_PTR(-EINVAL);
2265 }
2266 if (IS_ERR(ops))
2267 return ops;
2268
2269 drm_gpuva_for_each_op(__op, ops) {
2270 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2271
2272 if (__op->op == DRM_GPUVA_OP_MAP) {
2273 op->map.immediate =
2274 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2275 op->map.read_only =
2276 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2277 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2278 op->map.is_cpu_addr_mirror = flags &
2279 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
2280 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2281 op->map.pat_index = pat_index;
2282 op->map.invalidate_on_bind =
2283 __xe_vm_needs_clear_scratch_pages(vm, flags);
2284 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2285 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2286 struct xe_tile *tile;
2287 struct xe_svm_range *svm_range;
2288 struct drm_gpusvm_ctx ctx = {};
2289 struct drm_pagemap *dpagemap;
2290 u8 id, tile_mask = 0;
2291 u32 i;
2292
2293 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2294 op->prefetch.region = prefetch_region;
2295 break;
2296 }
2297
2298 ctx.read_only = xe_vma_read_only(vma);
2299 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2300 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2301
2302 for_each_tile(tile, vm->xe, id)
2303 tile_mask |= 0x1 << id;
2304
2305 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2306 op->prefetch_range.ranges_count = 0;
2307 tile = NULL;
2308
2309 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2310 dpagemap = xe_vma_resolve_pagemap(vma,
2311 xe_device_get_root_tile(vm->xe));
2312 /*
2313 * TODO: Once multi-GPU support is enabled, we will need
2314 * a way to derive the tile from the dpagemap.
2315 */
2316 if (dpagemap)
2317 tile = xe_device_get_root_tile(vm->xe);
2318 } else if (prefetch_region) {
2319 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2320 XE_PL_VRAM0];
2321 }
2322
2323 op->prefetch_range.tile = tile;
2324 alloc_next_range:
2325 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2326
2327 if (PTR_ERR(svm_range) == -ENOENT) {
2328 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2329
2330 addr = ret == ULONG_MAX ? 0 : ret;
2331 if (addr)
2332 goto alloc_next_range;
2333 else
2334 goto print_op_label;
2335 }
2336
2337 if (IS_ERR(svm_range)) {
2338 err = PTR_ERR(svm_range);
2339 goto unwind_prefetch_ops;
2340 }
2341
2342 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
2343 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2344 goto check_next_range;
2345 }
2346
2347 err = xa_alloc(&op->prefetch_range.range,
2348 &i, svm_range, xa_limit_32b,
2349 GFP_KERNEL);
2350
2351 if (err)
2352 goto unwind_prefetch_ops;
2353
2354 op->prefetch_range.ranges_count++;
2355 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2356 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2357 check_next_range:
2358 if (range_end > xe_svm_range_end(svm_range) &&
2359 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2360 addr = xe_svm_range_end(svm_range);
2361 goto alloc_next_range;
2362 }
2363 }
2364 print_op_label:
2365 print_op(vm->xe, __op);
2366 }
2367
2368 return ops;
2369
2370 unwind_prefetch_ops:
2371 xe_svm_prefetch_gpuva_ops_fini(ops);
2372 drm_gpuva_ops_free(&vm->gpuvm, ops);
2373 return ERR_PTR(err);
2374 }
2375
2376 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2377
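/*
 * Create an xe_vma for a GPUVA map op: lock the VM resv and the BO (for
 * external BOs), create the VMA, then add preempt fences or pin userptr
 * pages as needed. Partially constructed VMAs are destroyed on failure.
 */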
2378 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2379 struct xe_vma_mem_attr *attr, unsigned int flags)
2380 {
2381 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2382 struct xe_validation_ctx ctx;
2383 struct drm_exec exec;
2384 struct xe_vma *vma;
2385 int err = 0;
2386
2387 lockdep_assert_held_write(&vm->lock);
2388
2389 if (bo) {
2390 err = 0;
2391 xe_validation_guard(&ctx, &vm->xe->val, &exec,
2392 (struct xe_val_flags) {.interruptible = true}, err) {
2393 if (!bo->vm) {
2394 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2395 drm_exec_retry_on_contention(&exec);
2396 }
2397 if (!err) {
2398 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2399 drm_exec_retry_on_contention(&exec);
2400 }
2401 if (err)
2402 return ERR_PTR(err);
2403
2404 vma = xe_vma_create(vm, bo, op->gem.offset,
2405 op->va.addr, op->va.addr +
2406 op->va.range - 1, attr, flags);
2407 if (IS_ERR(vma))
2408 return vma;
2409
2410 if (!bo->vm) {
2411 err = add_preempt_fences(vm, bo);
2412 if (err) {
2413 prep_vma_destroy(vm, vma, false);
2414 xe_vma_destroy(vma, NULL);
2415 }
2416 }
2417 }
2418 if (err)
2419 return ERR_PTR(err);
2420 } else {
2421 vma = xe_vma_create(vm, NULL, op->gem.offset,
2422 op->va.addr, op->va.addr +
2423 op->va.range - 1, attr, flags);
2424 if (IS_ERR(vma))
2425 return vma;
2426
2427 if (xe_vma_is_userptr(vma))
2428 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2429 }
2430 if (err) {
2431 prep_vma_destroy(vm, vma, false);
2432 xe_vma_destroy_unlocked(vma);
2433 vma = ERR_PTR(err);
2434 }
2435
2436 return vma;
2437 }
2438
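/* Largest page size previously used for this VMA's PTEs, from the XE_VMA_PTE_* flags. */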
2439 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2440 {
2441 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2442 return SZ_1G;
2443 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2444 return SZ_2M;
2445 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2446 return SZ_64K;
2447 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2448 return SZ_4K;
2449
2450 return SZ_1G; /* Uninitialized, use max size */
2451 }
2452
2453 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2454 {
2455 switch (size) {
2456 case SZ_1G:
2457 vma->gpuva.flags |= XE_VMA_PTE_1G;
2458 break;
2459 case SZ_2M:
2460 vma->gpuva.flags |= XE_VMA_PTE_2M;
2461 break;
2462 case SZ_64K:
2463 vma->gpuva.flags |= XE_VMA_PTE_64K;
2464 break;
2465 case SZ_4K:
2466 vma->gpuva.flags |= XE_VMA_PTE_4K;
2467 break;
2468 }
2469 }
2470
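/*
 * Commit a parsed bind op into the VM's GPUVA tree: insert new VMAs, mark
 * unmapped VMAs as destroyed and record the *_COMMITTED flags used when
 * unwinding on error.
 */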
2471 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2472 {
2473 int err = 0;
2474
2475 lockdep_assert_held_write(&vm->lock);
2476
2477 switch (op->base.op) {
2478 case DRM_GPUVA_OP_MAP:
2479 err |= xe_vm_insert_vma(vm, op->map.vma);
2480 if (!err)
2481 op->flags |= XE_VMA_OP_COMMITTED;
2482 break;
2483 case DRM_GPUVA_OP_REMAP:
2484 {
2485 u8 tile_present =
2486 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2487
2488 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2489 true);
2490 op->flags |= XE_VMA_OP_COMMITTED;
2491
2492 if (op->remap.prev) {
2493 err |= xe_vm_insert_vma(vm, op->remap.prev);
2494 if (!err)
2495 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2496 if (!err && op->remap.skip_prev) {
2497 op->remap.prev->tile_present =
2498 tile_present;
2499 op->remap.prev = NULL;
2500 }
2501 }
2502 if (op->remap.next) {
2503 err |= xe_vm_insert_vma(vm, op->remap.next);
2504 if (!err)
2505 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2506 if (!err && op->remap.skip_next) {
2507 op->remap.next->tile_present =
2508 tile_present;
2509 op->remap.next = NULL;
2510 }
2511 }
2512
2513 /* Adjust for partial unbind after removing VMA from VM */
2514 if (!err) {
2515 op->base.remap.unmap->va->va.addr = op->remap.start;
2516 op->base.remap.unmap->va->va.range = op->remap.range;
2517 }
2518 break;
2519 }
2520 case DRM_GPUVA_OP_UNMAP:
2521 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2522 op->flags |= XE_VMA_OP_COMMITTED;
2523 break;
2524 case DRM_GPUVA_OP_PREFETCH:
2525 op->flags |= XE_VMA_OP_COMMITTED;
2526 break;
2527 default:
2528 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2529 }
2530
2531 return err;
2532 }
2533
2534 /**
2535 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2536 * @vma: Pointer to the xe_vma structure to check
2537 *
2538 * This function determines whether the given VMA (Virtual Memory Area)
2539 * has its memory attributes set to their default values. Specifically,
2540 * it checks the following conditions:
2541 *
2542 * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
2543 * - `pat_index` is equal to `default_pat_index`
2544 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2545 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2546 *
2547 * Return: true if all attributes are at their default values, false otherwise.
2548 */
2549 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2550 {
2551 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2552 vma->attr.pat_index == vma->attr.default_pat_index &&
2553 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2554 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2555 }
2556
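/*
 * Parse a GPUVA ops list into xe_vma_ops: create the new VMAs, account the
 * page-table update work per tile, and commit each op so it can be unwound
 * on failure.
 */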
2557 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2558 struct xe_vma_ops *vops)
2559 {
2560 struct xe_device *xe = vm->xe;
2561 struct drm_gpuva_op *__op;
2562 struct xe_tile *tile;
2563 u8 id, tile_mask = 0;
2564 int err = 0;
2565
2566 lockdep_assert_held_write(&vm->lock);
2567
2568 for_each_tile(tile, vm->xe, id)
2569 tile_mask |= 0x1 << id;
2570
2571 drm_gpuva_for_each_op(__op, ops) {
2572 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2573 struct xe_vma *vma;
2574 unsigned int flags = 0;
2575
2576 INIT_LIST_HEAD(&op->link);
2577 list_add_tail(&op->link, &vops->list);
2578 op->tile_mask = tile_mask;
2579
2580 switch (op->base.op) {
2581 case DRM_GPUVA_OP_MAP:
2582 {
2583 struct xe_vma_mem_attr default_attr = {
2584 .preferred_loc = {
2585 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2586 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2587 },
2588 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2589 .default_pat_index = op->map.pat_index,
2590 .pat_index = op->map.pat_index,
2591 };
2592
2593 flags |= op->map.read_only ?
2594 VMA_CREATE_FLAG_READ_ONLY : 0;
2595 flags |= op->map.is_null ?
2596 VMA_CREATE_FLAG_IS_NULL : 0;
2597 flags |= op->map.dumpable ?
2598 VMA_CREATE_FLAG_DUMPABLE : 0;
2599 flags |= op->map.is_cpu_addr_mirror ?
2600 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2601
2602 vma = new_vma(vm, &op->base.map, &default_attr,
2603 flags);
2604 if (IS_ERR(vma))
2605 return PTR_ERR(vma);
2606
2607 op->map.vma = vma;
2608 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2609 !op->map.is_cpu_addr_mirror) ||
2610 op->map.invalidate_on_bind)
2611 xe_vma_ops_incr_pt_update_ops(vops,
2612 op->tile_mask, 1);
2613 break;
2614 }
2615 case DRM_GPUVA_OP_REMAP:
2616 {
2617 struct xe_vma *old =
2618 gpuva_to_vma(op->base.remap.unmap->va);
2619 bool skip = xe_vma_is_cpu_addr_mirror(old);
2620 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2621 int num_remap_ops = 0;
2622
2623 if (op->base.remap.prev)
2624 start = op->base.remap.prev->va.addr +
2625 op->base.remap.prev->va.range;
2626 if (op->base.remap.next)
2627 end = op->base.remap.next->va.addr;
2628
2629 if (xe_vma_is_cpu_addr_mirror(old) &&
2630 xe_svm_has_mapping(vm, start, end)) {
2631 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2632 xe_svm_unmap_address_range(vm, start, end);
2633 else
2634 return -EBUSY;
2635 }
2636
2637 op->remap.start = xe_vma_start(old);
2638 op->remap.range = xe_vma_size(old);
2639
2640 flags |= op->base.remap.unmap->va->flags &
2641 XE_VMA_READ_ONLY ?
2642 VMA_CREATE_FLAG_READ_ONLY : 0;
2643 flags |= op->base.remap.unmap->va->flags &
2644 DRM_GPUVA_SPARSE ?
2645 VMA_CREATE_FLAG_IS_NULL : 0;
2646 flags |= op->base.remap.unmap->va->flags &
2647 XE_VMA_DUMPABLE ?
2648 VMA_CREATE_FLAG_DUMPABLE : 0;
2649 flags |= xe_vma_is_cpu_addr_mirror(old) ?
2650 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2651
2652 if (op->base.remap.prev) {
2653 vma = new_vma(vm, op->base.remap.prev,
2654 &old->attr, flags);
2655 if (IS_ERR(vma))
2656 return PTR_ERR(vma);
2657
2658 op->remap.prev = vma;
2659
2660 /*
2661 * Userptr creates a new SG mapping so
2662 * we must also rebind.
2663 */
2664 op->remap.skip_prev = skip ||
2665 (!xe_vma_is_userptr(old) &&
2666 IS_ALIGNED(xe_vma_end(vma),
2667 xe_vma_max_pte_size(old)));
2668 if (op->remap.skip_prev) {
2669 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2670 op->remap.range -=
2671 xe_vma_end(vma) -
2672 xe_vma_start(old);
2673 op->remap.start = xe_vma_end(vma);
2674 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2675 (ULL)op->remap.start,
2676 (ULL)op->remap.range);
2677 } else {
2678 num_remap_ops++;
2679 }
2680 }
2681
2682 if (op->base.remap.next) {
2683 vma = new_vma(vm, op->base.remap.next,
2684 &old->attr, flags);
2685 if (IS_ERR(vma))
2686 return PTR_ERR(vma);
2687
2688 op->remap.next = vma;
2689
2690 /*
2691 * Userptr creates a new SG mapping so
2692 * we must also rebind.
2693 */
2694 op->remap.skip_next = skip ||
2695 (!xe_vma_is_userptr(old) &&
2696 IS_ALIGNED(xe_vma_start(vma),
2697 xe_vma_max_pte_size(old)));
2698 if (op->remap.skip_next) {
2699 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2700 op->remap.range -=
2701 xe_vma_end(old) -
2702 xe_vma_start(vma);
2703 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2704 (ULL)op->remap.start,
2705 (ULL)op->remap.range);
2706 } else {
2707 num_remap_ops++;
2708 }
2709 }
2710 if (!skip)
2711 num_remap_ops++;
2712
2713 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2714 break;
2715 }
2716 case DRM_GPUVA_OP_UNMAP:
2717 vma = gpuva_to_vma(op->base.unmap.va);
2718
2719 if (xe_vma_is_cpu_addr_mirror(vma) &&
2720 xe_svm_has_mapping(vm, xe_vma_start(vma),
2721 xe_vma_end(vma)))
2722 return -EBUSY;
2723
2724 if (!xe_vma_is_cpu_addr_mirror(vma))
2725 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2726 break;
2727 case DRM_GPUVA_OP_PREFETCH:
2728 vma = gpuva_to_vma(op->base.prefetch.va);
2729
2730 if (xe_vma_is_userptr(vma)) {
2731 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2732 if (err)
2733 return err;
2734 }
2735
2736 if (xe_vma_is_cpu_addr_mirror(vma))
2737 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2738 op->prefetch_range.ranges_count);
2739 else
2740 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2741
2742 break;
2743 default:
2744 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2745 }
2746
2747 err = xe_vma_op_commit(vm, op);
2748 if (err)
2749 return err;
2750 }
2751
2752 return 0;
2753 }
2754
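/* Undo the effects of a committed (or partially committed) bind op. */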
2755 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2756 bool post_commit, bool prev_post_commit,
2757 bool next_post_commit)
2758 {
2759 lockdep_assert_held_write(&vm->lock);
2760
2761 switch (op->base.op) {
2762 case DRM_GPUVA_OP_MAP:
2763 if (op->map.vma) {
2764 prep_vma_destroy(vm, op->map.vma, post_commit);
2765 xe_vma_destroy_unlocked(op->map.vma);
2766 }
2767 break;
2768 case DRM_GPUVA_OP_UNMAP:
2769 {
2770 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2771
2772 if (vma) {
2773 xe_svm_notifier_lock(vm);
2774 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2775 xe_svm_notifier_unlock(vm);
2776 if (post_commit)
2777 xe_vm_insert_vma(vm, vma);
2778 }
2779 break;
2780 }
2781 case DRM_GPUVA_OP_REMAP:
2782 {
2783 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2784
2785 if (op->remap.prev) {
2786 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2787 xe_vma_destroy_unlocked(op->remap.prev);
2788 }
2789 if (op->remap.next) {
2790 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2791 xe_vma_destroy_unlocked(op->remap.next);
2792 }
2793 if (vma) {
2794 xe_svm_notifier_lock(vm);
2795 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2796 xe_svm_notifier_unlock(vm);
2797 if (post_commit)
2798 xe_vm_insert_vma(vm, vma);
2799 }
2800 break;
2801 }
2802 case DRM_GPUVA_OP_PREFETCH:
2803 /* Nothing to do */
2804 break;
2805 default:
2806 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2807 }
2808 }
2809
2810 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2811 struct drm_gpuva_ops **ops,
2812 int num_ops_list)
2813 {
2814 int i;
2815
2816 for (i = num_ops_list - 1; i >= 0; --i) {
2817 struct drm_gpuva_ops *__ops = ops[i];
2818 struct drm_gpuva_op *__op;
2819
2820 if (!__ops)
2821 continue;
2822
2823 drm_gpuva_for_each_op_reverse(__op, __ops) {
2824 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2825
2826 xe_vma_op_unwind(vm, op,
2827 op->flags & XE_VMA_OP_COMMITTED,
2828 op->flags & XE_VMA_OP_PREV_COMMITTED,
2829 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2830 }
2831 }
2832 }
2833
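/*
 * Lock the backing BO's resv when the BO is external to the VM, and validate
 * the BO when @validate is set.
 */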
2834 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2835 bool validate)
2836 {
2837 struct xe_bo *bo = xe_vma_bo(vma);
2838 struct xe_vm *vm = xe_vma_vm(vma);
2839 int err = 0;
2840
2841 if (bo) {
2842 if (!bo->vm)
2843 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2844 if (!err && validate)
2845 err = xe_bo_validate(bo, vm,
2846 !xe_vm_in_preempt_fence_mode(vm), exec);
2847 }
2848
2849 return err;
2850 }
2851
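/*
 * A VMA whose attached user fence has not signalled yet cannot be unmapped
 * or remapped; return -EBUSY in that case, otherwise drop the fence reference.
 */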
2852 static int check_ufence(struct xe_vma *vma)
2853 {
2854 if (vma->ufence) {
2855 struct xe_user_fence * const f = vma->ufence;
2856
2857 if (!xe_sync_ufence_get_status(f))
2858 return -EBUSY;
2859
2860 vma->ufence = NULL;
2861 xe_sync_ufence_put(f);
2862 }
2863
2864 return 0;
2865 }
2866
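/*
 * Migrate and populate the SVM ranges attached to a prefetch op: move ranges
 * to VRAM (or back to system memory when no tile is given) as required and
 * then fetch their pages.
 */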
2867 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2868 {
2869 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2870 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2871 struct xe_tile *tile = op->prefetch_range.tile;
2872 int err = 0;
2873
2874 struct xe_svm_range *svm_range;
2875 struct drm_gpusvm_ctx ctx = {};
2876 unsigned long i;
2877
2878 if (!xe_vma_is_cpu_addr_mirror(vma))
2879 return 0;
2880
2881 ctx.read_only = xe_vma_read_only(vma);
2882 ctx.devmem_possible = devmem_possible;
2883 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2884 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
2885
2886 /* TODO: Threading the migration */
2887 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2888 if (!tile)
2889 xe_svm_range_migrate_to_smem(vm, svm_range);
2890
2891 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
2892 err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2893 if (err) {
2894 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2895 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2896 return -ENODATA;
2897 }
2898 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2899 }
2900
2901 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2902 if (err) {
2903 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2904 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2905 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2906 err = -ENODATA;
2907 return err;
2908 }
2909 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2910 }
2911
2912 return err;
2913 }
2914
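/*
 * Per-op locking and validation step of the bind path: check user fences,
 * lock and validate the VMAs touched by @op, and migrate BO-backed prefetch
 * targets to the requested memory region.
 */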
2915 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2916 struct xe_vma_op *op)
2917 {
2918 int err = 0;
2919
2920 switch (op->base.op) {
2921 case DRM_GPUVA_OP_MAP:
2922 if (!op->map.invalidate_on_bind)
2923 err = vma_lock_and_validate(exec, op->map.vma,
2924 !xe_vm_in_fault_mode(vm) ||
2925 op->map.immediate);
2926 break;
2927 case DRM_GPUVA_OP_REMAP:
2928 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2929 if (err)
2930 break;
2931
2932 err = vma_lock_and_validate(exec,
2933 gpuva_to_vma(op->base.remap.unmap->va),
2934 false);
2935 if (!err && op->remap.prev)
2936 err = vma_lock_and_validate(exec, op->remap.prev, true);
2937 if (!err && op->remap.next)
2938 err = vma_lock_and_validate(exec, op->remap.next, true);
2939 break;
2940 case DRM_GPUVA_OP_UNMAP:
2941 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2942 if (err)
2943 break;
2944
2945 err = vma_lock_and_validate(exec,
2946 gpuva_to_vma(op->base.unmap.va),
2947 false);
2948 break;
2949 case DRM_GPUVA_OP_PREFETCH:
2950 {
2951 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2952 u32 region;
2953
2954 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2955 region = op->prefetch.region;
2956 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
2957 region <= ARRAY_SIZE(region_to_mem_type));
2958 }
2959
2960 err = vma_lock_and_validate(exec,
2961 gpuva_to_vma(op->base.prefetch.va),
2962 false);
2963 if (!err && !xe_vma_has_no_bo(vma))
2964 err = xe_bo_migrate(xe_vma_bo(vma),
2965 region_to_mem_type[region],
2966 NULL,
2967 exec);
2968 break;
2969 }
2970 default:
2971 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2972 }
2973
2974 return err;
2975 }
2976
2977 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
2978 {
2979 struct xe_vma_op *op;
2980 int err;
2981
2982 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
2983 return 0;
2984
2985 list_for_each_entry(op, &vops->list, link) {
2986 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
2987 err = prefetch_ranges(vm, op);
2988 if (err)
2989 return err;
2990 }
2991 }
2992
2993 return 0;
2994 }
2995
2996 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2997 struct xe_vm *vm,
2998 struct xe_vma_ops *vops)
2999 {
3000 struct xe_vma_op *op;
3001 int err;
3002
3003 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3004 if (err)
3005 return err;
3006
3007 list_for_each_entry(op, &vops->list, link) {
3008 err = op_lock_and_prep(exec, vm, op);
3009 if (err)
3010 return err;
3011 }
3012
3013 #ifdef TEST_VM_OPS_ERROR
3014 if (vops->inject_error &&
3015 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3016 return -ENOSPC;
3017 #endif
3018
3019 return 0;
3020 }
3021
3022 static void op_trace(struct xe_vma_op *op)
3023 {
3024 switch (op->base.op) {
3025 case DRM_GPUVA_OP_MAP:
3026 trace_xe_vma_bind(op->map.vma);
3027 break;
3028 case DRM_GPUVA_OP_REMAP:
3029 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3030 if (op->remap.prev)
3031 trace_xe_vma_bind(op->remap.prev);
3032 if (op->remap.next)
3033 trace_xe_vma_bind(op->remap.next);
3034 break;
3035 case DRM_GPUVA_OP_UNMAP:
3036 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3037 break;
3038 case DRM_GPUVA_OP_PREFETCH:
3039 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3040 break;
3041 case DRM_GPUVA_OP_DRIVER:
3042 break;
3043 default:
3044 XE_WARN_ON("NOT POSSIBLE");
3045 }
3046 }
3047
3048 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3049 {
3050 struct xe_vma_op *op;
3051
3052 list_for_each_entry(op, &vops->list, link)
3053 op_trace(op);
3054 }
3055
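/*
 * Assign a bind exec queue to each tile with pending PT updates and return
 * the number of tiles that have work to do.
 */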
3056 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3057 {
3058 struct xe_exec_queue *q = vops->q;
3059 struct xe_tile *tile;
3060 int number_tiles = 0;
3061 u8 id;
3062
3063 for_each_tile(tile, vm->xe, id) {
3064 if (vops->pt_update_ops[id].num_ops)
3065 ++number_tiles;
3066
3067 if (vops->pt_update_ops[id].q)
3068 continue;
3069
3070 if (q) {
3071 vops->pt_update_ops[id].q = q;
3072 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3073 q = list_next_entry(q, multi_gt_list);
3074 } else {
3075 vops->pt_update_ops[id].q = vm->q[id];
3076 }
3077 }
3078
3079 return number_tiles;
3080 }
3081
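/*
 * Prepare, run and finalize the per-tile page-table updates for @vops,
 * combining the per-tile fences into a dma_fence_array when more than one
 * tile is involved. All touched tiles are aborted on error.
 */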
3082 static struct dma_fence *ops_execute(struct xe_vm *vm,
3083 struct xe_vma_ops *vops)
3084 {
3085 struct xe_tile *tile;
3086 struct dma_fence *fence = NULL;
3087 struct dma_fence **fences = NULL;
3088 struct dma_fence_array *cf = NULL;
3089 int number_tiles = 0, current_fence = 0, err;
3090 u8 id;
3091
3092 number_tiles = vm_ops_setup_tile_args(vm, vops);
3093 if (number_tiles == 0)
3094 return ERR_PTR(-ENODATA);
3095
3096 if (number_tiles > 1) {
3097 fences = kmalloc_array(number_tiles, sizeof(*fences),
3098 GFP_KERNEL);
3099 if (!fences) {
3100 fence = ERR_PTR(-ENOMEM);
3101 goto err_trace;
3102 }
3103 }
3104
3105 for_each_tile(tile, vm->xe, id) {
3106 if (!vops->pt_update_ops[id].num_ops)
3107 continue;
3108
3109 err = xe_pt_update_ops_prepare(tile, vops);
3110 if (err) {
3111 fence = ERR_PTR(err);
3112 goto err_out;
3113 }
3114 }
3115
3116 trace_xe_vm_ops_execute(vops);
3117
3118 for_each_tile(tile, vm->xe, id) {
3119 if (!vops->pt_update_ops[id].num_ops)
3120 continue;
3121
3122 fence = xe_pt_update_ops_run(tile, vops);
3123 if (IS_ERR(fence))
3124 goto err_out;
3125
3126 if (fences)
3127 fences[current_fence++] = fence;
3128 }
3129
3130 if (fences) {
3131 cf = dma_fence_array_create(number_tiles, fences,
3132 vm->composite_fence_ctx,
3133 vm->composite_fence_seqno++,
3134 false);
3135 if (!cf) {
3136 --vm->composite_fence_seqno;
3137 fence = ERR_PTR(-ENOMEM);
3138 goto err_out;
3139 }
3140 fence = &cf->base;
3141 }
3142
3143 for_each_tile(tile, vm->xe, id) {
3144 if (!vops->pt_update_ops[id].num_ops)
3145 continue;
3146
3147 xe_pt_update_ops_fini(tile, vops);
3148 }
3149
3150 return fence;
3151
3152 err_out:
3153 for_each_tile(tile, vm->xe, id) {
3154 if (!vops->pt_update_ops[id].num_ops)
3155 continue;
3156
3157 xe_pt_update_ops_abort(tile, vops);
3158 }
3159 while (current_fence)
3160 dma_fence_put(fences[--current_fence]);
3161 kfree(fences);
3162 kfree(cf);
3163
3164 err_trace:
3165 trace_xe_vm_ops_fail(vm);
3166 return fence;
3167 }
3168
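/* Replace the VMA's attached user fence with a new reference to @ufence. */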
3169 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3170 {
3171 if (vma->ufence)
3172 xe_sync_ufence_put(vma->ufence);
3173 vma->ufence = __xe_sync_ufence_get(ufence);
3174 }
3175
3176 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3177 struct xe_user_fence *ufence)
3178 {
3179 switch (op->base.op) {
3180 case DRM_GPUVA_OP_MAP:
3181 vma_add_ufence(op->map.vma, ufence);
3182 break;
3183 case DRM_GPUVA_OP_REMAP:
3184 if (op->remap.prev)
3185 vma_add_ufence(op->remap.prev, ufence);
3186 if (op->remap.next)
3187 vma_add_ufence(op->remap.next, ufence);
3188 break;
3189 case DRM_GPUVA_OP_UNMAP:
3190 break;
3191 case DRM_GPUVA_OP_PREFETCH:
3192 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3193 break;
3194 default:
3195 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3196 }
3197 }
3198
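/*
 * Finalize a bind: attach the user fence (if any) to the touched VMAs,
 * schedule destruction of VMAs removed by UNMAP/REMAP once @fence signals,
 * then signal the syncs and record the last fence on the wait exec queue.
 */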
3199 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3200 struct dma_fence *fence)
3201 {
3202 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3203 struct xe_user_fence *ufence;
3204 struct xe_vma_op *op;
3205 int i;
3206
3207 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3208 list_for_each_entry(op, &vops->list, link) {
3209 if (ufence)
3210 op_add_ufence(vm, op, ufence);
3211
3212 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3213 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3214 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3215 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3216 fence);
3217 }
3218 if (ufence)
3219 xe_sync_ufence_put(ufence);
3220 if (fence) {
3221 for (i = 0; i < vops->num_syncs; i++)
3222 xe_sync_entry_signal(vops->syncs + i, fence);
3223 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3224 }
3225 }
3226
3227 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3228 struct xe_vma_ops *vops)
3229 {
3230 struct xe_validation_ctx ctx;
3231 struct drm_exec exec;
3232 struct dma_fence *fence;
3233 int err = 0;
3234
3235 lockdep_assert_held_write(&vm->lock);
3236
3237 xe_validation_guard(&ctx, &vm->xe->val, &exec,
3238 ((struct xe_val_flags) {
3239 .interruptible = true,
3240 .exec_ignore_duplicates = true,
3241 }), err) {
3242 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3243 drm_exec_retry_on_contention(&exec);
3244 xe_validation_retry_on_oom(&ctx, &err);
3245 if (err)
3246 return ERR_PTR(err);
3247
3248 xe_vm_set_validation_exec(vm, &exec);
3249 fence = ops_execute(vm, vops);
3250 xe_vm_set_validation_exec(vm, NULL);
3251 if (IS_ERR(fence)) {
3252 if (PTR_ERR(fence) == -ENODATA)
3253 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3254 return fence;
3255 }
3256
3257 vm_bind_ioctl_ops_fini(vm, vops, fence);
3258 }
3259
3260 return err ? ERR_PTR(err) : fence;
3261 }
3262 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3263
3264 #define SUPPORTED_FLAGS_STUB \
3265 (DRM_XE_VM_BIND_FLAG_READONLY | \
3266 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3267 DRM_XE_VM_BIND_FLAG_NULL | \
3268 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3269 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3270 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
3271
3272 #ifdef TEST_VM_OPS_ERROR
3273 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3274 #else
3275 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3276 #endif
3277
3278 #define XE_64K_PAGE_MASK 0xffffull
3279 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3280
3281 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3282 struct drm_xe_vm_bind *args,
3283 struct drm_xe_vm_bind_op **bind_ops)
3284 {
3285 int err;
3286 int i;
3287
3288 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3289 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3290 return -EINVAL;
3291
3292 if (XE_IOCTL_DBG(xe, args->extensions))
3293 return -EINVAL;
3294
3295 if (args->num_binds > 1) {
3296 u64 __user *bind_user =
3297 u64_to_user_ptr(args->vector_of_binds);
3298
3299 *bind_ops = kvmalloc_array(args->num_binds,
3300 sizeof(struct drm_xe_vm_bind_op),
3301 GFP_KERNEL | __GFP_ACCOUNT |
3302 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3303 if (!*bind_ops)
3304 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3305
3306 err = copy_from_user(*bind_ops, bind_user,
3307 sizeof(struct drm_xe_vm_bind_op) *
3308 args->num_binds);
3309 if (XE_IOCTL_DBG(xe, err)) {
3310 err = -EFAULT;
3311 goto free_bind_ops;
3312 }
3313 } else {
3314 *bind_ops = &args->bind;
3315 }
3316
3317 for (i = 0; i < args->num_binds; ++i) {
3318 u64 range = (*bind_ops)[i].range;
3319 u64 addr = (*bind_ops)[i].addr;
3320 u32 op = (*bind_ops)[i].op;
3321 u32 flags = (*bind_ops)[i].flags;
3322 u32 obj = (*bind_ops)[i].obj;
3323 u64 obj_offset = (*bind_ops)[i].obj_offset;
3324 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3325 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3326 bool is_cpu_addr_mirror = flags &
3327 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3328 u16 pat_index = (*bind_ops)[i].pat_index;
3329 u16 coh_mode;
3330
3331 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3332 (!xe_vm_in_fault_mode(vm) ||
3333 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3334 err = -EINVAL;
3335 goto free_bind_ops;
3336 }
3337
3338 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3339 err = -EINVAL;
3340 goto free_bind_ops;
3341 }
3342
3343 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3344 (*bind_ops)[i].pat_index = pat_index;
3345 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3346 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3347 err = -EINVAL;
3348 goto free_bind_ops;
3349 }
3350
3351 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3352 err = -EINVAL;
3353 goto free_bind_ops;
3354 }
3355
3356 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3357 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3358 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3359 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3360 is_cpu_addr_mirror)) ||
3361 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3362 (is_null || is_cpu_addr_mirror)) ||
3363 XE_IOCTL_DBG(xe, !obj &&
3364 op == DRM_XE_VM_BIND_OP_MAP &&
3365 !is_null && !is_cpu_addr_mirror) ||
3366 XE_IOCTL_DBG(xe, !obj &&
3367 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3368 XE_IOCTL_DBG(xe, addr &&
3369 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3370 XE_IOCTL_DBG(xe, range &&
3371 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3372 XE_IOCTL_DBG(xe, obj &&
3373 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3374 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3375 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3376 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3377 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3378 XE_IOCTL_DBG(xe, obj &&
3379 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3380 XE_IOCTL_DBG(xe, prefetch_region &&
3381 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3382 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3383 !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
3384 XE_IOCTL_DBG(xe, obj &&
3385 op == DRM_XE_VM_BIND_OP_UNMAP)) {
3386 err = -EINVAL;
3387 goto free_bind_ops;
3388 }
3389
3390 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3391 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3392 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3393 XE_IOCTL_DBG(xe, !range &&
3394 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3395 err = -EINVAL;
3396 goto free_bind_ops;
3397 }
3398 }
3399
3400 return 0;
3401
3402 free_bind_ops:
3403 if (args->num_binds > 1)
3404 kvfree(*bind_ops);
3405 *bind_ops = NULL;
3406 return err;
3407 }
3408
3409 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3410 struct xe_exec_queue *q,
3411 struct xe_sync_entry *syncs,
3412 int num_syncs)
3413 {
3414 struct dma_fence *fence;
3415 int i, err = 0;
3416
3417 fence = xe_sync_in_fence_get(syncs, num_syncs,
3418 to_wait_exec_queue(vm, q), vm);
3419 if (IS_ERR(fence))
3420 return PTR_ERR(fence);
3421
3422 for (i = 0; i < num_syncs; i++)
3423 xe_sync_entry_signal(&syncs[i], fence);
3424
3425 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3426 fence);
3427 dma_fence_put(fence);
3428
3429 return err;
3430 }
3431
3432 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3433 struct xe_exec_queue *q,
3434 struct xe_sync_entry *syncs, u32 num_syncs)
3435 {
3436 memset(vops, 0, sizeof(*vops));
3437 INIT_LIST_HEAD(&vops->list);
3438 vops->vm = vm;
3439 vops->q = q;
3440 vops->syncs = syncs;
3441 vops->num_syncs = num_syncs;
3442 vops->flags = 0;
3443 }
3444
3445 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3446 u64 addr, u64 range, u64 obj_offset,
3447 u16 pat_index, u32 op, u32 bind_flags)
3448 {
3449 u16 coh_mode;
3450
3451 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3452 XE_IOCTL_DBG(xe, obj_offset >
3453 xe_bo_size(bo) - range)) {
3454 return -EINVAL;
3455 }
3456
3457 /*
3458 * Some platforms require 64k VM_BIND alignment,
3459 * specifically those with XE_VRAM_FLAGS_NEED64K.
3460 *
3461 * Other platforms may have BOs set to 64k physical placement,
3462 * but those can still be mapped at 4k offsets. This check only
3463 * applies to the former case.
3464 */
3465 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3466 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3467 if (XE_IOCTL_DBG(xe, obj_offset &
3468 XE_64K_PAGE_MASK) ||
3469 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3470 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3471 return -EINVAL;
3472 }
3473 }
3474
3475 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3476 if (bo->cpu_caching) {
3477 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3478 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3479 return -EINVAL;
3480 }
3481 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3482 /*
3483 * An imported dma-buf from a different device should
3484 * require 1-way or 2-way coherency since we don't know
3485 * how it was mapped on the CPU. Just assume it is
3486 * potentially cached on the CPU side.
3487 */
3488 return -EINVAL;
3489 }
3490
3491 /* If a BO is protected it can only be mapped if the key is still valid */
3492 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3493 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3494 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3495 return -ENOEXEC;
3496
3497 return 0;
3498 }
3499
3500 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3501 {
3502 struct xe_device *xe = to_xe_device(dev);
3503 struct xe_file *xef = to_xe_file(file);
3504 struct drm_xe_vm_bind *args = data;
3505 struct drm_xe_sync __user *syncs_user;
3506 struct xe_bo **bos = NULL;
3507 struct drm_gpuva_ops **ops = NULL;
3508 struct xe_vm *vm;
3509 struct xe_exec_queue *q = NULL;
3510 u32 num_syncs, num_ufence = 0;
3511 struct xe_sync_entry *syncs = NULL;
3512 struct drm_xe_vm_bind_op *bind_ops = NULL;
3513 struct xe_vma_ops vops;
3514 struct dma_fence *fence;
3515 int err;
3516 int i;
3517
3518 vm = xe_vm_lookup(xef, args->vm_id);
3519 if (XE_IOCTL_DBG(xe, !vm))
3520 return -EINVAL;
3521
3522 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3523 if (err)
3524 goto put_vm;
3525
3526 if (args->exec_queue_id) {
3527 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3528 if (XE_IOCTL_DBG(xe, !q)) {
3529 err = -ENOENT;
3530 goto free_bind_ops;
3531 }
3532
3533 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3534 err = -EINVAL;
3535 goto put_exec_queue;
3536 }
3537 }
3538
3539 /* Ensure all UNMAPs visible */
3540 xe_svm_flush(vm);
3541
3542 err = down_write_killable(&vm->lock);
3543 if (err)
3544 goto put_exec_queue;
3545
3546 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3547 err = -ENOENT;
3548 goto release_vm_lock;
3549 }
3550
3551 for (i = 0; i < args->num_binds; ++i) {
3552 u64 range = bind_ops[i].range;
3553 u64 addr = bind_ops[i].addr;
3554
3555 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3556 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3557 err = -EINVAL;
3558 goto release_vm_lock;
3559 }
3560 }
3561
3562 if (args->num_binds) {
3563 bos = kvcalloc(args->num_binds, sizeof(*bos),
3564 GFP_KERNEL | __GFP_ACCOUNT |
3565 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3566 if (!bos) {
3567 err = -ENOMEM;
3568 goto release_vm_lock;
3569 }
3570
3571 ops = kvcalloc(args->num_binds, sizeof(*ops),
3572 GFP_KERNEL | __GFP_ACCOUNT |
3573 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3574 if (!ops) {
3575 err = -ENOMEM;
3576 goto free_bos;
3577 }
3578 }
3579
3580 for (i = 0; i < args->num_binds; ++i) {
3581 struct drm_gem_object *gem_obj;
3582 u64 range = bind_ops[i].range;
3583 u64 addr = bind_ops[i].addr;
3584 u32 obj = bind_ops[i].obj;
3585 u64 obj_offset = bind_ops[i].obj_offset;
3586 u16 pat_index = bind_ops[i].pat_index;
3587 u32 op = bind_ops[i].op;
3588 u32 bind_flags = bind_ops[i].flags;
3589
3590 if (!obj)
3591 continue;
3592
3593 gem_obj = drm_gem_object_lookup(file, obj);
3594 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3595 err = -ENOENT;
3596 goto put_obj;
3597 }
3598 bos[i] = gem_to_xe_bo(gem_obj);
3599
3600 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3601 obj_offset, pat_index, op,
3602 bind_flags);
3603 if (err)
3604 goto put_obj;
3605 }
3606
3607 if (args->num_syncs) {
3608 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3609 if (!syncs) {
3610 err = -ENOMEM;
3611 goto put_obj;
3612 }
3613 }
3614
3615 syncs_user = u64_to_user_ptr(args->syncs);
3616 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3617 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3618 &syncs_user[num_syncs],
3619 (xe_vm_in_lr_mode(vm) ?
3620 SYNC_PARSE_FLAG_LR_MODE : 0) |
3621 (!args->num_binds ?
3622 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3623 if (err)
3624 goto free_syncs;
3625
3626 if (xe_sync_is_ufence(&syncs[num_syncs]))
3627 num_ufence++;
3628 }
3629
3630 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3631 err = -EINVAL;
3632 goto free_syncs;
3633 }
3634
3635 if (!args->num_binds) {
3636 err = -ENODATA;
3637 goto free_syncs;
3638 }
3639
3640 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3641 for (i = 0; i < args->num_binds; ++i) {
3642 u64 range = bind_ops[i].range;
3643 u64 addr = bind_ops[i].addr;
3644 u32 op = bind_ops[i].op;
3645 u32 flags = bind_ops[i].flags;
3646 u64 obj_offset = bind_ops[i].obj_offset;
3647 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3648 u16 pat_index = bind_ops[i].pat_index;
3649
3650 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3651 addr, range, op, flags,
3652 prefetch_region, pat_index);
3653 if (IS_ERR(ops[i])) {
3654 err = PTR_ERR(ops[i]);
3655 ops[i] = NULL;
3656 goto unwind_ops;
3657 }
3658
3659 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3660 if (err)
3661 goto unwind_ops;
3662
3663 #ifdef TEST_VM_OPS_ERROR
3664 if (flags & FORCE_OP_ERROR) {
3665 vops.inject_error = true;
3666 vm->xe->vm_inject_error_position =
3667 (vm->xe->vm_inject_error_position + 1) %
3668 FORCE_OP_ERROR_COUNT;
3669 }
3670 #endif
3671 }
3672
3673 /* Nothing to do */
3674 if (list_empty(&vops.list)) {
3675 err = -ENODATA;
3676 goto unwind_ops;
3677 }
3678
3679 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3680 if (err)
3681 goto unwind_ops;
3682
3683 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3684 if (err)
3685 goto unwind_ops;
3686
3687 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3688 if (IS_ERR(fence))
3689 err = PTR_ERR(fence);
3690 else
3691 dma_fence_put(fence);
3692
3693 unwind_ops:
3694 if (err && err != -ENODATA)
3695 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3696 xe_vma_ops_fini(&vops);
3697 for (i = args->num_binds - 1; i >= 0; --i)
3698 if (ops[i])
3699 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3700 free_syncs:
3701 if (err == -ENODATA)
3702 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3703 while (num_syncs--)
3704 xe_sync_entry_cleanup(&syncs[num_syncs]);
3705
3706 kfree(syncs);
3707 put_obj:
3708 for (i = 0; i < args->num_binds; ++i)
3709 xe_bo_put(bos[i]);
3710
3711 kvfree(ops);
3712 free_bos:
3713 kvfree(bos);
3714 release_vm_lock:
3715 up_write(&vm->lock);
3716 put_exec_queue:
3717 if (q)
3718 xe_exec_queue_put(q);
3719 free_bind_ops:
3720 if (args->num_binds > 1)
3721 kvfree(bind_ops);
3722 put_vm:
3723 xe_vm_put(vm);
3724 return err;
3725 }
3726
3727 /**
3728 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3729 * @vm: VM to bind the BO to
3730 * @bo: BO to bind
3731 * @q: exec queue to use for the bind (optional)
3732 * @addr: address at which to bind the BO
3733 * @cache_lvl: PAT cache level to use
3734 *
3735 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3736 * kernel-owned VM.
3737 *
3738 * Returns a dma_fence to track the binding completion if the job to do so was
3739 * successfully submitted, an error pointer otherwise.
3740 */
3741 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3742 struct xe_exec_queue *q, u64 addr,
3743 enum xe_cache_level cache_lvl)
3744 {
3745 struct xe_vma_ops vops;
3746 struct drm_gpuva_ops *ops = NULL;
3747 struct dma_fence *fence;
3748 int err;
3749
3750 xe_bo_get(bo);
3751 xe_vm_get(vm);
3752 if (q)
3753 xe_exec_queue_get(q);
3754
3755 down_write(&vm->lock);
3756
3757 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3758
3759 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3760 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3761 vm->xe->pat.idx[cache_lvl]);
3762 if (IS_ERR(ops)) {
3763 err = PTR_ERR(ops);
3764 goto release_vm_lock;
3765 }
3766
3767 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3768 if (err)
3769 goto release_vm_lock;
3770
3771 xe_assert(vm->xe, !list_empty(&vops.list));
3772
3773 err = xe_vma_ops_alloc(&vops, false);
3774 if (err)
3775 goto unwind_ops;
3776
3777 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3778 if (IS_ERR(fence))
3779 err = PTR_ERR(fence);
3780
3781 unwind_ops:
3782 if (err && err != -ENODATA)
3783 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3784
3785 xe_vma_ops_fini(&vops);
3786 drm_gpuva_ops_free(&vm->gpuvm, ops);
3787
3788 release_vm_lock:
3789 up_write(&vm->lock);
3790
3791 if (q)
3792 xe_exec_queue_put(q);
3793 xe_vm_put(vm);
3794 xe_bo_put(bo);
3795
3796 if (err)
3797 fence = ERR_PTR(err);
3798
3799 return fence;
3800 }
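/*
 * Illustrative sketch of a xe_vm_bind_kernel_bo() caller (hypothetical, not
 * taken from an in-tree user; the XE_CACHE_WB level is only an example):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */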
3801
3802 /**
3803 * xe_vm_lock() - Lock the vm's dma_resv object
3804 * @vm: The struct xe_vm whose lock is to be locked
3805 * @intr: Whether to perform any wait interruptible
3806 *
3807 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3808 * contended lock was interrupted. If @intr is false, the function
3809 * always returns 0.
3810 */
3811 int xe_vm_lock(struct xe_vm *vm, bool intr)
3812 {
3813 int ret;
3814
3815 if (intr)
3816 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3817 else
3818 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
3819
3820 return ret;
3821 }
3822
3823 /**
3824 * xe_vm_unlock() - Unlock the vm's dma_resv object
3825 * @vm: The struct xe_vm whose lock is to be released.
3826 *
3827 * Unlock a buffer object lock that was locked by xe_vm_lock().
3828 */
3829 void xe_vm_unlock(struct xe_vm *vm)
3830 {
3831 dma_resv_unlock(xe_vm_resv(vm));
3832 }
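/*
 * Typical usage sketch for the lock/unlock pair above (illustrative only):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... touch state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);
 */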
3833
3834 /**
3835 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation for an address
3836 * range on a mask of tiles
3837 * @vm: The VM
3838 * @start: start address
3839 * @end: end address
3840 * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
3841 *
3842 * Issue a range-based TLB invalidation on the GTs of the tiles selected by @tile_mask.
3843 *
3844 * Returns 0 for success, negative error code otherwise.
3845 */
3846 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3847 u64 end, u8 tile_mask)
3848 {
3849 struct xe_tlb_inval_fence
3850 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3851 struct xe_tile *tile;
3852 u32 fence_id = 0;
3853 u8 id;
3854 int err;
3855
3856 if (!tile_mask)
3857 return 0;
3858
3859 for_each_tile(tile, vm->xe, id) {
3860 if (!(tile_mask & BIT(id)))
3861 continue;
3862
3863 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3864 &fence[fence_id], true);
3865
3866 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3867 &fence[fence_id], start, end,
3868 vm->usm.asid);
3869 if (err)
3870 goto wait;
3871 ++fence_id;
3872
3873 if (!tile->media_gt)
3874 continue;
3875
3876 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3877 &fence[fence_id], true);
3878
3879 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
3880 &fence[fence_id], start, end,
3881 vm->usm.asid);
3882 if (err)
3883 goto wait;
3884 ++fence_id;
3885 }
3886
3887 wait:
3888 for (id = 0; id < fence_id; ++id)
3889 xe_tlb_inval_fence_wait(&fence[id]);
3890
3891 return err;
3892 }
3893
3894 /**
3895 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3896 * @vma: VMA to invalidate
3897 *
3898 * Walks the page-table leaves, zeroing the entries owned by this VMA, then
3899 * issues a TLB invalidation and blocks until the invalidation is
3900 * complete.
3901 *
3902 * Returns 0 for success, negative error code otherwise.
3903 */
3904 int xe_vm_invalidate_vma(struct xe_vma *vma)
3905 {
3906 struct xe_device *xe = xe_vma_vm(vma)->xe;
3907 struct xe_vm *vm = xe_vma_vm(vma);
3908 struct xe_tile *tile;
3909 u8 tile_mask = 0;
3910 int ret = 0;
3911 u8 id;
3912
3913 xe_assert(xe, !xe_vma_is_null(vma));
3914 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3915 trace_xe_vma_invalidate(vma);
3916
3917 vm_dbg(&vm->xe->drm,
3918 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3919 xe_vma_start(vma), xe_vma_size(vma));
3920
3921 /*
3922 * Check that we don't race with page-table updates, tile_invalidated
3923 * update is safe
3924 */
3925 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3926 if (xe_vma_is_userptr(vma)) {
3927 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
3928 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
3929 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
3930
3931 WARN_ON_ONCE(!mmu_interval_check_retry
3932 (&to_userptr_vma(vma)->userptr.notifier,
3933 to_userptr_vma(vma)->userptr.pages.notifier_seq));
3934 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
3935 DMA_RESV_USAGE_BOOKKEEP));
3936
3937 } else {
3938 xe_bo_assert_held(xe_vma_bo(vma));
3939 }
3940 }
3941
3942 for_each_tile(tile, xe, id)
3943 if (xe_pt_zap_ptes(tile, vma))
3944 tile_mask |= BIT(id);
3945
3946 xe_device_wmb(xe);
3947
3948 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
3949 xe_vma_end(vma), tile_mask);
3950
3951 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
3952 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
3953
3954 return ret;
3955 }
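
/*
 * Example (illustrative sketch, with assumptions): for a VM-private,
 * BO-backed VMA whose object shares the vm's reservation object, holding the
 * vm's resv satisfies the xe_bo_assert_held() check above. Externally shared
 * objects would instead need their own dma_resv locked.
 *
 *	struct xe_vm *vm = xe_vma_vm(vma);
 *	int err;
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	err = xe_vm_invalidate_vma(vma);
 *	xe_vm_unlock(vm);
 *	return err;
 */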

int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}
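
/*
 * Example (illustrative sketch): a submission path that may reference
 * PXP-protected buffers can gate execution on the key check. The exact call
 * site shown here is an assumption, not the driver's actual placement.
 *
 *	err = xe_vm_validate_protected(vm);
 *	if (err)
 *		return err;	(-ENODEV without a vm, or the PXP key-check error)
 */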

struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		if (bo) {
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data))
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}
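
/*
 * Example (illustrative sketch): the capture/print/free lifecycle used by
 * coredump-style consumers. Capture runs with GFP_NOWAIT from the error
 * context, the delayed step reads BO/userptr contents later from a context
 * that may sleep, and printing emits ascii85-encoded blobs. The surrounding
 * worker/printer setup is assumed, not shown.
 *
 *	struct xe_vm_snapshot *snap;
 *
 *	snap = xe_vm_snapshot_capture(vm);
 *
 *	xe_vm_snapshot_capture_delayed(snap);	(from a worker, may sleep)
 *	xe_vm_snapshot_print(snap, p);		(p is a struct drm_printer)
 *	xe_vm_snapshot_free(snap);
 */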

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the XE device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: True if called from the pagefault path for an atomic access
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to perform an atomic GPU operation on it.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Invalid access for atomic memory attr
 *
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}
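
/*
 * Example (illustrative sketch): interpreting the tri-state return in a
 * page-fault handler. The migrate_to_vram() helper named here is
 * hypothetical; the real handling lives in the GPU page-fault code.
 *
 *	int need = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);
 *
 *	if (need < 0)
 *		return need;		(atomic access to this VMA is invalid)
 *	if (need)
 *		err = migrate_to_vram(vma);	(hypothetical helper)
 */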

static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	bool is_cpu_addr_mirror = false;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr;
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.is_cpu_addr_mirror = true;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;

				if (xe_vma_is_cpu_addr_mirror(vma))
					is_cpu_addr_mirror = true;
				else
					is_cpu_addr_mirror = false;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * For madvise ops a DRM_GPUVA_OP_MAP always
				 * follows a DRM_GPUVA_OP_REMAP, so set
				 * op->map.is_cpu_addr_mirror to true when the
				 * REMAP was for a xe_vma_is_cpu_addr_mirror()
				 * VMA.
				 */
				op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * unmapped by the REMAP so they can be assigned to
			 * the newly created MAP VMA.
			 */
			if (is_madvise)
				tmp_attr = vma->attr;

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * In the madvise case a MAP is always preceded by a
			 * REMAP, so tmp_attr always holds sane values, making
			 * it safe to copy them to the new VMA.
			 */
			if (is_madvise)
				vma->attr = tmp_attr;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs to create new VMAs covering the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}
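
/*
 * Example (illustrative sketch): splitting VMAs ahead of applying a madvise
 * attribute to a sub-range. The caller must hold vm->lock for writing, as
 * asserted above; the subsequent attribute application is elided.
 *
 *	down_write(&vm->lock);
 *	err = xe_vm_alloc_madvise_vma(vm, start, range);
 *	up_write(&vm->lock);
 */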

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits/merges existing VMAs to create a new VMA covering the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}

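/*
 * Example (illustrative sketch): carving out a CPU-address-mirror VMA for a
 * system-allocator range, again with vm->lock held for writing. In the driver
 * this is reached from the VM bind path rather than called directly.
 *
 *	lockdep_assert_held_write(&vm->lock);
 *	err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, range);
 */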