1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_res_cursor.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_wa.h"
44
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
50 /**
51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
52 * @vm: The vm whose resv is to be locked.
53 * @exec: The drm_exec transaction.
54 *
55 * Helper to lock the vm's resv as part of a drm_exec transaction.
56 *
57 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
58 */
59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
60 {
61 return drm_exec_lock_obj(exec, xe_vm_obj(vm));
62 }
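/*
 * Example (an illustrative sketch only, not taken verbatim from a caller):
 * xe_vm_drm_exec_lock() is intended to be used from within a drm_exec
 * transaction so that ww-mutex contention restarts the locking loop:
 *
 *	struct drm_exec exec;
 *	int err;
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	... work under the locked resv ...
 *	drm_exec_fini(&exec);
 */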
63
64 static bool preempt_fences_waiting(struct xe_vm *vm)
65 {
66 struct xe_exec_queue *q;
67
68 lockdep_assert_held(&vm->lock);
69 xe_vm_assert_held(vm);
70
71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
72 if (!q->lr.pfence ||
73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
74 &q->lr.pfence->flags)) {
75 return true;
76 }
77 }
78
79 return false;
80 }
81
82 static void free_preempt_fences(struct list_head *list)
83 {
84 struct list_head *link, *next;
85
86 list_for_each_safe(link, next, list)
87 xe_preempt_fence_free(to_preempt_fence_from_link(link));
88 }
89
90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
91 unsigned int *count)
92 {
93 lockdep_assert_held(&vm->lock);
94 xe_vm_assert_held(vm);
95
96 if (*count >= vm->preempt.num_exec_queues)
97 return 0;
98
99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
101
102 if (IS_ERR(pfence))
103 return PTR_ERR(pfence);
104
105 list_move_tail(xe_preempt_fence_link(pfence), list);
106 }
107
108 return 0;
109 }
110
111 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
112 {
113 struct xe_exec_queue *q;
114
115 xe_vm_assert_held(vm);
116
117 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
118 if (q->lr.pfence) {
119 long timeout = dma_fence_wait(q->lr.pfence, false);
120
121 /* Only -ETIME on fence indicates VM needs to be killed */
122 if (timeout < 0 || q->lr.pfence->error == -ETIME)
123 return -ETIME;
124
125 dma_fence_put(q->lr.pfence);
126 q->lr.pfence = NULL;
127 }
128 }
129
130 return 0;
131 }
132
133 static bool xe_vm_is_idle(struct xe_vm *vm)
134 {
135 struct xe_exec_queue *q;
136
137 xe_vm_assert_held(vm);
138 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
139 if (!xe_exec_queue_is_idle(q))
140 return false;
141 }
142
143 return true;
144 }
145
146 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
147 {
148 struct list_head *link;
149 struct xe_exec_queue *q;
150
151 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
152 struct dma_fence *fence;
153
154 link = list->next;
155 xe_assert(vm->xe, link != list);
156
157 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
158 q, q->lr.context,
159 ++q->lr.seqno);
160 dma_fence_put(q->lr.pfence);
161 q->lr.pfence = fence;
162 }
163 }
164
165 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
166 {
167 struct xe_exec_queue *q;
168 int err;
169
170 xe_bo_assert_held(bo);
171
172 if (!vm->preempt.num_exec_queues)
173 return 0;
174
175 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
176 if (err)
177 return err;
178
179 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
180 if (q->lr.pfence) {
181 dma_resv_add_fence(bo->ttm.base.resv,
182 q->lr.pfence,
183 DMA_RESV_USAGE_BOOKKEEP);
184 }
185
186 return 0;
187 }
188
189 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
190 struct drm_exec *exec)
191 {
192 struct xe_exec_queue *q;
193
194 lockdep_assert_held(&vm->lock);
195 xe_vm_assert_held(vm);
196
197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
198 q->ops->resume(q);
199
200 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
201 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
202 }
203 }
204
205 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
206 {
207 struct drm_gpuvm_exec vm_exec = {
208 .vm = &vm->gpuvm,
209 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
210 .num_fences = 1,
211 };
212 struct drm_exec *exec = &vm_exec.exec;
213 struct xe_validation_ctx ctx;
214 struct dma_fence *pfence;
215 int err;
216 bool wait;
217
218 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
219
220 down_write(&vm->lock);
221 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
222 if (err)
223 goto out_up_write;
224
225 pfence = xe_preempt_fence_create(q, q->lr.context,
226 ++q->lr.seqno);
227 if (IS_ERR(pfence)) {
228 err = PTR_ERR(pfence);
229 goto out_fini;
230 }
231
232 list_add(&q->lr.link, &vm->preempt.exec_queues);
233 ++vm->preempt.num_exec_queues;
234 q->lr.pfence = pfence;
235
236 xe_svm_notifier_lock(vm);
237
238 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
239 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
240
241 /*
242 * Check to see if a preemption on the VM or a userptr invalidation is in
243 * flight; if so, trigger this preempt fence to sync state with the other
244 * preempt fences on the VM.
245 */
246 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
247 if (wait)
248 dma_fence_enable_sw_signaling(pfence);
249
250 xe_svm_notifier_unlock(vm);
251
252 out_fini:
253 xe_validation_ctx_fini(&ctx);
254 out_up_write:
255 up_write(&vm->lock);
256
257 return err;
258 }
259 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
260
261 /**
262 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
263 * @vm: The VM.
264 * @q: The exec_queue
265 *
266 * Note that this function might be called multiple times on the same queue.
267 */
268 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
269 {
270 if (!xe_vm_in_preempt_fence_mode(vm))
271 return;
272
273 down_write(&vm->lock);
274 if (!list_empty(&q->lr.link)) {
275 list_del_init(&q->lr.link);
276 --vm->preempt.num_exec_queues;
277 }
278 if (q->lr.pfence) {
279 dma_fence_enable_sw_signaling(q->lr.pfence);
280 dma_fence_put(q->lr.pfence);
281 q->lr.pfence = NULL;
282 }
283 up_write(&vm->lock);
284 }
285
286 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
287
288 /**
289 * xe_vm_kill() - VM Kill
290 * @vm: The VM.
291 * @unlocked: Flag indicating the VM's dma-resv is not held
292 *
293 * Kill the VM by setting the banned flag, indicating the VM is no longer available
294 * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
295 */
296 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
297 {
298 struct xe_exec_queue *q;
299
300 lockdep_assert_held(&vm->lock);
301
302 if (unlocked)
303 xe_vm_lock(vm, false);
304
305 vm->flags |= XE_VM_FLAG_BANNED;
306 trace_xe_vm_kill(vm);
307
308 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
309 q->ops->kill(q);
310
311 if (unlocked)
312 xe_vm_unlock(vm);
313
314 /* TODO: Inform user the VM is banned */
315 }
316
317 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
318 {
319 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
320 struct drm_gpuva *gpuva;
321 int ret;
322
323 lockdep_assert_held(&vm->lock);
324 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
325 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
326 &vm->rebind_list);
327
328 if (!try_wait_for_completion(&vm->xe->pm_block))
329 return -EAGAIN;
330
331 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
332 if (ret)
333 return ret;
334
335 vm_bo->evicted = false;
336 return 0;
337 }
338
339 /**
340 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
341 * @vm: The vm for which we are rebinding.
342 * @exec: The struct drm_exec with the locked GEM objects.
343 * @num_fences: The number of fences to reserve for the operation, not
344 * including rebinds and validations.
345 *
346 * Validates all evicted gem objects and rebinds their vmas. Note that
347 * rebindings may cause evictions and hence the validation-rebind
348 * sequence is rerun until there are no more objects to validate.
349 *
350 * Return: 0 on success, negative error code on error. In particular,
351 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
352 * the drm_exec transaction needs to be restarted.
353 */
354 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
355 unsigned int num_fences)
356 {
357 struct drm_gem_object *obj;
358 unsigned long index;
359 int ret;
360
361 do {
362 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
363 if (ret)
364 return ret;
365
366 ret = xe_vm_rebind(vm, false);
367 if (ret)
368 return ret;
369 } while (!list_empty(&vm->gpuvm.evict.list));
370
371 drm_exec_for_each_locked_object(exec, index, obj) {
372 ret = dma_resv_reserve_fences(obj->resv, num_fences);
373 if (ret)
374 return ret;
375 }
376
377 return 0;
378 }
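/*
 * Illustrative caller pattern (a sketch under the assumption that the vm resv
 * is locked via drm_exec; not lifted from a specific call site): validation
 * and rebinding are driven from inside the locking loop so that -EDEADLK
 * restarts the whole transaction:
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, 1);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 */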
379
380 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
381 bool *done)
382 {
383 int err;
384
385 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
386 if (err)
387 return err;
388
389 if (xe_vm_is_idle(vm)) {
390 vm->preempt.rebind_deactivated = true;
391 *done = true;
392 return 0;
393 }
394
395 if (!preempt_fences_waiting(vm)) {
396 *done = true;
397 return 0;
398 }
399
400 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
401 if (err)
402 return err;
403
404 err = wait_for_existing_preempt_fences(vm);
405 if (err)
406 return err;
407
408 /*
409 * Add validation and rebinding to the locking loop since both can
410 * cause evictions which may require blocking dma_resv locks.
411 * The fence reservation here is intended for the new preempt fences
412 * we attach at the end of the rebind work.
413 */
414 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
415 }
416
417 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
418 {
419 struct xe_device *xe = vm->xe;
420 bool ret = false;
421
422 mutex_lock(&xe->rebind_resume_lock);
423 if (!try_wait_for_completion(&vm->xe->pm_block)) {
424 ret = true;
425 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
426 }
427 mutex_unlock(&xe->rebind_resume_lock);
428
429 return ret;
430 }
431
432 /**
433 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
434 * @vm: The vm whose preempt worker to resume.
435 *
436 * Resume a preempt worker that was previously suspended by
437 * vm_suspend_rebind_worker().
438 */
439 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
440 {
441 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
442 }
443
444 static void preempt_rebind_work_func(struct work_struct *w)
445 {
446 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
447 struct xe_validation_ctx ctx;
448 struct drm_exec exec;
449 unsigned int fence_count = 0;
450 LIST_HEAD(preempt_fences);
451 int err = 0;
452 long wait;
453 int __maybe_unused tries = 0;
454
455 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
456 trace_xe_vm_rebind_worker_enter(vm);
457
458 down_write(&vm->lock);
459
460 if (xe_vm_is_closed_or_banned(vm)) {
461 up_write(&vm->lock);
462 trace_xe_vm_rebind_worker_exit(vm);
463 return;
464 }
465
466 retry:
467 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
468 up_write(&vm->lock);
469 return;
470 }
471
472 if (xe_vm_userptr_check_repin(vm)) {
473 err = xe_vm_userptr_pin(vm);
474 if (err)
475 goto out_unlock_outer;
476 }
477
478 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
479 (struct xe_val_flags) {.interruptible = true});
480 if (err)
481 goto out_unlock_outer;
482
483 drm_exec_until_all_locked(&exec) {
484 bool done = false;
485
486 err = xe_preempt_work_begin(&exec, vm, &done);
487 drm_exec_retry_on_contention(&exec);
488 xe_validation_retry_on_oom(&ctx, &err);
489 if (err || done) {
490 xe_validation_ctx_fini(&ctx);
491 goto out_unlock_outer;
492 }
493 }
494
495 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
496 if (err)
497 goto out_unlock;
498
499 xe_vm_set_validation_exec(vm, &exec);
500 err = xe_vm_rebind(vm, true);
501 xe_vm_set_validation_exec(vm, NULL);
502 if (err)
503 goto out_unlock;
504
505 /* Wait on rebinds and munmap style VM unbinds */
506 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
507 DMA_RESV_USAGE_KERNEL,
508 false, MAX_SCHEDULE_TIMEOUT);
509 if (wait <= 0) {
510 err = -ETIME;
511 goto out_unlock;
512 }
513
514 #define retry_required(__tries, __vm) \
515 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
516 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
517 __xe_vm_userptr_needs_repin(__vm))
518
519 xe_svm_notifier_lock(vm);
520 if (retry_required(tries, vm)) {
521 xe_svm_notifier_unlock(vm);
522 err = -EAGAIN;
523 goto out_unlock;
524 }
525
526 #undef retry_required
527
528 spin_lock(&vm->xe->ttm.lru_lock);
529 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
530 spin_unlock(&vm->xe->ttm.lru_lock);
531
532 /* Point of no return. */
533 arm_preempt_fences(vm, &preempt_fences);
534 resume_and_reinstall_preempt_fences(vm, &exec);
535 xe_svm_notifier_unlock(vm);
536
537 out_unlock:
538 xe_validation_ctx_fini(&ctx);
539 out_unlock_outer:
540 if (err == -EAGAIN) {
541 trace_xe_vm_rebind_worker_retry(vm);
542 goto retry;
543 }
544
545 if (err) {
546 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
547 xe_vm_kill(vm, true);
548 }
549 up_write(&vm->lock);
550
551 free_preempt_fences(&preempt_fences);
552
553 trace_xe_vm_rebind_worker_exit(vm);
554 }
555
556 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
557 {
558 int i;
559
560 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
561 if (!vops->pt_update_ops[i].num_ops)
562 continue;
563
564 vops->pt_update_ops[i].ops =
565 kmalloc_array(vops->pt_update_ops[i].num_ops,
566 sizeof(*vops->pt_update_ops[i].ops),
567 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
568 if (!vops->pt_update_ops[i].ops)
569 return array_of_binds ? -ENOBUFS : -ENOMEM;
570 }
571
572 return 0;
573 }
574 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
575
576 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
577 {
578 struct xe_vma *vma;
579
580 vma = gpuva_to_vma(op->base.prefetch.va);
581
582 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
583 xa_destroy(&op->prefetch_range.range);
584 }
585
586 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
587 {
588 struct xe_vma_op *op;
589
590 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
591 return;
592
593 list_for_each_entry(op, &vops->list, link)
594 xe_vma_svm_prefetch_op_fini(op);
595 }
596
597 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
598 {
599 int i;
600
601 xe_vma_svm_prefetch_ops_fini(vops);
602
603 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
604 kfree(vops->pt_update_ops[i].ops);
605 }
606
607 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
608 {
609 int i;
610
611 if (!inc_val)
612 return;
613
614 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
615 if (BIT(i) & tile_mask)
616 vops->pt_update_ops[i].num_ops += inc_val;
617 }
618
619 #define XE_VMA_CREATE_MASK ( \
620 XE_VMA_READ_ONLY | \
621 XE_VMA_DUMPABLE | \
622 XE_VMA_SYSTEM_ALLOCATOR | \
623 DRM_GPUVA_SPARSE | \
624 XE_VMA_MADV_AUTORESET)
625
626 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
627 u8 tile_mask)
628 {
629 INIT_LIST_HEAD(&op->link);
630 op->tile_mask = tile_mask;
631 op->base.op = DRM_GPUVA_OP_MAP;
632 op->base.map.va.addr = vma->gpuva.va.addr;
633 op->base.map.va.range = vma->gpuva.va.range;
634 op->base.map.gem.obj = vma->gpuva.gem.obj;
635 op->base.map.gem.offset = vma->gpuva.gem.offset;
636 op->map.vma = vma;
637 op->map.immediate = true;
638 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
639 }
640
641 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
642 u8 tile_mask)
643 {
644 struct xe_vma_op *op;
645
646 op = kzalloc(sizeof(*op), GFP_KERNEL);
647 if (!op)
648 return -ENOMEM;
649
650 xe_vm_populate_rebind(op, vma, tile_mask);
651 list_add_tail(&op->link, &vops->list);
652 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
653
654 return 0;
655 }
656
657 static struct dma_fence *ops_execute(struct xe_vm *vm,
658 struct xe_vma_ops *vops);
659 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
660 struct xe_exec_queue *q,
661 struct xe_sync_entry *syncs, u32 num_syncs);
662
663 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
664 {
665 struct dma_fence *fence;
666 struct xe_vma *vma, *next;
667 struct xe_vma_ops vops;
668 struct xe_vma_op *op, *next_op;
669 int err, i;
670
671 lockdep_assert_held(&vm->lock);
672 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
673 list_empty(&vm->rebind_list))
674 return 0;
675
676 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
677 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
678 vops.pt_update_ops[i].wait_vm_bookkeep = true;
679
680 xe_vm_assert_held(vm);
681 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
682 xe_assert(vm->xe, vma->tile_present);
683
684 if (rebind_worker)
685 trace_xe_vma_rebind_worker(vma);
686 else
687 trace_xe_vma_rebind_exec(vma);
688
689 err = xe_vm_ops_add_rebind(&vops, vma,
690 vma->tile_present);
691 if (err)
692 goto free_ops;
693 }
694
695 err = xe_vma_ops_alloc(&vops, false);
696 if (err)
697 goto free_ops;
698
699 fence = ops_execute(vm, &vops);
700 if (IS_ERR(fence)) {
701 err = PTR_ERR(fence);
702 } else {
703 dma_fence_put(fence);
704 list_for_each_entry_safe(vma, next, &vm->rebind_list,
705 combined_links.rebind)
706 list_del_init(&vma->combined_links.rebind);
707 }
708 free_ops:
709 list_for_each_entry_safe(op, next_op, &vops.list, link) {
710 list_del(&op->link);
711 kfree(op);
712 }
713 xe_vma_ops_fini(&vops);
714
715 return err;
716 }
717
718 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
719 {
720 struct dma_fence *fence = NULL;
721 struct xe_vma_ops vops;
722 struct xe_vma_op *op, *next_op;
723 struct xe_tile *tile;
724 u8 id;
725 int err;
726
727 lockdep_assert_held(&vm->lock);
728 xe_vm_assert_held(vm);
729 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
730
731 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
732 for_each_tile(tile, vm->xe, id) {
733 vops.pt_update_ops[id].wait_vm_bookkeep = true;
734 vops.pt_update_ops[tile->id].q =
735 xe_migrate_exec_queue(tile->migrate);
736 }
737
738 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
739 if (err)
740 return ERR_PTR(err);
741
742 err = xe_vma_ops_alloc(&vops, false);
743 if (err) {
744 fence = ERR_PTR(err);
745 goto free_ops;
746 }
747
748 fence = ops_execute(vm, &vops);
749
750 free_ops:
751 list_for_each_entry_safe(op, next_op, &vops.list, link) {
752 list_del(&op->link);
753 kfree(op);
754 }
755 xe_vma_ops_fini(&vops);
756
757 return fence;
758 }
759
760 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
761 struct xe_vma *vma,
762 struct xe_svm_range *range,
763 u8 tile_mask)
764 {
765 INIT_LIST_HEAD(&op->link);
766 op->tile_mask = tile_mask;
767 op->base.op = DRM_GPUVA_OP_DRIVER;
768 op->subop = XE_VMA_SUBOP_MAP_RANGE;
769 op->map_range.vma = vma;
770 op->map_range.range = range;
771 }
772
773 static int
774 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
775 struct xe_vma *vma,
776 struct xe_svm_range *range,
777 u8 tile_mask)
778 {
779 struct xe_vma_op *op;
780
781 op = kzalloc(sizeof(*op), GFP_KERNEL);
782 if (!op)
783 return -ENOMEM;
784
785 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
786 list_add_tail(&op->link, &vops->list);
787 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
788
789 return 0;
790 }
791
792 /**
793 * xe_vm_range_rebind() - VM range (re)bind
794 * @vm: The VM which the range belongs to.
795 * @vma: The VMA which the range belongs to.
796 * @range: SVM range to rebind.
797 * @tile_mask: Tile mask to bind the range to.
798 *
799 * (re)bind SVM range setting up GPU page tables for the range.
800 *
801 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
802 * failure
803 */
804 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
805 struct xe_vma *vma,
806 struct xe_svm_range *range,
807 u8 tile_mask)
808 {
809 struct dma_fence *fence = NULL;
810 struct xe_vma_ops vops;
811 struct xe_vma_op *op, *next_op;
812 struct xe_tile *tile;
813 u8 id;
814 int err;
815
816 lockdep_assert_held(&vm->lock);
817 xe_vm_assert_held(vm);
818 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
819 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
820
821 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
822 for_each_tile(tile, vm->xe, id) {
823 vops.pt_update_ops[id].wait_vm_bookkeep = true;
824 vops.pt_update_ops[tile->id].q =
825 xe_migrate_exec_queue(tile->migrate);
826 }
827
828 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
829 if (err)
830 return ERR_PTR(err);
831
832 err = xe_vma_ops_alloc(&vops, false);
833 if (err) {
834 fence = ERR_PTR(err);
835 goto free_ops;
836 }
837
838 fence = ops_execute(vm, &vops);
839
840 free_ops:
841 list_for_each_entry_safe(op, next_op, &vops.list, link) {
842 list_del(&op->link);
843 kfree(op);
844 }
845 xe_vma_ops_fini(&vops);
846
847 return fence;
848 }
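/*
 * Illustrative use (a sketch, not a verbatim caller): a GPU pagefault handler
 * would typically wait on the returned fence before allowing the faulting
 * access to be retried:
 *
 *	fence = xe_vm_range_rebind(vm, vma, range, tile_mask);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */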
849
850 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
851 struct xe_svm_range *range)
852 {
853 INIT_LIST_HEAD(&op->link);
854 op->tile_mask = range->tile_present;
855 op->base.op = DRM_GPUVA_OP_DRIVER;
856 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
857 op->unmap_range.range = range;
858 }
859
860 static int
861 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
862 struct xe_svm_range *range)
863 {
864 struct xe_vma_op *op;
865
866 op = kzalloc(sizeof(*op), GFP_KERNEL);
867 if (!op)
868 return -ENOMEM;
869
870 xe_vm_populate_range_unbind(op, range);
871 list_add_tail(&op->link, &vops->list);
872 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
873
874 return 0;
875 }
876
877 /**
878 * xe_vm_range_unbind() - VM range unbind
879 * @vm: The VM which the range belongs to.
880 * @range: SVM range to unbind.
881 *
882 * Unbind SVM range removing the GPU page tables for the range.
883 *
884 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
885 * failure
886 */
887 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
888 struct xe_svm_range *range)
889 {
890 struct dma_fence *fence = NULL;
891 struct xe_vma_ops vops;
892 struct xe_vma_op *op, *next_op;
893 struct xe_tile *tile;
894 u8 id;
895 int err;
896
897 lockdep_assert_held(&vm->lock);
898 xe_vm_assert_held(vm);
899 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
900
901 if (!range->tile_present)
902 return dma_fence_get_stub();
903
904 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
905 for_each_tile(tile, vm->xe, id) {
906 vops.pt_update_ops[id].wait_vm_bookkeep = true;
907 vops.pt_update_ops[tile->id].q =
908 xe_migrate_exec_queue(tile->migrate);
909 }
910
911 err = xe_vm_ops_add_range_unbind(&vops, range);
912 if (err)
913 return ERR_PTR(err);
914
915 err = xe_vma_ops_alloc(&vops, false);
916 if (err) {
917 fence = ERR_PTR(err);
918 goto free_ops;
919 }
920
921 fence = ops_execute(vm, &vops);
922
923 free_ops:
924 list_for_each_entry_safe(op, next_op, &vops.list, link) {
925 list_del(&op->link);
926 kfree(op);
927 }
928 xe_vma_ops_fini(&vops);
929
930 return fence;
931 }
932
933 static void xe_vma_free(struct xe_vma *vma)
934 {
935 if (xe_vma_is_userptr(vma))
936 kfree(to_userptr_vma(vma));
937 else
938 kfree(vma);
939 }
940
941 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
942 struct xe_bo *bo,
943 u64 bo_offset_or_userptr,
944 u64 start, u64 end,
945 struct xe_vma_mem_attr *attr,
946 unsigned int flags)
947 {
948 struct xe_vma *vma;
949 struct xe_tile *tile;
950 u8 id;
951 bool is_null = (flags & DRM_GPUVA_SPARSE);
952 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
953
954 xe_assert(vm->xe, start < end);
955 xe_assert(vm->xe, end < vm->size);
956
957 /*
958 * Allocate and ensure that the xe_vma_is_userptr() return
959 * matches what was allocated.
960 */
961 if (!bo && !is_null && !is_cpu_addr_mirror) {
962 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
963
964 if (!uvma)
965 return ERR_PTR(-ENOMEM);
966
967 vma = &uvma->vma;
968 } else {
969 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
970 if (!vma)
971 return ERR_PTR(-ENOMEM);
972
973 if (bo)
974 vma->gpuva.gem.obj = &bo->ttm.base;
975 }
976
977 INIT_LIST_HEAD(&vma->combined_links.rebind);
978
979 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
980 vma->gpuva.vm = &vm->gpuvm;
981 vma->gpuva.va.addr = start;
982 vma->gpuva.va.range = end - start + 1;
983 vma->gpuva.flags = flags;
984
985 for_each_tile(tile, vm->xe, id)
986 vma->tile_mask |= 0x1 << id;
987
988 if (vm->xe->info.has_atomic_enable_pte_bit)
989 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
990
991 vma->attr = *attr;
992
993 if (bo) {
994 struct drm_gpuvm_bo *vm_bo;
995
996 xe_bo_assert_held(bo);
997
998 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
999 if (IS_ERR(vm_bo)) {
1000 xe_vma_free(vma);
1001 return ERR_CAST(vm_bo);
1002 }
1003
1004 drm_gpuvm_bo_extobj_add(vm_bo);
1005 drm_gem_object_get(&bo->ttm.base);
1006 vma->gpuva.gem.offset = bo_offset_or_userptr;
1007 drm_gpuva_link(&vma->gpuva, vm_bo);
1008 drm_gpuvm_bo_put(vm_bo);
1009 } else /* userptr or null */ {
1010 if (!is_null && !is_cpu_addr_mirror) {
1011 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1012 u64 size = end - start + 1;
1013 int err;
1014
1015 vma->gpuva.gem.offset = bo_offset_or_userptr;
1016
1017 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1018 if (err) {
1019 xe_vma_free(vma);
1020 return ERR_PTR(err);
1021 }
1022 }
1023
1024 xe_vm_get(vm);
1025 }
1026
1027 return vma;
1028 }
1029
1030 static void xe_vma_destroy_late(struct xe_vma *vma)
1031 {
1032 struct xe_vm *vm = xe_vma_vm(vma);
1033
1034 if (vma->ufence) {
1035 xe_sync_ufence_put(vma->ufence);
1036 vma->ufence = NULL;
1037 }
1038
1039 if (xe_vma_is_userptr(vma)) {
1040 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1041
1042 xe_userptr_remove(uvma);
1043 xe_vm_put(vm);
1044 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1045 xe_vm_put(vm);
1046 } else {
1047 xe_bo_put(xe_vma_bo(vma));
1048 }
1049
1050 xe_vma_free(vma);
1051 }
1052
1053 static void vma_destroy_work_func(struct work_struct *w)
1054 {
1055 struct xe_vma *vma =
1056 container_of(w, struct xe_vma, destroy_work);
1057
1058 xe_vma_destroy_late(vma);
1059 }
1060
1061 static void vma_destroy_cb(struct dma_fence *fence,
1062 struct dma_fence_cb *cb)
1063 {
1064 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1065
1066 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1067 queue_work(system_unbound_wq, &vma->destroy_work);
1068 }
1069
1070 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1071 {
1072 struct xe_vm *vm = xe_vma_vm(vma);
1073
1074 lockdep_assert_held_write(&vm->lock);
1075 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1076
1077 if (xe_vma_is_userptr(vma)) {
1078 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1079 xe_userptr_destroy(to_userptr_vma(vma));
1080 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1081 xe_bo_assert_held(xe_vma_bo(vma));
1082
1083 drm_gpuva_unlink(&vma->gpuva);
1084 }
1085
1086 xe_vm_assert_held(vm);
1087 if (fence) {
1088 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1089 vma_destroy_cb);
1090
1091 if (ret) {
1092 XE_WARN_ON(ret != -ENOENT);
1093 xe_vma_destroy_late(vma);
1094 }
1095 } else {
1096 xe_vma_destroy_late(vma);
1097 }
1098 }
1099
1100 /**
1101 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1102 * @exec: The drm_exec object we're currently locking for.
1103 * @vma: The vma for which we want to lock the vm resv and any attached
1104 * object's resv.
1105 *
1106 * Return: 0 on success, negative error code on error. In particular
1107 * may return -EDEADLK on WW transaction contention and -EINTR if
1108 * an interruptible wait is terminated by a signal.
1109 */
1110 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1111 {
1112 struct xe_vm *vm = xe_vma_vm(vma);
1113 struct xe_bo *bo = xe_vma_bo(vma);
1114 int err;
1115
1116 XE_WARN_ON(!vm);
1117
1118 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1119 if (!err && bo && !bo->vm)
1120 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1121
1122 return err;
1123 }
1124
1125 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1126 {
1127 struct xe_device *xe = xe_vma_vm(vma)->xe;
1128 struct xe_validation_ctx ctx;
1129 struct drm_exec exec;
1130 int err = 0;
1131
1132 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1133 err = xe_vm_lock_vma(&exec, vma);
1134 drm_exec_retry_on_contention(&exec);
1135 if (XE_WARN_ON(err))
1136 break;
1137 xe_vma_destroy(vma, NULL);
1138 }
1139 xe_assert(xe, !err);
1140 }
1141
1142 struct xe_vma *
1143 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1144 {
1145 struct drm_gpuva *gpuva;
1146
1147 lockdep_assert_held(&vm->lock);
1148
1149 if (xe_vm_is_closed_or_banned(vm))
1150 return NULL;
1151
1152 xe_assert(vm->xe, start + range <= vm->size);
1153
1154 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1155
1156 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1157 }
1158
1159 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1160 {
1161 int err;
1162
1163 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1164 lockdep_assert_held(&vm->lock);
1165
1166 mutex_lock(&vm->snap_mutex);
1167 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1168 mutex_unlock(&vm->snap_mutex);
1169 XE_WARN_ON(err); /* Shouldn't be possible */
1170
1171 return err;
1172 }
1173
1174 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1175 {
1176 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1177 lockdep_assert_held(&vm->lock);
1178
1179 mutex_lock(&vm->snap_mutex);
1180 drm_gpuva_remove(&vma->gpuva);
1181 mutex_unlock(&vm->snap_mutex);
1182 if (vm->usm.last_fault_vma == vma)
1183 vm->usm.last_fault_vma = NULL;
1184 }
1185
1186 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1187 {
1188 struct xe_vma_op *op;
1189
1190 op = kzalloc(sizeof(*op), GFP_KERNEL);
1191
1192 if (unlikely(!op))
1193 return NULL;
1194
1195 return &op->base;
1196 }
1197
1198 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1199
1200 static const struct drm_gpuvm_ops gpuvm_ops = {
1201 .op_alloc = xe_vm_op_alloc,
1202 .vm_bo_validate = xe_gpuvm_validate,
1203 .vm_free = xe_vm_free,
1204 };
1205
1206 static u64 pde_encode_pat_index(u16 pat_index)
1207 {
1208 u64 pte = 0;
1209
1210 if (pat_index & BIT(0))
1211 pte |= XE_PPGTT_PTE_PAT0;
1212
1213 if (pat_index & BIT(1))
1214 pte |= XE_PPGTT_PTE_PAT1;
1215
1216 return pte;
1217 }
1218
1219 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1220 {
1221 u64 pte = 0;
1222
1223 if (pat_index & BIT(0))
1224 pte |= XE_PPGTT_PTE_PAT0;
1225
1226 if (pat_index & BIT(1))
1227 pte |= XE_PPGTT_PTE_PAT1;
1228
1229 if (pat_index & BIT(2)) {
1230 if (pt_level)
1231 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1232 else
1233 pte |= XE_PPGTT_PTE_PAT2;
1234 }
1235
1236 if (pat_index & BIT(3))
1237 pte |= XELPG_PPGTT_PTE_PAT3;
1238
1239 if (pat_index & (BIT(4)))
1240 pte |= XE2_PPGTT_PTE_PAT4;
1241
1242 return pte;
1243 }
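/*
 * Worked example: for pat_index 0b01101 (bits 0, 2 and 3 set) and a leaf PTE
 * (pt_level == 0), pte_encode_pat_index() returns
 * XE_PPGTT_PTE_PAT0 | XE_PPGTT_PTE_PAT2 | XELPG_PPGTT_PTE_PAT3; for a
 * non-leaf entry (pt_level > 0), bit 2 selects XE_PPGTT_PDE_PDPE_PAT2 instead.
 */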
1244
1245 static u64 pte_encode_ps(u32 pt_level)
1246 {
1247 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1248
1249 if (pt_level == 1)
1250 return XE_PDE_PS_2M;
1251 else if (pt_level == 2)
1252 return XE_PDPE_PS_1G;
1253
1254 return 0;
1255 }
1256
1257 static u16 pde_pat_index(struct xe_bo *bo)
1258 {
1259 struct xe_device *xe = xe_bo_device(bo);
1260 u16 pat_index;
1261
1262 /*
1263 * We only have two bits to encode the PAT index in non-leaf nodes, but
1264 * these only point to other paging structures so we only need a minimal
1265 * selection of options. The user PAT index is only for encoding leaf
1266 * nodes, where we have use of more bits to do the encoding. The
1267 * non-leaf nodes are instead under driver control so the chosen index
1268 * here should be distinct from the user PAT index. Also the
1269 * corresponding coherency of the PAT index should be tied to the
1270 * allocation type of the page table (or at least we should pick
1271 * something which is always safe).
1272 */
1273 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1274 pat_index = xe->pat.idx[XE_CACHE_WB];
1275 else
1276 pat_index = xe->pat.idx[XE_CACHE_NONE];
1277
1278 xe_assert(xe, pat_index <= 3);
1279
1280 return pat_index;
1281 }
1282
1283 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1284 {
1285 u64 pde;
1286
1287 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1288 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1289 pde |= pde_encode_pat_index(pde_pat_index(bo));
1290
1291 return pde;
1292 }
1293
1294 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1295 u16 pat_index, u32 pt_level)
1296 {
1297 u64 pte;
1298
1299 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1300 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1301 pte |= pte_encode_pat_index(pat_index, pt_level);
1302 pte |= pte_encode_ps(pt_level);
1303
1304 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1305 pte |= XE_PPGTT_PTE_DM;
1306
1307 return pte;
1308 }
1309
1310 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1311 u16 pat_index, u32 pt_level)
1312 {
1313 pte |= XE_PAGE_PRESENT;
1314
1315 if (likely(!xe_vma_read_only(vma)))
1316 pte |= XE_PAGE_RW;
1317
1318 pte |= pte_encode_pat_index(pat_index, pt_level);
1319 pte |= pte_encode_ps(pt_level);
1320
1321 if (unlikely(xe_vma_is_null(vma)))
1322 pte |= XE_PTE_NULL;
1323
1324 return pte;
1325 }
1326
1327 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1328 u16 pat_index,
1329 u32 pt_level, bool devmem, u64 flags)
1330 {
1331 u64 pte;
1332
1333 /* Avoid passing random bits directly as flags */
1334 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1335
1336 pte = addr;
1337 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1338 pte |= pte_encode_pat_index(pat_index, pt_level);
1339 pte |= pte_encode_ps(pt_level);
1340
1341 if (devmem)
1342 pte |= XE_PPGTT_PTE_DM;
1343
1344 pte |= flags;
1345
1346 return pte;
1347 }
1348
1349 static const struct xe_pt_ops xelp_pt_ops = {
1350 .pte_encode_bo = xelp_pte_encode_bo,
1351 .pte_encode_vma = xelp_pte_encode_vma,
1352 .pte_encode_addr = xelp_pte_encode_addr,
1353 .pde_encode_bo = xelp_pde_encode_bo,
1354 };
1355
1356 static void vm_destroy_work_func(struct work_struct *w);
1357
1358 /**
1359 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1360 * given tile and vm.
1361 * @xe: xe device.
1362 * @tile: tile to set up for.
1363 * @vm: vm to set up for.
1364 * @exec: The struct drm_exec object used to lock the vm resv.
1365 *
1366 * Sets up a pagetable tree with one page-table per level and a single
1367 * leaf PTE. All pagetable entries point to the single page-table or,
1368 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1369 * writes become NOPs.
1370 *
1371 * Return: 0 on success, negative error code on error.
1372 */
1373 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1374 struct xe_vm *vm, struct drm_exec *exec)
1375 {
1376 u8 id = tile->id;
1377 int i;
1378
1379 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1380 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1381 if (IS_ERR(vm->scratch_pt[id][i])) {
1382 int err = PTR_ERR(vm->scratch_pt[id][i]);
1383
1384 vm->scratch_pt[id][i] = NULL;
1385 return err;
1386 }
1387 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1388 }
1389
1390 return 0;
1391 }
1392 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1393
1394 static void xe_vm_free_scratch(struct xe_vm *vm)
1395 {
1396 struct xe_tile *tile;
1397 u8 id;
1398
1399 if (!xe_vm_has_scratch(vm))
1400 return;
1401
1402 for_each_tile(tile, vm->xe, id) {
1403 u32 i;
1404
1405 if (!vm->pt_root[id])
1406 continue;
1407
1408 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1409 if (vm->scratch_pt[id][i])
1410 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1411 }
1412 }
1413
1414 static void xe_vm_pt_destroy(struct xe_vm *vm)
1415 {
1416 struct xe_tile *tile;
1417 u8 id;
1418
1419 xe_vm_assert_held(vm);
1420
1421 for_each_tile(tile, vm->xe, id) {
1422 if (vm->pt_root[id]) {
1423 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1424 vm->pt_root[id] = NULL;
1425 }
1426 }
1427 }
1428
1429 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1430 {
1431 struct drm_gem_object *vm_resv_obj;
1432 struct xe_validation_ctx ctx;
1433 struct drm_exec exec;
1434 struct xe_vm *vm;
1435 int err, number_tiles = 0;
1436 struct xe_tile *tile;
1437 u8 id;
1438
1439 /*
1440 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1441 * ever be in faulting mode.
1442 */
1443 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1444
1445 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1446 if (!vm)
1447 return ERR_PTR(-ENOMEM);
1448
1449 vm->xe = xe;
1450
1451 vm->size = 1ull << xe->info.va_bits;
1452 vm->flags = flags;
1453
1454 if (xef)
1455 vm->xef = xe_file_get(xef);
1456 /**
1457 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1458 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1459 * under a user-VM lock when the PXP session is started at exec_queue
1460 * creation time. Those are different VMs and therefore there is no risk
1461 * of deadlock, but we need to tell lockdep that this is the case or it
1462 * will print a warning.
1463 */
1464 if (flags & XE_VM_FLAG_GSC) {
1465 static struct lock_class_key gsc_vm_key;
1466
1467 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1468 } else {
1469 init_rwsem(&vm->lock);
1470 }
1471 mutex_init(&vm->snap_mutex);
1472
1473 INIT_LIST_HEAD(&vm->rebind_list);
1474
1475 INIT_LIST_HEAD(&vm->userptr.repin_list);
1476 INIT_LIST_HEAD(&vm->userptr.invalidated);
1477 spin_lock_init(&vm->userptr.invalidated_lock);
1478
1479 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1480
1481 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1482
1483 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1484 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1485
1486 for_each_tile(tile, xe, id)
1487 xe_range_fence_tree_init(&vm->rftree[id]);
1488
1489 vm->pt_ops = &xelp_pt_ops;
1490
1491 /*
1492 * Long-running workloads are not protected by the scheduler references.
1493 * By design, run_job for long-running workloads returns NULL and the
1494 * scheduler drops all the references of it, hence protecting the VM
1495 * for this case is necessary.
1496 */
1497 if (flags & XE_VM_FLAG_LR_MODE) {
1498 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1499 xe_pm_runtime_get_noresume(xe);
1500 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1501 }
1502
1503 err = xe_svm_init(vm);
1504 if (err)
1505 goto err_no_resv;
1506
1507 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1508 if (!vm_resv_obj) {
1509 err = -ENOMEM;
1510 goto err_svm_fini;
1511 }
1512
1513 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1514 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1515
1516 drm_gem_object_put(vm_resv_obj);
1517
1518 err = 0;
1519 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1520 err) {
1521 err = xe_vm_drm_exec_lock(vm, &exec);
1522 drm_exec_retry_on_contention(&exec);
1523
1524 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1525 vm->flags |= XE_VM_FLAG_64K;
1526
1527 for_each_tile(tile, xe, id) {
1528 if (flags & XE_VM_FLAG_MIGRATION &&
1529 tile->id != XE_VM_FLAG_TILE_ID(flags))
1530 continue;
1531
1532 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1533 &exec);
1534 if (IS_ERR(vm->pt_root[id])) {
1535 err = PTR_ERR(vm->pt_root[id]);
1536 vm->pt_root[id] = NULL;
1537 xe_vm_pt_destroy(vm);
1538 drm_exec_retry_on_contention(&exec);
1539 xe_validation_retry_on_oom(&ctx, &err);
1540 break;
1541 }
1542 }
1543 if (err)
1544 break;
1545
1546 if (xe_vm_has_scratch(vm)) {
1547 for_each_tile(tile, xe, id) {
1548 if (!vm->pt_root[id])
1549 continue;
1550
1551 err = xe_vm_create_scratch(xe, tile, vm, &exec);
1552 if (err) {
1553 xe_vm_free_scratch(vm);
1554 xe_vm_pt_destroy(vm);
1555 drm_exec_retry_on_contention(&exec);
1556 xe_validation_retry_on_oom(&ctx, &err);
1557 break;
1558 }
1559 }
1560 if (err)
1561 break;
1562 vm->batch_invalidate_tlb = true;
1563 }
1564
1565 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1566 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1567 vm->batch_invalidate_tlb = false;
1568 }
1569
1570 /* Fill pt_root after allocating scratch tables */
1571 for_each_tile(tile, xe, id) {
1572 if (!vm->pt_root[id])
1573 continue;
1574
1575 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1576 }
1577 }
1578 if (err)
1579 goto err_close;
1580
1581 /* Kernel migration VM shouldn't have a circular loop.. */
1582 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1583 for_each_tile(tile, xe, id) {
1584 struct xe_exec_queue *q;
1585 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1586
1587 if (!vm->pt_root[id])
1588 continue;
1589
1590 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1591 if (IS_ERR(q)) {
1592 err = PTR_ERR(q);
1593 goto err_close;
1594 }
1595 vm->q[id] = q;
1596 number_tiles++;
1597 }
1598 }
1599
1600 if (number_tiles > 1)
1601 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1602
1603 if (xef && xe->info.has_asid) {
1604 u32 asid;
1605
1606 down_write(&xe->usm.lock);
1607 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1608 XA_LIMIT(1, XE_MAX_ASID - 1),
1609 &xe->usm.next_asid, GFP_KERNEL);
1610 up_write(&xe->usm.lock);
1611 if (err < 0)
1612 goto err_close;
1613
1614 vm->usm.asid = asid;
1615 }
1616
1617 trace_xe_vm_create(vm);
1618
1619 return vm;
1620
1621 err_close:
1622 xe_vm_close_and_put(vm);
1623 return ERR_PTR(err);
1624
1625 err_svm_fini:
1626 if (flags & XE_VM_FLAG_FAULT_MODE) {
1627 vm->size = 0; /* close the vm */
1628 xe_svm_fini(vm);
1629 }
1630 err_no_resv:
1631 mutex_destroy(&vm->snap_mutex);
1632 for_each_tile(tile, xe, id)
1633 xe_range_fence_tree_fini(&vm->rftree[id]);
1634 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1635 if (vm->xef)
1636 xe_file_put(vm->xef);
1637 kfree(vm);
1638 if (flags & XE_VM_FLAG_LR_MODE)
1639 xe_pm_runtime_put(xe);
1640 return ERR_PTR(err);
1641 }
1642
1643 static void xe_vm_close(struct xe_vm *vm)
1644 {
1645 struct xe_device *xe = vm->xe;
1646 bool bound;
1647 int idx;
1648
1649 bound = drm_dev_enter(&xe->drm, &idx);
1650
1651 down_write(&vm->lock);
1652 if (xe_vm_in_fault_mode(vm))
1653 xe_svm_notifier_lock(vm);
1654
1655 vm->size = 0;
1656
1657 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1658 struct xe_tile *tile;
1659 struct xe_gt *gt;
1660 u8 id;
1661
1662 /* Wait for pending binds */
1663 dma_resv_wait_timeout(xe_vm_resv(vm),
1664 DMA_RESV_USAGE_BOOKKEEP,
1665 false, MAX_SCHEDULE_TIMEOUT);
1666
1667 if (bound) {
1668 for_each_tile(tile, xe, id)
1669 if (vm->pt_root[id])
1670 xe_pt_clear(xe, vm->pt_root[id]);
1671
1672 for_each_gt(gt, xe, id)
1673 xe_tlb_inval_vm(&gt->tlb_inval, vm);
1674 }
1675 }
1676
1677 if (xe_vm_in_fault_mode(vm))
1678 xe_svm_notifier_unlock(vm);
1679 up_write(&vm->lock);
1680
1681 if (bound)
1682 drm_dev_exit(idx);
1683 }
1684
1685 void xe_vm_close_and_put(struct xe_vm *vm)
1686 {
1687 LIST_HEAD(contested);
1688 struct xe_device *xe = vm->xe;
1689 struct xe_tile *tile;
1690 struct xe_vma *vma, *next_vma;
1691 struct drm_gpuva *gpuva, *next;
1692 u8 id;
1693
1694 xe_assert(xe, !vm->preempt.num_exec_queues);
1695
1696 xe_vm_close(vm);
1697 if (xe_vm_in_preempt_fence_mode(vm)) {
1698 mutex_lock(&xe->rebind_resume_lock);
1699 list_del_init(&vm->preempt.pm_activate_link);
1700 mutex_unlock(&xe->rebind_resume_lock);
1701 flush_work(&vm->preempt.rebind_work);
1702 }
1703 if (xe_vm_in_fault_mode(vm))
1704 xe_svm_close(vm);
1705
1706 down_write(&vm->lock);
1707 for_each_tile(tile, xe, id) {
1708 if (vm->q[id])
1709 xe_exec_queue_last_fence_put(vm->q[id], vm);
1710 }
1711 up_write(&vm->lock);
1712
1713 for_each_tile(tile, xe, id) {
1714 if (vm->q[id]) {
1715 xe_exec_queue_kill(vm->q[id]);
1716 xe_exec_queue_put(vm->q[id]);
1717 vm->q[id] = NULL;
1718 }
1719 }
1720
1721 down_write(&vm->lock);
1722 xe_vm_lock(vm, false);
1723 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1724 vma = gpuva_to_vma(gpuva);
1725
1726 if (xe_vma_has_no_bo(vma)) {
1727 xe_svm_notifier_lock(vm);
1728 vma->gpuva.flags |= XE_VMA_DESTROYED;
1729 xe_svm_notifier_unlock(vm);
1730 }
1731
1732 xe_vm_remove_vma(vm, vma);
1733
1734 /* easy case, remove from VMA? */
1735 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1736 list_del_init(&vma->combined_links.rebind);
1737 xe_vma_destroy(vma, NULL);
1738 continue;
1739 }
1740
1741 list_move_tail(&vma->combined_links.destroy, &contested);
1742 vma->gpuva.flags |= XE_VMA_DESTROYED;
1743 }
1744
1745 /*
1746 * All vm operations will add shared fences to resv.
1747 * The only exception is eviction for a shared object,
1748 * but even so, the unbind when evicted would still
1749 * install a fence to resv. Hence it's safe to
1750 * destroy the pagetables immediately.
1751 */
1752 xe_vm_free_scratch(vm);
1753 xe_vm_pt_destroy(vm);
1754 xe_vm_unlock(vm);
1755
1756 /*
1757 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1758 * Since we hold a refcount to the bo, we can remove and free
1759 * the members safely without locking.
1760 */
1761 list_for_each_entry_safe(vma, next_vma, &contested,
1762 combined_links.destroy) {
1763 list_del_init(&vma->combined_links.destroy);
1764 xe_vma_destroy_unlocked(vma);
1765 }
1766
1767 xe_svm_fini(vm);
1768
1769 up_write(&vm->lock);
1770
1771 down_write(&xe->usm.lock);
1772 if (vm->usm.asid) {
1773 void *lookup;
1774
1775 xe_assert(xe, xe->info.has_asid);
1776 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1777
1778 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1779 xe_assert(xe, lookup == vm);
1780 }
1781 up_write(&xe->usm.lock);
1782
1783 for_each_tile(tile, xe, id)
1784 xe_range_fence_tree_fini(&vm->rftree[id]);
1785
1786 xe_vm_put(vm);
1787 }
1788
1789 static void vm_destroy_work_func(struct work_struct *w)
1790 {
1791 struct xe_vm *vm =
1792 container_of(w, struct xe_vm, destroy_work);
1793 struct xe_device *xe = vm->xe;
1794 struct xe_tile *tile;
1795 u8 id;
1796
1797 /* xe_vm_close_and_put was not called? */
1798 xe_assert(xe, !vm->size);
1799
1800 if (xe_vm_in_preempt_fence_mode(vm))
1801 flush_work(&vm->preempt.rebind_work);
1802
1803 mutex_destroy(&vm->snap_mutex);
1804
1805 if (vm->flags & XE_VM_FLAG_LR_MODE)
1806 xe_pm_runtime_put(xe);
1807
1808 for_each_tile(tile, xe, id)
1809 XE_WARN_ON(vm->pt_root[id]);
1810
1811 trace_xe_vm_free(vm);
1812
1813 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1814
1815 if (vm->xef)
1816 xe_file_put(vm->xef);
1817
1818 kfree(vm);
1819 }
1820
1821 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1822 {
1823 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1824
1825 /* To destroy the VM we need to be able to sleep */
1826 queue_work(system_unbound_wq, &vm->destroy_work);
1827 }
1828
1829 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1830 {
1831 struct xe_vm *vm;
1832
1833 mutex_lock(&xef->vm.lock);
1834 vm = xa_load(&xef->vm.xa, id);
1835 if (vm)
1836 xe_vm_get(vm);
1837 mutex_unlock(&xef->vm.lock);
1838
1839 return vm;
1840 }
1841
1842 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1843 {
1844 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
1845 }
1846
1847 static struct xe_exec_queue *
1848 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1849 {
1850 return q ? q : vm->q[0];
1851 }
1852
1853 static struct xe_user_fence *
1854 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1855 {
1856 unsigned int i;
1857
1858 for (i = 0; i < num_syncs; i++) {
1859 struct xe_sync_entry *e = &syncs[i];
1860
1861 if (xe_sync_is_ufence(e))
1862 return xe_sync_ufence_get(e);
1863 }
1864
1865 return NULL;
1866 }
1867
1868 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1869 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1870 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1871
1872 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1873 struct drm_file *file)
1874 {
1875 struct xe_device *xe = to_xe_device(dev);
1876 struct xe_file *xef = to_xe_file(file);
1877 struct drm_xe_vm_create *args = data;
1878 struct xe_vm *vm;
1879 u32 id;
1880 int err;
1881 u32 flags = 0;
1882
1883 if (XE_IOCTL_DBG(xe, args->extensions))
1884 return -EINVAL;
1885
1886 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929))
1887 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1888
1889 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1890 !xe->info.has_usm))
1891 return -EINVAL;
1892
1893 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1894 return -EINVAL;
1895
1896 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1897 return -EINVAL;
1898
1899 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1900 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1901 !xe->info.needs_scratch))
1902 return -EINVAL;
1903
1904 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1905 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1906 return -EINVAL;
1907
1908 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1909 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1910 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1911 flags |= XE_VM_FLAG_LR_MODE;
1912 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1913 flags |= XE_VM_FLAG_FAULT_MODE;
1914
1915 vm = xe_vm_create(xe, flags, xef);
1916 if (IS_ERR(vm))
1917 return PTR_ERR(vm);
1918
1919 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1920 /* Warning: Security issue - never enable by default */
1921 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1922 #endif
1923
1924 /* user id alloc must always be last in ioctl to prevent UAF */
1925 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1926 if (err)
1927 goto err_close_and_put;
1928
1929 args->vm_id = id;
1930
1931 return 0;
1932
1933 err_close_and_put:
1934 xe_vm_close_and_put(vm);
1935
1936 return err;
1937 }
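/*
 * Userspace view (a hedged sketch, not taken from a real client): creating a
 * long-running VM with scratch pages through the uAPI handled by the ioctl
 * above:
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE |
 *			 DRM_XE_VM_CREATE_FLAG_LR_MODE,
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		return -errno;
 *	vm_id = create.vm_id;
 */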
1938
1939 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1940 struct drm_file *file)
1941 {
1942 struct xe_device *xe = to_xe_device(dev);
1943 struct xe_file *xef = to_xe_file(file);
1944 struct drm_xe_vm_destroy *args = data;
1945 struct xe_vm *vm;
1946 int err = 0;
1947
1948 if (XE_IOCTL_DBG(xe, args->pad) ||
1949 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1950 return -EINVAL;
1951
1952 mutex_lock(&xef->vm.lock);
1953 vm = xa_load(&xef->vm.xa, args->vm_id);
1954 if (XE_IOCTL_DBG(xe, !vm))
1955 err = -ENOENT;
1956 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1957 err = -EBUSY;
1958 else
1959 xa_erase(&xef->vm.xa, args->vm_id);
1960 mutex_unlock(&xef->vm.lock);
1961
1962 if (!err)
1963 xe_vm_close_and_put(vm);
1964
1965 return err;
1966 }
1967
1968 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
1969 {
1970 struct drm_gpuva *gpuva;
1971 u32 num_vmas = 0;
1972
1973 lockdep_assert_held(&vm->lock);
1974 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
1975 num_vmas++;
1976
1977 return num_vmas;
1978 }
1979
1980 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
1981 u64 end, struct drm_xe_mem_range_attr *attrs)
1982 {
1983 struct drm_gpuva *gpuva;
1984 int i = 0;
1985
1986 lockdep_assert_held(&vm->lock);
1987
1988 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
1989 struct xe_vma *vma = gpuva_to_vma(gpuva);
1990
1991 if (i == *num_vmas)
1992 return -ENOSPC;
1993
1994 attrs[i].start = xe_vma_start(vma);
1995 attrs[i].end = xe_vma_end(vma);
1996 attrs[i].atomic.val = vma->attr.atomic_access;
1997 attrs[i].pat_index.val = vma->attr.pat_index;
1998 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
1999 attrs[i].preferred_mem_loc.migration_policy =
2000 vma->attr.preferred_loc.migration_policy;
2001
2002 i++;
2003 }
2004
2005 *num_vmas = i;
2006 return 0;
2007 }
2008
xe_vm_query_vmas_attrs_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2009 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2010 {
2011 struct xe_device *xe = to_xe_device(dev);
2012 struct xe_file *xef = to_xe_file(file);
2013 struct drm_xe_mem_range_attr *mem_attrs;
2014 struct drm_xe_vm_query_mem_range_attr *args = data;
2015 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2016 struct xe_vm *vm;
2017 int err = 0;
2018
2019 if (XE_IOCTL_DBG(xe,
2020 ((args->num_mem_ranges == 0 &&
2021 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2022 (args->num_mem_ranges > 0 &&
2023 (!attrs_user ||
2024 args->sizeof_mem_range_attr !=
2025 sizeof(struct drm_xe_mem_range_attr))))))
2026 return -EINVAL;
2027
2028 vm = xe_vm_lookup(xef, args->vm_id);
2029 if (XE_IOCTL_DBG(xe, !vm))
2030 return -EINVAL;
2031
2032 err = down_read_interruptible(&vm->lock);
2033 if (err)
2034 goto put_vm;
2035
2036 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2037
2038 if (args->num_mem_ranges == 0 && !attrs_user) {
2039 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2040 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2041 goto unlock_vm;
2042 }
2043
2044 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2045 GFP_KERNEL | __GFP_ACCOUNT |
2046 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2047 if (!mem_attrs) {
2048 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2049 goto unlock_vm;
2050 }
2051
2052 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2053 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2054 args->start + args->range, mem_attrs);
2055 if (err)
2056 goto free_mem_attrs;
2057
2058 err = copy_to_user(attrs_user, mem_attrs,
2059 args->sizeof_mem_range_attr * args->num_mem_ranges);
2060 if (err)
2061 err = -EFAULT;
2062
2063 free_mem_attrs:
2064 kvfree(mem_attrs);
2065 unlock_vm:
2066 up_read(&vm->lock);
2067 put_vm:
2068 xe_vm_put(vm);
2069 return err;
2070 }
2071
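/* Return true if the 4K page starting at @page_addr overlaps @vma. */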
vma_matches(struct xe_vma * vma,u64 page_addr)2072 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2073 {
2074 if (page_addr > xe_vma_end(vma) - 1 ||
2075 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2076 return false;
2077
2078 return true;
2079 }
2080
2081 /**
2082 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2083 *
2084 * @vm: the xe_vm the vma belongs to
2085 * @page_addr: address to look up
 *
 * Return: The VMA covering @page_addr, or NULL if none is found.
2086  */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2087 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2088 {
2089 struct xe_vma *vma = NULL;
2090
2091 if (vm->usm.last_fault_vma) { /* Fast lookup */
2092 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2093 vma = vm->usm.last_fault_vma;
2094 }
2095 if (!vma)
2096 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2097
2098 return vma;
2099 }
2100
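/*
 * Translation from the uAPI prefetch memory-region instance to a TTM
 * placement: instance 0 is system memory (TT), higher instances select the
 * corresponding VRAM region.
 */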
2101 static const u32 region_to_mem_type[] = {
2102 XE_PL_TT,
2103 XE_PL_VRAM0,
2104 XE_PL_VRAM1,
2105 };
2106
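/*
 * Mark @vma as destroyed under the SVM notifier lock and, if the VMA was
 * already committed to the GPUVA tree, remove it from the VM.
 */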
prep_vma_destroy(struct xe_vm * vm,struct xe_vma * vma,bool post_commit)2107 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2108 bool post_commit)
2109 {
2110 xe_svm_notifier_lock(vm);
2111 vma->gpuva.flags |= XE_VMA_DESTROYED;
2112 xe_svm_notifier_unlock(vm);
2113 if (post_commit)
2114 xe_vm_remove_vma(vm, vma);
2115 }
2116
2117 #undef ULL
2118 #define ULL unsigned long long
2119
2120 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2121 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2122 {
2123 struct xe_vma *vma;
2124
2125 switch (op->op) {
2126 case DRM_GPUVA_OP_MAP:
2127 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2128 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2129 break;
2130 case DRM_GPUVA_OP_REMAP:
2131 vma = gpuva_to_vma(op->remap.unmap->va);
2132 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2133 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2134 op->remap.unmap->keep ? 1 : 0);
2135 if (op->remap.prev)
2136 vm_dbg(&xe->drm,
2137 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2138 (ULL)op->remap.prev->va.addr,
2139 (ULL)op->remap.prev->va.range);
2140 if (op->remap.next)
2141 vm_dbg(&xe->drm,
2142 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2143 (ULL)op->remap.next->va.addr,
2144 (ULL)op->remap.next->va.range);
2145 break;
2146 case DRM_GPUVA_OP_UNMAP:
2147 vma = gpuva_to_vma(op->unmap.va);
2148 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2149 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2150 op->unmap.keep ? 1 : 0);
2151 break;
2152 case DRM_GPUVA_OP_PREFETCH:
2153 vma = gpuva_to_vma(op->prefetch.va);
2154 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2155 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2156 break;
2157 default:
2158 drm_warn(&xe->drm, "NOT POSSIBLE");
2159 }
2160 }
2161 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2162 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2163 {
2164 }
2165 #endif
2166
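/*
 * In fault mode with scratch pages enabled, a bind without the IMMEDIATE flag
 * must invalidate any scratch PTEs covering the range so that later accesses
 * fault in real mappings instead of hitting the scratch page.
 */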
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2167 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2168 {
2169 if (!xe_vm_in_fault_mode(vm))
2170 return false;
2171
2172 if (!xe_vm_has_scratch(vm))
2173 return false;
2174
2175 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2176 return false;
2177
2178 return true;
2179 }
2180
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2181 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2182 {
2183 struct drm_gpuva_op *__op;
2184
2185 drm_gpuva_for_each_op(__op, ops) {
2186 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2187
2188 xe_vma_svm_prefetch_op_fini(op);
2189 }
2190 }
2191
2192 /*
2193  * Create the operations list from the IOCTL arguments and set up the operation
2194  * fields so that the parse and commit steps are decoupled from the IOCTL
 * arguments. This step can fail.
2195 */
2196 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2197 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2198 struct xe_bo *bo, u64 bo_offset_or_userptr,
2199 u64 addr, u64 range,
2200 u32 operation, u32 flags,
2201 u32 prefetch_region, u16 pat_index)
2202 {
2203 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2204 struct drm_gpuva_ops *ops;
2205 struct drm_gpuva_op *__op;
2206 struct drm_gpuvm_bo *vm_bo;
2207 u64 range_end = addr + range;
2208 int err;
2209
2210 lockdep_assert_held_write(&vm->lock);
2211
2212 vm_dbg(&vm->xe->drm,
2213 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2214 operation, (ULL)addr, (ULL)range,
2215 (ULL)bo_offset_or_userptr);
2216
2217 switch (operation) {
2218 case DRM_XE_VM_BIND_OP_MAP:
2219 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2220 struct drm_gpuvm_map_req map_req = {
2221 .map.va.addr = addr,
2222 .map.va.range = range,
2223 .map.gem.obj = obj,
2224 .map.gem.offset = bo_offset_or_userptr,
2225 };
2226
2227 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2228 break;
2229 }
2230 case DRM_XE_VM_BIND_OP_UNMAP:
2231 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2232 break;
2233 case DRM_XE_VM_BIND_OP_PREFETCH:
2234 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2235 break;
2236 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2237 xe_assert(vm->xe, bo);
2238
2239 err = xe_bo_lock(bo, true);
2240 if (err)
2241 return ERR_PTR(err);
2242
2243 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2244 if (IS_ERR(vm_bo)) {
2245 xe_bo_unlock(bo);
2246 return ERR_CAST(vm_bo);
2247 }
2248
2249 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2250 drm_gpuvm_bo_put(vm_bo);
2251 xe_bo_unlock(bo);
2252 break;
2253 default:
2254 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2255 ops = ERR_PTR(-EINVAL);
2256 }
2257 if (IS_ERR(ops))
2258 return ops;
2259
2260 drm_gpuva_for_each_op(__op, ops) {
2261 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2262
2263 if (__op->op == DRM_GPUVA_OP_MAP) {
2264 op->map.immediate =
2265 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2266 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2267 op->map.vma_flags |= XE_VMA_READ_ONLY;
2268 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2269 op->map.vma_flags |= DRM_GPUVA_SPARSE;
2270 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2271 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2272 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2273 op->map.vma_flags |= XE_VMA_DUMPABLE;
2274 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2275 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2276 op->map.pat_index = pat_index;
2277 op->map.invalidate_on_bind =
2278 __xe_vm_needs_clear_scratch_pages(vm, flags);
2279 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2280 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2281 struct xe_tile *tile;
2282 struct xe_svm_range *svm_range;
2283 struct drm_gpusvm_ctx ctx = {};
2284 struct drm_pagemap *dpagemap;
2285 u8 id, tile_mask = 0;
2286 u32 i;
2287
2288 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2289 op->prefetch.region = prefetch_region;
2290 break;
2291 }
2292
2293 ctx.read_only = xe_vma_read_only(vma);
2294 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2295 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2296
2297 for_each_tile(tile, vm->xe, id)
2298 tile_mask |= 0x1 << id;
2299
2300 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2301 op->prefetch_range.ranges_count = 0;
2302 tile = NULL;
2303
2304 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2305 dpagemap = xe_vma_resolve_pagemap(vma,
2306 xe_device_get_root_tile(vm->xe));
2307 /*
2308 			 * TODO: Once multi-GPU support is enabled, we will need
2309 			 * a way to derive the tile from the dpagemap.
2310 */
2311 if (dpagemap)
2312 tile = xe_device_get_root_tile(vm->xe);
2313 } else if (prefetch_region) {
2314 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2315 XE_PL_VRAM0];
2316 }
2317
2318 op->prefetch_range.tile = tile;
2319 alloc_next_range:
2320 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2321
2322 if (PTR_ERR(svm_range) == -ENOENT) {
2323 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2324
2325 addr = ret == ULONG_MAX ? 0 : ret;
2326 if (addr)
2327 goto alloc_next_range;
2328 else
2329 goto print_op_label;
2330 }
2331
2332 if (IS_ERR(svm_range)) {
2333 err = PTR_ERR(svm_range);
2334 goto unwind_prefetch_ops;
2335 }
2336
2337 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
2338 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2339 goto check_next_range;
2340 }
2341
2342 err = xa_alloc(&op->prefetch_range.range,
2343 &i, svm_range, xa_limit_32b,
2344 GFP_KERNEL);
2345
2346 if (err)
2347 goto unwind_prefetch_ops;
2348
2349 op->prefetch_range.ranges_count++;
2350 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2351 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2352 check_next_range:
2353 if (range_end > xe_svm_range_end(svm_range) &&
2354 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2355 addr = xe_svm_range_end(svm_range);
2356 goto alloc_next_range;
2357 }
2358 }
2359 print_op_label:
2360 print_op(vm->xe, __op);
2361 }
2362
2363 return ops;
2364
2365 unwind_prefetch_ops:
2366 xe_svm_prefetch_gpuva_ops_fini(ops);
2367 drm_gpuva_ops_free(&vm->gpuvm, ops);
2368 return ERR_PTR(err);
2369 }
2370
2371 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2372
new_vma(struct xe_vm * vm,struct drm_gpuva_op_map * op,struct xe_vma_mem_attr * attr,unsigned int flags)2373 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2374 struct xe_vma_mem_attr *attr, unsigned int flags)
2375 {
2376 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2377 struct xe_validation_ctx ctx;
2378 struct drm_exec exec;
2379 struct xe_vma *vma;
2380 int err = 0;
2381
2382 lockdep_assert_held_write(&vm->lock);
2383
2384 if (bo) {
2385 err = 0;
2386 xe_validation_guard(&ctx, &vm->xe->val, &exec,
2387 (struct xe_val_flags) {.interruptible = true}, err) {
2388 if (!bo->vm) {
2389 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2390 drm_exec_retry_on_contention(&exec);
2391 }
2392 if (!err) {
2393 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2394 drm_exec_retry_on_contention(&exec);
2395 }
2396 if (err)
2397 return ERR_PTR(err);
2398
2399 vma = xe_vma_create(vm, bo, op->gem.offset,
2400 op->va.addr, op->va.addr +
2401 op->va.range - 1, attr, flags);
2402 if (IS_ERR(vma))
2403 return vma;
2404
2405 if (!bo->vm) {
2406 err = add_preempt_fences(vm, bo);
2407 if (err) {
2408 prep_vma_destroy(vm, vma, false);
2409 xe_vma_destroy(vma, NULL);
2410 }
2411 }
2412 }
2413 if (err)
2414 return ERR_PTR(err);
2415 } else {
2416 vma = xe_vma_create(vm, NULL, op->gem.offset,
2417 op->va.addr, op->va.addr +
2418 op->va.range - 1, attr, flags);
2419 if (IS_ERR(vma))
2420 return vma;
2421
2422 if (xe_vma_is_userptr(vma))
2423 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2424 }
2425 if (err) {
2426 prep_vma_destroy(vm, vma, false);
2427 xe_vma_destroy_unlocked(vma);
2428 vma = ERR_PTR(err);
2429 }
2430
2431 return vma;
2432 }
2433
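/*
 * Largest page-table entry size recorded for @vma via its PTE-size flags;
 * defaults to 1G when no size has been recorded yet.
 */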
xe_vma_max_pte_size(struct xe_vma * vma)2434 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2435 {
2436 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2437 return SZ_1G;
2438 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2439 return SZ_2M;
2440 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2441 return SZ_64K;
2442 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2443 return SZ_4K;
2444
2445 	return SZ_1G;	/* Uninitialized, use max size */
2446 }
2447
xe_vma_set_pte_size(struct xe_vma * vma,u64 size)2448 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2449 {
2450 switch (size) {
2451 case SZ_1G:
2452 vma->gpuva.flags |= XE_VMA_PTE_1G;
2453 break;
2454 case SZ_2M:
2455 vma->gpuva.flags |= XE_VMA_PTE_2M;
2456 break;
2457 case SZ_64K:
2458 vma->gpuva.flags |= XE_VMA_PTE_64K;
2459 break;
2460 case SZ_4K:
2461 vma->gpuva.flags |= XE_VMA_PTE_4K;
2462 break;
2463 }
2464 }
2465
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2466 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2467 {
2468 int err = 0;
2469
2470 lockdep_assert_held_write(&vm->lock);
2471
2472 switch (op->base.op) {
2473 case DRM_GPUVA_OP_MAP:
2474 err |= xe_vm_insert_vma(vm, op->map.vma);
2475 if (!err)
2476 op->flags |= XE_VMA_OP_COMMITTED;
2477 break;
2478 case DRM_GPUVA_OP_REMAP:
2479 {
2480 u8 tile_present =
2481 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2482
2483 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2484 true);
2485 op->flags |= XE_VMA_OP_COMMITTED;
2486
2487 if (op->remap.prev) {
2488 err |= xe_vm_insert_vma(vm, op->remap.prev);
2489 if (!err)
2490 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2491 if (!err && op->remap.skip_prev) {
2492 op->remap.prev->tile_present =
2493 tile_present;
2494 op->remap.prev = NULL;
2495 }
2496 }
2497 if (op->remap.next) {
2498 err |= xe_vm_insert_vma(vm, op->remap.next);
2499 if (!err)
2500 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2501 if (!err && op->remap.skip_next) {
2502 op->remap.next->tile_present =
2503 tile_present;
2504 op->remap.next = NULL;
2505 }
2506 }
2507
2508 /* Adjust for partial unbind after removing VMA from VM */
2509 if (!err) {
2510 op->base.remap.unmap->va->va.addr = op->remap.start;
2511 op->base.remap.unmap->va->va.range = op->remap.range;
2512 }
2513 break;
2514 }
2515 case DRM_GPUVA_OP_UNMAP:
2516 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2517 op->flags |= XE_VMA_OP_COMMITTED;
2518 break;
2519 case DRM_GPUVA_OP_PREFETCH:
2520 op->flags |= XE_VMA_OP_COMMITTED;
2521 break;
2522 default:
2523 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2524 }
2525
2526 return err;
2527 }
2528
2529 /**
2530 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2531 * @vma: Pointer to the xe_vma structure to check
2532 *
2533 * This function determines whether the given VMA (Virtual Memory Area)
2534 * has its memory attributes set to their default values. Specifically,
2535 * it checks the following conditions:
2536 *
2537  * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
2538 * - `pat_index` is equal to `default_pat_index`
2539 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2540 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2541 *
2542 * Return: true if all attributes are at their default values, false otherwise.
2543 */
xe_vma_has_default_mem_attrs(struct xe_vma * vma)2544 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2545 {
2546 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2547 vma->attr.pat_index == vma->attr.default_pat_index &&
2548 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2549 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2550 }
2551
vm_bind_ioctl_ops_parse(struct xe_vm * vm,struct drm_gpuva_ops * ops,struct xe_vma_ops * vops)2552 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2553 struct xe_vma_ops *vops)
2554 {
2555 struct xe_device *xe = vm->xe;
2556 struct drm_gpuva_op *__op;
2557 struct xe_tile *tile;
2558 u8 id, tile_mask = 0;
2559 int err = 0;
2560
2561 lockdep_assert_held_write(&vm->lock);
2562
2563 for_each_tile(tile, vm->xe, id)
2564 tile_mask |= 0x1 << id;
2565
2566 drm_gpuva_for_each_op(__op, ops) {
2567 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2568 struct xe_vma *vma;
2569 unsigned int flags = 0;
2570
2571 INIT_LIST_HEAD(&op->link);
2572 list_add_tail(&op->link, &vops->list);
2573 op->tile_mask = tile_mask;
2574
2575 switch (op->base.op) {
2576 case DRM_GPUVA_OP_MAP:
2577 {
2578 struct xe_vma_mem_attr default_attr = {
2579 .preferred_loc = {
2580 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2581 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2582 },
2583 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2584 .default_pat_index = op->map.pat_index,
2585 .pat_index = op->map.pat_index,
2586 };
2587
2588 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
2589
2590 vma = new_vma(vm, &op->base.map, &default_attr,
2591 flags);
2592 if (IS_ERR(vma))
2593 return PTR_ERR(vma);
2594
2595 op->map.vma = vma;
2596 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2597 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
2598 op->map.invalidate_on_bind)
2599 xe_vma_ops_incr_pt_update_ops(vops,
2600 op->tile_mask, 1);
2601 break;
2602 }
2603 case DRM_GPUVA_OP_REMAP:
2604 {
2605 struct xe_vma *old =
2606 gpuva_to_vma(op->base.remap.unmap->va);
2607 bool skip = xe_vma_is_cpu_addr_mirror(old);
2608 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2609 int num_remap_ops = 0;
2610
2611 if (op->base.remap.prev)
2612 start = op->base.remap.prev->va.addr +
2613 op->base.remap.prev->va.range;
2614 if (op->base.remap.next)
2615 end = op->base.remap.next->va.addr;
2616
2617 if (xe_vma_is_cpu_addr_mirror(old) &&
2618 xe_svm_has_mapping(vm, start, end)) {
2619 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2620 xe_svm_unmap_address_range(vm, start, end);
2621 else
2622 return -EBUSY;
2623 }
2624
2625 op->remap.start = xe_vma_start(old);
2626 op->remap.range = xe_vma_size(old);
2627
2628 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
2629 if (op->base.remap.prev) {
2630 vma = new_vma(vm, op->base.remap.prev,
2631 &old->attr, flags);
2632 if (IS_ERR(vma))
2633 return PTR_ERR(vma);
2634
2635 op->remap.prev = vma;
2636
2637 /*
2638 * Userptr creates a new SG mapping so
2639 * we must also rebind.
2640 */
2641 op->remap.skip_prev = skip ||
2642 (!xe_vma_is_userptr(old) &&
2643 IS_ALIGNED(xe_vma_end(vma),
2644 xe_vma_max_pte_size(old)));
2645 if (op->remap.skip_prev) {
2646 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2647 op->remap.range -=
2648 xe_vma_end(vma) -
2649 xe_vma_start(old);
2650 op->remap.start = xe_vma_end(vma);
2651 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2652 (ULL)op->remap.start,
2653 (ULL)op->remap.range);
2654 } else {
2655 num_remap_ops++;
2656 }
2657 }
2658
2659 if (op->base.remap.next) {
2660 vma = new_vma(vm, op->base.remap.next,
2661 &old->attr, flags);
2662 if (IS_ERR(vma))
2663 return PTR_ERR(vma);
2664
2665 op->remap.next = vma;
2666
2667 /*
2668 * Userptr creates a new SG mapping so
2669 * we must also rebind.
2670 */
2671 op->remap.skip_next = skip ||
2672 (!xe_vma_is_userptr(old) &&
2673 IS_ALIGNED(xe_vma_start(vma),
2674 xe_vma_max_pte_size(old)));
2675 if (op->remap.skip_next) {
2676 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2677 op->remap.range -=
2678 xe_vma_end(old) -
2679 xe_vma_start(vma);
2680 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2681 (ULL)op->remap.start,
2682 (ULL)op->remap.range);
2683 } else {
2684 num_remap_ops++;
2685 }
2686 }
2687 if (!skip)
2688 num_remap_ops++;
2689
2690 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2691 break;
2692 }
2693 case DRM_GPUVA_OP_UNMAP:
2694 vma = gpuva_to_vma(op->base.unmap.va);
2695
2696 if (xe_vma_is_cpu_addr_mirror(vma) &&
2697 xe_svm_has_mapping(vm, xe_vma_start(vma),
2698 xe_vma_end(vma)))
2699 return -EBUSY;
2700
2701 if (!xe_vma_is_cpu_addr_mirror(vma))
2702 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2703 break;
2704 case DRM_GPUVA_OP_PREFETCH:
2705 vma = gpuva_to_vma(op->base.prefetch.va);
2706
2707 if (xe_vma_is_userptr(vma)) {
2708 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2709 if (err)
2710 return err;
2711 }
2712
2713 if (xe_vma_is_cpu_addr_mirror(vma))
2714 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2715 op->prefetch_range.ranges_count);
2716 else
2717 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2718
2719 break;
2720 default:
2721 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2722 }
2723
2724 err = xe_vma_op_commit(vm, op);
2725 if (err)
2726 return err;
2727 }
2728
2729 return 0;
2730 }
2731
xe_vma_op_unwind(struct xe_vm * vm,struct xe_vma_op * op,bool post_commit,bool prev_post_commit,bool next_post_commit)2732 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2733 bool post_commit, bool prev_post_commit,
2734 bool next_post_commit)
2735 {
2736 lockdep_assert_held_write(&vm->lock);
2737
2738 switch (op->base.op) {
2739 case DRM_GPUVA_OP_MAP:
2740 if (op->map.vma) {
2741 prep_vma_destroy(vm, op->map.vma, post_commit);
2742 xe_vma_destroy_unlocked(op->map.vma);
2743 }
2744 break;
2745 case DRM_GPUVA_OP_UNMAP:
2746 {
2747 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2748
2749 if (vma) {
2750 xe_svm_notifier_lock(vm);
2751 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2752 xe_svm_notifier_unlock(vm);
2753 if (post_commit)
2754 xe_vm_insert_vma(vm, vma);
2755 }
2756 break;
2757 }
2758 case DRM_GPUVA_OP_REMAP:
2759 {
2760 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2761
2762 if (op->remap.prev) {
2763 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2764 xe_vma_destroy_unlocked(op->remap.prev);
2765 }
2766 if (op->remap.next) {
2767 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2768 xe_vma_destroy_unlocked(op->remap.next);
2769 }
2770 if (vma) {
2771 xe_svm_notifier_lock(vm);
2772 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2773 xe_svm_notifier_unlock(vm);
2774 if (post_commit)
2775 xe_vm_insert_vma(vm, vma);
2776 }
2777 break;
2778 }
2779 case DRM_GPUVA_OP_PREFETCH:
2780 /* Nothing to do */
2781 break;
2782 default:
2783 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2784 }
2785 }
2786
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2787 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2788 struct drm_gpuva_ops **ops,
2789 int num_ops_list)
2790 {
2791 int i;
2792
2793 for (i = num_ops_list - 1; i >= 0; --i) {
2794 struct drm_gpuva_ops *__ops = ops[i];
2795 struct drm_gpuva_op *__op;
2796
2797 if (!__ops)
2798 continue;
2799
2800 drm_gpuva_for_each_op_reverse(__op, __ops) {
2801 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2802
2803 xe_vma_op_unwind(vm, op,
2804 op->flags & XE_VMA_OP_COMMITTED,
2805 op->flags & XE_VMA_OP_PREV_COMMITTED,
2806 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2807 }
2808 }
2809 }
2810
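/*
 * Lock the dma-resv of the VMA's BO if it is an external BO (VM-private BOs
 * share the VM's resv, which is locked elsewhere) and, if requested, validate
 * the BO into a GPU-accessible placement.
 */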
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,bool res_evict,bool validate)2811 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2812 bool res_evict, bool validate)
2813 {
2814 struct xe_bo *bo = xe_vma_bo(vma);
2815 struct xe_vm *vm = xe_vma_vm(vma);
2816 int err = 0;
2817
2818 if (bo) {
2819 if (!bo->vm)
2820 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2821 if (!err && validate)
2822 err = xe_bo_validate(bo, vm,
2823 !xe_vm_in_preempt_fence_mode(vm) &&
2824 res_evict, exec);
2825 }
2826
2827 return err;
2828 }
2829
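/*
 * A VMA with a pending (unsignalled) user fence cannot be rebound yet; once
 * the fence has signalled, drop our reference so a new user fence can be
 * attached.
 */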
check_ufence(struct xe_vma * vma)2830 static int check_ufence(struct xe_vma *vma)
2831 {
2832 if (vma->ufence) {
2833 struct xe_user_fence * const f = vma->ufence;
2834
2835 if (!xe_sync_ufence_get_status(f))
2836 return -EBUSY;
2837
2838 vma->ufence = NULL;
2839 xe_sync_ufence_put(f);
2840 }
2841
2842 return 0;
2843 }
2844
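/*
 * For a CPU-address-mirror prefetch, migrate each collected SVM range toward
 * the requested placement (system memory when no tile is given, VRAM on the
 * given tile when needed) and populate its pages before the bind is executed.
 */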
prefetch_ranges(struct xe_vm * vm,struct xe_vma_op * op)2845 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2846 {
2847 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2848 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2849 struct xe_tile *tile = op->prefetch_range.tile;
2850 int err = 0;
2851
2852 struct xe_svm_range *svm_range;
2853 struct drm_gpusvm_ctx ctx = {};
2854 unsigned long i;
2855
2856 if (!xe_vma_is_cpu_addr_mirror(vma))
2857 return 0;
2858
2859 ctx.read_only = xe_vma_read_only(vma);
2860 ctx.devmem_possible = devmem_possible;
2861 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2862 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
2863
2864 	/* TODO: Thread the migration */
2865 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2866 if (!tile)
2867 xe_svm_range_migrate_to_smem(vm, svm_range);
2868
2869 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
2870 err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2871 if (err) {
2872 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2873 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2874 return -ENODATA;
2875 }
2876 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2877 }
2878
2879 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2880 if (err) {
2881 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2882 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2883 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2884 err = -ENODATA;
2885 return err;
2886 }
2887 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2888 }
2889
2890 return err;
2891 }
2892
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_vma_op * op)2893 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2894 struct xe_vma_ops *vops, struct xe_vma_op *op)
2895 {
2896 int err = 0;
2897 bool res_evict;
2898
2899 /*
2900 	 * We only allow evicting a BO within the VM if the bind is not part of an
2901 	 * array of binds, as one bind in an array could otherwise evict a BO that
2902 	 * another bind in the same array depends on.
2903 */
2904 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
2905
2906 switch (op->base.op) {
2907 case DRM_GPUVA_OP_MAP:
2908 if (!op->map.invalidate_on_bind)
2909 err = vma_lock_and_validate(exec, op->map.vma,
2910 res_evict,
2911 !xe_vm_in_fault_mode(vm) ||
2912 op->map.immediate);
2913 break;
2914 case DRM_GPUVA_OP_REMAP:
2915 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2916 if (err)
2917 break;
2918
2919 err = vma_lock_and_validate(exec,
2920 gpuva_to_vma(op->base.remap.unmap->va),
2921 res_evict, false);
2922 if (!err && op->remap.prev)
2923 err = vma_lock_and_validate(exec, op->remap.prev,
2924 res_evict, true);
2925 if (!err && op->remap.next)
2926 err = vma_lock_and_validate(exec, op->remap.next,
2927 res_evict, true);
2928 break;
2929 case DRM_GPUVA_OP_UNMAP:
2930 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2931 if (err)
2932 break;
2933
2934 err = vma_lock_and_validate(exec,
2935 gpuva_to_vma(op->base.unmap.va),
2936 res_evict, false);
2937 break;
2938 case DRM_GPUVA_OP_PREFETCH:
2939 {
2940 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2941 u32 region;
2942
2943 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2944 region = op->prefetch.region;
2945 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
2946 region <= ARRAY_SIZE(region_to_mem_type));
2947 }
2948
2949 err = vma_lock_and_validate(exec,
2950 gpuva_to_vma(op->base.prefetch.va),
2951 res_evict, false);
2952 if (!err && !xe_vma_has_no_bo(vma))
2953 err = xe_bo_migrate(xe_vma_bo(vma),
2954 region_to_mem_type[region],
2955 NULL,
2956 exec);
2957 break;
2958 }
2959 default:
2960 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2961 }
2962
2963 return err;
2964 }
2965
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)2966 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
2967 {
2968 struct xe_vma_op *op;
2969 int err;
2970
2971 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
2972 return 0;
2973
2974 list_for_each_entry(op, &vops->list, link) {
2975 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
2976 err = prefetch_ranges(vm, op);
2977 if (err)
2978 return err;
2979 }
2980 }
2981
2982 return 0;
2983 }
2984
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)2985 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2986 struct xe_vm *vm,
2987 struct xe_vma_ops *vops)
2988 {
2989 struct xe_vma_op *op;
2990 int err;
2991
2992 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2993 if (err)
2994 return err;
2995
2996 list_for_each_entry(op, &vops->list, link) {
2997 err = op_lock_and_prep(exec, vm, vops, op);
2998 if (err)
2999 return err;
3000 }
3001
3002 #ifdef TEST_VM_OPS_ERROR
3003 if (vops->inject_error &&
3004 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3005 return -ENOSPC;
3006 #endif
3007
3008 return 0;
3009 }
3010
op_trace(struct xe_vma_op * op)3011 static void op_trace(struct xe_vma_op *op)
3012 {
3013 switch (op->base.op) {
3014 case DRM_GPUVA_OP_MAP:
3015 trace_xe_vma_bind(op->map.vma);
3016 break;
3017 case DRM_GPUVA_OP_REMAP:
3018 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3019 if (op->remap.prev)
3020 trace_xe_vma_bind(op->remap.prev);
3021 if (op->remap.next)
3022 trace_xe_vma_bind(op->remap.next);
3023 break;
3024 case DRM_GPUVA_OP_UNMAP:
3025 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3026 break;
3027 case DRM_GPUVA_OP_PREFETCH:
3028 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3029 break;
3030 case DRM_GPUVA_OP_DRIVER:
3031 break;
3032 default:
3033 XE_WARN_ON("NOT POSSIBLE");
3034 }
3035 }
3036
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3037 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3038 {
3039 struct xe_vma_op *op;
3040
3041 list_for_each_entry(op, &vops->list, link)
3042 op_trace(op);
3043 }
3044
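/*
 * Assign a bind exec queue to each tile's PT update ops and return the number
 * of tiles that actually have work queued.
 */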
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3045 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3046 {
3047 struct xe_exec_queue *q = vops->q;
3048 struct xe_tile *tile;
3049 int number_tiles = 0;
3050 u8 id;
3051
3052 for_each_tile(tile, vm->xe, id) {
3053 if (vops->pt_update_ops[id].num_ops)
3054 ++number_tiles;
3055
3056 if (vops->pt_update_ops[id].q)
3057 continue;
3058
3059 if (q) {
3060 vops->pt_update_ops[id].q = q;
3061 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3062 q = list_next_entry(q, multi_gt_list);
3063 } else {
3064 vops->pt_update_ops[id].q = vm->q[id];
3065 }
3066 }
3067
3068 return number_tiles;
3069 }
3070
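/*
 * Prepare and run the page-table update jobs for every tile with work,
 * returning a single fence (a dma_fence_array when more than one tile is
 * involved) that tracks completion of all of them.
 */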
ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3071 static struct dma_fence *ops_execute(struct xe_vm *vm,
3072 struct xe_vma_ops *vops)
3073 {
3074 struct xe_tile *tile;
3075 struct dma_fence *fence = NULL;
3076 struct dma_fence **fences = NULL;
3077 struct dma_fence_array *cf = NULL;
3078 int number_tiles = 0, current_fence = 0, err;
3079 u8 id;
3080
3081 number_tiles = vm_ops_setup_tile_args(vm, vops);
3082 if (number_tiles == 0)
3083 return ERR_PTR(-ENODATA);
3084
3085 if (number_tiles > 1) {
3086 fences = kmalloc_array(number_tiles, sizeof(*fences),
3087 GFP_KERNEL);
3088 if (!fences) {
3089 fence = ERR_PTR(-ENOMEM);
3090 goto err_trace;
3091 }
3092 }
3093
3094 for_each_tile(tile, vm->xe, id) {
3095 if (!vops->pt_update_ops[id].num_ops)
3096 continue;
3097
3098 err = xe_pt_update_ops_prepare(tile, vops);
3099 if (err) {
3100 fence = ERR_PTR(err);
3101 goto err_out;
3102 }
3103 }
3104
3105 trace_xe_vm_ops_execute(vops);
3106
3107 for_each_tile(tile, vm->xe, id) {
3108 if (!vops->pt_update_ops[id].num_ops)
3109 continue;
3110
3111 fence = xe_pt_update_ops_run(tile, vops);
3112 if (IS_ERR(fence))
3113 goto err_out;
3114
3115 if (fences)
3116 fences[current_fence++] = fence;
3117 }
3118
3119 if (fences) {
3120 cf = dma_fence_array_create(number_tiles, fences,
3121 vm->composite_fence_ctx,
3122 vm->composite_fence_seqno++,
3123 false);
3124 if (!cf) {
3125 --vm->composite_fence_seqno;
3126 fence = ERR_PTR(-ENOMEM);
3127 goto err_out;
3128 }
3129 fence = &cf->base;
3130 }
3131
3132 for_each_tile(tile, vm->xe, id) {
3133 if (!vops->pt_update_ops[id].num_ops)
3134 continue;
3135
3136 xe_pt_update_ops_fini(tile, vops);
3137 }
3138
3139 return fence;
3140
3141 err_out:
3142 for_each_tile(tile, vm->xe, id) {
3143 if (!vops->pt_update_ops[id].num_ops)
3144 continue;
3145
3146 xe_pt_update_ops_abort(tile, vops);
3147 }
3148 while (current_fence)
3149 dma_fence_put(fences[--current_fence]);
3150 kfree(fences);
3151 kfree(cf);
3152
3153 err_trace:
3154 trace_xe_vm_ops_fail(vm);
3155 return fence;
3156 }
3157
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3158 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3159 {
3160 if (vma->ufence)
3161 xe_sync_ufence_put(vma->ufence);
3162 vma->ufence = __xe_sync_ufence_get(ufence);
3163 }
3164
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3165 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3166 struct xe_user_fence *ufence)
3167 {
3168 switch (op->base.op) {
3169 case DRM_GPUVA_OP_MAP:
3170 vma_add_ufence(op->map.vma, ufence);
3171 break;
3172 case DRM_GPUVA_OP_REMAP:
3173 if (op->remap.prev)
3174 vma_add_ufence(op->remap.prev, ufence);
3175 if (op->remap.next)
3176 vma_add_ufence(op->remap.next, ufence);
3177 break;
3178 case DRM_GPUVA_OP_UNMAP:
3179 break;
3180 case DRM_GPUVA_OP_PREFETCH:
3181 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3182 break;
3183 default:
3184 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3185 }
3186 }
3187
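/*
 * Post-execution cleanup for a bind: attach the user fence (if any) to the
 * affected VMAs, schedule destruction of VMAs removed by UNMAP/REMAP once
 * @fence signals, signal the sync entries and record the last fence on the
 * wait exec queue.
 */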
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3188 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3189 struct dma_fence *fence)
3190 {
3191 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3192 struct xe_user_fence *ufence;
3193 struct xe_vma_op *op;
3194 int i;
3195
3196 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3197 list_for_each_entry(op, &vops->list, link) {
3198 if (ufence)
3199 op_add_ufence(vm, op, ufence);
3200
3201 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3202 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3203 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3204 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3205 fence);
3206 }
3207 if (ufence)
3208 xe_sync_ufence_put(ufence);
3209 if (fence) {
3210 for (i = 0; i < vops->num_syncs; i++)
3211 xe_sync_entry_signal(vops->syncs + i, fence);
3212 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3213 }
3214 }
3215
vm_bind_ioctl_ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3216 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3217 struct xe_vma_ops *vops)
3218 {
3219 struct xe_validation_ctx ctx;
3220 struct drm_exec exec;
3221 struct dma_fence *fence;
3222 int err = 0;
3223
3224 lockdep_assert_held_write(&vm->lock);
3225
3226 xe_validation_guard(&ctx, &vm->xe->val, &exec,
3227 ((struct xe_val_flags) {
3228 .interruptible = true,
3229 .exec_ignore_duplicates = true,
3230 }), err) {
3231 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3232 drm_exec_retry_on_contention(&exec);
3233 xe_validation_retry_on_oom(&ctx, &err);
3234 if (err)
3235 return ERR_PTR(err);
3236
3237 xe_vm_set_validation_exec(vm, &exec);
3238 fence = ops_execute(vm, vops);
3239 xe_vm_set_validation_exec(vm, NULL);
3240 if (IS_ERR(fence)) {
3241 if (PTR_ERR(fence) == -ENODATA)
3242 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3243 return fence;
3244 }
3245
3246 vm_bind_ioctl_ops_fini(vm, vops, fence);
3247 }
3248
3249 return err ? ERR_PTR(err) : fence;
3250 }
3251 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3252
3253 #define SUPPORTED_FLAGS_STUB \
3254 (DRM_XE_VM_BIND_FLAG_READONLY | \
3255 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3256 DRM_XE_VM_BIND_FLAG_NULL | \
3257 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3258 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3259 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3260 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
3261
3262 #ifdef TEST_VM_OPS_ERROR
3263 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3264 #else
3265 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3266 #endif
3267
3268 #define XE_64K_PAGE_MASK 0xffffull
3269 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3270
vm_bind_ioctl_check_args(struct xe_device * xe,struct xe_vm * vm,struct drm_xe_vm_bind * args,struct drm_xe_vm_bind_op ** bind_ops)3271 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3272 struct drm_xe_vm_bind *args,
3273 struct drm_xe_vm_bind_op **bind_ops)
3274 {
3275 int err;
3276 int i;
3277
3278 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3279 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3280 return -EINVAL;
3281
3282 if (XE_IOCTL_DBG(xe, args->extensions))
3283 return -EINVAL;
3284
3285 if (args->num_binds > 1) {
3286 u64 __user *bind_user =
3287 u64_to_user_ptr(args->vector_of_binds);
3288
3289 *bind_ops = kvmalloc_array(args->num_binds,
3290 sizeof(struct drm_xe_vm_bind_op),
3291 GFP_KERNEL | __GFP_ACCOUNT |
3292 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3293 if (!*bind_ops)
3294 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3295
3296 err = copy_from_user(*bind_ops, bind_user,
3297 sizeof(struct drm_xe_vm_bind_op) *
3298 args->num_binds);
3299 if (XE_IOCTL_DBG(xe, err)) {
3300 err = -EFAULT;
3301 goto free_bind_ops;
3302 }
3303 } else {
3304 *bind_ops = &args->bind;
3305 }
3306
3307 for (i = 0; i < args->num_binds; ++i) {
3308 u64 range = (*bind_ops)[i].range;
3309 u64 addr = (*bind_ops)[i].addr;
3310 u32 op = (*bind_ops)[i].op;
3311 u32 flags = (*bind_ops)[i].flags;
3312 u32 obj = (*bind_ops)[i].obj;
3313 u64 obj_offset = (*bind_ops)[i].obj_offset;
3314 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3315 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3316 bool is_cpu_addr_mirror = flags &
3317 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3318 u16 pat_index = (*bind_ops)[i].pat_index;
3319 u16 coh_mode;
3320
3321 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3322 (!xe_vm_in_fault_mode(vm) ||
3323 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3324 err = -EINVAL;
3325 goto free_bind_ops;
3326 }
3327
3328 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3329 err = -EINVAL;
3330 goto free_bind_ops;
3331 }
3332
3333 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3334 (*bind_ops)[i].pat_index = pat_index;
3335 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3336 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3337 err = -EINVAL;
3338 goto free_bind_ops;
3339 }
3340
3341 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3342 err = -EINVAL;
3343 goto free_bind_ops;
3344 }
3345
3346 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3347 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3348 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3349 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3350 is_cpu_addr_mirror)) ||
3351 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3352 (is_null || is_cpu_addr_mirror)) ||
3353 XE_IOCTL_DBG(xe, !obj &&
3354 op == DRM_XE_VM_BIND_OP_MAP &&
3355 !is_null && !is_cpu_addr_mirror) ||
3356 XE_IOCTL_DBG(xe, !obj &&
3357 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3358 XE_IOCTL_DBG(xe, addr &&
3359 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3360 XE_IOCTL_DBG(xe, range &&
3361 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3362 XE_IOCTL_DBG(xe, obj &&
3363 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3364 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3365 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3366 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3367 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3368 XE_IOCTL_DBG(xe, obj &&
3369 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3370 XE_IOCTL_DBG(xe, prefetch_region &&
3371 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3372 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3373 !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
3374 XE_IOCTL_DBG(xe, obj &&
3375 op == DRM_XE_VM_BIND_OP_UNMAP) ||
3376 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
3377 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
3378 err = -EINVAL;
3379 goto free_bind_ops;
3380 }
3381
3382 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3383 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3384 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3385 XE_IOCTL_DBG(xe, !range &&
3386 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3387 err = -EINVAL;
3388 goto free_bind_ops;
3389 }
3390 }
3391
3392 return 0;
3393
3394 free_bind_ops:
3395 if (args->num_binds > 1)
3396 kvfree(*bind_ops);
3397 *bind_ops = NULL;
3398 return err;
3399 }
3400
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3401 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3402 struct xe_exec_queue *q,
3403 struct xe_sync_entry *syncs,
3404 int num_syncs)
3405 {
3406 struct dma_fence *fence;
3407 int i, err = 0;
3408
3409 fence = xe_sync_in_fence_get(syncs, num_syncs,
3410 to_wait_exec_queue(vm, q), vm);
3411 if (IS_ERR(fence))
3412 return PTR_ERR(fence);
3413
3414 for (i = 0; i < num_syncs; i++)
3415 xe_sync_entry_signal(&syncs[i], fence);
3416
3417 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3418 fence);
3419 dma_fence_put(fence);
3420
3421 return err;
3422 }
3423
xe_vma_ops_init(struct xe_vma_ops * vops,struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,u32 num_syncs)3424 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3425 struct xe_exec_queue *q,
3426 struct xe_sync_entry *syncs, u32 num_syncs)
3427 {
3428 memset(vops, 0, sizeof(*vops));
3429 INIT_LIST_HEAD(&vops->list);
3430 vops->vm = vm;
3431 vops->q = q;
3432 vops->syncs = syncs;
3433 vops->num_syncs = num_syncs;
3434 vops->flags = 0;
3435 }
3436
xe_vm_bind_ioctl_validate_bo(struct xe_device * xe,struct xe_bo * bo,u64 addr,u64 range,u64 obj_offset,u16 pat_index,u32 op,u32 bind_flags)3437 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3438 u64 addr, u64 range, u64 obj_offset,
3439 u16 pat_index, u32 op, u32 bind_flags)
3440 {
3441 u16 coh_mode;
3442
3443 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3444 XE_IOCTL_DBG(xe, obj_offset >
3445 xe_bo_size(bo) - range)) {
3446 return -EINVAL;
3447 }
3448
3449 /*
3450 * Some platforms require 64k VM_BIND alignment,
3451 * specifically those with XE_VRAM_FLAGS_NEED64K.
3452 *
3453 	 * Other platforms may have BOs with a 64k physical placement,
3454 	 * but these can still be mapped at 4k offsets. This check only
3455 	 * applies to the former case.
3456 */
3457 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3458 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3459 if (XE_IOCTL_DBG(xe, obj_offset &
3460 XE_64K_PAGE_MASK) ||
3461 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3462 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3463 return -EINVAL;
3464 }
3465 }
3466
3467 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3468 if (bo->cpu_caching) {
3469 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3470 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3471 return -EINVAL;
3472 }
3473 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3474 /*
3475 		 * An imported dma-buf from a different device should
3476 		 * require 1-way or 2-way coherency since we don't know
3477 		 * how it was mapped on the CPU. Just assume it is
3478 		 * potentially cached on the CPU side.
3479 */
3480 return -EINVAL;
3481 }
3482
3483 /* If a BO is protected it can only be mapped if the key is still valid */
3484 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3485 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3486 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3487 return -ENOEXEC;
3488
3489 return 0;
3490 }
3491
xe_vm_bind_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3492 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3493 {
3494 struct xe_device *xe = to_xe_device(dev);
3495 struct xe_file *xef = to_xe_file(file);
3496 struct drm_xe_vm_bind *args = data;
3497 struct drm_xe_sync __user *syncs_user;
3498 struct xe_bo **bos = NULL;
3499 struct drm_gpuva_ops **ops = NULL;
3500 struct xe_vm *vm;
3501 struct xe_exec_queue *q = NULL;
3502 u32 num_syncs, num_ufence = 0;
3503 struct xe_sync_entry *syncs = NULL;
3504 struct drm_xe_vm_bind_op *bind_ops = NULL;
3505 struct xe_vma_ops vops;
3506 struct dma_fence *fence;
3507 int err;
3508 int i;
3509
3510 vm = xe_vm_lookup(xef, args->vm_id);
3511 if (XE_IOCTL_DBG(xe, !vm))
3512 return -EINVAL;
3513
3514 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3515 if (err)
3516 goto put_vm;
3517
3518 if (args->exec_queue_id) {
3519 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3520 if (XE_IOCTL_DBG(xe, !q)) {
3521 err = -ENOENT;
3522 goto free_bind_ops;
3523 }
3524
3525 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3526 err = -EINVAL;
3527 goto put_exec_queue;
3528 }
3529 }
3530
3531 	/* Ensure all UNMAPs are visible */
3532 xe_svm_flush(vm);
3533
3534 err = down_write_killable(&vm->lock);
3535 if (err)
3536 goto put_exec_queue;
3537
3538 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3539 err = -ENOENT;
3540 goto release_vm_lock;
3541 }
3542
3543 for (i = 0; i < args->num_binds; ++i) {
3544 u64 range = bind_ops[i].range;
3545 u64 addr = bind_ops[i].addr;
3546
3547 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3548 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3549 err = -EINVAL;
3550 goto release_vm_lock;
3551 }
3552 }
3553
3554 if (args->num_binds) {
3555 bos = kvcalloc(args->num_binds, sizeof(*bos),
3556 GFP_KERNEL | __GFP_ACCOUNT |
3557 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3558 if (!bos) {
3559 err = -ENOMEM;
3560 goto release_vm_lock;
3561 }
3562
3563 ops = kvcalloc(args->num_binds, sizeof(*ops),
3564 GFP_KERNEL | __GFP_ACCOUNT |
3565 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3566 if (!ops) {
3567 err = -ENOMEM;
3568 goto free_bos;
3569 }
3570 }
3571
3572 for (i = 0; i < args->num_binds; ++i) {
3573 struct drm_gem_object *gem_obj;
3574 u64 range = bind_ops[i].range;
3575 u64 addr = bind_ops[i].addr;
3576 u32 obj = bind_ops[i].obj;
3577 u64 obj_offset = bind_ops[i].obj_offset;
3578 u16 pat_index = bind_ops[i].pat_index;
3579 u32 op = bind_ops[i].op;
3580 u32 bind_flags = bind_ops[i].flags;
3581
3582 if (!obj)
3583 continue;
3584
3585 gem_obj = drm_gem_object_lookup(file, obj);
3586 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3587 err = -ENOENT;
3588 goto put_obj;
3589 }
3590 bos[i] = gem_to_xe_bo(gem_obj);
3591
3592 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3593 obj_offset, pat_index, op,
3594 bind_flags);
3595 if (err)
3596 goto put_obj;
3597 }
3598
3599 if (args->num_syncs) {
3600 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3601 if (!syncs) {
3602 err = -ENOMEM;
3603 goto put_obj;
3604 }
3605 }
3606
3607 syncs_user = u64_to_user_ptr(args->syncs);
3608 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3609 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3610 &syncs_user[num_syncs],
3611 (xe_vm_in_lr_mode(vm) ?
3612 SYNC_PARSE_FLAG_LR_MODE : 0) |
3613 (!args->num_binds ?
3614 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3615 if (err)
3616 goto free_syncs;
3617
3618 if (xe_sync_is_ufence(&syncs[num_syncs]))
3619 num_ufence++;
3620 }
3621
3622 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3623 err = -EINVAL;
3624 goto free_syncs;
3625 }
3626
3627 if (!args->num_binds) {
3628 err = -ENODATA;
3629 goto free_syncs;
3630 }
3631
3632 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3633 if (args->num_binds > 1)
3634 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
3635 for (i = 0; i < args->num_binds; ++i) {
3636 u64 range = bind_ops[i].range;
3637 u64 addr = bind_ops[i].addr;
3638 u32 op = bind_ops[i].op;
3639 u32 flags = bind_ops[i].flags;
3640 u64 obj_offset = bind_ops[i].obj_offset;
3641 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3642 u16 pat_index = bind_ops[i].pat_index;
3643
3644 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3645 addr, range, op, flags,
3646 prefetch_region, pat_index);
3647 if (IS_ERR(ops[i])) {
3648 err = PTR_ERR(ops[i]);
3649 ops[i] = NULL;
3650 goto unwind_ops;
3651 }
3652
3653 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3654 if (err)
3655 goto unwind_ops;
3656
3657 #ifdef TEST_VM_OPS_ERROR
3658 if (flags & FORCE_OP_ERROR) {
3659 vops.inject_error = true;
3660 vm->xe->vm_inject_error_position =
3661 (vm->xe->vm_inject_error_position + 1) %
3662 FORCE_OP_ERROR_COUNT;
3663 }
3664 #endif
3665 }
3666
3667 /* Nothing to do */
3668 if (list_empty(&vops.list)) {
3669 err = -ENODATA;
3670 goto unwind_ops;
3671 }
3672
3673 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3674 if (err)
3675 goto unwind_ops;
3676
3677 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3678 if (err)
3679 goto unwind_ops;
3680
3681 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3682 if (IS_ERR(fence))
3683 err = PTR_ERR(fence);
3684 else
3685 dma_fence_put(fence);
3686
3687 unwind_ops:
3688 if (err && err != -ENODATA)
3689 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3690 xe_vma_ops_fini(&vops);
3691 for (i = args->num_binds - 1; i >= 0; --i)
3692 if (ops[i])
3693 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3694 free_syncs:
3695 if (err == -ENODATA)
3696 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3697 while (num_syncs--)
3698 xe_sync_entry_cleanup(&syncs[num_syncs]);
3699
3700 kfree(syncs);
3701 put_obj:
3702 for (i = 0; i < args->num_binds; ++i)
3703 xe_bo_put(bos[i]);
3704
3705 kvfree(ops);
3706 free_bos:
3707 kvfree(bos);
3708 release_vm_lock:
3709 up_write(&vm->lock);
3710 put_exec_queue:
3711 if (q)
3712 xe_exec_queue_put(q);
3713 free_bind_ops:
3714 if (args->num_binds > 1)
3715 kvfree(bind_ops);
3716 put_vm:
3717 xe_vm_put(vm);
3718 return err;
3719 }
3720
3721 /**
3722 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3723 * @vm: VM to bind the BO to
3724 * @bo: BO to bind
3725 * @q: exec queue to use for the bind (optional)
3726 * @addr: address at which to bind the BO
3727 * @cache_lvl: PAT cache level to use
3728 *
3729 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3730 * kernel-owned VM.
3731 *
3732  * Return: A dma_fence to track the binding completion if the job to do so was
3733  * successfully submitted, an error pointer otherwise.
3734 */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)3735 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3736 struct xe_exec_queue *q, u64 addr,
3737 enum xe_cache_level cache_lvl)
3738 {
3739 struct xe_vma_ops vops;
3740 struct drm_gpuva_ops *ops = NULL;
3741 struct dma_fence *fence;
3742 int err;
3743
3744 xe_bo_get(bo);
3745 xe_vm_get(vm);
3746 if (q)
3747 xe_exec_queue_get(q);
3748
3749 down_write(&vm->lock);
3750
3751 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3752
3753 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3754 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3755 vm->xe->pat.idx[cache_lvl]);
3756 if (IS_ERR(ops)) {
3757 err = PTR_ERR(ops);
3758 goto release_vm_lock;
3759 }
3760
3761 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3762 if (err)
3763 goto release_vm_lock;
3764
3765 xe_assert(vm->xe, !list_empty(&vops.list));
3766
3767 err = xe_vma_ops_alloc(&vops, false);
3768 if (err)
3769 goto unwind_ops;
3770
3771 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3772 if (IS_ERR(fence))
3773 err = PTR_ERR(fence);
3774
3775 unwind_ops:
3776 if (err && err != -ENODATA)
3777 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3778
3779 xe_vma_ops_fini(&vops);
3780 drm_gpuva_ops_free(&vm->gpuvm, ops);
3781
3782 release_vm_lock:
3783 up_write(&vm->lock);
3784
3785 if (q)
3786 xe_exec_queue_put(q);
3787 xe_vm_put(vm);
3788 xe_bo_put(bo);
3789
3790 if (err)
3791 fence = ERR_PTR(err);
3792
3793 return fence;
3794 }
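
/*
 * Usage sketch (hypothetical caller; vm, bo and addr are assumed to exist
 * already, and XE_CACHE_WB is just an example cache level):
 *
 *	struct dma_fence *fence;
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */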
3795
3796 /**
3797 * xe_vm_lock() - Lock the vm's dma_resv object
3798 * @vm: The struct xe_vm whose lock is to be locked
3799 * @intr: Whether to perform any wait interruptible
3800 *
3801 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3802 * contended lock was interrupted. If @intr is false, the function
3803 * always returns 0.
3804 */
xe_vm_lock(struct xe_vm * vm,bool intr)3805 int xe_vm_lock(struct xe_vm *vm, bool intr)
3806 {
3807 int ret;
3808
3809 if (intr)
3810 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3811 else
3812 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
3813
3814 return ret;
3815 }
3816
3817 /**
3818 * xe_vm_unlock() - Unlock the vm's dma_resv object
3819 * @vm: The struct xe_vm whose lock is to be released.
3820 *
3821 * Unlock a buffer object lock that was locked by xe_vm_lock().
3822 */
xe_vm_unlock(struct xe_vm * vm)3823 void xe_vm_unlock(struct xe_vm *vm)
3824 {
3825 dma_resv_unlock(xe_vm_resv(vm));
3826 }
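
/*
 * Usage sketch (hypothetical caller that already holds a reference to @vm):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... access state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);
 */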
3827
3828 /**
3829 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
3830 * address range
3831 * @vm: The VM
3832 * @start: start address
3833 * @end: end address
3834  * @tile_mask: mask of tiles for which to issue a TLB invalidation
3835  *
3836  * Issue a range-based TLB invalidation for the GTs of the tiles in @tile_mask.
3837  *
3838  * Return: 0 on success, negative error code otherwise.
3839 */
xe_vm_range_tilemask_tlb_inval(struct xe_vm * vm,u64 start,u64 end,u8 tile_mask)3840 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3841 u64 end, u8 tile_mask)
3842 {
3843 struct xe_tlb_inval_fence
3844 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3845 struct xe_tile *tile;
3846 u32 fence_id = 0;
3847 u8 id;
3848 int err;
3849
3850 if (!tile_mask)
3851 return 0;
3852
3853 for_each_tile(tile, vm->xe, id) {
3854 if (!(tile_mask & BIT(id)))
3855 continue;
3856
3857 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3858 &fence[fence_id], true);
3859
3860 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3861 &fence[fence_id], start, end,
3862 vm->usm.asid);
3863 if (err)
3864 goto wait;
3865 ++fence_id;
3866
3867 if (!tile->media_gt)
3868 continue;
3869
3870 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3871 &fence[fence_id], true);
3872
3873 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
3874 &fence[fence_id], start, end,
3875 vm->usm.asid);
3876 if (err)
3877 goto wait;
3878 ++fence_id;
3879 }
3880
3881 wait:
3882 for (id = 0; id < fence_id; ++id)
3883 xe_tlb_inval_fence_wait(&fence[id]);
3884
3885 return err;
3886 }
3887
3888 /**
3889 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3890 * @vma: VMA to invalidate
3891 *
3892  * Walks the list of page-table leaves, zeroing the entries owned by this
3893  * VMA, issues a TLB invalidation, and blocks until the TLB invalidation is
3894  * complete.
3895  *
3896  * Return: 0 on success, negative error code otherwise.
3897 */
xe_vm_invalidate_vma(struct xe_vma * vma)3898 int xe_vm_invalidate_vma(struct xe_vma *vma)
3899 {
3900 struct xe_device *xe = xe_vma_vm(vma)->xe;
3901 struct xe_vm *vm = xe_vma_vm(vma);
3902 struct xe_tile *tile;
3903 u8 tile_mask = 0;
3904 int ret = 0;
3905 u8 id;
3906
3907 xe_assert(xe, !xe_vma_is_null(vma));
3908 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3909 trace_xe_vma_invalidate(vma);
3910
3911 vm_dbg(&vm->xe->drm,
3912 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3913 xe_vma_start(vma), xe_vma_size(vma));
3914
3915 /*
3916 	 * Check that we don't race with page-table updates, so that the
3917 	 * tile_invalidated update below is safe.
3918 */
3919 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3920 if (xe_vma_is_userptr(vma)) {
3921 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
3922 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
3923 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
3924
3925 WARN_ON_ONCE(!mmu_interval_check_retry
3926 (&to_userptr_vma(vma)->userptr.notifier,
3927 to_userptr_vma(vma)->userptr.pages.notifier_seq));
3928 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
3929 DMA_RESV_USAGE_BOOKKEEP));
3930
3931 } else {
3932 xe_bo_assert_held(xe_vma_bo(vma));
3933 }
3934 }
3935
3936 for_each_tile(tile, xe, id)
3937 if (xe_pt_zap_ptes(tile, vma))
3938 tile_mask |= BIT(id);
3939
3940 xe_device_wmb(xe);
3941
3942 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
3943 xe_vma_end(vma), tile_mask);
3944
3945 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
3946 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
3947
3948 return ret;
3949 }
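
/*
 * Illustrative sketch (not part of the driver): invalidating the GPU mapping
 * of a BO-backed VMA. This assumes the BO shares the vm's dma_resv, so that
 * holding the vm lock satisfies xe_bo_assert_held(); the helper name is
 * hypothetical.
 *
 *	static int example_invalidate(struct xe_vm *vm, struct xe_vma *vma)
 *	{
 *		int err;
 *
 *		err = xe_vm_lock(vm, true);
 *		if (err)
 *			return err;
 *
 *		err = xe_vm_invalidate_vma(vma);	// zap PTEs + TLB flush
 *		xe_vm_unlock(vm);
 *		return err;
 *	}
 */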
3950
3951 int xe_vm_validate_protected(struct xe_vm *vm)
3952 {
3953 struct drm_gpuva *gpuva;
3954 int err = 0;
3955
3956 if (!vm)
3957 return -ENODEV;
3958
3959 mutex_lock(&vm->snap_mutex);
3960
3961 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3962 struct xe_vma *vma = gpuva_to_vma(gpuva);
3963 struct xe_bo *bo = vma->gpuva.gem.obj ?
3964 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3965
3966 if (!bo)
3967 continue;
3968
3969 if (xe_bo_is_protected(bo)) {
3970 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
3971 if (err)
3972 break;
3973 }
3974 }
3975
3976 mutex_unlock(&vm->snap_mutex);
3977 return err;
3978 }
3979
3980 struct xe_vm_snapshot {
3981 unsigned long num_snaps;
3982 struct {
3983 u64 ofs, bo_ofs;
3984 unsigned long len;
3985 struct xe_bo *bo;
3986 void *data;
3987 struct mm_struct *mm;
3988 } snap[];
3989 };
3990
3991 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3992 {
3993 unsigned long num_snaps = 0, i;
3994 struct xe_vm_snapshot *snap = NULL;
3995 struct drm_gpuva *gpuva;
3996
3997 if (!vm)
3998 return NULL;
3999
4000 mutex_lock(&vm->snap_mutex);
4001 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4002 if (gpuva->flags & XE_VMA_DUMPABLE)
4003 num_snaps++;
4004 }
4005
4006 if (num_snaps)
4007 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4008 if (!snap) {
4009 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4010 goto out_unlock;
4011 }
4012
4013 snap->num_snaps = num_snaps;
4014 i = 0;
4015 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4016 struct xe_vma *vma = gpuva_to_vma(gpuva);
4017 struct xe_bo *bo = vma->gpuva.gem.obj ?
4018 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4019
4020 if (!(gpuva->flags & XE_VMA_DUMPABLE))
4021 continue;
4022
4023 snap->snap[i].ofs = xe_vma_start(vma);
4024 snap->snap[i].len = xe_vma_size(vma);
4025 if (bo) {
4026 snap->snap[i].bo = xe_bo_get(bo);
4027 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4028 } else if (xe_vma_is_userptr(vma)) {
4029 struct mm_struct *mm =
4030 to_userptr_vma(vma)->userptr.notifier.mm;
4031
4032 if (mmget_not_zero(mm))
4033 snap->snap[i].mm = mm;
4034 else
4035 snap->snap[i].data = ERR_PTR(-EFAULT);
4036
4037 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4038 } else {
4039 snap->snap[i].data = ERR_PTR(-ENOENT);
4040 }
4041 i++;
4042 }
4043
4044 out_unlock:
4045 mutex_unlock(&vm->snap_mutex);
4046 return snap;
4047 }
4048
4049 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4050 {
4051 if (IS_ERR_OR_NULL(snap))
4052 return;
4053
4054 for (int i = 0; i < snap->num_snaps; i++) {
4055 struct xe_bo *bo = snap->snap[i].bo;
4056 int err;
4057
4058 if (IS_ERR(snap->snap[i].data))
4059 continue;
4060
4061 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4062 if (!snap->snap[i].data) {
4063 snap->snap[i].data = ERR_PTR(-ENOMEM);
4064 goto cleanup_bo;
4065 }
4066
4067 if (bo) {
4068 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4069 snap->snap[i].data, snap->snap[i].len);
4070 } else {
4071 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4072
4073 kthread_use_mm(snap->snap[i].mm);
4074 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4075 err = 0;
4076 else
4077 err = -EFAULT;
4078 kthread_unuse_mm(snap->snap[i].mm);
4079
4080 mmput(snap->snap[i].mm);
4081 snap->snap[i].mm = NULL;
4082 }
4083
4084 if (err) {
4085 kvfree(snap->snap[i].data);
4086 snap->snap[i].data = ERR_PTR(err);
4087 }
4088
4089 cleanup_bo:
4090 xe_bo_put(bo);
4091 snap->snap[i].bo = NULL;
4092 }
4093 }
4094
4095 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4096 {
4097 unsigned long i, j;
4098
4099 if (IS_ERR_OR_NULL(snap)) {
4100 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4101 return;
4102 }
4103
4104 for (i = 0; i < snap->num_snaps; i++) {
4105 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4106
4107 if (IS_ERR(snap->snap[i].data)) {
4108 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4109 PTR_ERR(snap->snap[i].data));
4110 continue;
4111 }
4112
4113 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4114
4115 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4116 u32 *val = snap->snap[i].data + j;
4117 char dumped[ASCII85_BUFSZ];
4118
4119 drm_puts(p, ascii85_encode(*val, dumped));
4120 }
4121
4122 drm_puts(p, "\n");
4123
4124 if (drm_coredump_printer_is_full(p))
4125 return;
4126 }
4127 }
4128
4129 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4130 {
4131 unsigned long i;
4132
4133 if (IS_ERR_OR_NULL(snap))
4134 return;
4135
4136 for (i = 0; i < snap->num_snaps; i++) {
4137 if (!IS_ERR(snap->snap[i].data))
4138 kvfree(snap->snap[i].data);
4139 xe_bo_put(snap->snap[i].bo);
4140 if (snap->snap[i].mm)
4141 mmput(snap->snap[i].mm);
4142 }
4143 kvfree(snap);
4144 }
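
/*
 * Illustrative sketch (not part of the driver): the snapshot helpers are used
 * in two stages. xe_vm_snapshot_capture() only records which ranges to dump
 * (using a non-blocking allocation), xe_vm_snapshot_capture_delayed() copies
 * the contents later from process context, and the result is printed and
 * freed. The function name below is hypothetical; the later helpers tolerate
 * an ERR_PTR or NULL snapshot, so no extra error check is needed here.
 *
 *	static void example_dump_vm(struct xe_vm *vm, struct drm_printer *p)
 *	{
 *		struct xe_vm_snapshot *snap = xe_vm_snapshot_capture(vm);
 *
 *		xe_vm_snapshot_capture_delayed(snap);	// process context
 *		xe_vm_snapshot_print(snap, p);
 *		xe_vm_snapshot_free(snap);
 *	}
 */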
4145
4146 /**
4147 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4148 * @xe: Pointer to the XE device structure
4149 * @vma: Pointer to the virtual memory area (VMA) structure
4150 * @is_atomic: True when called from the pagefault path for an atomic access
4151 *
4152 * This function determines whether the given VMA needs to be migrated to
4153 * VRAM in order to perform an atomic GPU operation.
4154 *
4155 * Return:
4156 * 1 - Migration to VRAM is required
4157 * 0 - Migration is not required
4158 * -EACCES - Atomic access is not allowed by the memory attributes
4159 *
4160 */
4161 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4162 {
4163 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4164 vma->attr.atomic_access;
4165
4166 if (!IS_DGFX(xe) || !is_atomic)
4167 return false;
4168
4169 /*
4170 * NOTE: The checks implemented here are platform-specific. For
4171 * instance, on a device supporting CXL atomics, these would ideally
4172 * work universally without additional handling.
4173 */
4174 switch (atomic_access) {
4175 case DRM_XE_ATOMIC_DEVICE:
4176 return !xe->info.has_device_atomics_on_smem;
4177
4178 case DRM_XE_ATOMIC_CPU:
4179 return -EACCES;
4180
4181 case DRM_XE_ATOMIC_UNDEFINED:
4182 case DRM_XE_ATOMIC_GLOBAL:
4183 default:
4184 return 1;
4185 }
4186 }
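
/*
 * Illustrative sketch (not part of the driver): how a fault handler might
 * consume the tri-state return value of xe_vma_need_vram_for_atomic(). The
 * helper name and the migrate/bind steps are placeholders.
 *
 *	static int example_handle_atomic_fault(struct xe_device *xe,
 *					       struct xe_vma *vma, bool atomic)
 *	{
 *		int need_vram = xe_vma_need_vram_for_atomic(xe, vma, atomic);
 *
 *		if (need_vram < 0)
 *			return need_vram;	// -EACCES: access not allowed
 *
 *		if (need_vram) {
 *			// migrate the backing store to VRAM first
 *		}
 *
 *		// ... validate and (re)bind the VMA ...
 *		return 0;
 *	}
 */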
4187
4188 static int xe_vm_alloc_vma(struct xe_vm *vm,
4189 struct drm_gpuvm_map_req *map_req,
4190 bool is_madvise)
4191 {
4192 struct xe_vma_ops vops;
4193 struct drm_gpuva_ops *ops = NULL;
4194 struct drm_gpuva_op *__op;
4195 unsigned int vma_flags = 0;
4196 bool remap_op = false;
4197 struct xe_vma_mem_attr tmp_attr;
4198 u16 default_pat;
4199 int err;
4200
4201 lockdep_assert_held_write(&vm->lock);
4202
4203 if (is_madvise)
4204 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4205 else
4206 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4207
4208 if (IS_ERR(ops))
4209 return PTR_ERR(ops);
4210
4211 if (list_empty(&ops->list)) {
4212 err = 0;
4213 goto free_ops;
4214 }
4215
4216 drm_gpuva_for_each_op(__op, ops) {
4217 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4218 struct xe_vma *vma = NULL;
4219
4220 if (!is_madvise) {
4221 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4222 vma = gpuva_to_vma(op->base.unmap.va);
4223 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4224 default_pat = vma->attr.default_pat_index;
4225 vma_flags = vma->gpuva.flags;
4226 }
4227
4228 if (__op->op == DRM_GPUVA_OP_REMAP) {
4229 vma = gpuva_to_vma(op->base.remap.unmap->va);
4230 default_pat = vma->attr.default_pat_index;
4231 vma_flags = vma->gpuva.flags;
4232 }
4233
4234 if (__op->op == DRM_GPUVA_OP_MAP) {
4235 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4236 op->map.pat_index = default_pat;
4237 }
4238 } else {
4239 if (__op->op == DRM_GPUVA_OP_REMAP) {
4240 vma = gpuva_to_vma(op->base.remap.unmap->va);
4241 xe_assert(vm->xe, !remap_op);
4242 xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4243 remap_op = true;
4244 vma_flags = vma->gpuva.flags;
4245 }
4246
4247 if (__op->op == DRM_GPUVA_OP_MAP) {
4248 xe_assert(vm->xe, remap_op);
4249 remap_op = false;
4250 /*
4251 * In case of madvise ops DRM_GPUVA_OP_MAP is
4252 				 * always after DRM_GPUVA_OP_REMAP, so
4253 				 * propagate the flags from the vma we're
4254 * unmapping.
4255 */
4256 op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4257 }
4258 }
4259 print_op(vm->xe, __op);
4260 }
4261
4262 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4263
4264 if (is_madvise)
4265 vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4266
4267 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4268 if (err)
4269 goto unwind_ops;
4270
4271 xe_vm_lock(vm, false);
4272
4273 drm_gpuva_for_each_op(__op, ops) {
4274 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4275 struct xe_vma *vma;
4276
4277 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4278 vma = gpuva_to_vma(op->base.unmap.va);
4279 /* There should be no unmap for madvise */
4280 if (is_madvise)
4281 XE_WARN_ON("UNEXPECTED UNMAP");
4282
4283 xe_vma_destroy(vma, NULL);
4284 } else if (__op->op == DRM_GPUVA_OP_REMAP) {
4285 vma = gpuva_to_vma(op->base.remap.unmap->va);
4286 			/* For madvise ops, store the attributes of the VMA
4287 			 * unmapped by the REMAP so they can be assigned to
4288 			 * the newly created MAP VMA.
4288 			 */
4289 if (is_madvise)
4290 tmp_attr = vma->attr;
4291
4292 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4293 } else if (__op->op == DRM_GPUVA_OP_MAP) {
4294 vma = op->map.vma;
4295 			/* For madvise ops, a MAP is always preceded by a REMAP
4296 			 * (see above), so tmp_attr always holds valid values and
4297 			 * it is safe to copy them to the new vma.
4298 */
4299 if (is_madvise)
4300 vma->attr = tmp_attr;
4301 }
4302 }
4303
4304 xe_vm_unlock(vm);
4305 drm_gpuva_ops_free(&vm->gpuvm, ops);
4306 return 0;
4307
4308 unwind_ops:
4309 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4310 free_ops:
4311 drm_gpuva_ops_free(&vm->gpuvm, ops);
4312 return err;
4313 }
4314
4315 /**
4316 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
4317 * @vm: Pointer to the xe_vm structure
4318 * @start: Starting input address
4319 * @range: Size of the input range
4320 *
4321 * This function splits existing VMAs to create a new VMA covering the user-provided input range.
4322 *
4323 * Return: 0 on success, negative error code on failure.
4324 */
4325 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4326 {
4327 struct drm_gpuvm_map_req map_req = {
4328 .map.va.addr = start,
4329 .map.va.range = range,
4330 };
4331
4332 lockdep_assert_held_write(&vm->lock);
4333
4334 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4335
4336 return xe_vm_alloc_vma(vm, &map_req, true);
4337 }
4338
4339 /**
4340 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4341 * @vm: Pointer to the xe_vm structure
4342 * @start: Starting input address
4343 * @range: Size of the input range
4344 *
4345 * This function splits/merges existing VMAs to create a new VMA covering the user-provided input range.
4346 *
4347 * Return: 0 on success, negative error code on failure.
4348 */
4349 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4350 {
4351 struct drm_gpuvm_map_req map_req = {
4352 .map.va.addr = start,
4353 .map.va.range = range,
4354 };
4355
4356 lockdep_assert_held_write(&vm->lock);
4357
4358 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4359 start, range);
4360
4361 return xe_vm_alloc_vma(vm, &map_req, false);
4362 }
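
/*
 * Illustrative sketch (not part of the driver): both helpers above expect the
 * vm lock to be held for writing, so a caller brackets them with
 * down_write()/up_write() on &vm->lock. The helper name is hypothetical; the
 * same pattern applies to xe_vm_alloc_cpu_addr_mirror_vma().
 *
 *	static int example_split_for_madvise(struct xe_vm *vm, u64 start, u64 range)
 *	{
 *		int err;
 *
 *		down_write(&vm->lock);
 *		err = xe_vm_alloc_madvise_vma(vm, start, range);
 *		up_write(&vm->lock);
 *
 *		return err;
 *	}
 */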
4363