1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_res_cursor.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_wa.h"
44
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
50 /**
51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
52 * @vm: The vm whose resv is to be locked.
53 * @exec: The drm_exec transaction.
54 *
55 * Helper to lock the vm's resv as part of a drm_exec transaction.
56 *
57 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
58 */
59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
60 {
61 return drm_exec_lock_obj(exec, xe_vm_obj(vm));
62 }
63
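/*
 * Return true if any exec queue on the VM either lacks a preempt fence or has
 * one whose signaling has already been enabled, i.e. a preemption is pending.
 */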
64 static bool preempt_fences_waiting(struct xe_vm *vm)
65 {
66 struct xe_exec_queue *q;
67
68 lockdep_assert_held(&vm->lock);
69 xe_vm_assert_held(vm);
70
71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
72 if (!q->lr.pfence ||
73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
74 &q->lr.pfence->flags)) {
75 return true;
76 }
77 }
78
79 return false;
80 }
81
82 static void free_preempt_fences(struct list_head *list)
83 {
84 struct list_head *link, *next;
85
86 list_for_each_safe(link, next, list)
87 xe_preempt_fence_free(to_preempt_fence_from_link(link));
88 }
89
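/*
 * Allocate preempt fences onto @list until there is one for each exec queue
 * on the VM; @count tracks how many have been allocated so far.
 */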
90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
91 unsigned int *count)
92 {
93 lockdep_assert_held(&vm->lock);
94 xe_vm_assert_held(vm);
95
96 if (*count >= vm->preempt.num_exec_queues)
97 return 0;
98
99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
101
102 if (IS_ERR(pfence))
103 return PTR_ERR(pfence);
104
105 list_move_tail(xe_preempt_fence_link(pfence), list);
106 }
107
108 return 0;
109 }
110
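/*
 * Wait for all currently installed preempt fences to signal and drop them.
 * Returns -ETIME if a wait fails or a fence signaled with -ETIME, in which
 * case the VM needs to be killed.
 */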
111 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
112 {
113 struct xe_exec_queue *q;
114
115 xe_vm_assert_held(vm);
116
117 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
118 if (q->lr.pfence) {
119 long timeout = dma_fence_wait(q->lr.pfence, false);
120
121 /* Only -ETIME on fence indicates VM needs to be killed */
122 if (timeout < 0 || q->lr.pfence->error == -ETIME)
123 return -ETIME;
124
125 dma_fence_put(q->lr.pfence);
126 q->lr.pfence = NULL;
127 }
128 }
129
130 return 0;
131 }
132
133 static bool xe_vm_is_idle(struct xe_vm *vm)
134 {
135 struct xe_exec_queue *q;
136
137 xe_vm_assert_held(vm);
138 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
139 if (!xe_exec_queue_is_idle(q))
140 return false;
141 }
142
143 return true;
144 }
145
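/*
 * Take a pre-allocated fence from @list for each exec queue, arm it with the
 * queue's context and next seqno, and install it as the queue's new preempt
 * fence, dropping the old one.
 */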
146 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
147 {
148 struct list_head *link;
149 struct xe_exec_queue *q;
150
151 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
152 struct dma_fence *fence;
153
154 link = list->next;
155 xe_assert(vm->xe, link != list);
156
157 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
158 q, q->lr.context,
159 ++q->lr.seqno);
160 dma_fence_put(q->lr.pfence);
161 q->lr.pfence = fence;
162 }
163 }
164
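/*
 * Reserve fence slots on @bo's reservation object and add each exec queue's
 * current preempt fence to it with bookkeep usage.
 */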
165 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
166 {
167 struct xe_exec_queue *q;
168 int err;
169
170 xe_bo_assert_held(bo);
171
172 if (!vm->preempt.num_exec_queues)
173 return 0;
174
175 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
176 if (err)
177 return err;
178
179 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
180 if (q->lr.pfence) {
181 dma_resv_add_fence(bo->ttm.base.resv,
182 q->lr.pfence,
183 DMA_RESV_USAGE_BOOKKEEP);
184 }
185
186 return 0;
187 }
188
189 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
190 struct drm_exec *exec)
191 {
192 struct xe_exec_queue *q;
193
194 lockdep_assert_held(&vm->lock);
195 xe_vm_assert_held(vm);
196
197 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
198 q->ops->resume(q);
199
200 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
201 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
202 }
203 }
204
205 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
206 {
207 struct drm_gpuvm_exec vm_exec = {
208 .vm = &vm->gpuvm,
209 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
210 .num_fences = 1,
211 };
212 struct drm_exec *exec = &vm_exec.exec;
213 struct xe_validation_ctx ctx;
214 struct dma_fence *pfence;
215 int err;
216 bool wait;
217
218 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
219
220 down_write(&vm->lock);
221 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
222 if (err)
223 goto out_up_write;
224
225 pfence = xe_preempt_fence_create(q, q->lr.context,
226 ++q->lr.seqno);
227 if (IS_ERR(pfence)) {
228 err = PTR_ERR(pfence);
229 goto out_fini;
230 }
231
232 list_add(&q->lr.link, &vm->preempt.exec_queues);
233 ++vm->preempt.num_exec_queues;
234 q->lr.pfence = pfence;
235
236 xe_svm_notifier_lock(vm);
237
238 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
239 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
240
241 /*
242 * Check to see if a preemption on the VM or a userptr invalidation is in
243 * flight; if so, trigger this preempt fence to sync state with the other
244 * preempt fences on the VM.
245 */
246 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
247 if (wait)
248 dma_fence_enable_sw_signaling(pfence);
249
250 xe_svm_notifier_unlock(vm);
251
252 out_fini:
253 xe_validation_ctx_fini(&ctx);
254 out_up_write:
255 up_write(&vm->lock);
256
257 return err;
258 }
259 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
260
261 /**
262 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
263 * @vm: The VM.
264 * @q: The exec_queue
265 *
266 * Note that this function might be called multiple times on the same queue.
267 */
268 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
269 {
270 if (!xe_vm_in_preempt_fence_mode(vm))
271 return;
272
273 down_write(&vm->lock);
274 if (!list_empty(&q->lr.link)) {
275 list_del_init(&q->lr.link);
276 --vm->preempt.num_exec_queues;
277 }
278 if (q->lr.pfence) {
279 dma_fence_enable_sw_signaling(q->lr.pfence);
280 dma_fence_put(q->lr.pfence);
281 q->lr.pfence = NULL;
282 }
283 up_write(&vm->lock);
284 }
285
286 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
287
288 /**
289 * xe_vm_kill() - VM Kill
290 * @vm: The VM.
291 * @unlocked: Flag indicating the VM's dma-resv is not held
292 *
293 * Kill the VM by setting the banned flag, indicating the VM is no longer
294 * available for use. If in preempt fence mode, also kill all exec queues attached to the VM.
295 */
296 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
297 {
298 struct xe_exec_queue *q;
299
300 lockdep_assert_held(&vm->lock);
301
302 if (unlocked)
303 xe_vm_lock(vm, false);
304
305 vm->flags |= XE_VM_FLAG_BANNED;
306 trace_xe_vm_kill(vm);
307
308 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
309 q->ops->kill(q);
310
311 if (unlocked)
312 xe_vm_unlock(vm);
313
314 /* TODO: Inform user the VM is banned */
315 }
316
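/*
 * drm_gpuvm vm_bo_validate() callback: move all VMAs of an evicted vm_bo to
 * the VM's rebind list and (re)validate the backing buffer object.
 */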
317 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
318 {
319 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
320 struct drm_gpuva *gpuva;
321 int ret;
322
323 lockdep_assert_held(&vm->lock);
324 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
325 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
326 &vm->rebind_list);
327
328 if (!try_wait_for_completion(&vm->xe->pm_block))
329 return -EAGAIN;
330
331 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
332 if (ret)
333 return ret;
334
335 vm_bo->evicted = false;
336 return 0;
337 }
338
339 /**
340 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
341 * @vm: The vm for which we are rebinding.
342 * @exec: The struct drm_exec with the locked GEM objects.
343 * @num_fences: The number of fences to reserve for the operation, not
344 * including rebinds and validations.
345 *
346 * Validates all evicted gem objects and rebinds their vmas. Note that
347 * rebindings may cause evictions and hence the validation-rebind
348 * sequence is rerun until there are no more objects to validate.
349 *
350 * Return: 0 on success, negative error code on error. In particular,
351 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
352 * the drm_exec transaction needs to be restarted.
353 */
354 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
355 unsigned int num_fences)
356 {
357 struct drm_gem_object *obj;
358 unsigned long index;
359 int ret;
360
361 do {
362 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
363 if (ret)
364 return ret;
365
366 ret = xe_vm_rebind(vm, false);
367 if (ret)
368 return ret;
369 } while (!list_empty(&vm->gpuvm.evict.list));
370
371 drm_exec_for_each_locked_object(exec, index, obj) {
372 ret = dma_resv_reserve_fences(obj->resv, num_fences);
373 if (ret)
374 return ret;
375 }
376
377 return 0;
378 }
379
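/*
 * Locking and validation step of the rebind worker, run inside the drm_exec
 * loop. Sets *done if the VM is idle or no preemption is pending; otherwise
 * waits for existing preempt fences and validates/rebinds evicted objects.
 */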
380 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
381 bool *done)
382 {
383 int err;
384
385 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
386 if (err)
387 return err;
388
389 if (xe_vm_is_idle(vm)) {
390 vm->preempt.rebind_deactivated = true;
391 *done = true;
392 return 0;
393 }
394
395 if (!preempt_fences_waiting(vm)) {
396 *done = true;
397 return 0;
398 }
399
400 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
401 if (err)
402 return err;
403
404 err = wait_for_existing_preempt_fences(vm);
405 if (err)
406 return err;
407
408 /*
409 * Add validation and rebinding to the locking loop since both can
410 * cause evictions which may require blocking dma_resv locks.
411 * The fence reservation here is intended for the new preempt fences
412 * we attach at the end of the rebind work.
413 */
414 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
415 }
416
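/*
 * If a PM transition is blocking rebinds, put the VM on the rebind_resume_list
 * so the worker is requeued on resume and return true so the worker bails out.
 */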
417 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
418 {
419 struct xe_device *xe = vm->xe;
420 bool ret = false;
421
422 mutex_lock(&xe->rebind_resume_lock);
423 if (!try_wait_for_completion(&vm->xe->pm_block)) {
424 ret = true;
425 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
426 }
427 mutex_unlock(&xe->rebind_resume_lock);
428
429 return ret;
430 }
431
432 /**
433 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
434 * @vm: The vm whose preempt worker to resume.
435 *
436 * Resume a preempt worker that was previously suspended by
437 * vm_suspend_rebind_worker().
438 */
439 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
440 {
441 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
442 }
443
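/*
 * The preempt-fence-mode rebind worker: repins userptrs, revalidates and
 * rebinds evicted objects, then arms and reinstalls fresh preempt fences.
 * Kills the VM on unrecoverable errors.
 */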
444 static void preempt_rebind_work_func(struct work_struct *w)
445 {
446 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
447 struct xe_validation_ctx ctx;
448 struct drm_exec exec;
449 unsigned int fence_count = 0;
450 LIST_HEAD(preempt_fences);
451 int err = 0;
452 long wait;
453 int __maybe_unused tries = 0;
454
455 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
456 trace_xe_vm_rebind_worker_enter(vm);
457
458 down_write(&vm->lock);
459
460 if (xe_vm_is_closed_or_banned(vm)) {
461 up_write(&vm->lock);
462 trace_xe_vm_rebind_worker_exit(vm);
463 return;
464 }
465
466 retry:
467 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
468 up_write(&vm->lock);
469 return;
470 }
471
472 if (xe_vm_userptr_check_repin(vm)) {
473 err = xe_vm_userptr_pin(vm);
474 if (err)
475 goto out_unlock_outer;
476 }
477
478 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
479 (struct xe_val_flags) {.interruptible = true});
480 if (err)
481 goto out_unlock_outer;
482
483 drm_exec_until_all_locked(&exec) {
484 bool done = false;
485
486 err = xe_preempt_work_begin(&exec, vm, &done);
487 drm_exec_retry_on_contention(&exec);
488 xe_validation_retry_on_oom(&ctx, &err);
489 if (err || done) {
490 xe_validation_ctx_fini(&ctx);
491 goto out_unlock_outer;
492 }
493 }
494
495 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
496 if (err)
497 goto out_unlock;
498
499 xe_vm_set_validation_exec(vm, &exec);
500 err = xe_vm_rebind(vm, true);
501 xe_vm_set_validation_exec(vm, NULL);
502 if (err)
503 goto out_unlock;
504
505 /* Wait on rebinds and munmap style VM unbinds */
506 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
507 DMA_RESV_USAGE_KERNEL,
508 false, MAX_SCHEDULE_TIMEOUT);
509 if (wait <= 0) {
510 err = -ETIME;
511 goto out_unlock;
512 }
513
514 #define retry_required(__tries, __vm) \
515 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
516 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
517 __xe_vm_userptr_needs_repin(__vm))
518
519 xe_svm_notifier_lock(vm);
520 if (retry_required(tries, vm)) {
521 xe_svm_notifier_unlock(vm);
522 err = -EAGAIN;
523 goto out_unlock;
524 }
525
526 #undef retry_required
527
528 spin_lock(&vm->xe->ttm.lru_lock);
529 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
530 spin_unlock(&vm->xe->ttm.lru_lock);
531
532 /* Point of no return. */
533 arm_preempt_fences(vm, &preempt_fences);
534 resume_and_reinstall_preempt_fences(vm, &exec);
535 xe_svm_notifier_unlock(vm);
536
537 out_unlock:
538 xe_validation_ctx_fini(&ctx);
539 out_unlock_outer:
540 if (err == -EAGAIN) {
541 trace_xe_vm_rebind_worker_retry(vm);
542 goto retry;
543 }
544
545 if (err) {
546 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
547 xe_vm_kill(vm, true);
548 }
549 up_write(&vm->lock);
550
551 free_preempt_fences(&preempt_fences);
552
553 trace_xe_vm_rebind_worker_exit(vm);
554 }
555
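/*
 * Allocate the per-tile page-table update op arrays for @vops, returning
 * -ENOBUFS rather than -ENOMEM when allocating on behalf of an array of binds.
 */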
556 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
557 {
558 int i;
559
560 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
561 if (!vops->pt_update_ops[i].num_ops)
562 continue;
563
564 vops->pt_update_ops[i].ops =
565 kmalloc_array(vops->pt_update_ops[i].num_ops,
566 sizeof(*vops->pt_update_ops[i].ops),
567 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
568 if (!vops->pt_update_ops[i].ops)
569 return array_of_binds ? -ENOBUFS : -ENOMEM;
570 }
571
572 return 0;
573 }
574 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
575
576 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
577 {
578 struct xe_vma *vma;
579
580 vma = gpuva_to_vma(op->base.prefetch.va);
581
582 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
583 xa_destroy(&op->prefetch_range.range);
584 }
585
586 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
587 {
588 struct xe_vma_op *op;
589
590 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
591 return;
592
593 list_for_each_entry(op, &vops->list, link)
594 xe_vma_svm_prefetch_op_fini(op);
595 }
596
597 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
598 {
599 int i;
600
601 xe_vma_svm_prefetch_ops_fini(vops);
602
603 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
604 kfree(vops->pt_update_ops[i].ops);
605 }
606
607 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
608 {
609 int i;
610
611 if (!inc_val)
612 return;
613
614 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
615 if (BIT(i) & tile_mask)
616 vops->pt_update_ops[i].num_ops += inc_val;
617 }
618
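/* Fill in a MAP op that rebinds an existing VMA over its current range. */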
619 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
620 u8 tile_mask)
621 {
622 INIT_LIST_HEAD(&op->link);
623 op->tile_mask = tile_mask;
624 op->base.op = DRM_GPUVA_OP_MAP;
625 op->base.map.va.addr = vma->gpuva.va.addr;
626 op->base.map.va.range = vma->gpuva.va.range;
627 op->base.map.gem.obj = vma->gpuva.gem.obj;
628 op->base.map.gem.offset = vma->gpuva.gem.offset;
629 op->map.vma = vma;
630 op->map.immediate = true;
631 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
632 op->map.is_null = xe_vma_is_null(vma);
633 }
634
635 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
636 u8 tile_mask)
637 {
638 struct xe_vma_op *op;
639
640 op = kzalloc(sizeof(*op), GFP_KERNEL);
641 if (!op)
642 return -ENOMEM;
643
644 xe_vm_populate_rebind(op, vma, tile_mask);
645 list_add_tail(&op->link, &vops->list);
646 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
647
648 return 0;
649 }
650
651 static struct dma_fence *ops_execute(struct xe_vm *vm,
652 struct xe_vma_ops *vops);
653 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
654 struct xe_exec_queue *q,
655 struct xe_sync_entry *syncs, u32 num_syncs);
656
657 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
658 {
659 struct dma_fence *fence;
660 struct xe_vma *vma, *next;
661 struct xe_vma_ops vops;
662 struct xe_vma_op *op, *next_op;
663 int err, i;
664
665 lockdep_assert_held(&vm->lock);
666 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
667 list_empty(&vm->rebind_list))
668 return 0;
669
670 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
671 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
672 vops.pt_update_ops[i].wait_vm_bookkeep = true;
673
674 xe_vm_assert_held(vm);
675 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
676 xe_assert(vm->xe, vma->tile_present);
677
678 if (rebind_worker)
679 trace_xe_vma_rebind_worker(vma);
680 else
681 trace_xe_vma_rebind_exec(vma);
682
683 err = xe_vm_ops_add_rebind(&vops, vma,
684 vma->tile_present);
685 if (err)
686 goto free_ops;
687 }
688
689 err = xe_vma_ops_alloc(&vops, false);
690 if (err)
691 goto free_ops;
692
693 fence = ops_execute(vm, &vops);
694 if (IS_ERR(fence)) {
695 err = PTR_ERR(fence);
696 } else {
697 dma_fence_put(fence);
698 list_for_each_entry_safe(vma, next, &vm->rebind_list,
699 combined_links.rebind)
700 list_del_init(&vma->combined_links.rebind);
701 }
702 free_ops:
703 list_for_each_entry_safe(op, next_op, &vops.list, link) {
704 list_del(&op->link);
705 kfree(op);
706 }
707 xe_vma_ops_fini(&vops);
708
709 return err;
710 }
711
712 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
713 {
714 struct dma_fence *fence = NULL;
715 struct xe_vma_ops vops;
716 struct xe_vma_op *op, *next_op;
717 struct xe_tile *tile;
718 u8 id;
719 int err;
720
721 lockdep_assert_held(&vm->lock);
722 xe_vm_assert_held(vm);
723 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
724
725 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
726 for_each_tile(tile, vm->xe, id) {
727 vops.pt_update_ops[id].wait_vm_bookkeep = true;
728 vops.pt_update_ops[tile->id].q =
729 xe_migrate_exec_queue(tile->migrate);
730 }
731
732 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
733 if (err)
734 return ERR_PTR(err);
735
736 err = xe_vma_ops_alloc(&vops, false);
737 if (err) {
738 fence = ERR_PTR(err);
739 goto free_ops;
740 }
741
742 fence = ops_execute(vm, &vops);
743
744 free_ops:
745 list_for_each_entry_safe(op, next_op, &vops.list, link) {
746 list_del(&op->link);
747 kfree(op);
748 }
749 xe_vma_ops_fini(&vops);
750
751 return fence;
752 }
753
754 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
755 struct xe_vma *vma,
756 struct xe_svm_range *range,
757 u8 tile_mask)
758 {
759 INIT_LIST_HEAD(&op->link);
760 op->tile_mask = tile_mask;
761 op->base.op = DRM_GPUVA_OP_DRIVER;
762 op->subop = XE_VMA_SUBOP_MAP_RANGE;
763 op->map_range.vma = vma;
764 op->map_range.range = range;
765 }
766
767 static int
768 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
769 struct xe_vma *vma,
770 struct xe_svm_range *range,
771 u8 tile_mask)
772 {
773 struct xe_vma_op *op;
774
775 op = kzalloc(sizeof(*op), GFP_KERNEL);
776 if (!op)
777 return -ENOMEM;
778
779 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
780 list_add_tail(&op->link, &vops->list);
781 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
782
783 return 0;
784 }
785
786 /**
787 * xe_vm_range_rebind() - VM range (re)bind
788 * @vm: The VM which the range belongs to.
789 * @vma: The VMA which the range belongs to.
790 * @range: SVM range to rebind.
791 * @tile_mask: Tile mask to bind the range to.
792 *
793 * (re)bind SVM range setting up GPU page tables for the range.
794 *
795 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
796 * failure
797 */
798 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
799 struct xe_vma *vma,
800 struct xe_svm_range *range,
801 u8 tile_mask)
802 {
803 struct dma_fence *fence = NULL;
804 struct xe_vma_ops vops;
805 struct xe_vma_op *op, *next_op;
806 struct xe_tile *tile;
807 u8 id;
808 int err;
809
810 lockdep_assert_held(&vm->lock);
811 xe_vm_assert_held(vm);
812 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
813 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
814
815 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
816 for_each_tile(tile, vm->xe, id) {
817 vops.pt_update_ops[id].wait_vm_bookkeep = true;
818 vops.pt_update_ops[tile->id].q =
819 xe_migrate_exec_queue(tile->migrate);
820 }
821
822 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
823 if (err)
824 return ERR_PTR(err);
825
826 err = xe_vma_ops_alloc(&vops, false);
827 if (err) {
828 fence = ERR_PTR(err);
829 goto free_ops;
830 }
831
832 fence = ops_execute(vm, &vops);
833
834 free_ops:
835 list_for_each_entry_safe(op, next_op, &vops.list, link) {
836 list_del(&op->link);
837 kfree(op);
838 }
839 xe_vma_ops_fini(&vops);
840
841 return fence;
842 }
843
844 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
845 struct xe_svm_range *range)
846 {
847 INIT_LIST_HEAD(&op->link);
848 op->tile_mask = range->tile_present;
849 op->base.op = DRM_GPUVA_OP_DRIVER;
850 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
851 op->unmap_range.range = range;
852 }
853
854 static int
855 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
856 struct xe_svm_range *range)
857 {
858 struct xe_vma_op *op;
859
860 op = kzalloc(sizeof(*op), GFP_KERNEL);
861 if (!op)
862 return -ENOMEM;
863
864 xe_vm_populate_range_unbind(op, range);
865 list_add_tail(&op->link, &vops->list);
866 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
867
868 return 0;
869 }
870
871 /**
872 * xe_vm_range_unbind() - VM range unbind
873 * @vm: The VM which the range belongs to.
874 * @range: SVM range to unbind.
875 *
876 * Unbind SVM range removing the GPU page tables for the range.
877 *
878 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
879 * failure
880 */
881 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
882 struct xe_svm_range *range)
883 {
884 struct dma_fence *fence = NULL;
885 struct xe_vma_ops vops;
886 struct xe_vma_op *op, *next_op;
887 struct xe_tile *tile;
888 u8 id;
889 int err;
890
891 lockdep_assert_held(&vm->lock);
892 xe_vm_assert_held(vm);
893 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
894
895 if (!range->tile_present)
896 return dma_fence_get_stub();
897
898 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
899 for_each_tile(tile, vm->xe, id) {
900 vops.pt_update_ops[id].wait_vm_bookkeep = true;
901 vops.pt_update_ops[tile->id].q =
902 xe_migrate_exec_queue(tile->migrate);
903 }
904
905 err = xe_vm_ops_add_range_unbind(&vops, range);
906 if (err)
907 return ERR_PTR(err);
908
909 err = xe_vma_ops_alloc(&vops, false);
910 if (err) {
911 fence = ERR_PTR(err);
912 goto free_ops;
913 }
914
915 fence = ops_execute(vm, &vops);
916
917 free_ops:
918 list_for_each_entry_safe(op, next_op, &vops.list, link) {
919 list_del(&op->link);
920 kfree(op);
921 }
922 xe_vma_ops_fini(&vops);
923
924 return fence;
925 }
926
927 static void xe_vma_free(struct xe_vma *vma)
928 {
929 if (xe_vma_is_userptr(vma))
930 kfree(to_userptr_vma(vma));
931 else
932 kfree(vma);
933 }
934
935 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
936 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
937 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
938 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3)
939
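/*
 * Allocate and initialize a VMA. A VMA with no BO that is neither a NULL
 * mapping nor a CPU address mirror is a userptr VMA; BO-backed VMAs take a
 * reference on the BO and are linked to its drm_gpuvm_bo.
 */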
940 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
941 struct xe_bo *bo,
942 u64 bo_offset_or_userptr,
943 u64 start, u64 end,
944 struct xe_vma_mem_attr *attr,
945 unsigned int flags)
946 {
947 struct xe_vma *vma;
948 struct xe_tile *tile;
949 u8 id;
950 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
951 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
952 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
953 bool is_cpu_addr_mirror =
954 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
955
956 xe_assert(vm->xe, start < end);
957 xe_assert(vm->xe, end < vm->size);
958
959 /*
960 * Allocate and ensure that the xe_vma_is_userptr() return
961 * matches what was allocated.
962 */
963 if (!bo && !is_null && !is_cpu_addr_mirror) {
964 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
965
966 if (!uvma)
967 return ERR_PTR(-ENOMEM);
968
969 vma = &uvma->vma;
970 } else {
971 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
972 if (!vma)
973 return ERR_PTR(-ENOMEM);
974
975 if (is_cpu_addr_mirror)
976 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
977 if (is_null)
978 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
979 if (bo)
980 vma->gpuva.gem.obj = &bo->ttm.base;
981 }
982
983 INIT_LIST_HEAD(&vma->combined_links.rebind);
984
985 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
986 vma->gpuva.vm = &vm->gpuvm;
987 vma->gpuva.va.addr = start;
988 vma->gpuva.va.range = end - start + 1;
989 if (read_only)
990 vma->gpuva.flags |= XE_VMA_READ_ONLY;
991 if (dumpable)
992 vma->gpuva.flags |= XE_VMA_DUMPABLE;
993
994 for_each_tile(tile, vm->xe, id)
995 vma->tile_mask |= 0x1 << id;
996
997 if (vm->xe->info.has_atomic_enable_pte_bit)
998 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
999
1000 vma->attr = *attr;
1001
1002 if (bo) {
1003 struct drm_gpuvm_bo *vm_bo;
1004
1005 xe_bo_assert_held(bo);
1006
1007 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1008 if (IS_ERR(vm_bo)) {
1009 xe_vma_free(vma);
1010 return ERR_CAST(vm_bo);
1011 }
1012
1013 drm_gpuvm_bo_extobj_add(vm_bo);
1014 drm_gem_object_get(&bo->ttm.base);
1015 vma->gpuva.gem.offset = bo_offset_or_userptr;
1016 drm_gpuva_link(&vma->gpuva, vm_bo);
1017 drm_gpuvm_bo_put(vm_bo);
1018 } else /* userptr or null */ {
1019 if (!is_null && !is_cpu_addr_mirror) {
1020 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1021 u64 size = end - start + 1;
1022 int err;
1023
1024 vma->gpuva.gem.offset = bo_offset_or_userptr;
1025
1026 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1027 if (err) {
1028 xe_vma_free(vma);
1029 return ERR_PTR(err);
1030 }
1031 }
1032
1033 xe_vm_get(vm);
1034 }
1035
1036 return vma;
1037 }
1038
1039 static void xe_vma_destroy_late(struct xe_vma *vma)
1040 {
1041 struct xe_vm *vm = xe_vma_vm(vma);
1042
1043 if (vma->ufence) {
1044 xe_sync_ufence_put(vma->ufence);
1045 vma->ufence = NULL;
1046 }
1047
1048 if (xe_vma_is_userptr(vma)) {
1049 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1050
1051 xe_userptr_remove(uvma);
1052 xe_vm_put(vm);
1053 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1054 xe_vm_put(vm);
1055 } else {
1056 xe_bo_put(xe_vma_bo(vma));
1057 }
1058
1059 xe_vma_free(vma);
1060 }
1061
1062 static void vma_destroy_work_func(struct work_struct *w)
1063 {
1064 struct xe_vma *vma =
1065 container_of(w, struct xe_vma, destroy_work);
1066
1067 xe_vma_destroy_late(vma);
1068 }
1069
1070 static void vma_destroy_cb(struct dma_fence *fence,
1071 struct dma_fence_cb *cb)
1072 {
1073 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1074
1075 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1076 queue_work(system_unbound_wq, &vma->destroy_work);
1077 }
1078
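/*
 * Tear down a VMA. If @fence is non-NULL, the final destruction is deferred
 * via a fence callback and worker until the fence has signaled.
 */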
1079 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1080 {
1081 struct xe_vm *vm = xe_vma_vm(vma);
1082
1083 lockdep_assert_held_write(&vm->lock);
1084 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1085
1086 if (xe_vma_is_userptr(vma)) {
1087 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1088 xe_userptr_destroy(to_userptr_vma(vma));
1089 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1090 xe_bo_assert_held(xe_vma_bo(vma));
1091
1092 drm_gpuva_unlink(&vma->gpuva);
1093 }
1094
1095 xe_vm_assert_held(vm);
1096 if (fence) {
1097 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1098 vma_destroy_cb);
1099
1100 if (ret) {
1101 XE_WARN_ON(ret != -ENOENT);
1102 xe_vma_destroy_late(vma);
1103 }
1104 } else {
1105 xe_vma_destroy_late(vma);
1106 }
1107 }
1108
1109 /**
1110 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1111 * @exec: The drm_exec object we're currently locking for.
1112 * @vma: The vma for which we want to lock the vm resv and any attached
1113 * object's resv.
1114 *
1115 * Return: 0 on success, negative error code on error. In particular
1116 * may return -EDEADLK on WW transaction contention and -EINTR if
1117 * an interruptible wait is terminated by a signal.
1118 */
1119 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1120 {
1121 struct xe_vm *vm = xe_vma_vm(vma);
1122 struct xe_bo *bo = xe_vma_bo(vma);
1123 int err;
1124
1125 XE_WARN_ON(!vm);
1126
1127 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1128 if (!err && bo && !bo->vm)
1129 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1130
1131 return err;
1132 }
1133
1134 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1135 {
1136 struct xe_device *xe = xe_vma_vm(vma)->xe;
1137 struct xe_validation_ctx ctx;
1138 struct drm_exec exec;
1139 int err = 0;
1140
1141 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1142 err = xe_vm_lock_vma(&exec, vma);
1143 drm_exec_retry_on_contention(&exec);
1144 if (XE_WARN_ON(err))
1145 break;
1146 xe_vma_destroy(vma, NULL);
1147 }
1148 xe_assert(xe, !err);
1149 }
1150
1151 struct xe_vma *
1152 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1153 {
1154 struct drm_gpuva *gpuva;
1155
1156 lockdep_assert_held(&vm->lock);
1157
1158 if (xe_vm_is_closed_or_banned(vm))
1159 return NULL;
1160
1161 xe_assert(vm->xe, start + range <= vm->size);
1162
1163 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1164
1165 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1166 }
1167
1168 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1169 {
1170 int err;
1171
1172 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1173 lockdep_assert_held(&vm->lock);
1174
1175 mutex_lock(&vm->snap_mutex);
1176 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1177 mutex_unlock(&vm->snap_mutex);
1178 XE_WARN_ON(err); /* Shouldn't be possible */
1179
1180 return err;
1181 }
1182
1183 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1184 {
1185 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1186 lockdep_assert_held(&vm->lock);
1187
1188 mutex_lock(&vm->snap_mutex);
1189 drm_gpuva_remove(&vma->gpuva);
1190 mutex_unlock(&vm->snap_mutex);
1191 if (vm->usm.last_fault_vma == vma)
1192 vm->usm.last_fault_vma = NULL;
1193 }
1194
1195 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1196 {
1197 struct xe_vma_op *op;
1198
1199 op = kzalloc(sizeof(*op), GFP_KERNEL);
1200
1201 if (unlikely(!op))
1202 return NULL;
1203
1204 return &op->base;
1205 }
1206
1207 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1208
1209 static const struct drm_gpuvm_ops gpuvm_ops = {
1210 .op_alloc = xe_vm_op_alloc,
1211 .vm_bo_validate = xe_gpuvm_validate,
1212 .vm_free = xe_vm_free,
1213 };
1214
1215 static u64 pde_encode_pat_index(u16 pat_index)
1216 {
1217 u64 pte = 0;
1218
1219 if (pat_index & BIT(0))
1220 pte |= XE_PPGTT_PTE_PAT0;
1221
1222 if (pat_index & BIT(1))
1223 pte |= XE_PPGTT_PTE_PAT1;
1224
1225 return pte;
1226 }
1227
1228 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1229 {
1230 u64 pte = 0;
1231
1232 if (pat_index & BIT(0))
1233 pte |= XE_PPGTT_PTE_PAT0;
1234
1235 if (pat_index & BIT(1))
1236 pte |= XE_PPGTT_PTE_PAT1;
1237
1238 if (pat_index & BIT(2)) {
1239 if (pt_level)
1240 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1241 else
1242 pte |= XE_PPGTT_PTE_PAT2;
1243 }
1244
1245 if (pat_index & BIT(3))
1246 pte |= XELPG_PPGTT_PTE_PAT3;
1247
1248 if (pat_index & (BIT(4)))
1249 pte |= XE2_PPGTT_PTE_PAT4;
1250
1251 return pte;
1252 }
1253
1254 static u64 pte_encode_ps(u32 pt_level)
1255 {
1256 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1257
1258 if (pt_level == 1)
1259 return XE_PDE_PS_2M;
1260 else if (pt_level == 2)
1261 return XE_PDPE_PS_1G;
1262
1263 return 0;
1264 }
1265
1266 static u16 pde_pat_index(struct xe_bo *bo)
1267 {
1268 struct xe_device *xe = xe_bo_device(bo);
1269 u16 pat_index;
1270
1271 /*
1272 * We only have two bits to encode the PAT index in non-leaf nodes, but
1273 * these only point to other paging structures so we only need a minimal
1274 * selection of options. The user PAT index is only for encoding leaf
1275 * nodes, where we have use of more bits to do the encoding. The
1276 * non-leaf nodes are instead under driver control so the chosen index
1277 * here should be distinct from the user PAT index. Also the
1278 * corresponding coherency of the PAT index should be tied to the
1279 * allocation type of the page table (or at least we should pick
1280 * something which is always safe).
1281 */
1282 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1283 pat_index = xe->pat.idx[XE_CACHE_WB];
1284 else
1285 pat_index = xe->pat.idx[XE_CACHE_NONE];
1286
1287 xe_assert(xe, pat_index <= 3);
1288
1289 return pat_index;
1290 }
1291
1292 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1293 {
1294 u64 pde;
1295
1296 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1297 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1298 pde |= pde_encode_pat_index(pde_pat_index(bo));
1299
1300 return pde;
1301 }
1302
1303 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1304 u16 pat_index, u32 pt_level)
1305 {
1306 u64 pte;
1307
1308 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1309 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1310 pte |= pte_encode_pat_index(pat_index, pt_level);
1311 pte |= pte_encode_ps(pt_level);
1312
1313 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1314 pte |= XE_PPGTT_PTE_DM;
1315
1316 return pte;
1317 }
1318
1319 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1320 u16 pat_index, u32 pt_level)
1321 {
1322 pte |= XE_PAGE_PRESENT;
1323
1324 if (likely(!xe_vma_read_only(vma)))
1325 pte |= XE_PAGE_RW;
1326
1327 pte |= pte_encode_pat_index(pat_index, pt_level);
1328 pte |= pte_encode_ps(pt_level);
1329
1330 if (unlikely(xe_vma_is_null(vma)))
1331 pte |= XE_PTE_NULL;
1332
1333 return pte;
1334 }
1335
1336 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1337 u16 pat_index,
1338 u32 pt_level, bool devmem, u64 flags)
1339 {
1340 u64 pte;
1341
1342 /* Avoid passing random bits directly as flags */
1343 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1344
1345 pte = addr;
1346 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1347 pte |= pte_encode_pat_index(pat_index, pt_level);
1348 pte |= pte_encode_ps(pt_level);
1349
1350 if (devmem)
1351 pte |= XE_PPGTT_PTE_DM;
1352
1353 pte |= flags;
1354
1355 return pte;
1356 }
1357
1358 static const struct xe_pt_ops xelp_pt_ops = {
1359 .pte_encode_bo = xelp_pte_encode_bo,
1360 .pte_encode_vma = xelp_pte_encode_vma,
1361 .pte_encode_addr = xelp_pte_encode_addr,
1362 .pde_encode_bo = xelp_pde_encode_bo,
1363 };
1364
1365 static void vm_destroy_work_func(struct work_struct *w);
1366
1367 /**
1368 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1369 * given tile and vm.
1370 * @xe: xe device.
1371 * @tile: tile to set up for.
1372 * @vm: vm to set up for.
1373 * @exec: The struct drm_exec object used to lock the vm resv.
1374 *
1375 * Sets up a pagetable tree with one page-table per level and a single
1376 * leaf PTE. All pagetable entries point to the single page-table or,
1377 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1378 * writes become NOPs.
1379 *
1380 * Return: 0 on success, negative error code on error.
1381 */
1382 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1383 struct xe_vm *vm, struct drm_exec *exec)
1384 {
1385 u8 id = tile->id;
1386 int i;
1387
1388 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1389 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1390 if (IS_ERR(vm->scratch_pt[id][i])) {
1391 int err = PTR_ERR(vm->scratch_pt[id][i]);
1392
1393 vm->scratch_pt[id][i] = NULL;
1394 return err;
1395 }
1396 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1397 }
1398
1399 return 0;
1400 }
1401 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1402
1403 static void xe_vm_free_scratch(struct xe_vm *vm)
1404 {
1405 struct xe_tile *tile;
1406 u8 id;
1407
1408 if (!xe_vm_has_scratch(vm))
1409 return;
1410
1411 for_each_tile(tile, vm->xe, id) {
1412 u32 i;
1413
1414 if (!vm->pt_root[id])
1415 continue;
1416
1417 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1418 if (vm->scratch_pt[id][i])
1419 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1420 }
1421 }
1422
1423 static void xe_vm_pt_destroy(struct xe_vm *vm)
1424 {
1425 struct xe_tile *tile;
1426 u8 id;
1427
1428 xe_vm_assert_held(vm);
1429
1430 for_each_tile(tile, vm->xe, id) {
1431 if (vm->pt_root[id]) {
1432 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1433 vm->pt_root[id] = NULL;
1434 }
1435 }
1436 }
1437
1438 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1439 {
1440 struct drm_gem_object *vm_resv_obj;
1441 struct xe_validation_ctx ctx;
1442 struct drm_exec exec;
1443 struct xe_vm *vm;
1444 int err, number_tiles = 0;
1445 struct xe_tile *tile;
1446 u8 id;
1447
1448 /*
1449 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1450 * ever be in faulting mode.
1451 */
1452 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1453
1454 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1455 if (!vm)
1456 return ERR_PTR(-ENOMEM);
1457
1458 vm->xe = xe;
1459
1460 vm->size = 1ull << xe->info.va_bits;
1461 vm->flags = flags;
1462
1463 if (xef)
1464 vm->xef = xe_file_get(xef);
1465 /**
1466 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1467 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1468 * under a user-VM lock when the PXP session is started at exec_queue
1469 * creation time. Those are different VMs and therefore there is no risk
1470 * of deadlock, but we need to tell lockdep that this is the case or it
1471 * will print a warning.
1472 */
1473 if (flags & XE_VM_FLAG_GSC) {
1474 static struct lock_class_key gsc_vm_key;
1475
1476 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1477 } else {
1478 init_rwsem(&vm->lock);
1479 }
1480 mutex_init(&vm->snap_mutex);
1481
1482 INIT_LIST_HEAD(&vm->rebind_list);
1483
1484 INIT_LIST_HEAD(&vm->userptr.repin_list);
1485 INIT_LIST_HEAD(&vm->userptr.invalidated);
1486 spin_lock_init(&vm->userptr.invalidated_lock);
1487
1488 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1489
1490 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1491
1492 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1493 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1494
1495 for_each_tile(tile, xe, id)
1496 xe_range_fence_tree_init(&vm->rftree[id]);
1497
1498 vm->pt_ops = &xelp_pt_ops;
1499
1500 /*
1501 * Long-running workloads are not protected by the scheduler references.
1502 * By design, run_job for long-running workloads returns NULL and the
1503 * scheduler drops all the references of it, hence protecting the VM
1504 * for this case is necessary.
1505 */
1506 if (flags & XE_VM_FLAG_LR_MODE) {
1507 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1508 xe_pm_runtime_get_noresume(xe);
1509 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1510 }
1511
1512 err = xe_svm_init(vm);
1513 if (err)
1514 goto err_no_resv;
1515
1516 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1517 if (!vm_resv_obj) {
1518 err = -ENOMEM;
1519 goto err_svm_fini;
1520 }
1521
1522 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1523 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1524
1525 drm_gem_object_put(vm_resv_obj);
1526
1527 err = 0;
1528 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1529 err) {
1530 err = xe_vm_drm_exec_lock(vm, &exec);
1531 drm_exec_retry_on_contention(&exec);
1532
1533 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1534 vm->flags |= XE_VM_FLAG_64K;
1535
1536 for_each_tile(tile, xe, id) {
1537 if (flags & XE_VM_FLAG_MIGRATION &&
1538 tile->id != XE_VM_FLAG_TILE_ID(flags))
1539 continue;
1540
1541 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1542 &exec);
1543 if (IS_ERR(vm->pt_root[id])) {
1544 err = PTR_ERR(vm->pt_root[id]);
1545 vm->pt_root[id] = NULL;
1546 xe_vm_pt_destroy(vm);
1547 drm_exec_retry_on_contention(&exec);
1548 xe_validation_retry_on_oom(&ctx, &err);
1549 break;
1550 }
1551 }
1552 if (err)
1553 break;
1554
1555 if (xe_vm_has_scratch(vm)) {
1556 for_each_tile(tile, xe, id) {
1557 if (!vm->pt_root[id])
1558 continue;
1559
1560 err = xe_vm_create_scratch(xe, tile, vm, &exec);
1561 if (err) {
1562 xe_vm_free_scratch(vm);
1563 xe_vm_pt_destroy(vm);
1564 drm_exec_retry_on_contention(&exec);
1565 xe_validation_retry_on_oom(&ctx, &err);
1566 break;
1567 }
1568 }
1569 if (err)
1570 break;
1571 vm->batch_invalidate_tlb = true;
1572 }
1573
1574 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1575 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1576 vm->batch_invalidate_tlb = false;
1577 }
1578
1579 /* Fill pt_root after allocating scratch tables */
1580 for_each_tile(tile, xe, id) {
1581 if (!vm->pt_root[id])
1582 continue;
1583
1584 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1585 }
1586 }
1587 if (err)
1588 goto err_close;
1589
1590 /* Kernel migration VM shouldn't have a circular loop.. */
1591 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1592 for_each_tile(tile, xe, id) {
1593 struct xe_exec_queue *q;
1594 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1595
1596 if (!vm->pt_root[id])
1597 continue;
1598
1599 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1600 if (IS_ERR(q)) {
1601 err = PTR_ERR(q);
1602 goto err_close;
1603 }
1604 vm->q[id] = q;
1605 number_tiles++;
1606 }
1607 }
1608
1609 if (number_tiles > 1)
1610 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1611
1612 if (xef && xe->info.has_asid) {
1613 u32 asid;
1614
1615 down_write(&xe->usm.lock);
1616 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1617 XA_LIMIT(1, XE_MAX_ASID - 1),
1618 &xe->usm.next_asid, GFP_KERNEL);
1619 up_write(&xe->usm.lock);
1620 if (err < 0)
1621 goto err_close;
1622
1623 vm->usm.asid = asid;
1624 }
1625
1626 trace_xe_vm_create(vm);
1627
1628 return vm;
1629
1630 err_close:
1631 xe_vm_close_and_put(vm);
1632 return ERR_PTR(err);
1633
1634 err_svm_fini:
1635 if (flags & XE_VM_FLAG_FAULT_MODE) {
1636 vm->size = 0; /* close the vm */
1637 xe_svm_fini(vm);
1638 }
1639 err_no_resv:
1640 mutex_destroy(&vm->snap_mutex);
1641 for_each_tile(tile, xe, id)
1642 xe_range_fence_tree_fini(&vm->rftree[id]);
1643 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1644 if (vm->xef)
1645 xe_file_put(vm->xef);
1646 kfree(vm);
1647 if (flags & XE_VM_FLAG_LR_MODE)
1648 xe_pm_runtime_put(xe);
1649 return ERR_PTR(err);
1650 }
1651
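/*
 * Mark the VM closed by zeroing its size. For non-migration VMs, wait for
 * pending binds and, while the device is still alive, clear the page-table
 * roots and invalidate TLBs.
 */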
1652 static void xe_vm_close(struct xe_vm *vm)
1653 {
1654 struct xe_device *xe = vm->xe;
1655 bool bound;
1656 int idx;
1657
1658 bound = drm_dev_enter(&xe->drm, &idx);
1659
1660 down_write(&vm->lock);
1661 if (xe_vm_in_fault_mode(vm))
1662 xe_svm_notifier_lock(vm);
1663
1664 vm->size = 0;
1665
1666 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1667 struct xe_tile *tile;
1668 struct xe_gt *gt;
1669 u8 id;
1670
1671 /* Wait for pending binds */
1672 dma_resv_wait_timeout(xe_vm_resv(vm),
1673 DMA_RESV_USAGE_BOOKKEEP,
1674 false, MAX_SCHEDULE_TIMEOUT);
1675
1676 if (bound) {
1677 for_each_tile(tile, xe, id)
1678 if (vm->pt_root[id])
1679 xe_pt_clear(xe, vm->pt_root[id]);
1680
1681 for_each_gt(gt, xe, id)
1682 xe_tlb_inval_vm(>->tlb_inval, vm);
1683 }
1684 }
1685
1686 if (xe_vm_in_fault_mode(vm))
1687 xe_svm_notifier_unlock(vm);
1688 up_write(&vm->lock);
1689
1690 if (bound)
1691 drm_dev_exit(idx);
1692 }
1693
1694 void xe_vm_close_and_put(struct xe_vm *vm)
1695 {
1696 LIST_HEAD(contested);
1697 struct xe_device *xe = vm->xe;
1698 struct xe_tile *tile;
1699 struct xe_vma *vma, *next_vma;
1700 struct drm_gpuva *gpuva, *next;
1701 u8 id;
1702
1703 xe_assert(xe, !vm->preempt.num_exec_queues);
1704
1705 xe_vm_close(vm);
1706 if (xe_vm_in_preempt_fence_mode(vm)) {
1707 mutex_lock(&xe->rebind_resume_lock);
1708 list_del_init(&vm->preempt.pm_activate_link);
1709 mutex_unlock(&xe->rebind_resume_lock);
1710 flush_work(&vm->preempt.rebind_work);
1711 }
1712 if (xe_vm_in_fault_mode(vm))
1713 xe_svm_close(vm);
1714
1715 down_write(&vm->lock);
1716 for_each_tile(tile, xe, id) {
1717 if (vm->q[id])
1718 xe_exec_queue_last_fence_put(vm->q[id], vm);
1719 }
1720 up_write(&vm->lock);
1721
1722 for_each_tile(tile, xe, id) {
1723 if (vm->q[id]) {
1724 xe_exec_queue_kill(vm->q[id]);
1725 xe_exec_queue_put(vm->q[id]);
1726 vm->q[id] = NULL;
1727 }
1728 }
1729
1730 down_write(&vm->lock);
1731 xe_vm_lock(vm, false);
1732 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1733 vma = gpuva_to_vma(gpuva);
1734
1735 if (xe_vma_has_no_bo(vma)) {
1736 xe_svm_notifier_lock(vm);
1737 vma->gpuva.flags |= XE_VMA_DESTROYED;
1738 xe_svm_notifier_unlock(vm);
1739 }
1740
1741 xe_vm_remove_vma(vm, vma);
1742
1743 /* easy case, remove from VMA? */
1744 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1745 list_del_init(&vma->combined_links.rebind);
1746 xe_vma_destroy(vma, NULL);
1747 continue;
1748 }
1749
1750 list_move_tail(&vma->combined_links.destroy, &contested);
1751 vma->gpuva.flags |= XE_VMA_DESTROYED;
1752 }
1753
1754 /*
1755 * All vm operations will add shared fences to resv.
1756 * The only exception is eviction for a shared object,
1757 * but even so, the unbind when evicted would still
1758 * install a fence to resv. Hence it's safe to
1759 * destroy the pagetables immediately.
1760 */
1761 xe_vm_free_scratch(vm);
1762 xe_vm_pt_destroy(vm);
1763 xe_vm_unlock(vm);
1764
1765 /*
1766 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1767 * Since we hold a refcount to the bo, we can remove and free
1768 * the members safely without locking.
1769 */
1770 list_for_each_entry_safe(vma, next_vma, &contested,
1771 combined_links.destroy) {
1772 list_del_init(&vma->combined_links.destroy);
1773 xe_vma_destroy_unlocked(vma);
1774 }
1775
1776 xe_svm_fini(vm);
1777
1778 up_write(&vm->lock);
1779
1780 down_write(&xe->usm.lock);
1781 if (vm->usm.asid) {
1782 void *lookup;
1783
1784 xe_assert(xe, xe->info.has_asid);
1785 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1786
1787 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1788 xe_assert(xe, lookup == vm);
1789 }
1790 up_write(&xe->usm.lock);
1791
1792 for_each_tile(tile, xe, id)
1793 xe_range_fence_tree_fini(&vm->rftree[id]);
1794
1795 xe_vm_put(vm);
1796 }
1797
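/*
 * Deferred final teardown of the VM, run from a workqueue (see xe_vm_free())
 * so that the free path is allowed to sleep.
 */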
1798 static void vm_destroy_work_func(struct work_struct *w)
1799 {
1800 struct xe_vm *vm =
1801 container_of(w, struct xe_vm, destroy_work);
1802 struct xe_device *xe = vm->xe;
1803 struct xe_tile *tile;
1804 u8 id;
1805
1806 /* xe_vm_close_and_put was not called? */
1807 xe_assert(xe, !vm->size);
1808
1809 if (xe_vm_in_preempt_fence_mode(vm))
1810 flush_work(&vm->preempt.rebind_work);
1811
1812 mutex_destroy(&vm->snap_mutex);
1813
1814 if (vm->flags & XE_VM_FLAG_LR_MODE)
1815 xe_pm_runtime_put(xe);
1816
1817 for_each_tile(tile, xe, id)
1818 XE_WARN_ON(vm->pt_root[id]);
1819
1820 trace_xe_vm_free(vm);
1821
1822 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1823
1824 if (vm->xef)
1825 xe_file_put(vm->xef);
1826
1827 kfree(vm);
1828 }
1829
1830 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1831 {
1832 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1833
1834 /* To destroy the VM we need to be able to sleep */
1835 queue_work(system_unbound_wq, &vm->destroy_work);
1836 }
1837
1838 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1839 {
1840 struct xe_vm *vm;
1841
1842 mutex_lock(&xef->vm.lock);
1843 vm = xa_load(&xef->vm.xa, id);
1844 if (vm)
1845 xe_vm_get(vm);
1846 mutex_unlock(&xef->vm.lock);
1847
1848 return vm;
1849 }
1850
1851 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1852 {
1853 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
1854 }
1855
1856 static struct xe_exec_queue *
1857 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1858 {
1859 return q ? q : vm->q[0];
1860 }
1861
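/* Return a reference to the first user fence in @syncs, or NULL if none. */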
1862 static struct xe_user_fence *
1863 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1864 {
1865 unsigned int i;
1866
1867 for (i = 0; i < num_syncs; i++) {
1868 struct xe_sync_entry *e = &syncs[i];
1869
1870 if (xe_sync_is_ufence(e))
1871 return xe_sync_ufence_get(e);
1872 }
1873
1874 return NULL;
1875 }
1876
1877 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1878 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1879 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1880
1881 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1882 struct drm_file *file)
1883 {
1884 struct xe_device *xe = to_xe_device(dev);
1885 struct xe_file *xef = to_xe_file(file);
1886 struct drm_xe_vm_create *args = data;
1887 struct xe_vm *vm;
1888 u32 id;
1889 int err;
1890 u32 flags = 0;
1891
1892 if (XE_IOCTL_DBG(xe, args->extensions))
1893 return -EINVAL;
1894
1895 if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929))
1896 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1897
1898 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1899 !xe->info.has_usm))
1900 return -EINVAL;
1901
1902 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1903 return -EINVAL;
1904
1905 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1906 return -EINVAL;
1907
1908 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1909 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1910 !xe->info.needs_scratch))
1911 return -EINVAL;
1912
1913 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1914 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1915 return -EINVAL;
1916
1917 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1918 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1919 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1920 flags |= XE_VM_FLAG_LR_MODE;
1921 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1922 flags |= XE_VM_FLAG_FAULT_MODE;
1923
1924 vm = xe_vm_create(xe, flags, xef);
1925 if (IS_ERR(vm))
1926 return PTR_ERR(vm);
1927
1928 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1929 /* Warning: Security issue - never enable by default */
1930 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1931 #endif
1932
1933 /* user id alloc must always be last in ioctl to prevent UAF */
1934 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1935 if (err)
1936 goto err_close_and_put;
1937
1938 args->vm_id = id;
1939
1940 return 0;
1941
1942 err_close_and_put:
1943 xe_vm_close_and_put(vm);
1944
1945 return err;
1946 }
1947
1948 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1949 struct drm_file *file)
1950 {
1951 struct xe_device *xe = to_xe_device(dev);
1952 struct xe_file *xef = to_xe_file(file);
1953 struct drm_xe_vm_destroy *args = data;
1954 struct xe_vm *vm;
1955 int err = 0;
1956
1957 if (XE_IOCTL_DBG(xe, args->pad) ||
1958 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1959 return -EINVAL;
1960
1961 mutex_lock(&xef->vm.lock);
1962 vm = xa_load(&xef->vm.xa, args->vm_id);
1963 if (XE_IOCTL_DBG(xe, !vm))
1964 err = -ENOENT;
1965 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1966 err = -EBUSY;
1967 else
1968 xa_erase(&xef->vm.xa, args->vm_id);
1969 mutex_unlock(&xef->vm.lock);
1970
1971 if (!err)
1972 xe_vm_close_and_put(vm);
1973
1974 return err;
1975 }
1976
1977 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
1978 {
1979 struct drm_gpuva *gpuva;
1980 u32 num_vmas = 0;
1981
1982 lockdep_assert_held(&vm->lock);
1983 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
1984 num_vmas++;
1985
1986 return num_vmas;
1987 }
1988
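/*
 * Copy the memory-range attributes of all VMAs intersecting [start, end) into
 * @attrs, returning -ENOSPC if more than *num_vmas ranges are found; on
 * success *num_vmas is updated to the number of entries filled.
 */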
1989 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
1990 u64 end, struct drm_xe_mem_range_attr *attrs)
1991 {
1992 struct drm_gpuva *gpuva;
1993 int i = 0;
1994
1995 lockdep_assert_held(&vm->lock);
1996
1997 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
1998 struct xe_vma *vma = gpuva_to_vma(gpuva);
1999
2000 if (i == *num_vmas)
2001 return -ENOSPC;
2002
2003 attrs[i].start = xe_vma_start(vma);
2004 attrs[i].end = xe_vma_end(vma);
2005 attrs[i].atomic.val = vma->attr.atomic_access;
2006 attrs[i].pat_index.val = vma->attr.pat_index;
2007 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
2008 attrs[i].preferred_mem_loc.migration_policy =
2009 vma->attr.preferred_loc.migration_policy;
2010
2011 i++;
2012 }
2013
2014 *num_vmas = i;
2015 return 0;
2016 }
2017
2018 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2019 {
2020 struct xe_device *xe = to_xe_device(dev);
2021 struct xe_file *xef = to_xe_file(file);
2022 struct drm_xe_mem_range_attr *mem_attrs;
2023 struct drm_xe_vm_query_mem_range_attr *args = data;
2024 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2025 struct xe_vm *vm;
2026 int err = 0;
2027
2028 if (XE_IOCTL_DBG(xe,
2029 ((args->num_mem_ranges == 0 &&
2030 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2031 (args->num_mem_ranges > 0 &&
2032 (!attrs_user ||
2033 args->sizeof_mem_range_attr !=
2034 sizeof(struct drm_xe_mem_range_attr))))))
2035 return -EINVAL;
2036
2037 vm = xe_vm_lookup(xef, args->vm_id);
2038 if (XE_IOCTL_DBG(xe, !vm))
2039 return -EINVAL;
2040
2041 err = down_read_interruptible(&vm->lock);
2042 if (err)
2043 goto put_vm;
2044
2045 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2046
2047 if (args->num_mem_ranges == 0 && !attrs_user) {
2048 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2049 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2050 goto unlock_vm;
2051 }
2052
2053 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2054 GFP_KERNEL | __GFP_ACCOUNT |
2055 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2056 if (!mem_attrs) {
2057 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2058 goto unlock_vm;
2059 }
2060
2061 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2062 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2063 args->start + args->range, mem_attrs);
2064 if (err)
2065 goto free_mem_attrs;
2066
2067 err = copy_to_user(attrs_user, mem_attrs,
2068 args->sizeof_mem_range_attr * args->num_mem_ranges);
2069 if (err)
2070 err = -EFAULT;
2071
2072 free_mem_attrs:
2073 kvfree(mem_attrs);
2074 unlock_vm:
2075 up_read(&vm->lock);
2076 put_vm:
2077 xe_vm_put(vm);
2078 return err;
2079 }
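
/*
 * Illustrative userspace sketch of the two-pass query protocol implemented
 * above (not taken from this file): a first call with num_mem_ranges == 0 and
 * a NULL vector reports the range count and per-entry size, and a second call
 * fills a caller-allocated vector. The DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS
 * name is assumed here; see uapi/drm/xe_drm.h for the authoritative macro.
 *
 *	struct drm_xe_vm_query_mem_range_attr q = {
 *		.vm_id = vm_id,
 *		.start = start,
 *		.range = range,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &q);
 *
 *	attrs = calloc(q.num_mem_ranges, q.sizeof_mem_range_attr);
 *	q.vector_of_mem_attr = (uintptr_t)attrs;
 *	drmIoctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &q);
 */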
2080
2081 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2082 {
2083 if (page_addr > xe_vma_end(vma) - 1 ||
2084 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2085 return false;
2086
2087 return true;
2088 }
2089
2090 /**
2091 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2092 *
2093 * @vm: the xe_vm the vma belongs to
2094 * @page_addr: address to look up
2095 */
2096 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2097 {
2098 struct xe_vma *vma = NULL;
2099
2100 if (vm->usm.last_fault_vma) { /* Fast lookup */
2101 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2102 vma = vm->usm.last_fault_vma;
2103 }
2104 if (!vma)
2105 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2106
2107 return vma;
2108 }
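
/*
 * Minimal usage sketch, not the in-tree caller: the lookup is typically done
 * from the GPU page-fault path with vm->lock held for read, and a NULL return
 * means no VMA covers the faulting page.
 *
 *	down_read(&vm->lock);
 *	vma = xe_vm_find_vma_by_addr(vm, page_addr);
 *	if (!vma)
 *		err = -EINVAL;
 *	up_read(&vm->lock);
 */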
2109
2110 static const u32 region_to_mem_type[] = {
2111 XE_PL_TT,
2112 XE_PL_VRAM0,
2113 XE_PL_VRAM1,
2114 };
2115
2116 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2117 bool post_commit)
2118 {
2119 xe_svm_notifier_lock(vm);
2120 vma->gpuva.flags |= XE_VMA_DESTROYED;
2121 xe_svm_notifier_unlock(vm);
2122 if (post_commit)
2123 xe_vm_remove_vma(vm, vma);
2124 }
2125
2126 #undef ULL
2127 #define ULL unsigned long long
2128
2129 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2130 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2131 {
2132 struct xe_vma *vma;
2133
2134 switch (op->op) {
2135 case DRM_GPUVA_OP_MAP:
2136 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2137 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2138 break;
2139 case DRM_GPUVA_OP_REMAP:
2140 vma = gpuva_to_vma(op->remap.unmap->va);
2141 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2142 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2143 op->remap.unmap->keep ? 1 : 0);
2144 if (op->remap.prev)
2145 vm_dbg(&xe->drm,
2146 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2147 (ULL)op->remap.prev->va.addr,
2148 (ULL)op->remap.prev->va.range);
2149 if (op->remap.next)
2150 vm_dbg(&xe->drm,
2151 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2152 (ULL)op->remap.next->va.addr,
2153 (ULL)op->remap.next->va.range);
2154 break;
2155 case DRM_GPUVA_OP_UNMAP:
2156 vma = gpuva_to_vma(op->unmap.va);
2157 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2158 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2159 op->unmap.keep ? 1 : 0);
2160 break;
2161 case DRM_GPUVA_OP_PREFETCH:
2162 vma = gpuva_to_vma(op->prefetch.va);
2163 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2164 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2165 break;
2166 default:
2167 drm_warn(&xe->drm, "NOT POSSIBLE");
2168 }
2169 }
2170 #else
2171 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2172 {
2173 }
2174 #endif
2175
2176 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2177 {
2178 if (!xe_vm_in_fault_mode(vm))
2179 return false;
2180
2181 if (!xe_vm_has_scratch(vm))
2182 return false;
2183
2184 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2185 return false;
2186
2187 return true;
2188 }
2189
2190 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2191 {
2192 struct drm_gpuva_op *__op;
2193
2194 drm_gpuva_for_each_op(__op, ops) {
2195 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2196
2197 xe_vma_svm_prefetch_op_fini(op);
2198 }
2199 }
2200
2201 /*
2202  * Create the operations list from the IOCTL arguments and set up the operation
2203  * fields so the parse and commit steps are decoupled from the arguments. This step can fail.
2204 */
2205 static struct drm_gpuva_ops *
2206 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2207 struct xe_bo *bo, u64 bo_offset_or_userptr,
2208 u64 addr, u64 range,
2209 u32 operation, u32 flags,
2210 u32 prefetch_region, u16 pat_index)
2211 {
2212 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2213 struct drm_gpuva_ops *ops;
2214 struct drm_gpuva_op *__op;
2215 struct drm_gpuvm_bo *vm_bo;
2216 u64 range_end = addr + range;
2217 int err;
2218
2219 lockdep_assert_held_write(&vm->lock);
2220
2221 vm_dbg(&vm->xe->drm,
2222 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2223 operation, (ULL)addr, (ULL)range,
2224 (ULL)bo_offset_or_userptr);
2225
2226 switch (operation) {
2227 case DRM_XE_VM_BIND_OP_MAP:
2228 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2229 struct drm_gpuvm_map_req map_req = {
2230 .map.va.addr = addr,
2231 .map.va.range = range,
2232 .map.gem.obj = obj,
2233 .map.gem.offset = bo_offset_or_userptr,
2234 };
2235
2236 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2237 break;
2238 }
2239 case DRM_XE_VM_BIND_OP_UNMAP:
2240 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2241 break;
2242 case DRM_XE_VM_BIND_OP_PREFETCH:
2243 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2244 break;
2245 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2246 xe_assert(vm->xe, bo);
2247
2248 err = xe_bo_lock(bo, true);
2249 if (err)
2250 return ERR_PTR(err);
2251
2252 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2253 if (IS_ERR(vm_bo)) {
2254 xe_bo_unlock(bo);
2255 return ERR_CAST(vm_bo);
2256 }
2257
2258 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2259 drm_gpuvm_bo_put(vm_bo);
2260 xe_bo_unlock(bo);
2261 break;
2262 default:
2263 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2264 ops = ERR_PTR(-EINVAL);
2265 }
2266 if (IS_ERR(ops))
2267 return ops;
2268
2269 drm_gpuva_for_each_op(__op, ops) {
2270 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2271
2272 if (__op->op == DRM_GPUVA_OP_MAP) {
2273 op->map.immediate =
2274 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2275 op->map.read_only =
2276 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2277 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2278 op->map.is_cpu_addr_mirror = flags &
2279 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
2280 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2281 op->map.pat_index = pat_index;
2282 op->map.invalidate_on_bind =
2283 __xe_vm_needs_clear_scratch_pages(vm, flags);
2284 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2285 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2286 struct xe_tile *tile;
2287 struct xe_svm_range *svm_range;
2288 struct drm_gpusvm_ctx ctx = {};
2289 struct drm_pagemap *dpagemap;
2290 u8 id, tile_mask = 0;
2291 u32 i;
2292
2293 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2294 op->prefetch.region = prefetch_region;
2295 break;
2296 }
2297
2298 ctx.read_only = xe_vma_read_only(vma);
2299 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2300 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2301
2302 for_each_tile(tile, vm->xe, id)
2303 tile_mask |= 0x1 << id;
2304
2305 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2306 op->prefetch_range.ranges_count = 0;
2307 tile = NULL;
2308
2309 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2310 dpagemap = xe_vma_resolve_pagemap(vma,
2311 xe_device_get_root_tile(vm->xe));
2312 /*
2313 				 * TODO: Once multigpu support is enabled, we will need
2314 				 * something to dereference the tile from the dpagemap.
2315 */
2316 if (dpagemap)
2317 tile = xe_device_get_root_tile(vm->xe);
2318 } else if (prefetch_region) {
2319 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2320 XE_PL_VRAM0];
2321 }
2322
2323 op->prefetch_range.tile = tile;
2324 alloc_next_range:
2325 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2326
2327 if (PTR_ERR(svm_range) == -ENOENT) {
2328 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2329
2330 addr = ret == ULONG_MAX ? 0 : ret;
2331 if (addr)
2332 goto alloc_next_range;
2333 else
2334 goto print_op_label;
2335 }
2336
2337 if (IS_ERR(svm_range)) {
2338 err = PTR_ERR(svm_range);
2339 goto unwind_prefetch_ops;
2340 }
2341
2342 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
2343 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2344 goto check_next_range;
2345 }
2346
2347 err = xa_alloc(&op->prefetch_range.range,
2348 &i, svm_range, xa_limit_32b,
2349 GFP_KERNEL);
2350
2351 if (err)
2352 goto unwind_prefetch_ops;
2353
2354 op->prefetch_range.ranges_count++;
2355 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2356 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2357 check_next_range:
2358 if (range_end > xe_svm_range_end(svm_range) &&
2359 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2360 addr = xe_svm_range_end(svm_range);
2361 goto alloc_next_range;
2362 }
2363 }
2364 print_op_label:
2365 print_op(vm->xe, __op);
2366 }
2367
2368 return ops;
2369
2370 unwind_prefetch_ops:
2371 xe_svm_prefetch_gpuva_ops_fini(ops);
2372 drm_gpuva_ops_free(&vm->gpuvm, ops);
2373 return ERR_PTR(err);
2374 }
2375
2376 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
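
/*
 * Condensed sketch of how the helpers above and below chain together for a
 * single MAP bind, with vm->lock held for write and all error unwinding
 * omitted; xe_vm_bind_kernel_bo() further down is the full in-tree example
 * of this sequence.
 *
 *	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
 *	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, range,
 *				       DRM_XE_VM_BIND_OP_MAP, 0, 0, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	err = xe_vma_ops_alloc(&vops, false);
 *	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 */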
2377
2378 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2379 struct xe_vma_mem_attr *attr, unsigned int flags)
2380 {
2381 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2382 struct xe_validation_ctx ctx;
2383 struct drm_exec exec;
2384 struct xe_vma *vma;
2385 int err = 0;
2386
2387 lockdep_assert_held_write(&vm->lock);
2388
2389 if (bo) {
2390 err = 0;
2391 xe_validation_guard(&ctx, &vm->xe->val, &exec,
2392 (struct xe_val_flags) {.interruptible = true}, err) {
2393 if (!bo->vm) {
2394 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2395 drm_exec_retry_on_contention(&exec);
2396 }
2397 if (!err) {
2398 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2399 drm_exec_retry_on_contention(&exec);
2400 }
2401 if (err)
2402 return ERR_PTR(err);
2403
2404 vma = xe_vma_create(vm, bo, op->gem.offset,
2405 op->va.addr, op->va.addr +
2406 op->va.range - 1, attr, flags);
2407 if (IS_ERR(vma))
2408 return vma;
2409
2410 if (!bo->vm) {
2411 err = add_preempt_fences(vm, bo);
2412 if (err) {
2413 prep_vma_destroy(vm, vma, false);
2414 xe_vma_destroy(vma, NULL);
2415 }
2416 }
2417 }
2418 if (err)
2419 return ERR_PTR(err);
2420 } else {
2421 vma = xe_vma_create(vm, NULL, op->gem.offset,
2422 op->va.addr, op->va.addr +
2423 op->va.range - 1, attr, flags);
2424 if (IS_ERR(vma))
2425 return vma;
2426
2427 if (xe_vma_is_userptr(vma))
2428 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2429 }
2430 if (err) {
2431 prep_vma_destroy(vm, vma, false);
2432 xe_vma_destroy_unlocked(vma);
2433 vma = ERR_PTR(err);
2434 }
2435
2436 return vma;
2437 }
2438
2439 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2440 {
2441 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2442 return SZ_1G;
2443 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2444 return SZ_2M;
2445 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2446 return SZ_64K;
2447 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2448 return SZ_4K;
2449
2450 	return SZ_1G;	/* Uninitialized, use max size */
2451 }
2452
2453 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2454 {
2455 switch (size) {
2456 case SZ_1G:
2457 vma->gpuva.flags |= XE_VMA_PTE_1G;
2458 break;
2459 case SZ_2M:
2460 vma->gpuva.flags |= XE_VMA_PTE_2M;
2461 break;
2462 case SZ_64K:
2463 vma->gpuva.flags |= XE_VMA_PTE_64K;
2464 break;
2465 case SZ_4K:
2466 vma->gpuva.flags |= XE_VMA_PTE_4K;
2467 break;
2468 }
2469 }
2470
2471 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2472 {
2473 int err = 0;
2474
2475 lockdep_assert_held_write(&vm->lock);
2476
2477 switch (op->base.op) {
2478 case DRM_GPUVA_OP_MAP:
2479 err |= xe_vm_insert_vma(vm, op->map.vma);
2480 if (!err)
2481 op->flags |= XE_VMA_OP_COMMITTED;
2482 break;
2483 case DRM_GPUVA_OP_REMAP:
2484 {
2485 u8 tile_present =
2486 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2487
2488 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2489 true);
2490 op->flags |= XE_VMA_OP_COMMITTED;
2491
2492 if (op->remap.prev) {
2493 err |= xe_vm_insert_vma(vm, op->remap.prev);
2494 if (!err)
2495 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2496 if (!err && op->remap.skip_prev) {
2497 op->remap.prev->tile_present =
2498 tile_present;
2499 op->remap.prev = NULL;
2500 }
2501 }
2502 if (op->remap.next) {
2503 err |= xe_vm_insert_vma(vm, op->remap.next);
2504 if (!err)
2505 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2506 if (!err && op->remap.skip_next) {
2507 op->remap.next->tile_present =
2508 tile_present;
2509 op->remap.next = NULL;
2510 }
2511 }
2512
2513 /* Adjust for partial unbind after removing VMA from VM */
2514 if (!err) {
2515 op->base.remap.unmap->va->va.addr = op->remap.start;
2516 op->base.remap.unmap->va->va.range = op->remap.range;
2517 }
2518 break;
2519 }
2520 case DRM_GPUVA_OP_UNMAP:
2521 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2522 op->flags |= XE_VMA_OP_COMMITTED;
2523 break;
2524 case DRM_GPUVA_OP_PREFETCH:
2525 op->flags |= XE_VMA_OP_COMMITTED;
2526 break;
2527 default:
2528 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2529 }
2530
2531 return err;
2532 }
2533
2534 /**
2535 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2536 * @vma: Pointer to the xe_vma structure to check
2537 *
2538 * This function determines whether the given VMA (Virtual Memory Area)
2539 * has its memory attributes set to their default values. Specifically,
2540 * it checks the following conditions:
2541 *
2542  * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
2543 * - `pat_index` is equal to `default_pat_index`
2544 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2545 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2546 *
2547 * Return: true if all attributes are at their default values, false otherwise.
2548 */
2549 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2550 {
2551 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2552 vma->attr.pat_index == vma->attr.default_pat_index &&
2553 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2554 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2555 }
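
/*
 * Hedged usage sketch: callers such as the madvise path can use this helper
 * to skip VMAs whose attributes were never changed from the defaults. The
 * reset_attrs_to_default() name below is purely illustrative.
 *
 *	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
 *		struct xe_vma *vma = gpuva_to_vma(gpuva);
 *
 *		if (xe_vma_has_default_mem_attrs(vma))
 *			continue;
 *
 *		reset_attrs_to_default(vma);
 *	}
 */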
2556
2557 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2558 struct xe_vma_ops *vops)
2559 {
2560 struct xe_device *xe = vm->xe;
2561 struct drm_gpuva_op *__op;
2562 struct xe_tile *tile;
2563 u8 id, tile_mask = 0;
2564 int err = 0;
2565
2566 lockdep_assert_held_write(&vm->lock);
2567
2568 for_each_tile(tile, vm->xe, id)
2569 tile_mask |= 0x1 << id;
2570
2571 drm_gpuva_for_each_op(__op, ops) {
2572 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2573 struct xe_vma *vma;
2574 unsigned int flags = 0;
2575
2576 INIT_LIST_HEAD(&op->link);
2577 list_add_tail(&op->link, &vops->list);
2578 op->tile_mask = tile_mask;
2579
2580 switch (op->base.op) {
2581 case DRM_GPUVA_OP_MAP:
2582 {
2583 struct xe_vma_mem_attr default_attr = {
2584 .preferred_loc = {
2585 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2586 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2587 },
2588 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2589 .default_pat_index = op->map.pat_index,
2590 .pat_index = op->map.pat_index,
2591 };
2592
2593 flags |= op->map.read_only ?
2594 VMA_CREATE_FLAG_READ_ONLY : 0;
2595 flags |= op->map.is_null ?
2596 VMA_CREATE_FLAG_IS_NULL : 0;
2597 flags |= op->map.dumpable ?
2598 VMA_CREATE_FLAG_DUMPABLE : 0;
2599 flags |= op->map.is_cpu_addr_mirror ?
2600 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2601
2602 vma = new_vma(vm, &op->base.map, &default_attr,
2603 flags);
2604 if (IS_ERR(vma))
2605 return PTR_ERR(vma);
2606
2607 op->map.vma = vma;
2608 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2609 !op->map.is_cpu_addr_mirror) ||
2610 op->map.invalidate_on_bind)
2611 xe_vma_ops_incr_pt_update_ops(vops,
2612 op->tile_mask, 1);
2613 break;
2614 }
2615 case DRM_GPUVA_OP_REMAP:
2616 {
2617 struct xe_vma *old =
2618 gpuva_to_vma(op->base.remap.unmap->va);
2619 bool skip = xe_vma_is_cpu_addr_mirror(old);
2620 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2621 int num_remap_ops = 0;
2622
2623 if (op->base.remap.prev)
2624 start = op->base.remap.prev->va.addr +
2625 op->base.remap.prev->va.range;
2626 if (op->base.remap.next)
2627 end = op->base.remap.next->va.addr;
2628
2629 if (xe_vma_is_cpu_addr_mirror(old) &&
2630 xe_svm_has_mapping(vm, start, end)) {
2631 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2632 xe_svm_unmap_address_range(vm, start, end);
2633 else
2634 return -EBUSY;
2635 }
2636
2637 op->remap.start = xe_vma_start(old);
2638 op->remap.range = xe_vma_size(old);
2639
2640 flags |= op->base.remap.unmap->va->flags &
2641 XE_VMA_READ_ONLY ?
2642 VMA_CREATE_FLAG_READ_ONLY : 0;
2643 flags |= op->base.remap.unmap->va->flags &
2644 DRM_GPUVA_SPARSE ?
2645 VMA_CREATE_FLAG_IS_NULL : 0;
2646 flags |= op->base.remap.unmap->va->flags &
2647 XE_VMA_DUMPABLE ?
2648 VMA_CREATE_FLAG_DUMPABLE : 0;
2649 flags |= xe_vma_is_cpu_addr_mirror(old) ?
2650 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2651
2652 if (op->base.remap.prev) {
2653 vma = new_vma(vm, op->base.remap.prev,
2654 &old->attr, flags);
2655 if (IS_ERR(vma))
2656 return PTR_ERR(vma);
2657
2658 op->remap.prev = vma;
2659
2660 /*
2661 * Userptr creates a new SG mapping so
2662 * we must also rebind.
2663 */
2664 op->remap.skip_prev = skip ||
2665 (!xe_vma_is_userptr(old) &&
2666 IS_ALIGNED(xe_vma_end(vma),
2667 xe_vma_max_pte_size(old)));
2668 if (op->remap.skip_prev) {
2669 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2670 op->remap.range -=
2671 xe_vma_end(vma) -
2672 xe_vma_start(old);
2673 op->remap.start = xe_vma_end(vma);
2674 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2675 (ULL)op->remap.start,
2676 (ULL)op->remap.range);
2677 } else {
2678 num_remap_ops++;
2679 }
2680 }
2681
2682 if (op->base.remap.next) {
2683 vma = new_vma(vm, op->base.remap.next,
2684 &old->attr, flags);
2685 if (IS_ERR(vma))
2686 return PTR_ERR(vma);
2687
2688 op->remap.next = vma;
2689
2690 /*
2691 * Userptr creates a new SG mapping so
2692 * we must also rebind.
2693 */
2694 op->remap.skip_next = skip ||
2695 (!xe_vma_is_userptr(old) &&
2696 IS_ALIGNED(xe_vma_start(vma),
2697 xe_vma_max_pte_size(old)));
2698 if (op->remap.skip_next) {
2699 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2700 op->remap.range -=
2701 xe_vma_end(old) -
2702 xe_vma_start(vma);
2703 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2704 (ULL)op->remap.start,
2705 (ULL)op->remap.range);
2706 } else {
2707 num_remap_ops++;
2708 }
2709 }
2710 if (!skip)
2711 num_remap_ops++;
2712
2713 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2714 break;
2715 }
2716 case DRM_GPUVA_OP_UNMAP:
2717 vma = gpuva_to_vma(op->base.unmap.va);
2718
2719 if (xe_vma_is_cpu_addr_mirror(vma) &&
2720 xe_svm_has_mapping(vm, xe_vma_start(vma),
2721 xe_vma_end(vma)))
2722 return -EBUSY;
2723
2724 if (!xe_vma_is_cpu_addr_mirror(vma))
2725 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2726 break;
2727 case DRM_GPUVA_OP_PREFETCH:
2728 vma = gpuva_to_vma(op->base.prefetch.va);
2729
2730 if (xe_vma_is_userptr(vma)) {
2731 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2732 if (err)
2733 return err;
2734 }
2735
2736 if (xe_vma_is_cpu_addr_mirror(vma))
2737 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2738 op->prefetch_range.ranges_count);
2739 else
2740 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2741
2742 break;
2743 default:
2744 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2745 }
2746
2747 err = xe_vma_op_commit(vm, op);
2748 if (err)
2749 return err;
2750 }
2751
2752 return 0;
2753 }
2754
2755 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2756 bool post_commit, bool prev_post_commit,
2757 bool next_post_commit)
2758 {
2759 lockdep_assert_held_write(&vm->lock);
2760
2761 switch (op->base.op) {
2762 case DRM_GPUVA_OP_MAP:
2763 if (op->map.vma) {
2764 prep_vma_destroy(vm, op->map.vma, post_commit);
2765 xe_vma_destroy_unlocked(op->map.vma);
2766 }
2767 break;
2768 case DRM_GPUVA_OP_UNMAP:
2769 {
2770 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2771
2772 if (vma) {
2773 xe_svm_notifier_lock(vm);
2774 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2775 xe_svm_notifier_unlock(vm);
2776 if (post_commit)
2777 xe_vm_insert_vma(vm, vma);
2778 }
2779 break;
2780 }
2781 case DRM_GPUVA_OP_REMAP:
2782 {
2783 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2784
2785 if (op->remap.prev) {
2786 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2787 xe_vma_destroy_unlocked(op->remap.prev);
2788 }
2789 if (op->remap.next) {
2790 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2791 xe_vma_destroy_unlocked(op->remap.next);
2792 }
2793 if (vma) {
2794 xe_svm_notifier_lock(vm);
2795 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2796 xe_svm_notifier_unlock(vm);
2797 if (post_commit)
2798 xe_vm_insert_vma(vm, vma);
2799 }
2800 break;
2801 }
2802 case DRM_GPUVA_OP_PREFETCH:
2803 /* Nothing to do */
2804 break;
2805 default:
2806 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2807 }
2808 }
2809
2810 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2811 struct drm_gpuva_ops **ops,
2812 int num_ops_list)
2813 {
2814 int i;
2815
2816 for (i = num_ops_list - 1; i >= 0; --i) {
2817 struct drm_gpuva_ops *__ops = ops[i];
2818 struct drm_gpuva_op *__op;
2819
2820 if (!__ops)
2821 continue;
2822
2823 drm_gpuva_for_each_op_reverse(__op, __ops) {
2824 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2825
2826 xe_vma_op_unwind(vm, op,
2827 op->flags & XE_VMA_OP_COMMITTED,
2828 op->flags & XE_VMA_OP_PREV_COMMITTED,
2829 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2830 }
2831 }
2832 }
2833
2834 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2835 bool validate)
2836 {
2837 struct xe_bo *bo = xe_vma_bo(vma);
2838 struct xe_vm *vm = xe_vma_vm(vma);
2839 int err = 0;
2840
2841 if (bo) {
2842 if (!bo->vm)
2843 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2844 if (!err && validate)
2845 err = xe_bo_validate(bo, vm,
2846 !xe_vm_in_preempt_fence_mode(vm), exec);
2847 }
2848
2849 return err;
2850 }
2851
2852 static int check_ufence(struct xe_vma *vma)
2853 {
2854 if (vma->ufence) {
2855 struct xe_user_fence * const f = vma->ufence;
2856
2857 if (!xe_sync_ufence_get_status(f))
2858 return -EBUSY;
2859
2860 vma->ufence = NULL;
2861 xe_sync_ufence_put(f);
2862 }
2863
2864 return 0;
2865 }
2866
2867 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2868 {
2869 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2870 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2871 struct xe_tile *tile = op->prefetch_range.tile;
2872 int err = 0;
2873
2874 struct xe_svm_range *svm_range;
2875 struct drm_gpusvm_ctx ctx = {};
2876 unsigned long i;
2877
2878 if (!xe_vma_is_cpu_addr_mirror(vma))
2879 return 0;
2880
2881 ctx.read_only = xe_vma_read_only(vma);
2882 ctx.devmem_possible = devmem_possible;
2883 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2884
2885 /* TODO: Threading the migration */
2886 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2887 if (!tile)
2888 xe_svm_range_migrate_to_smem(vm, svm_range);
2889
2890 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
2891 err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2892 if (err) {
2893 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2894 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2895 return -ENODATA;
2896 }
2897 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2898 }
2899
2900 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2901 if (err) {
2902 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2903 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2904 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2905 err = -ENODATA;
2906 return err;
2907 }
2908 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2909 }
2910
2911 return err;
2912 }
2913
2914 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2915 struct xe_vma_op *op)
2916 {
2917 int err = 0;
2918
2919 switch (op->base.op) {
2920 case DRM_GPUVA_OP_MAP:
2921 if (!op->map.invalidate_on_bind)
2922 err = vma_lock_and_validate(exec, op->map.vma,
2923 !xe_vm_in_fault_mode(vm) ||
2924 op->map.immediate);
2925 break;
2926 case DRM_GPUVA_OP_REMAP:
2927 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2928 if (err)
2929 break;
2930
2931 err = vma_lock_and_validate(exec,
2932 gpuva_to_vma(op->base.remap.unmap->va),
2933 false);
2934 if (!err && op->remap.prev)
2935 err = vma_lock_and_validate(exec, op->remap.prev, true);
2936 if (!err && op->remap.next)
2937 err = vma_lock_and_validate(exec, op->remap.next, true);
2938 break;
2939 case DRM_GPUVA_OP_UNMAP:
2940 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2941 if (err)
2942 break;
2943
2944 err = vma_lock_and_validate(exec,
2945 gpuva_to_vma(op->base.unmap.va),
2946 false);
2947 break;
2948 case DRM_GPUVA_OP_PREFETCH:
2949 {
2950 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2951 u32 region;
2952
2953 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2954 region = op->prefetch.region;
2955 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
2956 region <= ARRAY_SIZE(region_to_mem_type));
2957 }
2958
2959 err = vma_lock_and_validate(exec,
2960 gpuva_to_vma(op->base.prefetch.va),
2961 false);
2962 if (!err && !xe_vma_has_no_bo(vma))
2963 err = xe_bo_migrate(xe_vma_bo(vma),
2964 region_to_mem_type[region],
2965 NULL,
2966 exec);
2967 break;
2968 }
2969 default:
2970 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2971 }
2972
2973 return err;
2974 }
2975
2976 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
2977 {
2978 struct xe_vma_op *op;
2979 int err;
2980
2981 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
2982 return 0;
2983
2984 list_for_each_entry(op, &vops->list, link) {
2985 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
2986 err = prefetch_ranges(vm, op);
2987 if (err)
2988 return err;
2989 }
2990 }
2991
2992 return 0;
2993 }
2994
2995 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2996 struct xe_vm *vm,
2997 struct xe_vma_ops *vops)
2998 {
2999 struct xe_vma_op *op;
3000 int err;
3001
3002 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3003 if (err)
3004 return err;
3005
3006 list_for_each_entry(op, &vops->list, link) {
3007 err = op_lock_and_prep(exec, vm, op);
3008 if (err)
3009 return err;
3010 }
3011
3012 #ifdef TEST_VM_OPS_ERROR
3013 if (vops->inject_error &&
3014 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3015 return -ENOSPC;
3016 #endif
3017
3018 return 0;
3019 }
3020
3021 static void op_trace(struct xe_vma_op *op)
3022 {
3023 switch (op->base.op) {
3024 case DRM_GPUVA_OP_MAP:
3025 trace_xe_vma_bind(op->map.vma);
3026 break;
3027 case DRM_GPUVA_OP_REMAP:
3028 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3029 if (op->remap.prev)
3030 trace_xe_vma_bind(op->remap.prev);
3031 if (op->remap.next)
3032 trace_xe_vma_bind(op->remap.next);
3033 break;
3034 case DRM_GPUVA_OP_UNMAP:
3035 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3036 break;
3037 case DRM_GPUVA_OP_PREFETCH:
3038 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3039 break;
3040 case DRM_GPUVA_OP_DRIVER:
3041 break;
3042 default:
3043 XE_WARN_ON("NOT POSSIBLE");
3044 }
3045 }
3046
3047 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3048 {
3049 struct xe_vma_op *op;
3050
3051 list_for_each_entry(op, &vops->list, link)
3052 op_trace(op);
3053 }
3054
3055 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3056 {
3057 struct xe_exec_queue *q = vops->q;
3058 struct xe_tile *tile;
3059 int number_tiles = 0;
3060 u8 id;
3061
3062 for_each_tile(tile, vm->xe, id) {
3063 if (vops->pt_update_ops[id].num_ops)
3064 ++number_tiles;
3065
3066 if (vops->pt_update_ops[id].q)
3067 continue;
3068
3069 if (q) {
3070 vops->pt_update_ops[id].q = q;
3071 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3072 q = list_next_entry(q, multi_gt_list);
3073 } else {
3074 vops->pt_update_ops[id].q = vm->q[id];
3075 }
3076 }
3077
3078 return number_tiles;
3079 }
3080
3081 static struct dma_fence *ops_execute(struct xe_vm *vm,
3082 struct xe_vma_ops *vops)
3083 {
3084 struct xe_tile *tile;
3085 struct dma_fence *fence = NULL;
3086 struct dma_fence **fences = NULL;
3087 struct dma_fence_array *cf = NULL;
3088 int number_tiles = 0, current_fence = 0, err;
3089 u8 id;
3090
3091 number_tiles = vm_ops_setup_tile_args(vm, vops);
3092 if (number_tiles == 0)
3093 return ERR_PTR(-ENODATA);
3094
3095 if (number_tiles > 1) {
3096 fences = kmalloc_array(number_tiles, sizeof(*fences),
3097 GFP_KERNEL);
3098 if (!fences) {
3099 fence = ERR_PTR(-ENOMEM);
3100 goto err_trace;
3101 }
3102 }
3103
3104 for_each_tile(tile, vm->xe, id) {
3105 if (!vops->pt_update_ops[id].num_ops)
3106 continue;
3107
3108 err = xe_pt_update_ops_prepare(tile, vops);
3109 if (err) {
3110 fence = ERR_PTR(err);
3111 goto err_out;
3112 }
3113 }
3114
3115 trace_xe_vm_ops_execute(vops);
3116
3117 for_each_tile(tile, vm->xe, id) {
3118 if (!vops->pt_update_ops[id].num_ops)
3119 continue;
3120
3121 fence = xe_pt_update_ops_run(tile, vops);
3122 if (IS_ERR(fence))
3123 goto err_out;
3124
3125 if (fences)
3126 fences[current_fence++] = fence;
3127 }
3128
3129 if (fences) {
3130 cf = dma_fence_array_create(number_tiles, fences,
3131 vm->composite_fence_ctx,
3132 vm->composite_fence_seqno++,
3133 false);
3134 if (!cf) {
3135 --vm->composite_fence_seqno;
3136 fence = ERR_PTR(-ENOMEM);
3137 goto err_out;
3138 }
3139 fence = &cf->base;
3140 }
3141
3142 for_each_tile(tile, vm->xe, id) {
3143 if (!vops->pt_update_ops[id].num_ops)
3144 continue;
3145
3146 xe_pt_update_ops_fini(tile, vops);
3147 }
3148
3149 return fence;
3150
3151 err_out:
3152 for_each_tile(tile, vm->xe, id) {
3153 if (!vops->pt_update_ops[id].num_ops)
3154 continue;
3155
3156 xe_pt_update_ops_abort(tile, vops);
3157 }
3158 while (current_fence)
3159 dma_fence_put(fences[--current_fence]);
3160 kfree(fences);
3161 kfree(cf);
3162
3163 err_trace:
3164 trace_xe_vm_ops_fail(vm);
3165 return fence;
3166 }
3167
3168 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3169 {
3170 if (vma->ufence)
3171 xe_sync_ufence_put(vma->ufence);
3172 vma->ufence = __xe_sync_ufence_get(ufence);
3173 }
3174
3175 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3176 struct xe_user_fence *ufence)
3177 {
3178 switch (op->base.op) {
3179 case DRM_GPUVA_OP_MAP:
3180 vma_add_ufence(op->map.vma, ufence);
3181 break;
3182 case DRM_GPUVA_OP_REMAP:
3183 if (op->remap.prev)
3184 vma_add_ufence(op->remap.prev, ufence);
3185 if (op->remap.next)
3186 vma_add_ufence(op->remap.next, ufence);
3187 break;
3188 case DRM_GPUVA_OP_UNMAP:
3189 break;
3190 case DRM_GPUVA_OP_PREFETCH:
3191 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3192 break;
3193 default:
3194 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3195 }
3196 }
3197
3198 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3199 struct dma_fence *fence)
3200 {
3201 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3202 struct xe_user_fence *ufence;
3203 struct xe_vma_op *op;
3204 int i;
3205
3206 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3207 list_for_each_entry(op, &vops->list, link) {
3208 if (ufence)
3209 op_add_ufence(vm, op, ufence);
3210
3211 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3212 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3213 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3214 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3215 fence);
3216 }
3217 if (ufence)
3218 xe_sync_ufence_put(ufence);
3219 if (fence) {
3220 for (i = 0; i < vops->num_syncs; i++)
3221 xe_sync_entry_signal(vops->syncs + i, fence);
3222 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3223 }
3224 }
3225
3226 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3227 struct xe_vma_ops *vops)
3228 {
3229 struct xe_validation_ctx ctx;
3230 struct drm_exec exec;
3231 struct dma_fence *fence;
3232 int err = 0;
3233
3234 lockdep_assert_held_write(&vm->lock);
3235
3236 xe_validation_guard(&ctx, &vm->xe->val, &exec,
3237 ((struct xe_val_flags) {
3238 .interruptible = true,
3239 .exec_ignore_duplicates = true,
3240 }), err) {
3241 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3242 drm_exec_retry_on_contention(&exec);
3243 xe_validation_retry_on_oom(&ctx, &err);
3244 if (err)
3245 return ERR_PTR(err);
3246
3247 xe_vm_set_validation_exec(vm, &exec);
3248 fence = ops_execute(vm, vops);
3249 xe_vm_set_validation_exec(vm, NULL);
3250 if (IS_ERR(fence)) {
3251 if (PTR_ERR(fence) == -ENODATA)
3252 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3253 return fence;
3254 }
3255
3256 vm_bind_ioctl_ops_fini(vm, vops, fence);
3257 }
3258
3259 return err ? ERR_PTR(err) : fence;
3260 }
3261 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3262
3263 #define SUPPORTED_FLAGS_STUB \
3264 (DRM_XE_VM_BIND_FLAG_READONLY | \
3265 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3266 DRM_XE_VM_BIND_FLAG_NULL | \
3267 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3268 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3269 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
3270
3271 #ifdef TEST_VM_OPS_ERROR
3272 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3273 #else
3274 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3275 #endif
3276
3277 #define XE_64K_PAGE_MASK 0xffffull
3278 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3279
3280 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3281 struct drm_xe_vm_bind *args,
3282 struct drm_xe_vm_bind_op **bind_ops)
3283 {
3284 int err;
3285 int i;
3286
3287 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3288 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3289 return -EINVAL;
3290
3291 if (XE_IOCTL_DBG(xe, args->extensions))
3292 return -EINVAL;
3293
3294 if (args->num_binds > 1) {
3295 u64 __user *bind_user =
3296 u64_to_user_ptr(args->vector_of_binds);
3297
3298 *bind_ops = kvmalloc_array(args->num_binds,
3299 sizeof(struct drm_xe_vm_bind_op),
3300 GFP_KERNEL | __GFP_ACCOUNT |
3301 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3302 if (!*bind_ops)
3303 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3304
3305 err = copy_from_user(*bind_ops, bind_user,
3306 sizeof(struct drm_xe_vm_bind_op) *
3307 args->num_binds);
3308 if (XE_IOCTL_DBG(xe, err)) {
3309 err = -EFAULT;
3310 goto free_bind_ops;
3311 }
3312 } else {
3313 *bind_ops = &args->bind;
3314 }
3315
3316 for (i = 0; i < args->num_binds; ++i) {
3317 u64 range = (*bind_ops)[i].range;
3318 u64 addr = (*bind_ops)[i].addr;
3319 u32 op = (*bind_ops)[i].op;
3320 u32 flags = (*bind_ops)[i].flags;
3321 u32 obj = (*bind_ops)[i].obj;
3322 u64 obj_offset = (*bind_ops)[i].obj_offset;
3323 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3324 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3325 bool is_cpu_addr_mirror = flags &
3326 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3327 u16 pat_index = (*bind_ops)[i].pat_index;
3328 u16 coh_mode;
3329
3330 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3331 (!xe_vm_in_fault_mode(vm) ||
3332 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3333 err = -EINVAL;
3334 goto free_bind_ops;
3335 }
3336
3337 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3338 err = -EINVAL;
3339 goto free_bind_ops;
3340 }
3341
3342 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3343 (*bind_ops)[i].pat_index = pat_index;
3344 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3345 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3346 err = -EINVAL;
3347 goto free_bind_ops;
3348 }
3349
3350 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3351 err = -EINVAL;
3352 goto free_bind_ops;
3353 }
3354
3355 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3356 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3357 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3358 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3359 is_cpu_addr_mirror)) ||
3360 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3361 (is_null || is_cpu_addr_mirror)) ||
3362 XE_IOCTL_DBG(xe, !obj &&
3363 op == DRM_XE_VM_BIND_OP_MAP &&
3364 !is_null && !is_cpu_addr_mirror) ||
3365 XE_IOCTL_DBG(xe, !obj &&
3366 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3367 XE_IOCTL_DBG(xe, addr &&
3368 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3369 XE_IOCTL_DBG(xe, range &&
3370 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3371 XE_IOCTL_DBG(xe, obj &&
3372 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3373 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3374 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3375 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3376 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3377 XE_IOCTL_DBG(xe, obj &&
3378 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3379 XE_IOCTL_DBG(xe, prefetch_region &&
3380 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3381 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3382 !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
3383 XE_IOCTL_DBG(xe, obj &&
3384 op == DRM_XE_VM_BIND_OP_UNMAP)) {
3385 err = -EINVAL;
3386 goto free_bind_ops;
3387 }
3388
3389 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3390 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3391 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3392 XE_IOCTL_DBG(xe, !range &&
3393 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3394 err = -EINVAL;
3395 goto free_bind_ops;
3396 }
3397 }
3398
3399 return 0;
3400
3401 free_bind_ops:
3402 if (args->num_binds > 1)
3403 kvfree(*bind_ops);
3404 *bind_ops = NULL;
3405 return err;
3406 }
3407
3408 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3409 struct xe_exec_queue *q,
3410 struct xe_sync_entry *syncs,
3411 int num_syncs)
3412 {
3413 struct dma_fence *fence;
3414 int i, err = 0;
3415
3416 fence = xe_sync_in_fence_get(syncs, num_syncs,
3417 to_wait_exec_queue(vm, q), vm);
3418 if (IS_ERR(fence))
3419 return PTR_ERR(fence);
3420
3421 for (i = 0; i < num_syncs; i++)
3422 xe_sync_entry_signal(&syncs[i], fence);
3423
3424 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3425 fence);
3426 dma_fence_put(fence);
3427
3428 return err;
3429 }
3430
3431 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3432 struct xe_exec_queue *q,
3433 struct xe_sync_entry *syncs, u32 num_syncs)
3434 {
3435 memset(vops, 0, sizeof(*vops));
3436 INIT_LIST_HEAD(&vops->list);
3437 vops->vm = vm;
3438 vops->q = q;
3439 vops->syncs = syncs;
3440 vops->num_syncs = num_syncs;
3441 vops->flags = 0;
3442 }
3443
3444 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3445 u64 addr, u64 range, u64 obj_offset,
3446 u16 pat_index, u32 op, u32 bind_flags)
3447 {
3448 u16 coh_mode;
3449
3450 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3451 XE_IOCTL_DBG(xe, obj_offset >
3452 xe_bo_size(bo) - range)) {
3453 return -EINVAL;
3454 }
3455
3456 /*
3457 * Some platforms require 64k VM_BIND alignment,
3458 * specifically those with XE_VRAM_FLAGS_NEED64K.
3459 *
3460 	 * Other platforms may have BOs set to 64k physical placement,
3461 * but can be mapped at 4k offsets anyway. This check is only
3462 * there for the former case.
3463 */
3464 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3465 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3466 if (XE_IOCTL_DBG(xe, obj_offset &
3467 XE_64K_PAGE_MASK) ||
3468 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3469 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3470 return -EINVAL;
3471 }
3472 }
3473
3474 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3475 if (bo->cpu_caching) {
3476 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3477 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3478 return -EINVAL;
3479 }
3480 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3481 /*
3482 * Imported dma-buf from a different device should
3483 * require 1way or 2way coherency since we don't know
3484 	 * how it was mapped on the CPU. Just assume it is
3485 	 * potentially cached on the CPU side.
3486 */
3487 return -EINVAL;
3488 }
3489
3490 /* If a BO is protected it can only be mapped if the key is still valid */
3491 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3492 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3493 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3494 return -ENOEXEC;
3495
3496 return 0;
3497 }
3498
3499 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3500 {
3501 struct xe_device *xe = to_xe_device(dev);
3502 struct xe_file *xef = to_xe_file(file);
3503 struct drm_xe_vm_bind *args = data;
3504 struct drm_xe_sync __user *syncs_user;
3505 struct xe_bo **bos = NULL;
3506 struct drm_gpuva_ops **ops = NULL;
3507 struct xe_vm *vm;
3508 struct xe_exec_queue *q = NULL;
3509 u32 num_syncs, num_ufence = 0;
3510 struct xe_sync_entry *syncs = NULL;
3511 struct drm_xe_vm_bind_op *bind_ops = NULL;
3512 struct xe_vma_ops vops;
3513 struct dma_fence *fence;
3514 int err;
3515 int i;
3516
3517 vm = xe_vm_lookup(xef, args->vm_id);
3518 if (XE_IOCTL_DBG(xe, !vm))
3519 return -EINVAL;
3520
3521 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3522 if (err)
3523 goto put_vm;
3524
3525 if (args->exec_queue_id) {
3526 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3527 if (XE_IOCTL_DBG(xe, !q)) {
3528 err = -ENOENT;
3529 goto free_bind_ops;
3530 }
3531
3532 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3533 err = -EINVAL;
3534 goto put_exec_queue;
3535 }
3536 }
3537
3538 /* Ensure all UNMAPs visible */
3539 xe_svm_flush(vm);
3540
3541 err = down_write_killable(&vm->lock);
3542 if (err)
3543 goto put_exec_queue;
3544
3545 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3546 err = -ENOENT;
3547 goto release_vm_lock;
3548 }
3549
3550 for (i = 0; i < args->num_binds; ++i) {
3551 u64 range = bind_ops[i].range;
3552 u64 addr = bind_ops[i].addr;
3553
3554 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3555 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3556 err = -EINVAL;
3557 goto release_vm_lock;
3558 }
3559 }
3560
3561 if (args->num_binds) {
3562 bos = kvcalloc(args->num_binds, sizeof(*bos),
3563 GFP_KERNEL | __GFP_ACCOUNT |
3564 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3565 if (!bos) {
3566 err = -ENOMEM;
3567 goto release_vm_lock;
3568 }
3569
3570 ops = kvcalloc(args->num_binds, sizeof(*ops),
3571 GFP_KERNEL | __GFP_ACCOUNT |
3572 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3573 if (!ops) {
3574 err = -ENOMEM;
3575 goto free_bos;
3576 }
3577 }
3578
3579 for (i = 0; i < args->num_binds; ++i) {
3580 struct drm_gem_object *gem_obj;
3581 u64 range = bind_ops[i].range;
3582 u64 addr = bind_ops[i].addr;
3583 u32 obj = bind_ops[i].obj;
3584 u64 obj_offset = bind_ops[i].obj_offset;
3585 u16 pat_index = bind_ops[i].pat_index;
3586 u32 op = bind_ops[i].op;
3587 u32 bind_flags = bind_ops[i].flags;
3588
3589 if (!obj)
3590 continue;
3591
3592 gem_obj = drm_gem_object_lookup(file, obj);
3593 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3594 err = -ENOENT;
3595 goto put_obj;
3596 }
3597 bos[i] = gem_to_xe_bo(gem_obj);
3598
3599 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3600 obj_offset, pat_index, op,
3601 bind_flags);
3602 if (err)
3603 goto put_obj;
3604 }
3605
3606 if (args->num_syncs) {
3607 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3608 if (!syncs) {
3609 err = -ENOMEM;
3610 goto put_obj;
3611 }
3612 }
3613
3614 syncs_user = u64_to_user_ptr(args->syncs);
3615 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3616 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3617 &syncs_user[num_syncs],
3618 (xe_vm_in_lr_mode(vm) ?
3619 SYNC_PARSE_FLAG_LR_MODE : 0) |
3620 (!args->num_binds ?
3621 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3622 if (err)
3623 goto free_syncs;
3624
3625 if (xe_sync_is_ufence(&syncs[num_syncs]))
3626 num_ufence++;
3627 }
3628
3629 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3630 err = -EINVAL;
3631 goto free_syncs;
3632 }
3633
3634 if (!args->num_binds) {
3635 err = -ENODATA;
3636 goto free_syncs;
3637 }
3638
3639 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3640 for (i = 0; i < args->num_binds; ++i) {
3641 u64 range = bind_ops[i].range;
3642 u64 addr = bind_ops[i].addr;
3643 u32 op = bind_ops[i].op;
3644 u32 flags = bind_ops[i].flags;
3645 u64 obj_offset = bind_ops[i].obj_offset;
3646 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3647 u16 pat_index = bind_ops[i].pat_index;
3648
3649 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3650 addr, range, op, flags,
3651 prefetch_region, pat_index);
3652 if (IS_ERR(ops[i])) {
3653 err = PTR_ERR(ops[i]);
3654 ops[i] = NULL;
3655 goto unwind_ops;
3656 }
3657
3658 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3659 if (err)
3660 goto unwind_ops;
3661
3662 #ifdef TEST_VM_OPS_ERROR
3663 if (flags & FORCE_OP_ERROR) {
3664 vops.inject_error = true;
3665 vm->xe->vm_inject_error_position =
3666 (vm->xe->vm_inject_error_position + 1) %
3667 FORCE_OP_ERROR_COUNT;
3668 }
3669 #endif
3670 }
3671
3672 /* Nothing to do */
3673 if (list_empty(&vops.list)) {
3674 err = -ENODATA;
3675 goto unwind_ops;
3676 }
3677
3678 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3679 if (err)
3680 goto unwind_ops;
3681
3682 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3683 if (err)
3684 goto unwind_ops;
3685
3686 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3687 if (IS_ERR(fence))
3688 err = PTR_ERR(fence);
3689 else
3690 dma_fence_put(fence);
3691
3692 unwind_ops:
3693 if (err && err != -ENODATA)
3694 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3695 xe_vma_ops_fini(&vops);
3696 for (i = args->num_binds - 1; i >= 0; --i)
3697 if (ops[i])
3698 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3699 free_syncs:
3700 if (err == -ENODATA)
3701 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3702 while (num_syncs--)
3703 xe_sync_entry_cleanup(&syncs[num_syncs]);
3704
3705 kfree(syncs);
3706 put_obj:
3707 for (i = 0; i < args->num_binds; ++i)
3708 xe_bo_put(bos[i]);
3709
3710 kvfree(ops);
3711 free_bos:
3712 kvfree(bos);
3713 release_vm_lock:
3714 up_write(&vm->lock);
3715 put_exec_queue:
3716 if (q)
3717 xe_exec_queue_put(q);
3718 free_bind_ops:
3719 if (args->num_binds > 1)
3720 kvfree(bind_ops);
3721 put_vm:
3722 xe_vm_put(vm);
3723 return err;
3724 }
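
/*
 * Illustrative userspace sketch of a single MAP bind through this ioctl (not
 * taken from this file), assuming libdrm's drmIoctl() and DRM_IOCTL_XE_VM_BIND
 * from uapi/drm/xe_drm.h. Binds are asynchronous: without a sync entry there
 * is no completion signal back to userspace, so real callers typically pass a
 * syncobj or user fence.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = gpu_addr,
 *			.range = bo_size,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
 *		return -errno;
 */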
3725
3726 /**
3727 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3728 * @vm: VM to bind the BO to
3729 * @bo: BO to bind
3730 * @q: exec queue to use for the bind (optional)
3731 * @addr: address at which to bind the BO
3732 * @cache_lvl: PAT cache level to use
3733 *
3734 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3735 * kernel-owned VM.
3736 *
3737 * Returns a dma_fence to track the binding completion if the job to do so was
3738 * successfully submitted, an error pointer otherwise.
3739 */
3740 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3741 struct xe_exec_queue *q, u64 addr,
3742 enum xe_cache_level cache_lvl)
3743 {
3744 struct xe_vma_ops vops;
3745 struct drm_gpuva_ops *ops = NULL;
3746 struct dma_fence *fence;
3747 int err;
3748
3749 xe_bo_get(bo);
3750 xe_vm_get(vm);
3751 if (q)
3752 xe_exec_queue_get(q);
3753
3754 down_write(&vm->lock);
3755
3756 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3757
3758 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3759 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3760 vm->xe->pat.idx[cache_lvl]);
3761 if (IS_ERR(ops)) {
3762 err = PTR_ERR(ops);
3763 goto release_vm_lock;
3764 }
3765
3766 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3767 if (err)
3768 goto release_vm_lock;
3769
3770 xe_assert(vm->xe, !list_empty(&vops.list));
3771
3772 err = xe_vma_ops_alloc(&vops, false);
3773 if (err)
3774 goto unwind_ops;
3775
3776 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3777 if (IS_ERR(fence))
3778 err = PTR_ERR(fence);
3779
3780 unwind_ops:
3781 if (err && err != -ENODATA)
3782 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3783
3784 xe_vma_ops_fini(&vops);
3785 drm_gpuva_ops_free(&vm->gpuvm, ops);
3786
3787 release_vm_lock:
3788 up_write(&vm->lock);
3789
3790 if (q)
3791 xe_exec_queue_put(q);
3792 xe_vm_put(vm);
3793 xe_bo_put(bo);
3794
3795 if (err)
3796 fence = ERR_PTR(err);
3797
3798 return fence;
3799 }
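
/*
 * Minimal kernel-side usage sketch under stated assumptions: @vm and @bo are
 * kernel-owned objects the caller already holds references to, the caller may
 * block, and XE_CACHE_WB is a valid enum xe_cache_level value on the platform.
 *
 *	struct dma_fence *fence;
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */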
3800
3801 /**
3802 * xe_vm_lock() - Lock the vm's dma_resv object
3803 * @vm: The struct xe_vm whose lock is to be locked
3804  * @intr: Whether to perform any waits interruptibly
3805 *
3806 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3807 * contended lock was interrupted. If @intr is false, the function
3808 * always returns 0.
3809 */
3810 int xe_vm_lock(struct xe_vm *vm, bool intr)
3811 {
3812 int ret;
3813
3814 if (intr)
3815 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3816 else
3817 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
3818
3819 return ret;
3820 }
3821
3822 /**
3823 * xe_vm_unlock() - Unlock the vm's dma_resv object
3824 * @vm: The struct xe_vm whose lock is to be released.
3825 *
3826 * Unlock a buffer object lock that was locked by xe_vm_lock().
3827 */
3828 void xe_vm_unlock(struct xe_vm *vm)
3829 {
3830 dma_resv_unlock(xe_vm_resv(vm));
3831 }
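
/*
 * Typical pairing of the two helpers above, shown as a sketch only: take the
 * vm's resv interruptibly, operate on state protected by it, then drop it.
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *
 *	... work that requires the vm resv to be held ...
 *
 *	xe_vm_unlock(vm);
 */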
3832
3833 /**
3834 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
3835 * address range
3836 * @vm: The VM
3837 * @start: start address
3838 * @end: end address
3839  * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
3840  *
3841  * Issue a range-based TLB invalidation for the GTs in @tile_mask.
3842 *
3843 * Returns 0 for success, negative error code otherwise.
3844 */
3845 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3846 u64 end, u8 tile_mask)
3847 {
3848 struct xe_tlb_inval_fence
3849 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3850 struct xe_tile *tile;
3851 u32 fence_id = 0;
3852 u8 id;
3853 int err;
3854
3855 if (!tile_mask)
3856 return 0;
3857
3858 for_each_tile(tile, vm->xe, id) {
3859 if (!(tile_mask & BIT(id)))
3860 continue;
3861
3862 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3863 &fence[fence_id], true);
3864
3865 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3866 &fence[fence_id], start, end,
3867 vm->usm.asid);
3868 if (err)
3869 goto wait;
3870 ++fence_id;
3871
3872 if (!tile->media_gt)
3873 continue;
3874
3875 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3876 &fence[fence_id], true);
3877
3878 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
3879 &fence[fence_id], start, end,
3880 vm->usm.asid);
3881 if (err)
3882 goto wait;
3883 ++fence_id;
3884 }
3885
3886 wait:
3887 for (id = 0; id < fence_id; ++id)
3888 xe_tlb_inval_fence_wait(&fence[id]);
3889
3890 return err;
3891 }
3892
3893 /**
3894 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3895 * @vma: VMA to invalidate
3896 *
3897  * Walks the list of page-table leaves, zeroing the entries owned by this
3898  * VMA, then issues the TLB invalidations and blocks until the invalidations
3899  * are complete.
3900 *
3901 * Returns 0 for success, negative error code otherwise.
3902 */
3903 int xe_vm_invalidate_vma(struct xe_vma *vma)
3904 {
3905 struct xe_device *xe = xe_vma_vm(vma)->xe;
3906 struct xe_vm *vm = xe_vma_vm(vma);
3907 struct xe_tile *tile;
3908 u8 tile_mask = 0;
3909 int ret = 0;
3910 u8 id;
3911
3912 xe_assert(xe, !xe_vma_is_null(vma));
3913 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3914 trace_xe_vma_invalidate(vma);
3915
3916 vm_dbg(&vm->xe->drm,
3917 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3918 xe_vma_start(vma), xe_vma_size(vma));
3919
3920 /*
3921 * Check that we don't race with page-table updates; the tile_invalidated
3922 * update itself is safe.
3923 */
3924 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3925 if (xe_vma_is_userptr(vma)) {
3926 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
3927 (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
3928 lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
3929
3930 WARN_ON_ONCE(!mmu_interval_check_retry
3931 (&to_userptr_vma(vma)->userptr.notifier,
3932 to_userptr_vma(vma)->userptr.pages.notifier_seq));
3933 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
3934 DMA_RESV_USAGE_BOOKKEEP));
3935
3936 } else {
3937 xe_bo_assert_held(xe_vma_bo(vma));
3938 }
3939 }
3940
3941 for_each_tile(tile, xe, id)
3942 if (xe_pt_zap_ptes(tile, vma))
3943 tile_mask |= BIT(id);
3944
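/* Make sure the PTE zapping above is visible to the device before issuing the TLB invalidation for the range */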
3945 xe_device_wmb(xe);
3946
3947 ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
3948 xe_vma_end(vma), tile_mask);
3949
3950 /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
3951 WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
3952
3953 return ret;
3954 }
3955
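/**
 * xe_vm_validate_protected() - Check PXP keys of protected BOs bound in the VM
 * @vm: The VM to validate
 *
 * Walks all VMAs in the VM and, for each mapping backed by a protected BO,
 * checks with the PXP layer that the key the BO was encrypted with is still
 * valid.
 *
 * Return: 0 on success, -ENODEV if no VM is provided, or the error returned
 * by xe_pxp_bo_key_check().
 */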
xe_vm_validate_protected(struct xe_vm * vm)3956 int xe_vm_validate_protected(struct xe_vm *vm)
3957 {
3958 struct drm_gpuva *gpuva;
3959 int err = 0;
3960
3961 if (!vm)
3962 return -ENODEV;
3963
3964 mutex_lock(&vm->snap_mutex);
3965
3966 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3967 struct xe_vma *vma = gpuva_to_vma(gpuva);
3968 struct xe_bo *bo = vma->gpuva.gem.obj ?
3969 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3970
3971 if (!bo)
3972 continue;
3973
3974 if (xe_bo_is_protected(bo)) {
3975 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
3976 if (err)
3977 break;
3978 }
3979 }
3980
3981 mutex_unlock(&vm->snap_mutex);
3982 return err;
3983 }
3984
3985 struct xe_vm_snapshot {
3986 unsigned long num_snaps; /* number of entries in snap[] */
3987 struct {
3988 u64 ofs, bo_ofs; /* GPU VA of the VMA; BO offset or userptr address */
3989 unsigned long len; /* size of the mapping */
3990 struct xe_bo *bo; /* referenced backing BO, NULL for userptr */
3991 void *data; /* captured contents, or ERR_PTR() on failure */
3992 struct mm_struct *mm; /* referenced mm for userptr capture */
3993 } snap[];
3994 };
3995
xe_vm_snapshot_capture(struct xe_vm * vm)3996 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3997 {
3998 unsigned long num_snaps = 0, i;
3999 struct xe_vm_snapshot *snap = NULL;
4000 struct drm_gpuva *gpuva;
4001
4002 if (!vm)
4003 return NULL;
4004
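/*
 * Capture happens in two phases: under snap_mutex only the dumpable ranges
 * are recorded and references taken (BO or mm), with a GFP_NOWAIT
 * allocation; the actual contents are copied later by
 * xe_vm_snapshot_capture_delayed().
 */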
4005 mutex_lock(&vm->snap_mutex);
4006 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4007 if (gpuva->flags & XE_VMA_DUMPABLE)
4008 num_snaps++;
4009 }
4010
4011 if (num_snaps)
4012 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4013 if (!snap) {
4014 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4015 goto out_unlock;
4016 }
4017
4018 snap->num_snaps = num_snaps;
4019 i = 0;
4020 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4021 struct xe_vma *vma = gpuva_to_vma(gpuva);
4022 struct xe_bo *bo = vma->gpuva.gem.obj ?
4023 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4024
4025 if (!(gpuva->flags & XE_VMA_DUMPABLE))
4026 continue;
4027
4028 snap->snap[i].ofs = xe_vma_start(vma);
4029 snap->snap[i].len = xe_vma_size(vma);
4030 if (bo) {
4031 snap->snap[i].bo = xe_bo_get(bo);
4032 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4033 } else if (xe_vma_is_userptr(vma)) {
4034 struct mm_struct *mm =
4035 to_userptr_vma(vma)->userptr.notifier.mm;
4036
4037 if (mmget_not_zero(mm))
4038 snap->snap[i].mm = mm;
4039 else
4040 snap->snap[i].data = ERR_PTR(-EFAULT);
4041
4042 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4043 } else {
4044 snap->snap[i].data = ERR_PTR(-ENOENT);
4045 }
4046 i++;
4047 }
4048
4049 out_unlock:
4050 mutex_unlock(&vm->snap_mutex);
4051 return snap;
4052 }
4053
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)4054 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4055 {
4056 if (IS_ERR_OR_NULL(snap))
4057 return;
4058
4059 for (int i = 0; i < snap->num_snaps; i++) {
4060 struct xe_bo *bo = snap->snap[i].bo;
4061 int err;
4062
4063 if (IS_ERR(snap->snap[i].data))
4064 continue;
4065
4066 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4067 if (!snap->snap[i].data) {
4068 snap->snap[i].data = ERR_PTR(-ENOMEM);
4069 goto cleanup_bo;
4070 }
4071
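/*
 * Copy the mapping contents: via xe_bo_read() for BO-backed VMAs,
 * or with copy_from_user() under the captured mm for userptr VMAs.
 */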
4072 if (bo) {
4073 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4074 snap->snap[i].data, snap->snap[i].len);
4075 } else {
4076 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4077
4078 kthread_use_mm(snap->snap[i].mm);
4079 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4080 err = 0;
4081 else
4082 err = -EFAULT;
4083 kthread_unuse_mm(snap->snap[i].mm);
4084
4085 mmput(snap->snap[i].mm);
4086 snap->snap[i].mm = NULL;
4087 }
4088
4089 if (err) {
4090 kvfree(snap->snap[i].data);
4091 snap->snap[i].data = ERR_PTR(err);
4092 }
4093
4094 cleanup_bo:
4095 xe_bo_put(bo);
4096 snap->snap[i].bo = NULL;
4097 }
4098 }
4099
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)4100 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4101 {
4102 unsigned long i, j;
4103
4104 if (IS_ERR_OR_NULL(snap)) {
4105 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4106 return;
4107 }
4108
4109 for (i = 0; i < snap->num_snaps; i++) {
4110 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4111
4112 if (IS_ERR(snap->snap[i].data)) {
4113 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4114 PTR_ERR(snap->snap[i].data));
4115 continue;
4116 }
4117
4118 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4119
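/*
 * Emit the captured contents as ascii85, one 32-bit word at a time,
 * to keep the dump output compact.
 */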
4120 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4121 u32 *val = snap->snap[i].data + j;
4122 char dumped[ASCII85_BUFSZ];
4123
4124 drm_puts(p, ascii85_encode(*val, dumped));
4125 }
4126
4127 drm_puts(p, "\n");
4128
4129 if (drm_coredump_printer_is_full(p))
4130 return;
4131 }
4132 }
4133
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)4134 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4135 {
4136 unsigned long i;
4137
4138 if (IS_ERR_OR_NULL(snap))
4139 return;
4140
4141 for (i = 0; i < snap->num_snaps; i++) {
4142 if (!IS_ERR(snap->snap[i].data))
4143 kvfree(snap->snap[i].data);
4144 xe_bo_put(snap->snap[i].bo);
4145 if (snap->snap[i].mm)
4146 mmput(snap->snap[i].mm);
4147 }
4148 kvfree(snap);
4149 }
4150
4151 /**
4152 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4153 * @xe: Pointer to the XE device structure
4154 * @vma: Pointer to the virtual memory area (VMA) structure
4155 * @is_atomic: Whether this is an atomic access taken in the pagefault path
4156 *
4157 * This function determines whether the given VMA needs to be migrated to
4158 * VRAM in order to perform atomic GPU operations.
4159 *
4160 * Return:
4161 * 1 - Migration to VRAM is required
4162 * 0 - Migration is not required
4163 * -EACCES - Atomic access is not allowed for this memory attribute
4164 *
4165 */
xe_vma_need_vram_for_atomic(struct xe_device * xe,struct xe_vma * vma,bool is_atomic)4166 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4167 {
4168 u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4169 vma->attr.atomic_access;
4170
4171 if (!IS_DGFX(xe) || !is_atomic)
4172 return false;
4173
4174 /*
4175 * NOTE: The checks implemented here are platform-specific. For
4176 * instance, on a device supporting CXL atomics, these would ideally
4177 * work universally without additional handling.
4178 */
4179 switch (atomic_access) {
4180 case DRM_XE_ATOMIC_DEVICE:
4181 return !xe->info.has_device_atomics_on_smem;
4182
4183 case DRM_XE_ATOMIC_CPU:
4184 return -EACCES;
4185
4186 case DRM_XE_ATOMIC_UNDEFINED:
4187 case DRM_XE_ATOMIC_GLOBAL:
4188 default:
4189 return 1;
4190 }
4191 }
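
/*
 * Illustrative sketch only, not this driver's fault-handler code: a caller in
 * the pagefault path would typically consume the tri-state return as
 *
 *	ret = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);
 *	if (ret < 0)
 *		return ret;	// e.g. -EACCES for CPU-only atomics
 *	else if (ret)
 *		... migrate the backing store to VRAM before revalidating ...
 */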
4192
xe_vm_alloc_vma(struct xe_vm * vm,struct drm_gpuvm_map_req * map_req,bool is_madvise)4193 static int xe_vm_alloc_vma(struct xe_vm *vm,
4194 struct drm_gpuvm_map_req *map_req,
4195 bool is_madvise)
4196 {
4197 struct xe_vma_ops vops;
4198 struct drm_gpuva_ops *ops = NULL;
4199 struct drm_gpuva_op *__op;
4200 bool is_cpu_addr_mirror = false;
4201 bool remap_op = false;
4202 struct xe_vma_mem_attr tmp_attr;
4203 u16 default_pat;
4204 int err;
4205
4206 lockdep_assert_held_write(&vm->lock);
4207
4208 if (is_madvise)
4209 ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4210 else
4211 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4212
4213 if (IS_ERR(ops))
4214 return PTR_ERR(ops);
4215
4216 if (list_empty(&ops->list)) {
4217 err = 0;
4218 goto free_ops;
4219 }
4220
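/*
 * First pass over the ops: for CPU-address-mirror allocation, inherit the
 * default PAT index from the VMA being unmapped/remapped and mark the new
 * mapping as a CPU address mirror; for madvise, track whether the remapped
 * VMA is a CPU address mirror so the new MAP inherits it.
 */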
4221 drm_gpuva_for_each_op(__op, ops) {
4222 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4223 struct xe_vma *vma = NULL;
4224
4225 if (!is_madvise) {
4226 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4227 vma = gpuva_to_vma(op->base.unmap.va);
4228 XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4229 default_pat = vma->attr.default_pat_index;
4230 }
4231
4232 if (__op->op == DRM_GPUVA_OP_REMAP) {
4233 vma = gpuva_to_vma(op->base.remap.unmap->va);
4234 default_pat = vma->attr.default_pat_index;
4235 }
4236
4237 if (__op->op == DRM_GPUVA_OP_MAP) {
4238 op->map.is_cpu_addr_mirror = true;
4239 op->map.pat_index = default_pat;
4240 }
4241 } else {
4242 if (__op->op == DRM_GPUVA_OP_REMAP) {
4243 vma = gpuva_to_vma(op->base.remap.unmap->va);
4244 xe_assert(vm->xe, !remap_op);
4245 xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4246 remap_op = true;
4247
4248 if (xe_vma_is_cpu_addr_mirror(vma))
4249 is_cpu_addr_mirror = true;
4250 else
4251 is_cpu_addr_mirror = false;
4252 }
4253
4254 if (__op->op == DRM_GPUVA_OP_MAP) {
4255 xe_assert(vm->xe, remap_op);
4256 remap_op = false;
4257 /*
4258 * For madvise ops a DRM_GPUVA_OP_MAP always
4259 * follows a DRM_GPUVA_OP_REMAP, so propagate
4260 * op->map.is_cpu_addr_mirror from the VMA the
4261 * REMAP is splitting to the new mapping.
4262 */
4263 op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
4264 }
4265 }
4266 print_op(vm->xe, __op);
4267 }
4268
4269 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4270
4271 if (is_madvise)
4272 vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4273
4274 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4275 if (err)
4276 goto unwind_ops;
4277
4278 xe_vm_lock(vm, false);
4279
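/*
 * Second pass, under the vm dma-resv lock: destroy the VMAs replaced by
 * the ops and, for madvise, carry the attributes of the split VMA over to
 * the newly created one.
 */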
4280 drm_gpuva_for_each_op(__op, ops) {
4281 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4282 struct xe_vma *vma;
4283
4284 if (__op->op == DRM_GPUVA_OP_UNMAP) {
4285 vma = gpuva_to_vma(op->base.unmap.va);
4286 /* There should be no unmap for madvise */
4287 if (is_madvise)
4288 XE_WARN_ON("UNEXPECTED UNMAP");
4289
4290 xe_vma_destroy(vma, NULL);
4291 } else if (__op->op == DRM_GPUVA_OP_REMAP) {
4292 vma = gpuva_to_vma(op->base.remap.unmap->va);
4293 /* For madvise ops, store the attributes of the VMA unmapped by
4294 * the REMAP so they can be assigned to the newly created MAP vma.
4295 */
4296 if (is_madvise)
4297 tmp_attr = vma->attr;
4298
4299 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4300 } else if (__op->op == DRM_GPUVA_OP_MAP) {
4301 vma = op->map.vma;
4302 /* For madvise calls, a MAP is always preceded by a REMAP, so
4303 * tmp_attr always holds sane values and it is safe to copy them
4304 * to the new vma.
4305 */
4306 if (is_madvise)
4307 vma->attr = tmp_attr;
4308 }
4309 }
4310
4311 xe_vm_unlock(vm);
4312 drm_gpuva_ops_free(&vm->gpuvm, ops);
4313 return 0;
4314
4315 unwind_ops:
4316 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4317 free_ops:
4318 drm_gpuva_ops_free(&vm->gpuvm, ops);
4319 return err;
4320 }
4321
4322 /**
4323 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
4324 * @vm: Pointer to the xe_vm structure
4325 * @start: Starting input address
4326 * @range: Size of the input range
4327 *
4328 * This function splits existing VMAs to create new VMAs covering the user-provided input range
4329 *
4330 * Return: 0 on success, negative error code on failure
4331 */
xe_vm_alloc_madvise_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4332 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4333 {
4334 struct drm_gpuvm_map_req map_req = {
4335 .map.va.addr = start,
4336 .map.va.range = range,
4337 };
4338
4339 lockdep_assert_held_write(&vm->lock);
4340
4341 vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4342
4343 return xe_vm_alloc_vma(vm, &map_req, true);
4344 }
4345
4346 /**
4347 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4348 * @vm: Pointer to the xe_vm structure
4349 * @start: Starting input address
4350 * @range: Size of the input range
4351 *
4352 * This function splits/merges existing VMAs to create new VMAs covering the user-provided input range
4353 *
4354 * Return: 0 on success, negative error code on failure
4355 */
xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4356 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4357 {
4358 struct drm_gpuvm_map_req map_req = {
4359 .map.va.addr = start,
4360 .map.va.range = range,
4361 };
4362
4363 lockdep_assert_held_write(&vm->lock);
4364
4365 vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4366 start, range);
4367
4368 return xe_vm_alloc_vma(vm, &map_req, false);
4369 }
4370