1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_migrate.h"
31 #include "xe_pat.h"
32 #include "xe_pm.h"
33 #include "xe_preempt_fence.h"
34 #include "xe_pt.h"
35 #include "xe_pxp.h"
36 #include "xe_res_cursor.h"
37 #include "xe_sriov_vf.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_wa.h"
44
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
50 /**
51 * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
52 * @vm: The vm whose resv is to be locked.
53 * @exec: The drm_exec transaction.
54 *
55 * Helper to lock the vm's resv as part of a drm_exec transaction.
56 *
57 * Return: %0 on success. See drm_exec_lock_obj() for error codes.
58 */
int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
60 {
61 return drm_exec_lock_obj(exec, xe_vm_obj(vm));
62 }
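/*
 * Illustrative usage sketch (not driver code): a caller would typically take
 * this lock from within a drm_exec transaction, retrying on contention:
 *
 *	drm_exec_until_all_locked(exec) {
 *		err = xe_vm_drm_exec_lock(vm, exec);
 *		drm_exec_retry_on_contention(exec);
 *		if (err)
 *			break;
 *	}
 */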
63
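/*
 * Return true if any exec queue on the VM either has no preempt fence
 * installed or has one whose signaling has already been enabled, i.e. a
 * preemption is pending or in flight.
 */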
static bool preempt_fences_waiting(struct xe_vm *vm)
65 {
66 struct xe_exec_queue *q;
67
68 lockdep_assert_held(&vm->lock);
69 xe_vm_assert_held(vm);
70
71 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
72 if (!q->lr.pfence ||
73 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
74 &q->lr.pfence->flags)) {
75 return true;
76 }
77 }
78
79 return false;
80 }
81
static void free_preempt_fences(struct list_head *list)
83 {
84 struct list_head *link, *next;
85
86 list_for_each_safe(link, next, list)
87 xe_preempt_fence_free(to_preempt_fence_from_link(link));
88 }
89
static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
91 unsigned int *count)
92 {
93 lockdep_assert_held(&vm->lock);
94 xe_vm_assert_held(vm);
95
96 if (*count >= vm->preempt.num_exec_queues)
97 return 0;
98
99 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
100 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
101
102 if (IS_ERR(pfence))
103 return PTR_ERR(pfence);
104
105 list_move_tail(xe_preempt_fence_link(pfence), list);
106 }
107
108 return 0;
109 }
110
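/*
 * Wait for the preempt fences currently installed on the VM's exec queues.
 * On a VF with migration support the wait is bounded (HZ / 5) and -EAGAIN is
 * returned on timeout so callers can back off rather than block the
 * post-migration recovery work; -ETIME means the VM needs to be killed.
 */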
static int wait_for_existing_preempt_fences(struct xe_vm *vm)
112 {
113 struct xe_exec_queue *q;
114 bool vf_migration = IS_SRIOV_VF(vm->xe) &&
115 xe_sriov_vf_migration_supported(vm->xe);
116 signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
117
118 xe_vm_assert_held(vm);
119
120 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
121 if (q->lr.pfence) {
122 long timeout;
123
124 timeout = dma_fence_wait_timeout(q->lr.pfence, false,
125 wait_time);
126 if (!timeout) {
127 xe_assert(vm->xe, vf_migration);
128 return -EAGAIN;
129 }
130
131 /* Only -ETIME on fence indicates VM needs to be killed */
132 if (timeout < 0 || q->lr.pfence->error == -ETIME)
133 return -ETIME;
134
135 dma_fence_put(q->lr.pfence);
136 q->lr.pfence = NULL;
137 }
138 }
139
140 return 0;
141 }
142
static bool xe_vm_is_idle(struct xe_vm *vm)
144 {
145 struct xe_exec_queue *q;
146
147 xe_vm_assert_held(vm);
148 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
149 if (!xe_exec_queue_is_idle(q))
150 return false;
151 }
152
153 return true;
154 }
155
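/*
 * Consume one pre-allocated fence from @list per exec queue, arm it with the
 * queue's fence context and next seqno, and install it as the queue's new
 * preempt fence, dropping the reference to the previous one.
 */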
static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
157 {
158 struct list_head *link;
159 struct xe_exec_queue *q;
160
161 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
162 struct dma_fence *fence;
163
164 link = list->next;
165 xe_assert(vm->xe, link != list);
166
167 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
168 q, q->lr.context,
169 ++q->lr.seqno);
170 dma_fence_put(q->lr.pfence);
171 q->lr.pfence = fence;
172 }
173 }
174
static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
176 {
177 struct xe_exec_queue *q;
178 int err;
179
180 xe_bo_assert_held(bo);
181
182 if (!vm->preempt.num_exec_queues)
183 return 0;
184
185 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
186 if (err)
187 return err;
188
189 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
190 if (q->lr.pfence) {
191 dma_resv_add_fence(bo->ttm.base.resv,
192 q->lr.pfence,
193 DMA_RESV_USAGE_BOOKKEEP);
194 }
195
196 return 0;
197 }
198
static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
200 struct drm_exec *exec)
201 {
202 struct xe_exec_queue *q;
203
204 lockdep_assert_held(&vm->lock);
205 xe_vm_assert_held(vm);
206
207 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
208 q->ops->resume(q);
209
210 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
211 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
212 }
213 }
214
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
216 {
217 struct drm_gpuvm_exec vm_exec = {
218 .vm = &vm->gpuvm,
219 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
220 .num_fences = 1,
221 };
222 struct drm_exec *exec = &vm_exec.exec;
223 struct xe_validation_ctx ctx;
224 struct dma_fence *pfence;
225 int err;
226 bool wait;
227
228 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
229
230 down_write(&vm->lock);
231 err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
232 if (err)
233 goto out_up_write;
234
235 pfence = xe_preempt_fence_create(q, q->lr.context,
236 ++q->lr.seqno);
237 if (IS_ERR(pfence)) {
238 err = PTR_ERR(pfence);
239 goto out_fini;
240 }
241
242 list_add(&q->lr.link, &vm->preempt.exec_queues);
243 ++vm->preempt.num_exec_queues;
244 q->lr.pfence = pfence;
245
246 xe_svm_notifier_lock(vm);
247
248 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
249 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
250
251 /*
 * Check to see if a preemption on the VM or a userptr invalidation is in
 * flight; if so, trigger this preempt fence to sync state with the other
 * preempt fences on the VM.
255 */
256 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
257 if (wait)
258 dma_fence_enable_sw_signaling(pfence);
259
260 xe_svm_notifier_unlock(vm);
261
262 out_fini:
263 xe_validation_ctx_fini(&ctx);
264 out_up_write:
265 up_write(&vm->lock);
266
267 return err;
268 }
269 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
270
271 /**
272 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
273 * @vm: The VM.
274 * @q: The exec_queue
275 *
276 * Note that this function might be called multiple times on the same queue.
277 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
279 {
280 if (!xe_vm_in_preempt_fence_mode(vm))
281 return;
282
283 down_write(&vm->lock);
284 if (!list_empty(&q->lr.link)) {
285 list_del_init(&q->lr.link);
286 --vm->preempt.num_exec_queues;
287 }
288 if (q->lr.pfence) {
289 dma_fence_enable_sw_signaling(q->lr.pfence);
290 dma_fence_put(q->lr.pfence);
291 q->lr.pfence = NULL;
292 }
293 up_write(&vm->lock);
294 }
295
296 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
297
298 /**
299 * xe_vm_kill() - VM Kill
300 * @vm: The VM.
301 * @unlocked: Flag indicates the VM's dma-resv is not held
302 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
305 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
307 {
308 struct xe_exec_queue *q;
309
310 lockdep_assert_held(&vm->lock);
311
312 if (unlocked)
313 xe_vm_lock(vm, false);
314
315 vm->flags |= XE_VM_FLAG_BANNED;
316 trace_xe_vm_kill(vm);
317
318 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
319 q->ops->kill(q);
320
321 if (unlocked)
322 xe_vm_unlock(vm);
323
324 /* TODO: Inform user the VM is banned */
325 }
326
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
328 {
329 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
330 struct drm_gpuva *gpuva;
331 int ret;
332
333 lockdep_assert_held(&vm->lock);
334 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
335 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
336 &vm->rebind_list);
337
338 if (!try_wait_for_completion(&vm->xe->pm_block))
339 return -EAGAIN;
340
341 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
342 if (ret)
343 return ret;
344
345 vm_bo->evicted = false;
346 return 0;
347 }
348
349 /**
350 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
351 * @vm: The vm for which we are rebinding.
352 * @exec: The struct drm_exec with the locked GEM objects.
353 * @num_fences: The number of fences to reserve for the operation, not
354 * including rebinds and validations.
355 *
356 * Validates all evicted gem objects and rebinds their vmas. Note that
357 * rebindings may cause evictions and hence the validation-rebind
358 * sequence is rerun until there are no more objects to validate.
359 *
360 * Return: 0 on success, negative error code on error. In particular,
361 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
362 * the drm_exec transaction needs to be restarted.
363 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
365 unsigned int num_fences)
366 {
367 struct drm_gem_object *obj;
368 unsigned long index;
369 int ret;
370
371 do {
372 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
373 if (ret)
374 return ret;
375
376 ret = xe_vm_rebind(vm, false);
377 if (ret)
378 return ret;
379 } while (!list_empty(&vm->gpuvm.evict.list));
380
381 drm_exec_for_each_locked_object(exec, index, obj) {
382 ret = dma_resv_reserve_fences(obj->resv, num_fences);
383 if (ret)
384 return ret;
385 }
386
387 return 0;
388 }
389
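/*
 * Lock and prepare the VM for the rebind worker. Sets *done and returns
 * early when the VM is idle or no preempt fence has been triggered yet;
 * otherwise prepares all external objects, waits for the existing preempt
 * fences and revalidates/rebinds everything, reserving one fence slot per
 * exec queue for the new preempt fences.
 */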
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
391 bool *done)
392 {
393 int err;
394
395 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
396 if (err)
397 return err;
398
399 if (xe_vm_is_idle(vm)) {
400 vm->preempt.rebind_deactivated = true;
401 *done = true;
402 return 0;
403 }
404
405 if (!preempt_fences_waiting(vm)) {
406 *done = true;
407 return 0;
408 }
409
410 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
411 if (err)
412 return err;
413
414 err = wait_for_existing_preempt_fences(vm);
415 if (err)
416 return err;
417
418 /*
419 * Add validation and rebinding to the locking loop since both can
 * cause evictions which may require blocking dma_resv locks.
421 * The fence reservation here is intended for the new preempt fences
422 * we attach at the end of the rebind work.
423 */
424 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
425 }
426
static bool vm_suspend_rebind_worker(struct xe_vm *vm)
428 {
429 struct xe_device *xe = vm->xe;
430 bool ret = false;
431
432 mutex_lock(&xe->rebind_resume_lock);
433 if (!try_wait_for_completion(&vm->xe->pm_block)) {
434 ret = true;
435 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
436 }
437 mutex_unlock(&xe->rebind_resume_lock);
438
439 return ret;
440 }
441
442 /**
443 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
444 * @vm: The vm whose preempt worker to resume.
445 *
446 * Resume a preempt worker that was previously suspended by
447 * vm_suspend_rebind_worker().
448 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
450 {
451 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
452 }
453
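/*
 * Rebind worker for preempt-fence mode VMs: revalidates evicted BOs, repins
 * userptrs and rebinds their VMAs, then arms and reinstalls fresh preempt
 * fences. Retries on -EAGAIN, except on a migration-capable VF where the
 * worker re-queues itself instead of looping; any other error bans the VM.
 */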
static void preempt_rebind_work_func(struct work_struct *w)
455 {
456 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
457 struct xe_validation_ctx ctx;
458 struct drm_exec exec;
459 unsigned int fence_count = 0;
460 LIST_HEAD(preempt_fences);
461 int err = 0;
462 long wait;
463 int __maybe_unused tries = 0;
464
465 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
466 trace_xe_vm_rebind_worker_enter(vm);
467
468 down_write(&vm->lock);
469
470 if (xe_vm_is_closed_or_banned(vm)) {
471 up_write(&vm->lock);
472 trace_xe_vm_rebind_worker_exit(vm);
473 return;
474 }
475
476 retry:
477 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
478 up_write(&vm->lock);
/* We don't actually block here, but we also don't make progress. */
480 xe_pm_might_block_on_suspend();
481 return;
482 }
483
484 if (xe_vm_userptr_check_repin(vm)) {
485 err = xe_vm_userptr_pin(vm);
486 if (err)
487 goto out_unlock_outer;
488 }
489
490 err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
491 (struct xe_val_flags) {.interruptible = true});
492 if (err)
493 goto out_unlock_outer;
494
495 drm_exec_until_all_locked(&exec) {
496 bool done = false;
497
498 err = xe_preempt_work_begin(&exec, vm, &done);
499 drm_exec_retry_on_contention(&exec);
500 xe_validation_retry_on_oom(&ctx, &err);
501 if (err || done) {
502 xe_validation_ctx_fini(&ctx);
503 goto out_unlock_outer;
504 }
505 }
506
507 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
508 if (err)
509 goto out_unlock;
510
511 xe_vm_set_validation_exec(vm, &exec);
512 err = xe_vm_rebind(vm, true);
513 xe_vm_set_validation_exec(vm, NULL);
514 if (err)
515 goto out_unlock;
516
517 /* Wait on rebinds and munmap style VM unbinds */
518 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
519 DMA_RESV_USAGE_KERNEL,
520 false, MAX_SCHEDULE_TIMEOUT);
521 if (wait <= 0) {
522 err = -ETIME;
523 goto out_unlock;
524 }
525
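/*
 * With CONFIG_DRM_XE_USERPTR_INVAL_INJECT the first pass always forces a
 * retry to exercise the -EAGAIN path; otherwise (and on subsequent passes)
 * we only retry when a userptr invalidation raced with this rebind and a
 * repin is still needed.
 */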
526 #define retry_required(__tries, __vm) \
527 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
528 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
529 __xe_vm_userptr_needs_repin(__vm))
530
531 xe_svm_notifier_lock(vm);
532 if (retry_required(tries, vm)) {
533 xe_svm_notifier_unlock(vm);
534 err = -EAGAIN;
535 goto out_unlock;
536 }
537
538 #undef retry_required
539
540 spin_lock(&vm->xe->ttm.lru_lock);
541 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
542 spin_unlock(&vm->xe->ttm.lru_lock);
543
544 /* Point of no return. */
545 arm_preempt_fences(vm, &preempt_fences);
546 resume_and_reinstall_preempt_fences(vm, &exec);
547 xe_svm_notifier_unlock(vm);
548
549 out_unlock:
550 xe_validation_ctx_fini(&ctx);
551 out_unlock_outer:
552 if (err == -EAGAIN) {
553 trace_xe_vm_rebind_worker_retry(vm);
554
555 /*
556 * We can't block in workers on a VF which supports migration
557 * given this can block the VF post-migration workers from
558 * getting scheduled.
559 */
560 if (IS_SRIOV_VF(vm->xe) &&
561 xe_sriov_vf_migration_supported(vm->xe)) {
562 up_write(&vm->lock);
563 xe_vm_queue_rebind_worker(vm);
564 return;
565 }
566
567 goto retry;
568 }
569
570 if (err) {
571 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
572 xe_vm_kill(vm, true);
573 }
574 up_write(&vm->lock);
575
576 free_preempt_fences(&preempt_fences);
577
578 trace_xe_vm_rebind_worker_exit(vm);
579 }
580
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
582 {
583 int i;
584
585 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
586 if (!vops->pt_update_ops[i].num_ops)
587 continue;
588
589 vops->pt_update_ops[i].ops =
590 kmalloc_array(vops->pt_update_ops[i].num_ops,
591 sizeof(*vops->pt_update_ops[i].ops),
592 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
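/*
 * Returning -ENOBUFS (rather than -ENOMEM) for an array of binds appears
 * intended to let userspace split the array and retry with smaller chunks
 * instead of treating the failure as fatal.
 */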
593 if (!vops->pt_update_ops[i].ops)
594 return array_of_binds ? -ENOBUFS : -ENOMEM;
595 }
596
597 return 0;
598 }
599 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
600
static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
602 {
603 struct xe_vma *vma;
604
605 vma = gpuva_to_vma(op->base.prefetch.va);
606
607 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
608 xa_destroy(&op->prefetch_range.range);
609 }
610
static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
612 {
613 struct xe_vma_op *op;
614
615 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
616 return;
617
618 list_for_each_entry(op, &vops->list, link)
619 xe_vma_svm_prefetch_op_fini(op);
620 }
621
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
623 {
624 int i;
625
626 xe_vma_svm_prefetch_ops_fini(vops);
627
628 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
629 kfree(vops->pt_update_ops[i].ops);
630 }
631
static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
633 {
634 int i;
635
636 if (!inc_val)
637 return;
638
639 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
640 if (BIT(i) & tile_mask)
641 vops->pt_update_ops[i].num_ops += inc_val;
642 }
643
644 #define XE_VMA_CREATE_MASK ( \
645 XE_VMA_READ_ONLY | \
646 XE_VMA_DUMPABLE | \
647 XE_VMA_SYSTEM_ALLOCATOR | \
648 DRM_GPUVA_SPARSE | \
649 XE_VMA_MADV_AUTORESET)
650
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
652 u8 tile_mask)
653 {
654 INIT_LIST_HEAD(&op->link);
655 op->tile_mask = tile_mask;
656 op->base.op = DRM_GPUVA_OP_MAP;
657 op->base.map.va.addr = vma->gpuva.va.addr;
658 op->base.map.va.range = vma->gpuva.va.range;
659 op->base.map.gem.obj = vma->gpuva.gem.obj;
660 op->base.map.gem.offset = vma->gpuva.gem.offset;
661 op->map.vma = vma;
662 op->map.immediate = true;
663 op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
664 }
665
static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
667 u8 tile_mask)
668 {
669 struct xe_vma_op *op;
670
671 op = kzalloc(sizeof(*op), GFP_KERNEL);
672 if (!op)
673 return -ENOMEM;
674
675 xe_vm_populate_rebind(op, vma, tile_mask);
676 list_add_tail(&op->link, &vops->list);
677 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
678
679 return 0;
680 }
681
682 static struct dma_fence *ops_execute(struct xe_vm *vm,
683 struct xe_vma_ops *vops);
684 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
685 struct xe_exec_queue *q,
686 struct xe_sync_entry *syncs, u32 num_syncs);
687
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
689 {
690 struct dma_fence *fence;
691 struct xe_vma *vma, *next;
692 struct xe_vma_ops vops;
693 struct xe_vma_op *op, *next_op;
694 int err, i;
695
696 lockdep_assert_held(&vm->lock);
697 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
698 list_empty(&vm->rebind_list))
699 return 0;
700
701 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
702 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
703 vops.pt_update_ops[i].wait_vm_bookkeep = true;
704
705 xe_vm_assert_held(vm);
706 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
707 xe_assert(vm->xe, vma->tile_present);
708
709 if (rebind_worker)
710 trace_xe_vma_rebind_worker(vma);
711 else
712 trace_xe_vma_rebind_exec(vma);
713
714 err = xe_vm_ops_add_rebind(&vops, vma,
715 vma->tile_present);
716 if (err)
717 goto free_ops;
718 }
719
720 err = xe_vma_ops_alloc(&vops, false);
721 if (err)
722 goto free_ops;
723
724 fence = ops_execute(vm, &vops);
725 if (IS_ERR(fence)) {
726 err = PTR_ERR(fence);
727 } else {
728 dma_fence_put(fence);
729 list_for_each_entry_safe(vma, next, &vm->rebind_list,
730 combined_links.rebind)
731 list_del_init(&vma->combined_links.rebind);
732 }
733 free_ops:
734 list_for_each_entry_safe(op, next_op, &vops.list, link) {
735 list_del(&op->link);
736 kfree(op);
737 }
738 xe_vma_ops_fini(&vops);
739
740 return err;
741 }
742
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
744 {
745 struct dma_fence *fence = NULL;
746 struct xe_vma_ops vops;
747 struct xe_vma_op *op, *next_op;
748 struct xe_tile *tile;
749 u8 id;
750 int err;
751
752 lockdep_assert_held(&vm->lock);
753 xe_vm_assert_held(vm);
754 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
755
756 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
757 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
758 for_each_tile(tile, vm->xe, id) {
759 vops.pt_update_ops[id].wait_vm_bookkeep = true;
760 vops.pt_update_ops[tile->id].q =
761 xe_migrate_exec_queue(tile->migrate);
762 }
763
764 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
765 if (err)
766 return ERR_PTR(err);
767
768 err = xe_vma_ops_alloc(&vops, false);
769 if (err) {
770 fence = ERR_PTR(err);
771 goto free_ops;
772 }
773
774 fence = ops_execute(vm, &vops);
775
776 free_ops:
777 list_for_each_entry_safe(op, next_op, &vops.list, link) {
778 list_del(&op->link);
779 kfree(op);
780 }
781 xe_vma_ops_fini(&vops);
782
783 return fence;
784 }
785
static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
787 struct xe_vma *vma,
788 struct xe_svm_range *range,
789 u8 tile_mask)
790 {
791 INIT_LIST_HEAD(&op->link);
792 op->tile_mask = tile_mask;
793 op->base.op = DRM_GPUVA_OP_DRIVER;
794 op->subop = XE_VMA_SUBOP_MAP_RANGE;
795 op->map_range.vma = vma;
796 op->map_range.range = range;
797 }
798
799 static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
801 struct xe_vma *vma,
802 struct xe_svm_range *range,
803 u8 tile_mask)
804 {
805 struct xe_vma_op *op;
806
807 op = kzalloc(sizeof(*op), GFP_KERNEL);
808 if (!op)
809 return -ENOMEM;
810
811 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
812 list_add_tail(&op->link, &vops->list);
813 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
814
815 return 0;
816 }
817
818 /**
819 * xe_vm_range_rebind() - VM range (re)bind
820 * @vm: The VM which the range belongs to.
821 * @vma: The VMA which the range belongs to.
822 * @range: SVM range to rebind.
823 * @tile_mask: Tile mask to bind the range to.
824 *
825 * (re)bind SVM range setting up GPU page tables for the range.
826 *
827 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
828 * failure
829 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
831 struct xe_vma *vma,
832 struct xe_svm_range *range,
833 u8 tile_mask)
834 {
835 struct dma_fence *fence = NULL;
836 struct xe_vma_ops vops;
837 struct xe_vma_op *op, *next_op;
838 struct xe_tile *tile;
839 u8 id;
840 int err;
841
842 lockdep_assert_held(&vm->lock);
843 xe_vm_assert_held(vm);
844 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
845 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
846
847 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
848 vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
849 for_each_tile(tile, vm->xe, id) {
850 vops.pt_update_ops[id].wait_vm_bookkeep = true;
851 vops.pt_update_ops[tile->id].q =
852 xe_migrate_exec_queue(tile->migrate);
853 }
854
855 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
856 if (err)
857 return ERR_PTR(err);
858
859 err = xe_vma_ops_alloc(&vops, false);
860 if (err) {
861 fence = ERR_PTR(err);
862 goto free_ops;
863 }
864
865 fence = ops_execute(vm, &vops);
866
867 free_ops:
868 list_for_each_entry_safe(op, next_op, &vops.list, link) {
869 list_del(&op->link);
870 kfree(op);
871 }
872 xe_vma_ops_fini(&vops);
873
874 return fence;
875 }
876
static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
878 struct xe_svm_range *range)
879 {
880 INIT_LIST_HEAD(&op->link);
881 op->tile_mask = range->tile_present;
882 op->base.op = DRM_GPUVA_OP_DRIVER;
883 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
884 op->unmap_range.range = range;
885 }
886
887 static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
889 struct xe_svm_range *range)
890 {
891 struct xe_vma_op *op;
892
893 op = kzalloc(sizeof(*op), GFP_KERNEL);
894 if (!op)
895 return -ENOMEM;
896
897 xe_vm_populate_range_unbind(op, range);
898 list_add_tail(&op->link, &vops->list);
899 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
900
901 return 0;
902 }
903
904 /**
905 * xe_vm_range_unbind() - VM range unbind
906 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
908 *
909 * Unbind SVM range removing the GPU page tables for the range.
910 *
911 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
912 * failure
913 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
915 struct xe_svm_range *range)
916 {
917 struct dma_fence *fence = NULL;
918 struct xe_vma_ops vops;
919 struct xe_vma_op *op, *next_op;
920 struct xe_tile *tile;
921 u8 id;
922 int err;
923
924 lockdep_assert_held(&vm->lock);
925 xe_vm_assert_held(vm);
926 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
927
928 if (!range->tile_present)
929 return dma_fence_get_stub();
930
931 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
932 for_each_tile(tile, vm->xe, id) {
933 vops.pt_update_ops[id].wait_vm_bookkeep = true;
934 vops.pt_update_ops[tile->id].q =
935 xe_migrate_exec_queue(tile->migrate);
936 }
937
938 err = xe_vm_ops_add_range_unbind(&vops, range);
939 if (err)
940 return ERR_PTR(err);
941
942 err = xe_vma_ops_alloc(&vops, false);
943 if (err) {
944 fence = ERR_PTR(err);
945 goto free_ops;
946 }
947
948 fence = ops_execute(vm, &vops);
949
950 free_ops:
951 list_for_each_entry_safe(op, next_op, &vops.list, link) {
952 list_del(&op->link);
953 kfree(op);
954 }
955 xe_vma_ops_fini(&vops);
956
957 return fence;
958 }
959
static void xe_vma_free(struct xe_vma *vma)
961 {
962 if (xe_vma_is_userptr(vma))
963 kfree(to_userptr_vma(vma));
964 else
965 kfree(vma);
966 }
967
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
969 struct xe_bo *bo,
970 u64 bo_offset_or_userptr,
971 u64 start, u64 end,
972 struct xe_vma_mem_attr *attr,
973 unsigned int flags)
974 {
975 struct xe_vma *vma;
976 struct xe_tile *tile;
977 u8 id;
978 bool is_null = (flags & DRM_GPUVA_SPARSE);
979 bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
980
981 xe_assert(vm->xe, start < end);
982 xe_assert(vm->xe, end < vm->size);
983
984 /*
985 * Allocate and ensure that the xe_vma_is_userptr() return
986 * matches what was allocated.
987 */
988 if (!bo && !is_null && !is_cpu_addr_mirror) {
989 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
990
991 if (!uvma)
992 return ERR_PTR(-ENOMEM);
993
994 vma = &uvma->vma;
995 } else {
996 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
997 if (!vma)
998 return ERR_PTR(-ENOMEM);
999
1000 if (bo)
1001 vma->gpuva.gem.obj = &bo->ttm.base;
1002 }
1003
1004 INIT_LIST_HEAD(&vma->combined_links.rebind);
1005
1006 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1007 vma->gpuva.vm = &vm->gpuvm;
1008 vma->gpuva.va.addr = start;
1009 vma->gpuva.va.range = end - start + 1;
1010 vma->gpuva.flags = flags;
1011
1012 for_each_tile(tile, vm->xe, id)
1013 vma->tile_mask |= 0x1 << id;
1014
1015 if (vm->xe->info.has_atomic_enable_pte_bit)
1016 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1017
1018 vma->attr = *attr;
1019
1020 if (bo) {
1021 struct drm_gpuvm_bo *vm_bo;
1022
1023 xe_bo_assert_held(bo);
1024
1025 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1026 if (IS_ERR(vm_bo)) {
1027 xe_vma_free(vma);
1028 return ERR_CAST(vm_bo);
1029 }
1030
1031 drm_gpuvm_bo_extobj_add(vm_bo);
1032 drm_gem_object_get(&bo->ttm.base);
1033 vma->gpuva.gem.offset = bo_offset_or_userptr;
1034 drm_gpuva_link(&vma->gpuva, vm_bo);
1035 drm_gpuvm_bo_put(vm_bo);
1036 } else /* userptr or null */ {
1037 if (!is_null && !is_cpu_addr_mirror) {
1038 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1039 u64 size = end - start + 1;
1040 int err;
1041
1042 vma->gpuva.gem.offset = bo_offset_or_userptr;
1043
1044 err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1045 if (err) {
1046 xe_vma_free(vma);
1047 return ERR_PTR(err);
1048 }
1049 }
1050
1051 xe_vm_get(vm);
1052 }
1053
1054 return vma;
1055 }
1056
static void xe_vma_destroy_late(struct xe_vma *vma)
1058 {
1059 struct xe_vm *vm = xe_vma_vm(vma);
1060
1061 if (vma->ufence) {
1062 xe_sync_ufence_put(vma->ufence);
1063 vma->ufence = NULL;
1064 }
1065
1066 if (xe_vma_is_userptr(vma)) {
1067 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1068
1069 xe_userptr_remove(uvma);
1070 xe_vm_put(vm);
1071 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1072 xe_vm_put(vm);
1073 } else {
1074 xe_bo_put(xe_vma_bo(vma));
1075 }
1076
1077 xe_vma_free(vma);
1078 }
1079
static void vma_destroy_work_func(struct work_struct *w)
1081 {
1082 struct xe_vma *vma =
1083 container_of(w, struct xe_vma, destroy_work);
1084
1085 xe_vma_destroy_late(vma);
1086 }
1087
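/*
 * Fence callback used by xe_vma_destroy(): the final teardown can sleep
 * (userptr removal, BO/VM puts), so it is deferred to a workqueue rather
 * than run directly from the fence signaling path.
 */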
static void vma_destroy_cb(struct dma_fence *fence,
1089 struct dma_fence_cb *cb)
1090 {
1091 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1092
1093 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1094 queue_work(system_unbound_wq, &vma->destroy_work);
1095 }
1096
static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1098 {
1099 struct xe_vm *vm = xe_vma_vm(vma);
1100
1101 lockdep_assert_held_write(&vm->lock);
1102 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1103
1104 if (xe_vma_is_userptr(vma)) {
1105 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1106 xe_userptr_destroy(to_userptr_vma(vma));
1107 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1108 xe_bo_assert_held(xe_vma_bo(vma));
1109
1110 drm_gpuva_unlink(&vma->gpuva);
1111 }
1112
1113 xe_vm_assert_held(vm);
1114 if (fence) {
1115 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1116 vma_destroy_cb);
1117
1118 if (ret) {
1119 XE_WARN_ON(ret != -ENOENT);
1120 xe_vma_destroy_late(vma);
1121 }
1122 } else {
1123 xe_vma_destroy_late(vma);
1124 }
1125 }
1126
1127 /**
1128 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1129 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
1131 * object's resv.
1132 *
1133 * Return: 0 on success, negative error code on error. In particular
1134 * may return -EDEADLK on WW transaction contention and -EINTR if
1135 * an interruptible wait is terminated by a signal.
1136 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1138 {
1139 struct xe_vm *vm = xe_vma_vm(vma);
1140 struct xe_bo *bo = xe_vma_bo(vma);
1141 int err;
1142
1143 XE_WARN_ON(!vm);
1144
1145 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1146 if (!err && bo && !bo->vm)
1147 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1148
1149 return err;
1150 }
1151
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1153 {
1154 struct xe_device *xe = xe_vma_vm(vma)->xe;
1155 struct xe_validation_ctx ctx;
1156 struct drm_exec exec;
1157 int err = 0;
1158
1159 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1160 err = xe_vm_lock_vma(&exec, vma);
1161 drm_exec_retry_on_contention(&exec);
1162 if (XE_WARN_ON(err))
1163 break;
1164 xe_vma_destroy(vma, NULL);
1165 }
1166 xe_assert(xe, !err);
1167 }
1168
1169 struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1171 {
1172 struct drm_gpuva *gpuva;
1173
1174 lockdep_assert_held(&vm->lock);
1175
1176 if (xe_vm_is_closed_or_banned(vm))
1177 return NULL;
1178
1179 xe_assert(vm->xe, start + range <= vm->size);
1180
1181 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1182
1183 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1184 }
1185
static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1187 {
1188 int err;
1189
1190 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1191 lockdep_assert_held(&vm->lock);
1192
1193 mutex_lock(&vm->snap_mutex);
1194 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1195 mutex_unlock(&vm->snap_mutex);
1196 XE_WARN_ON(err); /* Shouldn't be possible */
1197
1198 return err;
1199 }
1200
static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1202 {
1203 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1204 lockdep_assert_held(&vm->lock);
1205
1206 mutex_lock(&vm->snap_mutex);
1207 drm_gpuva_remove(&vma->gpuva);
1208 mutex_unlock(&vm->snap_mutex);
1209 if (vm->usm.last_fault_vma == vma)
1210 vm->usm.last_fault_vma = NULL;
1211 }
1212
static struct drm_gpuva_op *xe_vm_op_alloc(void)
1214 {
1215 struct xe_vma_op *op;
1216
1217 op = kzalloc(sizeof(*op), GFP_KERNEL);
1218
1219 if (unlikely(!op))
1220 return NULL;
1221
1222 return &op->base;
1223 }
1224
1225 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1226
1227 static const struct drm_gpuvm_ops gpuvm_ops = {
1228 .op_alloc = xe_vm_op_alloc,
1229 .vm_bo_validate = xe_gpuvm_validate,
1230 .vm_free = xe_vm_free,
1231 };
1232
static u64 pde_encode_pat_index(u16 pat_index)
1234 {
1235 u64 pte = 0;
1236
1237 if (pat_index & BIT(0))
1238 pte |= XE_PPGTT_PTE_PAT0;
1239
1240 if (pat_index & BIT(1))
1241 pte |= XE_PPGTT_PTE_PAT1;
1242
1243 return pte;
1244 }
1245
static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1247 {
1248 u64 pte = 0;
1249
1250 if (pat_index & BIT(0))
1251 pte |= XE_PPGTT_PTE_PAT0;
1252
1253 if (pat_index & BIT(1))
1254 pte |= XE_PPGTT_PTE_PAT1;
1255
1256 if (pat_index & BIT(2)) {
1257 if (pt_level)
1258 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1259 else
1260 pte |= XE_PPGTT_PTE_PAT2;
1261 }
1262
1263 if (pat_index & BIT(3))
1264 pte |= XELPG_PPGTT_PTE_PAT3;
1265
1266 if (pat_index & (BIT(4)))
1267 pte |= XE2_PPGTT_PTE_PAT4;
1268
1269 return pte;
1270 }
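/*
 * Illustrative example: pat_index 5 (0b00101) sets XE_PPGTT_PTE_PAT0 plus
 * XE_PPGTT_PTE_PAT2 for a leaf PTE (pt_level == 0), or XE_PPGTT_PDE_PDPE_PAT2
 * for a higher-level entry.
 */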
1271
static u64 pte_encode_ps(u32 pt_level)
1273 {
1274 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1275
1276 if (pt_level == 1)
1277 return XE_PDE_PS_2M;
1278 else if (pt_level == 2)
1279 return XE_PDPE_PS_1G;
1280
1281 return 0;
1282 }
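/*
 * Page-size bits by level: pt_level 0 leaves the PS bits clear, level 1
 * selects XE_PDE_PS_2M (2 MiB pages) and level 2 selects XE_PDPE_PS_1G
 * (1 GiB pages).
 */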
1283
static u16 pde_pat_index(struct xe_bo *bo)
1285 {
1286 struct xe_device *xe = xe_bo_device(bo);
1287 u16 pat_index;
1288
1289 /*
1290 * We only have two bits to encode the PAT index in non-leaf nodes, but
1291 * these only point to other paging structures so we only need a minimal
1292 * selection of options. The user PAT index is only for encoding leaf
1293 * nodes, where we have use of more bits to do the encoding. The
1294 * non-leaf nodes are instead under driver control so the chosen index
1295 * here should be distinct from the user PAT index. Also the
1296 * corresponding coherency of the PAT index should be tied to the
1297 * allocation type of the page table (or at least we should pick
1298 * something which is always safe).
1299 */
1300 if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1301 pat_index = xe->pat.idx[XE_CACHE_WB];
1302 else
1303 pat_index = xe->pat.idx[XE_CACHE_NONE];
1304
1305 xe_assert(xe, pat_index <= 3);
1306
1307 return pat_index;
1308 }
1309
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1311 {
1312 u64 pde;
1313
1314 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1315 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1316 pde |= pde_encode_pat_index(pde_pat_index(bo));
1317
1318 return pde;
1319 }
1320
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1322 u16 pat_index, u32 pt_level)
1323 {
1324 u64 pte;
1325
1326 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1327 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1328 pte |= pte_encode_pat_index(pat_index, pt_level);
1329 pte |= pte_encode_ps(pt_level);
1330
1331 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1332 pte |= XE_PPGTT_PTE_DM;
1333
1334 return pte;
1335 }
1336
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1338 u16 pat_index, u32 pt_level)
1339 {
1340 pte |= XE_PAGE_PRESENT;
1341
1342 if (likely(!xe_vma_read_only(vma)))
1343 pte |= XE_PAGE_RW;
1344
1345 pte |= pte_encode_pat_index(pat_index, pt_level);
1346 pte |= pte_encode_ps(pt_level);
1347
1348 if (unlikely(xe_vma_is_null(vma)))
1349 pte |= XE_PTE_NULL;
1350
1351 return pte;
1352 }
1353
static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1355 u16 pat_index,
1356 u32 pt_level, bool devmem, u64 flags)
1357 {
1358 u64 pte;
1359
1360 /* Avoid passing random bits directly as flags */
1361 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1362
1363 pte = addr;
1364 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1365 pte |= pte_encode_pat_index(pat_index, pt_level);
1366 pte |= pte_encode_ps(pt_level);
1367
1368 if (devmem)
1369 pte |= XE_PPGTT_PTE_DM;
1370
1371 pte |= flags;
1372
1373 return pte;
1374 }
1375
1376 static const struct xe_pt_ops xelp_pt_ops = {
1377 .pte_encode_bo = xelp_pte_encode_bo,
1378 .pte_encode_vma = xelp_pte_encode_vma,
1379 .pte_encode_addr = xelp_pte_encode_addr,
1380 .pde_encode_bo = xelp_pde_encode_bo,
1381 };
1382
1383 static void vm_destroy_work_func(struct work_struct *w);
1384
1385 /**
1386 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1387 * given tile and vm.
1388 * @xe: xe device.
1389 * @tile: tile to set up for.
1390 * @vm: vm to set up for.
1391 * @exec: The struct drm_exec object used to lock the vm resv.
1392 *
1393 * Sets up a pagetable tree with one page-table per level and a single
1394 * leaf PTE. All pagetable entries point to the single page-table or,
1395 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1396 * writes become NOPs.
1397 *
1398 * Return: 0 on success, negative error code on error.
1399 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1401 struct xe_vm *vm, struct drm_exec *exec)
1402 {
1403 u8 id = tile->id;
1404 int i;
1405
1406 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1407 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1408 if (IS_ERR(vm->scratch_pt[id][i])) {
1409 int err = PTR_ERR(vm->scratch_pt[id][i]);
1410
1411 vm->scratch_pt[id][i] = NULL;
1412 return err;
1413 }
1414 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1415 }
1416
1417 return 0;
1418 }
1419 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1420
static void xe_vm_free_scratch(struct xe_vm *vm)
1422 {
1423 struct xe_tile *tile;
1424 u8 id;
1425
1426 if (!xe_vm_has_scratch(vm))
1427 return;
1428
1429 for_each_tile(tile, vm->xe, id) {
1430 u32 i;
1431
1432 if (!vm->pt_root[id])
1433 continue;
1434
1435 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1436 if (vm->scratch_pt[id][i])
1437 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1438 }
1439 }
1440
static void xe_vm_pt_destroy(struct xe_vm *vm)
1442 {
1443 struct xe_tile *tile;
1444 u8 id;
1445
1446 xe_vm_assert_held(vm);
1447
1448 for_each_tile(tile, vm->xe, id) {
1449 if (vm->pt_root[id]) {
1450 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1451 vm->pt_root[id] = NULL;
1452 }
1453 }
1454 }
1455
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1457 {
1458 struct drm_gem_object *vm_resv_obj;
1459 struct xe_validation_ctx ctx;
1460 struct drm_exec exec;
1461 struct xe_vm *vm;
1462 int err;
1463 struct xe_tile *tile;
1464 u8 id;
1465
1466 /*
1467 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1468 * ever be in faulting mode.
1469 */
1470 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1471
1472 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1473 if (!vm)
1474 return ERR_PTR(-ENOMEM);
1475
1476 vm->xe = xe;
1477
1478 vm->size = 1ull << xe->info.va_bits;
1479 vm->flags = flags;
1480
1481 if (xef)
1482 vm->xef = xe_file_get(xef);
/*
1484 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1485 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1486 * under a user-VM lock when the PXP session is started at exec_queue
1487 * creation time. Those are different VMs and therefore there is no risk
1488 * of deadlock, but we need to tell lockdep that this is the case or it
1489 * will print a warning.
1490 */
1491 if (flags & XE_VM_FLAG_GSC) {
1492 static struct lock_class_key gsc_vm_key;
1493
1494 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1495 } else {
1496 init_rwsem(&vm->lock);
1497 }
1498 mutex_init(&vm->snap_mutex);
1499
1500 INIT_LIST_HEAD(&vm->rebind_list);
1501
1502 INIT_LIST_HEAD(&vm->userptr.repin_list);
1503 INIT_LIST_HEAD(&vm->userptr.invalidated);
1504 spin_lock_init(&vm->userptr.invalidated_lock);
1505
1506 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1507
1508 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1509
1510 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1511 if (flags & XE_VM_FLAG_FAULT_MODE)
1512 vm->preempt.min_run_period_ms = 0;
1513 else
1514 vm->preempt.min_run_period_ms = 5;
1515
1516 for_each_tile(tile, xe, id)
1517 xe_range_fence_tree_init(&vm->rftree[id]);
1518
1519 vm->pt_ops = &xelp_pt_ops;
1520
1521 /*
1522 * Long-running workloads are not protected by the scheduler references.
1523 * By design, run_job for long-running workloads returns NULL and the
1524 * scheduler drops all the references of it, hence protecting the VM
1525 * for this case is necessary.
1526 */
1527 if (flags & XE_VM_FLAG_LR_MODE) {
1528 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1529 xe_pm_runtime_get_noresume(xe);
1530 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1531 }
1532
1533 err = xe_svm_init(vm);
1534 if (err)
1535 goto err_no_resv;
1536
1537 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1538 if (!vm_resv_obj) {
1539 err = -ENOMEM;
1540 goto err_svm_fini;
1541 }
1542
1543 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1544 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1545
1546 drm_gem_object_put(vm_resv_obj);
1547
1548 err = 0;
1549 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1550 err) {
1551 err = xe_vm_drm_exec_lock(vm, &exec);
1552 drm_exec_retry_on_contention(&exec);
1553
1554 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1555 vm->flags |= XE_VM_FLAG_64K;
1556
1557 for_each_tile(tile, xe, id) {
1558 if (flags & XE_VM_FLAG_MIGRATION &&
1559 tile->id != XE_VM_FLAG_TILE_ID(flags))
1560 continue;
1561
1562 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1563 &exec);
1564 if (IS_ERR(vm->pt_root[id])) {
1565 err = PTR_ERR(vm->pt_root[id]);
1566 vm->pt_root[id] = NULL;
1567 xe_vm_pt_destroy(vm);
1568 drm_exec_retry_on_contention(&exec);
1569 xe_validation_retry_on_oom(&ctx, &err);
1570 break;
1571 }
1572 }
1573 if (err)
1574 break;
1575
1576 if (xe_vm_has_scratch(vm)) {
1577 for_each_tile(tile, xe, id) {
1578 if (!vm->pt_root[id])
1579 continue;
1580
1581 err = xe_vm_create_scratch(xe, tile, vm, &exec);
1582 if (err) {
1583 xe_vm_free_scratch(vm);
1584 xe_vm_pt_destroy(vm);
1585 drm_exec_retry_on_contention(&exec);
1586 xe_validation_retry_on_oom(&ctx, &err);
1587 break;
1588 }
1589 }
1590 if (err)
1591 break;
1592 vm->batch_invalidate_tlb = true;
1593 }
1594
1595 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1596 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1597 vm->batch_invalidate_tlb = false;
1598 }
1599
1600 /* Fill pt_root after allocating scratch tables */
1601 for_each_tile(tile, xe, id) {
1602 if (!vm->pt_root[id])
1603 continue;
1604
1605 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1606 }
1607 }
1608 if (err)
1609 goto err_close;
1610
1611 /* Kernel migration VM shouldn't have a circular loop.. */
1612 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1613 for_each_tile(tile, xe, id) {
1614 struct xe_exec_queue *q;
1615 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1616
1617 if (!vm->pt_root[id])
1618 continue;
1619
1620 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1621 if (IS_ERR(q)) {
1622 err = PTR_ERR(q);
1623 goto err_close;
1624 }
1625 vm->q[id] = q;
1626 }
1627 }
1628
1629 if (xef && xe->info.has_asid) {
1630 u32 asid;
1631
1632 down_write(&xe->usm.lock);
1633 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1634 XA_LIMIT(1, XE_MAX_ASID - 1),
1635 &xe->usm.next_asid, GFP_KERNEL);
1636 up_write(&xe->usm.lock);
1637 if (err < 0)
1638 goto err_close;
1639
1640 vm->usm.asid = asid;
1641 }
1642
1643 trace_xe_vm_create(vm);
1644
1645 return vm;
1646
1647 err_close:
1648 xe_vm_close_and_put(vm);
1649 return ERR_PTR(err);
1650
1651 err_svm_fini:
1652 if (flags & XE_VM_FLAG_FAULT_MODE) {
1653 vm->size = 0; /* close the vm */
1654 xe_svm_fini(vm);
1655 }
1656 err_no_resv:
1657 mutex_destroy(&vm->snap_mutex);
1658 for_each_tile(tile, xe, id)
1659 xe_range_fence_tree_fini(&vm->rftree[id]);
1660 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1661 if (vm->xef)
1662 xe_file_put(vm->xef);
1663 kfree(vm);
1664 if (flags & XE_VM_FLAG_LR_MODE)
1665 xe_pm_runtime_put(xe);
1666 return ERR_PTR(err);
1667 }
1668
static void xe_vm_close(struct xe_vm *vm)
1670 {
1671 struct xe_device *xe = vm->xe;
1672 bool bound;
1673 int idx;
1674
1675 bound = drm_dev_enter(&xe->drm, &idx);
1676
1677 down_write(&vm->lock);
1678 if (xe_vm_in_fault_mode(vm))
1679 xe_svm_notifier_lock(vm);
1680
1681 vm->size = 0;
1682
1683 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1684 struct xe_tile *tile;
1685 struct xe_gt *gt;
1686 u8 id;
1687
1688 /* Wait for pending binds */
1689 dma_resv_wait_timeout(xe_vm_resv(vm),
1690 DMA_RESV_USAGE_BOOKKEEP,
1691 false, MAX_SCHEDULE_TIMEOUT);
1692
1693 if (bound) {
1694 for_each_tile(tile, xe, id)
1695 if (vm->pt_root[id])
1696 xe_pt_clear(xe, vm->pt_root[id]);
1697
1698 for_each_gt(gt, xe, id)
1699 xe_tlb_inval_vm(>->tlb_inval, vm);
1700 }
1701 }
1702
1703 if (xe_vm_in_fault_mode(vm))
1704 xe_svm_notifier_unlock(vm);
1705 up_write(&vm->lock);
1706
1707 if (bound)
1708 drm_dev_exit(idx);
1709 }
1710
void xe_vm_close_and_put(struct xe_vm *vm)
1712 {
1713 LIST_HEAD(contested);
1714 struct xe_device *xe = vm->xe;
1715 struct xe_tile *tile;
1716 struct xe_vma *vma, *next_vma;
1717 struct drm_gpuva *gpuva, *next;
1718 u8 id;
1719
1720 xe_assert(xe, !vm->preempt.num_exec_queues);
1721
1722 xe_vm_close(vm);
1723 if (xe_vm_in_preempt_fence_mode(vm)) {
1724 mutex_lock(&xe->rebind_resume_lock);
1725 list_del_init(&vm->preempt.pm_activate_link);
1726 mutex_unlock(&xe->rebind_resume_lock);
1727 flush_work(&vm->preempt.rebind_work);
1728 }
1729 if (xe_vm_in_fault_mode(vm))
1730 xe_svm_close(vm);
1731
1732 down_write(&vm->lock);
1733 for_each_tile(tile, xe, id) {
1734 if (vm->q[id]) {
1735 int i;
1736
1737 xe_exec_queue_last_fence_put(vm->q[id], vm);
1738 for_each_tlb_inval(i)
1739 xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
1740 }
1741 }
1742 up_write(&vm->lock);
1743
1744 for_each_tile(tile, xe, id) {
1745 if (vm->q[id]) {
1746 xe_exec_queue_kill(vm->q[id]);
1747 xe_exec_queue_put(vm->q[id]);
1748 vm->q[id] = NULL;
1749 }
1750 }
1751
1752 down_write(&vm->lock);
1753 xe_vm_lock(vm, false);
1754 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1755 vma = gpuva_to_vma(gpuva);
1756
1757 if (xe_vma_has_no_bo(vma)) {
1758 xe_svm_notifier_lock(vm);
1759 vma->gpuva.flags |= XE_VMA_DESTROYED;
1760 xe_svm_notifier_unlock(vm);
1761 }
1762
1763 xe_vm_remove_vma(vm, vma);
1764
1765 /* easy case, remove from VMA? */
1766 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1767 list_del_init(&vma->combined_links.rebind);
1768 xe_vma_destroy(vma, NULL);
1769 continue;
1770 }
1771
1772 list_move_tail(&vma->combined_links.destroy, &contested);
1773 vma->gpuva.flags |= XE_VMA_DESTROYED;
1774 }
1775
1776 /*
1777 * All vm operations will add shared fences to resv.
1778 * The only exception is eviction for a shared object,
1779 * but even so, the unbind when evicted would still
1780 * install a fence to resv. Hence it's safe to
1781 * destroy the pagetables immediately.
1782 */
1783 xe_vm_free_scratch(vm);
1784 xe_vm_pt_destroy(vm);
1785 xe_vm_unlock(vm);
1786
1787 /*
1788 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1789 * Since we hold a refcount to the bo, we can remove and free
1790 * the members safely without locking.
1791 */
1792 list_for_each_entry_safe(vma, next_vma, &contested,
1793 combined_links.destroy) {
1794 list_del_init(&vma->combined_links.destroy);
1795 xe_vma_destroy_unlocked(vma);
1796 }
1797
1798 xe_svm_fini(vm);
1799
1800 up_write(&vm->lock);
1801
1802 down_write(&xe->usm.lock);
1803 if (vm->usm.asid) {
1804 void *lookup;
1805
1806 xe_assert(xe, xe->info.has_asid);
1807 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1808
1809 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1810 xe_assert(xe, lookup == vm);
1811 }
1812 up_write(&xe->usm.lock);
1813
1814 for_each_tile(tile, xe, id)
1815 xe_range_fence_tree_fini(&vm->rftree[id]);
1816
1817 xe_vm_put(vm);
1818 }
1819
static void vm_destroy_work_func(struct work_struct *w)
1821 {
1822 struct xe_vm *vm =
1823 container_of(w, struct xe_vm, destroy_work);
1824 struct xe_device *xe = vm->xe;
1825 struct xe_tile *tile;
1826 u8 id;
1827
1828 /* xe_vm_close_and_put was not called? */
1829 xe_assert(xe, !vm->size);
1830
1831 if (xe_vm_in_preempt_fence_mode(vm))
1832 flush_work(&vm->preempt.rebind_work);
1833
1834 mutex_destroy(&vm->snap_mutex);
1835
1836 if (vm->flags & XE_VM_FLAG_LR_MODE)
1837 xe_pm_runtime_put(xe);
1838
1839 for_each_tile(tile, xe, id)
1840 XE_WARN_ON(vm->pt_root[id]);
1841
1842 trace_xe_vm_free(vm);
1843
1844 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1845
1846 if (vm->xef)
1847 xe_file_put(vm->xef);
1848
1849 kfree(vm);
1850 }
1851
static void xe_vm_free(struct drm_gpuvm *gpuvm)
1853 {
1854 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1855
1856 /* To destroy the VM we need to be able to sleep */
1857 queue_work(system_unbound_wq, &vm->destroy_work);
1858 }
1859
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1861 {
1862 struct xe_vm *vm;
1863
1864 mutex_lock(&xef->vm.lock);
1865 vm = xa_load(&xef->vm.xa, id);
1866 if (vm)
1867 xe_vm_get(vm);
1868 mutex_unlock(&xef->vm.lock);
1869
1870 return vm;
1871 }
1872
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1874 {
1875 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
1876 }
1877
1878 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1880 {
1881 return q ? q : vm->q[0];
1882 }
1883
1884 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1886 {
1887 unsigned int i;
1888
1889 for (i = 0; i < num_syncs; i++) {
1890 struct xe_sync_entry *e = &syncs[i];
1891
1892 if (xe_sync_is_ufence(e))
1893 return xe_sync_ufence_get(e);
1894 }
1895
1896 return NULL;
1897 }
1898
1899 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1900 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1901 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1902
int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1904 struct drm_file *file)
1905 {
1906 struct xe_device *xe = to_xe_device(dev);
1907 struct xe_file *xef = to_xe_file(file);
1908 struct drm_xe_vm_create *args = data;
1909 struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
1910 struct xe_vm *vm;
1911 u32 id;
1912 int err;
1913 u32 flags = 0;
1914
1915 if (XE_IOCTL_DBG(xe, args->extensions))
1916 return -EINVAL;
1917
1918 if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
1919 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1920
1921 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1922 !xe->info.has_usm))
1923 return -EINVAL;
1924
1925 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1926 return -EINVAL;
1927
1928 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1929 return -EINVAL;
1930
1931 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1932 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1933 !xe->info.needs_scratch))
1934 return -EINVAL;
1935
1936 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1937 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1938 return -EINVAL;
1939
1940 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1941 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1942 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1943 flags |= XE_VM_FLAG_LR_MODE;
1944 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1945 flags |= XE_VM_FLAG_FAULT_MODE;
1946
1947 vm = xe_vm_create(xe, flags, xef);
1948 if (IS_ERR(vm))
1949 return PTR_ERR(vm);
1950
1951 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1952 /* Warning: Security issue - never enable by default */
1953 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1954 #endif
1955
1956 /* user id alloc must always be last in ioctl to prevent UAF */
1957 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1958 if (err)
1959 goto err_close_and_put;
1960
1961 args->vm_id = id;
1962
1963 return 0;
1964
1965 err_close_and_put:
1966 xe_vm_close_and_put(vm);
1967
1968 return err;
1969 }
1970
int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1972 struct drm_file *file)
1973 {
1974 struct xe_device *xe = to_xe_device(dev);
1975 struct xe_file *xef = to_xe_file(file);
1976 struct drm_xe_vm_destroy *args = data;
1977 struct xe_vm *vm;
1978 int err = 0;
1979
1980 if (XE_IOCTL_DBG(xe, args->pad) ||
1981 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1982 return -EINVAL;
1983
1984 mutex_lock(&xef->vm.lock);
1985 vm = xa_load(&xef->vm.xa, args->vm_id);
1986 if (XE_IOCTL_DBG(xe, !vm))
1987 err = -ENOENT;
1988 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1989 err = -EBUSY;
1990 else
1991 xa_erase(&xef->vm.xa, args->vm_id);
1992 mutex_unlock(&xef->vm.lock);
1993
1994 if (!err)
1995 xe_vm_close_and_put(vm);
1996
1997 return err;
1998 }
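
/*
 * Userspace counterpart sketch for the destroy ioctl above (assumes the
 * DRM_IOCTL_XE_VM_DESTROY request from uapi/drm/xe_drm.h and a vm_id obtained
 * from the create ioctl):
 *
 *	struct drm_xe_vm_destroy destroy = { .vm_id = vm_id };
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy))
 *		return -errno;
 */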
1999
2000 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
2001 {
2002 struct drm_gpuva *gpuva;
2003 u32 num_vmas = 0;
2004
2005 lockdep_assert_held(&vm->lock);
2006 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
2007 num_vmas++;
2008
2009 return num_vmas;
2010 }
2011
2012 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
2013 u64 end, struct drm_xe_mem_range_attr *attrs)
2014 {
2015 struct drm_gpuva *gpuva;
2016 int i = 0;
2017
2018 lockdep_assert_held(&vm->lock);
2019
2020 drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
2021 struct xe_vma *vma = gpuva_to_vma(gpuva);
2022
2023 if (i == *num_vmas)
2024 return -ENOSPC;
2025
2026 attrs[i].start = xe_vma_start(vma);
2027 attrs[i].end = xe_vma_end(vma);
2028 attrs[i].atomic.val = vma->attr.atomic_access;
2029 attrs[i].pat_index.val = vma->attr.pat_index;
2030 attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
2031 attrs[i].preferred_mem_loc.migration_policy =
2032 vma->attr.preferred_loc.migration_policy;
2033
2034 i++;
2035 }
2036
2037 *num_vmas = i;
2038 return 0;
2039 }
2040
2041 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2042 {
2043 struct xe_device *xe = to_xe_device(dev);
2044 struct xe_file *xef = to_xe_file(file);
2045 struct drm_xe_mem_range_attr *mem_attrs;
2046 struct drm_xe_vm_query_mem_range_attr *args = data;
2047 u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2048 struct xe_vm *vm;
2049 int err = 0;
2050
2051 if (XE_IOCTL_DBG(xe,
2052 ((args->num_mem_ranges == 0 &&
2053 (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2054 (args->num_mem_ranges > 0 &&
2055 (!attrs_user ||
2056 args->sizeof_mem_range_attr !=
2057 sizeof(struct drm_xe_mem_range_attr))))))
2058 return -EINVAL;
2059
2060 vm = xe_vm_lookup(xef, args->vm_id);
2061 if (XE_IOCTL_DBG(xe, !vm))
2062 return -EINVAL;
2063
2064 err = down_read_interruptible(&vm->lock);
2065 if (err)
2066 goto put_vm;
2067
2068 attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2069
2070 if (args->num_mem_ranges == 0 && !attrs_user) {
2071 args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2072 args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2073 goto unlock_vm;
2074 }
2075
2076 mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2077 GFP_KERNEL | __GFP_ACCOUNT |
2078 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2079 if (!mem_attrs) {
2080 err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2081 goto unlock_vm;
2082 }
2083
2084 memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2085 err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2086 args->start + args->range, mem_attrs);
2087 if (err)
2088 goto free_mem_attrs;
2089
2090 err = copy_to_user(attrs_user, mem_attrs,
2091 args->sizeof_mem_range_attr * args->num_mem_ranges);
2092 if (err)
2093 err = -EFAULT;
2094
2095 free_mem_attrs:
2096 kvfree(mem_attrs);
2097 unlock_vm:
2098 up_read(&vm->lock);
2099 put_vm:
2100 xe_vm_put(vm);
2101 return err;
2102 }
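
/*
 * The ioctl above supports a two-pass userspace pattern (illustrative sketch;
 * the exact ioctl request macro comes from uapi/drm/xe_drm.h and is assumed
 * here to be DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS):
 *
 *	struct drm_xe_vm_query_mem_range_attr q = {
 *		.vm_id = vm_id, .start = start, .range = range,
 *	};
 *
 *	Pass 1: num_mem_ranges == 0 and no vector, so the kernel reports the
 *	number of ranges and the per-entry size:
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &q);
 *
 *	Pass 2: supply a buffer sized from pass 1 to receive the attributes:
 *
 *	attrs = calloc(q.num_mem_ranges, q.sizeof_mem_range_attr);
 *	q.vector_of_mem_attr = (uintptr_t)attrs;
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &q);
 */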
2103
2104 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2105 {
2106 if (page_addr > xe_vma_end(vma) - 1 ||
2107 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2108 return false;
2109
2110 return true;
2111 }
2112
2113 /**
2114 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2115 *
2116 * @vm: the xe_vm the vma belongs to
2117 * @page_addr: address to look up
2118  *
 * Return: Pointer to the matching VMA, or NULL if no VMA covers @page_addr.
 */
2119 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2120 {
2121 struct xe_vma *vma = NULL;
2122
2123 if (vm->usm.last_fault_vma) { /* Fast lookup */
2124 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2125 vma = vm->usm.last_fault_vma;
2126 }
2127 if (!vma)
2128 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2129
2130 return vma;
2131 }
2132
2133 static const u32 region_to_mem_type[] = {
2134 XE_PL_TT,
2135 XE_PL_VRAM0,
2136 XE_PL_VRAM1,
2137 };
2138
2139 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2140 bool post_commit)
2141 {
2142 xe_svm_notifier_lock(vm);
2143 vma->gpuva.flags |= XE_VMA_DESTROYED;
2144 xe_svm_notifier_unlock(vm);
2145 if (post_commit)
2146 xe_vm_remove_vma(vm, vma);
2147 }
2148
2149 #undef ULL
2150 #define ULL unsigned long long
2151
2152 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2153 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2154 {
2155 struct xe_vma *vma;
2156
2157 switch (op->op) {
2158 case DRM_GPUVA_OP_MAP:
2159 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2160 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2161 break;
2162 case DRM_GPUVA_OP_REMAP:
2163 vma = gpuva_to_vma(op->remap.unmap->va);
2164 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2165 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2166 op->remap.unmap->keep ? 1 : 0);
2167 if (op->remap.prev)
2168 vm_dbg(&xe->drm,
2169 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2170 (ULL)op->remap.prev->va.addr,
2171 (ULL)op->remap.prev->va.range);
2172 if (op->remap.next)
2173 vm_dbg(&xe->drm,
2174 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2175 (ULL)op->remap.next->va.addr,
2176 (ULL)op->remap.next->va.range);
2177 break;
2178 case DRM_GPUVA_OP_UNMAP:
2179 vma = gpuva_to_vma(op->unmap.va);
2180 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2181 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2182 op->unmap.keep ? 1 : 0);
2183 break;
2184 case DRM_GPUVA_OP_PREFETCH:
2185 vma = gpuva_to_vma(op->prefetch.va);
2186 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2187 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2188 break;
2189 default:
2190 drm_warn(&xe->drm, "NOT POSSIBLE");
2191 }
2192 }
2193 #else
2194 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2195 {
2196 }
2197 #endif
2198
2199 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2200 {
2201 if (!xe_vm_in_fault_mode(vm))
2202 return false;
2203
2204 if (!xe_vm_has_scratch(vm))
2205 return false;
2206
2207 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2208 return false;
2209
2210 return true;
2211 }
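
/*
 * Put differently: only a deferred (non-IMMEDIATE) bind into a fault-mode VM
 * that uses scratch pages needs the scratch entries covering the range
 * invalidated at bind time; immediate binds populate real PTEs right away,
 * and non-scratch or non-fault VMs have nothing stale to clear.
 */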
2212
2213 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2214 {
2215 struct drm_gpuva_op *__op;
2216
2217 drm_gpuva_for_each_op(__op, ops) {
2218 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2219
2220 xe_vma_svm_prefetch_op_fini(op);
2221 }
2222 }
2223
2224 /*
2225 * Create the operations list from IOCTL arguments and set up operation fields so that the parse
2226 * and commit steps are decoupled from IOCTL arguments. This step can fail.
2227 */
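/*
 * Rough flow of a single bind through the helpers in this file (illustrative
 * overview; see xe_vm_bind_ioctl() below for the authoritative sequence):
 *
 *	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, obj_offset, addr, range,
 *				       op, flags, prefetch_region, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 */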
2228 static struct drm_gpuva_ops *
2229 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2230 struct xe_bo *bo, u64 bo_offset_or_userptr,
2231 u64 addr, u64 range,
2232 u32 operation, u32 flags,
2233 u32 prefetch_region, u16 pat_index)
2234 {
2235 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2236 struct drm_gpuva_ops *ops;
2237 struct drm_gpuva_op *__op;
2238 struct drm_gpuvm_bo *vm_bo;
2239 u64 range_end = addr + range;
2240 int err;
2241
2242 lockdep_assert_held_write(&vm->lock);
2243
2244 vm_dbg(&vm->xe->drm,
2245 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2246 operation, (ULL)addr, (ULL)range,
2247 (ULL)bo_offset_or_userptr);
2248
2249 switch (operation) {
2250 case DRM_XE_VM_BIND_OP_MAP:
2251 case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2252 struct drm_gpuvm_map_req map_req = {
2253 .map.va.addr = addr,
2254 .map.va.range = range,
2255 .map.gem.obj = obj,
2256 .map.gem.offset = bo_offset_or_userptr,
2257 };
2258
2259 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2260 break;
2261 }
2262 case DRM_XE_VM_BIND_OP_UNMAP:
2263 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2264 break;
2265 case DRM_XE_VM_BIND_OP_PREFETCH:
2266 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2267 break;
2268 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2269 xe_assert(vm->xe, bo);
2270
2271 err = xe_bo_lock(bo, true);
2272 if (err)
2273 return ERR_PTR(err);
2274
2275 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2276 if (IS_ERR(vm_bo)) {
2277 xe_bo_unlock(bo);
2278 return ERR_CAST(vm_bo);
2279 }
2280
2281 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2282 drm_gpuvm_bo_put(vm_bo);
2283 xe_bo_unlock(bo);
2284 break;
2285 default:
2286 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2287 ops = ERR_PTR(-EINVAL);
2288 }
2289 if (IS_ERR(ops))
2290 return ops;
2291
2292 drm_gpuva_for_each_op(__op, ops) {
2293 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2294
2295 if (__op->op == DRM_GPUVA_OP_MAP) {
2296 op->map.immediate =
2297 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2298 if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2299 op->map.vma_flags |= XE_VMA_READ_ONLY;
2300 if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2301 op->map.vma_flags |= DRM_GPUVA_SPARSE;
2302 if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2303 op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2304 if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2305 op->map.vma_flags |= XE_VMA_DUMPABLE;
2306 if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2307 op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2308 op->map.pat_index = pat_index;
2309 op->map.invalidate_on_bind =
2310 __xe_vm_needs_clear_scratch_pages(vm, flags);
2311 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2312 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2313 struct xe_tile *tile;
2314 struct xe_svm_range *svm_range;
2315 struct drm_gpusvm_ctx ctx = {};
2316 struct drm_pagemap *dpagemap;
2317 u8 id, tile_mask = 0;
2318 u32 i;
2319
2320 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2321 op->prefetch.region = prefetch_region;
2322 break;
2323 }
2324
2325 ctx.read_only = xe_vma_read_only(vma);
2326 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2327 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2328
2329 for_each_tile(tile, vm->xe, id)
2330 tile_mask |= 0x1 << id;
2331
2332 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2333 op->prefetch_range.ranges_count = 0;
2334 tile = NULL;
2335
2336 if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2337 dpagemap = xe_vma_resolve_pagemap(vma,
2338 xe_device_get_root_tile(vm->xe));
2339 /*
2340 * TODO: Once multi-GPU support is enabled, we will need
2341 * a way to derive the tile from the dpagemap.
2342 */
2343 if (dpagemap)
2344 tile = xe_device_get_root_tile(vm->xe);
2345 } else if (prefetch_region) {
2346 tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2347 XE_PL_VRAM0];
2348 }
2349
2350 op->prefetch_range.tile = tile;
2351 alloc_next_range:
2352 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2353
2354 if (PTR_ERR(svm_range) == -ENOENT) {
2355 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2356
2357 addr = ret == ULONG_MAX ? 0 : ret;
2358 if (addr)
2359 goto alloc_next_range;
2360 else
2361 goto print_op_label;
2362 }
2363
2364 if (IS_ERR(svm_range)) {
2365 err = PTR_ERR(svm_range);
2366 goto unwind_prefetch_ops;
2367 }
2368
2369 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
2370 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2371 goto check_next_range;
2372 }
2373
2374 err = xa_alloc(&op->prefetch_range.range,
2375 &i, svm_range, xa_limit_32b,
2376 GFP_KERNEL);
2377
2378 if (err)
2379 goto unwind_prefetch_ops;
2380
2381 op->prefetch_range.ranges_count++;
2382 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2383 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2384 check_next_range:
2385 if (range_end > xe_svm_range_end(svm_range) &&
2386 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2387 addr = xe_svm_range_end(svm_range);
2388 goto alloc_next_range;
2389 }
2390 }
2391 print_op_label:
2392 print_op(vm->xe, __op);
2393 }
2394
2395 return ops;
2396
2397 unwind_prefetch_ops:
2398 xe_svm_prefetch_gpuva_ops_fini(ops);
2399 drm_gpuva_ops_free(&vm->gpuvm, ops);
2400 return ERR_PTR(err);
2401 }
2402
2403 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2404
2405 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2406 struct xe_vma_mem_attr *attr, unsigned int flags)
2407 {
2408 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2409 struct xe_validation_ctx ctx;
2410 struct drm_exec exec;
2411 struct xe_vma *vma;
2412 int err = 0;
2413
2414 lockdep_assert_held_write(&vm->lock);
2415
2416 if (bo) {
2417 err = 0;
2418 xe_validation_guard(&ctx, &vm->xe->val, &exec,
2419 (struct xe_val_flags) {.interruptible = true}, err) {
2420 if (!bo->vm) {
2421 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2422 drm_exec_retry_on_contention(&exec);
2423 }
2424 if (!err) {
2425 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2426 drm_exec_retry_on_contention(&exec);
2427 }
2428 if (err)
2429 return ERR_PTR(err);
2430
2431 vma = xe_vma_create(vm, bo, op->gem.offset,
2432 op->va.addr, op->va.addr +
2433 op->va.range - 1, attr, flags);
2434 if (IS_ERR(vma))
2435 return vma;
2436
2437 if (!bo->vm) {
2438 err = add_preempt_fences(vm, bo);
2439 if (err) {
2440 prep_vma_destroy(vm, vma, false);
2441 xe_vma_destroy(vma, NULL);
2442 }
2443 }
2444 }
2445 if (err)
2446 return ERR_PTR(err);
2447 } else {
2448 vma = xe_vma_create(vm, NULL, op->gem.offset,
2449 op->va.addr, op->va.addr +
2450 op->va.range - 1, attr, flags);
2451 if (IS_ERR(vma))
2452 return vma;
2453
2454 if (xe_vma_is_userptr(vma))
2455 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2456 }
2457 if (err) {
2458 prep_vma_destroy(vm, vma, false);
2459 xe_vma_destroy_unlocked(vma);
2460 vma = ERR_PTR(err);
2461 }
2462
2463 return vma;
2464 }
2465
2466 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2467 {
2468 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2469 return SZ_1G;
2470 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2471 return SZ_2M;
2472 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2473 return SZ_64K;
2474 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2475 return SZ_4K;
2476
2477 return SZ_1G; /* Uninitialized, use the max size */
2478 }
2479
2480 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2481 {
2482 switch (size) {
2483 case SZ_1G:
2484 vma->gpuva.flags |= XE_VMA_PTE_1G;
2485 break;
2486 case SZ_2M:
2487 vma->gpuva.flags |= XE_VMA_PTE_2M;
2488 break;
2489 case SZ_64K:
2490 vma->gpuva.flags |= XE_VMA_PTE_64K;
2491 break;
2492 case SZ_4K:
2493 vma->gpuva.flags |= XE_VMA_PTE_4K;
2494 break;
2495 }
2496 }
2497
2498 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2499 {
2500 int err = 0;
2501
2502 lockdep_assert_held_write(&vm->lock);
2503
2504 switch (op->base.op) {
2505 case DRM_GPUVA_OP_MAP:
2506 err |= xe_vm_insert_vma(vm, op->map.vma);
2507 if (!err)
2508 op->flags |= XE_VMA_OP_COMMITTED;
2509 break;
2510 case DRM_GPUVA_OP_REMAP:
2511 {
2512 u8 tile_present =
2513 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2514
2515 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2516 true);
2517 op->flags |= XE_VMA_OP_COMMITTED;
2518
2519 if (op->remap.prev) {
2520 err |= xe_vm_insert_vma(vm, op->remap.prev);
2521 if (!err)
2522 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2523 if (!err && op->remap.skip_prev) {
2524 op->remap.prev->tile_present =
2525 tile_present;
2526 op->remap.prev = NULL;
2527 }
2528 }
2529 if (op->remap.next) {
2530 err |= xe_vm_insert_vma(vm, op->remap.next);
2531 if (!err)
2532 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2533 if (!err && op->remap.skip_next) {
2534 op->remap.next->tile_present =
2535 tile_present;
2536 op->remap.next = NULL;
2537 }
2538 }
2539
2540 /* Adjust for partial unbind after removing VMA from VM */
2541 if (!err) {
2542 op->base.remap.unmap->va->va.addr = op->remap.start;
2543 op->base.remap.unmap->va->va.range = op->remap.range;
2544 }
2545 break;
2546 }
2547 case DRM_GPUVA_OP_UNMAP:
2548 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2549 op->flags |= XE_VMA_OP_COMMITTED;
2550 break;
2551 case DRM_GPUVA_OP_PREFETCH:
2552 op->flags |= XE_VMA_OP_COMMITTED;
2553 break;
2554 default:
2555 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2556 }
2557
2558 return err;
2559 }
2560
2561 /**
2562 * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2563 * @vma: Pointer to the xe_vma structure to check
2564 *
2565 * This function determines whether the given VMA (Virtual Memory Area)
2566 * has its memory attributes set to their default values. Specifically,
2567 * it checks the following conditions:
2568 *
2569 * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED`
2570 * - `pat_index` is equal to `default_pat_index`
2571 * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2572 * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2573 *
2574 * Return: true if all attributes are at their default values, false otherwise.
2575 */
2576 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2577 {
2578 return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2579 vma->attr.pat_index == vma->attr.default_pat_index &&
2580 vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2581 vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2582 }
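
/*
 * For example, a VMA freshly created by vm_bind_ioctl_ops_parse() for a
 * DRM_XE_VM_BIND_OP_MAP starts out with exactly this set of defaults (see the
 * default_attr initialization below), so this helper returns true until one
 * of the attributes is later updated.
 */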
2583
2584 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2585 struct xe_vma_ops *vops)
2586 {
2587 struct xe_device *xe = vm->xe;
2588 struct drm_gpuva_op *__op;
2589 struct xe_tile *tile;
2590 u8 id, tile_mask = 0;
2591 int err = 0;
2592
2593 lockdep_assert_held_write(&vm->lock);
2594
2595 for_each_tile(tile, vm->xe, id)
2596 tile_mask |= 0x1 << id;
2597
2598 drm_gpuva_for_each_op(__op, ops) {
2599 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2600 struct xe_vma *vma;
2601 unsigned int flags = 0;
2602
2603 INIT_LIST_HEAD(&op->link);
2604 list_add_tail(&op->link, &vops->list);
2605 op->tile_mask = tile_mask;
2606
2607 switch (op->base.op) {
2608 case DRM_GPUVA_OP_MAP:
2609 {
2610 struct xe_vma_mem_attr default_attr = {
2611 .preferred_loc = {
2612 .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2613 .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2614 },
2615 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2616 .default_pat_index = op->map.pat_index,
2617 .pat_index = op->map.pat_index,
2618 };
2619
2620 flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
2621
2622 vma = new_vma(vm, &op->base.map, &default_attr,
2623 flags);
2624 if (IS_ERR(vma))
2625 return PTR_ERR(vma);
2626
2627 op->map.vma = vma;
2628 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2629 !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
2630 op->map.invalidate_on_bind)
2631 xe_vma_ops_incr_pt_update_ops(vops,
2632 op->tile_mask, 1);
2633 break;
2634 }
2635 case DRM_GPUVA_OP_REMAP:
2636 {
2637 struct xe_vma *old =
2638 gpuva_to_vma(op->base.remap.unmap->va);
2639 bool skip = xe_vma_is_cpu_addr_mirror(old);
2640 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2641 int num_remap_ops = 0;
2642
2643 if (op->base.remap.prev)
2644 start = op->base.remap.prev->va.addr +
2645 op->base.remap.prev->va.range;
2646 if (op->base.remap.next)
2647 end = op->base.remap.next->va.addr;
2648
2649 if (xe_vma_is_cpu_addr_mirror(old) &&
2650 xe_svm_has_mapping(vm, start, end)) {
2651 if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2652 xe_svm_unmap_address_range(vm, start, end);
2653 else
2654 return -EBUSY;
2655 }
2656
2657 op->remap.start = xe_vma_start(old);
2658 op->remap.range = xe_vma_size(old);
2659
2660 flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
2661 if (op->base.remap.prev) {
2662 vma = new_vma(vm, op->base.remap.prev,
2663 &old->attr, flags);
2664 if (IS_ERR(vma))
2665 return PTR_ERR(vma);
2666
2667 op->remap.prev = vma;
2668
2669 /*
2670 * Userptr creates a new SG mapping so
2671 * we must also rebind.
2672 */
2673 op->remap.skip_prev = skip ||
2674 (!xe_vma_is_userptr(old) &&
2675 IS_ALIGNED(xe_vma_end(vma),
2676 xe_vma_max_pte_size(old)));
2677 if (op->remap.skip_prev) {
2678 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2679 op->remap.range -=
2680 xe_vma_end(vma) -
2681 xe_vma_start(old);
2682 op->remap.start = xe_vma_end(vma);
2683 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2684 (ULL)op->remap.start,
2685 (ULL)op->remap.range);
2686 } else {
2687 num_remap_ops++;
2688 }
2689 }
2690
2691 if (op->base.remap.next) {
2692 vma = new_vma(vm, op->base.remap.next,
2693 &old->attr, flags);
2694 if (IS_ERR(vma))
2695 return PTR_ERR(vma);
2696
2697 op->remap.next = vma;
2698
2699 /*
2700 * Userptr creates a new SG mapping so
2701 * we must also rebind.
2702 */
2703 op->remap.skip_next = skip ||
2704 (!xe_vma_is_userptr(old) &&
2705 IS_ALIGNED(xe_vma_start(vma),
2706 xe_vma_max_pte_size(old)));
2707 if (op->remap.skip_next) {
2708 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2709 op->remap.range -=
2710 xe_vma_end(old) -
2711 xe_vma_start(vma);
2712 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2713 (ULL)op->remap.start,
2714 (ULL)op->remap.range);
2715 } else {
2716 num_remap_ops++;
2717 }
2718 }
2719 if (!skip)
2720 num_remap_ops++;
2721
2722 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2723 break;
2724 }
2725 case DRM_GPUVA_OP_UNMAP:
2726 vma = gpuva_to_vma(op->base.unmap.va);
2727
2728 if (xe_vma_is_cpu_addr_mirror(vma) &&
2729 xe_svm_has_mapping(vm, xe_vma_start(vma),
2730 xe_vma_end(vma)))
2731 return -EBUSY;
2732
2733 if (!xe_vma_is_cpu_addr_mirror(vma))
2734 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2735 break;
2736 case DRM_GPUVA_OP_PREFETCH:
2737 vma = gpuva_to_vma(op->base.prefetch.va);
2738
2739 if (xe_vma_is_userptr(vma)) {
2740 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2741 if (err)
2742 return err;
2743 }
2744
2745 if (xe_vma_is_cpu_addr_mirror(vma))
2746 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2747 op->prefetch_range.ranges_count);
2748 else
2749 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2750
2751 break;
2752 default:
2753 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2754 }
2755
2756 err = xe_vma_op_commit(vm, op);
2757 if (err)
2758 return err;
2759 }
2760
2761 return 0;
2762 }
2763
2764 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2765 bool post_commit, bool prev_post_commit,
2766 bool next_post_commit)
2767 {
2768 lockdep_assert_held_write(&vm->lock);
2769
2770 switch (op->base.op) {
2771 case DRM_GPUVA_OP_MAP:
2772 if (op->map.vma) {
2773 prep_vma_destroy(vm, op->map.vma, post_commit);
2774 xe_vma_destroy_unlocked(op->map.vma);
2775 }
2776 break;
2777 case DRM_GPUVA_OP_UNMAP:
2778 {
2779 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2780
2781 if (vma) {
2782 xe_svm_notifier_lock(vm);
2783 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2784 xe_svm_notifier_unlock(vm);
2785 if (post_commit)
2786 xe_vm_insert_vma(vm, vma);
2787 }
2788 break;
2789 }
2790 case DRM_GPUVA_OP_REMAP:
2791 {
2792 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2793
2794 if (op->remap.prev) {
2795 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2796 xe_vma_destroy_unlocked(op->remap.prev);
2797 }
2798 if (op->remap.next) {
2799 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2800 xe_vma_destroy_unlocked(op->remap.next);
2801 }
2802 if (vma) {
2803 xe_svm_notifier_lock(vm);
2804 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2805 xe_svm_notifier_unlock(vm);
2806 if (post_commit)
2807 xe_vm_insert_vma(vm, vma);
2808 }
2809 break;
2810 }
2811 case DRM_GPUVA_OP_PREFETCH:
2812 /* Nothing to do */
2813 break;
2814 default:
2815 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2816 }
2817 }
2818
2819 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2820 struct drm_gpuva_ops **ops,
2821 int num_ops_list)
2822 {
2823 int i;
2824
2825 for (i = num_ops_list - 1; i >= 0; --i) {
2826 struct drm_gpuva_ops *__ops = ops[i];
2827 struct drm_gpuva_op *__op;
2828
2829 if (!__ops)
2830 continue;
2831
2832 drm_gpuva_for_each_op_reverse(__op, __ops) {
2833 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2834
2835 xe_vma_op_unwind(vm, op,
2836 op->flags & XE_VMA_OP_COMMITTED,
2837 op->flags & XE_VMA_OP_PREV_COMMITTED,
2838 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2839 }
2840 }
2841 }
2842
2843 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2844 bool res_evict, bool validate)
2845 {
2846 struct xe_bo *bo = xe_vma_bo(vma);
2847 struct xe_vm *vm = xe_vma_vm(vma);
2848 int err = 0;
2849
2850 if (bo) {
2851 if (!bo->vm)
2852 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2853 if (!err && validate)
2854 err = xe_bo_validate(bo, vm,
2855 !xe_vm_in_preempt_fence_mode(vm) &&
2856 res_evict, exec);
2857 }
2858
2859 return err;
2860 }
2861
2862 static int check_ufence(struct xe_vma *vma)
2863 {
2864 if (vma->ufence) {
2865 struct xe_user_fence * const f = vma->ufence;
2866
2867 if (!xe_sync_ufence_get_status(f))
2868 return -EBUSY;
2869
2870 vma->ufence = NULL;
2871 xe_sync_ufence_put(f);
2872 }
2873
2874 return 0;
2875 }
2876
2877 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2878 {
2879 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2880 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2881 struct xe_tile *tile = op->prefetch_range.tile;
2882 int err = 0;
2883
2884 struct xe_svm_range *svm_range;
2885 struct drm_gpusvm_ctx ctx = {};
2886 unsigned long i;
2887
2888 if (!xe_vma_is_cpu_addr_mirror(vma))
2889 return 0;
2890
2891 ctx.read_only = xe_vma_read_only(vma);
2892 ctx.devmem_possible = devmem_possible;
2893 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2894 ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
2895
2896 /* TODO: Threading the migration */
2897 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2898 if (!tile)
2899 xe_svm_range_migrate_to_smem(vm, svm_range);
2900
2901 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
2902 err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2903 if (err) {
2904 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2905 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2906 return -ENODATA;
2907 }
2908 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2909 }
2910
2911 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2912 if (err) {
2913 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2914 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2915 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2916 err = -ENODATA;
2917 return err;
2918 }
2919 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2920 }
2921
2922 return err;
2923 }
2924
2925 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2926 struct xe_vma_ops *vops, struct xe_vma_op *op)
2927 {
2928 int err = 0;
2929 bool res_evict;
2930
2931 /*
2932 * We only allow evicting a BO within the VM if the operation is not part
2933 * of an array of binds, as an array of binds could otherwise evict
2934 * another BO bound within the same array.
2935 */
2936 res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
2937
2938 switch (op->base.op) {
2939 case DRM_GPUVA_OP_MAP:
2940 if (!op->map.invalidate_on_bind)
2941 err = vma_lock_and_validate(exec, op->map.vma,
2942 res_evict,
2943 !xe_vm_in_fault_mode(vm) ||
2944 op->map.immediate);
2945 break;
2946 case DRM_GPUVA_OP_REMAP:
2947 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2948 if (err)
2949 break;
2950
2951 err = vma_lock_and_validate(exec,
2952 gpuva_to_vma(op->base.remap.unmap->va),
2953 res_evict, false);
2954 if (!err && op->remap.prev)
2955 err = vma_lock_and_validate(exec, op->remap.prev,
2956 res_evict, true);
2957 if (!err && op->remap.next)
2958 err = vma_lock_and_validate(exec, op->remap.next,
2959 res_evict, true);
2960 break;
2961 case DRM_GPUVA_OP_UNMAP:
2962 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2963 if (err)
2964 break;
2965
2966 err = vma_lock_and_validate(exec,
2967 gpuva_to_vma(op->base.unmap.va),
2968 res_evict, false);
2969 break;
2970 case DRM_GPUVA_OP_PREFETCH:
2971 {
2972 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2973 u32 region;
2974
2975 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2976 region = op->prefetch.region;
2977 xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
2978 region <= ARRAY_SIZE(region_to_mem_type));
2979 }
2980
2981 err = vma_lock_and_validate(exec,
2982 gpuva_to_vma(op->base.prefetch.va),
2983 res_evict, false);
2984 if (!err && !xe_vma_has_no_bo(vma))
2985 err = xe_bo_migrate(xe_vma_bo(vma),
2986 region_to_mem_type[region],
2987 NULL,
2988 exec);
2989 break;
2990 }
2991 default:
2992 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2993 }
2994
2995 return err;
2996 }
2997
2998 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
2999 {
3000 struct xe_vma_op *op;
3001 int err;
3002
3003 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3004 return 0;
3005
3006 list_for_each_entry(op, &vops->list, link) {
3007 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3008 err = prefetch_ranges(vm, op);
3009 if (err)
3010 return err;
3011 }
3012 }
3013
3014 return 0;
3015 }
3016
3017 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3018 struct xe_vm *vm,
3019 struct xe_vma_ops *vops)
3020 {
3021 struct xe_vma_op *op;
3022 int err;
3023
3024 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3025 if (err)
3026 return err;
3027
3028 list_for_each_entry(op, &vops->list, link) {
3029 err = op_lock_and_prep(exec, vm, vops, op);
3030 if (err)
3031 return err;
3032 }
3033
3034 #ifdef TEST_VM_OPS_ERROR
3035 if (vops->inject_error &&
3036 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3037 return -ENOSPC;
3038 #endif
3039
3040 return 0;
3041 }
3042
3043 static void op_trace(struct xe_vma_op *op)
3044 {
3045 switch (op->base.op) {
3046 case DRM_GPUVA_OP_MAP:
3047 trace_xe_vma_bind(op->map.vma);
3048 break;
3049 case DRM_GPUVA_OP_REMAP:
3050 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3051 if (op->remap.prev)
3052 trace_xe_vma_bind(op->remap.prev);
3053 if (op->remap.next)
3054 trace_xe_vma_bind(op->remap.next);
3055 break;
3056 case DRM_GPUVA_OP_UNMAP:
3057 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3058 break;
3059 case DRM_GPUVA_OP_PREFETCH:
3060 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3061 break;
3062 case DRM_GPUVA_OP_DRIVER:
3063 break;
3064 default:
3065 XE_WARN_ON("NOT POSSIBLE");
3066 }
3067 }
3068
3069 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3070 {
3071 struct xe_vma_op *op;
3072
3073 list_for_each_entry(op, &vops->list, link)
3074 op_trace(op);
3075 }
3076
3077 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3078 {
3079 struct xe_exec_queue *q = vops->q;
3080 struct xe_tile *tile;
3081 int number_tiles = 0;
3082 u8 id;
3083
3084 for_each_tile(tile, vm->xe, id) {
3085 if (vops->pt_update_ops[id].num_ops)
3086 ++number_tiles;
3087
3088 if (vops->pt_update_ops[id].q)
3089 continue;
3090
3091 if (q) {
3092 vops->pt_update_ops[id].q = q;
3093 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3094 q = list_next_entry(q, multi_gt_list);
3095 } else {
3096 vops->pt_update_ops[id].q = vm->q[id];
3097 }
3098 }
3099
3100 return number_tiles;
3101 }
3102
3103 static struct dma_fence *ops_execute(struct xe_vm *vm,
3104 struct xe_vma_ops *vops)
3105 {
3106 struct xe_tile *tile;
3107 struct dma_fence *fence = NULL;
3108 struct dma_fence **fences = NULL;
3109 struct dma_fence_array *cf = NULL;
3110 int number_tiles = 0, current_fence = 0, n_fence = 0, err;
3111 u8 id;
3112
3113 number_tiles = vm_ops_setup_tile_args(vm, vops);
3114 if (number_tiles == 0)
3115 return ERR_PTR(-ENODATA);
3116
3117 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) {
3118 for_each_tile(tile, vm->xe, id)
3119 ++n_fence;
3120 } else {
3121 for_each_tile(tile, vm->xe, id)
3122 n_fence += (1 + XE_MAX_GT_PER_TILE);
3123 }
3124
3125 fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL);
3126 if (!fences) {
3127 fence = ERR_PTR(-ENOMEM);
3128 goto err_trace;
3129 }
3130
3131 cf = dma_fence_array_alloc(n_fence);
3132 if (!cf) {
3133 fence = ERR_PTR(-ENOMEM);
3134 goto err_out;
3135 }
3136
3137 for_each_tile(tile, vm->xe, id) {
3138 if (!vops->pt_update_ops[id].num_ops)
3139 continue;
3140
3141 err = xe_pt_update_ops_prepare(tile, vops);
3142 if (err) {
3143 fence = ERR_PTR(err);
3144 goto err_out;
3145 }
3146 }
3147
3148 trace_xe_vm_ops_execute(vops);
3149
3150 for_each_tile(tile, vm->xe, id) {
3151 struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
3152 int i;
3153
3154 fence = NULL;
3155 if (!vops->pt_update_ops[id].num_ops)
3156 goto collect_fences;
3157
3158 fence = xe_pt_update_ops_run(tile, vops);
3159 if (IS_ERR(fence))
3160 goto err_out;
3161
3162 collect_fences:
3163 fences[current_fence++] = fence ?: dma_fence_get_stub();
3164 if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
3165 continue;
3166
3167 xe_migrate_job_lock(tile->migrate, q);
3168 for_each_tlb_inval(i)
3169 fences[current_fence++] =
3170 xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
3171 xe_migrate_job_unlock(tile->migrate, q);
3172 }
3173
3174 xe_assert(vm->xe, current_fence == n_fence);
3175 dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
3176 1, false);
3177 fence = &cf->base;
3178
3179 for_each_tile(tile, vm->xe, id) {
3180 if (!vops->pt_update_ops[id].num_ops)
3181 continue;
3182
3183 xe_pt_update_ops_fini(tile, vops);
3184 }
3185
3186 return fence;
3187
3188 err_out:
3189 for_each_tile(tile, vm->xe, id) {
3190 if (!vops->pt_update_ops[id].num_ops)
3191 continue;
3192
3193 xe_pt_update_ops_abort(tile, vops);
3194 }
3195 while (current_fence)
3196 dma_fence_put(fences[--current_fence]);
3197 kfree(fences);
3198 kfree(cf);
3199
3200 err_trace:
3201 trace_xe_vm_ops_fail(vm);
3202 return fence;
3203 }
3204
3205 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3206 {
3207 if (vma->ufence)
3208 xe_sync_ufence_put(vma->ufence);
3209 vma->ufence = __xe_sync_ufence_get(ufence);
3210 }
3211
3212 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3213 struct xe_user_fence *ufence)
3214 {
3215 switch (op->base.op) {
3216 case DRM_GPUVA_OP_MAP:
3217 vma_add_ufence(op->map.vma, ufence);
3218 break;
3219 case DRM_GPUVA_OP_REMAP:
3220 if (op->remap.prev)
3221 vma_add_ufence(op->remap.prev, ufence);
3222 if (op->remap.next)
3223 vma_add_ufence(op->remap.next, ufence);
3224 break;
3225 case DRM_GPUVA_OP_UNMAP:
3226 break;
3227 case DRM_GPUVA_OP_PREFETCH:
3228 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3229 break;
3230 default:
3231 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3232 }
3233 }
3234
3235 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3236 struct dma_fence *fence)
3237 {
3238 struct xe_user_fence *ufence;
3239 struct xe_vma_op *op;
3240 int i;
3241
3242 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3243 list_for_each_entry(op, &vops->list, link) {
3244 if (ufence)
3245 op_add_ufence(vm, op, ufence);
3246
3247 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3248 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3249 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3250 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3251 fence);
3252 }
3253 if (ufence)
3254 xe_sync_ufence_put(ufence);
3255 if (fence) {
3256 for (i = 0; i < vops->num_syncs; i++)
3257 xe_sync_entry_signal(vops->syncs + i, fence);
3258 }
3259 }
3260
3261 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3262 struct xe_vma_ops *vops)
3263 {
3264 struct xe_validation_ctx ctx;
3265 struct drm_exec exec;
3266 struct dma_fence *fence;
3267 int err = 0;
3268
3269 lockdep_assert_held_write(&vm->lock);
3270
3271 xe_validation_guard(&ctx, &vm->xe->val, &exec,
3272 ((struct xe_val_flags) {
3273 .interruptible = true,
3274 .exec_ignore_duplicates = true,
3275 }), err) {
3276 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3277 drm_exec_retry_on_contention(&exec);
3278 xe_validation_retry_on_oom(&ctx, &err);
3279 if (err)
3280 return ERR_PTR(err);
3281
3282 xe_vm_set_validation_exec(vm, &exec);
3283 fence = ops_execute(vm, vops);
3284 xe_vm_set_validation_exec(vm, NULL);
3285 if (IS_ERR(fence)) {
3286 if (PTR_ERR(fence) == -ENODATA)
3287 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3288 return fence;
3289 }
3290
3291 vm_bind_ioctl_ops_fini(vm, vops, fence);
3292 }
3293
3294 return err ? ERR_PTR(err) : fence;
3295 }
3296 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3297
3298 #define SUPPORTED_FLAGS_STUB \
3299 (DRM_XE_VM_BIND_FLAG_READONLY | \
3300 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3301 DRM_XE_VM_BIND_FLAG_NULL | \
3302 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3303 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3304 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3305 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
3306
3307 #ifdef TEST_VM_OPS_ERROR
3308 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3309 #else
3310 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3311 #endif
3312
3313 #define XE_64K_PAGE_MASK 0xffffull
3314 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3315
3316 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3317 struct drm_xe_vm_bind *args,
3318 struct drm_xe_vm_bind_op **bind_ops)
3319 {
3320 int err;
3321 int i;
3322
3323 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3324 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3325 return -EINVAL;
3326
3327 if (XE_IOCTL_DBG(xe, args->extensions))
3328 return -EINVAL;
3329
3330 if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
3331 return -EINVAL;
3332
3333 if (args->num_binds > 1) {
3334 u64 __user *bind_user =
3335 u64_to_user_ptr(args->vector_of_binds);
3336
3337 *bind_ops = kvmalloc_array(args->num_binds,
3338 sizeof(struct drm_xe_vm_bind_op),
3339 GFP_KERNEL | __GFP_ACCOUNT |
3340 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3341 if (!*bind_ops)
3342 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3343
3344 err = copy_from_user(*bind_ops, bind_user,
3345 sizeof(struct drm_xe_vm_bind_op) *
3346 args->num_binds);
3347 if (XE_IOCTL_DBG(xe, err)) {
3348 err = -EFAULT;
3349 goto free_bind_ops;
3350 }
3351 } else {
3352 *bind_ops = &args->bind;
3353 }
3354
3355 for (i = 0; i < args->num_binds; ++i) {
3356 u64 range = (*bind_ops)[i].range;
3357 u64 addr = (*bind_ops)[i].addr;
3358 u32 op = (*bind_ops)[i].op;
3359 u32 flags = (*bind_ops)[i].flags;
3360 u32 obj = (*bind_ops)[i].obj;
3361 u64 obj_offset = (*bind_ops)[i].obj_offset;
3362 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3363 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3364 bool is_cpu_addr_mirror = flags &
3365 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3366 u16 pat_index = (*bind_ops)[i].pat_index;
3367 u16 coh_mode;
3368
3369 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3370 (!xe_vm_in_fault_mode(vm) ||
3371 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3372 err = -EINVAL;
3373 goto free_bind_ops;
3374 }
3375
3376 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3377 err = -EINVAL;
3378 goto free_bind_ops;
3379 }
3380
3381 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3382 (*bind_ops)[i].pat_index = pat_index;
3383 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3384 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3385 err = -EINVAL;
3386 goto free_bind_ops;
3387 }
3388
3389 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3390 err = -EINVAL;
3391 goto free_bind_ops;
3392 }
3393
3394 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3395 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3396 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3397 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3398 is_cpu_addr_mirror)) ||
3399 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3400 (is_null || is_cpu_addr_mirror)) ||
3401 XE_IOCTL_DBG(xe, !obj &&
3402 op == DRM_XE_VM_BIND_OP_MAP &&
3403 !is_null && !is_cpu_addr_mirror) ||
3404 XE_IOCTL_DBG(xe, !obj &&
3405 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3406 XE_IOCTL_DBG(xe, addr &&
3407 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3408 XE_IOCTL_DBG(xe, range &&
3409 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3410 XE_IOCTL_DBG(xe, obj &&
3411 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3412 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3413 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3414 XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3415 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3416 XE_IOCTL_DBG(xe, obj &&
3417 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3418 XE_IOCTL_DBG(xe, prefetch_region &&
3419 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3420 XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3421 /* Guard against undefined shift in BIT(prefetch_region) */
3422 (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
3423 !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
3424 XE_IOCTL_DBG(xe, obj &&
3425 op == DRM_XE_VM_BIND_OP_UNMAP) ||
3426 XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
3427 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
3428 err = -EINVAL;
3429 goto free_bind_ops;
3430 }
3431
3432 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3433 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3434 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3435 XE_IOCTL_DBG(xe, !range &&
3436 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3437 err = -EINVAL;
3438 goto free_bind_ops;
3439 }
3440 }
3441
3442 return 0;
3443
3444 free_bind_ops:
3445 if (args->num_binds > 1)
3446 kvfree(*bind_ops);
3447 *bind_ops = NULL;
3448 return err;
3449 }
3450
3451 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3452 struct xe_exec_queue *q,
3453 struct xe_sync_entry *syncs,
3454 int num_syncs)
3455 {
3456 struct dma_fence *fence = NULL;
3457 int i, err = 0;
3458
3459 if (num_syncs) {
3460 fence = xe_sync_in_fence_get(syncs, num_syncs,
3461 to_wait_exec_queue(vm, q), vm);
3462 if (IS_ERR(fence))
3463 return PTR_ERR(fence);
3464
3465 for (i = 0; i < num_syncs; i++)
3466 xe_sync_entry_signal(&syncs[i], fence);
3467 }
3468
3469 dma_fence_put(fence);
3470
3471 return err;
3472 }
3473
3474 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3475 struct xe_exec_queue *q,
3476 struct xe_sync_entry *syncs, u32 num_syncs)
3477 {
3478 memset(vops, 0, sizeof(*vops));
3479 INIT_LIST_HEAD(&vops->list);
3480 vops->vm = vm;
3481 vops->q = q;
3482 vops->syncs = syncs;
3483 vops->num_syncs = num_syncs;
3484 vops->flags = 0;
3485 }
3486
3487 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3488 u64 addr, u64 range, u64 obj_offset,
3489 u16 pat_index, u32 op, u32 bind_flags)
3490 {
3491 u16 coh_mode;
3492
3493 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3494 XE_IOCTL_DBG(xe, obj_offset >
3495 xe_bo_size(bo) - range)) {
3496 return -EINVAL;
3497 }
3498
3499 /*
3500 * Some platforms require 64k VM_BIND alignment,
3501 * specifically those with XE_VRAM_FLAGS_NEED64K.
3502 *
3503 * Other platforms may have BOs set to 64k physical placement,
3504 * but those can still be mapped at 4k offsets. This check only
3505 * applies to the former case.
3506 */
3507 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3508 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3509 if (XE_IOCTL_DBG(xe, obj_offset &
3510 XE_64K_PAGE_MASK) ||
3511 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3512 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3513 return -EINVAL;
3514 }
3515 }
3516
3517 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3518 if (bo->cpu_caching) {
3519 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3520 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3521 return -EINVAL;
3522 }
3523 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3524 /*
3525 * An imported dma-buf from a different device should
3526 * require 1-way or 2-way coherency since we don't know
3527 * how it was mapped on the CPU. Just assume it is
3528 * potentially cached on the CPU side.
3529 */
3530 return -EINVAL;
3531 }
3532
3533 /* If a BO is protected it can only be mapped if the key is still valid */
3534 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3535 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3536 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3537 return -ENOEXEC;
3538
3539 return 0;
3540 }
3541
3542 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3543 {
3544 struct xe_device *xe = to_xe_device(dev);
3545 struct xe_file *xef = to_xe_file(file);
3546 struct drm_xe_vm_bind *args = data;
3547 struct drm_xe_sync __user *syncs_user;
3548 struct xe_bo **bos = NULL;
3549 struct drm_gpuva_ops **ops = NULL;
3550 struct xe_vm *vm;
3551 struct xe_exec_queue *q = NULL;
3552 u32 num_syncs, num_ufence = 0;
3553 struct xe_sync_entry *syncs = NULL;
3554 struct drm_xe_vm_bind_op *bind_ops = NULL;
3555 struct xe_vma_ops vops;
3556 struct dma_fence *fence;
3557 int err;
3558 int i;
3559
3560 vm = xe_vm_lookup(xef, args->vm_id);
3561 if (XE_IOCTL_DBG(xe, !vm))
3562 return -EINVAL;
3563
3564 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3565 if (err)
3566 goto put_vm;
3567
3568 if (args->exec_queue_id) {
3569 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3570 if (XE_IOCTL_DBG(xe, !q)) {
3571 err = -ENOENT;
3572 goto free_bind_ops;
3573 }
3574
3575 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3576 err = -EINVAL;
3577 goto put_exec_queue;
3578 }
3579 }
3580
3581 /* Ensure all UNMAPs are visible */
3582 xe_svm_flush(vm);
3583
3584 err = down_write_killable(&vm->lock);
3585 if (err)
3586 goto put_exec_queue;
3587
3588 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3589 err = -ENOENT;
3590 goto release_vm_lock;
3591 }
3592
3593 for (i = 0; i < args->num_binds; ++i) {
3594 u64 range = bind_ops[i].range;
3595 u64 addr = bind_ops[i].addr;
3596
3597 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3598 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3599 err = -EINVAL;
3600 goto release_vm_lock;
3601 }
3602 }
3603
3604 if (args->num_binds) {
3605 bos = kvcalloc(args->num_binds, sizeof(*bos),
3606 GFP_KERNEL | __GFP_ACCOUNT |
3607 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3608 if (!bos) {
3609 err = -ENOMEM;
3610 goto release_vm_lock;
3611 }
3612
3613 ops = kvcalloc(args->num_binds, sizeof(*ops),
3614 GFP_KERNEL | __GFP_ACCOUNT |
3615 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3616 if (!ops) {
3617 err = -ENOMEM;
3618 goto free_bos;
3619 }
3620 }
3621
3622 for (i = 0; i < args->num_binds; ++i) {
3623 struct drm_gem_object *gem_obj;
3624 u64 range = bind_ops[i].range;
3625 u64 addr = bind_ops[i].addr;
3626 u32 obj = bind_ops[i].obj;
3627 u64 obj_offset = bind_ops[i].obj_offset;
3628 u16 pat_index = bind_ops[i].pat_index;
3629 u32 op = bind_ops[i].op;
3630 u32 bind_flags = bind_ops[i].flags;
3631
3632 if (!obj)
3633 continue;
3634
3635 gem_obj = drm_gem_object_lookup(file, obj);
3636 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3637 err = -ENOENT;
3638 goto put_obj;
3639 }
3640 bos[i] = gem_to_xe_bo(gem_obj);
3641
3642 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3643 obj_offset, pat_index, op,
3644 bind_flags);
3645 if (err)
3646 goto put_obj;
3647 }
3648
3649 if (args->num_syncs) {
3650 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3651 if (!syncs) {
3652 err = -ENOMEM;
3653 goto put_obj;
3654 }
3655 }
3656
3657 syncs_user = u64_to_user_ptr(args->syncs);
3658 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3659 struct xe_exec_queue *__q = q ?: vm->q[0];
3660
3661 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3662 &syncs_user[num_syncs],
3663 __q->ufence_syncobj,
3664 ++__q->ufence_timeline_value,
3665 (xe_vm_in_lr_mode(vm) ?
3666 SYNC_PARSE_FLAG_LR_MODE : 0) |
3667 (!args->num_binds ?
3668 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3669 if (err)
3670 goto free_syncs;
3671
3672 if (xe_sync_is_ufence(&syncs[num_syncs]))
3673 num_ufence++;
3674 }
3675
3676 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3677 err = -EINVAL;
3678 goto free_syncs;
3679 }
3680
3681 if (!args->num_binds) {
3682 err = -ENODATA;
3683 goto free_syncs;
3684 }
3685
3686 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3687 if (args->num_binds > 1)
3688 vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
3689 for (i = 0; i < args->num_binds; ++i) {
3690 u64 range = bind_ops[i].range;
3691 u64 addr = bind_ops[i].addr;
3692 u32 op = bind_ops[i].op;
3693 u32 flags = bind_ops[i].flags;
3694 u64 obj_offset = bind_ops[i].obj_offset;
3695 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3696 u16 pat_index = bind_ops[i].pat_index;
3697
3698 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3699 addr, range, op, flags,
3700 prefetch_region, pat_index);
3701 if (IS_ERR(ops[i])) {
3702 err = PTR_ERR(ops[i]);
3703 ops[i] = NULL;
3704 goto unwind_ops;
3705 }
3706
3707 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3708 if (err)
3709 goto unwind_ops;
3710
3711 #ifdef TEST_VM_OPS_ERROR
3712 if (flags & FORCE_OP_ERROR) {
3713 vops.inject_error = true;
3714 vm->xe->vm_inject_error_position =
3715 (vm->xe->vm_inject_error_position + 1) %
3716 FORCE_OP_ERROR_COUNT;
3717 }
3718 #endif
3719 }
3720
3721 /* Nothing to do */
3722 if (list_empty(&vops.list)) {
3723 err = -ENODATA;
3724 goto unwind_ops;
3725 }
3726
3727 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3728 if (err)
3729 goto unwind_ops;
3730
3731 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3732 if (err)
3733 goto unwind_ops;
3734
3735 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3736 if (IS_ERR(fence))
3737 err = PTR_ERR(fence);
3738 else
3739 dma_fence_put(fence);
3740
3741 unwind_ops:
3742 if (err && err != -ENODATA)
3743 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3744 xe_vma_ops_fini(&vops);
3745 for (i = args->num_binds - 1; i >= 0; --i)
3746 if (ops[i])
3747 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3748 free_syncs:
3749 if (err == -ENODATA)
3750 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3751 while (num_syncs--)
3752 xe_sync_entry_cleanup(&syncs[num_syncs]);
3753
3754 kfree(syncs);
3755 put_obj:
3756 for (i = 0; i < args->num_binds; ++i)
3757 xe_bo_put(bos[i]);
3758
3759 kvfree(ops);
3760 free_bos:
3761 kvfree(bos);
3762 release_vm_lock:
3763 up_write(&vm->lock);
3764 put_exec_queue:
3765 if (q)
3766 xe_exec_queue_put(q);
3767 free_bind_ops:
3768 if (args->num_binds > 1)
3769 kvfree(bind_ops);
3770 put_vm:
3771 xe_vm_put(vm);
3772 return err;
3773 }
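
/*
 * Illustrative userspace sketch of a single MAP bind through the ioctl above
 * (not part of the driver; assumes an open xe DRM fd, a GEM handle bo_handle,
 * a vm_id from the create ioctl, a valid platform pat_index, page-aligned
 * gpu_va and size, and the DRM_IOCTL_XE_VM_BIND request from
 * uapi/drm/xe_drm.h):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = gpu_va,
 *			.range = size,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
 *		return -errno;
 */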
3774
3775 /**
3776 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3777 * @vm: VM to bind the BO to
3778 * @bo: BO to bind
3779 * @q: exec queue to use for the bind (optional)
3780 * @addr: address at which to bind the BO
3781 * @cache_lvl: PAT cache level to use
3782 *
3783 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3784 * kernel-owned VM.
3785 *
3786 * Return: a dma_fence tracking completion of the bind if the job was
3787 * successfully submitted, or an error pointer otherwise.
3788 */
3789 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3790 struct xe_exec_queue *q, u64 addr,
3791 enum xe_cache_level cache_lvl)
3792 {
3793 struct xe_vma_ops vops;
3794 struct drm_gpuva_ops *ops = NULL;
3795 struct dma_fence *fence;
3796 int err;
3797
3798 xe_bo_get(bo);
3799 xe_vm_get(vm);
3800 if (q)
3801 xe_exec_queue_get(q);
3802
3803 down_write(&vm->lock);
3804
3805 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3806
3807 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3808 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3809 vm->xe->pat.idx[cache_lvl]);
3810 if (IS_ERR(ops)) {
3811 err = PTR_ERR(ops);
3812 goto release_vm_lock;
3813 }
3814
3815 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3816 if (err)
3817 goto release_vm_lock;
3818
3819 xe_assert(vm->xe, !list_empty(&vops.list));
3820
3821 err = xe_vma_ops_alloc(&vops, false);
3822 if (err)
3823 goto unwind_ops;
3824
3825 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3826 if (IS_ERR(fence))
3827 err = PTR_ERR(fence);
3828
3829 unwind_ops:
3830 if (err && err != -ENODATA)
3831 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3832
3833 xe_vma_ops_fini(&vops);
3834 drm_gpuva_ops_free(&vm->gpuvm, ops);
3835
3836 release_vm_lock:
3837 up_write(&vm->lock);
3838
3839 if (q)
3840 xe_exec_queue_put(q);
3841 xe_vm_put(vm);
3842 xe_bo_put(bo);
3843
3844 if (err)
3845 fence = ERR_PTR(err);
3846
3847 return fence;
3848 }
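
/*
 * Minimal in-kernel usage sketch for the helper above (illustrative only;
 * assumes valid vm and bo pointers and that XE_CACHE_WB from enum
 * xe_cache_level is a suitable cache level for the mapping):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */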
3849
3850 /**
3851 * xe_vm_lock() - Lock the vm's dma_resv object
3852 * @vm: The struct xe_vm whose lock is to be locked
3853 * @intr: Whether to perform any waits interruptibly
3854 *
3855 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3856 * contended lock was interrupted. If @intr is false, the function
3857 * always returns 0.
3858 */
3859 int xe_vm_lock(struct xe_vm *vm, bool intr)
3860 {
3861 int ret;
3862
3863 if (intr)
3864 ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3865 else
3866 ret = dma_resv_lock(xe_vm_resv(vm), NULL);
3867
3868 return ret;
3869 }
3870
3871 /**
3872 * xe_vm_unlock() - Unlock the vm's dma_resv object
3873 * @vm: The struct xe_vm whose lock is to be released.
3874 *
3875 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3876 */
3877 void xe_vm_unlock(struct xe_vm *vm)
3878 {
3879 dma_resv_unlock(xe_vm_resv(vm));
3880 }
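
/*
 * Typical pairing of the two helpers above (illustrative sketch):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... operate on state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);
 */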
3881
3882 /**
3883 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
3884 * address range
3885 * @vm: The VM
3886 * @start: start address
3887 * @end: end address
3888 * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
3889 *
3890 * Issue a range-based TLB invalidation for the GTs of the tiles in @tile_mask.
3891 *
3892 * Returns 0 for success, negative error code otherwise.
3893 */
3894 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3895 u64 end, u8 tile_mask)
3896 {
3897 struct xe_tlb_inval_fence
3898 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3899 struct xe_tile *tile;
3900 u32 fence_id = 0;
3901 u8 id;
3902 int err;
3903
3904 if (!tile_mask)
3905 return 0;
3906
3907 for_each_tile(tile, vm->xe, id) {
3908 if (!(tile_mask & BIT(id)))
3909 continue;
3910
3911 xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3912 &fence[fence_id], true);
3913
3914 err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3915 &fence[fence_id], start, end,
3916 vm->usm.asid);
3917 if (err)
3918 goto wait;
3919 ++fence_id;
3920
3921 if (!tile->media_gt)
3922 continue;
3923
3924 xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3925 &fence[fence_id], true);
3926
3927 err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
3928 &fence[fence_id], start, end,
3929 vm->usm.asid);
3930 if (err)
3931 goto wait;
3932 ++fence_id;
3933 }
3934
3935 wait:
3936 for (id = 0; id < fence_id; ++id)
3937 xe_tlb_inval_fence_wait(&fence[id]);
3938
3939 return err;
3940 }
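
/*
 * Example (illustrative sketch): build a tile mask covering every tile of
 * the device and issue a range invalidation with it. The helper name is
 * hypothetical; real callers typically derive the mask from the tiles a
 * VMA is actually bound on.
 */
static int __maybe_unused example_inval_range_all_tiles(struct xe_vm *vm,
							u64 start, u64 end)
{
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	for_each_tile(tile, vm->xe, id)
		tile_mask |= BIT(id);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}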

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the page-table leaves, zeroing the entries owned by this VMA,
 * issues the TLB invalidations, and blocks until the invalidations are
 * complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	xe_device_wmb(xe);

	ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
					     xe_vma_end(vma), tile_mask);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

	return ret;
}
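
/*
 * Example (illustrative sketch): invalidate the GPU mapping of a single
 * BO-backed VMA. The helper name is hypothetical; it assumes the caller
 * already holds the locks checked by the lockdep assertions above (for a
 * BO-backed VMA, the BO's dma_resv).
 */
static int __maybe_unused example_zap_vma_mapping(struct xe_vma *vma)
{
	/*
	 * xe_vm_invalidate_vma() asserts against null and CPU-address-mirror
	 * VMAs, so skip those here.
	 */
	if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma))
		return 0;

	return xe_vm_invalidate_vma(vma);
}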

int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}

struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		if (bo) {
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data))
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}
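
/*
 * Example (illustrative sketch): the intended capture lifecycle for the
 * snapshot helpers above, as used for devcoredump-style dumps. The helper
 * name is hypothetical; @p is any drm_printer supplied by the caller.
 */
static void __maybe_unused example_snapshot_vm(struct xe_vm *vm,
					       struct drm_printer *p)
{
	struct xe_vm_snapshot *snap;

	/* 1) Capture the VMA layout; only GFP_NOWAIT allocations are made. */
	snap = xe_vm_snapshot_capture(vm);

	/* 2) Copy the actual contents; this stage may sleep and allocate. */
	xe_vm_snapshot_capture_delayed(snap);

	/* 3) Print the ascii85-encoded contents. */
	xe_vm_snapshot_print(snap, p);

	/* 4) Drop the BO/mm references and free the buffers. */
	xe_vm_snapshot_free(snap);
}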

/**
 * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
 * @xe: Pointer to the Xe device structure
 * @vma: Pointer to the virtual memory area (VMA) structure
 * @is_atomic: True when servicing a page fault for an atomic access
 *
 * This function determines whether the given VMA needs to be migrated to
 * VRAM in order to perform an atomic GPU operation.
 *
 * Return:
 * 1 - Migration to VRAM is required
 * 0 - Migration is not required
 * -EACCES - Invalid access for the atomic memory attribute
 */
int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
					     vma->attr.atomic_access;

	if (!IS_DGFX(xe) || !is_atomic)
		return false;

	/*
	 * NOTE: The checks implemented here are platform-specific. For
	 * instance, on a device supporting CXL atomics, these would ideally
	 * work universally without additional handling.
	 */
	switch (atomic_access) {
	case DRM_XE_ATOMIC_DEVICE:
		return !xe->info.has_device_atomics_on_smem;

	case DRM_XE_ATOMIC_CPU:
		return -EACCES;

	case DRM_XE_ATOMIC_UNDEFINED:
	case DRM_XE_ATOMIC_GLOBAL:
	default:
		return 1;
	}
}
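
/*
 * Example (illustrative sketch): how a page-fault handler might consume
 * the tri-state return value. The helper name is hypothetical and the
 * actual migration step is elided since it depends on the caller.
 */
static int __maybe_unused example_handle_atomic_access(struct xe_device *xe,
							struct xe_vma *vma,
							bool is_atomic)
{
	int ret = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);

	if (ret < 0)
		return ret;	/* -EACCES: CPU-only atomics, reject the access */

	if (ret) {
		/* ... migrate the backing store to VRAM before mapping ... */
	}

	return 0;
}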

static int xe_vm_alloc_vma(struct xe_vm *vm,
			   struct drm_gpuvm_map_req *map_req,
			   bool is_madvise)
{
	struct xe_vma_ops vops;
	struct drm_gpuva_ops *ops = NULL;
	struct drm_gpuva_op *__op;
	unsigned int vma_flags = 0;
	bool remap_op = false;
	struct xe_vma_mem_attr tmp_attr;
	u16 default_pat;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (is_madvise)
		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
	else
		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);

	if (IS_ERR(ops))
		return PTR_ERR(ops);

	if (list_empty(&ops->list)) {
		err = 0;
		goto free_ops;
	}

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma = NULL;

		if (!is_madvise) {
			if (__op->op == DRM_GPUVA_OP_UNMAP) {
				vma = gpuva_to_vma(op->base.unmap.va);
				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				default_pat = vma->attr.default_pat_index;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
				op->map.pat_index = default_pat;
			}
		} else {
			if (__op->op == DRM_GPUVA_OP_REMAP) {
				vma = gpuva_to_vma(op->base.remap.unmap->va);
				xe_assert(vm->xe, !remap_op);
				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
				remap_op = true;
				vma_flags = vma->gpuva.flags;
			}

			if (__op->op == DRM_GPUVA_OP_MAP) {
				xe_assert(vm->xe, remap_op);
				remap_op = false;
				/*
				 * For madvise ops, DRM_GPUVA_OP_MAP always
				 * follows DRM_GPUVA_OP_REMAP, so propagate
				 * the flags from the VMA being unmapped.
				 */
				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
			}
		}
		print_op(vm->xe, __op);
	}

	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);

	if (is_madvise)
		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;

	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
	if (err)
		goto unwind_ops;

	xe_vm_lock(vm, false);

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
		struct xe_vma *vma;

		if (__op->op == DRM_GPUVA_OP_UNMAP) {
			vma = gpuva_to_vma(op->base.unmap.va);
			/* There should be no unmap for madvise */
			if (is_madvise)
				XE_WARN_ON("UNEXPECTED UNMAP");

			xe_vma_destroy(vma, NULL);
		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
			vma = gpuva_to_vma(op->base.remap.unmap->va);
			/*
			 * For madvise ops, store the attributes of the VMA
			 * being unmapped by REMAP so they can be assigned to
			 * the newly created MAP VMA.
			 */
			if (is_madvise)
				tmp_attr = vma->attr;

			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
		} else if (__op->op == DRM_GPUVA_OP_MAP) {
			vma = op->map.vma;
			/*
			 * For madvise, a MAP is always preceded by a REMAP,
			 * so tmp_attr has been populated and is safe to copy
			 * to the new VMA.
			 */
			if (is_madvise)
				vma->attr = tmp_attr;
		}
	}

	xe_vm_unlock(vm);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return 0;

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
free_ops:
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return err;
}

/**
 * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits existing VMAs to create new VMAs for the user-provided
 * input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);

	return xe_vm_alloc_vma(vm, &map_req, true);
}
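
/*
 * Example (illustrative sketch): carve out dedicated VMAs for a madvise
 * range. The helper name is hypothetical; it only shows that the vm lock
 * must be held in write mode around the call, as asserted above.
 * xe_vm_alloc_cpu_addr_mirror_vma() below follows the same pattern.
 */
static int __maybe_unused example_prepare_madvise_range(struct xe_vm *vm,
							 u64 start, u64 range)
{
	int err;

	down_write(&vm->lock);
	err = xe_vm_alloc_madvise_vma(vm, start, range);
	up_write(&vm->lock);

	return err;
}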

/**
 * xe_vm_alloc_cpu_addr_mirror_vma - Allocate a CPU address mirror VMA
 * @vm: Pointer to the xe_vm structure
 * @start: Starting input address
 * @range: Size of the input range
 *
 * This function splits/merges existing VMAs to create new VMAs for the
 * user-provided input range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
{
	struct drm_gpuvm_map_req map_req = {
		.map.va.addr = start,
		.map.va.range = range,
	};

	lockdep_assert_held_write(&vm->lock);

	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
	       start, range);

	return xe_vm_alloc_vma(vm, &map_req, false);
}
