1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_pxp.h"
38 #include "xe_res_cursor.h"
39 #include "xe_svm.h"
40 #include "xe_sync.h"
41 #include "xe_trace_bo.h"
42 #include "xe_wa.h"
43 #include "xe_hmm.h"
44
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @uvma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
63 {
64 return mmu_interval_check_retry(&uvma->userptr.notifier,
65 uvma->userptr.notifier_seq) ?
66 -EAGAIN : 0;
67 }
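
/*
 * A minimal sketch of the intended use of the advisory checks (see
 * preempt_rebind_work_func() and the exec path for the real users):
 * check locklessly, repin if needed, then re-check under the notifier
 * lock before committing and restart on -EAGAIN.
 *
 *	if (xe_vm_userptr_check_repin(vm))
 *		err = xe_vm_userptr_pin(vm);
 *	...
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm))
 *		err = -EAGAIN;
 *	up_read(&vm->userptr.notifier_lock);
 */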
68
int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
70 {
71 struct xe_vma *vma = &uvma->vma;
72 struct xe_vm *vm = xe_vma_vm(vma);
73 struct xe_device *xe = vm->xe;
74
75 lockdep_assert_held(&vm->lock);
76 xe_assert(xe, xe_vma_is_userptr(vma));
77
78 return xe_hmm_userptr_populate_range(uvma, false);
79 }
80
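/*
 * Returns true if any exec queue on the VM's preempt list either has no
 * preempt fence installed or has one whose signaling has already been
 * enabled, i.e. a preemption is pending or needs to be triggered.
 */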
static bool preempt_fences_waiting(struct xe_vm *vm)
82 {
83 struct xe_exec_queue *q;
84
85 lockdep_assert_held(&vm->lock);
86 xe_vm_assert_held(vm);
87
88 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
89 if (!q->lr.pfence ||
90 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
91 &q->lr.pfence->flags)) {
92 return true;
93 }
94 }
95
96 return false;
97 }
98
static void free_preempt_fences(struct list_head *list)
100 {
101 struct list_head *link, *next;
102
103 list_for_each_safe(link, next, list)
104 xe_preempt_fence_free(to_preempt_fence_from_link(link));
105 }
106
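/*
 * Preallocate preempt fences so that @list holds one fence per exec queue
 * on the VM; @count tracks how many have been allocated across calls.
 */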
static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
				unsigned int *count)
109 {
110 lockdep_assert_held(&vm->lock);
111 xe_vm_assert_held(vm);
112
113 if (*count >= vm->preempt.num_exec_queues)
114 return 0;
115
116 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
117 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
118
119 if (IS_ERR(pfence))
120 return PTR_ERR(pfence);
121
122 list_move_tail(xe_preempt_fence_link(pfence), list);
123 }
124
125 return 0;
126 }
127
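/*
 * Wait for all currently installed preempt fences to signal and drop them.
 * Returns -ETIME if a wait fails or a fence signaled with -ETIME, in which
 * case the VM needs to be killed.
 */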
static int wait_for_existing_preempt_fences(struct xe_vm *vm)
129 {
130 struct xe_exec_queue *q;
131
132 xe_vm_assert_held(vm);
133
134 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
135 if (q->lr.pfence) {
136 long timeout = dma_fence_wait(q->lr.pfence, false);
137
138 /* Only -ETIME on fence indicates VM needs to be killed */
139 if (timeout < 0 || q->lr.pfence->error == -ETIME)
140 return -ETIME;
141
142 dma_fence_put(q->lr.pfence);
143 q->lr.pfence = NULL;
144 }
145 }
146
147 return 0;
148 }
149
static bool xe_vm_is_idle(struct xe_vm *vm)
151 {
152 struct xe_exec_queue *q;
153
154 xe_vm_assert_held(vm);
155 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
156 if (!xe_exec_queue_is_idle(q))
157 return false;
158 }
159
160 return true;
161 }
162
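/*
 * Arm one preallocated fence from @list per exec queue and install it as
 * the queue's current preempt fence, dropping the reference to the old one.
 */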
static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
164 {
165 struct list_head *link;
166 struct xe_exec_queue *q;
167
168 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
169 struct dma_fence *fence;
170
171 link = list->next;
172 xe_assert(vm->xe, link != list);
173
174 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
175 q, q->lr.context,
176 ++q->lr.seqno);
177 dma_fence_put(q->lr.pfence);
178 q->lr.pfence = fence;
179 }
180 }
181
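/*
 * Reserve fence slots on @bo's reservation object and add each exec
 * queue's current preempt fence to it with BOOKKEEP usage.
 */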
static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
183 {
184 struct xe_exec_queue *q;
185 int err;
186
187 xe_bo_assert_held(bo);
188
189 if (!vm->preempt.num_exec_queues)
190 return 0;
191
192 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
193 if (err)
194 return err;
195
196 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
197 if (q->lr.pfence) {
198 dma_resv_add_fence(bo->ttm.base.resv,
199 q->lr.pfence,
200 DMA_RESV_USAGE_BOOKKEEP);
201 }
202
203 return 0;
204 }
205
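/*
 * Resume each long-running exec queue and re-add its preempt fence to the
 * reservation objects managed by the VM.
 */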
static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
						struct drm_exec *exec)
208 {
209 struct xe_exec_queue *q;
210
211 lockdep_assert_held(&vm->lock);
212 xe_vm_assert_held(vm);
213
214 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
215 q->ops->resume(q);
216
217 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
218 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
219 }
220 }
221
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
223 {
224 struct drm_gpuvm_exec vm_exec = {
225 .vm = &vm->gpuvm,
226 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
227 .num_fences = 1,
228 };
229 struct drm_exec *exec = &vm_exec.exec;
230 struct dma_fence *pfence;
231 int err;
232 bool wait;
233
234 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
235
236 down_write(&vm->lock);
237 err = drm_gpuvm_exec_lock(&vm_exec);
238 if (err)
239 goto out_up_write;
240
241 pfence = xe_preempt_fence_create(q, q->lr.context,
242 ++q->lr.seqno);
243 if (!pfence) {
244 err = -ENOMEM;
245 goto out_fini;
246 }
247
248 list_add(&q->lr.link, &vm->preempt.exec_queues);
249 ++vm->preempt.num_exec_queues;
250 q->lr.pfence = pfence;
251
252 down_read(&vm->userptr.notifier_lock);
253
254 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
255 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
256
/*
 * Check to see if a preemption on the VM is in flight or a userptr
 * invalidation is pending; if so, trigger this preempt fence to sync
 * state with the other preempt fences on the VM.
 */
262 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
263 if (wait)
264 dma_fence_enable_sw_signaling(pfence);
265
266 up_read(&vm->userptr.notifier_lock);
267
268 out_fini:
269 drm_exec_fini(exec);
270 out_up_write:
271 up_write(&vm->lock);
272
273 return err;
274 }
275 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
276
277 /**
278 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
279 * @vm: The VM.
280 * @q: The exec_queue
281 *
282 * Note that this function might be called multiple times on the same queue.
283 */
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
285 {
286 if (!xe_vm_in_preempt_fence_mode(vm))
287 return;
288
289 down_write(&vm->lock);
290 if (!list_empty(&q->lr.link)) {
291 list_del_init(&q->lr.link);
292 --vm->preempt.num_exec_queues;
293 }
294 if (q->lr.pfence) {
295 dma_fence_enable_sw_signaling(q->lr.pfence);
296 dma_fence_put(q->lr.pfence);
297 q->lr.pfence = NULL;
298 }
299 up_write(&vm->lock);
300 }
301
302 /**
303 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
304 * that need repinning.
305 * @vm: The VM.
306 *
307 * This function checks for whether the VM has userptrs that need repinning,
308 * and provides a release-type barrier on the userptr.notifier_lock after
309 * checking.
310 *
311 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
312 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
314 {
315 lockdep_assert_held_read(&vm->userptr.notifier_lock);
316
317 return (list_empty(&vm->userptr.repin_list) &&
318 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
319 }
320
321 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
322
/**
 * xe_vm_kill() - VM Kill
 * @vm: The VM.
 * @unlocked: Flag indicating the VM's dma-resv is not held
 *
 * Kill the VM by setting the banned flag, indicating the VM is no longer
 * available for use. If in preempt fence mode, also kill all exec queues
 * attached to the VM.
 */
void xe_vm_kill(struct xe_vm *vm, bool unlocked)
332 {
333 struct xe_exec_queue *q;
334
335 lockdep_assert_held(&vm->lock);
336
337 if (unlocked)
338 xe_vm_lock(vm, false);
339
340 vm->flags |= XE_VM_FLAG_BANNED;
341 trace_xe_vm_kill(vm);
342
343 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
344 q->ops->kill(q);
345
346 if (unlocked)
347 xe_vm_unlock(vm);
348
349 /* TODO: Inform user the VM is banned */
350 }
351
/**
 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
 * @exec: The drm_exec object used for locking before validation.
 * @err: The error returned from ttm_bo_validate().
 * @end: A ktime_t cookie that should be set to 0 before first use and
 * that should be reused on subsequent calls.
 *
 * With multiple active VMs, under memory pressure, it is possible that
 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
 * Until ttm properly handles locking in such scenarios, the best the
 * driver can do is retry with a timeout. Check if that is necessary, and
 * if so unlock the drm_exec's objects while keeping the ticket to prepare
 * for a rerun.
 *
 * Return: true if a retry after drm_exec_init() is recommended;
 * false otherwise.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
370 {
371 ktime_t cur;
372
373 if (err != -ENOMEM)
374 return false;
375
376 cur = ktime_get();
377 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
378 if (!ktime_before(cur, *end))
379 return false;
380
381 msleep(20);
382 return true;
383 }
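
/*
 * A minimal usage sketch, mirroring the rebind worker below: treat a
 * recommended retry as -EAGAIN and restart the drm_exec transaction.
 *
 *	ktime_t end = 0;
 *	...
 *	err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		err = -EAGAIN;
 */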
384
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
386 {
387 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
388 struct drm_gpuva *gpuva;
389 int ret;
390
391 lockdep_assert_held(&vm->lock);
392 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
393 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
394 &vm->rebind_list);
395
396 if (!try_wait_for_completion(&vm->xe->pm_block))
397 return -EAGAIN;
398
399 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
400 if (ret)
401 return ret;
402
403 vm_bo->evicted = false;
404 return 0;
405 }
406
407 /**
408 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
409 * @vm: The vm for which we are rebinding.
410 * @exec: The struct drm_exec with the locked GEM objects.
411 * @num_fences: The number of fences to reserve for the operation, not
412 * including rebinds and validations.
413 *
414 * Validates all evicted gem objects and rebinds their vmas. Note that
415 * rebindings may cause evictions and hence the validation-rebind
416 * sequence is rerun until there are no more objects to validate.
417 *
418 * Return: 0 on success, negative error code on error. In particular,
419 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
420 * the drm_exec transaction needs to be restarted.
421 */
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
			  unsigned int num_fences)
424 {
425 struct drm_gem_object *obj;
426 unsigned long index;
427 int ret;
428
429 do {
430 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
431 if (ret)
432 return ret;
433
434 ret = xe_vm_rebind(vm, false);
435 if (ret)
436 return ret;
437 } while (!list_empty(&vm->gpuvm.evict.list));
438
439 drm_exec_for_each_locked_object(exec, index, obj) {
440 ret = dma_resv_reserve_fences(obj->resv, num_fences);
441 if (ret)
442 return ret;
443 }
444
445 return 0;
446 }
447
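/*
 * Locking-loop body for the rebind worker: lock the VM resv, bail out
 * early (with *done set) if the VM is idle or no preempt fences are
 * waiting, otherwise lock all external objects, wait for the existing
 * preempt fences and validate/rebind whatever was evicted, reserving
 * fence slots for the new preempt fences.
 */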
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
				 bool *done)
450 {
451 int err;
452
453 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
454 if (err)
455 return err;
456
457 if (xe_vm_is_idle(vm)) {
458 vm->preempt.rebind_deactivated = true;
459 *done = true;
460 return 0;
461 }
462
463 if (!preempt_fences_waiting(vm)) {
464 *done = true;
465 return 0;
466 }
467
468 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
469 if (err)
470 return err;
471
472 err = wait_for_existing_preempt_fences(vm);
473 if (err)
474 return err;
475
/*
 * Add validation and rebinding to the locking loop since both can
 * cause evictions which may require blocking dma_resv locks.
 * The fence reservation here is intended for the new preempt fences
 * we attach at the end of the rebind work.
 */
482 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
483 }
484
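/*
 * If a PM transition is blocking rebinds (xe->pm_block not completed),
 * queue the VM on the rebind_resume_list and return true so the worker
 * bails out; xe_vm_resume_rebind_worker() requeues the work later.
 */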
static bool vm_suspend_rebind_worker(struct xe_vm *vm)
486 {
487 struct xe_device *xe = vm->xe;
488 bool ret = false;
489
490 mutex_lock(&xe->rebind_resume_lock);
491 if (!try_wait_for_completion(&vm->xe->pm_block)) {
492 ret = true;
493 list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
494 }
495 mutex_unlock(&xe->rebind_resume_lock);
496
497 return ret;
498 }
499
500 /**
501 * xe_vm_resume_rebind_worker() - Resume the rebind worker.
502 * @vm: The vm whose preempt worker to resume.
503 *
504 * Resume a preempt worker that was previously suspended by
505 * vm_suspend_rebind_worker().
506 */
void xe_vm_resume_rebind_worker(struct xe_vm *vm)
508 {
509 queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
510 }
511
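/*
 * Rebind worker for long-running (preempt fence) VMs: repin invalidated
 * userptrs, validate and rebind evicted BOs, wait for the rebinds to
 * complete, then arm and reinstall fresh preempt fences. The VM is
 * killed on unrecoverable errors.
 */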
static void preempt_rebind_work_func(struct work_struct *w)
513 {
514 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
515 struct drm_exec exec;
516 unsigned int fence_count = 0;
517 LIST_HEAD(preempt_fences);
518 ktime_t end = 0;
519 int err = 0;
520 long wait;
521 int __maybe_unused tries = 0;
522
523 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
524 trace_xe_vm_rebind_worker_enter(vm);
525
526 down_write(&vm->lock);
527
528 if (xe_vm_is_closed_or_banned(vm)) {
529 up_write(&vm->lock);
530 trace_xe_vm_rebind_worker_exit(vm);
531 return;
532 }
533
534 retry:
535 if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
536 up_write(&vm->lock);
537 return;
538 }
539
540 if (xe_vm_userptr_check_repin(vm)) {
541 err = xe_vm_userptr_pin(vm);
542 if (err)
543 goto out_unlock_outer;
544 }
545
546 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
547
548 drm_exec_until_all_locked(&exec) {
549 bool done = false;
550
551 err = xe_preempt_work_begin(&exec, vm, &done);
552 drm_exec_retry_on_contention(&exec);
553 if (err || done) {
554 drm_exec_fini(&exec);
555 if (err && xe_vm_validate_should_retry(&exec, err, &end))
556 err = -EAGAIN;
557
558 goto out_unlock_outer;
559 }
560 }
561
562 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
563 if (err)
564 goto out_unlock;
565
566 err = xe_vm_rebind(vm, true);
567 if (err)
568 goto out_unlock;
569
570 /* Wait on rebinds and munmap style VM unbinds */
571 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
572 DMA_RESV_USAGE_KERNEL,
573 false, MAX_SCHEDULE_TIMEOUT);
574 if (wait <= 0) {
575 err = -ETIME;
576 goto out_unlock;
577 }
578
579 #define retry_required(__tries, __vm) \
580 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
581 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
582 __xe_vm_userptr_needs_repin(__vm))
583
584 down_read(&vm->userptr.notifier_lock);
585 if (retry_required(tries, vm)) {
586 up_read(&vm->userptr.notifier_lock);
587 err = -EAGAIN;
588 goto out_unlock;
589 }
590
591 #undef retry_required
592
593 spin_lock(&vm->xe->ttm.lru_lock);
594 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
595 spin_unlock(&vm->xe->ttm.lru_lock);
596
597 /* Point of no return. */
598 arm_preempt_fences(vm, &preempt_fences);
599 resume_and_reinstall_preempt_fences(vm, &exec);
600 up_read(&vm->userptr.notifier_lock);
601
602 out_unlock:
603 drm_exec_fini(&exec);
604 out_unlock_outer:
605 if (err == -EAGAIN) {
606 trace_xe_vm_rebind_worker_retry(vm);
607 goto retry;
608 }
609
610 if (err) {
611 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
612 xe_vm_kill(vm, true);
613 }
614 up_write(&vm->lock);
615
616 free_preempt_fences(&preempt_fences);
617
618 trace_xe_vm_rebind_worker_exit(vm);
619 }
620
static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
622 {
623 struct xe_userptr *userptr = &uvma->userptr;
624 struct xe_vma *vma = &uvma->vma;
625 struct dma_resv_iter cursor;
626 struct dma_fence *fence;
627 long err;
628
629 /*
630 * Tell exec and rebind worker they need to repin and rebind this
631 * userptr.
632 */
633 if (!xe_vm_in_fault_mode(vm) &&
634 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
635 spin_lock(&vm->userptr.invalidated_lock);
636 list_move_tail(&userptr->invalidate_link,
637 &vm->userptr.invalidated);
638 spin_unlock(&vm->userptr.invalidated_lock);
639 }
640
/*
 * Preempt fences turn into schedule disables, pipeline these.
 * Note that even in fault mode, we need to wait for binds and
 * unbinds to complete, and those are attached as BOOKKEEP fences
 * to the vm.
 */
647 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
648 DMA_RESV_USAGE_BOOKKEEP);
649 dma_resv_for_each_fence_unlocked(&cursor, fence)
650 dma_fence_enable_sw_signaling(fence);
651 dma_resv_iter_end(&cursor);
652
653 err = dma_resv_wait_timeout(xe_vm_resv(vm),
654 DMA_RESV_USAGE_BOOKKEEP,
655 false, MAX_SCHEDULE_TIMEOUT);
656 XE_WARN_ON(err <= 0);
657
658 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
659 err = xe_vm_invalidate_vma(vma);
660 XE_WARN_ON(err);
661 }
662
663 xe_hmm_userptr_unmap(uvma);
664 }
665
static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
669 {
670 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
671 struct xe_vma *vma = &uvma->vma;
672 struct xe_vm *vm = xe_vma_vm(vma);
673
674 xe_assert(vm->xe, xe_vma_is_userptr(vma));
675 trace_xe_vma_userptr_invalidate(vma);
676
677 if (!mmu_notifier_range_blockable(range))
678 return false;
679
680 vm_dbg(&xe_vma_vm(vma)->xe->drm,
681 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
682 xe_vma_start(vma), xe_vma_size(vma));
683
684 down_write(&vm->userptr.notifier_lock);
685 mmu_interval_set_seq(mni, cur_seq);
686
687 __vma_userptr_invalidate(vm, uvma);
688 up_write(&vm->userptr.notifier_lock);
689 trace_xe_vma_userptr_invalidate_complete(vma);
690
691 return true;
692 }
693
694 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
695 .invalidate = vma_userptr_invalidate,
696 };
697
698 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
699 /**
700 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
701 * @uvma: The userptr vma to invalidate
702 *
703 * Perform a forced userptr invalidation for testing purposes.
704 */
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
706 {
707 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
708
709 /* Protect against concurrent userptr pinning */
710 lockdep_assert_held(&vm->lock);
711 /* Protect against concurrent notifiers */
712 lockdep_assert_held(&vm->userptr.notifier_lock);
713 /*
714 * Protect against concurrent instances of this function and
715 * the critical exec sections
716 */
717 xe_vm_assert_held(vm);
718
719 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
720 uvma->userptr.notifier_seq))
721 uvma->userptr.notifier_seq -= 2;
722 __vma_userptr_invalidate(vm, uvma);
723 }
724 #endif
725
int xe_vm_userptr_pin(struct xe_vm *vm)
727 {
728 struct xe_userptr_vma *uvma, *next;
729 int err = 0;
730
731 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
732 lockdep_assert_held_write(&vm->lock);
733
734 /* Collect invalidated userptrs */
735 spin_lock(&vm->userptr.invalidated_lock);
736 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
737 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
738 userptr.invalidate_link) {
739 list_del_init(&uvma->userptr.invalidate_link);
740 list_add_tail(&uvma->userptr.repin_link,
741 &vm->userptr.repin_list);
742 }
743 spin_unlock(&vm->userptr.invalidated_lock);
744
745 /* Pin and move to bind list */
746 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
747 userptr.repin_link) {
748 err = xe_vma_userptr_pin_pages(uvma);
749 if (err == -EFAULT) {
750 list_del_init(&uvma->userptr.repin_link);
/*
 * We might have already done the pin once, but then had to retry
 * before the re-bind happened due to some other condition in the
 * caller. In the meantime the userptr got dinged by the notifier
 * such that we need to revalidate here, but this time we hit the
 * EFAULT. In such a case make sure we remove ourselves from the
 * rebind list to avoid going down in flames.
 */
761 if (!list_empty(&uvma->vma.combined_links.rebind))
762 list_del_init(&uvma->vma.combined_links.rebind);
763
764 /* Wait for pending binds */
765 xe_vm_lock(vm, false);
766 dma_resv_wait_timeout(xe_vm_resv(vm),
767 DMA_RESV_USAGE_BOOKKEEP,
768 false, MAX_SCHEDULE_TIMEOUT);
769
770 down_read(&vm->userptr.notifier_lock);
771 err = xe_vm_invalidate_vma(&uvma->vma);
772 up_read(&vm->userptr.notifier_lock);
773 xe_vm_unlock(vm);
774 if (err)
775 break;
776 } else {
777 if (err)
778 break;
779
780 list_del_init(&uvma->userptr.repin_link);
781 list_move_tail(&uvma->vma.combined_links.rebind,
782 &vm->rebind_list);
783 }
784 }
785
786 if (err) {
787 down_write(&vm->userptr.notifier_lock);
788 spin_lock(&vm->userptr.invalidated_lock);
789 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
790 userptr.repin_link) {
791 list_del_init(&uvma->userptr.repin_link);
792 list_move_tail(&uvma->userptr.invalidate_link,
793 &vm->userptr.invalidated);
794 }
795 spin_unlock(&vm->userptr.invalidated_lock);
796 up_write(&vm->userptr.notifier_lock);
797 }
798 return err;
799 }
800
801 /**
802 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
803 * that need repinning.
804 * @vm: The VM.
805 *
806 * This function does an advisory check for whether the VM has userptrs that
807 * need repinning.
808 *
809 * Return: 0 if there are no indications of userptrs needing repinning,
810 * -EAGAIN if there are.
811 */
int xe_vm_userptr_check_repin(struct xe_vm *vm)
813 {
814 return (list_empty_careful(&vm->userptr.repin_list) &&
815 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
816 }
817
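/*
 * Allocate the per-tile arrays of page-table update ops sized by the
 * number of ops previously accounted in @vops. For an array of binds,
 * allocation failure is reported as -ENOBUFS rather than -ENOMEM.
 */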
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
819 {
820 int i;
821
822 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
823 if (!vops->pt_update_ops[i].num_ops)
824 continue;
825
826 vops->pt_update_ops[i].ops =
827 kmalloc_array(vops->pt_update_ops[i].num_ops,
828 sizeof(*vops->pt_update_ops[i].ops),
829 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
830 if (!vops->pt_update_ops[i].ops)
831 return array_of_binds ? -ENOBUFS : -ENOMEM;
832 }
833
834 return 0;
835 }
836 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
837
static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
839 {
840 struct xe_vma *vma;
841
842 vma = gpuva_to_vma(op->base.prefetch.va);
843
844 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
845 xa_destroy(&op->prefetch_range.range);
846 }
847
static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
849 {
850 struct xe_vma_op *op;
851
852 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
853 return;
854
855 list_for_each_entry(op, &vops->list, link)
856 xe_vma_svm_prefetch_op_fini(op);
857 }
858
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
860 {
861 int i;
862
863 xe_vma_svm_prefetch_ops_fini(vops);
864
865 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
866 kfree(vops->pt_update_ops[i].ops);
867 }
868
static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
870 {
871 int i;
872
873 if (!inc_val)
874 return;
875
876 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
877 if (BIT(i) & tile_mask)
878 vops->pt_update_ops[i].num_ops += inc_val;
879 }
880
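/*
 * Fill @op as a DRM_GPUVA_OP_MAP that re-maps @vma with its existing
 * GPU VA, backing object and flags, limited to the tiles in @tile_mask.
 */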
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
				  u8 tile_mask)
883 {
884 INIT_LIST_HEAD(&op->link);
885 op->tile_mask = tile_mask;
886 op->base.op = DRM_GPUVA_OP_MAP;
887 op->base.map.va.addr = vma->gpuva.va.addr;
888 op->base.map.va.range = vma->gpuva.va.range;
889 op->base.map.gem.obj = vma->gpuva.gem.obj;
890 op->base.map.gem.offset = vma->gpuva.gem.offset;
891 op->map.vma = vma;
892 op->map.immediate = true;
893 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
894 op->map.is_null = xe_vma_is_null(vma);
895 }
896
static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
				u8 tile_mask)
899 {
900 struct xe_vma_op *op;
901
902 op = kzalloc(sizeof(*op), GFP_KERNEL);
903 if (!op)
904 return -ENOMEM;
905
906 xe_vm_populate_rebind(op, vma, tile_mask);
907 list_add_tail(&op->link, &vops->list);
908 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
909
910 return 0;
911 }
912
913 static struct dma_fence *ops_execute(struct xe_vm *vm,
914 struct xe_vma_ops *vops);
915 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
916 struct xe_exec_queue *q,
917 struct xe_sync_entry *syncs, u32 num_syncs);
918
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
920 {
921 struct dma_fence *fence;
922 struct xe_vma *vma, *next;
923 struct xe_vma_ops vops;
924 struct xe_vma_op *op, *next_op;
925 int err, i;
926
927 lockdep_assert_held(&vm->lock);
928 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
929 list_empty(&vm->rebind_list))
930 return 0;
931
932 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
933 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
934 vops.pt_update_ops[i].wait_vm_bookkeep = true;
935
936 xe_vm_assert_held(vm);
937 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
938 xe_assert(vm->xe, vma->tile_present);
939
940 if (rebind_worker)
941 trace_xe_vma_rebind_worker(vma);
942 else
943 trace_xe_vma_rebind_exec(vma);
944
945 err = xe_vm_ops_add_rebind(&vops, vma,
946 vma->tile_present);
947 if (err)
948 goto free_ops;
949 }
950
951 err = xe_vma_ops_alloc(&vops, false);
952 if (err)
953 goto free_ops;
954
955 fence = ops_execute(vm, &vops);
956 if (IS_ERR(fence)) {
957 err = PTR_ERR(fence);
958 } else {
959 dma_fence_put(fence);
960 list_for_each_entry_safe(vma, next, &vm->rebind_list,
961 combined_links.rebind)
962 list_del_init(&vma->combined_links.rebind);
963 }
964 free_ops:
965 list_for_each_entry_safe(op, next_op, &vops.list, link) {
966 list_del(&op->link);
967 kfree(op);
968 }
969 xe_vma_ops_fini(&vops);
970
971 return err;
972 }
973
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
975 {
976 struct dma_fence *fence = NULL;
977 struct xe_vma_ops vops;
978 struct xe_vma_op *op, *next_op;
979 struct xe_tile *tile;
980 u8 id;
981 int err;
982
983 lockdep_assert_held(&vm->lock);
984 xe_vm_assert_held(vm);
985 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
986
987 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
988 for_each_tile(tile, vm->xe, id) {
989 vops.pt_update_ops[id].wait_vm_bookkeep = true;
990 vops.pt_update_ops[tile->id].q =
991 xe_tile_migrate_exec_queue(tile);
992 }
993
994 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
995 if (err)
996 return ERR_PTR(err);
997
998 err = xe_vma_ops_alloc(&vops, false);
999 if (err) {
1000 fence = ERR_PTR(err);
1001 goto free_ops;
1002 }
1003
1004 fence = ops_execute(vm, &vops);
1005
1006 free_ops:
1007 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1008 list_del(&op->link);
1009 kfree(op);
1010 }
1011 xe_vma_ops_fini(&vops);
1012
1013 return fence;
1014 }
1015
static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
					struct xe_vma *vma,
					struct xe_svm_range *range,
					u8 tile_mask)
1020 {
1021 INIT_LIST_HEAD(&op->link);
1022 op->tile_mask = tile_mask;
1023 op->base.op = DRM_GPUVA_OP_DRIVER;
1024 op->subop = XE_VMA_SUBOP_MAP_RANGE;
1025 op->map_range.vma = vma;
1026 op->map_range.range = range;
1027 }
1028
static int
xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
			   struct xe_vma *vma,
			   struct xe_svm_range *range,
			   u8 tile_mask)
1034 {
1035 struct xe_vma_op *op;
1036
1037 op = kzalloc(sizeof(*op), GFP_KERNEL);
1038 if (!op)
1039 return -ENOMEM;
1040
1041 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
1042 list_add_tail(&op->link, &vops->list);
1043 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
1044
1045 return 0;
1046 }
1047
/**
 * xe_vm_range_rebind() - VM range (re)bind
 * @vm: The VM which the range belongs to.
 * @vma: The VMA which the range belongs to.
 * @range: SVM range to rebind.
 * @tile_mask: Tile mask to bind the range to.
 *
 * (Re)bind the SVM range, setting up GPU page tables for the range.
 *
 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
				     struct xe_vma *vma,
				     struct xe_svm_range *range,
				     u8 tile_mask)
1064 {
1065 struct dma_fence *fence = NULL;
1066 struct xe_vma_ops vops;
1067 struct xe_vma_op *op, *next_op;
1068 struct xe_tile *tile;
1069 u8 id;
1070 int err;
1071
1072 lockdep_assert_held(&vm->lock);
1073 xe_vm_assert_held(vm);
1074 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1075 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
1076
1077 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1078 for_each_tile(tile, vm->xe, id) {
1079 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1080 vops.pt_update_ops[tile->id].q =
1081 xe_tile_migrate_exec_queue(tile);
1082 }
1083
1084 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
1085 if (err)
1086 return ERR_PTR(err);
1087
1088 err = xe_vma_ops_alloc(&vops, false);
1089 if (err) {
1090 fence = ERR_PTR(err);
1091 goto free_ops;
1092 }
1093
1094 fence = ops_execute(vm, &vops);
1095
1096 free_ops:
1097 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1098 list_del(&op->link);
1099 kfree(op);
1100 }
1101 xe_vma_ops_fini(&vops);
1102
1103 return fence;
1104 }
1105
static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
					struct xe_svm_range *range)
1108 {
1109 INIT_LIST_HEAD(&op->link);
1110 op->tile_mask = range->tile_present;
1111 op->base.op = DRM_GPUVA_OP_DRIVER;
1112 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
1113 op->unmap_range.range = range;
1114 }
1115
static int
xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
			   struct xe_svm_range *range)
1119 {
1120 struct xe_vma_op *op;
1121
1122 op = kzalloc(sizeof(*op), GFP_KERNEL);
1123 if (!op)
1124 return -ENOMEM;
1125
1126 xe_vm_populate_range_unbind(op, range);
1127 list_add_tail(&op->link, &vops->list);
1128 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
1129
1130 return 0;
1131 }
1132
/**
 * xe_vm_range_unbind() - VM range unbind
 * @vm: The VM which the range belongs to.
 * @range: SVM range to unbind.
 *
 * Unbind the SVM range, removing the GPU page tables for the range.
 *
 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
 * failure
 */
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
				     struct xe_svm_range *range)
1145 {
1146 struct dma_fence *fence = NULL;
1147 struct xe_vma_ops vops;
1148 struct xe_vma_op *op, *next_op;
1149 struct xe_tile *tile;
1150 u8 id;
1151 int err;
1152
1153 lockdep_assert_held(&vm->lock);
1154 xe_vm_assert_held(vm);
1155 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1156
1157 if (!range->tile_present)
1158 return dma_fence_get_stub();
1159
1160 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1161 for_each_tile(tile, vm->xe, id) {
1162 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1163 vops.pt_update_ops[tile->id].q =
1164 xe_tile_migrate_exec_queue(tile);
1165 }
1166
1167 err = xe_vm_ops_add_range_unbind(&vops, range);
1168 if (err)
1169 return ERR_PTR(err);
1170
1171 err = xe_vma_ops_alloc(&vops, false);
1172 if (err) {
1173 fence = ERR_PTR(err);
1174 goto free_ops;
1175 }
1176
1177 fence = ops_execute(vm, &vops);
1178
1179 free_ops:
1180 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1181 list_del(&op->link);
1182 kfree(op);
1183 }
1184 xe_vma_ops_fini(&vops);
1185
1186 return fence;
1187 }
1188
static void xe_vma_free(struct xe_vma *vma)
1190 {
1191 if (xe_vma_is_userptr(vma))
1192 kfree(to_userptr_vma(vma));
1193 else
1194 kfree(vma);
1195 }
1196
1197 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
1198 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
1199 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
1200 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3)
1201
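/*
 * Allocate and initialize a VMA. Without a BO and without the NULL or
 * CPU-address-mirror flags this is a userptr VMA, in which case an MMU
 * interval notifier is registered for its range.
 */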
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    u16 pat_index, unsigned int flags)
1207 {
1208 struct xe_vma *vma;
1209 struct xe_tile *tile;
1210 u8 id;
1211 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
1212 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
1213 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
1214 bool is_cpu_addr_mirror =
1215 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
1216
1217 xe_assert(vm->xe, start < end);
1218 xe_assert(vm->xe, end < vm->size);
1219
1220 /*
1221 * Allocate and ensure that the xe_vma_is_userptr() return
1222 * matches what was allocated.
1223 */
1224 if (!bo && !is_null && !is_cpu_addr_mirror) {
1225 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
1226
1227 if (!uvma)
1228 return ERR_PTR(-ENOMEM);
1229
1230 vma = &uvma->vma;
1231 } else {
1232 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
1233 if (!vma)
1234 return ERR_PTR(-ENOMEM);
1235
1236 if (is_cpu_addr_mirror)
1237 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
1238 if (is_null)
1239 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
1240 if (bo)
1241 vma->gpuva.gem.obj = &bo->ttm.base;
1242 }
1243
1244 INIT_LIST_HEAD(&vma->combined_links.rebind);
1245
1246 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1247 vma->gpuva.vm = &vm->gpuvm;
1248 vma->gpuva.va.addr = start;
1249 vma->gpuva.va.range = end - start + 1;
1250 if (read_only)
1251 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1252 if (dumpable)
1253 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1254
1255 for_each_tile(tile, vm->xe, id)
1256 vma->tile_mask |= 0x1 << id;
1257
1258 if (vm->xe->info.has_atomic_enable_pte_bit)
1259 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1260
1261 vma->pat_index = pat_index;
1262
1263 if (bo) {
1264 struct drm_gpuvm_bo *vm_bo;
1265
1266 xe_bo_assert_held(bo);
1267
1268 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1269 if (IS_ERR(vm_bo)) {
1270 xe_vma_free(vma);
1271 return ERR_CAST(vm_bo);
1272 }
1273
1274 drm_gpuvm_bo_extobj_add(vm_bo);
1275 drm_gem_object_get(&bo->ttm.base);
1276 vma->gpuva.gem.offset = bo_offset_or_userptr;
1277 drm_gpuva_link(&vma->gpuva, vm_bo);
1278 drm_gpuvm_bo_put(vm_bo);
1279 } else /* userptr or null */ {
1280 if (!is_null && !is_cpu_addr_mirror) {
1281 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1282 u64 size = end - start + 1;
1283 int err;
1284
1285 INIT_LIST_HEAD(&userptr->invalidate_link);
1286 INIT_LIST_HEAD(&userptr->repin_link);
1287 vma->gpuva.gem.offset = bo_offset_or_userptr;
1288 mutex_init(&userptr->unmap_mutex);
1289
1290 err = mmu_interval_notifier_insert(&userptr->notifier,
1291 current->mm,
1292 xe_vma_userptr(vma), size,
1293 &vma_userptr_notifier_ops);
1294 if (err) {
1295 xe_vma_free(vma);
1296 return ERR_PTR(err);
1297 }
1298
1299 userptr->notifier_seq = LONG_MAX;
1300 }
1301
1302 xe_vm_get(vm);
1303 }
1304
1305 return vma;
1306 }
1307
static void xe_vma_destroy_late(struct xe_vma *vma)
1309 {
1310 struct xe_vm *vm = xe_vma_vm(vma);
1311
1312 if (vma->ufence) {
1313 xe_sync_ufence_put(vma->ufence);
1314 vma->ufence = NULL;
1315 }
1316
1317 if (xe_vma_is_userptr(vma)) {
1318 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1319 struct xe_userptr *userptr = &uvma->userptr;
1320
1321 if (userptr->sg)
1322 xe_hmm_userptr_free_sg(uvma);
1323
1324 /*
1325 * Since userptr pages are not pinned, we can't remove
1326 * the notifier until we're sure the GPU is not accessing
1327 * them anymore
1328 */
1329 mmu_interval_notifier_remove(&userptr->notifier);
1330 mutex_destroy(&userptr->unmap_mutex);
1331 xe_vm_put(vm);
1332 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1333 xe_vm_put(vm);
1334 } else {
1335 xe_bo_put(xe_vma_bo(vma));
1336 }
1337
1338 xe_vma_free(vma);
1339 }
1340
static void vma_destroy_work_func(struct work_struct *w)
1342 {
1343 struct xe_vma *vma =
1344 container_of(w, struct xe_vma, destroy_work);
1345
1346 xe_vma_destroy_late(vma);
1347 }
1348
static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
1351 {
1352 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1353
1354 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1355 queue_work(system_unbound_wq, &vma->destroy_work);
1356 }
1357
static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1359 {
1360 struct xe_vm *vm = xe_vma_vm(vma);
1361
1362 lockdep_assert_held_write(&vm->lock);
1363 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1364
1365 if (xe_vma_is_userptr(vma)) {
1366 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1367
1368 spin_lock(&vm->userptr.invalidated_lock);
1369 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1370 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1371 spin_unlock(&vm->userptr.invalidated_lock);
1372 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1373 xe_bo_assert_held(xe_vma_bo(vma));
1374
1375 drm_gpuva_unlink(&vma->gpuva);
1376 }
1377
1378 xe_vm_assert_held(vm);
1379 if (fence) {
1380 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1381 vma_destroy_cb);
1382
1383 if (ret) {
1384 XE_WARN_ON(ret != -ENOENT);
1385 xe_vma_destroy_late(vma);
1386 }
1387 } else {
1388 xe_vma_destroy_late(vma);
1389 }
1390 }
1391
/**
 * xe_vm_lock_vma() - drm_exec utility to lock a vma
 * @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
 * object's resv.
 *
 * Return: 0 on success, negative error code on error. In particular
 * may return -EDEADLK on WW transaction contention and -EINTR if
 * an interruptible wait is terminated by a signal.
 */
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1403 {
1404 struct xe_vm *vm = xe_vma_vm(vma);
1405 struct xe_bo *bo = xe_vma_bo(vma);
1406 int err;
1407
1408 XE_WARN_ON(!vm);
1409
1410 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1411 if (!err && bo && !bo->vm)
1412 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1413
1414 return err;
1415 }
1416
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1418 {
1419 struct drm_exec exec;
1420 int err;
1421
1422 drm_exec_init(&exec, 0, 0);
1423 drm_exec_until_all_locked(&exec) {
1424 err = xe_vm_lock_vma(&exec, vma);
1425 drm_exec_retry_on_contention(&exec);
1426 if (XE_WARN_ON(err))
1427 break;
1428 }
1429
1430 xe_vma_destroy(vma, NULL);
1431
1432 drm_exec_fini(&exec);
1433 }
1434
struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1437 {
1438 struct drm_gpuva *gpuva;
1439
1440 lockdep_assert_held(&vm->lock);
1441
1442 if (xe_vm_is_closed_or_banned(vm))
1443 return NULL;
1444
1445 xe_assert(vm->xe, start + range <= vm->size);
1446
1447 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1448
1449 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1450 }
1451
static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1453 {
1454 int err;
1455
1456 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1457 lockdep_assert_held(&vm->lock);
1458
1459 mutex_lock(&vm->snap_mutex);
1460 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1461 mutex_unlock(&vm->snap_mutex);
1462 XE_WARN_ON(err); /* Shouldn't be possible */
1463
1464 return err;
1465 }
1466
static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1468 {
1469 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1470 lockdep_assert_held(&vm->lock);
1471
1472 mutex_lock(&vm->snap_mutex);
1473 drm_gpuva_remove(&vma->gpuva);
1474 mutex_unlock(&vm->snap_mutex);
1475 if (vm->usm.last_fault_vma == vma)
1476 vm->usm.last_fault_vma = NULL;
1477 }
1478
static struct drm_gpuva_op *xe_vm_op_alloc(void)
1480 {
1481 struct xe_vma_op *op;
1482
1483 op = kzalloc(sizeof(*op), GFP_KERNEL);
1484
1485 if (unlikely(!op))
1486 return NULL;
1487
1488 return &op->base;
1489 }
1490
1491 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1492
1493 static const struct drm_gpuvm_ops gpuvm_ops = {
1494 .op_alloc = xe_vm_op_alloc,
1495 .vm_bo_validate = xe_gpuvm_validate,
1496 .vm_free = xe_vm_free,
1497 };
1498
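/*
 * Encode a PAT index into page-table entry bits. The PDE variant only
 * carries PAT[1:0]; the PTE variant below also encodes PAT[4:2], with
 * PAT2 using a different bit position for directory-level entries.
 */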
static u64 pde_encode_pat_index(u16 pat_index)
1500 {
1501 u64 pte = 0;
1502
1503 if (pat_index & BIT(0))
1504 pte |= XE_PPGTT_PTE_PAT0;
1505
1506 if (pat_index & BIT(1))
1507 pte |= XE_PPGTT_PTE_PAT1;
1508
1509 return pte;
1510 }
1511
static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1513 {
1514 u64 pte = 0;
1515
1516 if (pat_index & BIT(0))
1517 pte |= XE_PPGTT_PTE_PAT0;
1518
1519 if (pat_index & BIT(1))
1520 pte |= XE_PPGTT_PTE_PAT1;
1521
1522 if (pat_index & BIT(2)) {
1523 if (pt_level)
1524 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1525 else
1526 pte |= XE_PPGTT_PTE_PAT2;
1527 }
1528
1529 if (pat_index & BIT(3))
1530 pte |= XELPG_PPGTT_PTE_PAT3;
1531
1532 if (pat_index & (BIT(4)))
1533 pte |= XE2_PPGTT_PTE_PAT4;
1534
1535 return pte;
1536 }
1537
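/* Encode the page-size bits for huge entries: 2M at level 1, 1G at level 2. */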
static u64 pte_encode_ps(u32 pt_level)
1539 {
1540 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1541
1542 if (pt_level == 1)
1543 return XE_PDE_PS_2M;
1544 else if (pt_level == 2)
1545 return XE_PDPE_PS_1G;
1546
1547 return 0;
1548 }
1549
static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      const u16 pat_index)
1552 {
1553 u64 pde;
1554
1555 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1556 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1557 pde |= pde_encode_pat_index(pat_index);
1558
1559 return pde;
1560 }
1561
static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
			      u16 pat_index, u32 pt_level)
1564 {
1565 u64 pte;
1566
1567 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1568 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1569 pte |= pte_encode_pat_index(pat_index, pt_level);
1570 pte |= pte_encode_ps(pt_level);
1571
1572 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1573 pte |= XE_PPGTT_PTE_DM;
1574
1575 return pte;
1576 }
1577
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
			       u16 pat_index, u32 pt_level)
1580 {
1581 pte |= XE_PAGE_PRESENT;
1582
1583 if (likely(!xe_vma_read_only(vma)))
1584 pte |= XE_PAGE_RW;
1585
1586 pte |= pte_encode_pat_index(pat_index, pt_level);
1587 pte |= pte_encode_ps(pt_level);
1588
1589 if (unlikely(xe_vma_is_null(vma)))
1590 pte |= XE_PTE_NULL;
1591
1592 return pte;
1593 }
1594
static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
				u16 pat_index,
				u32 pt_level, bool devmem, u64 flags)
1598 {
1599 u64 pte;
1600
1601 /* Avoid passing random bits directly as flags */
1602 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1603
1604 pte = addr;
1605 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1606 pte |= pte_encode_pat_index(pat_index, pt_level);
1607 pte |= pte_encode_ps(pt_level);
1608
1609 if (devmem)
1610 pte |= XE_PPGTT_PTE_DM;
1611
1612 pte |= flags;
1613
1614 return pte;
1615 }
1616
1617 static const struct xe_pt_ops xelp_pt_ops = {
1618 .pte_encode_bo = xelp_pte_encode_bo,
1619 .pte_encode_vma = xelp_pte_encode_vma,
1620 .pte_encode_addr = xelp_pte_encode_addr,
1621 .pde_encode_bo = xelp_pde_encode_bo,
1622 };
1623
1624 static void vm_destroy_work_func(struct work_struct *w);
1625
1626 /**
1627 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1628 * given tile and vm.
1629 * @xe: xe device.
1630 * @tile: tile to set up for.
1631 * @vm: vm to set up for.
1632 *
1633 * Sets up a pagetable tree with one page-table per level and a single
1634 * leaf PTE. All pagetable entries point to the single page-table or,
1635 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1636 * writes become NOPs.
1637 *
1638 * Return: 0 on success, negative error code on error.
1639 */
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
				struct xe_vm *vm)
1642 {
1643 u8 id = tile->id;
1644 int i;
1645
1646 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1647 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1648 if (IS_ERR(vm->scratch_pt[id][i])) {
1649 int err = PTR_ERR(vm->scratch_pt[id][i]);
1650
1651 vm->scratch_pt[id][i] = NULL;
1652 return err;
1653 }
1654
1655 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1656 }
1657
1658 return 0;
1659 }
1660 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1661
static void xe_vm_free_scratch(struct xe_vm *vm)
1663 {
1664 struct xe_tile *tile;
1665 u8 id;
1666
1667 if (!xe_vm_has_scratch(vm))
1668 return;
1669
1670 for_each_tile(tile, vm->xe, id) {
1671 u32 i;
1672
1673 if (!vm->pt_root[id])
1674 continue;
1675
1676 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1677 if (vm->scratch_pt[id][i])
1678 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1679 }
1680 }
1681
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1683 {
1684 struct drm_gem_object *vm_resv_obj;
1685 struct xe_vm *vm;
1686 int err, number_tiles = 0;
1687 struct xe_tile *tile;
1688 u8 id;
1689
1690 /*
1691 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1692 * ever be in faulting mode.
1693 */
1694 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1695
1696 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1697 if (!vm)
1698 return ERR_PTR(-ENOMEM);
1699
1700 vm->xe = xe;
1701
1702 vm->size = 1ull << xe->info.va_bits;
1703 vm->flags = flags;
1704
1705 if (xef)
1706 vm->xef = xe_file_get(xef);
1707 /**
1708 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1709 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1710 * under a user-VM lock when the PXP session is started at exec_queue
1711 * creation time. Those are different VMs and therefore there is no risk
1712 * of deadlock, but we need to tell lockdep that this is the case or it
1713 * will print a warning.
1714 */
1715 if (flags & XE_VM_FLAG_GSC) {
1716 static struct lock_class_key gsc_vm_key;
1717
1718 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1719 } else {
1720 init_rwsem(&vm->lock);
1721 }
1722 mutex_init(&vm->snap_mutex);
1723
1724 INIT_LIST_HEAD(&vm->rebind_list);
1725
1726 INIT_LIST_HEAD(&vm->userptr.repin_list);
1727 INIT_LIST_HEAD(&vm->userptr.invalidated);
1728 init_rwsem(&vm->userptr.notifier_lock);
1729 spin_lock_init(&vm->userptr.invalidated_lock);
1730
1731 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1732
1733 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1734
1735 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1736 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1737
1738 for_each_tile(tile, xe, id)
1739 xe_range_fence_tree_init(&vm->rftree[id]);
1740
1741 vm->pt_ops = &xelp_pt_ops;
1742
1743 /*
1744 * Long-running workloads are not protected by the scheduler references.
1745 * By design, run_job for long-running workloads returns NULL and the
1746 * scheduler drops all the references of it, hence protecting the VM
1747 * for this case is necessary.
1748 */
1749 if (flags & XE_VM_FLAG_LR_MODE) {
1750 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1751 xe_pm_runtime_get_noresume(xe);
1752 INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1753 }
1754
1755 if (flags & XE_VM_FLAG_FAULT_MODE) {
1756 err = xe_svm_init(vm);
1757 if (err)
1758 goto err_no_resv;
1759 }
1760
1761 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1762 if (!vm_resv_obj) {
1763 err = -ENOMEM;
1764 goto err_svm_fini;
1765 }
1766
1767 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1768 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1769
1770 drm_gem_object_put(vm_resv_obj);
1771
1772 err = xe_vm_lock(vm, true);
1773 if (err)
1774 goto err_close;
1775
1776 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1777 vm->flags |= XE_VM_FLAG_64K;
1778
1779 for_each_tile(tile, xe, id) {
1780 if (flags & XE_VM_FLAG_MIGRATION &&
1781 tile->id != XE_VM_FLAG_TILE_ID(flags))
1782 continue;
1783
1784 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1785 if (IS_ERR(vm->pt_root[id])) {
1786 err = PTR_ERR(vm->pt_root[id]);
1787 vm->pt_root[id] = NULL;
1788 goto err_unlock_close;
1789 }
1790 }
1791
1792 if (xe_vm_has_scratch(vm)) {
1793 for_each_tile(tile, xe, id) {
1794 if (!vm->pt_root[id])
1795 continue;
1796
1797 err = xe_vm_create_scratch(xe, tile, vm);
1798 if (err)
1799 goto err_unlock_close;
1800 }
1801 vm->batch_invalidate_tlb = true;
1802 }
1803
1804 if (vm->flags & XE_VM_FLAG_LR_MODE)
1805 vm->batch_invalidate_tlb = false;
1806
1807 /* Fill pt_root after allocating scratch tables */
1808 for_each_tile(tile, xe, id) {
1809 if (!vm->pt_root[id])
1810 continue;
1811
1812 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1813 }
1814 xe_vm_unlock(vm);
1815
1816 /* Kernel migration VM shouldn't have a circular loop.. */
1817 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1818 for_each_tile(tile, xe, id) {
1819 struct xe_exec_queue *q;
1820 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1821
1822 if (!vm->pt_root[id])
1823 continue;
1824
1825 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1826 if (IS_ERR(q)) {
1827 err = PTR_ERR(q);
1828 goto err_close;
1829 }
1830 vm->q[id] = q;
1831 number_tiles++;
1832 }
1833 }
1834
1835 if (number_tiles > 1)
1836 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1837
1838 if (xef && xe->info.has_asid) {
1839 u32 asid;
1840
1841 down_write(&xe->usm.lock);
1842 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1843 XA_LIMIT(1, XE_MAX_ASID - 1),
1844 &xe->usm.next_asid, GFP_KERNEL);
1845 up_write(&xe->usm.lock);
1846 if (err < 0)
1847 goto err_unlock_close;
1848
1849 vm->usm.asid = asid;
1850 }
1851
1852 trace_xe_vm_create(vm);
1853
1854 return vm;
1855
1856 err_unlock_close:
1857 xe_vm_unlock(vm);
1858 err_close:
1859 xe_vm_close_and_put(vm);
1860 return ERR_PTR(err);
1861
1862 err_svm_fini:
1863 if (flags & XE_VM_FLAG_FAULT_MODE) {
1864 vm->size = 0; /* close the vm */
1865 xe_svm_fini(vm);
1866 }
1867 err_no_resv:
1868 mutex_destroy(&vm->snap_mutex);
1869 for_each_tile(tile, xe, id)
1870 xe_range_fence_tree_fini(&vm->rftree[id]);
1871 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1872 if (vm->xef)
1873 xe_file_put(vm->xef);
1874 kfree(vm);
1875 if (flags & XE_VM_FLAG_LR_MODE)
1876 xe_pm_runtime_put(xe);
1877 return ERR_PTR(err);
1878 }
1879
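/*
 * Mark the VM closed (size = 0) and, unless this is a migration VM or the
 * device is already unplugged, wait for pending binds, clear the
 * page-table roots and invalidate the TLBs.
 */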
static void xe_vm_close(struct xe_vm *vm)
1881 {
1882 struct xe_device *xe = vm->xe;
1883 bool bound;
1884 int idx;
1885
1886 bound = drm_dev_enter(&xe->drm, &idx);
1887
1888 down_write(&vm->lock);
1889 if (xe_vm_in_fault_mode(vm))
1890 xe_svm_notifier_lock(vm);
1891
1892 vm->size = 0;
1893
1894 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1895 struct xe_tile *tile;
1896 struct xe_gt *gt;
1897 u8 id;
1898
1899 /* Wait for pending binds */
1900 dma_resv_wait_timeout(xe_vm_resv(vm),
1901 DMA_RESV_USAGE_BOOKKEEP,
1902 false, MAX_SCHEDULE_TIMEOUT);
1903
1904 if (bound) {
1905 for_each_tile(tile, xe, id)
1906 if (vm->pt_root[id])
1907 xe_pt_clear(xe, vm->pt_root[id]);
1908
1909 for_each_gt(gt, xe, id)
1910 xe_gt_tlb_invalidation_vm(gt, vm);
1911 }
1912 }
1913
1914 if (xe_vm_in_fault_mode(vm))
1915 xe_svm_notifier_unlock(vm);
1916 up_write(&vm->lock);
1917
1918 if (bound)
1919 drm_dev_exit(idx);
1920 }
1921
void xe_vm_close_and_put(struct xe_vm *vm)
1923 {
1924 LIST_HEAD(contested);
1925 struct xe_device *xe = vm->xe;
1926 struct xe_tile *tile;
1927 struct xe_vma *vma, *next_vma;
1928 struct drm_gpuva *gpuva, *next;
1929 u8 id;
1930
1931 xe_assert(xe, !vm->preempt.num_exec_queues);
1932
1933 xe_vm_close(vm);
1934 if (xe_vm_in_preempt_fence_mode(vm)) {
1935 mutex_lock(&xe->rebind_resume_lock);
1936 list_del_init(&vm->preempt.pm_activate_link);
1937 mutex_unlock(&xe->rebind_resume_lock);
1938 flush_work(&vm->preempt.rebind_work);
1939 }
1940 if (xe_vm_in_fault_mode(vm))
1941 xe_svm_close(vm);
1942
1943 down_write(&vm->lock);
1944 for_each_tile(tile, xe, id) {
1945 if (vm->q[id])
1946 xe_exec_queue_last_fence_put(vm->q[id], vm);
1947 }
1948 up_write(&vm->lock);
1949
1950 for_each_tile(tile, xe, id) {
1951 if (vm->q[id]) {
1952 xe_exec_queue_kill(vm->q[id]);
1953 xe_exec_queue_put(vm->q[id]);
1954 vm->q[id] = NULL;
1955 }
1956 }
1957
1958 down_write(&vm->lock);
1959 xe_vm_lock(vm, false);
1960 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1961 vma = gpuva_to_vma(gpuva);
1962
1963 if (xe_vma_has_no_bo(vma)) {
1964 down_read(&vm->userptr.notifier_lock);
1965 vma->gpuva.flags |= XE_VMA_DESTROYED;
1966 up_read(&vm->userptr.notifier_lock);
1967 }
1968
1969 xe_vm_remove_vma(vm, vma);
1970
1971 		/* Easy case: BO-less or VM-private VMA, destroy it immediately */
1972 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1973 list_del_init(&vma->combined_links.rebind);
1974 xe_vma_destroy(vma, NULL);
1975 continue;
1976 }
1977
1978 list_move_tail(&vma->combined_links.destroy, &contested);
1979 vma->gpuva.flags |= XE_VMA_DESTROYED;
1980 }
1981
1982 /*
1983 * All vm operations will add shared fences to resv.
1984 * The only exception is eviction for a shared object,
1985 * but even so, the unbind when evicted would still
1986 * install a fence to resv. Hence it's safe to
1987 * destroy the pagetables immediately.
1988 */
1989 xe_vm_free_scratch(vm);
1990
1991 for_each_tile(tile, xe, id) {
1992 if (vm->pt_root[id]) {
1993 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1994 vm->pt_root[id] = NULL;
1995 }
1996 }
1997 xe_vm_unlock(vm);
1998
1999 /*
2000 	 * The VM is now dead, so nothing can be re-added to vm->vmas.
2001 	 * Since we hold a refcount to each BO, we can remove and free
2002 	 * the contested members safely without further locking.
2003 */
2004 list_for_each_entry_safe(vma, next_vma, &contested,
2005 combined_links.destroy) {
2006 list_del_init(&vma->combined_links.destroy);
2007 xe_vma_destroy_unlocked(vma);
2008 }
2009
2010 if (xe_vm_in_fault_mode(vm))
2011 xe_svm_fini(vm);
2012
2013 up_write(&vm->lock);
2014
2015 down_write(&xe->usm.lock);
2016 if (vm->usm.asid) {
2017 void *lookup;
2018
2019 xe_assert(xe, xe->info.has_asid);
2020 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
2021
2022 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
2023 xe_assert(xe, lookup == vm);
2024 }
2025 up_write(&xe->usm.lock);
2026
2027 for_each_tile(tile, xe, id)
2028 xe_range_fence_tree_fini(&vm->rftree[id]);
2029
2030 xe_vm_put(vm);
2031 }
2032
vm_destroy_work_func(struct work_struct * w)2033 static void vm_destroy_work_func(struct work_struct *w)
2034 {
2035 struct xe_vm *vm =
2036 container_of(w, struct xe_vm, destroy_work);
2037 struct xe_device *xe = vm->xe;
2038 struct xe_tile *tile;
2039 u8 id;
2040
2041 /* xe_vm_close_and_put was not called? */
2042 xe_assert(xe, !vm->size);
2043
2044 if (xe_vm_in_preempt_fence_mode(vm))
2045 flush_work(&vm->preempt.rebind_work);
2046
2047 mutex_destroy(&vm->snap_mutex);
2048
2049 if (vm->flags & XE_VM_FLAG_LR_MODE)
2050 xe_pm_runtime_put(xe);
2051
2052 for_each_tile(tile, xe, id)
2053 XE_WARN_ON(vm->pt_root[id]);
2054
2055 trace_xe_vm_free(vm);
2056
2057 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
2058
2059 if (vm->xef)
2060 xe_file_put(vm->xef);
2061
2062 kfree(vm);
2063 }
2064
xe_vm_free(struct drm_gpuvm * gpuvm)2065 static void xe_vm_free(struct drm_gpuvm *gpuvm)
2066 {
2067 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
2068
2069 /* To destroy the VM we need to be able to sleep */
2070 queue_work(system_unbound_wq, &vm->destroy_work);
2071 }
2072
xe_vm_lookup(struct xe_file * xef,u32 id)2073 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
2074 {
2075 struct xe_vm *vm;
2076
2077 mutex_lock(&xef->vm.lock);
2078 vm = xa_load(&xef->vm.xa, id);
2079 if (vm)
2080 xe_vm_get(vm);
2081 mutex_unlock(&xef->vm.lock);
2082
2083 return vm;
2084 }
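/*
 * Illustrative sketch only (not part of the driver): the usual pattern for
 * callers of xe_vm_lookup(). The returned VM carries a reference taken with
 * xe_vm_get() that the caller must drop with xe_vm_put() when done.
 *
 *	struct xe_vm *vm = xe_vm_lookup(xef, args->vm_id);
 *
 *	if (!vm)
 *		return -ENOENT;
 *	... operate on vm under the appropriate locks ...
 *	xe_vm_put(vm);
 */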
2085
xe_vm_pdp4_descriptor(struct xe_vm * vm,struct xe_tile * tile)2086 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
2087 {
2088 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
2089 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
2090 }
2091
2092 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)2093 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2094 {
2095 return q ? q : vm->q[0];
2096 }
2097
2098 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)2099 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2100 {
2101 unsigned int i;
2102
2103 for (i = 0; i < num_syncs; i++) {
2104 struct xe_sync_entry *e = &syncs[i];
2105
2106 if (xe_sync_is_ufence(e))
2107 return xe_sync_ufence_get(e);
2108 }
2109
2110 return NULL;
2111 }
2112
2113 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2114 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2115 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2116
xe_vm_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2117 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2118 struct drm_file *file)
2119 {
2120 struct xe_device *xe = to_xe_device(dev);
2121 struct xe_file *xef = to_xe_file(file);
2122 struct drm_xe_vm_create *args = data;
2123 struct xe_vm *vm;
2124 u32 id;
2125 int err;
2126 u32 flags = 0;
2127
2128 if (XE_IOCTL_DBG(xe, args->extensions))
2129 return -EINVAL;
2130
2131 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
2132 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2133
2134 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2135 !xe->info.has_usm))
2136 return -EINVAL;
2137
2138 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2139 return -EINVAL;
2140
2141 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2142 return -EINVAL;
2143
2144 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2145 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2146 !xe->info.needs_scratch))
2147 return -EINVAL;
2148
2149 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2150 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2151 return -EINVAL;
2152
2153 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2154 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2155 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2156 flags |= XE_VM_FLAG_LR_MODE;
2157 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2158 flags |= XE_VM_FLAG_FAULT_MODE;
2159
2160 vm = xe_vm_create(xe, flags, xef);
2161 if (IS_ERR(vm))
2162 return PTR_ERR(vm);
2163
2164 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2165 /* Warning: Security issue - never enable by default */
2166 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2167 #endif
2168
2169 /* user id alloc must always be last in ioctl to prevent UAF */
2170 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2171 if (err)
2172 goto err_close_and_put;
2173
2174 args->vm_id = id;
2175
2176 return 0;
2177
2178 err_close_and_put:
2179 xe_vm_close_and_put(vm);
2180
2181 return err;
2182 }
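/*
 * Illustrative userspace sketch (assumes a libdrm-style drmIoctl() wrapper and
 * an open device fd; not part of this file) showing how the flags validated
 * above map onto a DRM_IOCTL_XE_VM_CREATE call:
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
 *			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		return -errno;
 *	vm_id = create.vm_id;
 *
 * Note that FAULT_MODE is only accepted together with LR_MODE, as enforced by
 * the checks above.
 */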
2183
xe_vm_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2184 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2185 struct drm_file *file)
2186 {
2187 struct xe_device *xe = to_xe_device(dev);
2188 struct xe_file *xef = to_xe_file(file);
2189 struct drm_xe_vm_destroy *args = data;
2190 struct xe_vm *vm;
2191 int err = 0;
2192
2193 if (XE_IOCTL_DBG(xe, args->pad) ||
2194 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2195 return -EINVAL;
2196
2197 mutex_lock(&xef->vm.lock);
2198 vm = xa_load(&xef->vm.xa, args->vm_id);
2199 if (XE_IOCTL_DBG(xe, !vm))
2200 err = -ENOENT;
2201 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2202 err = -EBUSY;
2203 else
2204 xa_erase(&xef->vm.xa, args->vm_id);
2205 mutex_unlock(&xef->vm.lock);
2206
2207 if (!err)
2208 xe_vm_close_and_put(vm);
2209
2210 return err;
2211 }
2212
vma_matches(struct xe_vma * vma,u64 page_addr)2213 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2214 {
2215 if (page_addr > xe_vma_end(vma) - 1 ||
2216 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2217 return false;
2218
2219 return true;
2220 }
2221
2222 /**
2223 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2224 *
2225 * @vm: the xe_vm the vma belongs to
2226 * @page_addr: address to look up
 *
 * Return: the VMA covering @page_addr, or NULL if none is found.
2227 */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2228 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2229 {
2230 struct xe_vma *vma = NULL;
2231
2232 if (vm->usm.last_fault_vma) { /* Fast lookup */
2233 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2234 vma = vm->usm.last_fault_vma;
2235 }
2236 if (!vma)
2237 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2238
2239 return vma;
2240 }
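/*
 * Example (sketch; page_addr here is an assumed faulting address from the GPU
 * pagefault path): resolve an address to its VMA, falling back on the full
 * lookup when the last-fault cache misses.
 *
 *	vma = xe_vm_find_vma_by_addr(vm, page_addr);
 *	if (!vma)
 *		return -EINVAL;
 */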
2241
2242 static const u32 region_to_mem_type[] = {
2243 XE_PL_TT,
2244 XE_PL_VRAM0,
2245 XE_PL_VRAM1,
2246 };
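/*
 * Illustrative mapping: a prefetch with prefetch_mem_region_instance == 2
 * resolves to region_to_mem_type[2] == XE_PL_VRAM1, i.e. tile 1 VRAM on a
 * two-tile device, while instance 0 selects XE_PL_TT (system memory).
 */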
2247
prep_vma_destroy(struct xe_vm * vm,struct xe_vma * vma,bool post_commit)2248 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2249 bool post_commit)
2250 {
2251 down_read(&vm->userptr.notifier_lock);
2252 vma->gpuva.flags |= XE_VMA_DESTROYED;
2253 up_read(&vm->userptr.notifier_lock);
2254 if (post_commit)
2255 xe_vm_remove_vma(vm, vma);
2256 }
2257
2258 #undef ULL
2259 #define ULL unsigned long long
2260
2261 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2262 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2263 {
2264 struct xe_vma *vma;
2265
2266 switch (op->op) {
2267 case DRM_GPUVA_OP_MAP:
2268 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2269 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2270 break;
2271 case DRM_GPUVA_OP_REMAP:
2272 vma = gpuva_to_vma(op->remap.unmap->va);
2273 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2274 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2275 op->remap.unmap->keep ? 1 : 0);
2276 if (op->remap.prev)
2277 vm_dbg(&xe->drm,
2278 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2279 (ULL)op->remap.prev->va.addr,
2280 (ULL)op->remap.prev->va.range);
2281 if (op->remap.next)
2282 vm_dbg(&xe->drm,
2283 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2284 (ULL)op->remap.next->va.addr,
2285 (ULL)op->remap.next->va.range);
2286 break;
2287 case DRM_GPUVA_OP_UNMAP:
2288 vma = gpuva_to_vma(op->unmap.va);
2289 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2290 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2291 op->unmap.keep ? 1 : 0);
2292 break;
2293 case DRM_GPUVA_OP_PREFETCH:
2294 vma = gpuva_to_vma(op->prefetch.va);
2295 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2296 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2297 break;
2298 default:
2299 drm_warn(&xe->drm, "NOT POSSIBLE");
2300 }
2301 }
2302 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2303 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2304 {
2305 }
2306 #endif
2307
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2308 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2309 {
2310 if (!xe_vm_in_fault_mode(vm))
2311 return false;
2312
2313 if (!xe_vm_has_scratch(vm))
2314 return false;
2315
2316 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2317 return false;
2318
2319 return true;
2320 }
2321
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2322 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2323 {
2324 struct drm_gpuva_op *__op;
2325
2326 drm_gpuva_for_each_op(__op, ops) {
2327 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2328
2329 xe_vma_svm_prefetch_op_fini(op);
2330 }
2331 }
2332
2333 /*
2334  * Create the operations list from the IOCTL arguments and set up the operation
2335  * fields so that the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
2336 */
2337 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2338 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2339 struct xe_bo *bo, u64 bo_offset_or_userptr,
2340 u64 addr, u64 range,
2341 u32 operation, u32 flags,
2342 u32 prefetch_region, u16 pat_index)
2343 {
2344 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2345 struct drm_gpuva_ops *ops;
2346 struct drm_gpuva_op *__op;
2347 struct drm_gpuvm_bo *vm_bo;
2348 u64 range_end = addr + range;
2349 int err;
2350
2351 lockdep_assert_held_write(&vm->lock);
2352
2353 vm_dbg(&vm->xe->drm,
2354 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2355 operation, (ULL)addr, (ULL)range,
2356 (ULL)bo_offset_or_userptr);
2357
2358 switch (operation) {
2359 case DRM_XE_VM_BIND_OP_MAP:
2360 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2361 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2362 obj, bo_offset_or_userptr);
2363 break;
2364 case DRM_XE_VM_BIND_OP_UNMAP:
2365 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2366 break;
2367 case DRM_XE_VM_BIND_OP_PREFETCH:
2368 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2369 break;
2370 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2371 xe_assert(vm->xe, bo);
2372
2373 err = xe_bo_lock(bo, true);
2374 if (err)
2375 return ERR_PTR(err);
2376
2377 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2378 if (IS_ERR(vm_bo)) {
2379 xe_bo_unlock(bo);
2380 return ERR_CAST(vm_bo);
2381 }
2382
2383 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2384 drm_gpuvm_bo_put(vm_bo);
2385 xe_bo_unlock(bo);
2386 break;
2387 default:
2388 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2389 ops = ERR_PTR(-EINVAL);
2390 }
2391 if (IS_ERR(ops))
2392 return ops;
2393
2394 drm_gpuva_for_each_op(__op, ops) {
2395 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2396
2397 if (__op->op == DRM_GPUVA_OP_MAP) {
2398 op->map.immediate =
2399 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2400 op->map.read_only =
2401 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2402 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2403 op->map.is_cpu_addr_mirror = flags &
2404 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
2405 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2406 op->map.pat_index = pat_index;
2407 op->map.invalidate_on_bind =
2408 __xe_vm_needs_clear_scratch_pages(vm, flags);
2409 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2410 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2411 struct xe_svm_range *svm_range;
2412 struct drm_gpusvm_ctx ctx = {};
2413 struct xe_tile *tile;
2414 u8 id, tile_mask = 0;
2415 u32 i;
2416
2417 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2418 op->prefetch.region = prefetch_region;
2419 break;
2420 }
2421
2422 ctx.read_only = xe_vma_read_only(vma);
2423 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2424 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2425
2426 for_each_tile(tile, vm->xe, id)
2427 tile_mask |= 0x1 << id;
2428
2429 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2430 op->prefetch_range.region = prefetch_region;
2431 op->prefetch_range.ranges_count = 0;
2432 alloc_next_range:
2433 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2434
2435 if (PTR_ERR(svm_range) == -ENOENT) {
2436 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2437
2438 addr = ret == ULONG_MAX ? 0 : ret;
2439 if (addr)
2440 goto alloc_next_range;
2441 else
2442 goto print_op_label;
2443 }
2444
2445 if (IS_ERR(svm_range)) {
2446 err = PTR_ERR(svm_range);
2447 goto unwind_prefetch_ops;
2448 }
2449
2450 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) {
2451 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2452 goto check_next_range;
2453 }
2454
2455 err = xa_alloc(&op->prefetch_range.range,
2456 &i, svm_range, xa_limit_32b,
2457 GFP_KERNEL);
2458
2459 if (err)
2460 goto unwind_prefetch_ops;
2461
2462 op->prefetch_range.ranges_count++;
2463 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2464 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2465 check_next_range:
2466 if (range_end > xe_svm_range_end(svm_range) &&
2467 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2468 addr = xe_svm_range_end(svm_range);
2469 goto alloc_next_range;
2470 }
2471 }
2472 print_op_label:
2473 print_op(vm->xe, __op);
2474 }
2475
2476 return ops;
2477
2478 unwind_prefetch_ops:
2479 xe_svm_prefetch_gpuva_ops_fini(ops);
2480 drm_gpuva_ops_free(&vm->gpuvm, ops);
2481 return ERR_PTR(err);
2482 }
2483
2484 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2485
new_vma(struct xe_vm * vm,struct drm_gpuva_op_map * op,u16 pat_index,unsigned int flags)2486 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2487 u16 pat_index, unsigned int flags)
2488 {
2489 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2490 struct drm_exec exec;
2491 struct xe_vma *vma;
2492 int err = 0;
2493
2494 lockdep_assert_held_write(&vm->lock);
2495
2496 if (bo) {
2497 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2498 drm_exec_until_all_locked(&exec) {
2499 err = 0;
2500 if (!bo->vm) {
2501 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2502 drm_exec_retry_on_contention(&exec);
2503 }
2504 if (!err) {
2505 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2506 drm_exec_retry_on_contention(&exec);
2507 }
2508 if (err) {
2509 drm_exec_fini(&exec);
2510 return ERR_PTR(err);
2511 }
2512 }
2513 }
2514 vma = xe_vma_create(vm, bo, op->gem.offset,
2515 op->va.addr, op->va.addr +
2516 op->va.range - 1, pat_index, flags);
2517 if (IS_ERR(vma))
2518 goto err_unlock;
2519
2520 if (xe_vma_is_userptr(vma))
2521 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2522 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2523 err = add_preempt_fences(vm, bo);
2524
2525 err_unlock:
2526 if (bo)
2527 drm_exec_fini(&exec);
2528
2529 if (err) {
2530 prep_vma_destroy(vm, vma, false);
2531 xe_vma_destroy_unlocked(vma);
2532 vma = ERR_PTR(err);
2533 }
2534
2535 return vma;
2536 }
2537
xe_vma_max_pte_size(struct xe_vma * vma)2538 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2539 {
2540 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2541 return SZ_1G;
2542 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2543 return SZ_2M;
2544 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2545 return SZ_64K;
2546 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2547 return SZ_4K;
2548
2549 	return SZ_1G;	/* Uninitialized, use max size */
2550 }
2551
xe_vma_set_pte_size(struct xe_vma * vma,u64 size)2552 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2553 {
2554 switch (size) {
2555 case SZ_1G:
2556 vma->gpuva.flags |= XE_VMA_PTE_1G;
2557 break;
2558 case SZ_2M:
2559 vma->gpuva.flags |= XE_VMA_PTE_2M;
2560 break;
2561 case SZ_64K:
2562 vma->gpuva.flags |= XE_VMA_PTE_64K;
2563 break;
2564 case SZ_4K:
2565 vma->gpuva.flags |= XE_VMA_PTE_4K;
2566 break;
2567 }
2568 }
2569
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2570 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2571 {
2572 int err = 0;
2573
2574 lockdep_assert_held_write(&vm->lock);
2575
2576 switch (op->base.op) {
2577 case DRM_GPUVA_OP_MAP:
2578 err |= xe_vm_insert_vma(vm, op->map.vma);
2579 if (!err)
2580 op->flags |= XE_VMA_OP_COMMITTED;
2581 break;
2582 case DRM_GPUVA_OP_REMAP:
2583 {
2584 u8 tile_present =
2585 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2586
2587 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2588 true);
2589 op->flags |= XE_VMA_OP_COMMITTED;
2590
2591 if (op->remap.prev) {
2592 err |= xe_vm_insert_vma(vm, op->remap.prev);
2593 if (!err)
2594 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2595 if (!err && op->remap.skip_prev) {
2596 op->remap.prev->tile_present =
2597 tile_present;
2598 op->remap.prev = NULL;
2599 }
2600 }
2601 if (op->remap.next) {
2602 err |= xe_vm_insert_vma(vm, op->remap.next);
2603 if (!err)
2604 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2605 if (!err && op->remap.skip_next) {
2606 op->remap.next->tile_present =
2607 tile_present;
2608 op->remap.next = NULL;
2609 }
2610 }
2611
2612 /* Adjust for partial unbind after removing VMA from VM */
2613 if (!err) {
2614 op->base.remap.unmap->va->va.addr = op->remap.start;
2615 op->base.remap.unmap->va->va.range = op->remap.range;
2616 }
2617 break;
2618 }
2619 case DRM_GPUVA_OP_UNMAP:
2620 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2621 op->flags |= XE_VMA_OP_COMMITTED;
2622 break;
2623 case DRM_GPUVA_OP_PREFETCH:
2624 op->flags |= XE_VMA_OP_COMMITTED;
2625 break;
2626 default:
2627 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2628 }
2629
2630 return err;
2631 }
2632
vm_bind_ioctl_ops_parse(struct xe_vm * vm,struct drm_gpuva_ops * ops,struct xe_vma_ops * vops)2633 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2634 struct xe_vma_ops *vops)
2635 {
2636 struct xe_device *xe = vm->xe;
2637 struct drm_gpuva_op *__op;
2638 struct xe_tile *tile;
2639 u8 id, tile_mask = 0;
2640 int err = 0;
2641
2642 lockdep_assert_held_write(&vm->lock);
2643
2644 for_each_tile(tile, vm->xe, id)
2645 tile_mask |= 0x1 << id;
2646
2647 drm_gpuva_for_each_op(__op, ops) {
2648 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2649 struct xe_vma *vma;
2650 unsigned int flags = 0;
2651
2652 INIT_LIST_HEAD(&op->link);
2653 list_add_tail(&op->link, &vops->list);
2654 op->tile_mask = tile_mask;
2655
2656 switch (op->base.op) {
2657 case DRM_GPUVA_OP_MAP:
2658 {
2659 flags |= op->map.read_only ?
2660 VMA_CREATE_FLAG_READ_ONLY : 0;
2661 flags |= op->map.is_null ?
2662 VMA_CREATE_FLAG_IS_NULL : 0;
2663 flags |= op->map.dumpable ?
2664 VMA_CREATE_FLAG_DUMPABLE : 0;
2665 flags |= op->map.is_cpu_addr_mirror ?
2666 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2667
2668 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2669 flags);
2670 if (IS_ERR(vma))
2671 return PTR_ERR(vma);
2672
2673 op->map.vma = vma;
2674 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2675 !op->map.is_cpu_addr_mirror) ||
2676 op->map.invalidate_on_bind)
2677 xe_vma_ops_incr_pt_update_ops(vops,
2678 op->tile_mask, 1);
2679 break;
2680 }
2681 case DRM_GPUVA_OP_REMAP:
2682 {
2683 struct xe_vma *old =
2684 gpuva_to_vma(op->base.remap.unmap->va);
2685 bool skip = xe_vma_is_cpu_addr_mirror(old);
2686 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2687 int num_remap_ops = 0;
2688
2689 if (op->base.remap.prev)
2690 start = op->base.remap.prev->va.addr +
2691 op->base.remap.prev->va.range;
2692 if (op->base.remap.next)
2693 end = op->base.remap.next->va.addr;
2694
2695 if (xe_vma_is_cpu_addr_mirror(old) &&
2696 xe_svm_has_mapping(vm, start, end))
2697 return -EBUSY;
2698
2699 op->remap.start = xe_vma_start(old);
2700 op->remap.range = xe_vma_size(old);
2701
2702 flags |= op->base.remap.unmap->va->flags &
2703 XE_VMA_READ_ONLY ?
2704 VMA_CREATE_FLAG_READ_ONLY : 0;
2705 flags |= op->base.remap.unmap->va->flags &
2706 DRM_GPUVA_SPARSE ?
2707 VMA_CREATE_FLAG_IS_NULL : 0;
2708 flags |= op->base.remap.unmap->va->flags &
2709 XE_VMA_DUMPABLE ?
2710 VMA_CREATE_FLAG_DUMPABLE : 0;
2711 flags |= xe_vma_is_cpu_addr_mirror(old) ?
2712 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2713
2714 if (op->base.remap.prev) {
2715 vma = new_vma(vm, op->base.remap.prev,
2716 old->pat_index, flags);
2717 if (IS_ERR(vma))
2718 return PTR_ERR(vma);
2719
2720 op->remap.prev = vma;
2721
2722 /*
2723 * Userptr creates a new SG mapping so
2724 * we must also rebind.
2725 */
2726 op->remap.skip_prev = skip ||
2727 (!xe_vma_is_userptr(old) &&
2728 IS_ALIGNED(xe_vma_end(vma),
2729 xe_vma_max_pte_size(old)));
2730 if (op->remap.skip_prev) {
2731 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2732 op->remap.range -=
2733 xe_vma_end(vma) -
2734 xe_vma_start(old);
2735 op->remap.start = xe_vma_end(vma);
2736 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2737 (ULL)op->remap.start,
2738 (ULL)op->remap.range);
2739 } else {
2740 num_remap_ops++;
2741 }
2742 }
2743
2744 if (op->base.remap.next) {
2745 vma = new_vma(vm, op->base.remap.next,
2746 old->pat_index, flags);
2747 if (IS_ERR(vma))
2748 return PTR_ERR(vma);
2749
2750 op->remap.next = vma;
2751
2752 /*
2753 * Userptr creates a new SG mapping so
2754 * we must also rebind.
2755 */
2756 op->remap.skip_next = skip ||
2757 (!xe_vma_is_userptr(old) &&
2758 IS_ALIGNED(xe_vma_start(vma),
2759 xe_vma_max_pte_size(old)));
2760 if (op->remap.skip_next) {
2761 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2762 op->remap.range -=
2763 xe_vma_end(old) -
2764 xe_vma_start(vma);
2765 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2766 (ULL)op->remap.start,
2767 (ULL)op->remap.range);
2768 } else {
2769 num_remap_ops++;
2770 }
2771 }
2772 if (!skip)
2773 num_remap_ops++;
2774
2775 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2776 break;
2777 }
2778 case DRM_GPUVA_OP_UNMAP:
2779 vma = gpuva_to_vma(op->base.unmap.va);
2780
2781 if (xe_vma_is_cpu_addr_mirror(vma) &&
2782 xe_svm_has_mapping(vm, xe_vma_start(vma),
2783 xe_vma_end(vma)))
2784 return -EBUSY;
2785
2786 if (!xe_vma_is_cpu_addr_mirror(vma))
2787 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2788 break;
2789 case DRM_GPUVA_OP_PREFETCH:
2790 vma = gpuva_to_vma(op->base.prefetch.va);
2791
2792 if (xe_vma_is_userptr(vma)) {
2793 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2794 if (err)
2795 return err;
2796 }
2797
2798 if (xe_vma_is_cpu_addr_mirror(vma))
2799 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2800 op->prefetch_range.ranges_count);
2801 else
2802 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2803
2804 break;
2805 default:
2806 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2807 }
2808
2809 err = xe_vma_op_commit(vm, op);
2810 if (err)
2811 return err;
2812 }
2813
2814 return 0;
2815 }
2816
xe_vma_op_unwind(struct xe_vm * vm,struct xe_vma_op * op,bool post_commit,bool prev_post_commit,bool next_post_commit)2817 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2818 bool post_commit, bool prev_post_commit,
2819 bool next_post_commit)
2820 {
2821 lockdep_assert_held_write(&vm->lock);
2822
2823 switch (op->base.op) {
2824 case DRM_GPUVA_OP_MAP:
2825 if (op->map.vma) {
2826 prep_vma_destroy(vm, op->map.vma, post_commit);
2827 xe_vma_destroy_unlocked(op->map.vma);
2828 }
2829 break;
2830 case DRM_GPUVA_OP_UNMAP:
2831 {
2832 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2833
2834 if (vma) {
2835 down_read(&vm->userptr.notifier_lock);
2836 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2837 up_read(&vm->userptr.notifier_lock);
2838 if (post_commit)
2839 xe_vm_insert_vma(vm, vma);
2840 }
2841 break;
2842 }
2843 case DRM_GPUVA_OP_REMAP:
2844 {
2845 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2846
2847 if (op->remap.prev) {
2848 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2849 xe_vma_destroy_unlocked(op->remap.prev);
2850 }
2851 if (op->remap.next) {
2852 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2853 xe_vma_destroy_unlocked(op->remap.next);
2854 }
2855 if (vma) {
2856 down_read(&vm->userptr.notifier_lock);
2857 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2858 up_read(&vm->userptr.notifier_lock);
2859 if (post_commit)
2860 xe_vm_insert_vma(vm, vma);
2861 }
2862 break;
2863 }
2864 case DRM_GPUVA_OP_PREFETCH:
2865 /* Nothing to do */
2866 break;
2867 default:
2868 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2869 }
2870 }
2871
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2872 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2873 struct drm_gpuva_ops **ops,
2874 int num_ops_list)
2875 {
2876 int i;
2877
2878 for (i = num_ops_list - 1; i >= 0; --i) {
2879 struct drm_gpuva_ops *__ops = ops[i];
2880 struct drm_gpuva_op *__op;
2881
2882 if (!__ops)
2883 continue;
2884
2885 drm_gpuva_for_each_op_reverse(__op, __ops) {
2886 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2887
2888 xe_vma_op_unwind(vm, op,
2889 op->flags & XE_VMA_OP_COMMITTED,
2890 op->flags & XE_VMA_OP_PREV_COMMITTED,
2891 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2892 }
2893 }
2894 }
2895
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,bool validate)2896 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2897 bool validate)
2898 {
2899 struct xe_bo *bo = xe_vma_bo(vma);
2900 struct xe_vm *vm = xe_vma_vm(vma);
2901 int err = 0;
2902
2903 if (bo) {
2904 if (!bo->vm)
2905 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2906 if (!err && validate)
2907 err = xe_bo_validate(bo, vm,
2908 !xe_vm_in_preempt_fence_mode(vm));
2909 }
2910
2911 return err;
2912 }
2913
check_ufence(struct xe_vma * vma)2914 static int check_ufence(struct xe_vma *vma)
2915 {
2916 if (vma->ufence) {
2917 struct xe_user_fence * const f = vma->ufence;
2918
2919 if (!xe_sync_ufence_get_status(f))
2920 return -EBUSY;
2921
2922 vma->ufence = NULL;
2923 xe_sync_ufence_put(f);
2924 }
2925
2926 return 0;
2927 }
2928
prefetch_ranges(struct xe_vm * vm,struct xe_vma_op * op)2929 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2930 {
2931 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2932 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2933 int err = 0;
2934
2935 struct xe_svm_range *svm_range;
2936 struct drm_gpusvm_ctx ctx = {};
2937 struct xe_tile *tile;
2938 unsigned long i;
2939 u32 region;
2940
2941 if (!xe_vma_is_cpu_addr_mirror(vma))
2942 return 0;
2943
2944 region = op->prefetch_range.region;
2945
2946 ctx.read_only = xe_vma_read_only(vma);
2947 ctx.devmem_possible = devmem_possible;
2948 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2949
2950 /* TODO: Threading the migration */
2951 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2952 if (!region)
2953 xe_svm_range_migrate_to_smem(vm, svm_range);
2954
2955 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
2956 tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
2957 err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2958 if (err) {
2959 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2960 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2961 return -ENODATA;
2962 }
2963 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2964 }
2965
2966 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2967 if (err) {
2968 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2969 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2970 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2971 err = -ENODATA;
2972 return err;
2973 }
2974 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2975 }
2976
2977 return err;
2978 }
2979
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_op * op)2980 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2981 struct xe_vma_op *op)
2982 {
2983 int err = 0;
2984
2985 switch (op->base.op) {
2986 case DRM_GPUVA_OP_MAP:
2987 if (!op->map.invalidate_on_bind)
2988 err = vma_lock_and_validate(exec, op->map.vma,
2989 !xe_vm_in_fault_mode(vm) ||
2990 op->map.immediate);
2991 break;
2992 case DRM_GPUVA_OP_REMAP:
2993 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2994 if (err)
2995 break;
2996
2997 err = vma_lock_and_validate(exec,
2998 gpuva_to_vma(op->base.remap.unmap->va),
2999 false);
3000 if (!err && op->remap.prev)
3001 err = vma_lock_and_validate(exec, op->remap.prev, true);
3002 if (!err && op->remap.next)
3003 err = vma_lock_and_validate(exec, op->remap.next, true);
3004 break;
3005 case DRM_GPUVA_OP_UNMAP:
3006 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
3007 if (err)
3008 break;
3009
3010 err = vma_lock_and_validate(exec,
3011 gpuva_to_vma(op->base.unmap.va),
3012 false);
3013 break;
3014 case DRM_GPUVA_OP_PREFETCH:
3015 {
3016 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3017 u32 region;
3018
3019 if (xe_vma_is_cpu_addr_mirror(vma))
3020 region = op->prefetch_range.region;
3021 else
3022 region = op->prefetch.region;
3023
3024 xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
3025
3026 err = vma_lock_and_validate(exec,
3027 gpuva_to_vma(op->base.prefetch.va),
3028 false);
3029 if (!err && !xe_vma_has_no_bo(vma))
3030 err = xe_bo_migrate(xe_vma_bo(vma),
3031 region_to_mem_type[region]);
3032 break;
3033 }
3034 default:
3035 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3036 }
3037
3038 return err;
3039 }
3040
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)3041 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3042 {
3043 struct xe_vma_op *op;
3044 int err;
3045
3046 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3047 return 0;
3048
3049 list_for_each_entry(op, &vops->list, link) {
3050 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3051 err = prefetch_ranges(vm, op);
3052 if (err)
3053 return err;
3054 }
3055 }
3056
3057 return 0;
3058 }
3059
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)3060 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3061 struct xe_vm *vm,
3062 struct xe_vma_ops *vops)
3063 {
3064 struct xe_vma_op *op;
3065 int err;
3066
3067 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3068 if (err)
3069 return err;
3070
3071 list_for_each_entry(op, &vops->list, link) {
3072 err = op_lock_and_prep(exec, vm, op);
3073 if (err)
3074 return err;
3075 }
3076
3077 #ifdef TEST_VM_OPS_ERROR
3078 if (vops->inject_error &&
3079 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3080 return -ENOSPC;
3081 #endif
3082
3083 return 0;
3084 }
3085
op_trace(struct xe_vma_op * op)3086 static void op_trace(struct xe_vma_op *op)
3087 {
3088 switch (op->base.op) {
3089 case DRM_GPUVA_OP_MAP:
3090 trace_xe_vma_bind(op->map.vma);
3091 break;
3092 case DRM_GPUVA_OP_REMAP:
3093 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3094 if (op->remap.prev)
3095 trace_xe_vma_bind(op->remap.prev);
3096 if (op->remap.next)
3097 trace_xe_vma_bind(op->remap.next);
3098 break;
3099 case DRM_GPUVA_OP_UNMAP:
3100 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3101 break;
3102 case DRM_GPUVA_OP_PREFETCH:
3103 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3104 break;
3105 case DRM_GPUVA_OP_DRIVER:
3106 break;
3107 default:
3108 XE_WARN_ON("NOT POSSIBLE");
3109 }
3110 }
3111
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3112 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3113 {
3114 struct xe_vma_op *op;
3115
3116 list_for_each_entry(op, &vops->list, link)
3117 op_trace(op);
3118 }
3119
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3120 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3121 {
3122 struct xe_exec_queue *q = vops->q;
3123 struct xe_tile *tile;
3124 int number_tiles = 0;
3125 u8 id;
3126
3127 for_each_tile(tile, vm->xe, id) {
3128 if (vops->pt_update_ops[id].num_ops)
3129 ++number_tiles;
3130
3131 if (vops->pt_update_ops[id].q)
3132 continue;
3133
3134 if (q) {
3135 vops->pt_update_ops[id].q = q;
3136 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3137 q = list_next_entry(q, multi_gt_list);
3138 } else {
3139 vops->pt_update_ops[id].q = vm->q[id];
3140 }
3141 }
3142
3143 return number_tiles;
3144 }
3145
ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3146 static struct dma_fence *ops_execute(struct xe_vm *vm,
3147 struct xe_vma_ops *vops)
3148 {
3149 struct xe_tile *tile;
3150 struct dma_fence *fence = NULL;
3151 struct dma_fence **fences = NULL;
3152 struct dma_fence_array *cf = NULL;
3153 int number_tiles = 0, current_fence = 0, err;
3154 u8 id;
3155
3156 number_tiles = vm_ops_setup_tile_args(vm, vops);
3157 if (number_tiles == 0)
3158 return ERR_PTR(-ENODATA);
3159
3160 if (number_tiles > 1) {
3161 fences = kmalloc_array(number_tiles, sizeof(*fences),
3162 GFP_KERNEL);
3163 if (!fences) {
3164 fence = ERR_PTR(-ENOMEM);
3165 goto err_trace;
3166 }
3167 }
3168
3169 for_each_tile(tile, vm->xe, id) {
3170 if (!vops->pt_update_ops[id].num_ops)
3171 continue;
3172
3173 err = xe_pt_update_ops_prepare(tile, vops);
3174 if (err) {
3175 fence = ERR_PTR(err);
3176 goto err_out;
3177 }
3178 }
3179
3180 trace_xe_vm_ops_execute(vops);
3181
3182 for_each_tile(tile, vm->xe, id) {
3183 if (!vops->pt_update_ops[id].num_ops)
3184 continue;
3185
3186 fence = xe_pt_update_ops_run(tile, vops);
3187 if (IS_ERR(fence))
3188 goto err_out;
3189
3190 if (fences)
3191 fences[current_fence++] = fence;
3192 }
3193
3194 if (fences) {
3195 cf = dma_fence_array_create(number_tiles, fences,
3196 vm->composite_fence_ctx,
3197 vm->composite_fence_seqno++,
3198 false);
3199 if (!cf) {
3200 --vm->composite_fence_seqno;
3201 fence = ERR_PTR(-ENOMEM);
3202 goto err_out;
3203 }
3204 fence = &cf->base;
3205 }
3206
3207 for_each_tile(tile, vm->xe, id) {
3208 if (!vops->pt_update_ops[id].num_ops)
3209 continue;
3210
3211 xe_pt_update_ops_fini(tile, vops);
3212 }
3213
3214 return fence;
3215
3216 err_out:
3217 for_each_tile(tile, vm->xe, id) {
3218 if (!vops->pt_update_ops[id].num_ops)
3219 continue;
3220
3221 xe_pt_update_ops_abort(tile, vops);
3222 }
3223 while (current_fence)
3224 dma_fence_put(fences[--current_fence]);
3225 kfree(fences);
3226 kfree(cf);
3227
3228 err_trace:
3229 trace_xe_vm_ops_fail(vm);
3230 return fence;
3231 }
3232
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3233 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3234 {
3235 if (vma->ufence)
3236 xe_sync_ufence_put(vma->ufence);
3237 vma->ufence = __xe_sync_ufence_get(ufence);
3238 }
3239
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3240 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3241 struct xe_user_fence *ufence)
3242 {
3243 switch (op->base.op) {
3244 case DRM_GPUVA_OP_MAP:
3245 vma_add_ufence(op->map.vma, ufence);
3246 break;
3247 case DRM_GPUVA_OP_REMAP:
3248 if (op->remap.prev)
3249 vma_add_ufence(op->remap.prev, ufence);
3250 if (op->remap.next)
3251 vma_add_ufence(op->remap.next, ufence);
3252 break;
3253 case DRM_GPUVA_OP_UNMAP:
3254 break;
3255 case DRM_GPUVA_OP_PREFETCH:
3256 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3257 break;
3258 default:
3259 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3260 }
3261 }
3262
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3263 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3264 struct dma_fence *fence)
3265 {
3266 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3267 struct xe_user_fence *ufence;
3268 struct xe_vma_op *op;
3269 int i;
3270
3271 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3272 list_for_each_entry(op, &vops->list, link) {
3273 if (ufence)
3274 op_add_ufence(vm, op, ufence);
3275
3276 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3277 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3278 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3279 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3280 fence);
3281 }
3282 if (ufence)
3283 xe_sync_ufence_put(ufence);
3284 if (fence) {
3285 for (i = 0; i < vops->num_syncs; i++)
3286 xe_sync_entry_signal(vops->syncs + i, fence);
3287 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3288 }
3289 }
3290
vm_bind_ioctl_ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3291 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3292 struct xe_vma_ops *vops)
3293 {
3294 struct drm_exec exec;
3295 struct dma_fence *fence;
3296 int err;
3297
3298 lockdep_assert_held_write(&vm->lock);
3299
3300 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
3301 DRM_EXEC_IGNORE_DUPLICATES, 0);
3302 drm_exec_until_all_locked(&exec) {
3303 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3304 drm_exec_retry_on_contention(&exec);
3305 if (err) {
3306 fence = ERR_PTR(err);
3307 goto unlock;
3308 }
3309
3310 fence = ops_execute(vm, vops);
3311 if (IS_ERR(fence)) {
3312 if (PTR_ERR(fence) == -ENODATA)
3313 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3314 goto unlock;
3315 }
3316
3317 vm_bind_ioctl_ops_fini(vm, vops, fence);
3318 }
3319
3320 unlock:
3321 drm_exec_fini(&exec);
3322 return fence;
3323 }
3324 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3325
3326 #define SUPPORTED_FLAGS_STUB \
3327 (DRM_XE_VM_BIND_FLAG_READONLY | \
3328 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3329 DRM_XE_VM_BIND_FLAG_NULL | \
3330 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3331 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3332 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
3333
3334 #ifdef TEST_VM_OPS_ERROR
3335 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3336 #else
3337 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3338 #endif
3339
3340 #define XE_64K_PAGE_MASK 0xffffull
3341 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3342
vm_bind_ioctl_check_args(struct xe_device * xe,struct xe_vm * vm,struct drm_xe_vm_bind * args,struct drm_xe_vm_bind_op ** bind_ops)3343 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3344 struct drm_xe_vm_bind *args,
3345 struct drm_xe_vm_bind_op **bind_ops)
3346 {
3347 int err;
3348 int i;
3349
3350 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3351 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3352 return -EINVAL;
3353
3354 if (XE_IOCTL_DBG(xe, args->extensions))
3355 return -EINVAL;
3356
3357 if (args->num_binds > 1) {
3358 u64 __user *bind_user =
3359 u64_to_user_ptr(args->vector_of_binds);
3360
3361 *bind_ops = kvmalloc_array(args->num_binds,
3362 sizeof(struct drm_xe_vm_bind_op),
3363 GFP_KERNEL | __GFP_ACCOUNT |
3364 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3365 if (!*bind_ops)
3366 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3367
3368 err = copy_from_user(*bind_ops, bind_user,
3369 sizeof(struct drm_xe_vm_bind_op) *
3370 args->num_binds);
3371 if (XE_IOCTL_DBG(xe, err)) {
3372 err = -EFAULT;
3373 goto free_bind_ops;
3374 }
3375 } else {
3376 *bind_ops = &args->bind;
3377 }
3378
3379 for (i = 0; i < args->num_binds; ++i) {
3380 u64 range = (*bind_ops)[i].range;
3381 u64 addr = (*bind_ops)[i].addr;
3382 u32 op = (*bind_ops)[i].op;
3383 u32 flags = (*bind_ops)[i].flags;
3384 u32 obj = (*bind_ops)[i].obj;
3385 u64 obj_offset = (*bind_ops)[i].obj_offset;
3386 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3387 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3388 bool is_cpu_addr_mirror = flags &
3389 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3390 u16 pat_index = (*bind_ops)[i].pat_index;
3391 u16 coh_mode;
3392
3393 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3394 (!xe_vm_in_fault_mode(vm) ||
3395 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3396 err = -EINVAL;
3397 goto free_bind_ops;
3398 }
3399
3400 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3401 err = -EINVAL;
3402 goto free_bind_ops;
3403 }
3404
3405 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3406 (*bind_ops)[i].pat_index = pat_index;
3407 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3408 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3409 err = -EINVAL;
3410 goto free_bind_ops;
3411 }
3412
3413 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3414 err = -EINVAL;
3415 goto free_bind_ops;
3416 }
3417
3418 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3419 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3420 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3421 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3422 is_cpu_addr_mirror)) ||
3423 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3424 (is_null || is_cpu_addr_mirror)) ||
3425 XE_IOCTL_DBG(xe, !obj &&
3426 op == DRM_XE_VM_BIND_OP_MAP &&
3427 !is_null && !is_cpu_addr_mirror) ||
3428 XE_IOCTL_DBG(xe, !obj &&
3429 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3430 XE_IOCTL_DBG(xe, addr &&
3431 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3432 XE_IOCTL_DBG(xe, range &&
3433 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3434 XE_IOCTL_DBG(xe, obj &&
3435 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3436 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3437 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3438 XE_IOCTL_DBG(xe, obj &&
3439 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3440 XE_IOCTL_DBG(xe, prefetch_region &&
3441 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3442 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
3443 xe->info.mem_region_mask)) ||
3444 XE_IOCTL_DBG(xe, obj &&
3445 op == DRM_XE_VM_BIND_OP_UNMAP)) {
3446 err = -EINVAL;
3447 goto free_bind_ops;
3448 }
3449
3450 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3451 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3452 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3453 XE_IOCTL_DBG(xe, !range &&
3454 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3455 err = -EINVAL;
3456 goto free_bind_ops;
3457 }
3458 }
3459
3460 return 0;
3461
3462 free_bind_ops:
3463 if (args->num_binds > 1)
3464 kvfree(*bind_ops);
3465 *bind_ops = NULL;
3466 return err;
3467 }
3468
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3469 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3470 struct xe_exec_queue *q,
3471 struct xe_sync_entry *syncs,
3472 int num_syncs)
3473 {
3474 struct dma_fence *fence;
3475 int i, err = 0;
3476
3477 fence = xe_sync_in_fence_get(syncs, num_syncs,
3478 to_wait_exec_queue(vm, q), vm);
3479 if (IS_ERR(fence))
3480 return PTR_ERR(fence);
3481
3482 for (i = 0; i < num_syncs; i++)
3483 xe_sync_entry_signal(&syncs[i], fence);
3484
3485 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3486 fence);
3487 dma_fence_put(fence);
3488
3489 return err;
3490 }
3491
xe_vma_ops_init(struct xe_vma_ops * vops,struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,u32 num_syncs)3492 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3493 struct xe_exec_queue *q,
3494 struct xe_sync_entry *syncs, u32 num_syncs)
3495 {
3496 memset(vops, 0, sizeof(*vops));
3497 INIT_LIST_HEAD(&vops->list);
3498 vops->vm = vm;
3499 vops->q = q;
3500 vops->syncs = syncs;
3501 vops->num_syncs = num_syncs;
3502 vops->flags = 0;
3503 }
3504
xe_vm_bind_ioctl_validate_bo(struct xe_device * xe,struct xe_bo * bo,u64 addr,u64 range,u64 obj_offset,u16 pat_index,u32 op,u32 bind_flags)3505 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3506 u64 addr, u64 range, u64 obj_offset,
3507 u16 pat_index, u32 op, u32 bind_flags)
3508 {
3509 u16 coh_mode;
3510
3511 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3512 XE_IOCTL_DBG(xe, obj_offset >
3513 xe_bo_size(bo) - range)) {
3514 return -EINVAL;
3515 }
3516
3517 /*
3518 * Some platforms require 64k VM_BIND alignment,
3519 * specifically those with XE_VRAM_FLAGS_NEED64K.
3520 *
3521 	 * Other platforms may have BOs set to 64k physical placement,
3522 * but can be mapped at 4k offsets anyway. This check is only
3523 * there for the former case.
3524 */
3525 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3526 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3527 if (XE_IOCTL_DBG(xe, obj_offset &
3528 XE_64K_PAGE_MASK) ||
3529 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3530 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3531 return -EINVAL;
3532 }
3533 }
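	/*
	 * Worked example (illustrative): with XE_64K_PAGE_MASK == 0xffff, a
	 * NEED64K platform accepts addr = 0x20000 with range = 0x30000, but
	 * rejects addr = 0x21000 since 0x21000 & 0xffff == 0x1000.
	 */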
3534
3535 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3536 if (bo->cpu_caching) {
3537 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3538 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3539 return -EINVAL;
3540 }
3541 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3542 /*
3543 * Imported dma-buf from a different device should
3544 * require 1way or 2way coherency since we don't know
3545 		 * how it was mapped on the CPU. Just assume it is
3546 		 * potentially cached on the CPU side.
3547 */
3548 return -EINVAL;
3549 }
3550
3551 /* If a BO is protected it can only be mapped if the key is still valid */
3552 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3553 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3554 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3555 return -ENOEXEC;
3556
3557 return 0;
3558 }
3559
xe_vm_bind_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3560 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3561 {
3562 struct xe_device *xe = to_xe_device(dev);
3563 struct xe_file *xef = to_xe_file(file);
3564 struct drm_xe_vm_bind *args = data;
3565 struct drm_xe_sync __user *syncs_user;
3566 struct xe_bo **bos = NULL;
3567 struct drm_gpuva_ops **ops = NULL;
3568 struct xe_vm *vm;
3569 struct xe_exec_queue *q = NULL;
3570 u32 num_syncs, num_ufence = 0;
3571 struct xe_sync_entry *syncs = NULL;
3572 struct drm_xe_vm_bind_op *bind_ops = NULL;
3573 struct xe_vma_ops vops;
3574 struct dma_fence *fence;
3575 int err;
3576 int i;
3577
3578 vm = xe_vm_lookup(xef, args->vm_id);
3579 if (XE_IOCTL_DBG(xe, !vm))
3580 return -EINVAL;
3581
3582 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3583 if (err)
3584 goto put_vm;
3585
3586 if (args->exec_queue_id) {
3587 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3588 if (XE_IOCTL_DBG(xe, !q)) {
3589 err = -ENOENT;
3590 goto put_vm;
3591 }
3592
3593 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3594 err = -EINVAL;
3595 goto put_exec_queue;
3596 }
3597 }
3598
3599 /* Ensure all UNMAPs visible */
3600 xe_svm_flush(vm);
3601
3602 err = down_write_killable(&vm->lock);
3603 if (err)
3604 goto put_exec_queue;
3605
3606 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3607 err = -ENOENT;
3608 goto release_vm_lock;
3609 }
3610
3611 for (i = 0; i < args->num_binds; ++i) {
3612 u64 range = bind_ops[i].range;
3613 u64 addr = bind_ops[i].addr;
3614
3615 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3616 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3617 err = -EINVAL;
3618 goto release_vm_lock;
3619 }
3620 }
3621
3622 if (args->num_binds) {
3623 bos = kvcalloc(args->num_binds, sizeof(*bos),
3624 GFP_KERNEL | __GFP_ACCOUNT |
3625 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3626 if (!bos) {
3627 err = -ENOMEM;
3628 goto release_vm_lock;
3629 }
3630
3631 ops = kvcalloc(args->num_binds, sizeof(*ops),
3632 GFP_KERNEL | __GFP_ACCOUNT |
3633 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3634 if (!ops) {
3635 err = -ENOMEM;
3636 goto release_vm_lock;
3637 }
3638 }
3639
3640 for (i = 0; i < args->num_binds; ++i) {
3641 struct drm_gem_object *gem_obj;
3642 u64 range = bind_ops[i].range;
3643 u64 addr = bind_ops[i].addr;
3644 u32 obj = bind_ops[i].obj;
3645 u64 obj_offset = bind_ops[i].obj_offset;
3646 u16 pat_index = bind_ops[i].pat_index;
3647 u32 op = bind_ops[i].op;
3648 u32 bind_flags = bind_ops[i].flags;
3649
3650 if (!obj)
3651 continue;
3652
3653 gem_obj = drm_gem_object_lookup(file, obj);
3654 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3655 err = -ENOENT;
3656 goto put_obj;
3657 }
3658 bos[i] = gem_to_xe_bo(gem_obj);
3659
3660 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3661 obj_offset, pat_index, op,
3662 bind_flags);
3663 if (err)
3664 goto put_obj;
3665 }
3666
3667 if (args->num_syncs) {
3668 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3669 if (!syncs) {
3670 err = -ENOMEM;
3671 goto put_obj;
3672 }
3673 }
3674
3675 syncs_user = u64_to_user_ptr(args->syncs);
3676 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3677 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3678 &syncs_user[num_syncs],
3679 (xe_vm_in_lr_mode(vm) ?
3680 SYNC_PARSE_FLAG_LR_MODE : 0) |
3681 (!args->num_binds ?
3682 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3683 if (err)
3684 goto free_syncs;
3685
3686 if (xe_sync_is_ufence(&syncs[num_syncs]))
3687 num_ufence++;
3688 }
3689
3690 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3691 err = -EINVAL;
3692 goto free_syncs;
3693 }
3694
3695 if (!args->num_binds) {
3696 err = -ENODATA;
3697 goto free_syncs;
3698 }
3699
3700 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3701 for (i = 0; i < args->num_binds; ++i) {
3702 u64 range = bind_ops[i].range;
3703 u64 addr = bind_ops[i].addr;
3704 u32 op = bind_ops[i].op;
3705 u32 flags = bind_ops[i].flags;
3706 u64 obj_offset = bind_ops[i].obj_offset;
3707 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3708 u16 pat_index = bind_ops[i].pat_index;
3709
3710 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3711 addr, range, op, flags,
3712 prefetch_region, pat_index);
3713 if (IS_ERR(ops[i])) {
3714 err = PTR_ERR(ops[i]);
3715 ops[i] = NULL;
3716 goto unwind_ops;
3717 }
3718
3719 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3720 if (err)
3721 goto unwind_ops;
3722
3723 #ifdef TEST_VM_OPS_ERROR
3724 if (flags & FORCE_OP_ERROR) {
3725 vops.inject_error = true;
3726 vm->xe->vm_inject_error_position =
3727 (vm->xe->vm_inject_error_position + 1) %
3728 FORCE_OP_ERROR_COUNT;
3729 }
3730 #endif
3731 }
3732
3733 /* Nothing to do */
3734 if (list_empty(&vops.list)) {
3735 err = -ENODATA;
3736 goto unwind_ops;
3737 }
3738
3739 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3740 if (err)
3741 goto unwind_ops;
3742
3743 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3744 if (err)
3745 goto unwind_ops;
3746
3747 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3748 if (IS_ERR(fence))
3749 err = PTR_ERR(fence);
3750 else
3751 dma_fence_put(fence);
3752
3753 unwind_ops:
3754 if (err && err != -ENODATA)
3755 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3756 xe_vma_ops_fini(&vops);
3757 for (i = args->num_binds - 1; i >= 0; --i)
3758 if (ops[i])
3759 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3760 free_syncs:
3761 if (err == -ENODATA)
3762 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3763 while (num_syncs--)
3764 xe_sync_entry_cleanup(&syncs[num_syncs]);
3765
3766 kfree(syncs);
3767 put_obj:
3768 for (i = 0; i < args->num_binds; ++i)
3769 xe_bo_put(bos[i]);
3770 release_vm_lock:
3771 up_write(&vm->lock);
3772 put_exec_queue:
3773 if (q)
3774 xe_exec_queue_put(q);
3775 put_vm:
3776 xe_vm_put(vm);
3777 kvfree(bos);
3778 kvfree(ops);
3779 if (args->num_binds > 1)
3780 kvfree(bind_ops);
3781 return err;
3782 }
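/*
 * Illustrative userspace sketch (assumes a libdrm-style drmIoctl() wrapper;
 * bo_handle, bo_size and pat_index are placeholders queried elsewhere) of a
 * single-bind call serviced by xe_vm_bind_ioctl() above:
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind.obj = bo_handle,
 *		.bind.obj_offset = 0,
 *		.bind.addr = 0x100000,
 *		.bind.range = bo_size,
 *		.bind.op = DRM_XE_VM_BIND_OP_MAP,
 *		.bind.pat_index = pat_index,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
 *		return -errno;
 */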
3783
3784 /**
3785 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3786 * @vm: VM to bind the BO to
3787 * @bo: BO to bind
3788 * @q: exec queue to use for the bind (optional)
3789 * @addr: address at which to bind the BO
3790 * @cache_lvl: PAT cache level to use
3791 *
3792 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3793 * kernel-owned VM.
3794 *
3795  * Returns a dma_fence to track completion of the bind if the job was
3796  * successfully submitted, or an error pointer otherwise.
3797 */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)3798 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3799 struct xe_exec_queue *q, u64 addr,
3800 enum xe_cache_level cache_lvl)
3801 {
3802 struct xe_vma_ops vops;
3803 struct drm_gpuva_ops *ops = NULL;
3804 struct dma_fence *fence;
3805 int err;
3806
3807 xe_bo_get(bo);
3808 xe_vm_get(vm);
3809 if (q)
3810 xe_exec_queue_get(q);
3811
3812 down_write(&vm->lock);
3813
3814 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3815
3816 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3817 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3818 vm->xe->pat.idx[cache_lvl]);
3819 if (IS_ERR(ops)) {
3820 err = PTR_ERR(ops);
3821 goto release_vm_lock;
3822 }
3823
3824 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3825 if (err)
3826 goto release_vm_lock;
3827
3828 xe_assert(vm->xe, !list_empty(&vops.list));
3829
3830 err = xe_vma_ops_alloc(&vops, false);
3831 if (err)
3832 goto unwind_ops;
3833
3834 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3835 if (IS_ERR(fence))
3836 err = PTR_ERR(fence);
3837
3838 unwind_ops:
3839 if (err && err != -ENODATA)
3840 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3841
3842 xe_vma_ops_fini(&vops);
3843 drm_gpuva_ops_free(&vm->gpuvm, ops);
3844
3845 release_vm_lock:
3846 up_write(&vm->lock);
3847
3848 if (q)
3849 xe_exec_queue_put(q);
3850 xe_vm_put(vm);
3851 xe_bo_put(bo);
3852
3853 if (err)
3854 fence = ERR_PTR(err);
3855
3856 return fence;
3857 }
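/*
 * Example (in-kernel usage sketch; vm, bo and addr are placeholders): bind a
 * kernel BO and wait for the bind to complete before using the mapping.
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */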
3858
3859 /**
3860 * xe_vm_lock() - Lock the vm's dma_resv object
3861 * @vm: The struct xe_vm whose lock is to be locked
3862 * @intr: Whether to perform any wait interruptible
3863 *
3864 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3865 * contended lock was interrupted. If @intr is false, the function
3866 * always returns 0.
3867 */
xe_vm_lock(struct xe_vm * vm,bool intr)3868 int xe_vm_lock(struct xe_vm *vm, bool intr)
3869 {
3870 if (intr)
3871 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3872
3873 return dma_resv_lock(xe_vm_resv(vm), NULL);
3874 }
3875
/**
 * xe_vm_unlock() - Unlock the vm's dma_resv object
 * @vm: The struct xe_vm whose lock is to be released.
 *
 * Unlock the vm's dma_resv object that was previously locked by xe_vm_lock().
 */
void xe_vm_unlock(struct xe_vm *vm)
{
	dma_resv_unlock(xe_vm_resv(vm));
}

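/*
 * Illustrative usage sketch only: the expected pairing of xe_vm_lock() and
 * xe_vm_unlock() around work that touches state protected by the VM's
 * dma_resv, using the interruptible variant from process context.
 *
 *	int err;
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... operate on state protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);
 */
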
/**
 * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an
 * address range
 * @vm: The VM
 * @start: start address
 * @end: end address
 * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
 *
 * Issue a range-based TLB invalidation on the primary and media GTs of every
 * tile set in @tile_mask, and wait for the invalidations to complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
					  u64 end, u8 tile_mask)
{
	struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	struct xe_tile *tile;
	u32 fence_id = 0;
	u8 id;
	int err = 0;

	if (!tile_mask)
		return 0;

	for_each_tile(tile, vm->xe, id) {
		if (tile_mask & BIT(id)) {
			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
							   &fence[fence_id],
							   start,
							   end,
							   vm->usm.asid);
			if (err)
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->media_gt,
							   &fence[fence_id],
							   start,
							   end,
							   vm->usm.asid);
			if (err)
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

	return err;
}

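/*
 * Illustrative usage sketch only: invalidate a range on every tile of the
 * device. "start" and "length" are placeholders; real callers such as
 * xe_vm_invalidate_vma() below derive the tile mask from the tiles that
 * actually had PTEs zapped instead of using all tiles.
 *
 *	struct xe_tile *tile;
 *	u8 tile_mask = 0, id;
 *	int err;
 *
 *	for_each_tile(tile, vm->xe, id)
 *		tile_mask |= BIT(id);
 *
 *	err = xe_vm_range_tilemask_tlb_invalidation(vm, start, start + length,
 *						    tile_mask);
 */
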
/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the list of page table leaves, zeroing the entries owned by this VMA,
 * then issues the TLB invalidations and blocks until they complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	xe_device_wmb(xe);

	ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma),
						    xe_vma_end(vma), tile_mask);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

	return ret;
}

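/*
 * Illustrative usage sketch only: invalidating the GPU mappings of a
 * BO-backed VMA. The dma_resv of the VMA's BO is assumed to be the lock
 * required here, matching the lockdep assertion above; "vma" is a
 * placeholder obtained elsewhere.
 *
 *	int err;
 *
 *	xe_bo_lock(xe_vma_bo(vma), false);
 *	err = xe_vm_invalidate_vma(vma);
 *	xe_bo_unlock(xe_vma_bo(vma));
 */
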
int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}

/* Snapshot of the dumpable VMAs of a VM, built by xe_vm_snapshot_capture() */
struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;	/* GPU VA; BO offset or userptr address */
		unsigned long len;	/* Length of the mapping in bytes */
		struct xe_bo *bo;	/* Backing BO, if any (holds a reference) */
		void *data;		/* Copied contents, or ERR_PTR() on failure */
		struct mm_struct *mm;	/* mm of a userptr VMA, pinned at capture time */
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		if (bo) {
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data))
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}

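/*
 * Illustrative sketch only of the snapshot lifecycle, mirroring how the
 * devcoredump code uses these helpers: capture the layout at hang time,
 * copy the contents later from a context that may fault, print into a
 * drm_printer, then free. "p" is a placeholder struct drm_printer.
 *
 *	struct xe_vm_snapshot *snap;
 *
 *	snap = xe_vm_snapshot_capture(vm);
 *	xe_vm_snapshot_capture_delayed(snap);
 *	xe_vm_snapshot_print(snap, p);
 *	xe_vm_snapshot_free(snap);
 */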