1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_pxp.h"
38 #include "xe_res_cursor.h"
39 #include "xe_svm.h"
40 #include "xe_sync.h"
41 #include "xe_trace_bo.h"
42 #include "xe_wa.h"
43 #include "xe_hmm.h"
44
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
50 /**
51 * xe_vma_userptr_check_repin() - Advisory check for repin needed
52 * @uvma: The userptr vma
53 *
54 * Check if the userptr vma has been invalidated since last successful
55 * repin. The check is advisory only and the function can be called
56 * without the vm->userptr.notifier_lock held. There is no guarantee that the
57 * vma userptr will remain valid after a lockless check, so typically
58 * the call needs to be followed by a proper check under the notifier_lock.
59 *
60 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
61 */
62 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
63 {
64 return mmu_interval_check_retry(&uvma->userptr.notifier,
65 uvma->userptr.notifier_seq) ?
66 -EAGAIN : 0;
67 }
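/*
 * Illustrative sketch only (not part of the driver): the advisory checks in
 * this file are typically paired with a repin and an authoritative re-check
 * under the notifier lock, roughly:
 *
 *	if (xe_vm_userptr_check_repin(vm))
 *		err = xe_vm_userptr_pin(vm);		(repin outside the notifier lock)
 *	...
 *	down_read(&vm->userptr.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);		(authoritative check)
 *	up_read(&vm->userptr.notifier_lock);
 *
 * which mirrors what preempt_rebind_work_func() below does.
 */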
68
69 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
70 {
71 struct xe_vma *vma = &uvma->vma;
72 struct xe_vm *vm = xe_vma_vm(vma);
73 struct xe_device *xe = vm->xe;
74
75 lockdep_assert_held(&vm->lock);
76 xe_assert(xe, xe_vma_is_userptr(vma));
77
78 return xe_hmm_userptr_populate_range(uvma, false);
79 }
80
81 static bool preempt_fences_waiting(struct xe_vm *vm)
82 {
83 struct xe_exec_queue *q;
84
85 lockdep_assert_held(&vm->lock);
86 xe_vm_assert_held(vm);
87
88 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
89 if (!q->lr.pfence ||
90 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
91 &q->lr.pfence->flags)) {
92 return true;
93 }
94 }
95
96 return false;
97 }
98
99 static void free_preempt_fences(struct list_head *list)
100 {
101 struct list_head *link, *next;
102
103 list_for_each_safe(link, next, list)
104 xe_preempt_fence_free(to_preempt_fence_from_link(link));
105 }
106
107 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
108 unsigned int *count)
109 {
110 lockdep_assert_held(&vm->lock);
111 xe_vm_assert_held(vm);
112
113 if (*count >= vm->preempt.num_exec_queues)
114 return 0;
115
116 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
117 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
118
119 if (IS_ERR(pfence))
120 return PTR_ERR(pfence);
121
122 list_move_tail(xe_preempt_fence_link(pfence), list);
123 }
124
125 return 0;
126 }
127
128 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
129 {
130 struct xe_exec_queue *q;
131
132 xe_vm_assert_held(vm);
133
134 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
135 if (q->lr.pfence) {
136 long timeout = dma_fence_wait(q->lr.pfence, false);
137
138 /* Only -ETIME on fence indicates VM needs to be killed */
139 if (timeout < 0 || q->lr.pfence->error == -ETIME)
140 return -ETIME;
141
142 dma_fence_put(q->lr.pfence);
143 q->lr.pfence = NULL;
144 }
145 }
146
147 return 0;
148 }
149
150 static bool xe_vm_is_idle(struct xe_vm *vm)
151 {
152 struct xe_exec_queue *q;
153
154 xe_vm_assert_held(vm);
155 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
156 if (!xe_exec_queue_is_idle(q))
157 return false;
158 }
159
160 return true;
161 }
162
163 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
164 {
165 struct list_head *link;
166 struct xe_exec_queue *q;
167
168 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
169 struct dma_fence *fence;
170
171 link = list->next;
172 xe_assert(vm->xe, link != list);
173
174 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
175 q, q->lr.context,
176 ++q->lr.seqno);
177 dma_fence_put(q->lr.pfence);
178 q->lr.pfence = fence;
179 }
180 }
181
182 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
183 {
184 struct xe_exec_queue *q;
185 int err;
186
187 xe_bo_assert_held(bo);
188
189 if (!vm->preempt.num_exec_queues)
190 return 0;
191
192 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
193 if (err)
194 return err;
195
196 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
197 if (q->lr.pfence) {
198 dma_resv_add_fence(bo->ttm.base.resv,
199 q->lr.pfence,
200 DMA_RESV_USAGE_BOOKKEEP);
201 }
202
203 return 0;
204 }
205
206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
207 struct drm_exec *exec)
208 {
209 struct xe_exec_queue *q;
210
211 lockdep_assert_held(&vm->lock);
212 xe_vm_assert_held(vm);
213
214 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
215 q->ops->resume(q);
216
217 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
218 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
219 }
220 }
221
222 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
223 {
224 struct drm_gpuvm_exec vm_exec = {
225 .vm = &vm->gpuvm,
226 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
227 .num_fences = 1,
228 };
229 struct drm_exec *exec = &vm_exec.exec;
230 struct dma_fence *pfence;
231 int err;
232 bool wait;
233
234 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
235
236 down_write(&vm->lock);
237 err = drm_gpuvm_exec_lock(&vm_exec);
238 if (err)
239 goto out_up_write;
240
241 pfence = xe_preempt_fence_create(q, q->lr.context,
242 ++q->lr.seqno);
243 if (!pfence) {
244 err = -ENOMEM;
245 goto out_fini;
246 }
247
248 list_add(&q->lr.link, &vm->preempt.exec_queues);
249 ++vm->preempt.num_exec_queues;
250 q->lr.pfence = pfence;
251
252 down_read(&vm->userptr.notifier_lock);
253
254 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
255 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
256
257 /*
258 * Check to see if a preemption on the VM or a userptr invalidation is
259 * in flight; if so, trigger this preempt fence to sync state with the
260 * other preempt fences on the VM.
261 */
262 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
263 if (wait)
264 dma_fence_enable_sw_signaling(pfence);
265
266 up_read(&vm->userptr.notifier_lock);
267
268 out_fini:
269 drm_exec_fini(exec);
270 out_up_write:
271 up_write(&vm->lock);
272
273 return err;
274 }
275 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
276
277 /**
278 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
279 * @vm: The VM.
280 * @q: The exec_queue
281 *
282 * Note that this function might be called multiple times on the same queue.
283 */
284 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
285 {
286 if (!xe_vm_in_preempt_fence_mode(vm))
287 return;
288
289 down_write(&vm->lock);
290 if (!list_empty(&q->lr.link)) {
291 list_del_init(&q->lr.link);
292 --vm->preempt.num_exec_queues;
293 }
294 if (q->lr.pfence) {
295 dma_fence_enable_sw_signaling(q->lr.pfence);
296 dma_fence_put(q->lr.pfence);
297 q->lr.pfence = NULL;
298 }
299 up_write(&vm->lock);
300 }
301
302 /**
303 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
304 * that need repinning.
305 * @vm: The VM.
306 *
307 * This function checks for whether the VM has userptrs that need repinning,
308 * and provides a release-type barrier on the userptr.notifier_lock after
309 * checking.
310 *
311 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
312 */
313 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
314 {
315 lockdep_assert_held_read(&vm->userptr.notifier_lock);
316
317 return (list_empty(&vm->userptr.repin_list) &&
318 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
319 }
320
321 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
322
323 /**
324 * xe_vm_kill() - VM Kill
325 * @vm: The VM.
326 * @unlocked: Flag indicating the VM's dma-resv is not held
327 *
328 * Kill the VM by setting the banned flag, indicating the VM is no longer
329 * available for use. If in preempt fence mode, also kill all exec queues attached to the VM.
330 */
331 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
332 {
333 struct xe_exec_queue *q;
334
335 lockdep_assert_held(&vm->lock);
336
337 if (unlocked)
338 xe_vm_lock(vm, false);
339
340 vm->flags |= XE_VM_FLAG_BANNED;
341 trace_xe_vm_kill(vm);
342
343 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
344 q->ops->kill(q);
345
346 if (unlocked)
347 xe_vm_unlock(vm);
348
349 /* TODO: Inform user the VM is banned */
350 }
351
352 /**
353 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
354 * @exec: The drm_exec object used for locking before validation.
355 * @err: The error returned from ttm_bo_validate().
356 * @end: A ktime_t cookie that should be set to 0 before first use and
357 * that should be reused on subsequent calls.
358 *
359 * With multiple active VMs, under memory pressure, it is possible that
360 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
361 * Until ttm properly handles locking in such scenarios, the best thing the
362 * driver can do is retry with a timeout. Check if that is necessary, and
363 * if so unlock the drm_exec's objects while keeping the ticket to prepare
364 * for a rerun.
365 *
366 * Return: true if a retry after drm_exec_init() is recommended;
367 * false otherwise.
368 */
369 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
370 {
371 ktime_t cur;
372
373 if (err != -ENOMEM)
374 return false;
375
376 cur = ktime_get();
377 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
378 if (!ktime_before(cur, *end))
379 return false;
380
381 msleep(20);
382 return true;
383 }
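/*
 * Illustrative sketch only: the intended calling pattern is, roughly,
 *
 *	ktime_t end = 0;
 *
 *	retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	... lock and validate ...
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 *
 * as done (via the -EAGAIN path) by preempt_rebind_work_func() below.
 */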
384
385 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
386 {
387 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
388 struct drm_gpuva *gpuva;
389 int ret;
390
391 lockdep_assert_held(&vm->lock);
392 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
393 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
394 &vm->rebind_list);
395
396 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
397 if (ret)
398 return ret;
399
400 vm_bo->evicted = false;
401 return 0;
402 }
403
404 /**
405 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
406 * @vm: The vm for which we are rebinding.
407 * @exec: The struct drm_exec with the locked GEM objects.
408 * @num_fences: The number of fences to reserve for the operation, not
409 * including rebinds and validations.
410 *
411 * Validates all evicted gem objects and rebinds their vmas. Note that
412 * rebindings may cause evictions and hence the validation-rebind
413 * sequence is rerun until there are no more objects to validate.
414 *
415 * Return: 0 on success, negative error code on error. In particular,
416 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
417 * the drm_exec transaction needs to be restarted.
418 */
419 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
420 unsigned int num_fences)
421 {
422 struct drm_gem_object *obj;
423 unsigned long index;
424 int ret;
425
426 do {
427 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
428 if (ret)
429 return ret;
430
431 ret = xe_vm_rebind(vm, false);
432 if (ret)
433 return ret;
434 } while (!list_empty(&vm->gpuvm.evict.list));
435
436 drm_exec_for_each_locked_object(exec, index, obj) {
437 ret = dma_resv_reserve_fences(obj->resv, num_fences);
438 if (ret)
439 return ret;
440 }
441
442 return 0;
443 }
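/*
 * Illustrative usage sketch: this helper is meant to run inside a
 * drm_exec_until_all_locked() loop so that an -EDEADLK restart of the
 * transaction is handled by the caller, e.g.
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *
 * see xe_preempt_work_begin() below for one caller.
 */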
444
445 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
446 bool *done)
447 {
448 int err;
449
450 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
451 if (err)
452 return err;
453
454 if (xe_vm_is_idle(vm)) {
455 vm->preempt.rebind_deactivated = true;
456 *done = true;
457 return 0;
458 }
459
460 if (!preempt_fences_waiting(vm)) {
461 *done = true;
462 return 0;
463 }
464
465 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
466 if (err)
467 return err;
468
469 err = wait_for_existing_preempt_fences(vm);
470 if (err)
471 return err;
472
473 /*
474 * Add validation and rebinding to the locking loop since both can
475 * cause evictions which may require blocking dma_resv locks.
476 * The fence reservation here is intended for the new preempt fences
477 * we attach at the end of the rebind work.
478 */
479 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
480 }
481
482 static void preempt_rebind_work_func(struct work_struct *w)
483 {
484 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
485 struct drm_exec exec;
486 unsigned int fence_count = 0;
487 LIST_HEAD(preempt_fences);
488 ktime_t end = 0;
489 int err = 0;
490 long wait;
491 int __maybe_unused tries = 0;
492
493 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
494 trace_xe_vm_rebind_worker_enter(vm);
495
496 down_write(&vm->lock);
497
498 if (xe_vm_is_closed_or_banned(vm)) {
499 up_write(&vm->lock);
500 trace_xe_vm_rebind_worker_exit(vm);
501 return;
502 }
503
504 retry:
505 if (xe_vm_userptr_check_repin(vm)) {
506 err = xe_vm_userptr_pin(vm);
507 if (err)
508 goto out_unlock_outer;
509 }
510
511 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
512
513 drm_exec_until_all_locked(&exec) {
514 bool done = false;
515
516 err = xe_preempt_work_begin(&exec, vm, &done);
517 drm_exec_retry_on_contention(&exec);
518 if (err || done) {
519 drm_exec_fini(&exec);
520 if (err && xe_vm_validate_should_retry(&exec, err, &end))
521 err = -EAGAIN;
522
523 goto out_unlock_outer;
524 }
525 }
526
527 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
528 if (err)
529 goto out_unlock;
530
531 err = xe_vm_rebind(vm, true);
532 if (err)
533 goto out_unlock;
534
535 /* Wait on rebinds and munmap style VM unbinds */
536 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
537 DMA_RESV_USAGE_KERNEL,
538 false, MAX_SCHEDULE_TIMEOUT);
539 if (wait <= 0) {
540 err = -ETIME;
541 goto out_unlock;
542 }
543
544 #define retry_required(__tries, __vm) \
545 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
546 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
547 __xe_vm_userptr_needs_repin(__vm))
548
549 down_read(&vm->userptr.notifier_lock);
550 if (retry_required(tries, vm)) {
551 up_read(&vm->userptr.notifier_lock);
552 err = -EAGAIN;
553 goto out_unlock;
554 }
555
556 #undef retry_required
557
558 spin_lock(&vm->xe->ttm.lru_lock);
559 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
560 spin_unlock(&vm->xe->ttm.lru_lock);
561
562 /* Point of no return. */
563 arm_preempt_fences(vm, &preempt_fences);
564 resume_and_reinstall_preempt_fences(vm, &exec);
565 up_read(&vm->userptr.notifier_lock);
566
567 out_unlock:
568 drm_exec_fini(&exec);
569 out_unlock_outer:
570 if (err == -EAGAIN) {
571 trace_xe_vm_rebind_worker_retry(vm);
572 goto retry;
573 }
574
575 if (err) {
576 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
577 xe_vm_kill(vm, true);
578 }
579 up_write(&vm->lock);
580
581 free_preempt_fences(&preempt_fences);
582
583 trace_xe_vm_rebind_worker_exit(vm);
584 }
585
586 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
587 {
588 struct xe_userptr *userptr = &uvma->userptr;
589 struct xe_vma *vma = &uvma->vma;
590 struct dma_resv_iter cursor;
591 struct dma_fence *fence;
592 long err;
593
594 /*
595 * Tell exec and rebind worker they need to repin and rebind this
596 * userptr.
597 */
598 if (!xe_vm_in_fault_mode(vm) &&
599 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
600 spin_lock(&vm->userptr.invalidated_lock);
601 list_move_tail(&userptr->invalidate_link,
602 &vm->userptr.invalidated);
603 spin_unlock(&vm->userptr.invalidated_lock);
604 }
605
606 /*
607 * Preempt fences turn into schedule disables, pipeline these.
608 * Note that even in fault mode, we need to wait for binds and
609 * unbinds to complete, and those are attached as BOOKKEEP fences
610 * to the vm.
611 */
612 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
613 DMA_RESV_USAGE_BOOKKEEP);
614 dma_resv_for_each_fence_unlocked(&cursor, fence)
615 dma_fence_enable_sw_signaling(fence);
616 dma_resv_iter_end(&cursor);
617
618 err = dma_resv_wait_timeout(xe_vm_resv(vm),
619 DMA_RESV_USAGE_BOOKKEEP,
620 false, MAX_SCHEDULE_TIMEOUT);
621 XE_WARN_ON(err <= 0);
622
623 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
624 err = xe_vm_invalidate_vma(vma);
625 XE_WARN_ON(err);
626 }
627
628 xe_hmm_userptr_unmap(uvma);
629 }
630
631 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
632 const struct mmu_notifier_range *range,
633 unsigned long cur_seq)
634 {
635 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
636 struct xe_vma *vma = &uvma->vma;
637 struct xe_vm *vm = xe_vma_vm(vma);
638
639 xe_assert(vm->xe, xe_vma_is_userptr(vma));
640 trace_xe_vma_userptr_invalidate(vma);
641
642 if (!mmu_notifier_range_blockable(range))
643 return false;
644
645 vm_dbg(&xe_vma_vm(vma)->xe->drm,
646 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
647 xe_vma_start(vma), xe_vma_size(vma));
648
649 down_write(&vm->userptr.notifier_lock);
650 mmu_interval_set_seq(mni, cur_seq);
651
652 __vma_userptr_invalidate(vm, uvma);
653 up_write(&vm->userptr.notifier_lock);
654 trace_xe_vma_userptr_invalidate_complete(vma);
655
656 return true;
657 }
658
659 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
660 .invalidate = vma_userptr_invalidate,
661 };
662
663 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
664 /**
665 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
666 * @uvma: The userptr vma to invalidate
667 *
668 * Perform a forced userptr invalidation for testing purposes.
669 */
670 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
671 {
672 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
673
674 /* Protect against concurrent userptr pinning */
675 lockdep_assert_held(&vm->lock);
676 /* Protect against concurrent notifiers */
677 lockdep_assert_held(&vm->userptr.notifier_lock);
678 /*
679 * Protect against concurrent instances of this function and
680 * the critical exec sections
681 */
682 xe_vm_assert_held(vm);
683
684 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
685 uvma->userptr.notifier_seq))
686 uvma->userptr.notifier_seq -= 2;
687 __vma_userptr_invalidate(vm, uvma);
688 }
689 #endif
690
691 int xe_vm_userptr_pin(struct xe_vm *vm)
692 {
693 struct xe_userptr_vma *uvma, *next;
694 int err = 0;
695
696 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
697 lockdep_assert_held_write(&vm->lock);
698
699 /* Collect invalidated userptrs */
700 spin_lock(&vm->userptr.invalidated_lock);
701 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
702 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
703 userptr.invalidate_link) {
704 list_del_init(&uvma->userptr.invalidate_link);
705 list_add_tail(&uvma->userptr.repin_link,
706 &vm->userptr.repin_list);
707 }
708 spin_unlock(&vm->userptr.invalidated_lock);
709
710 /* Pin and move to bind list */
711 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
712 userptr.repin_link) {
713 err = xe_vma_userptr_pin_pages(uvma);
714 if (err == -EFAULT) {
715 list_del_init(&uvma->userptr.repin_link);
716 /*
717 * We might have already done the pin once, but
718 * then had to retry before the re-bind happened, due
719 * to some other condition in the caller, but in the
720 * meantime the userptr got dinged by the notifier such
721 * that we need to revalidate here, but this time we hit
722 * the EFAULT. In such a case make sure we remove
723 * ourselves from the rebind list to avoid going down in
724 * flames.
725 */
726 if (!list_empty(&uvma->vma.combined_links.rebind))
727 list_del_init(&uvma->vma.combined_links.rebind);
728
729 /* Wait for pending binds */
730 xe_vm_lock(vm, false);
731 dma_resv_wait_timeout(xe_vm_resv(vm),
732 DMA_RESV_USAGE_BOOKKEEP,
733 false, MAX_SCHEDULE_TIMEOUT);
734
735 down_read(&vm->userptr.notifier_lock);
736 err = xe_vm_invalidate_vma(&uvma->vma);
737 up_read(&vm->userptr.notifier_lock);
738 xe_vm_unlock(vm);
739 if (err)
740 break;
741 } else {
742 if (err)
743 break;
744
745 list_del_init(&uvma->userptr.repin_link);
746 list_move_tail(&uvma->vma.combined_links.rebind,
747 &vm->rebind_list);
748 }
749 }
750
751 if (err) {
752 down_write(&vm->userptr.notifier_lock);
753 spin_lock(&vm->userptr.invalidated_lock);
754 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
755 userptr.repin_link) {
756 list_del_init(&uvma->userptr.repin_link);
757 list_move_tail(&uvma->userptr.invalidate_link,
758 &vm->userptr.invalidated);
759 }
760 spin_unlock(&vm->userptr.invalidated_lock);
761 up_write(&vm->userptr.notifier_lock);
762 }
763 return err;
764 }
765
766 /**
767 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
768 * that need repinning.
769 * @vm: The VM.
770 *
771 * This function does an advisory check for whether the VM has userptrs that
772 * need repinning.
773 *
774 * Return: 0 if there are no indications of userptrs needing repinning,
775 * -EAGAIN if there are.
776 */
777 int xe_vm_userptr_check_repin(struct xe_vm *vm)
778 {
779 return (list_empty_careful(&vm->userptr.repin_list) &&
780 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
781 }
782
783 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
784 {
785 int i;
786
787 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
788 if (!vops->pt_update_ops[i].num_ops)
789 continue;
790
791 vops->pt_update_ops[i].ops =
792 kmalloc_array(vops->pt_update_ops[i].num_ops,
793 sizeof(*vops->pt_update_ops[i].ops),
794 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
795 if (!vops->pt_update_ops[i].ops)
796 return array_of_binds ? -ENOBUFS : -ENOMEM;
797 }
798
799 return 0;
800 }
801 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
802
803 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
804 {
805 struct xe_vma *vma;
806
807 vma = gpuva_to_vma(op->base.prefetch.va);
808
809 if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
810 xa_destroy(&op->prefetch_range.range);
811 }
812
813 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
814 {
815 struct xe_vma_op *op;
816
817 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
818 return;
819
820 list_for_each_entry(op, &vops->list, link)
821 xe_vma_svm_prefetch_op_fini(op);
822 }
823
824 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
825 {
826 int i;
827
828 xe_vma_svm_prefetch_ops_fini(vops);
829
830 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
831 kfree(vops->pt_update_ops[i].ops);
832 }
833
834 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
835 {
836 int i;
837
838 if (!inc_val)
839 return;
840
841 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
842 if (BIT(i) & tile_mask)
843 vops->pt_update_ops[i].num_ops += inc_val;
844 }
845
846 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
847 u8 tile_mask)
848 {
849 INIT_LIST_HEAD(&op->link);
850 op->tile_mask = tile_mask;
851 op->base.op = DRM_GPUVA_OP_MAP;
852 op->base.map.va.addr = vma->gpuva.va.addr;
853 op->base.map.va.range = vma->gpuva.va.range;
854 op->base.map.gem.obj = vma->gpuva.gem.obj;
855 op->base.map.gem.offset = vma->gpuva.gem.offset;
856 op->map.vma = vma;
857 op->map.immediate = true;
858 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
859 op->map.is_null = xe_vma_is_null(vma);
860 }
861
862 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
863 u8 tile_mask)
864 {
865 struct xe_vma_op *op;
866
867 op = kzalloc(sizeof(*op), GFP_KERNEL);
868 if (!op)
869 return -ENOMEM;
870
871 xe_vm_populate_rebind(op, vma, tile_mask);
872 list_add_tail(&op->link, &vops->list);
873 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
874
875 return 0;
876 }
877
878 static struct dma_fence *ops_execute(struct xe_vm *vm,
879 struct xe_vma_ops *vops);
880 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
881 struct xe_exec_queue *q,
882 struct xe_sync_entry *syncs, u32 num_syncs);
883
884 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
885 {
886 struct dma_fence *fence;
887 struct xe_vma *vma, *next;
888 struct xe_vma_ops vops;
889 struct xe_vma_op *op, *next_op;
890 int err, i;
891
892 lockdep_assert_held(&vm->lock);
893 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
894 list_empty(&vm->rebind_list))
895 return 0;
896
897 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
898 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
899 vops.pt_update_ops[i].wait_vm_bookkeep = true;
900
901 xe_vm_assert_held(vm);
902 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
903 xe_assert(vm->xe, vma->tile_present);
904
905 if (rebind_worker)
906 trace_xe_vma_rebind_worker(vma);
907 else
908 trace_xe_vma_rebind_exec(vma);
909
910 err = xe_vm_ops_add_rebind(&vops, vma,
911 vma->tile_present);
912 if (err)
913 goto free_ops;
914 }
915
916 err = xe_vma_ops_alloc(&vops, false);
917 if (err)
918 goto free_ops;
919
920 fence = ops_execute(vm, &vops);
921 if (IS_ERR(fence)) {
922 err = PTR_ERR(fence);
923 } else {
924 dma_fence_put(fence);
925 list_for_each_entry_safe(vma, next, &vm->rebind_list,
926 combined_links.rebind)
927 list_del_init(&vma->combined_links.rebind);
928 }
929 free_ops:
930 list_for_each_entry_safe(op, next_op, &vops.list, link) {
931 list_del(&op->link);
932 kfree(op);
933 }
934 xe_vma_ops_fini(&vops);
935
936 return err;
937 }
938
939 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
940 {
941 struct dma_fence *fence = NULL;
942 struct xe_vma_ops vops;
943 struct xe_vma_op *op, *next_op;
944 struct xe_tile *tile;
945 u8 id;
946 int err;
947
948 lockdep_assert_held(&vm->lock);
949 xe_vm_assert_held(vm);
950 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
951
952 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
953 for_each_tile(tile, vm->xe, id) {
954 vops.pt_update_ops[id].wait_vm_bookkeep = true;
955 vops.pt_update_ops[tile->id].q =
956 xe_tile_migrate_exec_queue(tile);
957 }
958
959 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
960 if (err)
961 return ERR_PTR(err);
962
963 err = xe_vma_ops_alloc(&vops, false);
964 if (err) {
965 fence = ERR_PTR(err);
966 goto free_ops;
967 }
968
969 fence = ops_execute(vm, &vops);
970
971 free_ops:
972 list_for_each_entry_safe(op, next_op, &vops.list, link) {
973 list_del(&op->link);
974 kfree(op);
975 }
976 xe_vma_ops_fini(&vops);
977
978 return fence;
979 }
980
981 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
982 struct xe_vma *vma,
983 struct xe_svm_range *range,
984 u8 tile_mask)
985 {
986 INIT_LIST_HEAD(&op->link);
987 op->tile_mask = tile_mask;
988 op->base.op = DRM_GPUVA_OP_DRIVER;
989 op->subop = XE_VMA_SUBOP_MAP_RANGE;
990 op->map_range.vma = vma;
991 op->map_range.range = range;
992 }
993
994 static int
995 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
996 struct xe_vma *vma,
997 struct xe_svm_range *range,
998 u8 tile_mask)
999 {
1000 struct xe_vma_op *op;
1001
1002 op = kzalloc(sizeof(*op), GFP_KERNEL);
1003 if (!op)
1004 return -ENOMEM;
1005
1006 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
1007 list_add_tail(&op->link, &vops->list);
1008 xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
1009
1010 return 0;
1011 }
1012
1013 /**
1014 * xe_vm_range_rebind() - VM range (re)bind
1015 * @vm: The VM which the range belongs to.
1016 * @vma: The VMA which the range belongs to.
1017 * @range: SVM range to rebind.
1018 * @tile_mask: Tile mask to bind the range to.
1019 *
1020 * (re)bind SVM range setting up GPU page tables for the range.
1021 *
1022 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
1023 * failure
1024 */
1025 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
1026 struct xe_vma *vma,
1027 struct xe_svm_range *range,
1028 u8 tile_mask)
1029 {
1030 struct dma_fence *fence = NULL;
1031 struct xe_vma_ops vops;
1032 struct xe_vma_op *op, *next_op;
1033 struct xe_tile *tile;
1034 u8 id;
1035 int err;
1036
1037 lockdep_assert_held(&vm->lock);
1038 xe_vm_assert_held(vm);
1039 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1040 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
1041
1042 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1043 for_each_tile(tile, vm->xe, id) {
1044 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1045 vops.pt_update_ops[tile->id].q =
1046 xe_tile_migrate_exec_queue(tile);
1047 }
1048
1049 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
1050 if (err)
1051 return ERR_PTR(err);
1052
1053 err = xe_vma_ops_alloc(&vops, false);
1054 if (err) {
1055 fence = ERR_PTR(err);
1056 goto free_ops;
1057 }
1058
1059 fence = ops_execute(vm, &vops);
1060
1061 free_ops:
1062 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1063 list_del(&op->link);
1064 kfree(op);
1065 }
1066 xe_vma_ops_fini(&vops);
1067
1068 return fence;
1069 }
1070
1071 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
1072 struct xe_svm_range *range)
1073 {
1074 INIT_LIST_HEAD(&op->link);
1075 op->tile_mask = range->tile_present;
1076 op->base.op = DRM_GPUVA_OP_DRIVER;
1077 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
1078 op->unmap_range.range = range;
1079 }
1080
1081 static int
1082 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
1083 struct xe_svm_range *range)
1084 {
1085 struct xe_vma_op *op;
1086
1087 op = kzalloc(sizeof(*op), GFP_KERNEL);
1088 if (!op)
1089 return -ENOMEM;
1090
1091 xe_vm_populate_range_unbind(op, range);
1092 list_add_tail(&op->link, &vops->list);
1093 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
1094
1095 return 0;
1096 }
1097
1098 /**
1099 * xe_vm_range_unbind() - VM range unbind
1100 * @vm: The VM which the range belongs to.
1101 * @range: SVM range to unbind.
1102 *
1103 * Unbind SVM range removing the GPU page tables for the range.
1104 *
1105 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
1106 * failure
1107 */
1108 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
1109 struct xe_svm_range *range)
1110 {
1111 struct dma_fence *fence = NULL;
1112 struct xe_vma_ops vops;
1113 struct xe_vma_op *op, *next_op;
1114 struct xe_tile *tile;
1115 u8 id;
1116 int err;
1117
1118 lockdep_assert_held(&vm->lock);
1119 xe_vm_assert_held(vm);
1120 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1121
1122 if (!range->tile_present)
1123 return dma_fence_get_stub();
1124
1125 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1126 for_each_tile(tile, vm->xe, id) {
1127 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1128 vops.pt_update_ops[tile->id].q =
1129 xe_tile_migrate_exec_queue(tile);
1130 }
1131
1132 err = xe_vm_ops_add_range_unbind(&vops, range);
1133 if (err)
1134 return ERR_PTR(err);
1135
1136 err = xe_vma_ops_alloc(&vops, false);
1137 if (err) {
1138 fence = ERR_PTR(err);
1139 goto free_ops;
1140 }
1141
1142 fence = ops_execute(vm, &vops);
1143
1144 free_ops:
1145 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1146 list_del(&op->link);
1147 kfree(op);
1148 }
1149 xe_vma_ops_fini(&vops);
1150
1151 return fence;
1152 }
1153
1154 static void xe_vma_free(struct xe_vma *vma)
1155 {
1156 if (xe_vma_is_userptr(vma))
1157 kfree(to_userptr_vma(vma));
1158 else
1159 kfree(vma);
1160 }
1161
1162 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
1163 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
1164 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
1165 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3)
1166
1167 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
1168 struct xe_bo *bo,
1169 u64 bo_offset_or_userptr,
1170 u64 start, u64 end,
1171 u16 pat_index, unsigned int flags)
1172 {
1173 struct xe_vma *vma;
1174 struct xe_tile *tile;
1175 u8 id;
1176 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
1177 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
1178 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
1179 bool is_cpu_addr_mirror =
1180 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
1181
1182 xe_assert(vm->xe, start < end);
1183 xe_assert(vm->xe, end < vm->size);
1184
1185 /*
1186 * Allocate and ensure that the xe_vma_is_userptr() return
1187 * matches what was allocated.
1188 */
1189 if (!bo && !is_null && !is_cpu_addr_mirror) {
1190 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
1191
1192 if (!uvma)
1193 return ERR_PTR(-ENOMEM);
1194
1195 vma = &uvma->vma;
1196 } else {
1197 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
1198 if (!vma)
1199 return ERR_PTR(-ENOMEM);
1200
1201 if (is_cpu_addr_mirror)
1202 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
1203 if (is_null)
1204 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
1205 if (bo)
1206 vma->gpuva.gem.obj = &bo->ttm.base;
1207 }
1208
1209 INIT_LIST_HEAD(&vma->combined_links.rebind);
1210
1211 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1212 vma->gpuva.vm = &vm->gpuvm;
1213 vma->gpuva.va.addr = start;
1214 vma->gpuva.va.range = end - start + 1;
1215 if (read_only)
1216 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1217 if (dumpable)
1218 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1219
1220 for_each_tile(tile, vm->xe, id)
1221 vma->tile_mask |= 0x1 << id;
1222
1223 if (vm->xe->info.has_atomic_enable_pte_bit)
1224 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1225
1226 vma->pat_index = pat_index;
1227
1228 if (bo) {
1229 struct drm_gpuvm_bo *vm_bo;
1230
1231 xe_bo_assert_held(bo);
1232
1233 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1234 if (IS_ERR(vm_bo)) {
1235 xe_vma_free(vma);
1236 return ERR_CAST(vm_bo);
1237 }
1238
1239 drm_gpuvm_bo_extobj_add(vm_bo);
1240 drm_gem_object_get(&bo->ttm.base);
1241 vma->gpuva.gem.offset = bo_offset_or_userptr;
1242 drm_gpuva_link(&vma->gpuva, vm_bo);
1243 drm_gpuvm_bo_put(vm_bo);
1244 } else /* userptr or null */ {
1245 if (!is_null && !is_cpu_addr_mirror) {
1246 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1247 u64 size = end - start + 1;
1248 int err;
1249
1250 INIT_LIST_HEAD(&userptr->invalidate_link);
1251 INIT_LIST_HEAD(&userptr->repin_link);
1252 vma->gpuva.gem.offset = bo_offset_or_userptr;
1253 mutex_init(&userptr->unmap_mutex);
1254
1255 err = mmu_interval_notifier_insert(&userptr->notifier,
1256 current->mm,
1257 xe_vma_userptr(vma), size,
1258 &vma_userptr_notifier_ops);
1259 if (err) {
1260 xe_vma_free(vma);
1261 return ERR_PTR(err);
1262 }
1263
1264 userptr->notifier_seq = LONG_MAX;
1265 }
1266
1267 xe_vm_get(vm);
1268 }
1269
1270 return vma;
1271 }
1272
1273 static void xe_vma_destroy_late(struct xe_vma *vma)
1274 {
1275 struct xe_vm *vm = xe_vma_vm(vma);
1276
1277 if (vma->ufence) {
1278 xe_sync_ufence_put(vma->ufence);
1279 vma->ufence = NULL;
1280 }
1281
1282 if (xe_vma_is_userptr(vma)) {
1283 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1284 struct xe_userptr *userptr = &uvma->userptr;
1285
1286 if (userptr->sg)
1287 xe_hmm_userptr_free_sg(uvma);
1288
1289 /*
1290 * Since userptr pages are not pinned, we can't remove
1291 * the notifier until we're sure the GPU is not accessing
1292 * them anymore
1293 */
1294 mmu_interval_notifier_remove(&userptr->notifier);
1295 mutex_destroy(&userptr->unmap_mutex);
1296 xe_vm_put(vm);
1297 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1298 xe_vm_put(vm);
1299 } else {
1300 xe_bo_put(xe_vma_bo(vma));
1301 }
1302
1303 xe_vma_free(vma);
1304 }
1305
1306 static void vma_destroy_work_func(struct work_struct *w)
1307 {
1308 struct xe_vma *vma =
1309 container_of(w, struct xe_vma, destroy_work);
1310
1311 xe_vma_destroy_late(vma);
1312 }
1313
1314 static void vma_destroy_cb(struct dma_fence *fence,
1315 struct dma_fence_cb *cb)
1316 {
1317 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1318
1319 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1320 queue_work(system_unbound_wq, &vma->destroy_work);
1321 }
1322
1323 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1324 {
1325 struct xe_vm *vm = xe_vma_vm(vma);
1326
1327 lockdep_assert_held_write(&vm->lock);
1328 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1329
1330 if (xe_vma_is_userptr(vma)) {
1331 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1332
1333 spin_lock(&vm->userptr.invalidated_lock);
1334 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1335 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1336 spin_unlock(&vm->userptr.invalidated_lock);
1337 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1338 xe_bo_assert_held(xe_vma_bo(vma));
1339
1340 drm_gpuva_unlink(&vma->gpuva);
1341 }
1342
1343 xe_vm_assert_held(vm);
1344 if (fence) {
1345 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1346 vma_destroy_cb);
1347
1348 if (ret) {
1349 XE_WARN_ON(ret != -ENOENT);
1350 xe_vma_destroy_late(vma);
1351 }
1352 } else {
1353 xe_vma_destroy_late(vma);
1354 }
1355 }
1356
1357 /**
1358 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1359 * @exec: The drm_exec object we're currently locking for.
1360 * @vma: The vma for which we want to lock the vm resv and any attached
1361 * object's resv.
1362 *
1363 * Return: 0 on success, negative error code on error. In particular
1364 * may return -EDEADLK on WW transaction contention and -EINTR if
1365 * an interruptible wait is terminated by a signal.
1366 */
1367 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1368 {
1369 struct xe_vm *vm = xe_vma_vm(vma);
1370 struct xe_bo *bo = xe_vma_bo(vma);
1371 int err;
1372
1373 XE_WARN_ON(!vm);
1374
1375 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1376 if (!err && bo && !bo->vm)
1377 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1378
1379 return err;
1380 }
1381
1382 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1383 {
1384 struct drm_exec exec;
1385 int err;
1386
1387 drm_exec_init(&exec, 0, 0);
1388 drm_exec_until_all_locked(&exec) {
1389 err = xe_vm_lock_vma(&exec, vma);
1390 drm_exec_retry_on_contention(&exec);
1391 if (XE_WARN_ON(err))
1392 break;
1393 }
1394
1395 xe_vma_destroy(vma, NULL);
1396
1397 drm_exec_fini(&exec);
1398 }
1399
1400 struct xe_vma *
1401 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1402 {
1403 struct drm_gpuva *gpuva;
1404
1405 lockdep_assert_held(&vm->lock);
1406
1407 if (xe_vm_is_closed_or_banned(vm))
1408 return NULL;
1409
1410 xe_assert(vm->xe, start + range <= vm->size);
1411
1412 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1413
1414 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1415 }
1416
1417 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1418 {
1419 int err;
1420
1421 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1422 lockdep_assert_held(&vm->lock);
1423
1424 mutex_lock(&vm->snap_mutex);
1425 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1426 mutex_unlock(&vm->snap_mutex);
1427 XE_WARN_ON(err); /* Shouldn't be possible */
1428
1429 return err;
1430 }
1431
1432 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1433 {
1434 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1435 lockdep_assert_held(&vm->lock);
1436
1437 mutex_lock(&vm->snap_mutex);
1438 drm_gpuva_remove(&vma->gpuva);
1439 mutex_unlock(&vm->snap_mutex);
1440 if (vm->usm.last_fault_vma == vma)
1441 vm->usm.last_fault_vma = NULL;
1442 }
1443
1444 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1445 {
1446 struct xe_vma_op *op;
1447
1448 op = kzalloc(sizeof(*op), GFP_KERNEL);
1449
1450 if (unlikely(!op))
1451 return NULL;
1452
1453 return &op->base;
1454 }
1455
1456 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1457
1458 static const struct drm_gpuvm_ops gpuvm_ops = {
1459 .op_alloc = xe_vm_op_alloc,
1460 .vm_bo_validate = xe_gpuvm_validate,
1461 .vm_free = xe_vm_free,
1462 };
1463
1464 static u64 pde_encode_pat_index(u16 pat_index)
1465 {
1466 u64 pte = 0;
1467
1468 if (pat_index & BIT(0))
1469 pte |= XE_PPGTT_PTE_PAT0;
1470
1471 if (pat_index & BIT(1))
1472 pte |= XE_PPGTT_PTE_PAT1;
1473
1474 return pte;
1475 }
1476
1477 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1478 {
1479 u64 pte = 0;
1480
1481 if (pat_index & BIT(0))
1482 pte |= XE_PPGTT_PTE_PAT0;
1483
1484 if (pat_index & BIT(1))
1485 pte |= XE_PPGTT_PTE_PAT1;
1486
1487 if (pat_index & BIT(2)) {
1488 if (pt_level)
1489 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1490 else
1491 pte |= XE_PPGTT_PTE_PAT2;
1492 }
1493
1494 if (pat_index & BIT(3))
1495 pte |= XELPG_PPGTT_PTE_PAT3;
1496
1497 if (pat_index & (BIT(4)))
1498 pte |= XE2_PPGTT_PTE_PAT4;
1499
1500 return pte;
1501 }
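/*
 * For example (illustrative only, derived from the bit tests above):
 * pat_index 3 (0b00011) sets PAT0 | PAT1, while pat_index 4 (0b00100) sets
 * PAT2 for leaf PTEs (pt_level == 0) or PDPE_PAT2 for directory entries.
 */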
1502
1503 static u64 pte_encode_ps(u32 pt_level)
1504 {
1505 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1506
1507 if (pt_level == 1)
1508 return XE_PDE_PS_2M;
1509 else if (pt_level == 2)
1510 return XE_PDPE_PS_1G;
1511
1512 return 0;
1513 }
1514
1515 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1516 const u16 pat_index)
1517 {
1518 u64 pde;
1519
1520 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1521 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1522 pde |= pde_encode_pat_index(pat_index);
1523
1524 return pde;
1525 }
1526
1527 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1528 u16 pat_index, u32 pt_level)
1529 {
1530 u64 pte;
1531
1532 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1533 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1534 pte |= pte_encode_pat_index(pat_index, pt_level);
1535 pte |= pte_encode_ps(pt_level);
1536
1537 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1538 pte |= XE_PPGTT_PTE_DM;
1539
1540 return pte;
1541 }
1542
1543 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1544 u16 pat_index, u32 pt_level)
1545 {
1546 pte |= XE_PAGE_PRESENT;
1547
1548 if (likely(!xe_vma_read_only(vma)))
1549 pte |= XE_PAGE_RW;
1550
1551 pte |= pte_encode_pat_index(pat_index, pt_level);
1552 pte |= pte_encode_ps(pt_level);
1553
1554 if (unlikely(xe_vma_is_null(vma)))
1555 pte |= XE_PTE_NULL;
1556
1557 return pte;
1558 }
1559
1560 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1561 u16 pat_index,
1562 u32 pt_level, bool devmem, u64 flags)
1563 {
1564 u64 pte;
1565
1566 /* Avoid passing random bits directly as flags */
1567 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1568
1569 pte = addr;
1570 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1571 pte |= pte_encode_pat_index(pat_index, pt_level);
1572 pte |= pte_encode_ps(pt_level);
1573
1574 if (devmem)
1575 pte |= XE_PPGTT_PTE_DM;
1576
1577 pte |= flags;
1578
1579 return pte;
1580 }
1581
1582 static const struct xe_pt_ops xelp_pt_ops = {
1583 .pte_encode_bo = xelp_pte_encode_bo,
1584 .pte_encode_vma = xelp_pte_encode_vma,
1585 .pte_encode_addr = xelp_pte_encode_addr,
1586 .pde_encode_bo = xelp_pde_encode_bo,
1587 };
1588
1589 static void vm_destroy_work_func(struct work_struct *w);
1590
1591 /**
1592 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1593 * given tile and vm.
1594 * @xe: xe device.
1595 * @tile: tile to set up for.
1596 * @vm: vm to set up for.
1597 *
1598 * Sets up a pagetable tree with one page-table per level and a single
1599 * leaf PTE. All pagetable entries point to the single page-table or,
1600 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads while
1601 * writes become NOPs.
1602 *
1603 * Return: 0 on success, negative error code on error.
1604 */
1605 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1606 struct xe_vm *vm)
1607 {
1608 u8 id = tile->id;
1609 int i;
1610
1611 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1612 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1613 if (IS_ERR(vm->scratch_pt[id][i])) {
1614 int err = PTR_ERR(vm->scratch_pt[id][i]);
1615
1616 vm->scratch_pt[id][i] = NULL;
1617 return err;
1618 }
1619
1620 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1621 }
1622
1623 return 0;
1624 }
1625 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1626
1627 static void xe_vm_free_scratch(struct xe_vm *vm)
1628 {
1629 struct xe_tile *tile;
1630 u8 id;
1631
1632 if (!xe_vm_has_scratch(vm))
1633 return;
1634
1635 for_each_tile(tile, vm->xe, id) {
1636 u32 i;
1637
1638 if (!vm->pt_root[id])
1639 continue;
1640
1641 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1642 if (vm->scratch_pt[id][i])
1643 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1644 }
1645 }
1646
1647 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1648 {
1649 struct drm_gem_object *vm_resv_obj;
1650 struct xe_vm *vm;
1651 int err, number_tiles = 0;
1652 struct xe_tile *tile;
1653 u8 id;
1654
1655 /*
1656 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1657 * ever be in faulting mode.
1658 */
1659 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1660
1661 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1662 if (!vm)
1663 return ERR_PTR(-ENOMEM);
1664
1665 vm->xe = xe;
1666
1667 vm->size = 1ull << xe->info.va_bits;
1668 vm->flags = flags;
1669
1670 if (xef)
1671 vm->xef = xe_file_get(xef);
1672 /**
1673 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1674 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1675 * under a user-VM lock when the PXP session is started at exec_queue
1676 * creation time. Those are different VMs and therefore there is no risk
1677 * of deadlock, but we need to tell lockdep that this is the case or it
1678 * will print a warning.
1679 */
1680 if (flags & XE_VM_FLAG_GSC) {
1681 static struct lock_class_key gsc_vm_key;
1682
1683 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1684 } else {
1685 init_rwsem(&vm->lock);
1686 }
1687 mutex_init(&vm->snap_mutex);
1688
1689 INIT_LIST_HEAD(&vm->rebind_list);
1690
1691 INIT_LIST_HEAD(&vm->userptr.repin_list);
1692 INIT_LIST_HEAD(&vm->userptr.invalidated);
1693 init_rwsem(&vm->userptr.notifier_lock);
1694 spin_lock_init(&vm->userptr.invalidated_lock);
1695
1696 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1697
1698 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1699
1700 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1701 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1702
1703 for_each_tile(tile, xe, id)
1704 xe_range_fence_tree_init(&vm->rftree[id]);
1705
1706 vm->pt_ops = &xelp_pt_ops;
1707
1708 /*
1709 * Long-running workloads are not protected by the scheduler references.
1710 * By design, run_job for long-running workloads returns NULL and the
1711 * scheduler drops all the references of it, hence protecting the VM
1712 * for this case is necessary.
1713 */
1714 if (flags & XE_VM_FLAG_LR_MODE) {
1715 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1716 xe_pm_runtime_get_noresume(xe);
1717 }
1718
1719 if (flags & XE_VM_FLAG_FAULT_MODE) {
1720 err = xe_svm_init(vm);
1721 if (err)
1722 goto err_no_resv;
1723 }
1724
1725 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1726 if (!vm_resv_obj) {
1727 err = -ENOMEM;
1728 goto err_svm_fini;
1729 }
1730
1731 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1732 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1733
1734 drm_gem_object_put(vm_resv_obj);
1735
1736 err = xe_vm_lock(vm, true);
1737 if (err)
1738 goto err_close;
1739
1740 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1741 vm->flags |= XE_VM_FLAG_64K;
1742
1743 for_each_tile(tile, xe, id) {
1744 if (flags & XE_VM_FLAG_MIGRATION &&
1745 tile->id != XE_VM_FLAG_TILE_ID(flags))
1746 continue;
1747
1748 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1749 if (IS_ERR(vm->pt_root[id])) {
1750 err = PTR_ERR(vm->pt_root[id]);
1751 vm->pt_root[id] = NULL;
1752 goto err_unlock_close;
1753 }
1754 }
1755
1756 if (xe_vm_has_scratch(vm)) {
1757 for_each_tile(tile, xe, id) {
1758 if (!vm->pt_root[id])
1759 continue;
1760
1761 err = xe_vm_create_scratch(xe, tile, vm);
1762 if (err)
1763 goto err_unlock_close;
1764 }
1765 vm->batch_invalidate_tlb = true;
1766 }
1767
1768 if (vm->flags & XE_VM_FLAG_LR_MODE)
1769 vm->batch_invalidate_tlb = false;
1770
1771 /* Fill pt_root after allocating scratch tables */
1772 for_each_tile(tile, xe, id) {
1773 if (!vm->pt_root[id])
1774 continue;
1775
1776 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1777 }
1778 xe_vm_unlock(vm);
1779
1780 /* Kernel migration VM shouldn't have a circular loop.. */
1781 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1782 for_each_tile(tile, xe, id) {
1783 struct xe_exec_queue *q;
1784 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1785
1786 if (!vm->pt_root[id])
1787 continue;
1788
1789 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1790 if (IS_ERR(q)) {
1791 err = PTR_ERR(q);
1792 goto err_close;
1793 }
1794 vm->q[id] = q;
1795 number_tiles++;
1796 }
1797 }
1798
1799 if (number_tiles > 1)
1800 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1801
1802 if (xef && xe->info.has_asid) {
1803 u32 asid;
1804
1805 down_write(&xe->usm.lock);
1806 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1807 XA_LIMIT(1, XE_MAX_ASID - 1),
1808 &xe->usm.next_asid, GFP_KERNEL);
1809 up_write(&xe->usm.lock);
1810 if (err < 0)
1811 goto err_unlock_close;
1812
1813 vm->usm.asid = asid;
1814 }
1815
1816 trace_xe_vm_create(vm);
1817
1818 return vm;
1819
1820 err_unlock_close:
1821 xe_vm_unlock(vm);
1822 err_close:
1823 xe_vm_close_and_put(vm);
1824 return ERR_PTR(err);
1825
1826 err_svm_fini:
1827 if (flags & XE_VM_FLAG_FAULT_MODE) {
1828 vm->size = 0; /* close the vm */
1829 xe_svm_fini(vm);
1830 }
1831 err_no_resv:
1832 mutex_destroy(&vm->snap_mutex);
1833 for_each_tile(tile, xe, id)
1834 xe_range_fence_tree_fini(&vm->rftree[id]);
1835 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1836 if (vm->xef)
1837 xe_file_put(vm->xef);
1838 kfree(vm);
1839 if (flags & XE_VM_FLAG_LR_MODE)
1840 xe_pm_runtime_put(xe);
1841 return ERR_PTR(err);
1842 }
1843
1844 static void xe_vm_close(struct xe_vm *vm)
1845 {
1846 struct xe_device *xe = vm->xe;
1847 bool bound;
1848 int idx;
1849
1850 bound = drm_dev_enter(&xe->drm, &idx);
1851
1852 down_write(&vm->lock);
1853 if (xe_vm_in_fault_mode(vm))
1854 xe_svm_notifier_lock(vm);
1855
1856 vm->size = 0;
1857
1858 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1859 struct xe_tile *tile;
1860 struct xe_gt *gt;
1861 u8 id;
1862
1863 /* Wait for pending binds */
1864 dma_resv_wait_timeout(xe_vm_resv(vm),
1865 DMA_RESV_USAGE_BOOKKEEP,
1866 false, MAX_SCHEDULE_TIMEOUT);
1867
1868 if (bound) {
1869 for_each_tile(tile, xe, id)
1870 if (vm->pt_root[id])
1871 xe_pt_clear(xe, vm->pt_root[id]);
1872
1873 for_each_gt(gt, xe, id)
1874 xe_gt_tlb_invalidation_vm(gt, vm);
1875 }
1876 }
1877
1878 if (xe_vm_in_fault_mode(vm))
1879 xe_svm_notifier_unlock(vm);
1880 up_write(&vm->lock);
1881
1882 if (bound)
1883 drm_dev_exit(idx);
1884 }
1885
1886 void xe_vm_close_and_put(struct xe_vm *vm)
1887 {
1888 LIST_HEAD(contested);
1889 struct xe_device *xe = vm->xe;
1890 struct xe_tile *tile;
1891 struct xe_vma *vma, *next_vma;
1892 struct drm_gpuva *gpuva, *next;
1893 u8 id;
1894
1895 xe_assert(xe, !vm->preempt.num_exec_queues);
1896
1897 xe_vm_close(vm);
1898 if (xe_vm_in_preempt_fence_mode(vm))
1899 flush_work(&vm->preempt.rebind_work);
1900 if (xe_vm_in_fault_mode(vm))
1901 xe_svm_close(vm);
1902
1903 down_write(&vm->lock);
1904 for_each_tile(tile, xe, id) {
1905 if (vm->q[id])
1906 xe_exec_queue_last_fence_put(vm->q[id], vm);
1907 }
1908 up_write(&vm->lock);
1909
1910 for_each_tile(tile, xe, id) {
1911 if (vm->q[id]) {
1912 xe_exec_queue_kill(vm->q[id]);
1913 xe_exec_queue_put(vm->q[id]);
1914 vm->q[id] = NULL;
1915 }
1916 }
1917
1918 down_write(&vm->lock);
1919 xe_vm_lock(vm, false);
1920 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1921 vma = gpuva_to_vma(gpuva);
1922
1923 if (xe_vma_has_no_bo(vma)) {
1924 down_read(&vm->userptr.notifier_lock);
1925 vma->gpuva.flags |= XE_VMA_DESTROYED;
1926 up_read(&vm->userptr.notifier_lock);
1927 }
1928
1929 xe_vm_remove_vma(vm, vma);
1930
1931 /* easy case, remove from VMA? */
1932 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1933 list_del_init(&vma->combined_links.rebind);
1934 xe_vma_destroy(vma, NULL);
1935 continue;
1936 }
1937
1938 list_move_tail(&vma->combined_links.destroy, &contested);
1939 vma->gpuva.flags |= XE_VMA_DESTROYED;
1940 }
1941
1942 /*
1943 * All vm operations will add shared fences to resv.
1944 * The only exception is eviction for a shared object,
1945 * but even so, the unbind when evicted would still
1946 * install a fence to resv. Hence it's safe to
1947 * destroy the pagetables immediately.
1948 */
1949 xe_vm_free_scratch(vm);
1950
1951 for_each_tile(tile, xe, id) {
1952 if (vm->pt_root[id]) {
1953 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1954 vm->pt_root[id] = NULL;
1955 }
1956 }
1957 xe_vm_unlock(vm);
1958
1959 /*
1960 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1961 * Since we hold a refcount to the bo, we can remove and free
1962 * the members safely without locking.
1963 */
1964 list_for_each_entry_safe(vma, next_vma, &contested,
1965 combined_links.destroy) {
1966 list_del_init(&vma->combined_links.destroy);
1967 xe_vma_destroy_unlocked(vma);
1968 }
1969
1970 if (xe_vm_in_fault_mode(vm))
1971 xe_svm_fini(vm);
1972
1973 up_write(&vm->lock);
1974
1975 down_write(&xe->usm.lock);
1976 if (vm->usm.asid) {
1977 void *lookup;
1978
1979 xe_assert(xe, xe->info.has_asid);
1980 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1981
1982 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1983 xe_assert(xe, lookup == vm);
1984 }
1985 up_write(&xe->usm.lock);
1986
1987 for_each_tile(tile, xe, id)
1988 xe_range_fence_tree_fini(&vm->rftree[id]);
1989
1990 xe_vm_put(vm);
1991 }
1992
1993 static void vm_destroy_work_func(struct work_struct *w)
1994 {
1995 struct xe_vm *vm =
1996 container_of(w, struct xe_vm, destroy_work);
1997 struct xe_device *xe = vm->xe;
1998 struct xe_tile *tile;
1999 u8 id;
2000
2001 /* xe_vm_close_and_put was not called? */
2002 xe_assert(xe, !vm->size);
2003
2004 if (xe_vm_in_preempt_fence_mode(vm))
2005 flush_work(&vm->preempt.rebind_work);
2006
2007 mutex_destroy(&vm->snap_mutex);
2008
2009 if (vm->flags & XE_VM_FLAG_LR_MODE)
2010 xe_pm_runtime_put(xe);
2011
2012 for_each_tile(tile, xe, id)
2013 XE_WARN_ON(vm->pt_root[id]);
2014
2015 trace_xe_vm_free(vm);
2016
2017 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
2018
2019 if (vm->xef)
2020 xe_file_put(vm->xef);
2021
2022 kfree(vm);
2023 }
2024
2025 static void xe_vm_free(struct drm_gpuvm *gpuvm)
2026 {
2027 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
2028
2029 /* To destroy the VM we need to be able to sleep */
2030 queue_work(system_unbound_wq, &vm->destroy_work);
2031 }
2032
2033 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
2034 {
2035 struct xe_vm *vm;
2036
2037 mutex_lock(&xef->vm.lock);
2038 vm = xa_load(&xef->vm.xa, id);
2039 if (vm)
2040 xe_vm_get(vm);
2041 mutex_unlock(&xef->vm.lock);
2042
2043 return vm;
2044 }
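/*
 * Illustrative caller pattern for xe_vm_lookup() (a sketch only, mirroring
 * the usage in xe_vm_bind_ioctl() further below; error handling trimmed):
 *
 *	struct xe_vm *vm = xe_vm_lookup(xef, args->vm_id);
 *
 *	if (!vm)
 *		return -ENOENT;
 *	...
 *	xe_vm_put(vm);
 *
 * The lookup takes a reference under xef->vm.lock, so every successful call
 * must be balanced with xe_vm_put().
 */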
2045
2046 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
2047 {
2048 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
2049 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
2050 }
2051
2052 static struct xe_exec_queue *
2053 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2054 {
2055 return q ? q : vm->q[0];
2056 }
2057
2058 static struct xe_user_fence *
2059 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2060 {
2061 unsigned int i;
2062
2063 for (i = 0; i < num_syncs; i++) {
2064 struct xe_sync_entry *e = &syncs[i];
2065
2066 if (xe_sync_is_ufence(e))
2067 return xe_sync_ufence_get(e);
2068 }
2069
2070 return NULL;
2071 }
2072
2073 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2074 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2075 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2076
2077 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2078 struct drm_file *file)
2079 {
2080 struct xe_device *xe = to_xe_device(dev);
2081 struct xe_file *xef = to_xe_file(file);
2082 struct drm_xe_vm_create *args = data;
2083 struct xe_vm *vm;
2084 u32 id;
2085 int err;
2086 u32 flags = 0;
2087
2088 if (XE_IOCTL_DBG(xe, args->extensions))
2089 return -EINVAL;
2090
2091 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
2092 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2093
2094 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2095 !xe->info.has_usm))
2096 return -EINVAL;
2097
2098 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2099 return -EINVAL;
2100
2101 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2102 return -EINVAL;
2103
2104 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2105 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2106 !xe->info.needs_scratch))
2107 return -EINVAL;
2108
2109 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2110 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2111 return -EINVAL;
2112
2113 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2114 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2115 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2116 flags |= XE_VM_FLAG_LR_MODE;
2117 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2118 flags |= XE_VM_FLAG_FAULT_MODE;
2119
2120 vm = xe_vm_create(xe, flags, xef);
2121 if (IS_ERR(vm))
2122 return PTR_ERR(vm);
2123
2124 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2125 /* Warning: Security issue - never enable by default */
2126 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2127 #endif
2128
2129 /* user id alloc must always be last in ioctl to prevent UAF */
2130 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2131 if (err)
2132 goto err_close_and_put;
2133
2134 args->vm_id = id;
2135
2136 return 0;
2137
2138 err_close_and_put:
2139 xe_vm_close_and_put(vm);
2140
2141 return err;
2142 }
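/*
 * Illustrative userspace invocation of this ioctl (a sketch only; it assumes
 * the DRM_IOCTL_XE_VM_CREATE wrapper and struct drm_xe_vm_create from
 * uapi/drm/xe_drm.h and omits error handling):
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
 *			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 *	// create.vm_id now names the new VM
 *
 * Per the checks above, FAULT_MODE additionally requires LR_MODE and a
 * device with USM support.
 */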
2143
2144 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2145 struct drm_file *file)
2146 {
2147 struct xe_device *xe = to_xe_device(dev);
2148 struct xe_file *xef = to_xe_file(file);
2149 struct drm_xe_vm_destroy *args = data;
2150 struct xe_vm *vm;
2151 int err = 0;
2152
2153 if (XE_IOCTL_DBG(xe, args->pad) ||
2154 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2155 return -EINVAL;
2156
2157 mutex_lock(&xef->vm.lock);
2158 vm = xa_load(&xef->vm.xa, args->vm_id);
2159 if (XE_IOCTL_DBG(xe, !vm))
2160 err = -ENOENT;
2161 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2162 err = -EBUSY;
2163 else
2164 xa_erase(&xef->vm.xa, args->vm_id);
2165 mutex_unlock(&xef->vm.lock);
2166
2167 if (!err)
2168 xe_vm_close_and_put(vm);
2169
2170 return err;
2171 }
2172
2173 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2174 {
2175 if (page_addr > xe_vma_end(vma) - 1 ||
2176 page_addr + SZ_4K - 1 < xe_vma_start(vma))
2177 return false;
2178
2179 return true;
2180 }
2181
2182 /**
2183 * xe_vm_find_vma_by_addr() - Find a VMA by its address
2184 *
2185 * @vm: the xe_vm the vma belongs to
2186  * @page_addr: address to look up
 *
 * Return: the VMA covering @page_addr, or NULL if no such VMA exists.
 */
2188 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2189 {
2190 struct xe_vma *vma = NULL;
2191
2192 if (vm->usm.last_fault_vma) { /* Fast lookup */
2193 if (vma_matches(vm->usm.last_fault_vma, page_addr))
2194 vma = vm->usm.last_fault_vma;
2195 }
2196 if (!vma)
2197 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2198
2199 return vma;
2200 }
2201
2202 static const u32 region_to_mem_type[] = {
2203 XE_PL_TT,
2204 XE_PL_VRAM0,
2205 XE_PL_VRAM1,
2206 };
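/*
 * Index 0 above is the system-memory (TT) placement; indices 1 and 2 select
 * a VRAM instance. prefetch_ranges() below relies on this layout when it
 * converts a non-zero region into a tile:
 *
 *	tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
 */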
2207
2208 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2209 bool post_commit)
2210 {
2211 down_read(&vm->userptr.notifier_lock);
2212 vma->gpuva.flags |= XE_VMA_DESTROYED;
2213 up_read(&vm->userptr.notifier_lock);
2214 if (post_commit)
2215 xe_vm_remove_vma(vm, vma);
2216 }
2217
2218 #undef ULL
2219 #define ULL unsigned long long
2220
2221 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2222 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2223 {
2224 struct xe_vma *vma;
2225
2226 switch (op->op) {
2227 case DRM_GPUVA_OP_MAP:
2228 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2229 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2230 break;
2231 case DRM_GPUVA_OP_REMAP:
2232 vma = gpuva_to_vma(op->remap.unmap->va);
2233 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2234 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2235 op->remap.unmap->keep ? 1 : 0);
2236 if (op->remap.prev)
2237 vm_dbg(&xe->drm,
2238 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2239 (ULL)op->remap.prev->va.addr,
2240 (ULL)op->remap.prev->va.range);
2241 if (op->remap.next)
2242 vm_dbg(&xe->drm,
2243 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2244 (ULL)op->remap.next->va.addr,
2245 (ULL)op->remap.next->va.range);
2246 break;
2247 case DRM_GPUVA_OP_UNMAP:
2248 vma = gpuva_to_vma(op->unmap.va);
2249 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2250 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2251 op->unmap.keep ? 1 : 0);
2252 break;
2253 case DRM_GPUVA_OP_PREFETCH:
2254 vma = gpuva_to_vma(op->prefetch.va);
2255 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2256 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2257 break;
2258 default:
2259 drm_warn(&xe->drm, "NOT POSSIBLE");
2260 }
2261 }
2262 #else
2263 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2264 {
2265 }
2266 #endif
2267
2268 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2269 {
2270 if (!xe_vm_in_fault_mode(vm))
2271 return false;
2272
2273 if (!xe_vm_has_scratch(vm))
2274 return false;
2275
2276 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2277 return false;
2278
2279 return true;
2280 }
2281
2282 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2283 {
2284 struct drm_gpuva_op *__op;
2285
2286 drm_gpuva_for_each_op(__op, ops) {
2287 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2288
2289 xe_vma_svm_prefetch_op_fini(op);
2290 }
2291 }
2292
2293 /*
2294 * Create operations list from IOCTL arguments, setup operations fields so parse
2295 * and commit steps are decoupled from IOCTL arguments. This step can fail.
2296 */
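/*
 * In the bind-ioctl path the ops list produced here flows through the stages
 * below (function names as used later in this file), with
 * vm_bind_ioctl_ops_unwind() rolling back committed operations on failure:
 *
 *	ops = vm_bind_ioctl_ops_create(vm, &vops, ...);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 */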
2297 static struct drm_gpuva_ops *
2298 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2299 struct xe_bo *bo, u64 bo_offset_or_userptr,
2300 u64 addr, u64 range,
2301 u32 operation, u32 flags,
2302 u32 prefetch_region, u16 pat_index)
2303 {
2304 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2305 struct drm_gpuva_ops *ops;
2306 struct drm_gpuva_op *__op;
2307 struct drm_gpuvm_bo *vm_bo;
2308 u64 range_end = addr + range;
2309 int err;
2310
2311 lockdep_assert_held_write(&vm->lock);
2312
2313 vm_dbg(&vm->xe->drm,
2314 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2315 operation, (ULL)addr, (ULL)range,
2316 (ULL)bo_offset_or_userptr);
2317
2318 switch (operation) {
2319 case DRM_XE_VM_BIND_OP_MAP:
2320 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2321 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2322 obj, bo_offset_or_userptr);
2323 break;
2324 case DRM_XE_VM_BIND_OP_UNMAP:
2325 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2326 break;
2327 case DRM_XE_VM_BIND_OP_PREFETCH:
2328 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2329 break;
2330 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2331 xe_assert(vm->xe, bo);
2332
2333 err = xe_bo_lock(bo, true);
2334 if (err)
2335 return ERR_PTR(err);
2336
2337 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2338 if (IS_ERR(vm_bo)) {
2339 xe_bo_unlock(bo);
2340 return ERR_CAST(vm_bo);
2341 }
2342
2343 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2344 drm_gpuvm_bo_put(vm_bo);
2345 xe_bo_unlock(bo);
2346 break;
2347 default:
2348 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2349 ops = ERR_PTR(-EINVAL);
2350 }
2351 if (IS_ERR(ops))
2352 return ops;
2353
2354 drm_gpuva_for_each_op(__op, ops) {
2355 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2356
2357 if (__op->op == DRM_GPUVA_OP_MAP) {
2358 op->map.immediate =
2359 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2360 op->map.read_only =
2361 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2362 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2363 op->map.is_cpu_addr_mirror = flags &
2364 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
2365 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2366 op->map.pat_index = pat_index;
2367 op->map.invalidate_on_bind =
2368 __xe_vm_needs_clear_scratch_pages(vm, flags);
2369 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2370 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2371 struct xe_svm_range *svm_range;
2372 struct drm_gpusvm_ctx ctx = {};
2373 struct xe_tile *tile;
2374 u8 id, tile_mask = 0;
2375 u32 i;
2376
2377 if (!xe_vma_is_cpu_addr_mirror(vma)) {
2378 op->prefetch.region = prefetch_region;
2379 break;
2380 }
2381
2382 ctx.read_only = xe_vma_read_only(vma);
2383 ctx.devmem_possible = IS_DGFX(vm->xe) &&
2384 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2385
2386 for_each_tile(tile, vm->xe, id)
2387 tile_mask |= 0x1 << id;
2388
2389 xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2390 op->prefetch_range.region = prefetch_region;
2391 op->prefetch_range.ranges_count = 0;
2392 alloc_next_range:
2393 svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2394
2395 if (PTR_ERR(svm_range) == -ENOENT) {
2396 u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2397
2398 addr = ret == ULONG_MAX ? 0 : ret;
2399 if (addr)
2400 goto alloc_next_range;
2401 else
2402 goto print_op_label;
2403 }
2404
2405 if (IS_ERR(svm_range)) {
2406 err = PTR_ERR(svm_range);
2407 goto unwind_prefetch_ops;
2408 }
2409
2410 if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) {
2411 xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2412 goto check_next_range;
2413 }
2414
2415 err = xa_alloc(&op->prefetch_range.range,
2416 &i, svm_range, xa_limit_32b,
2417 GFP_KERNEL);
2418
2419 if (err)
2420 goto unwind_prefetch_ops;
2421
2422 op->prefetch_range.ranges_count++;
2423 vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2424 xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2425 check_next_range:
2426 if (range_end > xe_svm_range_end(svm_range) &&
2427 xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2428 addr = xe_svm_range_end(svm_range);
2429 goto alloc_next_range;
2430 }
2431 }
2432 print_op_label:
2433 print_op(vm->xe, __op);
2434 }
2435
2436 return ops;
2437
2438 unwind_prefetch_ops:
2439 xe_svm_prefetch_gpuva_ops_fini(ops);
2440 drm_gpuva_ops_free(&vm->gpuvm, ops);
2441 return ERR_PTR(err);
2442 }
2443
2444 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2445
2446 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2447 u16 pat_index, unsigned int flags)
2448 {
2449 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2450 struct drm_exec exec;
2451 struct xe_vma *vma;
2452 int err = 0;
2453
2454 lockdep_assert_held_write(&vm->lock);
2455
2456 if (bo) {
2457 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2458 drm_exec_until_all_locked(&exec) {
2459 err = 0;
2460 if (!bo->vm) {
2461 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2462 drm_exec_retry_on_contention(&exec);
2463 }
2464 if (!err) {
2465 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2466 drm_exec_retry_on_contention(&exec);
2467 }
2468 if (err) {
2469 drm_exec_fini(&exec);
2470 return ERR_PTR(err);
2471 }
2472 }
2473 }
2474 vma = xe_vma_create(vm, bo, op->gem.offset,
2475 op->va.addr, op->va.addr +
2476 op->va.range - 1, pat_index, flags);
2477 if (IS_ERR(vma))
2478 goto err_unlock;
2479
2480 if (xe_vma_is_userptr(vma))
2481 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2482 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2483 err = add_preempt_fences(vm, bo);
2484
2485 err_unlock:
2486 if (bo)
2487 drm_exec_fini(&exec);
2488
2489 if (err) {
2490 prep_vma_destroy(vm, vma, false);
2491 xe_vma_destroy_unlocked(vma);
2492 vma = ERR_PTR(err);
2493 }
2494
2495 return vma;
2496 }
2497
2498 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2499 {
2500 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2501 return SZ_1G;
2502 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2503 return SZ_2M;
2504 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2505 return SZ_64K;
2506 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2507 return SZ_4K;
2508
2509 return SZ_1G; /* Uninitialized, used max size */
2510 }
2511
2512 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2513 {
2514 switch (size) {
2515 case SZ_1G:
2516 vma->gpuva.flags |= XE_VMA_PTE_1G;
2517 break;
2518 case SZ_2M:
2519 vma->gpuva.flags |= XE_VMA_PTE_2M;
2520 break;
2521 case SZ_64K:
2522 vma->gpuva.flags |= XE_VMA_PTE_64K;
2523 break;
2524 case SZ_4K:
2525 vma->gpuva.flags |= XE_VMA_PTE_4K;
2526 break;
2527 }
2528 }
2529
2530 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2531 {
2532 int err = 0;
2533
2534 lockdep_assert_held_write(&vm->lock);
2535
2536 switch (op->base.op) {
2537 case DRM_GPUVA_OP_MAP:
2538 err |= xe_vm_insert_vma(vm, op->map.vma);
2539 if (!err)
2540 op->flags |= XE_VMA_OP_COMMITTED;
2541 break;
2542 case DRM_GPUVA_OP_REMAP:
2543 {
2544 u8 tile_present =
2545 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2546
2547 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2548 true);
2549 op->flags |= XE_VMA_OP_COMMITTED;
2550
2551 if (op->remap.prev) {
2552 err |= xe_vm_insert_vma(vm, op->remap.prev);
2553 if (!err)
2554 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2555 if (!err && op->remap.skip_prev) {
2556 op->remap.prev->tile_present =
2557 tile_present;
2558 op->remap.prev = NULL;
2559 }
2560 }
2561 if (op->remap.next) {
2562 err |= xe_vm_insert_vma(vm, op->remap.next);
2563 if (!err)
2564 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2565 if (!err && op->remap.skip_next) {
2566 op->remap.next->tile_present =
2567 tile_present;
2568 op->remap.next = NULL;
2569 }
2570 }
2571
2572 /* Adjust for partial unbind after removing VMA from VM */
2573 if (!err) {
2574 op->base.remap.unmap->va->va.addr = op->remap.start;
2575 op->base.remap.unmap->va->va.range = op->remap.range;
2576 }
2577 break;
2578 }
2579 case DRM_GPUVA_OP_UNMAP:
2580 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2581 op->flags |= XE_VMA_OP_COMMITTED;
2582 break;
2583 case DRM_GPUVA_OP_PREFETCH:
2584 op->flags |= XE_VMA_OP_COMMITTED;
2585 break;
2586 default:
2587 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2588 }
2589
2590 return err;
2591 }
2592
2593 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2594 struct xe_vma_ops *vops)
2595 {
2596 struct xe_device *xe = vm->xe;
2597 struct drm_gpuva_op *__op;
2598 struct xe_tile *tile;
2599 u8 id, tile_mask = 0;
2600 int err = 0;
2601
2602 lockdep_assert_held_write(&vm->lock);
2603
2604 for_each_tile(tile, vm->xe, id)
2605 tile_mask |= 0x1 << id;
2606
2607 drm_gpuva_for_each_op(__op, ops) {
2608 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2609 struct xe_vma *vma;
2610 unsigned int flags = 0;
2611
2612 INIT_LIST_HEAD(&op->link);
2613 list_add_tail(&op->link, &vops->list);
2614 op->tile_mask = tile_mask;
2615
2616 switch (op->base.op) {
2617 case DRM_GPUVA_OP_MAP:
2618 {
2619 flags |= op->map.read_only ?
2620 VMA_CREATE_FLAG_READ_ONLY : 0;
2621 flags |= op->map.is_null ?
2622 VMA_CREATE_FLAG_IS_NULL : 0;
2623 flags |= op->map.dumpable ?
2624 VMA_CREATE_FLAG_DUMPABLE : 0;
2625 flags |= op->map.is_cpu_addr_mirror ?
2626 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2627
2628 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2629 flags);
2630 if (IS_ERR(vma))
2631 return PTR_ERR(vma);
2632
2633 op->map.vma = vma;
2634 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2635 !op->map.is_cpu_addr_mirror) ||
2636 op->map.invalidate_on_bind)
2637 xe_vma_ops_incr_pt_update_ops(vops,
2638 op->tile_mask, 1);
2639 break;
2640 }
2641 case DRM_GPUVA_OP_REMAP:
2642 {
2643 struct xe_vma *old =
2644 gpuva_to_vma(op->base.remap.unmap->va);
2645 bool skip = xe_vma_is_cpu_addr_mirror(old);
2646 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2647 int num_remap_ops = 0;
2648
2649 if (op->base.remap.prev)
2650 start = op->base.remap.prev->va.addr +
2651 op->base.remap.prev->va.range;
2652 if (op->base.remap.next)
2653 end = op->base.remap.next->va.addr;
2654
2655 if (xe_vma_is_cpu_addr_mirror(old) &&
2656 xe_svm_has_mapping(vm, start, end))
2657 return -EBUSY;
2658
2659 op->remap.start = xe_vma_start(old);
2660 op->remap.range = xe_vma_size(old);
2661
2662 flags |= op->base.remap.unmap->va->flags &
2663 XE_VMA_READ_ONLY ?
2664 VMA_CREATE_FLAG_READ_ONLY : 0;
2665 flags |= op->base.remap.unmap->va->flags &
2666 DRM_GPUVA_SPARSE ?
2667 VMA_CREATE_FLAG_IS_NULL : 0;
2668 flags |= op->base.remap.unmap->va->flags &
2669 XE_VMA_DUMPABLE ?
2670 VMA_CREATE_FLAG_DUMPABLE : 0;
2671 flags |= xe_vma_is_cpu_addr_mirror(old) ?
2672 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2673
2674 if (op->base.remap.prev) {
2675 vma = new_vma(vm, op->base.remap.prev,
2676 old->pat_index, flags);
2677 if (IS_ERR(vma))
2678 return PTR_ERR(vma);
2679
2680 op->remap.prev = vma;
2681
2682 /*
2683 * Userptr creates a new SG mapping so
2684 * we must also rebind.
2685 */
2686 op->remap.skip_prev = skip ||
2687 (!xe_vma_is_userptr(old) &&
2688 IS_ALIGNED(xe_vma_end(vma),
2689 xe_vma_max_pte_size(old)));
2690 if (op->remap.skip_prev) {
2691 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2692 op->remap.range -=
2693 xe_vma_end(vma) -
2694 xe_vma_start(old);
2695 op->remap.start = xe_vma_end(vma);
2696 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2697 (ULL)op->remap.start,
2698 (ULL)op->remap.range);
2699 } else {
2700 num_remap_ops++;
2701 }
2702 }
2703
2704 if (op->base.remap.next) {
2705 vma = new_vma(vm, op->base.remap.next,
2706 old->pat_index, flags);
2707 if (IS_ERR(vma))
2708 return PTR_ERR(vma);
2709
2710 op->remap.next = vma;
2711
2712 /*
2713 * Userptr creates a new SG mapping so
2714 * we must also rebind.
2715 */
2716 op->remap.skip_next = skip ||
2717 (!xe_vma_is_userptr(old) &&
2718 IS_ALIGNED(xe_vma_start(vma),
2719 xe_vma_max_pte_size(old)));
2720 if (op->remap.skip_next) {
2721 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2722 op->remap.range -=
2723 xe_vma_end(old) -
2724 xe_vma_start(vma);
2725 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2726 (ULL)op->remap.start,
2727 (ULL)op->remap.range);
2728 } else {
2729 num_remap_ops++;
2730 }
2731 }
2732 if (!skip)
2733 num_remap_ops++;
2734
2735 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2736 break;
2737 }
2738 case DRM_GPUVA_OP_UNMAP:
2739 vma = gpuva_to_vma(op->base.unmap.va);
2740
2741 if (xe_vma_is_cpu_addr_mirror(vma) &&
2742 xe_svm_has_mapping(vm, xe_vma_start(vma),
2743 xe_vma_end(vma)))
2744 return -EBUSY;
2745
2746 if (!xe_vma_is_cpu_addr_mirror(vma))
2747 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2748 break;
2749 case DRM_GPUVA_OP_PREFETCH:
2750 vma = gpuva_to_vma(op->base.prefetch.va);
2751
2752 if (xe_vma_is_userptr(vma)) {
2753 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2754 if (err)
2755 return err;
2756 }
2757
2758 if (xe_vma_is_cpu_addr_mirror(vma))
2759 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2760 op->prefetch_range.ranges_count);
2761 else
2762 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2763
2764 break;
2765 default:
2766 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2767 }
2768
2769 err = xe_vma_op_commit(vm, op);
2770 if (err)
2771 return err;
2772 }
2773
2774 return 0;
2775 }
2776
2777 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2778 bool post_commit, bool prev_post_commit,
2779 bool next_post_commit)
2780 {
2781 lockdep_assert_held_write(&vm->lock);
2782
2783 switch (op->base.op) {
2784 case DRM_GPUVA_OP_MAP:
2785 if (op->map.vma) {
2786 prep_vma_destroy(vm, op->map.vma, post_commit);
2787 xe_vma_destroy_unlocked(op->map.vma);
2788 }
2789 break;
2790 case DRM_GPUVA_OP_UNMAP:
2791 {
2792 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2793
2794 if (vma) {
2795 down_read(&vm->userptr.notifier_lock);
2796 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2797 up_read(&vm->userptr.notifier_lock);
2798 if (post_commit)
2799 xe_vm_insert_vma(vm, vma);
2800 }
2801 break;
2802 }
2803 case DRM_GPUVA_OP_REMAP:
2804 {
2805 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2806
2807 if (op->remap.prev) {
2808 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2809 xe_vma_destroy_unlocked(op->remap.prev);
2810 }
2811 if (op->remap.next) {
2812 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2813 xe_vma_destroy_unlocked(op->remap.next);
2814 }
2815 if (vma) {
2816 down_read(&vm->userptr.notifier_lock);
2817 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2818 up_read(&vm->userptr.notifier_lock);
2819 if (post_commit)
2820 xe_vm_insert_vma(vm, vma);
2821 }
2822 break;
2823 }
2824 case DRM_GPUVA_OP_PREFETCH:
2825 /* Nothing to do */
2826 break;
2827 default:
2828 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2829 }
2830 }
2831
2832 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2833 struct drm_gpuva_ops **ops,
2834 int num_ops_list)
2835 {
2836 int i;
2837
2838 for (i = num_ops_list - 1; i >= 0; --i) {
2839 struct drm_gpuva_ops *__ops = ops[i];
2840 struct drm_gpuva_op *__op;
2841
2842 if (!__ops)
2843 continue;
2844
2845 drm_gpuva_for_each_op_reverse(__op, __ops) {
2846 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2847
2848 xe_vma_op_unwind(vm, op,
2849 op->flags & XE_VMA_OP_COMMITTED,
2850 op->flags & XE_VMA_OP_PREV_COMMITTED,
2851 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2852 }
2853 }
2854 }
2855
2856 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2857 bool validate)
2858 {
2859 struct xe_bo *bo = xe_vma_bo(vma);
2860 struct xe_vm *vm = xe_vma_vm(vma);
2861 int err = 0;
2862
2863 if (bo) {
2864 if (!bo->vm)
2865 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2866 if (!err && validate)
2867 err = xe_bo_validate(bo, vm,
2868 !xe_vm_in_preempt_fence_mode(vm));
2869 }
2870
2871 return err;
2872 }
2873
2874 static int check_ufence(struct xe_vma *vma)
2875 {
2876 if (vma->ufence) {
2877 struct xe_user_fence * const f = vma->ufence;
2878
2879 if (!xe_sync_ufence_get_status(f))
2880 return -EBUSY;
2881
2882 vma->ufence = NULL;
2883 xe_sync_ufence_put(f);
2884 }
2885
2886 return 0;
2887 }
2888
2889 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2890 {
2891 bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2892 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2893 int err = 0;
2894
2895 struct xe_svm_range *svm_range;
2896 struct drm_gpusvm_ctx ctx = {};
2897 struct xe_tile *tile;
2898 unsigned long i;
2899 u32 region;
2900
2901 if (!xe_vma_is_cpu_addr_mirror(vma))
2902 return 0;
2903
2904 region = op->prefetch_range.region;
2905
2906 ctx.read_only = xe_vma_read_only(vma);
2907 ctx.devmem_possible = devmem_possible;
2908 ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2909
2910 /* TODO: Threading the migration */
2911 xa_for_each(&op->prefetch_range.range, i, svm_range) {
2912 if (!region)
2913 xe_svm_range_migrate_to_smem(vm, svm_range);
2914
2915 if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
2916 tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
2917 err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2918 if (err) {
2919 drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2920 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2921 return -ENODATA;
2922 }
2923 xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2924 }
2925
2926 err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2927 if (err) {
2928 drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2929 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2930 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2931 err = -ENODATA;
2932 return err;
2933 }
2934 xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2935 }
2936
2937 return err;
2938 }
2939
2940 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2941 struct xe_vma_op *op)
2942 {
2943 int err = 0;
2944
2945 switch (op->base.op) {
2946 case DRM_GPUVA_OP_MAP:
2947 if (!op->map.invalidate_on_bind)
2948 err = vma_lock_and_validate(exec, op->map.vma,
2949 !xe_vm_in_fault_mode(vm) ||
2950 op->map.immediate);
2951 break;
2952 case DRM_GPUVA_OP_REMAP:
2953 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2954 if (err)
2955 break;
2956
2957 err = vma_lock_and_validate(exec,
2958 gpuva_to_vma(op->base.remap.unmap->va),
2959 false);
2960 if (!err && op->remap.prev)
2961 err = vma_lock_and_validate(exec, op->remap.prev, true);
2962 if (!err && op->remap.next)
2963 err = vma_lock_and_validate(exec, op->remap.next, true);
2964 break;
2965 case DRM_GPUVA_OP_UNMAP:
2966 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2967 if (err)
2968 break;
2969
2970 err = vma_lock_and_validate(exec,
2971 gpuva_to_vma(op->base.unmap.va),
2972 false);
2973 break;
2974 case DRM_GPUVA_OP_PREFETCH:
2975 {
2976 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2977 u32 region;
2978
2979 if (xe_vma_is_cpu_addr_mirror(vma))
2980 region = op->prefetch_range.region;
2981 else
2982 region = op->prefetch.region;
2983
2984 		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2985
2986 err = vma_lock_and_validate(exec,
2987 gpuva_to_vma(op->base.prefetch.va),
2988 false);
2989 if (!err && !xe_vma_has_no_bo(vma))
2990 err = xe_bo_migrate(xe_vma_bo(vma),
2991 region_to_mem_type[region]);
2992 break;
2993 }
2994 default:
2995 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2996 }
2997
2998 return err;
2999 }
3000
3001 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3002 {
3003 struct xe_vma_op *op;
3004 int err;
3005
3006 if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3007 return 0;
3008
3009 list_for_each_entry(op, &vops->list, link) {
3010 if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3011 err = prefetch_ranges(vm, op);
3012 if (err)
3013 return err;
3014 }
3015 }
3016
3017 return 0;
3018 }
3019
3020 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3021 struct xe_vm *vm,
3022 struct xe_vma_ops *vops)
3023 {
3024 struct xe_vma_op *op;
3025 int err;
3026
3027 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3028 if (err)
3029 return err;
3030
3031 list_for_each_entry(op, &vops->list, link) {
3032 err = op_lock_and_prep(exec, vm, op);
3033 if (err)
3034 return err;
3035 }
3036
3037 #ifdef TEST_VM_OPS_ERROR
3038 if (vops->inject_error &&
3039 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3040 return -ENOSPC;
3041 #endif
3042
3043 return 0;
3044 }
3045
3046 static void op_trace(struct xe_vma_op *op)
3047 {
3048 switch (op->base.op) {
3049 case DRM_GPUVA_OP_MAP:
3050 trace_xe_vma_bind(op->map.vma);
3051 break;
3052 case DRM_GPUVA_OP_REMAP:
3053 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3054 if (op->remap.prev)
3055 trace_xe_vma_bind(op->remap.prev);
3056 if (op->remap.next)
3057 trace_xe_vma_bind(op->remap.next);
3058 break;
3059 case DRM_GPUVA_OP_UNMAP:
3060 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3061 break;
3062 case DRM_GPUVA_OP_PREFETCH:
3063 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3064 break;
3065 case DRM_GPUVA_OP_DRIVER:
3066 break;
3067 default:
3068 XE_WARN_ON("NOT POSSIBLE");
3069 }
3070 }
3071
3072 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3073 {
3074 struct xe_vma_op *op;
3075
3076 list_for_each_entry(op, &vops->list, link)
3077 op_trace(op);
3078 }
3079
3080 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3081 {
3082 struct xe_exec_queue *q = vops->q;
3083 struct xe_tile *tile;
3084 int number_tiles = 0;
3085 u8 id;
3086
3087 for_each_tile(tile, vm->xe, id) {
3088 if (vops->pt_update_ops[id].num_ops)
3089 ++number_tiles;
3090
3091 if (vops->pt_update_ops[id].q)
3092 continue;
3093
3094 if (q) {
3095 vops->pt_update_ops[id].q = q;
3096 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3097 q = list_next_entry(q, multi_gt_list);
3098 } else {
3099 vops->pt_update_ops[id].q = vm->q[id];
3100 }
3101 }
3102
3103 return number_tiles;
3104 }
3105
3106 static struct dma_fence *ops_execute(struct xe_vm *vm,
3107 struct xe_vma_ops *vops)
3108 {
3109 struct xe_tile *tile;
3110 struct dma_fence *fence = NULL;
3111 struct dma_fence **fences = NULL;
3112 struct dma_fence_array *cf = NULL;
3113 int number_tiles = 0, current_fence = 0, err;
3114 u8 id;
3115
3116 number_tiles = vm_ops_setup_tile_args(vm, vops);
3117 if (number_tiles == 0)
3118 return ERR_PTR(-ENODATA);
3119
3120 if (number_tiles > 1) {
3121 fences = kmalloc_array(number_tiles, sizeof(*fences),
3122 GFP_KERNEL);
3123 if (!fences) {
3124 fence = ERR_PTR(-ENOMEM);
3125 goto err_trace;
3126 }
3127 }
3128
3129 for_each_tile(tile, vm->xe, id) {
3130 if (!vops->pt_update_ops[id].num_ops)
3131 continue;
3132
3133 err = xe_pt_update_ops_prepare(tile, vops);
3134 if (err) {
3135 fence = ERR_PTR(err);
3136 goto err_out;
3137 }
3138 }
3139
3140 trace_xe_vm_ops_execute(vops);
3141
3142 for_each_tile(tile, vm->xe, id) {
3143 if (!vops->pt_update_ops[id].num_ops)
3144 continue;
3145
3146 fence = xe_pt_update_ops_run(tile, vops);
3147 if (IS_ERR(fence))
3148 goto err_out;
3149
3150 if (fences)
3151 fences[current_fence++] = fence;
3152 }
3153
3154 if (fences) {
3155 cf = dma_fence_array_create(number_tiles, fences,
3156 vm->composite_fence_ctx,
3157 vm->composite_fence_seqno++,
3158 false);
3159 if (!cf) {
3160 --vm->composite_fence_seqno;
3161 fence = ERR_PTR(-ENOMEM);
3162 goto err_out;
3163 }
3164 fence = &cf->base;
3165 }
3166
3167 for_each_tile(tile, vm->xe, id) {
3168 if (!vops->pt_update_ops[id].num_ops)
3169 continue;
3170
3171 xe_pt_update_ops_fini(tile, vops);
3172 }
3173
3174 return fence;
3175
3176 err_out:
3177 for_each_tile(tile, vm->xe, id) {
3178 if (!vops->pt_update_ops[id].num_ops)
3179 continue;
3180
3181 xe_pt_update_ops_abort(tile, vops);
3182 }
3183 while (current_fence)
3184 dma_fence_put(fences[--current_fence]);
3185 kfree(fences);
3186 kfree(cf);
3187
3188 err_trace:
3189 trace_xe_vm_ops_fail(vm);
3190 return fence;
3191 }
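/*
 * When more than one tile has page-table work queued, ops_execute() wraps
 * the per-tile fences in a dma_fence_array so that callers such as
 * vm_bind_ioctl_ops_execute() only ever deal with a single fence, e.g.:
 *
 *	fence = ops_execute(vm, vops);
 *	if (!IS_ERR(fence))
 *		vm_bind_ioctl_ops_fini(vm, vops, fence);
 */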
3192
3193 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3194 {
3195 if (vma->ufence)
3196 xe_sync_ufence_put(vma->ufence);
3197 vma->ufence = __xe_sync_ufence_get(ufence);
3198 }
3199
3200 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3201 struct xe_user_fence *ufence)
3202 {
3203 switch (op->base.op) {
3204 case DRM_GPUVA_OP_MAP:
3205 vma_add_ufence(op->map.vma, ufence);
3206 break;
3207 case DRM_GPUVA_OP_REMAP:
3208 if (op->remap.prev)
3209 vma_add_ufence(op->remap.prev, ufence);
3210 if (op->remap.next)
3211 vma_add_ufence(op->remap.next, ufence);
3212 break;
3213 case DRM_GPUVA_OP_UNMAP:
3214 break;
3215 case DRM_GPUVA_OP_PREFETCH:
3216 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3217 break;
3218 default:
3219 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3220 }
3221 }
3222
3223 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3224 struct dma_fence *fence)
3225 {
3226 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3227 struct xe_user_fence *ufence;
3228 struct xe_vma_op *op;
3229 int i;
3230
3231 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3232 list_for_each_entry(op, &vops->list, link) {
3233 if (ufence)
3234 op_add_ufence(vm, op, ufence);
3235
3236 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3237 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3238 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3239 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3240 fence);
3241 }
3242 if (ufence)
3243 xe_sync_ufence_put(ufence);
3244 if (fence) {
3245 for (i = 0; i < vops->num_syncs; i++)
3246 xe_sync_entry_signal(vops->syncs + i, fence);
3247 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3248 }
3249 }
3250
3251 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3252 struct xe_vma_ops *vops)
3253 {
3254 struct drm_exec exec;
3255 struct dma_fence *fence;
3256 int err;
3257
3258 lockdep_assert_held_write(&vm->lock);
3259
3260 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
3261 DRM_EXEC_IGNORE_DUPLICATES, 0);
3262 drm_exec_until_all_locked(&exec) {
3263 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3264 drm_exec_retry_on_contention(&exec);
3265 if (err) {
3266 fence = ERR_PTR(err);
3267 goto unlock;
3268 }
3269
3270 fence = ops_execute(vm, vops);
3271 if (IS_ERR(fence)) {
3272 if (PTR_ERR(fence) == -ENODATA)
3273 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3274 goto unlock;
3275 }
3276
3277 vm_bind_ioctl_ops_fini(vm, vops, fence);
3278 }
3279
3280 unlock:
3281 drm_exec_fini(&exec);
3282 return fence;
3283 }
3284 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3285
3286 #define SUPPORTED_FLAGS_STUB \
3287 (DRM_XE_VM_BIND_FLAG_READONLY | \
3288 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3289 DRM_XE_VM_BIND_FLAG_NULL | \
3290 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3291 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3292 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
3293
3294 #ifdef TEST_VM_OPS_ERROR
3295 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3296 #else
3297 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3298 #endif
3299
3300 #define XE_64K_PAGE_MASK 0xffffull
3301 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3302
3303 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3304 struct drm_xe_vm_bind *args,
3305 struct drm_xe_vm_bind_op **bind_ops)
3306 {
3307 int err;
3308 int i;
3309
3310 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3311 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3312 return -EINVAL;
3313
3314 if (XE_IOCTL_DBG(xe, args->extensions))
3315 return -EINVAL;
3316
3317 if (args->num_binds > 1) {
3318 u64 __user *bind_user =
3319 u64_to_user_ptr(args->vector_of_binds);
3320
3321 *bind_ops = kvmalloc_array(args->num_binds,
3322 sizeof(struct drm_xe_vm_bind_op),
3323 GFP_KERNEL | __GFP_ACCOUNT |
3324 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3325 if (!*bind_ops)
3326 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3327
3328 err = copy_from_user(*bind_ops, bind_user,
3329 sizeof(struct drm_xe_vm_bind_op) *
3330 args->num_binds);
3331 if (XE_IOCTL_DBG(xe, err)) {
3332 err = -EFAULT;
3333 goto free_bind_ops;
3334 }
3335 } else {
3336 *bind_ops = &args->bind;
3337 }
3338
3339 for (i = 0; i < args->num_binds; ++i) {
3340 u64 range = (*bind_ops)[i].range;
3341 u64 addr = (*bind_ops)[i].addr;
3342 u32 op = (*bind_ops)[i].op;
3343 u32 flags = (*bind_ops)[i].flags;
3344 u32 obj = (*bind_ops)[i].obj;
3345 u64 obj_offset = (*bind_ops)[i].obj_offset;
3346 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3347 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3348 bool is_cpu_addr_mirror = flags &
3349 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3350 u16 pat_index = (*bind_ops)[i].pat_index;
3351 u16 coh_mode;
3352
3353 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3354 (!xe_vm_in_fault_mode(vm) ||
3355 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3356 err = -EINVAL;
3357 goto free_bind_ops;
3358 }
3359
3360 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3361 err = -EINVAL;
3362 goto free_bind_ops;
3363 }
3364
3365 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3366 (*bind_ops)[i].pat_index = pat_index;
3367 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3368 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3369 err = -EINVAL;
3370 goto free_bind_ops;
3371 }
3372
3373 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3374 err = -EINVAL;
3375 goto free_bind_ops;
3376 }
3377
3378 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3379 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3380 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3381 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3382 is_cpu_addr_mirror)) ||
3383 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3384 (is_null || is_cpu_addr_mirror)) ||
3385 XE_IOCTL_DBG(xe, !obj &&
3386 op == DRM_XE_VM_BIND_OP_MAP &&
3387 !is_null && !is_cpu_addr_mirror) ||
3388 XE_IOCTL_DBG(xe, !obj &&
3389 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3390 XE_IOCTL_DBG(xe, addr &&
3391 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3392 XE_IOCTL_DBG(xe, range &&
3393 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3394 XE_IOCTL_DBG(xe, obj &&
3395 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3396 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3397 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3398 XE_IOCTL_DBG(xe, obj &&
3399 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3400 XE_IOCTL_DBG(xe, prefetch_region &&
3401 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3402 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
3403 xe->info.mem_region_mask)) ||
3404 XE_IOCTL_DBG(xe, obj &&
3405 op == DRM_XE_VM_BIND_OP_UNMAP)) {
3406 err = -EINVAL;
3407 goto free_bind_ops;
3408 }
3409
3410 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3411 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3412 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3413 XE_IOCTL_DBG(xe, !range &&
3414 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3415 err = -EINVAL;
3416 goto free_bind_ops;
3417 }
3418 }
3419
3420 return 0;
3421
3422 free_bind_ops:
3423 if (args->num_binds > 1)
3424 kvfree(*bind_ops);
3425 *bind_ops = NULL;
3426 return err;
3427 }
3428
3429 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3430 struct xe_exec_queue *q,
3431 struct xe_sync_entry *syncs,
3432 int num_syncs)
3433 {
3434 struct dma_fence *fence;
3435 int i, err = 0;
3436
3437 fence = xe_sync_in_fence_get(syncs, num_syncs,
3438 to_wait_exec_queue(vm, q), vm);
3439 if (IS_ERR(fence))
3440 return PTR_ERR(fence);
3441
3442 for (i = 0; i < num_syncs; i++)
3443 xe_sync_entry_signal(&syncs[i], fence);
3444
3445 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3446 fence);
3447 dma_fence_put(fence);
3448
3449 return err;
3450 }
3451
3452 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3453 struct xe_exec_queue *q,
3454 struct xe_sync_entry *syncs, u32 num_syncs)
3455 {
3456 memset(vops, 0, sizeof(*vops));
3457 INIT_LIST_HEAD(&vops->list);
3458 vops->vm = vm;
3459 vops->q = q;
3460 vops->syncs = syncs;
3461 vops->num_syncs = num_syncs;
3462 vops->flags = 0;
3463 }
3464
3465 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3466 u64 addr, u64 range, u64 obj_offset,
3467 u16 pat_index, u32 op, u32 bind_flags)
3468 {
3469 u16 coh_mode;
3470
3471 if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3472 XE_IOCTL_DBG(xe, obj_offset >
3473 xe_bo_size(bo) - range)) {
3474 return -EINVAL;
3475 }
3476
3477 /*
3478 * Some platforms require 64k VM_BIND alignment,
3479 * specifically those with XE_VRAM_FLAGS_NEED64K.
3480 *
3481 * Other platforms may have BO's set to 64k physical placement,
3482 * but can be mapped at 4k offsets anyway. This check is only
3483 * there for the former case.
3484 */
3485 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3486 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3487 if (XE_IOCTL_DBG(xe, obj_offset &
3488 XE_64K_PAGE_MASK) ||
3489 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3490 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3491 return -EINVAL;
3492 }
3493 }
3494
3495 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3496 if (bo->cpu_caching) {
3497 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3498 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3499 return -EINVAL;
3500 }
3501 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3502 /*
3503 * Imported dma-buf from a different device should
3504 * require 1way or 2way coherency since we don't know
3505 		 * how it was mapped on the CPU. Just assume it is
3506 		 * potentially cached on the CPU side.
3507 */
3508 return -EINVAL;
3509 }
3510
3511 /* If a BO is protected it can only be mapped if the key is still valid */
3512 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3513 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3514 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3515 return -ENOEXEC;
3516
3517 return 0;
3518 }
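/*
 * For example, on a platform with XE_VRAM_FLAGS_NEED64K and a BO created
 * with XE_BO_FLAG_INTERNAL_64K, a bind with addr = 0x200000 and
 * range = 0x10000 passes the alignment check above, whereas addr = 0x201000
 * is rejected because it is only 4KiB aligned (illustrative values;
 * XE_64K_PAGE_MASK is 0xffff).
 */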
3519
3520 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3521 {
3522 struct xe_device *xe = to_xe_device(dev);
3523 struct xe_file *xef = to_xe_file(file);
3524 struct drm_xe_vm_bind *args = data;
3525 struct drm_xe_sync __user *syncs_user;
3526 struct xe_bo **bos = NULL;
3527 struct drm_gpuva_ops **ops = NULL;
3528 struct xe_vm *vm;
3529 struct xe_exec_queue *q = NULL;
3530 u32 num_syncs, num_ufence = 0;
3531 struct xe_sync_entry *syncs = NULL;
3532 struct drm_xe_vm_bind_op *bind_ops = NULL;
3533 struct xe_vma_ops vops;
3534 struct dma_fence *fence;
3535 int err;
3536 int i;
3537
3538 vm = xe_vm_lookup(xef, args->vm_id);
3539 if (XE_IOCTL_DBG(xe, !vm))
3540 return -EINVAL;
3541
3542 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3543 if (err)
3544 goto put_vm;
3545
3546 if (args->exec_queue_id) {
3547 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3548 if (XE_IOCTL_DBG(xe, !q)) {
3549 err = -ENOENT;
3550 goto put_vm;
3551 }
3552
3553 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3554 err = -EINVAL;
3555 goto put_exec_queue;
3556 }
3557 }
3558
3559 /* Ensure all UNMAPs visible */
3560 xe_svm_flush(vm);
3561
3562 err = down_write_killable(&vm->lock);
3563 if (err)
3564 goto put_exec_queue;
3565
3566 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3567 err = -ENOENT;
3568 goto release_vm_lock;
3569 }
3570
3571 for (i = 0; i < args->num_binds; ++i) {
3572 u64 range = bind_ops[i].range;
3573 u64 addr = bind_ops[i].addr;
3574
3575 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3576 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3577 err = -EINVAL;
3578 goto release_vm_lock;
3579 }
3580 }
3581
3582 if (args->num_binds) {
3583 bos = kvcalloc(args->num_binds, sizeof(*bos),
3584 GFP_KERNEL | __GFP_ACCOUNT |
3585 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3586 if (!bos) {
3587 err = -ENOMEM;
3588 goto release_vm_lock;
3589 }
3590
3591 ops = kvcalloc(args->num_binds, sizeof(*ops),
3592 GFP_KERNEL | __GFP_ACCOUNT |
3593 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3594 if (!ops) {
3595 err = -ENOMEM;
3596 goto release_vm_lock;
3597 }
3598 }
3599
3600 for (i = 0; i < args->num_binds; ++i) {
3601 struct drm_gem_object *gem_obj;
3602 u64 range = bind_ops[i].range;
3603 u64 addr = bind_ops[i].addr;
3604 u32 obj = bind_ops[i].obj;
3605 u64 obj_offset = bind_ops[i].obj_offset;
3606 u16 pat_index = bind_ops[i].pat_index;
3607 u32 op = bind_ops[i].op;
3608 u32 bind_flags = bind_ops[i].flags;
3609
3610 if (!obj)
3611 continue;
3612
3613 gem_obj = drm_gem_object_lookup(file, obj);
3614 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3615 err = -ENOENT;
3616 goto put_obj;
3617 }
3618 bos[i] = gem_to_xe_bo(gem_obj);
3619
3620 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3621 obj_offset, pat_index, op,
3622 bind_flags);
3623 if (err)
3624 goto put_obj;
3625 }
3626
3627 if (args->num_syncs) {
3628 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3629 if (!syncs) {
3630 err = -ENOMEM;
3631 goto put_obj;
3632 }
3633 }
3634
3635 syncs_user = u64_to_user_ptr(args->syncs);
3636 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3637 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3638 &syncs_user[num_syncs],
3639 (xe_vm_in_lr_mode(vm) ?
3640 SYNC_PARSE_FLAG_LR_MODE : 0) |
3641 (!args->num_binds ?
3642 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3643 if (err)
3644 goto free_syncs;
3645
3646 if (xe_sync_is_ufence(&syncs[num_syncs]))
3647 num_ufence++;
3648 }
3649
3650 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3651 err = -EINVAL;
3652 goto free_syncs;
3653 }
3654
3655 if (!args->num_binds) {
3656 err = -ENODATA;
3657 goto free_syncs;
3658 }
3659
3660 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3661 for (i = 0; i < args->num_binds; ++i) {
3662 u64 range = bind_ops[i].range;
3663 u64 addr = bind_ops[i].addr;
3664 u32 op = bind_ops[i].op;
3665 u32 flags = bind_ops[i].flags;
3666 u64 obj_offset = bind_ops[i].obj_offset;
3667 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3668 u16 pat_index = bind_ops[i].pat_index;
3669
3670 ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3671 addr, range, op, flags,
3672 prefetch_region, pat_index);
3673 if (IS_ERR(ops[i])) {
3674 err = PTR_ERR(ops[i]);
3675 ops[i] = NULL;
3676 goto unwind_ops;
3677 }
3678
3679 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3680 if (err)
3681 goto unwind_ops;
3682
3683 #ifdef TEST_VM_OPS_ERROR
3684 if (flags & FORCE_OP_ERROR) {
3685 vops.inject_error = true;
3686 vm->xe->vm_inject_error_position =
3687 (vm->xe->vm_inject_error_position + 1) %
3688 FORCE_OP_ERROR_COUNT;
3689 }
3690 #endif
3691 }
3692
3693 /* Nothing to do */
3694 if (list_empty(&vops.list)) {
3695 err = -ENODATA;
3696 goto unwind_ops;
3697 }
3698
3699 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3700 if (err)
3701 goto unwind_ops;
3702
3703 err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3704 if (err)
3705 goto unwind_ops;
3706
3707 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3708 if (IS_ERR(fence))
3709 err = PTR_ERR(fence);
3710 else
3711 dma_fence_put(fence);
3712
3713 unwind_ops:
3714 if (err && err != -ENODATA)
3715 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3716 xe_vma_ops_fini(&vops);
3717 for (i = args->num_binds - 1; i >= 0; --i)
3718 if (ops[i])
3719 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3720 free_syncs:
3721 if (err == -ENODATA)
3722 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3723 while (num_syncs--)
3724 xe_sync_entry_cleanup(&syncs[num_syncs]);
3725
3726 kfree(syncs);
3727 put_obj:
3728 for (i = 0; i < args->num_binds; ++i)
3729 xe_bo_put(bos[i]);
3730 release_vm_lock:
3731 up_write(&vm->lock);
3732 put_exec_queue:
3733 if (q)
3734 xe_exec_queue_put(q);
3735 put_vm:
3736 xe_vm_put(vm);
3737 kvfree(bos);
3738 kvfree(ops);
3739 if (args->num_binds > 1)
3740 kvfree(bind_ops);
3741 return err;
3742 }
3743
3744 /**
3745 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3746 * @vm: VM to bind the BO to
3747 * @bo: BO to bind
3748 * @q: exec queue to use for the bind (optional)
3749 * @addr: address at which to bind the BO
3750 * @cache_lvl: PAT cache level to use
3751 *
3752 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3753 * kernel-owned VM.
3754 *
3755 * Returns a dma_fence to track the binding completion if the job to do so was
3756 * successfully submitted, an error pointer otherwise.
3757 */
3758 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3759 struct xe_exec_queue *q, u64 addr,
3760 enum xe_cache_level cache_lvl)
3761 {
3762 struct xe_vma_ops vops;
3763 struct drm_gpuva_ops *ops = NULL;
3764 struct dma_fence *fence;
3765 int err;
3766
3767 xe_bo_get(bo);
3768 xe_vm_get(vm);
3769 if (q)
3770 xe_exec_queue_get(q);
3771
3772 down_write(&vm->lock);
3773
3774 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3775
3776 ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3777 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3778 vm->xe->pat.idx[cache_lvl]);
3779 if (IS_ERR(ops)) {
3780 err = PTR_ERR(ops);
3781 goto release_vm_lock;
3782 }
3783
3784 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3785 if (err)
3786 goto release_vm_lock;
3787
3788 xe_assert(vm->xe, !list_empty(&vops.list));
3789
3790 err = xe_vma_ops_alloc(&vops, false);
3791 if (err)
3792 goto unwind_ops;
3793
3794 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3795 if (IS_ERR(fence))
3796 err = PTR_ERR(fence);
3797
3798 unwind_ops:
3799 if (err && err != -ENODATA)
3800 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3801
3802 xe_vma_ops_fini(&vops);
3803 drm_gpuva_ops_free(&vm->gpuvm, ops);
3804
3805 release_vm_lock:
3806 up_write(&vm->lock);
3807
3808 if (q)
3809 xe_exec_queue_put(q);
3810 xe_vm_put(vm);
3811 xe_bo_put(bo);
3812
3813 if (err)
3814 fence = ERR_PTR(err);
3815
3816 return fence;
3817 }
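/*
 * Minimal usage sketch for xe_vm_bind_kernel_bo() (assumes a kernel-owned
 * @vm and @bo already exist; error handling trimmed):
 *
 *	struct dma_fence *fence;
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */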
3818
3819 /**
3820 * xe_vm_lock() - Lock the vm's dma_resv object
3821 * @vm: The struct xe_vm whose lock is to be locked
3822 * @intr: Whether to perform any wait interruptible
3823 *
3824 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3825 * contended lock was interrupted. If @intr is false, the function
3826 * always returns 0.
3827 */
3828 int xe_vm_lock(struct xe_vm *vm, bool intr)
3829 {
3830 if (intr)
3831 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3832
3833 return dma_resv_lock(xe_vm_resv(vm), NULL);
3834 }
3835
3836 /**
3837 * xe_vm_unlock() - Unlock the vm's dma_resv object
3838 * @vm: The struct xe_vm whose lock is to be released.
3839 *
3840 * Unlock a buffer object lock that was locked by xe_vm_lock().
3841 */
3842 void xe_vm_unlock(struct xe_vm *vm)
3843 {
3844 dma_resv_unlock(xe_vm_resv(vm));
3845 }
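/*
 * Typical pairing of the two helpers above (sketch; interruptible variant):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	...
 *	xe_vm_unlock(vm);
 */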
3846
3847 /**
3848 * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an
3849 * address range
3850 * @vm: The VM
3851 * @start: start address
3852 * @end: end address
3853  * @tile_mask: mask of tiles whose GTs should be sent the TLB invalidation
3854  *
3855  * Issue a range-based TLB invalidation for the GTs in @tile_mask.
3856 *
3857 * Returns 0 for success, negative error code otherwise.
3858 */
3859 int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
3860 u64 end, u8 tile_mask)
3861 {
3862 struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3863 struct xe_tile *tile;
3864 u32 fence_id = 0;
3865 u8 id;
3866 int err;
3867
3868 if (!tile_mask)
3869 return 0;
3870
3871 for_each_tile(tile, vm->xe, id) {
3872 if (tile_mask & BIT(id)) {
3873 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3874 &fence[fence_id], true);
3875
3876 err = xe_gt_tlb_invalidation_range(tile->primary_gt,
3877 &fence[fence_id],
3878 start,
3879 end,
3880 vm->usm.asid);
3881 if (err)
3882 goto wait;
3883 ++fence_id;
3884
3885 if (!tile->media_gt)
3886 continue;
3887
3888 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3889 &fence[fence_id], true);
3890
3891 err = xe_gt_tlb_invalidation_range(tile->media_gt,
3892 &fence[fence_id],
3893 start,
3894 end,
3895 vm->usm.asid);
3896 if (err)
3897 goto wait;
3898 ++fence_id;
3899 }
3900 }
3901
3902 wait:
3903 for (id = 0; id < fence_id; ++id)
3904 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3905
3906 return err;
3907 }
3908
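/*
 * Illustrative sketch only: a caller that already knows which tiles map a
 * given VMA (for example via vma->tile_mask) could invalidate the matching
 * GTs' TLBs for that range like this.
 *
 *	err = xe_vm_range_tilemask_tlb_invalidation(vm, xe_vma_start(vma),
 *						    xe_vma_end(vma),
 *						    vma->tile_mask);
 */
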
/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
 *
 * Walks the page table leaves backing this VMA, zeroing the entries it owns,
 * issues a TLB invalidation, and blocks until the invalidation is complete.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
	u8 id;

	xe_assert(xe, !xe_vma_is_null(vma));
	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
	trace_xe_vma_invalidate(vma);

	vm_dbg(&vm->xe->drm,
	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
	       xe_vma_start(vma), xe_vma_size(vma));

	/*
	 * Check that we don't race with page-table updates, tile_invalidated
	 * update is safe
	 */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (xe_vma_is_userptr(vma)) {
			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));

			WARN_ON_ONCE(!mmu_interval_check_retry
				     (&to_userptr_vma(vma)->userptr.notifier,
				      to_userptr_vma(vma)->userptr.notifier_seq));
			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
							     DMA_RESV_USAGE_BOOKKEEP));

		} else {
			xe_bo_assert_held(xe_vma_bo(vma));
		}
	}

	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes(tile, vma))
			tile_mask |= BIT(id);

	xe_device_wmb(xe);

	ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma),
						    xe_vma_end(vma), tile_mask);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

	return ret;
}

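/*
 * Note on ordering in xe_vm_invalidate_vma() above: the PTEs are zapped
 * first, xe_device_wmb() then orders those writes ahead of the range TLB
 * invalidation, and the invalidation helper waits on its fences before
 * returning, so stale translations should not outlive a successful call.
 */
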
int xe_vm_validate_protected(struct xe_vm *vm)
{
	struct drm_gpuva *gpuva;
	int err = 0;

	if (!vm)
		return -ENODEV;

	mutex_lock(&vm->snap_mutex);

	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!bo)
			continue;

		if (xe_bo_is_protected(bo)) {
			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
			if (err)
				break;
		}
	}

	mutex_unlock(&vm->snap_mutex);
	return err;
}

struct xe_vm_snapshot {
	unsigned long num_snaps;
	struct {
		u64 ofs, bo_ofs;
		unsigned long len;
		struct xe_bo *bo;
		void *data;
		struct mm_struct *mm;
	} snap[];
};

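/*
 * struct xe_vm_snapshot ends in a flexible array, so xe_vm_snapshot_capture()
 * below sizes a single allocation for the header plus all per-VMA slots:
 *
 *	kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
 */
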
struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
	unsigned long num_snaps = 0, i;
	struct xe_vm_snapshot *snap = NULL;
	struct drm_gpuva *gpuva;

	if (!vm)
		return NULL;

	mutex_lock(&vm->snap_mutex);
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		if (gpuva->flags & XE_VMA_DUMPABLE)
			num_snaps++;
	}

	if (num_snaps)
		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
	if (!snap) {
		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
		goto out_unlock;
	}

	snap->num_snaps = num_snaps;
	i = 0;
	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);
		struct xe_bo *bo = vma->gpuva.gem.obj ?
			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

		if (!(gpuva->flags & XE_VMA_DUMPABLE))
			continue;

		snap->snap[i].ofs = xe_vma_start(vma);
		snap->snap[i].len = xe_vma_size(vma);
		if (bo) {
			snap->snap[i].bo = xe_bo_get(bo);
			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
		} else if (xe_vma_is_userptr(vma)) {
			struct mm_struct *mm =
				to_userptr_vma(vma)->userptr.notifier.mm;

			if (mmget_not_zero(mm))
				snap->snap[i].mm = mm;
			else
				snap->snap[i].data = ERR_PTR(-EFAULT);

			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
		} else {
			snap->snap[i].data = ERR_PTR(-ENOENT);
		}
		i++;
	}

out_unlock:
	mutex_unlock(&vm->snap_mutex);
	return snap;
}

void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	if (IS_ERR_OR_NULL(snap))
		return;

	for (int i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		int err;

		if (IS_ERR(snap->snap[i].data))
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
					 snap->snap[i].data, snap->snap[i].len);
		} else {
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

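/*
 * Illustrative sketch only: the snapshot helpers above and below are used in
 * phases. Capture runs under snap_mutex with GFP_NOWAIT and only takes
 * references (BO refs, mmget); the delayed step later copies the contents
 * from a context that may sleep; printing and freeing follow.
 *
 *	snap = xe_vm_snapshot_capture(vm);
 *	...
 *	xe_vm_snapshot_capture_delayed(snap);
 *	xe_vm_snapshot_print(snap, p);
 *	xe_vm_snapshot_free(snap);
 */
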
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
	unsigned long i, j;

	if (IS_ERR_OR_NULL(snap)) {
		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
		return;
	}

	for (i = 0; i < snap->num_snaps; i++) {
		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);

		if (IS_ERR(snap->snap[i].data)) {
			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
				   PTR_ERR(snap->snap[i].data));
			continue;
		}

		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);

		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
			u32 *val = snap->snap[i].data + j;
			char dumped[ASCII85_BUFSZ];

			drm_puts(p, ascii85_encode(*val, dumped));
		}

		drm_puts(p, "\n");

		if (drm_coredump_printer_is_full(p))
			return;
	}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
	unsigned long i;

	if (IS_ERR_OR_NULL(snap))
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		if (!IS_ERR(snap->snap[i].data))
			kvfree(snap->snap[i].data);
		xe_bo_put(snap->snap[i].bo);
		if (snap->snap[i].mm)
			mmput(snap->snap[i].mm);
	}
	kvfree(snap);
}