1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_pxp.h"
38 #include "xe_res_cursor.h"
39 #include "xe_svm.h"
40 #include "xe_sync.h"
41 #include "xe_trace_bo.h"
42 #include "xe_wa.h"
43 #include "xe_hmm.h"
44
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 return vm->gpuvm.r_obj;
48 }
49
50 /**
51 * xe_vma_userptr_check_repin() - Advisory check for repin needed
52 * @uvma: The userptr vma
53 *
54 * Check if the userptr vma has been invalidated since last successful
55 * repin. The check is advisory only and the function can be called
56 * without the vm->userptr.notifier_lock held. There is no guarantee that the
57 * vma userptr will remain valid after a lockless check, so typically
58 * the call needs to be followed by a proper check under the notifier_lock.
59 *
60 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
61 */
62 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
63 {
64 return mmu_interval_check_retry(&uvma->userptr.notifier,
65 uvma->userptr.notifier_seq) ?
66 -EAGAIN : 0;
67 }
68
69 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
70 {
71 struct xe_vma *vma = &uvma->vma;
72 struct xe_vm *vm = xe_vma_vm(vma);
73 struct xe_device *xe = vm->xe;
74
75 lockdep_assert_held(&vm->lock);
76 xe_assert(xe, xe_vma_is_userptr(vma));
77
78 return xe_hmm_userptr_populate_range(uvma, false);
79 }
80
81 static bool preempt_fences_waiting(struct xe_vm *vm)
82 {
83 struct xe_exec_queue *q;
84
85 lockdep_assert_held(&vm->lock);
86 xe_vm_assert_held(vm);
87
88 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
89 if (!q->lr.pfence ||
90 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
91 &q->lr.pfence->flags)) {
92 return true;
93 }
94 }
95
96 return false;
97 }
98
99 static void free_preempt_fences(struct list_head *list)
100 {
101 struct list_head *link, *next;
102
103 list_for_each_safe(link, next, list)
104 xe_preempt_fence_free(to_preempt_fence_from_link(link));
105 }
106
107 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
108 unsigned int *count)
109 {
110 lockdep_assert_held(&vm->lock);
111 xe_vm_assert_held(vm);
112
113 if (*count >= vm->preempt.num_exec_queues)
114 return 0;
115
116 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
117 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
118
119 if (IS_ERR(pfence))
120 return PTR_ERR(pfence);
121
122 list_move_tail(xe_preempt_fence_link(pfence), list);
123 }
124
125 return 0;
126 }
127
128 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
129 {
130 struct xe_exec_queue *q;
131
132 xe_vm_assert_held(vm);
133
134 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
135 if (q->lr.pfence) {
136 long timeout = dma_fence_wait(q->lr.pfence, false);
137
138 /* Only -ETIME on fence indicates VM needs to be killed */
139 if (timeout < 0 || q->lr.pfence->error == -ETIME)
140 return -ETIME;
141
142 dma_fence_put(q->lr.pfence);
143 q->lr.pfence = NULL;
144 }
145 }
146
147 return 0;
148 }
149
150 static bool xe_vm_is_idle(struct xe_vm *vm)
151 {
152 struct xe_exec_queue *q;
153
154 xe_vm_assert_held(vm);
155 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
156 if (!xe_exec_queue_is_idle(q))
157 return false;
158 }
159
160 return true;
161 }
162
163 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
164 {
165 struct list_head *link;
166 struct xe_exec_queue *q;
167
168 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
169 struct dma_fence *fence;
170
171 link = list->next;
172 xe_assert(vm->xe, link != list);
173
174 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
175 q, q->lr.context,
176 ++q->lr.seqno);
177 dma_fence_put(q->lr.pfence);
178 q->lr.pfence = fence;
179 }
180 }
181
182 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
183 {
184 struct xe_exec_queue *q;
185 int err;
186
187 xe_bo_assert_held(bo);
188
189 if (!vm->preempt.num_exec_queues)
190 return 0;
191
192 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
193 if (err)
194 return err;
195
196 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
197 if (q->lr.pfence) {
198 dma_resv_add_fence(bo->ttm.base.resv,
199 q->lr.pfence,
200 DMA_RESV_USAGE_BOOKKEEP);
201 }
202
203 return 0;
204 }
205
206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
207 struct drm_exec *exec)
208 {
209 struct xe_exec_queue *q;
210
211 lockdep_assert_held(&vm->lock);
212 xe_vm_assert_held(vm);
213
214 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
215 q->ops->resume(q);
216
217 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
218 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
219 }
220 }
221
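/**
 * xe_vm_add_compute_exec_queue() - Add a compute exec queue to the VM
 * @vm: The VM, which must be in preempt-fence mode.
 * @q: The exec queue to add.
 *
 * Creates a preempt fence for @q, installs it in the VM's dma-resv as a
 * BOOKKEEP fence and adds @q to the VM's list of preempting exec queues.
 * If a preemption or userptr invalidation is already in flight, software
 * signaling is enabled on the new fence so it syncs with the other preempt
 * fences on the VM.
 *
 * Return: 0 on success, negative error code on failure.
 */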
222 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
223 {
224 struct drm_gpuvm_exec vm_exec = {
225 .vm = &vm->gpuvm,
226 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
227 .num_fences = 1,
228 };
229 struct drm_exec *exec = &vm_exec.exec;
230 struct dma_fence *pfence;
231 int err;
232 bool wait;
233
234 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
235
236 down_write(&vm->lock);
237 err = drm_gpuvm_exec_lock(&vm_exec);
238 if (err)
239 goto out_up_write;
240
241 pfence = xe_preempt_fence_create(q, q->lr.context,
242 ++q->lr.seqno);
243 if (!pfence) {
244 err = -ENOMEM;
245 goto out_fini;
246 }
247
248 list_add(&q->lr.link, &vm->preempt.exec_queues);
249 ++vm->preempt.num_exec_queues;
250 q->lr.pfence = pfence;
251
252 down_read(&vm->userptr.notifier_lock);
253
254 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
255 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
256
257 /*
258 * Check to see if a preemption on the VM or a userptr invalidation is in
259 * flight; if so, trigger this preempt fence to sync state with the
260 * other preempt fences on the VM.
261 */
262 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
263 if (wait)
264 dma_fence_enable_sw_signaling(pfence);
265
266 up_read(&vm->userptr.notifier_lock);
267
268 out_fini:
269 drm_exec_fini(exec);
270 out_up_write:
271 up_write(&vm->lock);
272
273 return err;
274 }
275 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
276
277 /**
278 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
279 * @vm: The VM.
280 * @q: The exec_queue
281 *
282 * Note that this function might be called multiple times on the same queue.
283 */
284 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
285 {
286 if (!xe_vm_in_preempt_fence_mode(vm))
287 return;
288
289 down_write(&vm->lock);
290 if (!list_empty(&q->lr.link)) {
291 list_del_init(&q->lr.link);
292 --vm->preempt.num_exec_queues;
293 }
294 if (q->lr.pfence) {
295 dma_fence_enable_sw_signaling(q->lr.pfence);
296 dma_fence_put(q->lr.pfence);
297 q->lr.pfence = NULL;
298 }
299 up_write(&vm->lock);
300 }
301
302 /**
303 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
304 * that need repinning.
305 * @vm: The VM.
306 *
307 * This function checks whether the VM has userptrs that need repinning,
308 * and provides a release-type barrier on the userptr.notifier_lock after
309 * checking.
310 *
311 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
312 */
313 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
314 {
315 lockdep_assert_held_read(&vm->userptr.notifier_lock);
316
317 return (list_empty(&vm->userptr.repin_list) &&
318 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
319 }
320
321 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
322
323 /**
324 * xe_vm_kill() - VM Kill
325 * @vm: The VM.
326 * @unlocked: Flag indicating the VM's dma-resv is not held
327 *
328 * Kill the VM by setting the banned flag, indicating the VM is no longer available for use.
329 * If in preempt fence mode, also kill all exec queues attached to the VM.
330 */
331 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
332 {
333 struct xe_exec_queue *q;
334
335 lockdep_assert_held(&vm->lock);
336
337 if (unlocked)
338 xe_vm_lock(vm, false);
339
340 vm->flags |= XE_VM_FLAG_BANNED;
341 trace_xe_vm_kill(vm);
342
343 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
344 q->ops->kill(q);
345
346 if (unlocked)
347 xe_vm_unlock(vm);
348
349 /* TODO: Inform user the VM is banned */
350 }
351
352 /**
353 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
354 * @exec: The drm_exec object used for locking before validation.
355 * @err: The error returned from ttm_bo_validate().
356 * @end: A ktime_t cookie that should be set to 0 before first use and
357 * that should be reused on subsequent calls.
358 *
359 * With multiple active VMs, under memory pressure, it is possible that
360 * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
361 * Until ttm properly handles locking in such scenarios, best thing the
362 * driver can do is retry with a timeout. Check if that is necessary, and
363 * if so unlock the drm_exec's objects while keeping the ticket to prepare
364 * for a rerun.
365 *
366 * Return: true if a retry after drm_exec_init() is recommended;
367 * false otherwise.
368 */
369 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
370 {
371 ktime_t cur;
372
373 if (err != -ENOMEM)
374 return false;
375
376 cur = ktime_get();
377 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
378 if (!ktime_before(cur, *end))
379 return false;
380
381 msleep(20);
382 return true;
383 }
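
/*
 * Example (illustrative sketch only, not called anywhere in this file): the
 * intended use of xe_vm_validate_should_retry() around a drm_exec
 * transaction, following the shape of preempt_rebind_work_func() below.
 * "retry" is a hypothetical label in the caller and the locking/validation
 * step is elided:
 *
 *	ktime_t end = 0;
 *	int err;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = ...lock and validate objects...;
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */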
384
385 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
386 {
387 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
388 struct drm_gpuva *gpuva;
389 int ret;
390
391 lockdep_assert_held(&vm->lock);
392 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
393 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
394 &vm->rebind_list);
395
396 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
397 if (ret)
398 return ret;
399
400 vm_bo->evicted = false;
401 return 0;
402 }
403
404 /**
405 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
406 * @vm: The vm for which we are rebinding.
407 * @exec: The struct drm_exec with the locked GEM objects.
408 * @num_fences: The number of fences to reserve for the operation, not
409 * including rebinds and validations.
410 *
411 * Validates all evicted gem objects and rebinds their vmas. Note that
412 * rebindings may cause evictions and hence the validation-rebind
413 * sequence is rerun until there are no more objects to validate.
414 *
415 * Return: 0 on success, negative error code on error. In particular,
416 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
417 * the drm_exec transaction needs to be restarted.
418 */
419 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
420 unsigned int num_fences)
421 {
422 struct drm_gem_object *obj;
423 unsigned long index;
424 int ret;
425
426 do {
427 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
428 if (ret)
429 return ret;
430
431 ret = xe_vm_rebind(vm, false);
432 if (ret)
433 return ret;
434 } while (!list_empty(&vm->gpuvm.evict.list));
435
436 drm_exec_for_each_locked_object(exec, index, obj) {
437 ret = dma_resv_reserve_fences(obj->resv, num_fences);
438 if (ret)
439 return ret;
440 }
441
442 return 0;
443 }
444
445 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
446 bool *done)
447 {
448 int err;
449
450 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
451 if (err)
452 return err;
453
454 if (xe_vm_is_idle(vm)) {
455 vm->preempt.rebind_deactivated = true;
456 *done = true;
457 return 0;
458 }
459
460 if (!preempt_fences_waiting(vm)) {
461 *done = true;
462 return 0;
463 }
464
465 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
466 if (err)
467 return err;
468
469 err = wait_for_existing_preempt_fences(vm);
470 if (err)
471 return err;
472
473 /*
474 * Add validation and rebinding to the locking loop since both can
475 * cause evictions which may require blocking dma_resv locks.
476 * The fence reservation here is intended for the new preempt fences
477 * we attach at the end of the rebind work.
478 */
479 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
480 }
481
482 static void preempt_rebind_work_func(struct work_struct *w)
483 {
484 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
485 struct drm_exec exec;
486 unsigned int fence_count = 0;
487 LIST_HEAD(preempt_fences);
488 ktime_t end = 0;
489 int err = 0;
490 long wait;
491 int __maybe_unused tries = 0;
492
493 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
494 trace_xe_vm_rebind_worker_enter(vm);
495
496 down_write(&vm->lock);
497
498 if (xe_vm_is_closed_or_banned(vm)) {
499 up_write(&vm->lock);
500 trace_xe_vm_rebind_worker_exit(vm);
501 return;
502 }
503
504 retry:
505 if (xe_vm_userptr_check_repin(vm)) {
506 err = xe_vm_userptr_pin(vm);
507 if (err)
508 goto out_unlock_outer;
509 }
510
511 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
512
513 drm_exec_until_all_locked(&exec) {
514 bool done = false;
515
516 err = xe_preempt_work_begin(&exec, vm, &done);
517 drm_exec_retry_on_contention(&exec);
518 if (err || done) {
519 drm_exec_fini(&exec);
520 if (err && xe_vm_validate_should_retry(&exec, err, &end))
521 err = -EAGAIN;
522
523 goto out_unlock_outer;
524 }
525 }
526
527 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
528 if (err)
529 goto out_unlock;
530
531 err = xe_vm_rebind(vm, true);
532 if (err)
533 goto out_unlock;
534
535 /* Wait on rebinds and munmap style VM unbinds */
536 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
537 DMA_RESV_USAGE_KERNEL,
538 false, MAX_SCHEDULE_TIMEOUT);
539 if (wait <= 0) {
540 err = -ETIME;
541 goto out_unlock;
542 }
543
544 #define retry_required(__tries, __vm) \
545 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
546 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
547 __xe_vm_userptr_needs_repin(__vm))
548
549 down_read(&vm->userptr.notifier_lock);
550 if (retry_required(tries, vm)) {
551 up_read(&vm->userptr.notifier_lock);
552 err = -EAGAIN;
553 goto out_unlock;
554 }
555
556 #undef retry_required
557
558 spin_lock(&vm->xe->ttm.lru_lock);
559 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
560 spin_unlock(&vm->xe->ttm.lru_lock);
561
562 /* Point of no return. */
563 arm_preempt_fences(vm, &preempt_fences);
564 resume_and_reinstall_preempt_fences(vm, &exec);
565 up_read(&vm->userptr.notifier_lock);
566
567 out_unlock:
568 drm_exec_fini(&exec);
569 out_unlock_outer:
570 if (err == -EAGAIN) {
571 trace_xe_vm_rebind_worker_retry(vm);
572 goto retry;
573 }
574
575 if (err) {
576 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
577 xe_vm_kill(vm, true);
578 }
579 up_write(&vm->lock);
580
581 free_preempt_fences(&preempt_fences);
582
583 trace_xe_vm_rebind_worker_exit(vm);
584 }
585
586 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
587 {
588 struct xe_userptr *userptr = &uvma->userptr;
589 struct xe_vma *vma = &uvma->vma;
590 struct dma_resv_iter cursor;
591 struct dma_fence *fence;
592 long err;
593
594 /*
595 * Tell exec and rebind worker they need to repin and rebind this
596 * userptr.
597 */
598 if (!xe_vm_in_fault_mode(vm) &&
599 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
600 spin_lock(&vm->userptr.invalidated_lock);
601 list_move_tail(&userptr->invalidate_link,
602 &vm->userptr.invalidated);
603 spin_unlock(&vm->userptr.invalidated_lock);
604 }
605
606 /*
607 * Preempt fences turn into schedule disables, pipeline these.
608 * Note that even in fault mode, we need to wait for binds and
609 * unbinds to complete, and those are attached as BOOKKEEP fences
610 * to the vm.
611 */
612 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
613 DMA_RESV_USAGE_BOOKKEEP);
614 dma_resv_for_each_fence_unlocked(&cursor, fence)
615 dma_fence_enable_sw_signaling(fence);
616 dma_resv_iter_end(&cursor);
617
618 err = dma_resv_wait_timeout(xe_vm_resv(vm),
619 DMA_RESV_USAGE_BOOKKEEP,
620 false, MAX_SCHEDULE_TIMEOUT);
621 XE_WARN_ON(err <= 0);
622
623 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
624 err = xe_vm_invalidate_vma(vma);
625 XE_WARN_ON(err);
626 }
627
628 xe_hmm_userptr_unmap(uvma);
629 }
630
631 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
632 const struct mmu_notifier_range *range,
633 unsigned long cur_seq)
634 {
635 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
636 struct xe_vma *vma = &uvma->vma;
637 struct xe_vm *vm = xe_vma_vm(vma);
638
639 xe_assert(vm->xe, xe_vma_is_userptr(vma));
640 trace_xe_vma_userptr_invalidate(vma);
641
642 if (!mmu_notifier_range_blockable(range))
643 return false;
644
645 vm_dbg(&xe_vma_vm(vma)->xe->drm,
646 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
647 xe_vma_start(vma), xe_vma_size(vma));
648
649 down_write(&vm->userptr.notifier_lock);
650 mmu_interval_set_seq(mni, cur_seq);
651
652 __vma_userptr_invalidate(vm, uvma);
653 up_write(&vm->userptr.notifier_lock);
654 trace_xe_vma_userptr_invalidate_complete(vma);
655
656 return true;
657 }
658
659 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
660 .invalidate = vma_userptr_invalidate,
661 };
662
663 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
664 /**
665 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
666 * @uvma: The userptr vma to invalidate
667 *
668 * Perform a forced userptr invalidation for testing purposes.
669 */
670 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
671 {
672 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
673
674 /* Protect against concurrent userptr pinning */
675 lockdep_assert_held(&vm->lock);
676 /* Protect against concurrent notifiers */
677 lockdep_assert_held(&vm->userptr.notifier_lock);
678 /*
679 * Protect against concurrent instances of this function and
680 * the critical exec sections
681 */
682 xe_vm_assert_held(vm);
683
684 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
685 uvma->userptr.notifier_seq))
686 uvma->userptr.notifier_seq -= 2;
687 __vma_userptr_invalidate(vm, uvma);
688 }
689 #endif
690
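/**
 * xe_vm_userptr_pin() - Pin all invalidated userptrs of a VM
 * @vm: The VM.
 *
 * Moves all userptr VMAs that have been invalidated by the MMU notifier to
 * the repin list, (re)pins their pages and puts them on the rebind list.
 * Userptrs that fault with -EFAULT are instead unmapped from the GPU via
 * xe_vm_invalidate_vma(). On other errors, the collected userptrs are moved
 * back to the invalidated list so a later call can retry.
 *
 * Called with vm->lock held for write.
 *
 * Return: 0 on success, negative error code on error.
 */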
691 int xe_vm_userptr_pin(struct xe_vm *vm)
692 {
693 struct xe_userptr_vma *uvma, *next;
694 int err = 0;
695
696 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
697 lockdep_assert_held_write(&vm->lock);
698
699 /* Collect invalidated userptrs */
700 spin_lock(&vm->userptr.invalidated_lock);
701 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
702 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
703 userptr.invalidate_link) {
704 list_del_init(&uvma->userptr.invalidate_link);
705 list_add_tail(&uvma->userptr.repin_link,
706 &vm->userptr.repin_list);
707 }
708 spin_unlock(&vm->userptr.invalidated_lock);
709
710 /* Pin and move to bind list */
711 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
712 userptr.repin_link) {
713 err = xe_vma_userptr_pin_pages(uvma);
714 if (err == -EFAULT) {
715 list_del_init(&uvma->userptr.repin_link);
716 /*
717 * We might have already done the pin once, but then
718 * had to retry before the re-bind happened, due to
719 * some other condition in the caller, but in the
720 * meantime the userptr got dinged by the notifier such
721 * that we need to revalidate here, but this time we hit
722 * the EFAULT. In such a case make sure we remove
723 * ourselves from the rebind list to avoid going down in
724 * flames.
725 */
726 if (!list_empty(&uvma->vma.combined_links.rebind))
727 list_del_init(&uvma->vma.combined_links.rebind);
728
729 /* Wait for pending binds */
730 xe_vm_lock(vm, false);
731 dma_resv_wait_timeout(xe_vm_resv(vm),
732 DMA_RESV_USAGE_BOOKKEEP,
733 false, MAX_SCHEDULE_TIMEOUT);
734
735 err = xe_vm_invalidate_vma(&uvma->vma);
736 xe_vm_unlock(vm);
737 if (err)
738 break;
739 } else {
740 if (err)
741 break;
742
743 list_del_init(&uvma->userptr.repin_link);
744 list_move_tail(&uvma->vma.combined_links.rebind,
745 &vm->rebind_list);
746 }
747 }
748
749 if (err) {
750 down_write(&vm->userptr.notifier_lock);
751 spin_lock(&vm->userptr.invalidated_lock);
752 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
753 userptr.repin_link) {
754 list_del_init(&uvma->userptr.repin_link);
755 list_move_tail(&uvma->userptr.invalidate_link,
756 &vm->userptr.invalidated);
757 }
758 spin_unlock(&vm->userptr.invalidated_lock);
759 up_write(&vm->userptr.notifier_lock);
760 }
761 return err;
762 }
763
764 /**
765 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
766 * that need repinning.
767 * @vm: The VM.
768 *
769 * This function does an advisory check for whether the VM has userptrs that
770 * need repinning.
771 *
772 * Return: 0 if there are no indications of userptrs needing repinning,
773 * -EAGAIN if there are.
774 */
775 int xe_vm_userptr_check_repin(struct xe_vm *vm)
776 {
777 return (list_empty_careful(&vm->userptr.repin_list) &&
778 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
779 }
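
/*
 * Example (illustrative sketch only, not called anywhere): the advisory
 * check above pairs with a repin and a locked re-check, mirroring the flow
 * in preempt_rebind_work_func(). "retry" is a hypothetical label in the
 * caller:
 *
 *	if (xe_vm_userptr_check_repin(vm)) {
 *		err = xe_vm_userptr_pin(vm);
 *		if (err)
 *			return err;
 *	}
 *	... set up bindings ...
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {
 *		up_read(&vm->userptr.notifier_lock);
 *		goto retry;
 *	}
 *	... publish/commit, then up_read() ...
 */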
780
781 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
782 {
783 int i;
784
785 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
786 if (!vops->pt_update_ops[i].num_ops)
787 continue;
788
789 vops->pt_update_ops[i].ops =
790 kmalloc_array(vops->pt_update_ops[i].num_ops,
791 sizeof(*vops->pt_update_ops[i].ops),
792 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
793 if (!vops->pt_update_ops[i].ops)
794 return array_of_binds ? -ENOBUFS : -ENOMEM;
795 }
796
797 return 0;
798 }
799 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
800
801 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
802 {
803 int i;
804
805 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
806 kfree(vops->pt_update_ops[i].ops);
807 }
808
809 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
810 {
811 int i;
812
813 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
814 if (BIT(i) & tile_mask)
815 ++vops->pt_update_ops[i].num_ops;
816 }
817
818 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
819 u8 tile_mask)
820 {
821 INIT_LIST_HEAD(&op->link);
822 op->tile_mask = tile_mask;
823 op->base.op = DRM_GPUVA_OP_MAP;
824 op->base.map.va.addr = vma->gpuva.va.addr;
825 op->base.map.va.range = vma->gpuva.va.range;
826 op->base.map.gem.obj = vma->gpuva.gem.obj;
827 op->base.map.gem.offset = vma->gpuva.gem.offset;
828 op->map.vma = vma;
829 op->map.immediate = true;
830 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
831 op->map.is_null = xe_vma_is_null(vma);
832 }
833
834 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
835 u8 tile_mask)
836 {
837 struct xe_vma_op *op;
838
839 op = kzalloc(sizeof(*op), GFP_KERNEL);
840 if (!op)
841 return -ENOMEM;
842
843 xe_vm_populate_rebind(op, vma, tile_mask);
844 list_add_tail(&op->link, &vops->list);
845 xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
846
847 return 0;
848 }
849
850 static struct dma_fence *ops_execute(struct xe_vm *vm,
851 struct xe_vma_ops *vops);
852 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
853 struct xe_exec_queue *q,
854 struct xe_sync_entry *syncs, u32 num_syncs);
855
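/**
 * xe_vm_rebind() - Rebind all VMAs on the VM's rebind list
 * @vm: The VM.
 * @rebind_worker: Whether the call comes from the preempt rebind worker
 * (selects the tracepoint and allows rebinds in LR mode).
 *
 * Builds and executes bind operations for every VMA currently on
 * vm->rebind_list, waiting on the VM's bookkeep fences before updating the
 * page tables. On success the VMAs are removed from the rebind list.
 *
 * Return: 0 on success, negative error code on error.
 */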
856 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
857 {
858 struct dma_fence *fence;
859 struct xe_vma *vma, *next;
860 struct xe_vma_ops vops;
861 struct xe_vma_op *op, *next_op;
862 int err, i;
863
864 lockdep_assert_held(&vm->lock);
865 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
866 list_empty(&vm->rebind_list))
867 return 0;
868
869 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
870 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
871 vops.pt_update_ops[i].wait_vm_bookkeep = true;
872
873 xe_vm_assert_held(vm);
874 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
875 xe_assert(vm->xe, vma->tile_present);
876
877 if (rebind_worker)
878 trace_xe_vma_rebind_worker(vma);
879 else
880 trace_xe_vma_rebind_exec(vma);
881
882 err = xe_vm_ops_add_rebind(&vops, vma,
883 vma->tile_present);
884 if (err)
885 goto free_ops;
886 }
887
888 err = xe_vma_ops_alloc(&vops, false);
889 if (err)
890 goto free_ops;
891
892 fence = ops_execute(vm, &vops);
893 if (IS_ERR(fence)) {
894 err = PTR_ERR(fence);
895 } else {
896 dma_fence_put(fence);
897 list_for_each_entry_safe(vma, next, &vm->rebind_list,
898 combined_links.rebind)
899 list_del_init(&vma->combined_links.rebind);
900 }
901 free_ops:
902 list_for_each_entry_safe(op, next_op, &vops.list, link) {
903 list_del(&op->link);
904 kfree(op);
905 }
906 xe_vma_ops_fini(&vops);
907
908 return err;
909 }
910
911 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
912 {
913 struct dma_fence *fence = NULL;
914 struct xe_vma_ops vops;
915 struct xe_vma_op *op, *next_op;
916 struct xe_tile *tile;
917 u8 id;
918 int err;
919
920 lockdep_assert_held(&vm->lock);
921 xe_vm_assert_held(vm);
922 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
923
924 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
925 for_each_tile(tile, vm->xe, id) {
926 vops.pt_update_ops[id].wait_vm_bookkeep = true;
927 vops.pt_update_ops[tile->id].q =
928 xe_tile_migrate_exec_queue(tile);
929 }
930
931 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
932 if (err)
933 return ERR_PTR(err);
934
935 err = xe_vma_ops_alloc(&vops, false);
936 if (err) {
937 fence = ERR_PTR(err);
938 goto free_ops;
939 }
940
941 fence = ops_execute(vm, &vops);
942
943 free_ops:
944 list_for_each_entry_safe(op, next_op, &vops.list, link) {
945 list_del(&op->link);
946 kfree(op);
947 }
948 xe_vma_ops_fini(&vops);
949
950 return fence;
951 }
952
953 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
954 struct xe_vma *vma,
955 struct xe_svm_range *range,
956 u8 tile_mask)
957 {
958 INIT_LIST_HEAD(&op->link);
959 op->tile_mask = tile_mask;
960 op->base.op = DRM_GPUVA_OP_DRIVER;
961 op->subop = XE_VMA_SUBOP_MAP_RANGE;
962 op->map_range.vma = vma;
963 op->map_range.range = range;
964 }
965
966 static int
967 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
968 struct xe_vma *vma,
969 struct xe_svm_range *range,
970 u8 tile_mask)
971 {
972 struct xe_vma_op *op;
973
974 op = kzalloc(sizeof(*op), GFP_KERNEL);
975 if (!op)
976 return -ENOMEM;
977
978 xe_vm_populate_range_rebind(op, vma, range, tile_mask);
979 list_add_tail(&op->link, &vops->list);
980 xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
981
982 return 0;
983 }
984
985 /**
986 * xe_vm_range_rebind() - VM range (re)bind
987 * @vm: The VM which the range belongs to.
988 * @vma: The VMA which the range belongs to.
989 * @range: SVM range to rebind.
990 * @tile_mask: Tile mask to bind the range to.
991 *
992 * (re)bind SVM range setting up GPU page tables for the range.
993 *
994 * Return: dma fence for rebind to signal completion on success, ERR_PTR on
995 * failure
996 */
997 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
998 struct xe_vma *vma,
999 struct xe_svm_range *range,
1000 u8 tile_mask)
1001 {
1002 struct dma_fence *fence = NULL;
1003 struct xe_vma_ops vops;
1004 struct xe_vma_op *op, *next_op;
1005 struct xe_tile *tile;
1006 u8 id;
1007 int err;
1008
1009 lockdep_assert_held(&vm->lock);
1010 xe_vm_assert_held(vm);
1011 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1012 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
1013
1014 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1015 for_each_tile(tile, vm->xe, id) {
1016 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1017 vops.pt_update_ops[tile->id].q =
1018 xe_tile_migrate_exec_queue(tile);
1019 }
1020
1021 err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
1022 if (err)
1023 return ERR_PTR(err);
1024
1025 err = xe_vma_ops_alloc(&vops, false);
1026 if (err) {
1027 fence = ERR_PTR(err);
1028 goto free_ops;
1029 }
1030
1031 fence = ops_execute(vm, &vops);
1032
1033 free_ops:
1034 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1035 list_del(&op->link);
1036 kfree(op);
1037 }
1038 xe_vma_ops_fini(&vops);
1039
1040 return fence;
1041 }
1042
1043 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
1044 struct xe_svm_range *range)
1045 {
1046 INIT_LIST_HEAD(&op->link);
1047 op->tile_mask = range->tile_present;
1048 op->base.op = DRM_GPUVA_OP_DRIVER;
1049 op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
1050 op->unmap_range.range = range;
1051 }
1052
1053 static int
1054 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
1055 struct xe_svm_range *range)
1056 {
1057 struct xe_vma_op *op;
1058
1059 op = kzalloc(sizeof(*op), GFP_KERNEL);
1060 if (!op)
1061 return -ENOMEM;
1062
1063 xe_vm_populate_range_unbind(op, range);
1064 list_add_tail(&op->link, &vops->list);
1065 xe_vma_ops_incr_pt_update_ops(vops, range->tile_present);
1066
1067 return 0;
1068 }
1069
1070 /**
1071 * xe_vm_range_unbind() - VM range unbind
1072 * @vm: The VM which the range belongs to.
1073 * @range: SVM range to unbind.
1074 *
1075 * Unbind SVM range removing the GPU page tables for the range.
1076 *
1077 * Return: dma fence for unbind to signal completion on success, ERR_PTR on
1078 * failure
1079 */
1080 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
1081 struct xe_svm_range *range)
1082 {
1083 struct dma_fence *fence = NULL;
1084 struct xe_vma_ops vops;
1085 struct xe_vma_op *op, *next_op;
1086 struct xe_tile *tile;
1087 u8 id;
1088 int err;
1089
1090 lockdep_assert_held(&vm->lock);
1091 xe_vm_assert_held(vm);
1092 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1093
1094 if (!range->tile_present)
1095 return dma_fence_get_stub();
1096
1097 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1098 for_each_tile(tile, vm->xe, id) {
1099 vops.pt_update_ops[id].wait_vm_bookkeep = true;
1100 vops.pt_update_ops[tile->id].q =
1101 xe_tile_migrate_exec_queue(tile);
1102 }
1103
1104 err = xe_vm_ops_add_range_unbind(&vops, range);
1105 if (err)
1106 return ERR_PTR(err);
1107
1108 err = xe_vma_ops_alloc(&vops, false);
1109 if (err) {
1110 fence = ERR_PTR(err);
1111 goto free_ops;
1112 }
1113
1114 fence = ops_execute(vm, &vops);
1115
1116 free_ops:
1117 list_for_each_entry_safe(op, next_op, &vops.list, link) {
1118 list_del(&op->link);
1119 kfree(op);
1120 }
1121 xe_vma_ops_fini(&vops);
1122
1123 return fence;
1124 }
1125
1126 static void xe_vma_free(struct xe_vma *vma)
1127 {
1128 if (xe_vma_is_userptr(vma))
1129 kfree(to_userptr_vma(vma));
1130 else
1131 kfree(vma);
1132 }
1133
1134 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
1135 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
1136 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
1137 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3)
1138
1139 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
1140 struct xe_bo *bo,
1141 u64 bo_offset_or_userptr,
1142 u64 start, u64 end,
1143 u16 pat_index, unsigned int flags)
1144 {
1145 struct xe_vma *vma;
1146 struct xe_tile *tile;
1147 u8 id;
1148 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
1149 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
1150 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
1151 bool is_cpu_addr_mirror =
1152 (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
1153
1154 xe_assert(vm->xe, start < end);
1155 xe_assert(vm->xe, end < vm->size);
1156
1157 /*
1158 * Allocate and ensure that the xe_vma_is_userptr() return
1159 * matches what was allocated.
1160 */
1161 if (!bo && !is_null && !is_cpu_addr_mirror) {
1162 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
1163
1164 if (!uvma)
1165 return ERR_PTR(-ENOMEM);
1166
1167 vma = &uvma->vma;
1168 } else {
1169 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
1170 if (!vma)
1171 return ERR_PTR(-ENOMEM);
1172
1173 if (is_cpu_addr_mirror)
1174 vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
1175 if (is_null)
1176 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
1177 if (bo)
1178 vma->gpuva.gem.obj = &bo->ttm.base;
1179 }
1180
1181 INIT_LIST_HEAD(&vma->combined_links.rebind);
1182
1183 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1184 vma->gpuva.vm = &vm->gpuvm;
1185 vma->gpuva.va.addr = start;
1186 vma->gpuva.va.range = end - start + 1;
1187 if (read_only)
1188 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1189 if (dumpable)
1190 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1191
1192 for_each_tile(tile, vm->xe, id)
1193 vma->tile_mask |= 0x1 << id;
1194
1195 if (vm->xe->info.has_atomic_enable_pte_bit)
1196 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1197
1198 vma->pat_index = pat_index;
1199
1200 if (bo) {
1201 struct drm_gpuvm_bo *vm_bo;
1202
1203 xe_bo_assert_held(bo);
1204
1205 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1206 if (IS_ERR(vm_bo)) {
1207 xe_vma_free(vma);
1208 return ERR_CAST(vm_bo);
1209 }
1210
1211 drm_gpuvm_bo_extobj_add(vm_bo);
1212 drm_gem_object_get(&bo->ttm.base);
1213 vma->gpuva.gem.offset = bo_offset_or_userptr;
1214 drm_gpuva_link(&vma->gpuva, vm_bo);
1215 drm_gpuvm_bo_put(vm_bo);
1216 } else /* userptr or null */ {
1217 if (!is_null && !is_cpu_addr_mirror) {
1218 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1219 u64 size = end - start + 1;
1220 int err;
1221
1222 INIT_LIST_HEAD(&userptr->invalidate_link);
1223 INIT_LIST_HEAD(&userptr->repin_link);
1224 vma->gpuva.gem.offset = bo_offset_or_userptr;
1225 mutex_init(&userptr->unmap_mutex);
1226
1227 err = mmu_interval_notifier_insert(&userptr->notifier,
1228 current->mm,
1229 xe_vma_userptr(vma), size,
1230 &vma_userptr_notifier_ops);
1231 if (err) {
1232 xe_vma_free(vma);
1233 return ERR_PTR(err);
1234 }
1235
1236 userptr->notifier_seq = LONG_MAX;
1237 }
1238
1239 xe_vm_get(vm);
1240 }
1241
1242 return vma;
1243 }
1244
1245 static void xe_vma_destroy_late(struct xe_vma *vma)
1246 {
1247 struct xe_vm *vm = xe_vma_vm(vma);
1248
1249 if (vma->ufence) {
1250 xe_sync_ufence_put(vma->ufence);
1251 vma->ufence = NULL;
1252 }
1253
1254 if (xe_vma_is_userptr(vma)) {
1255 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1256 struct xe_userptr *userptr = &uvma->userptr;
1257
1258 if (userptr->sg)
1259 xe_hmm_userptr_free_sg(uvma);
1260
1261 /*
1262 * Since userptr pages are not pinned, we can't remove
1263 * the notifier until we're sure the GPU is not accessing
1264 * them anymore
1265 */
1266 mmu_interval_notifier_remove(&userptr->notifier);
1267 mutex_destroy(&userptr->unmap_mutex);
1268 xe_vm_put(vm);
1269 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1270 xe_vm_put(vm);
1271 } else {
1272 xe_bo_put(xe_vma_bo(vma));
1273 }
1274
1275 xe_vma_free(vma);
1276 }
1277
1278 static void vma_destroy_work_func(struct work_struct *w)
1279 {
1280 struct xe_vma *vma =
1281 container_of(w, struct xe_vma, destroy_work);
1282
1283 xe_vma_destroy_late(vma);
1284 }
1285
1286 static void vma_destroy_cb(struct dma_fence *fence,
1287 struct dma_fence_cb *cb)
1288 {
1289 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1290
1291 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1292 queue_work(system_unbound_wq, &vma->destroy_work);
1293 }
1294
1295 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1296 {
1297 struct xe_vm *vm = xe_vma_vm(vma);
1298
1299 lockdep_assert_held_write(&vm->lock);
1300 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1301
1302 if (xe_vma_is_userptr(vma)) {
1303 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1304
1305 spin_lock(&vm->userptr.invalidated_lock);
1306 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1307 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1308 spin_unlock(&vm->userptr.invalidated_lock);
1309 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1310 xe_bo_assert_held(xe_vma_bo(vma));
1311
1312 drm_gpuva_unlink(&vma->gpuva);
1313 }
1314
1315 xe_vm_assert_held(vm);
1316 if (fence) {
1317 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1318 vma_destroy_cb);
1319
1320 if (ret) {
1321 XE_WARN_ON(ret != -ENOENT);
1322 xe_vma_destroy_late(vma);
1323 }
1324 } else {
1325 xe_vma_destroy_late(vma);
1326 }
1327 }
1328
1329 /**
1330 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1331 * @exec: The drm_exec object we're currently locking for.
1332 * @vma: The vma for which we want to lock the vm resv and any attached
1333 * object's resv.
1334 *
1335 * Return: 0 on success, negative error code on error. In particular
1336 * may return -EDEADLK on WW transaction contention and -EINTR if
1337 * an interruptible wait is terminated by a signal.
1338 */
1339 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1340 {
1341 struct xe_vm *vm = xe_vma_vm(vma);
1342 struct xe_bo *bo = xe_vma_bo(vma);
1343 int err;
1344
1345 XE_WARN_ON(!vm);
1346
1347 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1348 if (!err && bo && !bo->vm)
1349 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1350
1351 return err;
1352 }
1353
1354 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1355 {
1356 struct drm_exec exec;
1357 int err;
1358
1359 drm_exec_init(&exec, 0, 0);
1360 drm_exec_until_all_locked(&exec) {
1361 err = xe_vm_lock_vma(&exec, vma);
1362 drm_exec_retry_on_contention(&exec);
1363 if (XE_WARN_ON(err))
1364 break;
1365 }
1366
1367 xe_vma_destroy(vma, NULL);
1368
1369 drm_exec_fini(&exec);
1370 }
1371
1372 struct xe_vma *
1373 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1374 {
1375 struct drm_gpuva *gpuva;
1376
1377 lockdep_assert_held(&vm->lock);
1378
1379 if (xe_vm_is_closed_or_banned(vm))
1380 return NULL;
1381
1382 xe_assert(vm->xe, start + range <= vm->size);
1383
1384 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1385
1386 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1387 }
1388
1389 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1390 {
1391 int err;
1392
1393 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1394 lockdep_assert_held(&vm->lock);
1395
1396 mutex_lock(&vm->snap_mutex);
1397 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1398 mutex_unlock(&vm->snap_mutex);
1399 XE_WARN_ON(err); /* Shouldn't be possible */
1400
1401 return err;
1402 }
1403
1404 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1405 {
1406 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1407 lockdep_assert_held(&vm->lock);
1408
1409 mutex_lock(&vm->snap_mutex);
1410 drm_gpuva_remove(&vma->gpuva);
1411 mutex_unlock(&vm->snap_mutex);
1412 if (vm->usm.last_fault_vma == vma)
1413 vm->usm.last_fault_vma = NULL;
1414 }
1415
1416 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1417 {
1418 struct xe_vma_op *op;
1419
1420 op = kzalloc(sizeof(*op), GFP_KERNEL);
1421
1422 if (unlikely(!op))
1423 return NULL;
1424
1425 return &op->base;
1426 }
1427
1428 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1429
1430 static const struct drm_gpuvm_ops gpuvm_ops = {
1431 .op_alloc = xe_vm_op_alloc,
1432 .vm_bo_validate = xe_gpuvm_validate,
1433 .vm_free = xe_vm_free,
1434 };
1435
1436 static u64 pde_encode_pat_index(u16 pat_index)
1437 {
1438 u64 pte = 0;
1439
1440 if (pat_index & BIT(0))
1441 pte |= XE_PPGTT_PTE_PAT0;
1442
1443 if (pat_index & BIT(1))
1444 pte |= XE_PPGTT_PTE_PAT1;
1445
1446 return pte;
1447 }
1448
1449 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1450 {
1451 u64 pte = 0;
1452
1453 if (pat_index & BIT(0))
1454 pte |= XE_PPGTT_PTE_PAT0;
1455
1456 if (pat_index & BIT(1))
1457 pte |= XE_PPGTT_PTE_PAT1;
1458
1459 if (pat_index & BIT(2)) {
1460 if (pt_level)
1461 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1462 else
1463 pte |= XE_PPGTT_PTE_PAT2;
1464 }
1465
1466 if (pat_index & BIT(3))
1467 pte |= XELPG_PPGTT_PTE_PAT3;
1468
1469 if (pat_index & (BIT(4)))
1470 pte |= XE2_PPGTT_PTE_PAT4;
1471
1472 return pte;
1473 }
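
/*
 * Worked example of the encoding above (for illustration): pat_index 5
 * (0b00101) sets XE_PPGTT_PTE_PAT0 and, for pt_level == 0 (a leaf PTE),
 * XE_PPGTT_PTE_PAT2; for a higher-level entry it sets
 * XE_PPGTT_PDE_PDPE_PAT2 instead. PAT1, PAT3 and PAT4 remain clear.
 */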
1474
1475 static u64 pte_encode_ps(u32 pt_level)
1476 {
1477 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1478
1479 if (pt_level == 1)
1480 return XE_PDE_PS_2M;
1481 else if (pt_level == 2)
1482 return XE_PDPE_PS_1G;
1483
1484 return 0;
1485 }
1486
1487 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1488 const u16 pat_index)
1489 {
1490 u64 pde;
1491
1492 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1493 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1494 pde |= pde_encode_pat_index(pat_index);
1495
1496 return pde;
1497 }
1498
1499 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1500 u16 pat_index, u32 pt_level)
1501 {
1502 u64 pte;
1503
1504 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1505 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1506 pte |= pte_encode_pat_index(pat_index, pt_level);
1507 pte |= pte_encode_ps(pt_level);
1508
1509 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1510 pte |= XE_PPGTT_PTE_DM;
1511
1512 return pte;
1513 }
1514
1515 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1516 u16 pat_index, u32 pt_level)
1517 {
1518 pte |= XE_PAGE_PRESENT;
1519
1520 if (likely(!xe_vma_read_only(vma)))
1521 pte |= XE_PAGE_RW;
1522
1523 pte |= pte_encode_pat_index(pat_index, pt_level);
1524 pte |= pte_encode_ps(pt_level);
1525
1526 if (unlikely(xe_vma_is_null(vma)))
1527 pte |= XE_PTE_NULL;
1528
1529 return pte;
1530 }
1531
1532 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1533 u16 pat_index,
1534 u32 pt_level, bool devmem, u64 flags)
1535 {
1536 u64 pte;
1537
1538 /* Avoid passing random bits directly as flags */
1539 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1540
1541 pte = addr;
1542 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1543 pte |= pte_encode_pat_index(pat_index, pt_level);
1544 pte |= pte_encode_ps(pt_level);
1545
1546 if (devmem)
1547 pte |= XE_PPGTT_PTE_DM;
1548
1549 pte |= flags;
1550
1551 return pte;
1552 }
1553
1554 static const struct xe_pt_ops xelp_pt_ops = {
1555 .pte_encode_bo = xelp_pte_encode_bo,
1556 .pte_encode_vma = xelp_pte_encode_vma,
1557 .pte_encode_addr = xelp_pte_encode_addr,
1558 .pde_encode_bo = xelp_pde_encode_bo,
1559 };
1560
1561 static void vm_destroy_work_func(struct work_struct *w);
1562
1563 /**
1564 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1565 * given tile and vm.
1566 * @xe: xe device.
1567 * @tile: tile to set up for.
1568 * @vm: vm to set up for.
1569 *
1570 * Sets up a pagetable tree with one page-table per level and a single
1571 * leaf PTE. All pagetable entries point to the single page-table or,
1572 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1573 * turns writes into NOPs.
1574 *
1575 * Return: 0 on success, negative error code on error.
1576 */
1577 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1578 struct xe_vm *vm)
1579 {
1580 u8 id = tile->id;
1581 int i;
1582
1583 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1584 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1585 if (IS_ERR(vm->scratch_pt[id][i]))
1586 return PTR_ERR(vm->scratch_pt[id][i]);
1587
1588 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1589 }
1590
1591 return 0;
1592 }
1593 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1594
1595 static void xe_vm_free_scratch(struct xe_vm *vm)
1596 {
1597 struct xe_tile *tile;
1598 u8 id;
1599
1600 if (!xe_vm_has_scratch(vm))
1601 return;
1602
1603 for_each_tile(tile, vm->xe, id) {
1604 u32 i;
1605
1606 if (!vm->pt_root[id])
1607 continue;
1608
1609 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1610 if (vm->scratch_pt[id][i])
1611 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1612 }
1613 }
1614
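/**
 * xe_vm_create() - Create a VM
 * @xe: The xe device.
 * @flags: XE_VM_FLAG_* flags selecting the VM mode (LR, fault, scratch,
 * migration, ...).
 *
 * Allocates and initializes a VM: per-tile page-table roots, optional
 * scratch page tables and, for non-migration VMs, a bind exec queue per
 * tile. In LR mode the rebind worker is set up and a runtime PM reference
 * is taken; in fault mode SVM is initialized.
 *
 * Return: Pointer to the new VM on success, ERR_PTR on failure.
 */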
1615 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1616 {
1617 struct drm_gem_object *vm_resv_obj;
1618 struct xe_vm *vm;
1619 int err, number_tiles = 0;
1620 struct xe_tile *tile;
1621 u8 id;
1622
1623 /*
1624 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1625 * ever be in faulting mode.
1626 */
1627 xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1628
1629 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1630 if (!vm)
1631 return ERR_PTR(-ENOMEM);
1632
1633 vm->xe = xe;
1634
1635 vm->size = 1ull << xe->info.va_bits;
1636
1637 vm->flags = flags;
1638
1639 /**
1640 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1641 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1642 * under a user-VM lock when the PXP session is started at exec_queue
1643 * creation time. Those are different VMs and therefore there is no risk
1644 * of deadlock, but we need to tell lockdep that this is the case or it
1645 * will print a warning.
1646 */
1647 if (flags & XE_VM_FLAG_GSC) {
1648 static struct lock_class_key gsc_vm_key;
1649
1650 __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1651 } else {
1652 init_rwsem(&vm->lock);
1653 }
1654 mutex_init(&vm->snap_mutex);
1655
1656 INIT_LIST_HEAD(&vm->rebind_list);
1657
1658 INIT_LIST_HEAD(&vm->userptr.repin_list);
1659 INIT_LIST_HEAD(&vm->userptr.invalidated);
1660 init_rwsem(&vm->userptr.notifier_lock);
1661 spin_lock_init(&vm->userptr.invalidated_lock);
1662
1663 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1664
1665 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1666
1667 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1668 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1669
1670 for_each_tile(tile, xe, id)
1671 xe_range_fence_tree_init(&vm->rftree[id]);
1672
1673 vm->pt_ops = &xelp_pt_ops;
1674
1675 /*
1676 * Long-running workloads are not protected by the scheduler references.
1677 * By design, run_job for long-running workloads returns NULL and the
1678 * scheduler drops all of its references, hence protecting the VM
1679 * for this case is necessary.
1680 */
1681 if (flags & XE_VM_FLAG_LR_MODE) {
1682 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1683 xe_pm_runtime_get_noresume(xe);
1684 }
1685
1686 if (flags & XE_VM_FLAG_FAULT_MODE) {
1687 err = xe_svm_init(vm);
1688 if (err)
1689 goto err_no_resv;
1690 }
1691
1692 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1693 if (!vm_resv_obj) {
1694 err = -ENOMEM;
1695 goto err_svm_fini;
1696 }
1697
1698 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1699 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1700
1701 drm_gem_object_put(vm_resv_obj);
1702
1703 err = xe_vm_lock(vm, true);
1704 if (err)
1705 goto err_close;
1706
1707 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1708 vm->flags |= XE_VM_FLAG_64K;
1709
1710 for_each_tile(tile, xe, id) {
1711 if (flags & XE_VM_FLAG_MIGRATION &&
1712 tile->id != XE_VM_FLAG_TILE_ID(flags))
1713 continue;
1714
1715 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1716 if (IS_ERR(vm->pt_root[id])) {
1717 err = PTR_ERR(vm->pt_root[id]);
1718 vm->pt_root[id] = NULL;
1719 goto err_unlock_close;
1720 }
1721 }
1722
1723 if (xe_vm_has_scratch(vm)) {
1724 for_each_tile(tile, xe, id) {
1725 if (!vm->pt_root[id])
1726 continue;
1727
1728 err = xe_vm_create_scratch(xe, tile, vm);
1729 if (err)
1730 goto err_unlock_close;
1731 }
1732 vm->batch_invalidate_tlb = true;
1733 }
1734
1735 if (vm->flags & XE_VM_FLAG_LR_MODE)
1736 vm->batch_invalidate_tlb = false;
1737
1738 /* Fill pt_root after allocating scratch tables */
1739 for_each_tile(tile, xe, id) {
1740 if (!vm->pt_root[id])
1741 continue;
1742
1743 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1744 }
1745 xe_vm_unlock(vm);
1746
1747 /* Kernel migration VM shouldn't have a circular loop. */
1748 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1749 for_each_tile(tile, xe, id) {
1750 struct xe_exec_queue *q;
1751 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1752
1753 if (!vm->pt_root[id])
1754 continue;
1755
1756 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1757 if (IS_ERR(q)) {
1758 err = PTR_ERR(q);
1759 goto err_close;
1760 }
1761 vm->q[id] = q;
1762 number_tiles++;
1763 }
1764 }
1765
1766 if (number_tiles > 1)
1767 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1768
1769 trace_xe_vm_create(vm);
1770
1771 return vm;
1772
1773 err_unlock_close:
1774 xe_vm_unlock(vm);
1775 err_close:
1776 xe_vm_close_and_put(vm);
1777 return ERR_PTR(err);
1778
1779 err_svm_fini:
1780 if (flags & XE_VM_FLAG_FAULT_MODE) {
1781 vm->size = 0; /* close the vm */
1782 xe_svm_fini(vm);
1783 }
1784 err_no_resv:
1785 mutex_destroy(&vm->snap_mutex);
1786 for_each_tile(tile, xe, id)
1787 xe_range_fence_tree_fini(&vm->rftree[id]);
1788 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1789 kfree(vm);
1790 if (flags & XE_VM_FLAG_LR_MODE)
1791 xe_pm_runtime_put(xe);
1792 return ERR_PTR(err);
1793 }
1794
1795 static void xe_vm_close(struct xe_vm *vm)
1796 {
1797 struct xe_device *xe = vm->xe;
1798 bool bound;
1799 int idx;
1800
1801 bound = drm_dev_enter(&xe->drm, &idx);
1802
1803 down_write(&vm->lock);
1804 if (xe_vm_in_fault_mode(vm))
1805 xe_svm_notifier_lock(vm);
1806
1807 vm->size = 0;
1808
1809 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1810 struct xe_tile *tile;
1811 struct xe_gt *gt;
1812 u8 id;
1813
1814 /* Wait for pending binds */
1815 dma_resv_wait_timeout(xe_vm_resv(vm),
1816 DMA_RESV_USAGE_BOOKKEEP,
1817 false, MAX_SCHEDULE_TIMEOUT);
1818
1819 if (bound) {
1820 for_each_tile(tile, xe, id)
1821 if (vm->pt_root[id])
1822 xe_pt_clear(xe, vm->pt_root[id]);
1823
1824 for_each_gt(gt, xe, id)
1825 xe_gt_tlb_invalidation_vm(gt, vm);
1826 }
1827 }
1828
1829 if (xe_vm_in_fault_mode(vm))
1830 xe_svm_notifier_unlock(vm);
1831 up_write(&vm->lock);
1832
1833 if (bound)
1834 drm_dev_exit(idx);
1835 }
1836
1837 void xe_vm_close_and_put(struct xe_vm *vm)
1838 {
1839 LIST_HEAD(contested);
1840 struct xe_device *xe = vm->xe;
1841 struct xe_tile *tile;
1842 struct xe_vma *vma, *next_vma;
1843 struct drm_gpuva *gpuva, *next;
1844 u8 id;
1845
1846 xe_assert(xe, !vm->preempt.num_exec_queues);
1847
1848 xe_vm_close(vm);
1849 if (xe_vm_in_preempt_fence_mode(vm))
1850 flush_work(&vm->preempt.rebind_work);
1851 if (xe_vm_in_fault_mode(vm))
1852 xe_svm_close(vm);
1853
1854 down_write(&vm->lock);
1855 for_each_tile(tile, xe, id) {
1856 if (vm->q[id])
1857 xe_exec_queue_last_fence_put(vm->q[id], vm);
1858 }
1859 up_write(&vm->lock);
1860
1861 for_each_tile(tile, xe, id) {
1862 if (vm->q[id]) {
1863 xe_exec_queue_kill(vm->q[id]);
1864 xe_exec_queue_put(vm->q[id]);
1865 vm->q[id] = NULL;
1866 }
1867 }
1868
1869 down_write(&vm->lock);
1870 xe_vm_lock(vm, false);
1871 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1872 vma = gpuva_to_vma(gpuva);
1873
1874 if (xe_vma_has_no_bo(vma)) {
1875 down_read(&vm->userptr.notifier_lock);
1876 vma->gpuva.flags |= XE_VMA_DESTROYED;
1877 up_read(&vm->userptr.notifier_lock);
1878 }
1879
1880 xe_vm_remove_vma(vm, vma);
1881
1882 /* easy case, remove from VMA? */
1883 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1884 list_del_init(&vma->combined_links.rebind);
1885 xe_vma_destroy(vma, NULL);
1886 continue;
1887 }
1888
1889 list_move_tail(&vma->combined_links.destroy, &contested);
1890 vma->gpuva.flags |= XE_VMA_DESTROYED;
1891 }
1892
1893 /*
1894 * All vm operations will add shared fences to resv.
1895 * The only exception is eviction for a shared object,
1896 * but even so, the unbind when evicted would still
1897 * install a fence to resv. Hence it's safe to
1898 * destroy the pagetables immediately.
1899 */
1900 xe_vm_free_scratch(vm);
1901
1902 for_each_tile(tile, xe, id) {
1903 if (vm->pt_root[id]) {
1904 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1905 vm->pt_root[id] = NULL;
1906 }
1907 }
1908 xe_vm_unlock(vm);
1909
1910 /*
1911 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1912 * Since we hold a refcount to the bo, we can remove and free
1913 * the members safely without locking.
1914 */
1915 list_for_each_entry_safe(vma, next_vma, &contested,
1916 combined_links.destroy) {
1917 list_del_init(&vma->combined_links.destroy);
1918 xe_vma_destroy_unlocked(vma);
1919 }
1920
1921 if (xe_vm_in_fault_mode(vm))
1922 xe_svm_fini(vm);
1923
1924 up_write(&vm->lock);
1925
1926 down_write(&xe->usm.lock);
1927 if (vm->usm.asid) {
1928 void *lookup;
1929
1930 xe_assert(xe, xe->info.has_asid);
1931 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1932
1933 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1934 xe_assert(xe, lookup == vm);
1935 }
1936 up_write(&xe->usm.lock);
1937
1938 for_each_tile(tile, xe, id)
1939 xe_range_fence_tree_fini(&vm->rftree[id]);
1940
1941 xe_vm_put(vm);
1942 }
1943
1944 static void vm_destroy_work_func(struct work_struct *w)
1945 {
1946 struct xe_vm *vm =
1947 container_of(w, struct xe_vm, destroy_work);
1948 struct xe_device *xe = vm->xe;
1949 struct xe_tile *tile;
1950 u8 id;
1951
1952 /* xe_vm_close_and_put was not called? */
1953 xe_assert(xe, !vm->size);
1954
1955 if (xe_vm_in_preempt_fence_mode(vm))
1956 flush_work(&vm->preempt.rebind_work);
1957
1958 mutex_destroy(&vm->snap_mutex);
1959
1960 if (vm->flags & XE_VM_FLAG_LR_MODE)
1961 xe_pm_runtime_put(xe);
1962
1963 for_each_tile(tile, xe, id)
1964 XE_WARN_ON(vm->pt_root[id]);
1965
1966 trace_xe_vm_free(vm);
1967
1968 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1969
1970 if (vm->xef)
1971 xe_file_put(vm->xef);
1972
1973 kfree(vm);
1974 }
1975
xe_vm_free(struct drm_gpuvm * gpuvm)1976 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1977 {
1978 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1979
1980 /* To destroy the VM we need to be able to sleep */
1981 queue_work(system_unbound_wq, &vm->destroy_work);
1982 }
1983
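/*
 * Look up a VM by its per-file id and take a reference on success. The caller
 * is responsible for dropping the reference with xe_vm_put().
 */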
xe_vm_lookup(struct xe_file * xef,u32 id)1984 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1985 {
1986 struct xe_vm *vm;
1987
1988 mutex_lock(&xef->vm.lock);
1989 vm = xa_load(&xef->vm.xa, id);
1990 if (vm)
1991 xe_vm_get(vm);
1992 mutex_unlock(&xef->vm.lock);
1993
1994 return vm;
1995 }
1996
xe_vm_pdp4_descriptor(struct xe_vm * vm,struct xe_tile * tile)1997 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1998 {
1999 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
2000 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
2001 }
2002
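/*
 * Exec queue whose last fence orders the bind's in/out fences: the
 * user-supplied queue if one was given, otherwise the VM's default queue.
 */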
2003 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)2004 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2005 {
2006 return q ? q : vm->q[0];
2007 }
2008
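/*
 * Return the user fence of the first user-fence sync entry, with an extra
 * reference taken, or NULL if none of the syncs carry a user fence.
 */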
2009 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)2010 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2011 {
2012 unsigned int i;
2013
2014 for (i = 0; i < num_syncs; i++) {
2015 struct xe_sync_entry *e = &syncs[i];
2016
2017 if (xe_sync_is_ufence(e))
2018 return xe_sync_ufence_get(e);
2019 }
2020
2021 return NULL;
2022 }
2023
2024 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2025 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2026 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2027
xe_vm_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2028 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2029 struct drm_file *file)
2030 {
2031 struct xe_device *xe = to_xe_device(dev);
2032 struct xe_file *xef = to_xe_file(file);
2033 struct drm_xe_vm_create *args = data;
2034 struct xe_tile *tile;
2035 struct xe_vm *vm;
2036 u32 id, asid;
2037 int err;
2038 u32 flags = 0;
2039
2040 if (XE_IOCTL_DBG(xe, args->extensions))
2041 return -EINVAL;
2042
2043 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
2044 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2045
2046 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2047 !xe->info.has_usm))
2048 return -EINVAL;
2049
2050 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2051 return -EINVAL;
2052
2053 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2054 return -EINVAL;
2055
2056 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2057 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2058 !xe->info.needs_scratch))
2059 return -EINVAL;
2060
2061 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2062 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2063 return -EINVAL;
2064
2065 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2066 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2067 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2068 flags |= XE_VM_FLAG_LR_MODE;
2069 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2070 flags |= XE_VM_FLAG_FAULT_MODE;
2071
2072 vm = xe_vm_create(xe, flags);
2073 if (IS_ERR(vm))
2074 return PTR_ERR(vm);
2075
2076 if (xe->info.has_asid) {
2077 down_write(&xe->usm.lock);
2078 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
2079 XA_LIMIT(1, XE_MAX_ASID - 1),
2080 &xe->usm.next_asid, GFP_KERNEL);
2081 up_write(&xe->usm.lock);
2082 if (err < 0)
2083 goto err_close_and_put;
2084
2085 vm->usm.asid = asid;
2086 }
2087
2088 vm->xef = xe_file_get(xef);
2089
2090 /* Record the BO memory of the VM pagetables against the creating client */
2091 for_each_tile(tile, xe, id)
2092 if (vm->pt_root[id])
2093 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
2094
2095 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2096 /* Warning: Security issue - never enable by default */
2097 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2098 #endif
2099
2100 /* user id alloc must always be last in ioctl to prevent UAF */
2101 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2102 if (err)
2103 goto err_close_and_put;
2104
2105 args->vm_id = id;
2106
2107 return 0;
2108
2109 err_close_and_put:
2110 xe_vm_close_and_put(vm);
2111
2112 return err;
2113 }
2114
xe_vm_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2115 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2116 struct drm_file *file)
2117 {
2118 struct xe_device *xe = to_xe_device(dev);
2119 struct xe_file *xef = to_xe_file(file);
2120 struct drm_xe_vm_destroy *args = data;
2121 struct xe_vm *vm;
2122 int err = 0;
2123
2124 if (XE_IOCTL_DBG(xe, args->pad) ||
2125 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2126 return -EINVAL;
2127
2128 mutex_lock(&xef->vm.lock);
2129 vm = xa_load(&xef->vm.xa, args->vm_id);
2130 if (XE_IOCTL_DBG(xe, !vm))
2131 err = -ENOENT;
2132 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2133 err = -EBUSY;
2134 else
2135 xa_erase(&xef->vm.xa, args->vm_id);
2136 mutex_unlock(&xef->vm.lock);
2137
2138 if (!err)
2139 xe_vm_close_and_put(vm);
2140
2141 return err;
2142 }
2143
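/*
 * Maps a prefetch_mem_region_instance from the bind IOCTL to a TTM placement:
 * instance 0 is system memory (TT), the remaining instances are VRAM regions.
 */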
2144 static const u32 region_to_mem_type[] = {
2145 XE_PL_TT,
2146 XE_PL_VRAM0,
2147 XE_PL_VRAM1,
2148 };
2149
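/*
 * Mark the VMA as destroyed under the userptr notifier lock so concurrent
 * invalidations ignore it, and remove it from the gpuvm if it has already
 * been committed there.
 */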
prep_vma_destroy(struct xe_vm * vm,struct xe_vma * vma,bool post_commit)2150 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2151 bool post_commit)
2152 {
2153 down_read(&vm->userptr.notifier_lock);
2154 vma->gpuva.flags |= XE_VMA_DESTROYED;
2155 up_read(&vm->userptr.notifier_lock);
2156 if (post_commit)
2157 xe_vm_remove_vma(vm, vma);
2158 }
2159
2160 #undef ULL
2161 #define ULL unsigned long long
2162
2163 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2164 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2165 {
2166 struct xe_vma *vma;
2167
2168 switch (op->op) {
2169 case DRM_GPUVA_OP_MAP:
2170 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2171 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2172 break;
2173 case DRM_GPUVA_OP_REMAP:
2174 vma = gpuva_to_vma(op->remap.unmap->va);
2175 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2176 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2177 op->remap.unmap->keep ? 1 : 0);
2178 if (op->remap.prev)
2179 vm_dbg(&xe->drm,
2180 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2181 (ULL)op->remap.prev->va.addr,
2182 (ULL)op->remap.prev->va.range);
2183 if (op->remap.next)
2184 vm_dbg(&xe->drm,
2185 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2186 (ULL)op->remap.next->va.addr,
2187 (ULL)op->remap.next->va.range);
2188 break;
2189 case DRM_GPUVA_OP_UNMAP:
2190 vma = gpuva_to_vma(op->unmap.va);
2191 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2192 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2193 op->unmap.keep ? 1 : 0);
2194 break;
2195 case DRM_GPUVA_OP_PREFETCH:
2196 vma = gpuva_to_vma(op->prefetch.va);
2197 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2198 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2199 break;
2200 default:
2201 drm_warn(&xe->drm, "NOT POSSIBLE");
2202 }
2203 }
2204 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2205 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2206 {
2207 }
2208 #endif
2209
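/*
 * A deferred (non-IMMEDIATE) bind on a fault-mode VM with scratch pages
 * enabled needs the scratch PTEs covering the range cleared, so that later
 * accesses actually fault and populate the real mapping.
 */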
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2210 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2211 {
2212 if (!xe_vm_in_fault_mode(vm))
2213 return false;
2214
2215 if (!xe_vm_has_scratch(vm))
2216 return false;
2217
2218 if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2219 return false;
2220
2221 return true;
2222 }
2223
2224 /*
2225 * Create the operations list from the IOCTL arguments and set up the operation fields
2226 * so that the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
2227 */
2228 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2229 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2230 u64 bo_offset_or_userptr, u64 addr, u64 range,
2231 u32 operation, u32 flags,
2232 u32 prefetch_region, u16 pat_index)
2233 {
2234 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2235 struct drm_gpuva_ops *ops;
2236 struct drm_gpuva_op *__op;
2237 struct drm_gpuvm_bo *vm_bo;
2238 int err;
2239
2240 lockdep_assert_held_write(&vm->lock);
2241
2242 vm_dbg(&vm->xe->drm,
2243 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2244 operation, (ULL)addr, (ULL)range,
2245 (ULL)bo_offset_or_userptr);
2246
2247 switch (operation) {
2248 case DRM_XE_VM_BIND_OP_MAP:
2249 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2250 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2251 obj, bo_offset_or_userptr);
2252 break;
2253 case DRM_XE_VM_BIND_OP_UNMAP:
2254 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2255 break;
2256 case DRM_XE_VM_BIND_OP_PREFETCH:
2257 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2258 break;
2259 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2260 xe_assert(vm->xe, bo);
2261
2262 err = xe_bo_lock(bo, true);
2263 if (err)
2264 return ERR_PTR(err);
2265
2266 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2267 if (IS_ERR(vm_bo)) {
2268 xe_bo_unlock(bo);
2269 return ERR_CAST(vm_bo);
2270 }
2271
2272 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2273 drm_gpuvm_bo_put(vm_bo);
2274 xe_bo_unlock(bo);
2275 break;
2276 default:
2277 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2278 ops = ERR_PTR(-EINVAL);
2279 }
2280 if (IS_ERR(ops))
2281 return ops;
2282
2283 drm_gpuva_for_each_op(__op, ops) {
2284 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2285
2286 if (__op->op == DRM_GPUVA_OP_MAP) {
2287 op->map.immediate =
2288 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2289 op->map.read_only =
2290 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2291 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2292 op->map.is_cpu_addr_mirror = flags &
2293 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
2294 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2295 op->map.pat_index = pat_index;
2296 op->map.invalidate_on_bind =
2297 __xe_vm_needs_clear_scratch_pages(vm, flags);
2298 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2299 op->prefetch.region = prefetch_region;
2300 }
2301
2302 print_op(vm->xe, __op);
2303 }
2304
2305 return ops;
2306 }
2307 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2308
new_vma(struct xe_vm * vm,struct drm_gpuva_op_map * op,u16 pat_index,unsigned int flags)2309 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2310 u16 pat_index, unsigned int flags)
2311 {
2312 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2313 struct drm_exec exec;
2314 struct xe_vma *vma;
2315 int err = 0;
2316
2317 lockdep_assert_held_write(&vm->lock);
2318
2319 if (bo) {
2320 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2321 drm_exec_until_all_locked(&exec) {
2322 err = 0;
2323 if (!bo->vm) {
2324 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2325 drm_exec_retry_on_contention(&exec);
2326 }
2327 if (!err) {
2328 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2329 drm_exec_retry_on_contention(&exec);
2330 }
2331 if (err) {
2332 drm_exec_fini(&exec);
2333 return ERR_PTR(err);
2334 }
2335 }
2336 }
2337 vma = xe_vma_create(vm, bo, op->gem.offset,
2338 op->va.addr, op->va.addr +
2339 op->va.range - 1, pat_index, flags);
2340 if (IS_ERR(vma))
2341 goto err_unlock;
2342
2343 if (xe_vma_is_userptr(vma))
2344 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2345 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2346 err = add_preempt_fences(vm, bo);
2347
2348 err_unlock:
2349 if (bo)
2350 drm_exec_fini(&exec);
2351
2352 if (err) {
2353 prep_vma_destroy(vm, vma, false);
2354 xe_vma_destroy_unlocked(vma);
2355 vma = ERR_PTR(err);
2356 }
2357
2358 return vma;
2359 }
2360
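/*
 * Largest page-table entry size this VMA was bound with, derived from the
 * XE_VMA_PTE_* flags recorded at bind time. The REMAP path uses this to
 * decide whether an unchanged prev/next chunk can skip being rebound.
 */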
xe_vma_max_pte_size(struct xe_vma * vma)2361 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2362 {
2363 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2364 return SZ_1G;
2365 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2366 return SZ_2M;
2367 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2368 return SZ_64K;
2369 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2370 return SZ_4K;
2371
2372 return SZ_1G; /* Uninitialized, use max size */
2373 }
2374
xe_vma_set_pte_size(struct xe_vma * vma,u64 size)2375 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2376 {
2377 switch (size) {
2378 case SZ_1G:
2379 vma->gpuva.flags |= XE_VMA_PTE_1G;
2380 break;
2381 case SZ_2M:
2382 vma->gpuva.flags |= XE_VMA_PTE_2M;
2383 break;
2384 case SZ_64K:
2385 vma->gpuva.flags |= XE_VMA_PTE_64K;
2386 break;
2387 case SZ_4K:
2388 vma->gpuva.flags |= XE_VMA_PTE_4K;
2389 break;
2390 }
2391 }
2392
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2393 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2394 {
2395 int err = 0;
2396
2397 lockdep_assert_held_write(&vm->lock);
2398
2399 switch (op->base.op) {
2400 case DRM_GPUVA_OP_MAP:
2401 err |= xe_vm_insert_vma(vm, op->map.vma);
2402 if (!err)
2403 op->flags |= XE_VMA_OP_COMMITTED;
2404 break;
2405 case DRM_GPUVA_OP_REMAP:
2406 {
2407 u8 tile_present =
2408 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2409
2410 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2411 true);
2412 op->flags |= XE_VMA_OP_COMMITTED;
2413
2414 if (op->remap.prev) {
2415 err |= xe_vm_insert_vma(vm, op->remap.prev);
2416 if (!err)
2417 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2418 if (!err && op->remap.skip_prev) {
2419 op->remap.prev->tile_present =
2420 tile_present;
2421 op->remap.prev = NULL;
2422 }
2423 }
2424 if (op->remap.next) {
2425 err |= xe_vm_insert_vma(vm, op->remap.next);
2426 if (!err)
2427 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2428 if (!err && op->remap.skip_next) {
2429 op->remap.next->tile_present =
2430 tile_present;
2431 op->remap.next = NULL;
2432 }
2433 }
2434
2435 /* Adjust for partial unbind after removing VMA from VM */
2436 if (!err) {
2437 op->base.remap.unmap->va->va.addr = op->remap.start;
2438 op->base.remap.unmap->va->va.range = op->remap.range;
2439 }
2440 break;
2441 }
2442 case DRM_GPUVA_OP_UNMAP:
2443 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2444 op->flags |= XE_VMA_OP_COMMITTED;
2445 break;
2446 case DRM_GPUVA_OP_PREFETCH:
2447 op->flags |= XE_VMA_OP_COMMITTED;
2448 break;
2449 default:
2450 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2451 }
2452
2453 return err;
2454 }
2455
vm_bind_ioctl_ops_parse(struct xe_vm * vm,struct drm_gpuva_ops * ops,struct xe_vma_ops * vops)2456 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2457 struct xe_vma_ops *vops)
2458 {
2459 struct xe_device *xe = vm->xe;
2460 struct drm_gpuva_op *__op;
2461 struct xe_tile *tile;
2462 u8 id, tile_mask = 0;
2463 int err = 0;
2464
2465 lockdep_assert_held_write(&vm->lock);
2466
2467 for_each_tile(tile, vm->xe, id)
2468 tile_mask |= 0x1 << id;
2469
2470 drm_gpuva_for_each_op(__op, ops) {
2471 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2472 struct xe_vma *vma;
2473 unsigned int flags = 0;
2474
2475 INIT_LIST_HEAD(&op->link);
2476 list_add_tail(&op->link, &vops->list);
2477 op->tile_mask = tile_mask;
2478
2479 switch (op->base.op) {
2480 case DRM_GPUVA_OP_MAP:
2481 {
2482 flags |= op->map.read_only ?
2483 VMA_CREATE_FLAG_READ_ONLY : 0;
2484 flags |= op->map.is_null ?
2485 VMA_CREATE_FLAG_IS_NULL : 0;
2486 flags |= op->map.dumpable ?
2487 VMA_CREATE_FLAG_DUMPABLE : 0;
2488 flags |= op->map.is_cpu_addr_mirror ?
2489 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2490
2491 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2492 flags);
2493 if (IS_ERR(vma))
2494 return PTR_ERR(vma);
2495
2496 op->map.vma = vma;
2497 if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2498 !op->map.is_cpu_addr_mirror) ||
2499 op->map.invalidate_on_bind)
2500 xe_vma_ops_incr_pt_update_ops(vops,
2501 op->tile_mask);
2502 break;
2503 }
2504 case DRM_GPUVA_OP_REMAP:
2505 {
2506 struct xe_vma *old =
2507 gpuva_to_vma(op->base.remap.unmap->va);
2508 bool skip = xe_vma_is_cpu_addr_mirror(old);
2509 u64 start = xe_vma_start(old), end = xe_vma_end(old);
2510
2511 if (op->base.remap.prev)
2512 start = op->base.remap.prev->va.addr +
2513 op->base.remap.prev->va.range;
2514 if (op->base.remap.next)
2515 end = op->base.remap.next->va.addr;
2516
2517 if (xe_vma_is_cpu_addr_mirror(old) &&
2518 xe_svm_has_mapping(vm, start, end))
2519 return -EBUSY;
2520
2521 op->remap.start = xe_vma_start(old);
2522 op->remap.range = xe_vma_size(old);
2523
2524 flags |= op->base.remap.unmap->va->flags &
2525 XE_VMA_READ_ONLY ?
2526 VMA_CREATE_FLAG_READ_ONLY : 0;
2527 flags |= op->base.remap.unmap->va->flags &
2528 DRM_GPUVA_SPARSE ?
2529 VMA_CREATE_FLAG_IS_NULL : 0;
2530 flags |= op->base.remap.unmap->va->flags &
2531 XE_VMA_DUMPABLE ?
2532 VMA_CREATE_FLAG_DUMPABLE : 0;
2533 flags |= xe_vma_is_cpu_addr_mirror(old) ?
2534 VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2535
2536 if (op->base.remap.prev) {
2537 vma = new_vma(vm, op->base.remap.prev,
2538 old->pat_index, flags);
2539 if (IS_ERR(vma))
2540 return PTR_ERR(vma);
2541
2542 op->remap.prev = vma;
2543
2544 /*
2545 * Userptr creates a new SG mapping so
2546 * we must also rebind.
2547 */
2548 op->remap.skip_prev = skip ||
2549 (!xe_vma_is_userptr(old) &&
2550 IS_ALIGNED(xe_vma_end(vma),
2551 xe_vma_max_pte_size(old)));
2552 if (op->remap.skip_prev) {
2553 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2554 op->remap.range -=
2555 xe_vma_end(vma) -
2556 xe_vma_start(old);
2557 op->remap.start = xe_vma_end(vma);
2558 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2559 (ULL)op->remap.start,
2560 (ULL)op->remap.range);
2561 } else {
2562 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2563 }
2564 }
2565
2566 if (op->base.remap.next) {
2567 vma = new_vma(vm, op->base.remap.next,
2568 old->pat_index, flags);
2569 if (IS_ERR(vma))
2570 return PTR_ERR(vma);
2571
2572 op->remap.next = vma;
2573
2574 /*
2575 * Userptr creates a new SG mapping so
2576 * we must also rebind.
2577 */
2578 op->remap.skip_next = skip ||
2579 (!xe_vma_is_userptr(old) &&
2580 IS_ALIGNED(xe_vma_start(vma),
2581 xe_vma_max_pte_size(old)));
2582 if (op->remap.skip_next) {
2583 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2584 op->remap.range -=
2585 xe_vma_end(old) -
2586 xe_vma_start(vma);
2587 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2588 (ULL)op->remap.start,
2589 (ULL)op->remap.range);
2590 } else {
2591 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2592 }
2593 }
2594 if (!skip)
2595 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2596 break;
2597 }
2598 case DRM_GPUVA_OP_UNMAP:
2599 vma = gpuva_to_vma(op->base.unmap.va);
2600
2601 if (xe_vma_is_cpu_addr_mirror(vma) &&
2602 xe_svm_has_mapping(vm, xe_vma_start(vma),
2603 xe_vma_end(vma)))
2604 return -EBUSY;
2605
2606 if (!xe_vma_is_cpu_addr_mirror(vma))
2607 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2608 break;
2609 case DRM_GPUVA_OP_PREFETCH:
2610 vma = gpuva_to_vma(op->base.prefetch.va);
2611
2612 if (xe_vma_is_userptr(vma)) {
2613 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2614 if (err)
2615 return err;
2616 }
2617
2618 if (!xe_vma_is_cpu_addr_mirror(vma))
2619 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2620 break;
2621 default:
2622 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2623 }
2624
2625 err = xe_vma_op_commit(vm, op);
2626 if (err)
2627 return err;
2628 }
2629
2630 return 0;
2631 }
2632
xe_vma_op_unwind(struct xe_vm * vm,struct xe_vma_op * op,bool post_commit,bool prev_post_commit,bool next_post_commit)2633 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2634 bool post_commit, bool prev_post_commit,
2635 bool next_post_commit)
2636 {
2637 lockdep_assert_held_write(&vm->lock);
2638
2639 switch (op->base.op) {
2640 case DRM_GPUVA_OP_MAP:
2641 if (op->map.vma) {
2642 prep_vma_destroy(vm, op->map.vma, post_commit);
2643 xe_vma_destroy_unlocked(op->map.vma);
2644 }
2645 break;
2646 case DRM_GPUVA_OP_UNMAP:
2647 {
2648 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2649
2650 if (vma) {
2651 down_read(&vm->userptr.notifier_lock);
2652 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2653 up_read(&vm->userptr.notifier_lock);
2654 if (post_commit)
2655 xe_vm_insert_vma(vm, vma);
2656 }
2657 break;
2658 }
2659 case DRM_GPUVA_OP_REMAP:
2660 {
2661 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2662
2663 if (op->remap.prev) {
2664 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2665 xe_vma_destroy_unlocked(op->remap.prev);
2666 }
2667 if (op->remap.next) {
2668 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2669 xe_vma_destroy_unlocked(op->remap.next);
2670 }
2671 if (vma) {
2672 down_read(&vm->userptr.notifier_lock);
2673 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2674 up_read(&vm->userptr.notifier_lock);
2675 if (post_commit)
2676 xe_vm_insert_vma(vm, vma);
2677 }
2678 break;
2679 }
2680 case DRM_GPUVA_OP_PREFETCH:
2681 /* Nothing to do */
2682 break;
2683 default:
2684 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2685 }
2686 }
2687
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2688 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2689 struct drm_gpuva_ops **ops,
2690 int num_ops_list)
2691 {
2692 int i;
2693
2694 for (i = num_ops_list - 1; i >= 0; --i) {
2695 struct drm_gpuva_ops *__ops = ops[i];
2696 struct drm_gpuva_op *__op;
2697
2698 if (!__ops)
2699 continue;
2700
2701 drm_gpuva_for_each_op_reverse(__op, __ops) {
2702 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2703
2704 xe_vma_op_unwind(vm, op,
2705 op->flags & XE_VMA_OP_COMMITTED,
2706 op->flags & XE_VMA_OP_PREV_COMMITTED,
2707 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2708 }
2709 }
2710 }
2711
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,bool validate)2712 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2713 bool validate)
2714 {
2715 struct xe_bo *bo = xe_vma_bo(vma);
2716 struct xe_vm *vm = xe_vma_vm(vma);
2717 int err = 0;
2718
2719 if (bo) {
2720 if (!bo->vm)
2721 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2722 if (!err && validate)
2723 err = xe_bo_validate(bo, vm,
2724 !xe_vm_in_preempt_fence_mode(vm));
2725 }
2726
2727 return err;
2728 }
2729
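/*
 * A VMA with a still-pending user fence cannot be unmapped or remapped yet;
 * return -EBUSY in that case. Once the fence has signalled, drop the VMA's
 * reference to it and let the operation proceed.
 */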
check_ufence(struct xe_vma * vma)2730 static int check_ufence(struct xe_vma *vma)
2731 {
2732 if (vma->ufence) {
2733 struct xe_user_fence * const f = vma->ufence;
2734
2735 if (!xe_sync_ufence_get_status(f))
2736 return -EBUSY;
2737
2738 vma->ufence = NULL;
2739 xe_sync_ufence_put(f);
2740 }
2741
2742 return 0;
2743 }
2744
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_op * op)2745 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2746 struct xe_vma_op *op)
2747 {
2748 int err = 0;
2749
2750 switch (op->base.op) {
2751 case DRM_GPUVA_OP_MAP:
2752 if (!op->map.invalidate_on_bind)
2753 err = vma_lock_and_validate(exec, op->map.vma,
2754 !xe_vm_in_fault_mode(vm) ||
2755 op->map.immediate);
2756 break;
2757 case DRM_GPUVA_OP_REMAP:
2758 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2759 if (err)
2760 break;
2761
2762 err = vma_lock_and_validate(exec,
2763 gpuva_to_vma(op->base.remap.unmap->va),
2764 false);
2765 if (!err && op->remap.prev)
2766 err = vma_lock_and_validate(exec, op->remap.prev, true);
2767 if (!err && op->remap.next)
2768 err = vma_lock_and_validate(exec, op->remap.next, true);
2769 break;
2770 case DRM_GPUVA_OP_UNMAP:
2771 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2772 if (err)
2773 break;
2774
2775 err = vma_lock_and_validate(exec,
2776 gpuva_to_vma(op->base.unmap.va),
2777 false);
2778 break;
2779 case DRM_GPUVA_OP_PREFETCH:
2780 {
2781 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2782 u32 region = op->prefetch.region;
2783
2784 xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2785
2786 err = vma_lock_and_validate(exec,
2787 gpuva_to_vma(op->base.prefetch.va),
2788 false);
2789 if (!err && !xe_vma_has_no_bo(vma))
2790 err = xe_bo_migrate(xe_vma_bo(vma),
2791 region_to_mem_type[region]);
2792 break;
2793 }
2794 default:
2795 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2796 }
2797
2798 return err;
2799 }
2800
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)2801 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2802 struct xe_vm *vm,
2803 struct xe_vma_ops *vops)
2804 {
2805 struct xe_vma_op *op;
2806 int err;
2807
2808 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2809 if (err)
2810 return err;
2811
2812 list_for_each_entry(op, &vops->list, link) {
2813 err = op_lock_and_prep(exec, vm, op);
2814 if (err)
2815 return err;
2816 }
2817
2818 #ifdef TEST_VM_OPS_ERROR
2819 if (vops->inject_error &&
2820 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
2821 return -ENOSPC;
2822 #endif
2823
2824 return 0;
2825 }
2826
op_trace(struct xe_vma_op * op)2827 static void op_trace(struct xe_vma_op *op)
2828 {
2829 switch (op->base.op) {
2830 case DRM_GPUVA_OP_MAP:
2831 trace_xe_vma_bind(op->map.vma);
2832 break;
2833 case DRM_GPUVA_OP_REMAP:
2834 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
2835 if (op->remap.prev)
2836 trace_xe_vma_bind(op->remap.prev);
2837 if (op->remap.next)
2838 trace_xe_vma_bind(op->remap.next);
2839 break;
2840 case DRM_GPUVA_OP_UNMAP:
2841 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
2842 break;
2843 case DRM_GPUVA_OP_PREFETCH:
2844 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
2845 break;
2846 case DRM_GPUVA_OP_DRIVER:
2847 break;
2848 default:
2849 XE_WARN_ON("NOT POSSIBLE");
2850 }
2851 }
2852
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)2853 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
2854 {
2855 struct xe_vma_op *op;
2856
2857 list_for_each_entry(op, &vops->list, link)
2858 op_trace(op);
2859 }
2860
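/*
 * Assign a bind exec queue to every tile with pending PT updates, walking the
 * user-supplied queue's multi-GT list when one was given and falling back to
 * the VM's default per-tile queues otherwise. Returns the number of tiles
 * that have work to do.
 */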
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)2861 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
2862 {
2863 struct xe_exec_queue *q = vops->q;
2864 struct xe_tile *tile;
2865 int number_tiles = 0;
2866 u8 id;
2867
2868 for_each_tile(tile, vm->xe, id) {
2869 if (vops->pt_update_ops[id].num_ops)
2870 ++number_tiles;
2871
2872 if (vops->pt_update_ops[id].q)
2873 continue;
2874
2875 if (q) {
2876 vops->pt_update_ops[id].q = q;
2877 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
2878 q = list_next_entry(q, multi_gt_list);
2879 } else {
2880 vops->pt_update_ops[id].q = vm->q[id];
2881 }
2882 }
2883
2884 return number_tiles;
2885 }
2886
ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)2887 static struct dma_fence *ops_execute(struct xe_vm *vm,
2888 struct xe_vma_ops *vops)
2889 {
2890 struct xe_tile *tile;
2891 struct dma_fence *fence = NULL;
2892 struct dma_fence **fences = NULL;
2893 struct dma_fence_array *cf = NULL;
2894 int number_tiles = 0, current_fence = 0, err;
2895 u8 id;
2896
2897 number_tiles = vm_ops_setup_tile_args(vm, vops);
2898 if (number_tiles == 0)
2899 return ERR_PTR(-ENODATA);
2900
2901 if (number_tiles > 1) {
2902 fences = kmalloc_array(number_tiles, sizeof(*fences),
2903 GFP_KERNEL);
2904 if (!fences) {
2905 fence = ERR_PTR(-ENOMEM);
2906 goto err_trace;
2907 }
2908 }
2909
2910 for_each_tile(tile, vm->xe, id) {
2911 if (!vops->pt_update_ops[id].num_ops)
2912 continue;
2913
2914 err = xe_pt_update_ops_prepare(tile, vops);
2915 if (err) {
2916 fence = ERR_PTR(err);
2917 goto err_out;
2918 }
2919 }
2920
2921 trace_xe_vm_ops_execute(vops);
2922
2923 for_each_tile(tile, vm->xe, id) {
2924 if (!vops->pt_update_ops[id].num_ops)
2925 continue;
2926
2927 fence = xe_pt_update_ops_run(tile, vops);
2928 if (IS_ERR(fence))
2929 goto err_out;
2930
2931 if (fences)
2932 fences[current_fence++] = fence;
2933 }
2934
2935 if (fences) {
2936 cf = dma_fence_array_create(number_tiles, fences,
2937 vm->composite_fence_ctx,
2938 vm->composite_fence_seqno++,
2939 false);
2940 if (!cf) {
2941 --vm->composite_fence_seqno;
2942 fence = ERR_PTR(-ENOMEM);
2943 goto err_out;
2944 }
2945 fence = &cf->base;
2946 }
2947
2948 for_each_tile(tile, vm->xe, id) {
2949 if (!vops->pt_update_ops[id].num_ops)
2950 continue;
2951
2952 xe_pt_update_ops_fini(tile, vops);
2953 }
2954
2955 return fence;
2956
2957 err_out:
2958 for_each_tile(tile, vm->xe, id) {
2959 if (!vops->pt_update_ops[id].num_ops)
2960 continue;
2961
2962 xe_pt_update_ops_abort(tile, vops);
2963 }
2964 while (current_fence)
2965 dma_fence_put(fences[--current_fence]);
2966 kfree(fences);
2967 kfree(cf);
2968
2969 err_trace:
2970 trace_xe_vm_ops_fail(vm);
2971 return fence;
2972 }
2973
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)2974 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
2975 {
2976 if (vma->ufence)
2977 xe_sync_ufence_put(vma->ufence);
2978 vma->ufence = __xe_sync_ufence_get(ufence);
2979 }
2980
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)2981 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
2982 struct xe_user_fence *ufence)
2983 {
2984 switch (op->base.op) {
2985 case DRM_GPUVA_OP_MAP:
2986 vma_add_ufence(op->map.vma, ufence);
2987 break;
2988 case DRM_GPUVA_OP_REMAP:
2989 if (op->remap.prev)
2990 vma_add_ufence(op->remap.prev, ufence);
2991 if (op->remap.next)
2992 vma_add_ufence(op->remap.next, ufence);
2993 break;
2994 case DRM_GPUVA_OP_UNMAP:
2995 break;
2996 case DRM_GPUVA_OP_PREFETCH:
2997 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
2998 break;
2999 default:
3000 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3001 }
3002 }
3003
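/*
 * Post-execution fixups: attach the optional user fence to the affected VMAs,
 * destroy the VMAs torn down by UNMAP/REMAP against the bind fence, signal
 * the sync entries and record the fence as the wait queue's last fence.
 */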
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3004 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3005 struct dma_fence *fence)
3006 {
3007 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3008 struct xe_user_fence *ufence;
3009 struct xe_vma_op *op;
3010 int i;
3011
3012 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3013 list_for_each_entry(op, &vops->list, link) {
3014 if (ufence)
3015 op_add_ufence(vm, op, ufence);
3016
3017 if (op->base.op == DRM_GPUVA_OP_UNMAP)
3018 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3019 else if (op->base.op == DRM_GPUVA_OP_REMAP)
3020 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3021 fence);
3022 }
3023 if (ufence)
3024 xe_sync_ufence_put(ufence);
3025 if (fence) {
3026 for (i = 0; i < vops->num_syncs; i++)
3027 xe_sync_entry_signal(vops->syncs + i, fence);
3028 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3029 }
3030 }
3031
vm_bind_ioctl_ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3032 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3033 struct xe_vma_ops *vops)
3034 {
3035 struct drm_exec exec;
3036 struct dma_fence *fence;
3037 int err;
3038
3039 lockdep_assert_held_write(&vm->lock);
3040
3041 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
3042 DRM_EXEC_IGNORE_DUPLICATES, 0);
3043 drm_exec_until_all_locked(&exec) {
3044 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3045 drm_exec_retry_on_contention(&exec);
3046 if (err) {
3047 fence = ERR_PTR(err);
3048 goto unlock;
3049 }
3050
3051 fence = ops_execute(vm, vops);
3052 if (IS_ERR(fence)) {
3053 if (PTR_ERR(fence) == -ENODATA)
3054 vm_bind_ioctl_ops_fini(vm, vops, NULL);
3055 goto unlock;
3056 }
3057
3058 vm_bind_ioctl_ops_fini(vm, vops, fence);
3059 }
3060
3061 unlock:
3062 drm_exec_fini(&exec);
3063 return fence;
3064 }
3065 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3066
3067 #define SUPPORTED_FLAGS_STUB \
3068 (DRM_XE_VM_BIND_FLAG_READONLY | \
3069 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3070 DRM_XE_VM_BIND_FLAG_NULL | \
3071 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3072 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3073 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
3074
3075 #ifdef TEST_VM_OPS_ERROR
3076 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3077 #else
3078 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
3079 #endif
3080
3081 #define XE_64K_PAGE_MASK 0xffffull
3082 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3083
vm_bind_ioctl_check_args(struct xe_device * xe,struct xe_vm * vm,struct drm_xe_vm_bind * args,struct drm_xe_vm_bind_op ** bind_ops)3084 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3085 struct drm_xe_vm_bind *args,
3086 struct drm_xe_vm_bind_op **bind_ops)
3087 {
3088 int err;
3089 int i;
3090
3091 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3092 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3093 return -EINVAL;
3094
3095 if (XE_IOCTL_DBG(xe, args->extensions))
3096 return -EINVAL;
3097
3098 if (args->num_binds > 1) {
3099 u64 __user *bind_user =
3100 u64_to_user_ptr(args->vector_of_binds);
3101
3102 *bind_ops = kvmalloc_array(args->num_binds,
3103 sizeof(struct drm_xe_vm_bind_op),
3104 GFP_KERNEL | __GFP_ACCOUNT |
3105 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3106 if (!*bind_ops)
3107 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3108
3109 err = copy_from_user(*bind_ops, bind_user,
3110 sizeof(struct drm_xe_vm_bind_op) *
3111 args->num_binds);
3112 if (XE_IOCTL_DBG(xe, err)) {
3113 err = -EFAULT;
3114 goto free_bind_ops;
3115 }
3116 } else {
3117 *bind_ops = &args->bind;
3118 }
3119
3120 for (i = 0; i < args->num_binds; ++i) {
3121 u64 range = (*bind_ops)[i].range;
3122 u64 addr = (*bind_ops)[i].addr;
3123 u32 op = (*bind_ops)[i].op;
3124 u32 flags = (*bind_ops)[i].flags;
3125 u32 obj = (*bind_ops)[i].obj;
3126 u64 obj_offset = (*bind_ops)[i].obj_offset;
3127 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3128 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3129 bool is_cpu_addr_mirror = flags &
3130 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3131 u16 pat_index = (*bind_ops)[i].pat_index;
3132 u16 coh_mode;
3133
3134 if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3135 (!xe_vm_in_fault_mode(vm) ||
3136 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3137 err = -EINVAL;
3138 goto free_bind_ops;
3139 }
3140
3141 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3142 err = -EINVAL;
3143 goto free_bind_ops;
3144 }
3145
3146 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3147 (*bind_ops)[i].pat_index = pat_index;
3148 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3149 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3150 err = -EINVAL;
3151 goto free_bind_ops;
3152 }
3153
3154 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3155 err = -EINVAL;
3156 goto free_bind_ops;
3157 }
3158
3159 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3160 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3161 XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3162 XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3163 is_cpu_addr_mirror)) ||
3164 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3165 (is_null || is_cpu_addr_mirror)) ||
3166 XE_IOCTL_DBG(xe, !obj &&
3167 op == DRM_XE_VM_BIND_OP_MAP &&
3168 !is_null && !is_cpu_addr_mirror) ||
3169 XE_IOCTL_DBG(xe, !obj &&
3170 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3171 XE_IOCTL_DBG(xe, addr &&
3172 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3173 XE_IOCTL_DBG(xe, range &&
3174 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3175 XE_IOCTL_DBG(xe, obj &&
3176 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3177 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3178 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3179 XE_IOCTL_DBG(xe, obj &&
3180 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3181 XE_IOCTL_DBG(xe, prefetch_region &&
3182 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3183 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
3184 xe->info.mem_region_mask)) ||
3185 XE_IOCTL_DBG(xe, obj &&
3186 op == DRM_XE_VM_BIND_OP_UNMAP)) {
3187 err = -EINVAL;
3188 goto free_bind_ops;
3189 }
3190
3191 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3192 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3193 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3194 XE_IOCTL_DBG(xe, !range &&
3195 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3196 err = -EINVAL;
3197 goto free_bind_ops;
3198 }
3199 }
3200
3201 return 0;
3202
3203 free_bind_ops:
3204 if (args->num_binds > 1)
3205 kvfree(*bind_ops);
3206 return err;
3207 }
3208
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3209 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3210 struct xe_exec_queue *q,
3211 struct xe_sync_entry *syncs,
3212 int num_syncs)
3213 {
3214 struct dma_fence *fence;
3215 int i, err = 0;
3216
3217 fence = xe_sync_in_fence_get(syncs, num_syncs,
3218 to_wait_exec_queue(vm, q), vm);
3219 if (IS_ERR(fence))
3220 return PTR_ERR(fence);
3221
3222 for (i = 0; i < num_syncs; i++)
3223 xe_sync_entry_signal(&syncs[i], fence);
3224
3225 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3226 fence);
3227 dma_fence_put(fence);
3228
3229 return err;
3230 }
3231
xe_vma_ops_init(struct xe_vma_ops * vops,struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,u32 num_syncs)3232 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3233 struct xe_exec_queue *q,
3234 struct xe_sync_entry *syncs, u32 num_syncs)
3235 {
3236 memset(vops, 0, sizeof(*vops));
3237 INIT_LIST_HEAD(&vops->list);
3238 vops->vm = vm;
3239 vops->q = q;
3240 vops->syncs = syncs;
3241 vops->num_syncs = num_syncs;
3242 }
3243
xe_vm_bind_ioctl_validate_bo(struct xe_device * xe,struct xe_bo * bo,u64 addr,u64 range,u64 obj_offset,u16 pat_index,u32 op,u32 bind_flags)3244 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3245 u64 addr, u64 range, u64 obj_offset,
3246 u16 pat_index, u32 op, u32 bind_flags)
3247 {
3248 u16 coh_mode;
3249
3250 if (XE_IOCTL_DBG(xe, range > bo->size) ||
3251 XE_IOCTL_DBG(xe, obj_offset >
3252 bo->size - range)) {
3253 return -EINVAL;
3254 }
3255
3256 /*
3257 * Some platforms require 64k VM_BIND alignment,
3258 * specifically those with XE_VRAM_FLAGS_NEED64K.
3259 *
3260 * Other platforms may have BOs set to 64k physical placement,
3261 * but can be mapped at 4k offsets anyway. This check is only
3262 * there for the former case.
3263 */
3264 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3265 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3266 if (XE_IOCTL_DBG(xe, obj_offset &
3267 XE_64K_PAGE_MASK) ||
3268 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3269 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3270 return -EINVAL;
3271 }
3272 }
3273
3274 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3275 if (bo->cpu_caching) {
3276 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3277 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3278 return -EINVAL;
3279 }
3280 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3281 /*
3282 * An imported dma-buf from a different device should
3283 * require 1-way or 2-way coherency, since we don't know
3284 * how it was mapped on the CPU side. Just assume it is
3285 * potentially cached on the CPU.
3286 */
3287 return -EINVAL;
3288 }
3289
3290 /* If a BO is protected it can only be mapped if the key is still valid */
3291 if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3292 op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3293 if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3294 return -ENOEXEC;
3295
3296 return 0;
3297 }
3298
xe_vm_bind_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3299 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3300 {
3301 struct xe_device *xe = to_xe_device(dev);
3302 struct xe_file *xef = to_xe_file(file);
3303 struct drm_xe_vm_bind *args = data;
3304 struct drm_xe_sync __user *syncs_user;
3305 struct xe_bo **bos = NULL;
3306 struct drm_gpuva_ops **ops = NULL;
3307 struct xe_vm *vm;
3308 struct xe_exec_queue *q = NULL;
3309 u32 num_syncs, num_ufence = 0;
3310 struct xe_sync_entry *syncs = NULL;
3311 struct drm_xe_vm_bind_op *bind_ops;
3312 struct xe_vma_ops vops;
3313 struct dma_fence *fence;
3314 int err;
3315 int i;
3316
3317 vm = xe_vm_lookup(xef, args->vm_id);
3318 if (XE_IOCTL_DBG(xe, !vm))
3319 return -EINVAL;
3320
3321 err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3322 if (err)
3323 goto put_vm;
3324
3325 if (args->exec_queue_id) {
3326 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3327 if (XE_IOCTL_DBG(xe, !q)) {
3328 err = -ENOENT;
3329 goto put_vm;
3330 }
3331
3332 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3333 err = -EINVAL;
3334 goto put_exec_queue;
3335 }
3336 }
3337
3338 /* Ensure all UNMAPs visible */
3339 xe_svm_flush(vm);
3340
3341 err = down_write_killable(&vm->lock);
3342 if (err)
3343 goto put_exec_queue;
3344
3345 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3346 err = -ENOENT;
3347 goto release_vm_lock;
3348 }
3349
3350 for (i = 0; i < args->num_binds; ++i) {
3351 u64 range = bind_ops[i].range;
3352 u64 addr = bind_ops[i].addr;
3353
3354 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3355 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3356 err = -EINVAL;
3357 goto release_vm_lock;
3358 }
3359 }
3360
3361 if (args->num_binds) {
3362 bos = kvcalloc(args->num_binds, sizeof(*bos),
3363 GFP_KERNEL | __GFP_ACCOUNT |
3364 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3365 if (!bos) {
3366 err = -ENOMEM;
3367 goto release_vm_lock;
3368 }
3369
3370 ops = kvcalloc(args->num_binds, sizeof(*ops),
3371 GFP_KERNEL | __GFP_ACCOUNT |
3372 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3373 if (!ops) {
3374 err = -ENOMEM;
3375 goto release_vm_lock;
3376 }
3377 }
3378
3379 for (i = 0; i < args->num_binds; ++i) {
3380 struct drm_gem_object *gem_obj;
3381 u64 range = bind_ops[i].range;
3382 u64 addr = bind_ops[i].addr;
3383 u32 obj = bind_ops[i].obj;
3384 u64 obj_offset = bind_ops[i].obj_offset;
3385 u16 pat_index = bind_ops[i].pat_index;
3386 u32 op = bind_ops[i].op;
3387 u32 bind_flags = bind_ops[i].flags;
3388
3389 if (!obj)
3390 continue;
3391
3392 gem_obj = drm_gem_object_lookup(file, obj);
3393 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3394 err = -ENOENT;
3395 goto put_obj;
3396 }
3397 bos[i] = gem_to_xe_bo(gem_obj);
3398
3399 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3400 obj_offset, pat_index, op,
3401 bind_flags);
3402 if (err)
3403 goto put_obj;
3404 }
3405
3406 if (args->num_syncs) {
3407 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3408 if (!syncs) {
3409 err = -ENOMEM;
3410 goto put_obj;
3411 }
3412 }
3413
3414 syncs_user = u64_to_user_ptr(args->syncs);
3415 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3416 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3417 &syncs_user[num_syncs],
3418 (xe_vm_in_lr_mode(vm) ?
3419 SYNC_PARSE_FLAG_LR_MODE : 0) |
3420 (!args->num_binds ?
3421 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3422 if (err)
3423 goto free_syncs;
3424
3425 if (xe_sync_is_ufence(&syncs[num_syncs]))
3426 num_ufence++;
3427 }
3428
3429 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3430 err = -EINVAL;
3431 goto free_syncs;
3432 }
3433
3434 if (!args->num_binds) {
3435 err = -ENODATA;
3436 goto free_syncs;
3437 }
3438
3439 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3440 for (i = 0; i < args->num_binds; ++i) {
3441 u64 range = bind_ops[i].range;
3442 u64 addr = bind_ops[i].addr;
3443 u32 op = bind_ops[i].op;
3444 u32 flags = bind_ops[i].flags;
3445 u64 obj_offset = bind_ops[i].obj_offset;
3446 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3447 u16 pat_index = bind_ops[i].pat_index;
3448
3449 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3450 addr, range, op, flags,
3451 prefetch_region, pat_index);
3452 if (IS_ERR(ops[i])) {
3453 err = PTR_ERR(ops[i]);
3454 ops[i] = NULL;
3455 goto unwind_ops;
3456 }
3457
3458 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3459 if (err)
3460 goto unwind_ops;
3461
3462 #ifdef TEST_VM_OPS_ERROR
3463 if (flags & FORCE_OP_ERROR) {
3464 vops.inject_error = true;
3465 vm->xe->vm_inject_error_position =
3466 (vm->xe->vm_inject_error_position + 1) %
3467 FORCE_OP_ERROR_COUNT;
3468 }
3469 #endif
3470 }
3471
3472 /* Nothing to do */
3473 if (list_empty(&vops.list)) {
3474 err = -ENODATA;
3475 goto unwind_ops;
3476 }
3477
3478 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3479 if (err)
3480 goto unwind_ops;
3481
3482 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3483 if (IS_ERR(fence))
3484 err = PTR_ERR(fence);
3485 else
3486 dma_fence_put(fence);
3487
3488 unwind_ops:
3489 if (err && err != -ENODATA)
3490 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3491 xe_vma_ops_fini(&vops);
3492 for (i = args->num_binds - 1; i >= 0; --i)
3493 if (ops[i])
3494 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3495 free_syncs:
3496 if (err == -ENODATA)
3497 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3498 while (num_syncs--)
3499 xe_sync_entry_cleanup(&syncs[num_syncs]);
3500
3501 kfree(syncs);
3502 put_obj:
3503 for (i = 0; i < args->num_binds; ++i)
3504 xe_bo_put(bos[i]);
3505 release_vm_lock:
3506 up_write(&vm->lock);
3507 put_exec_queue:
3508 if (q)
3509 xe_exec_queue_put(q);
3510 put_vm:
3511 xe_vm_put(vm);
3512 kvfree(bos);
3513 kvfree(ops);
3514 if (args->num_binds > 1)
3515 kvfree(bind_ops);
3516 return err;
3517 }
3518
3519 /**
3520 * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3521 * @vm: VM to bind the BO to
3522 * @bo: BO to bind
3523 * @q: exec queue to use for the bind (optional)
3524 * @addr: address at which to bind the BO
3525 * @cache_lvl: PAT cache level to use
3526 *
3527 * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3528 * kernel-owned VM.
3529 *
3530 * Return: a dma_fence to track the binding completion if the job to do so was
3531 * successfully submitted, an error pointer otherwise.
3532 */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)3533 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3534 struct xe_exec_queue *q, u64 addr,
3535 enum xe_cache_level cache_lvl)
3536 {
3537 struct xe_vma_ops vops;
3538 struct drm_gpuva_ops *ops = NULL;
3539 struct dma_fence *fence;
3540 int err;
3541
3542 xe_bo_get(bo);
3543 xe_vm_get(vm);
3544 if (q)
3545 xe_exec_queue_get(q);
3546
3547 down_write(&vm->lock);
3548
3549 xe_vma_ops_init(&vops, vm, q, NULL, 0);
3550
3551 ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
3552 DRM_XE_VM_BIND_OP_MAP, 0, 0,
3553 vm->xe->pat.idx[cache_lvl]);
3554 if (IS_ERR(ops)) {
3555 err = PTR_ERR(ops);
3556 goto release_vm_lock;
3557 }
3558
3559 err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3560 if (err)
3561 goto release_vm_lock;
3562
3563 xe_assert(vm->xe, !list_empty(&vops.list));
3564
3565 err = xe_vma_ops_alloc(&vops, false);
3566 if (err)
3567 goto unwind_ops;
3568
3569 fence = vm_bind_ioctl_ops_execute(vm, &vops);
3570 if (IS_ERR(fence))
3571 err = PTR_ERR(fence);
3572
3573 unwind_ops:
3574 if (err && err != -ENODATA)
3575 vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3576
3577 xe_vma_ops_fini(&vops);
3578 drm_gpuva_ops_free(&vm->gpuvm, ops);
3579
3580 release_vm_lock:
3581 up_write(&vm->lock);
3582
3583 if (q)
3584 xe_exec_queue_put(q);
3585 xe_vm_put(vm);
3586 xe_bo_put(bo);
3587
3588 if (err)
3589 fence = ERR_PTR(err);
3590
3591 return fence;
3592 }
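
/*
 * Illustrative caller sketch for xe_vm_bind_kernel_bo() (hypothetical, not
 * taken from this file; 'vm', 'bo' and 'addr' are placeholders the caller
 * already owns):
 *
 *	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */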
3593
3594 /**
3595 * xe_vm_lock() - Lock the vm's dma_resv object
3596 * @vm: The struct xe_vm whose lock is to be locked
3597 * @intr: Whether to perform any waits interruptibly
3598 *
3599 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3600 * contended lock was interrupted. If @intr is false, the function
3601 * always returns 0.
3602 */
xe_vm_lock(struct xe_vm * vm,bool intr)3603 int xe_vm_lock(struct xe_vm *vm, bool intr)
3604 {
3605 if (intr)
3606 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3607
3608 return dma_resv_lock(xe_vm_resv(vm), NULL);
3609 }
3610
3611 /**
3612 * xe_vm_unlock() - Unlock the vm's dma_resv object
3613 * @vm: The struct xe_vm whose lock is to be released.
3614 *
3615 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3616 */
xe_vm_unlock(struct xe_vm * vm)3617 void xe_vm_unlock(struct xe_vm *vm)
3618 {
3619 dma_resv_unlock(xe_vm_resv(vm));
3620 }
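
/*
 * Illustrative locking sketch (hypothetical caller, not from this file): code
 * that needs the VM's dma_resv held around a short operation would typically
 * pair the two helpers like this:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... operate on state protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);
 */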
3621
3622 /**
3623 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3624 * @vma: VMA to invalidate
3625 *
3626 * Walks the page-table leaves, zeroing the entries owned by this VMA,
3627 * invalidates the TLBs, and blocks until the TLB invalidation is
3628 * complete.
3629 *
3630 * Return: 0 for success, negative error code otherwise.
3631 */
xe_vm_invalidate_vma(struct xe_vma * vma)3632 int xe_vm_invalidate_vma(struct xe_vma *vma)
3633 {
3634 struct xe_device *xe = xe_vma_vm(vma)->xe;
3635 struct xe_tile *tile;
3636 struct xe_gt_tlb_invalidation_fence
3637 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3638 u8 id;
3639 u32 fence_id = 0;
3640 int ret = 0;
3641
3642 xe_assert(xe, !xe_vma_is_null(vma));
3643 xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3644 trace_xe_vma_invalidate(vma);
3645
3646 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3647 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3648 xe_vma_start(vma), xe_vma_size(vma));
3649
3650 /* Check that we don't race with page-table updates */
3651 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3652 if (xe_vma_is_userptr(vma)) {
3653 WARN_ON_ONCE(!mmu_interval_check_retry
3654 (&to_userptr_vma(vma)->userptr.notifier,
3655 to_userptr_vma(vma)->userptr.notifier_seq));
3656 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3657 DMA_RESV_USAGE_BOOKKEEP));
3658
3659 } else {
3660 xe_bo_assert_held(xe_vma_bo(vma));
3661 }
3662 }
3663
3664 for_each_tile(tile, xe, id) {
3665 if (xe_pt_zap_ptes(tile, vma)) {
3666 xe_device_wmb(xe);
3667 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3668 &fence[fence_id],
3669 true);
3670
3671 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3672 &fence[fence_id], vma);
3673 if (ret)
3674 goto wait;
3675 ++fence_id;
3676
3677 if (!tile->media_gt)
3678 continue;
3679
3680 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3681 &fence[fence_id],
3682 true);
3683
3684 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3685 &fence[fence_id], vma);
3686 if (ret)
3687 goto wait;
3688 ++fence_id;
3689 }
3690 }
3691
3692 wait:
3693 for (id = 0; id < fence_id; ++id)
3694 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3695
3696 vma->tile_invalidated = vma->tile_mask;
3697
3698 return ret;
3699 }
3700
xe_vm_validate_protected(struct xe_vm * vm)3701 int xe_vm_validate_protected(struct xe_vm *vm)
3702 {
3703 struct drm_gpuva *gpuva;
3704 int err = 0;
3705
3706 if (!vm)
3707 return -ENODEV;
3708
3709 mutex_lock(&vm->snap_mutex);
3710
3711 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3712 struct xe_vma *vma = gpuva_to_vma(gpuva);
3713 struct xe_bo *bo = vma->gpuva.gem.obj ?
3714 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3715
3716 if (!bo)
3717 continue;
3718
3719 if (xe_bo_is_protected(bo)) {
3720 err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
3721 if (err)
3722 break;
3723 }
3724 }
3725
3726 mutex_unlock(&vm->snap_mutex);
3727 return err;
3728 }
3729
3730 struct xe_vm_snapshot {
3731 unsigned long num_snaps;
3732 struct {
3733 u64 ofs, bo_ofs;
3734 unsigned long len;
3735 struct xe_bo *bo;
3736 void *data;
3737 struct mm_struct *mm;
3738 } snap[];
3739 };
3740
xe_vm_snapshot_capture(struct xe_vm * vm)3741 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3742 {
3743 unsigned long num_snaps = 0, i;
3744 struct xe_vm_snapshot *snap = NULL;
3745 struct drm_gpuva *gpuva;
3746
3747 if (!vm)
3748 return NULL;
3749
3750 mutex_lock(&vm->snap_mutex);
3751 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3752 if (gpuva->flags & XE_VMA_DUMPABLE)
3753 num_snaps++;
3754 }
3755
3756 if (num_snaps)
3757 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3758 if (!snap) {
3759 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3760 goto out_unlock;
3761 }
3762
3763 snap->num_snaps = num_snaps;
3764 i = 0;
3765 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3766 struct xe_vma *vma = gpuva_to_vma(gpuva);
3767 struct xe_bo *bo = vma->gpuva.gem.obj ?
3768 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3769
3770 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3771 continue;
3772
3773 snap->snap[i].ofs = xe_vma_start(vma);
3774 snap->snap[i].len = xe_vma_size(vma);
3775 if (bo) {
3776 snap->snap[i].bo = xe_bo_get(bo);
3777 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3778 } else if (xe_vma_is_userptr(vma)) {
3779 struct mm_struct *mm =
3780 to_userptr_vma(vma)->userptr.notifier.mm;
3781
3782 if (mmget_not_zero(mm))
3783 snap->snap[i].mm = mm;
3784 else
3785 snap->snap[i].data = ERR_PTR(-EFAULT);
3786
3787 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3788 } else {
3789 snap->snap[i].data = ERR_PTR(-ENOENT);
3790 }
3791 i++;
3792 }
3793
3794 out_unlock:
3795 mutex_unlock(&vm->snap_mutex);
3796 return snap;
3797 }
3798
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)3799 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3800 {
3801 if (IS_ERR_OR_NULL(snap))
3802 return;
3803
3804 for (int i = 0; i < snap->num_snaps; i++) {
3805 struct xe_bo *bo = snap->snap[i].bo;
3806 int err;
3807
3808 if (IS_ERR(snap->snap[i].data))
3809 continue;
3810
3811 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3812 if (!snap->snap[i].data) {
3813 snap->snap[i].data = ERR_PTR(-ENOMEM);
3814 goto cleanup_bo;
3815 }
3816
3817 if (bo) {
3818 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
3819 snap->snap[i].data, snap->snap[i].len);
3820 } else {
3821 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3822
3823 kthread_use_mm(snap->snap[i].mm);
3824 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3825 err = 0;
3826 else
3827 err = -EFAULT;
3828 kthread_unuse_mm(snap->snap[i].mm);
3829
3830 mmput(snap->snap[i].mm);
3831 snap->snap[i].mm = NULL;
3832 }
3833
3834 if (err) {
3835 kvfree(snap->snap[i].data);
3836 snap->snap[i].data = ERR_PTR(err);
3837 }
3838
3839 cleanup_bo:
3840 xe_bo_put(bo);
3841 snap->snap[i].bo = NULL;
3842 }
3843 }
3844
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)3845 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3846 {
3847 unsigned long i, j;
3848
3849 if (IS_ERR_OR_NULL(snap)) {
3850 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3851 return;
3852 }
3853
3854 for (i = 0; i < snap->num_snaps; i++) {
3855 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3856
3857 if (IS_ERR(snap->snap[i].data)) {
3858 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3859 PTR_ERR(snap->snap[i].data));
3860 continue;
3861 }
3862
3863 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3864
3865 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3866 u32 *val = snap->snap[i].data + j;
3867 char dumped[ASCII85_BUFSZ];
3868
3869 drm_puts(p, ascii85_encode(*val, dumped));
3870 }
3871
3872 drm_puts(p, "\n");
3873
3874 if (drm_coredump_printer_is_full(p))
3875 return;
3876 }
3877 }
3878
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)3879 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3880 {
3881 unsigned long i;
3882
3883 if (IS_ERR_OR_NULL(snap))
3884 return;
3885
3886 for (i = 0; i < snap->num_snaps; i++) {
3887 if (!IS_ERR(snap->snap[i].data))
3888 kvfree(snap->snap[i].data);
3889 xe_bo_put(snap->snap[i].bo);
3890 if (snap->snap[i].mm)
3891 mmput(snap->snap[i].mm);
3892 }
3893 kvfree(snap);
3894 }
3895