1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_tt.h>
14 #include <uapi/drm/xe_drm.h>
15 #include <linux/ascii85.h>
16 #include <linux/delay.h>
17 #include <linux/kthread.h>
18 #include <linux/mm.h>
19 #include <linux/swap.h>
20
21 #include <generated/xe_wa_oob.h>
22
23 #include "regs/xe_gtt_defs.h"
24 #include "xe_assert.h"
25 #include "xe_bo.h"
26 #include "xe_device.h"
27 #include "xe_drm_client.h"
28 #include "xe_exec_queue.h"
29 #include "xe_gt_pagefault.h"
30 #include "xe_gt_tlb_invalidation.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_res_cursor.h"
37 #include "xe_sync.h"
38 #include "xe_trace_bo.h"
39 #include "xe_wa.h"
40 #include "xe_hmm.h"
41
42 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
43 {
44 return vm->gpuvm.r_obj;
45 }
46
47 /**
48 * xe_vma_userptr_check_repin() - Advisory check for repin needed
49 * @uvma: The userptr vma
50 *
51 * Check if the userptr vma has been invalidated since last successful
52 * repin. The check is advisory only and the function can be called
53 * without the vm->userptr.notifier_lock held. There is no guarantee that the
54 * vma userptr will remain valid after a lockless check, so typically
55 * the call needs to be followed by a proper check under the notifier_lock.
56 *
57 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
58 */
59 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
60 {
61 return mmu_interval_check_retry(&uvma->userptr.notifier,
62 uvma->userptr.notifier_seq) ?
63 -EAGAIN : 0;
64 }
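
/*
 * Illustrative usage sketch (not lifted verbatim from a caller): do the
 * lockless advisory check first and, before committing, re-check under the
 * notifier lock:
 *
 *	if (xe_vma_userptr_check_repin(uvma))
 *		return -EAGAIN;
 *	down_read(&vm->userptr.notifier_lock);
 *	... final check and commit under the lock ...
 *	up_read(&vm->userptr.notifier_lock);
 */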
65
66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
67 {
68 struct xe_vma *vma = &uvma->vma;
69 struct xe_vm *vm = xe_vma_vm(vma);
70 struct xe_device *xe = vm->xe;
71
72 lockdep_assert_held(&vm->lock);
73 xe_assert(xe, xe_vma_is_userptr(vma));
74
75 return xe_hmm_userptr_populate_range(uvma, false);
76 }
77
78 static bool preempt_fences_waiting(struct xe_vm *vm)
79 {
80 struct xe_exec_queue *q;
81
82 lockdep_assert_held(&vm->lock);
83 xe_vm_assert_held(vm);
84
85 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
86 if (!q->lr.pfence ||
87 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
88 &q->lr.pfence->flags)) {
89 return true;
90 }
91 }
92
93 return false;
94 }
95
96 static void free_preempt_fences(struct list_head *list)
97 {
98 struct list_head *link, *next;
99
100 list_for_each_safe(link, next, list)
101 xe_preempt_fence_free(to_preempt_fence_from_link(link));
102 }
103
104 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
105 unsigned int *count)
106 {
107 lockdep_assert_held(&vm->lock);
108 xe_vm_assert_held(vm);
109
110 if (*count >= vm->preempt.num_exec_queues)
111 return 0;
112
113 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
114 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
115
116 if (IS_ERR(pfence))
117 return PTR_ERR(pfence);
118
119 list_move_tail(xe_preempt_fence_link(pfence), list);
120 }
121
122 return 0;
123 }
124
125 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
126 {
127 struct xe_exec_queue *q;
128
129 xe_vm_assert_held(vm);
130
131 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
132 if (q->lr.pfence) {
133 long timeout = dma_fence_wait(q->lr.pfence, false);
134
135 /* Only -ETIME on fence indicates VM needs to be killed */
136 if (timeout < 0 || q->lr.pfence->error == -ETIME)
137 return -ETIME;
138
139 dma_fence_put(q->lr.pfence);
140 q->lr.pfence = NULL;
141 }
142 }
143
144 return 0;
145 }
146
147 static bool xe_vm_is_idle(struct xe_vm *vm)
148 {
149 struct xe_exec_queue *q;
150
151 xe_vm_assert_held(vm);
152 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
153 if (!xe_exec_queue_is_idle(q))
154 return false;
155 }
156
157 return true;
158 }
159
160 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
161 {
162 struct list_head *link;
163 struct xe_exec_queue *q;
164
165 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
166 struct dma_fence *fence;
167
168 link = list->next;
169 xe_assert(vm->xe, link != list);
170
171 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
172 q, q->lr.context,
173 ++q->lr.seqno);
174 dma_fence_put(q->lr.pfence);
175 q->lr.pfence = fence;
176 }
177 }
178
179 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
180 {
181 struct xe_exec_queue *q;
182 int err;
183
184 xe_bo_assert_held(bo);
185
186 if (!vm->preempt.num_exec_queues)
187 return 0;
188
189 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
190 if (err)
191 return err;
192
193 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
194 if (q->lr.pfence) {
195 dma_resv_add_fence(bo->ttm.base.resv,
196 q->lr.pfence,
197 DMA_RESV_USAGE_BOOKKEEP);
198 }
199
200 return 0;
201 }
202
203 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
204 struct drm_exec *exec)
205 {
206 struct xe_exec_queue *q;
207
208 lockdep_assert_held(&vm->lock);
209 xe_vm_assert_held(vm);
210
211 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
212 q->ops->resume(q);
213
214 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
215 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
216 }
217 }
218
219 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
220 {
221 struct drm_gpuvm_exec vm_exec = {
222 .vm = &vm->gpuvm,
223 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
224 .num_fences = 1,
225 };
226 struct drm_exec *exec = &vm_exec.exec;
227 struct dma_fence *pfence;
228 int err;
229 bool wait;
230
231 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
232
233 down_write(&vm->lock);
234 err = drm_gpuvm_exec_lock(&vm_exec);
235 if (err)
236 goto out_up_write;
237
238 pfence = xe_preempt_fence_create(q, q->lr.context,
239 ++q->lr.seqno);
240 if (!pfence) {
241 err = -ENOMEM;
242 goto out_fini;
243 }
244
245 list_add(&q->lr.link, &vm->preempt.exec_queues);
246 ++vm->preempt.num_exec_queues;
247 q->lr.pfence = pfence;
248
249 down_read(&vm->userptr.notifier_lock);
250
251 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
252 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
253
254 /*
255 * Check to see if a preemption on the VM or a userptr invalidation is
256 * in flight; if so, trigger this preempt fence to sync state with the
257 * other preempt fences on the VM.
258 */
259 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
260 if (wait)
261 dma_fence_enable_sw_signaling(pfence);
262
263 up_read(&vm->userptr.notifier_lock);
264
265 out_fini:
266 drm_exec_fini(exec);
267 out_up_write:
268 up_write(&vm->lock);
269
270 return err;
271 }
272
273 /**
274 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
275 * @vm: The VM.
276 * @q: The exec_queue
277 *
278 * Note that this function might be called multiple times on the same queue.
279 */
280 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
281 {
282 if (!xe_vm_in_preempt_fence_mode(vm))
283 return;
284
285 down_write(&vm->lock);
286 if (!list_empty(&q->lr.link)) {
287 list_del_init(&q->lr.link);
288 --vm->preempt.num_exec_queues;
289 }
290 if (q->lr.pfence) {
291 dma_fence_enable_sw_signaling(q->lr.pfence);
292 dma_fence_put(q->lr.pfence);
293 q->lr.pfence = NULL;
294 }
295 up_write(&vm->lock);
296 }
297
298 /**
299 * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs
300 * that need repinning.
301 * @vm: The VM.
302 *
303 * This function checks for whether the VM has userptrs that need repinning,
304 * and provides a release-type barrier on the userptr.notifier_lock after
305 * checking.
306 *
307 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
308 */
309 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
310 {
311 lockdep_assert_held_read(&vm->userptr.notifier_lock);
312
313 return (list_empty(&vm->userptr.repin_list) &&
314 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
315 }
316
317 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
318
319 /**
320 * xe_vm_kill() - VM Kill
321 * @vm: The VM.
322 * @unlocked: Flag indicating the VM's dma-resv is not held
323 *
324 * Kill the VM by setting the banned flag, indicating the VM is no longer available
325 * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
326 */
327 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
328 {
329 struct xe_exec_queue *q;
330
331 lockdep_assert_held(&vm->lock);
332
333 if (unlocked)
334 xe_vm_lock(vm, false);
335
336 vm->flags |= XE_VM_FLAG_BANNED;
337 trace_xe_vm_kill(vm);
338
339 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
340 q->ops->kill(q);
341
342 if (unlocked)
343 xe_vm_unlock(vm);
344
345 /* TODO: Inform user the VM is banned */
346 }
347
348 /**
349 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
350 * @exec: The drm_exec object used for locking before validation.
351 * @err: The error returned from ttm_bo_validate().
352 * @end: A ktime_t cookie that should be set to 0 before first use and
353 * that should be reused on subsequent calls.
354 *
355 * With multiple active VMs, under memory pressure, it is possible that
356 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
357 * Until ttm properly handles locking in such scenarios, the best thing the
358 * driver can do is retry with a timeout. Check if that is necessary, and
359 * if so unlock the drm_exec's objects while keeping the ticket to prepare
360 * for a rerun.
361 *
362 * Return: true if a retry after drm_exec_init() is recommended;
363 * false otherwise.
364 */
365 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
366 {
367 ktime_t cur;
368
369 if (err != -ENOMEM)
370 return false;
371
372 cur = ktime_get();
373 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
374 if (!ktime_before(cur, *end))
375 return false;
376
377 msleep(20);
378 return true;
379 }
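
/*
 * Minimal caller sketch (illustrative only, mirroring the kernel-doc above and
 * the rebind worker below): the ktime_t cookie starts at 0 and is reused
 * across attempts. lock_and_validate() is a placeholder for the caller's own
 * locking and validation step.
 *
 *	ktime_t end = 0;
 *
 *	retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	err = lock_and_validate(&exec);
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */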
380
381 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
382 {
383 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
384 struct drm_gpuva *gpuva;
385 int ret;
386
387 lockdep_assert_held(&vm->lock);
388 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
389 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
390 &vm->rebind_list);
391
392 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
393 if (ret)
394 return ret;
395
396 vm_bo->evicted = false;
397 return 0;
398 }
399
400 /**
401 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
402 * @vm: The vm for which we are rebinding.
403 * @exec: The struct drm_exec with the locked GEM objects.
404 * @num_fences: The number of fences to reserve for the operation, not
405 * including rebinds and validations.
406 *
407 * Validates all evicted gem objects and rebinds their vmas. Note that
408 * rebindings may cause evictions and hence the validation-rebind
409 * sequence is rerun until there are no more objects to validate.
410 *
411 * Return: 0 on success, negative error code on error. In particular,
412 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
413 * the drm_exec transaction needs to be restarted.
414 */
415 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
416 unsigned int num_fences)
417 {
418 struct drm_gem_object *obj;
419 unsigned long index;
420 int ret;
421
422 do {
423 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
424 if (ret)
425 return ret;
426
427 ret = xe_vm_rebind(vm, false);
428 if (ret)
429 return ret;
430 } while (!list_empty(&vm->gpuvm.evict.list));
431
432 drm_exec_for_each_locked_object(exec, index, obj) {
433 ret = dma_resv_reserve_fences(obj->resv, num_fences);
434 if (ret)
435 return ret;
436 }
437
438 return 0;
439 }
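
/*
 * Note: xe_vm_validate_rebind() is intended to run inside a
 * drm_exec_until_all_locked() loop; an -EDEADLK return must be propagated so
 * drm_exec_retry_on_contention() can restart the transaction, as
 * xe_preempt_work_begin() below does.
 */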
440
441 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
442 bool *done)
443 {
444 int err;
445
446 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
447 if (err)
448 return err;
449
450 if (xe_vm_is_idle(vm)) {
451 vm->preempt.rebind_deactivated = true;
452 *done = true;
453 return 0;
454 }
455
456 if (!preempt_fences_waiting(vm)) {
457 *done = true;
458 return 0;
459 }
460
461 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
462 if (err)
463 return err;
464
465 err = wait_for_existing_preempt_fences(vm);
466 if (err)
467 return err;
468
469 /*
470 * Add validation and rebinding to the locking loop since both can
471 * cause evictions which may require blocking dma_resv locks.
472 * The fence reservation here is intended for the new preempt fences
473 * we attach at the end of the rebind work.
474 */
475 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
476 }
477
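/*
 * Rebind worker for preempt-fence (long-running) VMs: repin userptrs if
 * needed, lock and validate evicted BOs, rebind their VMAs, wait for the
 * resulting kernel fences, then arm and reinstall fresh preempt fences.
 * -EAGAIN restarts the whole sequence; any other error kills (bans) the VM.
 */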
478 static void preempt_rebind_work_func(struct work_struct *w)
479 {
480 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
481 struct drm_exec exec;
482 unsigned int fence_count = 0;
483 LIST_HEAD(preempt_fences);
484 ktime_t end = 0;
485 int err = 0;
486 long wait;
487 int __maybe_unused tries = 0;
488
489 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
490 trace_xe_vm_rebind_worker_enter(vm);
491
492 down_write(&vm->lock);
493
494 if (xe_vm_is_closed_or_banned(vm)) {
495 up_write(&vm->lock);
496 trace_xe_vm_rebind_worker_exit(vm);
497 return;
498 }
499
500 retry:
501 if (xe_vm_userptr_check_repin(vm)) {
502 err = xe_vm_userptr_pin(vm);
503 if (err)
504 goto out_unlock_outer;
505 }
506
507 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
508
509 drm_exec_until_all_locked(&exec) {
510 bool done = false;
511
512 err = xe_preempt_work_begin(&exec, vm, &done);
513 drm_exec_retry_on_contention(&exec);
514 if (err || done) {
515 drm_exec_fini(&exec);
516 if (err && xe_vm_validate_should_retry(&exec, err, &end))
517 err = -EAGAIN;
518
519 goto out_unlock_outer;
520 }
521 }
522
523 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
524 if (err)
525 goto out_unlock;
526
527 err = xe_vm_rebind(vm, true);
528 if (err)
529 goto out_unlock;
530
531 /* Wait on rebinds and munmap style VM unbinds */
532 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
533 DMA_RESV_USAGE_KERNEL,
534 false, MAX_SCHEDULE_TIMEOUT);
535 if (wait <= 0) {
536 err = -ETIME;
537 goto out_unlock;
538 }
539
540 #define retry_required(__tries, __vm) \
541 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
542 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
543 __xe_vm_userptr_needs_repin(__vm))
544
545 down_read(&vm->userptr.notifier_lock);
546 if (retry_required(tries, vm)) {
547 up_read(&vm->userptr.notifier_lock);
548 err = -EAGAIN;
549 goto out_unlock;
550 }
551
552 #undef retry_required
553
554 spin_lock(&vm->xe->ttm.lru_lock);
555 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
556 spin_unlock(&vm->xe->ttm.lru_lock);
557
558 /* Point of no return. */
559 arm_preempt_fences(vm, &preempt_fences);
560 resume_and_reinstall_preempt_fences(vm, &exec);
561 up_read(&vm->userptr.notifier_lock);
562
563 out_unlock:
564 drm_exec_fini(&exec);
565 out_unlock_outer:
566 if (err == -EAGAIN) {
567 trace_xe_vm_rebind_worker_retry(vm);
568 goto retry;
569 }
570
571 if (err) {
572 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
573 xe_vm_kill(vm, true);
574 }
575 up_write(&vm->lock);
576
577 free_preempt_fences(&preempt_fences);
578
579 trace_xe_vm_rebind_worker_exit(vm);
580 }
581
582 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
583 {
584 struct xe_userptr *userptr = &uvma->userptr;
585 struct xe_vma *vma = &uvma->vma;
586 struct dma_resv_iter cursor;
587 struct dma_fence *fence;
588 long err;
589
590 /*
591 * Tell exec and rebind worker they need to repin and rebind this
592 * userptr.
593 */
594 if (!xe_vm_in_fault_mode(vm) &&
595 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
596 spin_lock(&vm->userptr.invalidated_lock);
597 list_move_tail(&userptr->invalidate_link,
598 &vm->userptr.invalidated);
599 spin_unlock(&vm->userptr.invalidated_lock);
600 }
601
602 /*
603 * Preempt fences turn into schedule disables, pipeline these.
604 * Note that even in fault mode, we need to wait for binds and
605 * unbinds to complete, and those are attached as BOOKKEEP fences
606 * to the vm.
607 */
608 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
609 DMA_RESV_USAGE_BOOKKEEP);
610 dma_resv_for_each_fence_unlocked(&cursor, fence)
611 dma_fence_enable_sw_signaling(fence);
612 dma_resv_iter_end(&cursor);
613
614 err = dma_resv_wait_timeout(xe_vm_resv(vm),
615 DMA_RESV_USAGE_BOOKKEEP,
616 false, MAX_SCHEDULE_TIMEOUT);
617 XE_WARN_ON(err <= 0);
618
619 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
620 err = xe_vm_invalidate_vma(vma);
621 XE_WARN_ON(err);
622 }
623
624 xe_hmm_userptr_unmap(uvma);
625 }
626
627 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
628 const struct mmu_notifier_range *range,
629 unsigned long cur_seq)
630 {
631 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
632 struct xe_vma *vma = &uvma->vma;
633 struct xe_vm *vm = xe_vma_vm(vma);
634
635 xe_assert(vm->xe, xe_vma_is_userptr(vma));
636 trace_xe_vma_userptr_invalidate(vma);
637
638 if (!mmu_notifier_range_blockable(range))
639 return false;
640
641 vm_dbg(&xe_vma_vm(vma)->xe->drm,
642 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
643 xe_vma_start(vma), xe_vma_size(vma));
644
645 down_write(&vm->userptr.notifier_lock);
646 mmu_interval_set_seq(mni, cur_seq);
647
648 __vma_userptr_invalidate(vm, uvma);
649 up_write(&vm->userptr.notifier_lock);
650 trace_xe_vma_userptr_invalidate_complete(vma);
651
652 return true;
653 }
654
655 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
656 .invalidate = vma_userptr_invalidate,
657 };
658
659 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
660 /**
661 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
662 * @uvma: The userptr vma to invalidate
663 *
664 * Perform a forced userptr invalidation for testing purposes.
665 */
666 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
667 {
668 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
669
670 /* Protect against concurrent userptr pinning */
671 lockdep_assert_held(&vm->lock);
672 /* Protect against concurrent notifiers */
673 lockdep_assert_held(&vm->userptr.notifier_lock);
674 /*
675 * Protect against concurrent instances of this function and
676 * the critical exec sections
677 */
678 xe_vm_assert_held(vm);
679
680 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
681 uvma->userptr.notifier_seq))
682 uvma->userptr.notifier_seq -= 2;
683 __vma_userptr_invalidate(vm, uvma);
684 }
685 #endif
686
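/*
 * Pin all invalidated userptrs: move them from the invalidated list to the
 * repin list, (re)pin their pages and queue them for rebind. An -EFAULT from
 * pinning instead invalidates the GPU mapping (the CPU range is gone); on any
 * other error the collected userptrs are pushed back to the invalidated list
 * so a later pass can retry.
 */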
687 int xe_vm_userptr_pin(struct xe_vm *vm)
688 {
689 struct xe_userptr_vma *uvma, *next;
690 int err = 0;
691 LIST_HEAD(tmp_evict);
692
693 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
694 lockdep_assert_held_write(&vm->lock);
695
696 /* Collect invalidated userptrs */
697 spin_lock(&vm->userptr.invalidated_lock);
698 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
699 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
700 userptr.invalidate_link) {
701 list_del_init(&uvma->userptr.invalidate_link);
702 list_add_tail(&uvma->userptr.repin_link,
703 &vm->userptr.repin_list);
704 }
705 spin_unlock(&vm->userptr.invalidated_lock);
706
707 /* Pin and move to bind list */
708 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
709 userptr.repin_link) {
710 err = xe_vma_userptr_pin_pages(uvma);
711 if (err == -EFAULT) {
712 list_del_init(&uvma->userptr.repin_link);
713 /*
714 * We might have already done the pin once, but
715 * then had to retry before the re-bind happened, due to
716 * some other condition in the caller, but in the
717 * meantime the userptr got dinged by the notifier such
718 * that we need to revalidate here, but this time we hit
719 * the EFAULT. In such a case make sure we remove
720 * ourselves from the rebind list to avoid going down in
721 * flames.
722 */
723 if (!list_empty(&uvma->vma.combined_links.rebind))
724 list_del_init(&uvma->vma.combined_links.rebind);
725
726 /* Wait for pending binds */
727 xe_vm_lock(vm, false);
728 dma_resv_wait_timeout(xe_vm_resv(vm),
729 DMA_RESV_USAGE_BOOKKEEP,
730 false, MAX_SCHEDULE_TIMEOUT);
731
732 err = xe_vm_invalidate_vma(&uvma->vma);
733 xe_vm_unlock(vm);
734 if (err)
735 break;
736 } else {
737 if (err)
738 break;
739
740 list_del_init(&uvma->userptr.repin_link);
741 list_move_tail(&uvma->vma.combined_links.rebind,
742 &vm->rebind_list);
743 }
744 }
745
746 if (err) {
747 down_write(&vm->userptr.notifier_lock);
748 spin_lock(&vm->userptr.invalidated_lock);
749 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
750 userptr.repin_link) {
751 list_del_init(&uvma->userptr.repin_link);
752 list_move_tail(&uvma->userptr.invalidate_link,
753 &vm->userptr.invalidated);
754 }
755 spin_unlock(&vm->userptr.invalidated_lock);
756 up_write(&vm->userptr.notifier_lock);
757 }
758 return err;
759 }
760
761 /**
762 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
763 * that need repinning.
764 * @vm: The VM.
765 *
766 * This function does an advisory check for whether the VM has userptrs that
767 * need repinning.
768 *
769 * Return: 0 if there are no indications of userptrs needing repinning,
770 * -EAGAIN if there are.
771 */
772 int xe_vm_userptr_check_repin(struct xe_vm *vm)
773 {
774 return (list_empty_careful(&vm->userptr.repin_list) &&
775 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
776 }
777
778 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
779 {
780 int i;
781
782 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
783 if (!vops->pt_update_ops[i].num_ops)
784 continue;
785
786 vops->pt_update_ops[i].ops =
787 kmalloc_array(vops->pt_update_ops[i].num_ops,
788 sizeof(*vops->pt_update_ops[i].ops),
789 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
790 if (!vops->pt_update_ops[i].ops)
791 return array_of_binds ? -ENOBUFS : -ENOMEM;
792 }
793
794 return 0;
795 }
796 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
797
798 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
799 {
800 int i;
801
802 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
803 kfree(vops->pt_update_ops[i].ops);
804 }
805
806 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
807 {
808 int i;
809
810 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
811 if (BIT(i) & tile_mask)
812 ++vops->pt_update_ops[i].num_ops;
813 }
814
815 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
816 u8 tile_mask)
817 {
818 INIT_LIST_HEAD(&op->link);
819 op->tile_mask = tile_mask;
820 op->base.op = DRM_GPUVA_OP_MAP;
821 op->base.map.va.addr = vma->gpuva.va.addr;
822 op->base.map.va.range = vma->gpuva.va.range;
823 op->base.map.gem.obj = vma->gpuva.gem.obj;
824 op->base.map.gem.offset = vma->gpuva.gem.offset;
825 op->map.vma = vma;
826 op->map.immediate = true;
827 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
828 op->map.is_null = xe_vma_is_null(vma);
829 }
830
831 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
832 u8 tile_mask)
833 {
834 struct xe_vma_op *op;
835
836 op = kzalloc(sizeof(*op), GFP_KERNEL);
837 if (!op)
838 return -ENOMEM;
839
840 xe_vm_populate_rebind(op, vma, tile_mask);
841 list_add_tail(&op->link, &vops->list);
842 xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
843
844 return 0;
845 }
846
847 static struct dma_fence *ops_execute(struct xe_vm *vm,
848 struct xe_vma_ops *vops);
849 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
850 struct xe_exec_queue *q,
851 struct xe_sync_entry *syncs, u32 num_syncs);
852
853 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
854 {
855 struct dma_fence *fence;
856 struct xe_vma *vma, *next;
857 struct xe_vma_ops vops;
858 struct xe_vma_op *op, *next_op;
859 int err, i;
860
861 lockdep_assert_held(&vm->lock);
862 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
863 list_empty(&vm->rebind_list))
864 return 0;
865
866 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
867 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
868 vops.pt_update_ops[i].wait_vm_bookkeep = true;
869
870 xe_vm_assert_held(vm);
871 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
872 xe_assert(vm->xe, vma->tile_present);
873
874 if (rebind_worker)
875 trace_xe_vma_rebind_worker(vma);
876 else
877 trace_xe_vma_rebind_exec(vma);
878
879 err = xe_vm_ops_add_rebind(&vops, vma,
880 vma->tile_present);
881 if (err)
882 goto free_ops;
883 }
884
885 err = xe_vma_ops_alloc(&vops, false);
886 if (err)
887 goto free_ops;
888
889 fence = ops_execute(vm, &vops);
890 if (IS_ERR(fence)) {
891 err = PTR_ERR(fence);
892 } else {
893 dma_fence_put(fence);
894 list_for_each_entry_safe(vma, next, &vm->rebind_list,
895 combined_links.rebind)
896 list_del_init(&vma->combined_links.rebind);
897 }
898 free_ops:
899 list_for_each_entry_safe(op, next_op, &vops.list, link) {
900 list_del(&op->link);
901 kfree(op);
902 }
903 xe_vma_ops_fini(&vops);
904
905 return err;
906 }
907
908 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
909 {
910 struct dma_fence *fence = NULL;
911 struct xe_vma_ops vops;
912 struct xe_vma_op *op, *next_op;
913 struct xe_tile *tile;
914 u8 id;
915 int err;
916
917 lockdep_assert_held(&vm->lock);
918 xe_vm_assert_held(vm);
919 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
920
921 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
922 for_each_tile(tile, vm->xe, id) {
923 vops.pt_update_ops[id].wait_vm_bookkeep = true;
924 vops.pt_update_ops[tile->id].q =
925 xe_tile_migrate_exec_queue(tile);
926 }
927
928 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
929 if (err)
930 return ERR_PTR(err);
931
932 err = xe_vma_ops_alloc(&vops, false);
933 if (err) {
934 fence = ERR_PTR(err);
935 goto free_ops;
936 }
937
938 fence = ops_execute(vm, &vops);
939
940 free_ops:
941 list_for_each_entry_safe(op, next_op, &vops.list, link) {
942 list_del(&op->link);
943 kfree(op);
944 }
945 xe_vma_ops_fini(&vops);
946
947 return fence;
948 }
949
950 static void xe_vma_free(struct xe_vma *vma)
951 {
952 if (xe_vma_is_userptr(vma))
953 kfree(to_userptr_vma(vma));
954 else
955 kfree(vma);
956 }
957
958 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
959 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
960 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
961
962 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
963 struct xe_bo *bo,
964 u64 bo_offset_or_userptr,
965 u64 start, u64 end,
966 u16 pat_index, unsigned int flags)
967 {
968 struct xe_vma *vma;
969 struct xe_tile *tile;
970 u8 id;
971 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
972 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
973 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
974
975 xe_assert(vm->xe, start < end);
976 xe_assert(vm->xe, end < vm->size);
977
978 /*
979 * Allocate and ensure that the xe_vma_is_userptr() return
980 * matches what was allocated.
981 */
982 if (!bo && !is_null) {
983 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
984
985 if (!uvma)
986 return ERR_PTR(-ENOMEM);
987
988 vma = &uvma->vma;
989 } else {
990 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
991 if (!vma)
992 return ERR_PTR(-ENOMEM);
993
994 if (is_null)
995 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
996 if (bo)
997 vma->gpuva.gem.obj = &bo->ttm.base;
998 }
999
1000 INIT_LIST_HEAD(&vma->combined_links.rebind);
1001
1002 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1003 vma->gpuva.vm = &vm->gpuvm;
1004 vma->gpuva.va.addr = start;
1005 vma->gpuva.va.range = end - start + 1;
1006 if (read_only)
1007 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1008 if (dumpable)
1009 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1010
1011 for_each_tile(tile, vm->xe, id)
1012 vma->tile_mask |= 0x1 << id;
1013
1014 if (vm->xe->info.has_atomic_enable_pte_bit)
1015 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1016
1017 vma->pat_index = pat_index;
1018
1019 if (bo) {
1020 struct drm_gpuvm_bo *vm_bo;
1021
1022 xe_bo_assert_held(bo);
1023
1024 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1025 if (IS_ERR(vm_bo)) {
1026 xe_vma_free(vma);
1027 return ERR_CAST(vm_bo);
1028 }
1029
1030 drm_gpuvm_bo_extobj_add(vm_bo);
1031 drm_gem_object_get(&bo->ttm.base);
1032 vma->gpuva.gem.offset = bo_offset_or_userptr;
1033 drm_gpuva_link(&vma->gpuva, vm_bo);
1034 drm_gpuvm_bo_put(vm_bo);
1035 } else /* userptr or null */ {
1036 if (!is_null) {
1037 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1038 u64 size = end - start + 1;
1039 int err;
1040
1041 INIT_LIST_HEAD(&userptr->invalidate_link);
1042 INIT_LIST_HEAD(&userptr->repin_link);
1043 vma->gpuva.gem.offset = bo_offset_or_userptr;
1044 mutex_init(&userptr->unmap_mutex);
1045
1046 err = mmu_interval_notifier_insert(&userptr->notifier,
1047 current->mm,
1048 xe_vma_userptr(vma), size,
1049 &vma_userptr_notifier_ops);
1050 if (err) {
1051 xe_vma_free(vma);
1052 return ERR_PTR(err);
1053 }
1054
1055 userptr->notifier_seq = LONG_MAX;
1056 }
1057
1058 xe_vm_get(vm);
1059 }
1060
1061 return vma;
1062 }
1063
1064 static void xe_vma_destroy_late(struct xe_vma *vma)
1065 {
1066 struct xe_vm *vm = xe_vma_vm(vma);
1067
1068 if (vma->ufence) {
1069 xe_sync_ufence_put(vma->ufence);
1070 vma->ufence = NULL;
1071 }
1072
1073 if (xe_vma_is_userptr(vma)) {
1074 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1075 struct xe_userptr *userptr = &uvma->userptr;
1076
1077 if (userptr->sg)
1078 xe_hmm_userptr_free_sg(uvma);
1079
1080 /*
1081 * Since userptr pages are not pinned, we can't remove
1082 * the notifier until we're sure the GPU is not accessing
1083 * them anymore
1084 */
1085 mmu_interval_notifier_remove(&userptr->notifier);
1086 mutex_destroy(&userptr->unmap_mutex);
1087 xe_vm_put(vm);
1088 } else if (xe_vma_is_null(vma)) {
1089 xe_vm_put(vm);
1090 } else {
1091 xe_bo_put(xe_vma_bo(vma));
1092 }
1093
1094 xe_vma_free(vma);
1095 }
1096
1097 static void vma_destroy_work_func(struct work_struct *w)
1098 {
1099 struct xe_vma *vma =
1100 container_of(w, struct xe_vma, destroy_work);
1101
1102 xe_vma_destroy_late(vma);
1103 }
1104
1105 static void vma_destroy_cb(struct dma_fence *fence,
1106 struct dma_fence_cb *cb)
1107 {
1108 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1109
1110 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1111 queue_work(system_unbound_wq, &vma->destroy_work);
1112 }
1113
1114 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1115 {
1116 struct xe_vm *vm = xe_vma_vm(vma);
1117
1118 lockdep_assert_held_write(&vm->lock);
1119 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1120
1121 if (xe_vma_is_userptr(vma)) {
1122 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1123
1124 spin_lock(&vm->userptr.invalidated_lock);
1125 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1126 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1127 spin_unlock(&vm->userptr.invalidated_lock);
1128 } else if (!xe_vma_is_null(vma)) {
1129 xe_bo_assert_held(xe_vma_bo(vma));
1130
1131 drm_gpuva_unlink(&vma->gpuva);
1132 }
1133
1134 xe_vm_assert_held(vm);
1135 if (fence) {
1136 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1137 vma_destroy_cb);
1138
1139 if (ret) {
1140 XE_WARN_ON(ret != -ENOENT);
1141 xe_vma_destroy_late(vma);
1142 }
1143 } else {
1144 xe_vma_destroy_late(vma);
1145 }
1146 }
1147
1148 /**
1149 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1150 * @exec: The drm_exec object we're currently locking for.
1151 * @vma: The vma for which we want to lock the vm resv and any attached
1152 * object's resv.
1153 *
1154 * Return: 0 on success, negative error code on error. In particular
1155 * may return -EDEADLK on WW transaction contention and -EINTR if
1156 * an interruptible wait is terminated by a signal.
1157 */
1158 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1159 {
1160 struct xe_vm *vm = xe_vma_vm(vma);
1161 struct xe_bo *bo = xe_vma_bo(vma);
1162 int err;
1163
1164 XE_WARN_ON(!vm);
1165
1166 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1167 if (!err && bo && !bo->vm)
1168 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1169
1170 return err;
1171 }
1172
1173 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1174 {
1175 struct drm_exec exec;
1176 int err;
1177
1178 drm_exec_init(&exec, 0, 0);
1179 drm_exec_until_all_locked(&exec) {
1180 err = xe_vm_lock_vma(&exec, vma);
1181 drm_exec_retry_on_contention(&exec);
1182 if (XE_WARN_ON(err))
1183 break;
1184 }
1185
1186 xe_vma_destroy(vma, NULL);
1187
1188 drm_exec_fini(&exec);
1189 }
1190
1191 struct xe_vma *
1192 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1193 {
1194 struct drm_gpuva *gpuva;
1195
1196 lockdep_assert_held(&vm->lock);
1197
1198 if (xe_vm_is_closed_or_banned(vm))
1199 return NULL;
1200
1201 xe_assert(vm->xe, start + range <= vm->size);
1202
1203 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1204
1205 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1206 }
1207
1208 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1209 {
1210 int err;
1211
1212 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1213 lockdep_assert_held(&vm->lock);
1214
1215 mutex_lock(&vm->snap_mutex);
1216 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1217 mutex_unlock(&vm->snap_mutex);
1218 XE_WARN_ON(err); /* Shouldn't be possible */
1219
1220 return err;
1221 }
1222
1223 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1224 {
1225 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1226 lockdep_assert_held(&vm->lock);
1227
1228 mutex_lock(&vm->snap_mutex);
1229 drm_gpuva_remove(&vma->gpuva);
1230 mutex_unlock(&vm->snap_mutex);
1231 if (vm->usm.last_fault_vma == vma)
1232 vm->usm.last_fault_vma = NULL;
1233 }
1234
1235 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1236 {
1237 struct xe_vma_op *op;
1238
1239 op = kzalloc(sizeof(*op), GFP_KERNEL);
1240
1241 if (unlikely(!op))
1242 return NULL;
1243
1244 return &op->base;
1245 }
1246
1247 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1248
1249 static const struct drm_gpuvm_ops gpuvm_ops = {
1250 .op_alloc = xe_vm_op_alloc,
1251 .vm_bo_validate = xe_gpuvm_validate,
1252 .vm_free = xe_vm_free,
1253 };
1254
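/*
 * PAT-index encoding helpers: translate a driver pat_index into the scattered
 * PAT bits of a PDE/PTE. The bit layout differs between page-table levels and
 * between platform generations, as reflected below.
 */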
1255 static u64 pde_encode_pat_index(u16 pat_index)
1256 {
1257 u64 pte = 0;
1258
1259 if (pat_index & BIT(0))
1260 pte |= XE_PPGTT_PTE_PAT0;
1261
1262 if (pat_index & BIT(1))
1263 pte |= XE_PPGTT_PTE_PAT1;
1264
1265 return pte;
1266 }
1267
1268 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1269 {
1270 u64 pte = 0;
1271
1272 if (pat_index & BIT(0))
1273 pte |= XE_PPGTT_PTE_PAT0;
1274
1275 if (pat_index & BIT(1))
1276 pte |= XE_PPGTT_PTE_PAT1;
1277
1278 if (pat_index & BIT(2)) {
1279 if (pt_level)
1280 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1281 else
1282 pte |= XE_PPGTT_PTE_PAT2;
1283 }
1284
1285 if (pat_index & BIT(3))
1286 pte |= XELPG_PPGTT_PTE_PAT3;
1287
1288 if (pat_index & (BIT(4)))
1289 pte |= XE2_PPGTT_PTE_PAT4;
1290
1291 return pte;
1292 }
1293
1294 static u64 pte_encode_ps(u32 pt_level)
1295 {
1296 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1297
1298 if (pt_level == 1)
1299 return XE_PDE_PS_2M;
1300 else if (pt_level == 2)
1301 return XE_PDPE_PS_1G;
1302
1303 return 0;
1304 }
1305
1306 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1307 const u16 pat_index)
1308 {
1309 u64 pde;
1310
1311 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1312 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1313 pde |= pde_encode_pat_index(pat_index);
1314
1315 return pde;
1316 }
1317
1318 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1319 u16 pat_index, u32 pt_level)
1320 {
1321 u64 pte;
1322
1323 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1324 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1325 pte |= pte_encode_pat_index(pat_index, pt_level);
1326 pte |= pte_encode_ps(pt_level);
1327
1328 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1329 pte |= XE_PPGTT_PTE_DM;
1330
1331 return pte;
1332 }
1333
1334 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1335 u16 pat_index, u32 pt_level)
1336 {
1337 pte |= XE_PAGE_PRESENT;
1338
1339 if (likely(!xe_vma_read_only(vma)))
1340 pte |= XE_PAGE_RW;
1341
1342 pte |= pte_encode_pat_index(pat_index, pt_level);
1343 pte |= pte_encode_ps(pt_level);
1344
1345 if (unlikely(xe_vma_is_null(vma)))
1346 pte |= XE_PTE_NULL;
1347
1348 return pte;
1349 }
1350
1351 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1352 u16 pat_index,
1353 u32 pt_level, bool devmem, u64 flags)
1354 {
1355 u64 pte;
1356
1357 /* Avoid passing random bits directly as flags */
1358 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1359
1360 pte = addr;
1361 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1362 pte |= pte_encode_pat_index(pat_index, pt_level);
1363 pte |= pte_encode_ps(pt_level);
1364
1365 if (devmem)
1366 pte |= XE_PPGTT_PTE_DM;
1367
1368 pte |= flags;
1369
1370 return pte;
1371 }
1372
1373 static const struct xe_pt_ops xelp_pt_ops = {
1374 .pte_encode_bo = xelp_pte_encode_bo,
1375 .pte_encode_vma = xelp_pte_encode_vma,
1376 .pte_encode_addr = xelp_pte_encode_addr,
1377 .pde_encode_bo = xelp_pde_encode_bo,
1378 };
1379
1380 static void vm_destroy_work_func(struct work_struct *w);
1381
1382 /**
1383 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1384 * given tile and vm.
1385 * @xe: xe device.
1386 * @tile: tile to set up for.
1387 * @vm: vm to set up for.
1388 *
1389 * Sets up a pagetable tree with one page-table per level and a single
1390 * leaf PTE. All pagetable entries point to the single page-table or,
1391 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads while
1392 * writes become NOPs.
1393 *
1394 * Return: 0 on success, negative error code on error.
1395 */
1396 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1397 struct xe_vm *vm)
1398 {
1399 u8 id = tile->id;
1400 int i;
1401
1402 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1403 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1404 if (IS_ERR(vm->scratch_pt[id][i]))
1405 return PTR_ERR(vm->scratch_pt[id][i]);
1406
1407 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1408 }
1409
1410 return 0;
1411 }
1412 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1413
1414 static void xe_vm_free_scratch(struct xe_vm *vm)
1415 {
1416 struct xe_tile *tile;
1417 u8 id;
1418
1419 if (!xe_vm_has_scratch(vm))
1420 return;
1421
1422 for_each_tile(tile, vm->xe, id) {
1423 u32 i;
1424
1425 if (!vm->pt_root[id])
1426 continue;
1427
1428 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1429 if (vm->scratch_pt[id][i])
1430 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1431 }
1432 }
1433
1434 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1435 {
1436 struct drm_gem_object *vm_resv_obj;
1437 struct xe_vm *vm;
1438 int err, number_tiles = 0;
1439 struct xe_tile *tile;
1440 u8 id;
1441
1442 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1443 if (!vm)
1444 return ERR_PTR(-ENOMEM);
1445
1446 vm->xe = xe;
1447
1448 vm->size = 1ull << xe->info.va_bits;
1449
1450 vm->flags = flags;
1451
1452 init_rwsem(&vm->lock);
1453 mutex_init(&vm->snap_mutex);
1454
1455 INIT_LIST_HEAD(&vm->rebind_list);
1456
1457 INIT_LIST_HEAD(&vm->userptr.repin_list);
1458 INIT_LIST_HEAD(&vm->userptr.invalidated);
1459 init_rwsem(&vm->userptr.notifier_lock);
1460 spin_lock_init(&vm->userptr.invalidated_lock);
1461
1462 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1463
1464 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1465
1466 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1467 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1468
1469 for_each_tile(tile, xe, id)
1470 xe_range_fence_tree_init(&vm->rftree[id]);
1471
1472 vm->pt_ops = &xelp_pt_ops;
1473
1474 /*
1475 * Long-running workloads are not protected by the scheduler references.
1476 * By design, run_job for long-running workloads returns NULL and the
1477 * scheduler drops all of its references, hence protecting the VM
1478 * for this case is necessary.
1479 */
1480 if (flags & XE_VM_FLAG_LR_MODE)
1481 xe_pm_runtime_get_noresume(xe);
1482
1483 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1484 if (!vm_resv_obj) {
1485 err = -ENOMEM;
1486 goto err_no_resv;
1487 }
1488
1489 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1490 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1491
1492 drm_gem_object_put(vm_resv_obj);
1493
1494 err = xe_vm_lock(vm, true);
1495 if (err)
1496 goto err_close;
1497
1498 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1499 vm->flags |= XE_VM_FLAG_64K;
1500
1501 for_each_tile(tile, xe, id) {
1502 if (flags & XE_VM_FLAG_MIGRATION &&
1503 tile->id != XE_VM_FLAG_TILE_ID(flags))
1504 continue;
1505
1506 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1507 if (IS_ERR(vm->pt_root[id])) {
1508 err = PTR_ERR(vm->pt_root[id]);
1509 vm->pt_root[id] = NULL;
1510 goto err_unlock_close;
1511 }
1512 }
1513
1514 if (xe_vm_has_scratch(vm)) {
1515 for_each_tile(tile, xe, id) {
1516 if (!vm->pt_root[id])
1517 continue;
1518
1519 err = xe_vm_create_scratch(xe, tile, vm);
1520 if (err)
1521 goto err_unlock_close;
1522 }
1523 vm->batch_invalidate_tlb = true;
1524 }
1525
1526 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1527 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1528 vm->batch_invalidate_tlb = false;
1529 }
1530
1531 /* Fill pt_root after allocating scratch tables */
1532 for_each_tile(tile, xe, id) {
1533 if (!vm->pt_root[id])
1534 continue;
1535
1536 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1537 }
1538 xe_vm_unlock(vm);
1539
1540 /* Kernel migration VM shouldn't have a circular loop.. */
1541 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1542 for_each_tile(tile, xe, id) {
1543 struct xe_exec_queue *q;
1544 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1545
1546 if (!vm->pt_root[id])
1547 continue;
1548
1549 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1550 if (IS_ERR(q)) {
1551 err = PTR_ERR(q);
1552 goto err_close;
1553 }
1554 vm->q[id] = q;
1555 number_tiles++;
1556 }
1557 }
1558
1559 if (number_tiles > 1)
1560 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1561
1562 trace_xe_vm_create(vm);
1563
1564 return vm;
1565
1566 err_unlock_close:
1567 xe_vm_unlock(vm);
1568 err_close:
1569 xe_vm_close_and_put(vm);
1570 return ERR_PTR(err);
1571
1572 err_no_resv:
1573 mutex_destroy(&vm->snap_mutex);
1574 for_each_tile(tile, xe, id)
1575 xe_range_fence_tree_fini(&vm->rftree[id]);
1576 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1577 kfree(vm);
1578 if (flags & XE_VM_FLAG_LR_MODE)
1579 xe_pm_runtime_put(xe);
1580 return ERR_PTR(err);
1581 }
1582
1583 static void xe_vm_close(struct xe_vm *vm)
1584 {
1585 down_write(&vm->lock);
1586 vm->size = 0;
1587 up_write(&vm->lock);
1588 }
1589
1590 void xe_vm_close_and_put(struct xe_vm *vm)
1591 {
1592 LIST_HEAD(contested);
1593 struct xe_device *xe = vm->xe;
1594 struct xe_tile *tile;
1595 struct xe_vma *vma, *next_vma;
1596 struct drm_gpuva *gpuva, *next;
1597 u8 id;
1598
1599 xe_assert(xe, !vm->preempt.num_exec_queues);
1600
1601 xe_vm_close(vm);
1602 if (xe_vm_in_preempt_fence_mode(vm))
1603 flush_work(&vm->preempt.rebind_work);
1604
1605 down_write(&vm->lock);
1606 for_each_tile(tile, xe, id) {
1607 if (vm->q[id])
1608 xe_exec_queue_last_fence_put(vm->q[id], vm);
1609 }
1610 up_write(&vm->lock);
1611
1612 for_each_tile(tile, xe, id) {
1613 if (vm->q[id]) {
1614 xe_exec_queue_kill(vm->q[id]);
1615 xe_exec_queue_put(vm->q[id]);
1616 vm->q[id] = NULL;
1617 }
1618 }
1619
1620 down_write(&vm->lock);
1621 xe_vm_lock(vm, false);
1622 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1623 vma = gpuva_to_vma(gpuva);
1624
1625 if (xe_vma_has_no_bo(vma)) {
1626 down_read(&vm->userptr.notifier_lock);
1627 vma->gpuva.flags |= XE_VMA_DESTROYED;
1628 up_read(&vm->userptr.notifier_lock);
1629 }
1630
1631 xe_vm_remove_vma(vm, vma);
1632
1633 /* easy case, remove from VMA? */
1634 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1635 list_del_init(&vma->combined_links.rebind);
1636 xe_vma_destroy(vma, NULL);
1637 continue;
1638 }
1639
1640 list_move_tail(&vma->combined_links.destroy, &contested);
1641 vma->gpuva.flags |= XE_VMA_DESTROYED;
1642 }
1643
1644 /*
1645 * All vm operations will add shared fences to resv.
1646 * The only exception is eviction for a shared object,
1647 * but even so, the unbind when evicted would still
1648 * install a fence to resv. Hence it's safe to
1649 * destroy the pagetables immediately.
1650 */
1651 xe_vm_free_scratch(vm);
1652
1653 for_each_tile(tile, xe, id) {
1654 if (vm->pt_root[id]) {
1655 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1656 vm->pt_root[id] = NULL;
1657 }
1658 }
1659 xe_vm_unlock(vm);
1660
1661 /*
1662 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1663 * Since we hold a refcount to the bo, we can remove and free
1664 * the members safely without locking.
1665 */
1666 list_for_each_entry_safe(vma, next_vma, &contested,
1667 combined_links.destroy) {
1668 list_del_init(&vma->combined_links.destroy);
1669 xe_vma_destroy_unlocked(vma);
1670 }
1671
1672 up_write(&vm->lock);
1673
1674 down_write(&xe->usm.lock);
1675 if (vm->usm.asid) {
1676 void *lookup;
1677
1678 xe_assert(xe, xe->info.has_asid);
1679 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1680
1681 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1682 xe_assert(xe, lookup == vm);
1683 }
1684 up_write(&xe->usm.lock);
1685
1686 for_each_tile(tile, xe, id)
1687 xe_range_fence_tree_fini(&vm->rftree[id]);
1688
1689 xe_vm_put(vm);
1690 }
1691
1692 static void vm_destroy_work_func(struct work_struct *w)
1693 {
1694 struct xe_vm *vm =
1695 container_of(w, struct xe_vm, destroy_work);
1696 struct xe_device *xe = vm->xe;
1697 struct xe_tile *tile;
1698 u8 id;
1699
1700 /* xe_vm_close_and_put was not called? */
1701 xe_assert(xe, !vm->size);
1702
1703 if (xe_vm_in_preempt_fence_mode(vm))
1704 flush_work(&vm->preempt.rebind_work);
1705
1706 mutex_destroy(&vm->snap_mutex);
1707
1708 if (vm->flags & XE_VM_FLAG_LR_MODE)
1709 xe_pm_runtime_put(xe);
1710
1711 for_each_tile(tile, xe, id)
1712 XE_WARN_ON(vm->pt_root[id]);
1713
1714 trace_xe_vm_free(vm);
1715
1716 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1717
1718 if (vm->xef)
1719 xe_file_put(vm->xef);
1720
1721 kfree(vm);
1722 }
1723
1724 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1725 {
1726 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1727
1728 /* To destroy the VM we need to be able to sleep */
1729 queue_work(system_unbound_wq, &vm->destroy_work);
1730 }
1731
1732 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1733 {
1734 struct xe_vm *vm;
1735
1736 mutex_lock(&xef->vm.lock);
1737 vm = xa_load(&xef->vm.xa, id);
1738 if (vm)
1739 xe_vm_get(vm);
1740 mutex_unlock(&xef->vm.lock);
1741
1742 return vm;
1743 }
1744
1745 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1746 {
1747 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1748 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1749 }
1750
1751 static struct xe_exec_queue *
1752 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1753 {
1754 return q ? q : vm->q[0];
1755 }
1756
1757 static struct xe_user_fence *
1758 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1759 {
1760 unsigned int i;
1761
1762 for (i = 0; i < num_syncs; i++) {
1763 struct xe_sync_entry *e = &syncs[i];
1764
1765 if (xe_sync_is_ufence(e))
1766 return xe_sync_ufence_get(e);
1767 }
1768
1769 return NULL;
1770 }
1771
1772 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1773 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1774 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1775
1776 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1777 struct drm_file *file)
1778 {
1779 struct xe_device *xe = to_xe_device(dev);
1780 struct xe_file *xef = to_xe_file(file);
1781 struct drm_xe_vm_create *args = data;
1782 struct xe_tile *tile;
1783 struct xe_vm *vm;
1784 u32 id, asid;
1785 int err;
1786 u32 flags = 0;
1787
1788 if (XE_IOCTL_DBG(xe, args->extensions))
1789 return -EINVAL;
1790
1791 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1792 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1793
1794 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1795 !xe->info.has_usm))
1796 return -EINVAL;
1797
1798 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1799 return -EINVAL;
1800
1801 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1802 return -EINVAL;
1803
1804 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1805 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1806 return -EINVAL;
1807
1808 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1809 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1810 return -EINVAL;
1811
1812 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1813 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1814 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1815 flags |= XE_VM_FLAG_LR_MODE;
1816 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1817 flags |= XE_VM_FLAG_FAULT_MODE;
1818
1819 vm = xe_vm_create(xe, flags);
1820 if (IS_ERR(vm))
1821 return PTR_ERR(vm);
1822
1823 if (xe->info.has_asid) {
1824 down_write(&xe->usm.lock);
1825 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1826 XA_LIMIT(1, XE_MAX_ASID - 1),
1827 &xe->usm.next_asid, GFP_KERNEL);
1828 up_write(&xe->usm.lock);
1829 if (err < 0)
1830 goto err_close_and_put;
1831
1832 vm->usm.asid = asid;
1833 }
1834
1835 vm->xef = xe_file_get(xef);
1836
1837 /* Record BO memory for VM pagetable created against client */
1838 for_each_tile(tile, xe, id)
1839 if (vm->pt_root[id])
1840 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1841
1842 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1843 /* Warning: Security issue - never enable by default */
1844 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1845 #endif
1846
1847 /* user id alloc must always be last in ioctl to prevent UAF */
1848 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1849 if (err)
1850 goto err_close_and_put;
1851
1852 args->vm_id = id;
1853
1854 return 0;
1855
1856 err_close_and_put:
1857 xe_vm_close_and_put(vm);
1858
1859 return err;
1860 }
1861
1862 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1863 struct drm_file *file)
1864 {
1865 struct xe_device *xe = to_xe_device(dev);
1866 struct xe_file *xef = to_xe_file(file);
1867 struct drm_xe_vm_destroy *args = data;
1868 struct xe_vm *vm;
1869 int err = 0;
1870
1871 if (XE_IOCTL_DBG(xe, args->pad) ||
1872 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1873 return -EINVAL;
1874
1875 mutex_lock(&xef->vm.lock);
1876 vm = xa_load(&xef->vm.xa, args->vm_id);
1877 if (XE_IOCTL_DBG(xe, !vm))
1878 err = -ENOENT;
1879 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1880 err = -EBUSY;
1881 else
1882 xa_erase(&xef->vm.xa, args->vm_id);
1883 mutex_unlock(&xef->vm.lock);
1884
1885 if (!err)
1886 xe_vm_close_and_put(vm);
1887
1888 return err;
1889 }
1890
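/* Translate the uAPI prefetch region index into a TTM placement. */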
1891 static const u32 region_to_mem_type[] = {
1892 XE_PL_TT,
1893 XE_PL_VRAM0,
1894 XE_PL_VRAM1,
1895 };
1896
1897 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1898 bool post_commit)
1899 {
1900 down_read(&vm->userptr.notifier_lock);
1901 vma->gpuva.flags |= XE_VMA_DESTROYED;
1902 up_read(&vm->userptr.notifier_lock);
1903 if (post_commit)
1904 xe_vm_remove_vma(vm, vma);
1905 }
1906
1907 #undef ULL
1908 #define ULL unsigned long long
1909
1910 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1911 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1912 {
1913 struct xe_vma *vma;
1914
1915 switch (op->op) {
1916 case DRM_GPUVA_OP_MAP:
1917 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1918 (ULL)op->map.va.addr, (ULL)op->map.va.range);
1919 break;
1920 case DRM_GPUVA_OP_REMAP:
1921 vma = gpuva_to_vma(op->remap.unmap->va);
1922 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1923 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1924 op->remap.unmap->keep ? 1 : 0);
1925 if (op->remap.prev)
1926 vm_dbg(&xe->drm,
1927 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
1928 (ULL)op->remap.prev->va.addr,
1929 (ULL)op->remap.prev->va.range);
1930 if (op->remap.next)
1931 vm_dbg(&xe->drm,
1932 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
1933 (ULL)op->remap.next->va.addr,
1934 (ULL)op->remap.next->va.range);
1935 break;
1936 case DRM_GPUVA_OP_UNMAP:
1937 vma = gpuva_to_vma(op->unmap.va);
1938 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1939 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1940 op->unmap.keep ? 1 : 0);
1941 break;
1942 case DRM_GPUVA_OP_PREFETCH:
1943 vma = gpuva_to_vma(op->prefetch.va);
1944 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
1945 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
1946 break;
1947 default:
1948 drm_warn(&xe->drm, "NOT POSSIBLE");
1949 }
1950 }
1951 #else
1952 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1953 {
1954 }
1955 #endif
1956
1957 /*
1958 * Create the operations list from the IOCTL arguments and set up operation
1959 * fields so the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
1960 */
1961 static struct drm_gpuva_ops *
1962 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
1963 u64 bo_offset_or_userptr, u64 addr, u64 range,
1964 u32 operation, u32 flags,
1965 u32 prefetch_region, u16 pat_index)
1966 {
1967 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
1968 struct drm_gpuva_ops *ops;
1969 struct drm_gpuva_op *__op;
1970 struct drm_gpuvm_bo *vm_bo;
1971 int err;
1972
1973 lockdep_assert_held_write(&vm->lock);
1974
1975 vm_dbg(&vm->xe->drm,
1976 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
1977 operation, (ULL)addr, (ULL)range,
1978 (ULL)bo_offset_or_userptr);
1979
1980 switch (operation) {
1981 case DRM_XE_VM_BIND_OP_MAP:
1982 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
1983 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
1984 obj, bo_offset_or_userptr);
1985 break;
1986 case DRM_XE_VM_BIND_OP_UNMAP:
1987 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
1988 break;
1989 case DRM_XE_VM_BIND_OP_PREFETCH:
1990 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
1991 break;
1992 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
1993 xe_assert(vm->xe, bo);
1994
1995 err = xe_bo_lock(bo, true);
1996 if (err)
1997 return ERR_PTR(err);
1998
1999 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2000 if (IS_ERR(vm_bo)) {
2001 xe_bo_unlock(bo);
2002 return ERR_CAST(vm_bo);
2003 }
2004
2005 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2006 drm_gpuvm_bo_put(vm_bo);
2007 xe_bo_unlock(bo);
2008 break;
2009 default:
2010 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2011 ops = ERR_PTR(-EINVAL);
2012 }
2013 if (IS_ERR(ops))
2014 return ops;
2015
2016 drm_gpuva_for_each_op(__op, ops) {
2017 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2018
2019 if (__op->op == DRM_GPUVA_OP_MAP) {
2020 op->map.immediate =
2021 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2022 op->map.read_only =
2023 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2024 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2025 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2026 op->map.pat_index = pat_index;
2027 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2028 op->prefetch.region = prefetch_region;
2029 }
2030
2031 print_op(vm->xe, __op);
2032 }
2033
2034 return ops;
2035 }
2036 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2037
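/*
 * Allocate and initialize a VMA for a MAP (or REMAP prev/next) operation.
 * The VM's and/or BO's dma-resv is locked with drm_exec while the VMA is
 * set up; userptr VMAs get their pages pinned and external BOs get the
 * VM's preempt fences added.
 */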
2038 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2039 u16 pat_index, unsigned int flags)
2040 {
2041 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2042 struct drm_exec exec;
2043 struct xe_vma *vma;
2044 int err = 0;
2045
2046 lockdep_assert_held_write(&vm->lock);
2047
2048 if (bo) {
2049 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2050 drm_exec_until_all_locked(&exec) {
2051 err = 0;
2052 if (!bo->vm) {
2053 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2054 drm_exec_retry_on_contention(&exec);
2055 }
2056 if (!err) {
2057 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2058 drm_exec_retry_on_contention(&exec);
2059 }
2060 if (err) {
2061 drm_exec_fini(&exec);
2062 return ERR_PTR(err);
2063 }
2064 }
2065 }
2066 vma = xe_vma_create(vm, bo, op->gem.offset,
2067 op->va.addr, op->va.addr +
2068 op->va.range - 1, pat_index, flags);
2069 if (IS_ERR(vma))
2070 goto err_unlock;
2071
2072 if (xe_vma_is_userptr(vma))
2073 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2074 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2075 err = add_preempt_fences(vm, bo);
2076
2077 err_unlock:
2078 if (bo)
2079 drm_exec_fini(&exec);
2080
2081 if (err) {
2082 prep_vma_destroy(vm, vma, false);
2083 xe_vma_destroy_unlocked(vma);
2084 vma = ERR_PTR(err);
2085 }
2086
2087 return vma;
2088 }
2089
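/*
 * Largest page-table entry size this VMA was bound with, derived from the
 * XE_VMA_PTE_* flags; used to decide whether a REMAP may skip rebinding
 * an unchanged prev/next piece.
 */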
2090 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2091 {
2092 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2093 return SZ_1G;
2094 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2095 return SZ_2M;
2096 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2097 return SZ_64K;
2098 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2099 return SZ_4K;
2100
2101 return SZ_1G; /* Uninitialized, use max size */
2102 }
2103
2104 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2105 {
2106 switch (size) {
2107 case SZ_1G:
2108 vma->gpuva.flags |= XE_VMA_PTE_1G;
2109 break;
2110 case SZ_2M:
2111 vma->gpuva.flags |= XE_VMA_PTE_2M;
2112 break;
2113 case SZ_64K:
2114 vma->gpuva.flags |= XE_VMA_PTE_64K;
2115 break;
2116 case SZ_4K:
2117 vma->gpuva.flags |= XE_VMA_PTE_4K;
2118 break;
2119 }
2120 }
2121
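/*
 * Commit a parsed GPUVA operation to the VM's VMA tree so the software
 * state matches what the bind will produce, setting XE_VMA_OP_*COMMITTED
 * flags so the operation can be unwound on a later failure.
 */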
2122 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2123 {
2124 int err = 0;
2125
2126 lockdep_assert_held_write(&vm->lock);
2127
2128 switch (op->base.op) {
2129 case DRM_GPUVA_OP_MAP:
2130 err |= xe_vm_insert_vma(vm, op->map.vma);
2131 if (!err)
2132 op->flags |= XE_VMA_OP_COMMITTED;
2133 break;
2134 case DRM_GPUVA_OP_REMAP:
2135 {
2136 u8 tile_present =
2137 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2138
2139 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2140 true);
2141 op->flags |= XE_VMA_OP_COMMITTED;
2142
2143 if (op->remap.prev) {
2144 err |= xe_vm_insert_vma(vm, op->remap.prev);
2145 if (!err)
2146 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2147 if (!err && op->remap.skip_prev) {
2148 op->remap.prev->tile_present =
2149 tile_present;
2150 op->remap.prev = NULL;
2151 }
2152 }
2153 if (op->remap.next) {
2154 err |= xe_vm_insert_vma(vm, op->remap.next);
2155 if (!err)
2156 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2157 if (!err && op->remap.skip_next) {
2158 op->remap.next->tile_present =
2159 tile_present;
2160 op->remap.next = NULL;
2161 }
2162 }
2163
2164 /* Adjust for partial unbind after removing VMA from VM */
2165 if (!err) {
2166 op->base.remap.unmap->va->va.addr = op->remap.start;
2167 op->base.remap.unmap->va->va.range = op->remap.range;
2168 }
2169 break;
2170 }
2171 case DRM_GPUVA_OP_UNMAP:
2172 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2173 op->flags |= XE_VMA_OP_COMMITTED;
2174 break;
2175 case DRM_GPUVA_OP_PREFETCH:
2176 op->flags |= XE_VMA_OP_COMMITTED;
2177 break;
2178 default:
2179 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2180 }
2181
2182 return err;
2183 }
2184
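/*
 * Turn a drm_gpuva_ops list into xe_vma_ops: create VMAs for MAP and for
 * REMAP prev/next pieces, pin userptr pages where required, account the
 * page-table updates per tile, and commit each operation.
 */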
2185 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2186 struct xe_vma_ops *vops)
2187 {
2188 struct xe_device *xe = vm->xe;
2189 struct drm_gpuva_op *__op;
2190 struct xe_tile *tile;
2191 u8 id, tile_mask = 0;
2192 int err = 0;
2193
2194 lockdep_assert_held_write(&vm->lock);
2195
2196 for_each_tile(tile, vm->xe, id)
2197 tile_mask |= 0x1 << id;
2198
2199 drm_gpuva_for_each_op(__op, ops) {
2200 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2201 struct xe_vma *vma;
2202 unsigned int flags = 0;
2203
2204 INIT_LIST_HEAD(&op->link);
2205 list_add_tail(&op->link, &vops->list);
2206 op->tile_mask = tile_mask;
2207
2208 switch (op->base.op) {
2209 case DRM_GPUVA_OP_MAP:
2210 {
2211 flags |= op->map.read_only ?
2212 VMA_CREATE_FLAG_READ_ONLY : 0;
2213 flags |= op->map.is_null ?
2214 VMA_CREATE_FLAG_IS_NULL : 0;
2215 flags |= op->map.dumpable ?
2216 VMA_CREATE_FLAG_DUMPABLE : 0;
2217
2218 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2219 flags);
2220 if (IS_ERR(vma))
2221 return PTR_ERR(vma);
2222
2223 op->map.vma = vma;
2224 if (op->map.immediate || !xe_vm_in_fault_mode(vm))
2225 xe_vma_ops_incr_pt_update_ops(vops,
2226 op->tile_mask);
2227 break;
2228 }
2229 case DRM_GPUVA_OP_REMAP:
2230 {
2231 struct xe_vma *old =
2232 gpuva_to_vma(op->base.remap.unmap->va);
2233
2234 op->remap.start = xe_vma_start(old);
2235 op->remap.range = xe_vma_size(old);
2236
2237 if (op->base.remap.prev) {
2238 flags |= op->base.remap.unmap->va->flags &
2239 XE_VMA_READ_ONLY ?
2240 VMA_CREATE_FLAG_READ_ONLY : 0;
2241 flags |= op->base.remap.unmap->va->flags &
2242 DRM_GPUVA_SPARSE ?
2243 VMA_CREATE_FLAG_IS_NULL : 0;
2244 flags |= op->base.remap.unmap->va->flags &
2245 XE_VMA_DUMPABLE ?
2246 VMA_CREATE_FLAG_DUMPABLE : 0;
2247
2248 vma = new_vma(vm, op->base.remap.prev,
2249 old->pat_index, flags);
2250 if (IS_ERR(vma))
2251 return PTR_ERR(vma);
2252
2253 op->remap.prev = vma;
2254
2255 /*
2256 * Userptr creates a new SG mapping so
2257 * we must also rebind.
2258 */
2259 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2260 IS_ALIGNED(xe_vma_end(vma),
2261 xe_vma_max_pte_size(old));
2262 if (op->remap.skip_prev) {
2263 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2264 op->remap.range -=
2265 xe_vma_end(vma) -
2266 xe_vma_start(old);
2267 op->remap.start = xe_vma_end(vma);
2268 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2269 (ULL)op->remap.start,
2270 (ULL)op->remap.range);
2271 } else {
2272 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2273 }
2274 }
2275
2276 if (op->base.remap.next) {
2277 flags |= op->base.remap.unmap->va->flags &
2278 XE_VMA_READ_ONLY ?
2279 VMA_CREATE_FLAG_READ_ONLY : 0;
2280 flags |= op->base.remap.unmap->va->flags &
2281 DRM_GPUVA_SPARSE ?
2282 VMA_CREATE_FLAG_IS_NULL : 0;
2283 flags |= op->base.remap.unmap->va->flags &
2284 XE_VMA_DUMPABLE ?
2285 VMA_CREATE_FLAG_DUMPABLE : 0;
2286
2287 vma = new_vma(vm, op->base.remap.next,
2288 old->pat_index, flags);
2289 if (IS_ERR(vma))
2290 return PTR_ERR(vma);
2291
2292 op->remap.next = vma;
2293
2294 /*
2295 * Userptr creates a new SG mapping so
2296 * we must also rebind.
2297 */
2298 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2299 IS_ALIGNED(xe_vma_start(vma),
2300 xe_vma_max_pte_size(old));
2301 if (op->remap.skip_next) {
2302 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2303 op->remap.range -=
2304 xe_vma_end(old) -
2305 xe_vma_start(vma);
2306 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2307 (ULL)op->remap.start,
2308 (ULL)op->remap.range);
2309 } else {
2310 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2311 }
2312 }
2313 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2314 break;
2315 }
2316 case DRM_GPUVA_OP_UNMAP:
2317 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2318 break;
2319 case DRM_GPUVA_OP_PREFETCH:
2320 vma = gpuva_to_vma(op->base.prefetch.va);
2321
2322 if (xe_vma_is_userptr(vma)) {
2323 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2324 if (err)
2325 return err;
2326 }
2327
2328 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2329 break;
2330 default:
2331 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2332 }
2333
2334 err = xe_vma_op_commit(vm, op);
2335 if (err)
2336 return err;
2337 }
2338
2339 return 0;
2340 }
2341
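/*
 * Undo the software state changes of a (partially) committed operation:
 * destroy VMAs created for MAP/REMAP and re-insert VMAs that an UNMAP or
 * REMAP removed from the VMA tree.
 */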
2342 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2343 bool post_commit, bool prev_post_commit,
2344 bool next_post_commit)
2345 {
2346 lockdep_assert_held_write(&vm->lock);
2347
2348 switch (op->base.op) {
2349 case DRM_GPUVA_OP_MAP:
2350 if (op->map.vma) {
2351 prep_vma_destroy(vm, op->map.vma, post_commit);
2352 xe_vma_destroy_unlocked(op->map.vma);
2353 }
2354 break;
2355 case DRM_GPUVA_OP_UNMAP:
2356 {
2357 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2358
2359 if (vma) {
2360 down_read(&vm->userptr.notifier_lock);
2361 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2362 up_read(&vm->userptr.notifier_lock);
2363 if (post_commit)
2364 xe_vm_insert_vma(vm, vma);
2365 }
2366 break;
2367 }
2368 case DRM_GPUVA_OP_REMAP:
2369 {
2370 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2371
2372 if (op->remap.prev) {
2373 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2374 xe_vma_destroy_unlocked(op->remap.prev);
2375 }
2376 if (op->remap.next) {
2377 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2378 xe_vma_destroy_unlocked(op->remap.next);
2379 }
2380 if (vma) {
2381 down_read(&vm->userptr.notifier_lock);
2382 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2383 up_read(&vm->userptr.notifier_lock);
2384 if (post_commit)
2385 xe_vm_insert_vma(vm, vma);
2386 }
2387 break;
2388 }
2389 case DRM_GPUVA_OP_PREFETCH:
2390 /* Nothing to do */
2391 break;
2392 default:
2393 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2394 }
2395 }
2396
2397 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2398 struct drm_gpuva_ops **ops,
2399 int num_ops_list)
2400 {
2401 int i;
2402
2403 for (i = num_ops_list - 1; i >= 0; --i) {
2404 struct drm_gpuva_ops *__ops = ops[i];
2405 struct drm_gpuva_op *__op;
2406
2407 if (!__ops)
2408 continue;
2409
2410 drm_gpuva_for_each_op_reverse(__op, __ops) {
2411 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2412
2413 xe_vma_op_unwind(vm, op,
2414 op->flags & XE_VMA_OP_COMMITTED,
2415 op->flags & XE_VMA_OP_PREV_COMMITTED,
2416 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2417 }
2418 }
2419 }
2420
2421 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2422 bool validate)
2423 {
2424 struct xe_bo *bo = xe_vma_bo(vma);
2425 struct xe_vm *vm = xe_vma_vm(vma);
2426 int err = 0;
2427
2428 if (bo) {
2429 if (!bo->vm)
2430 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2431 if (!err && validate)
2432 err = xe_bo_validate(bo, vm,
2433 !xe_vm_in_preempt_fence_mode(vm));
2434 }
2435
2436 return err;
2437 }
2438
2439 static int check_ufence(struct xe_vma *vma)
2440 {
2441 if (vma->ufence) {
2442 struct xe_user_fence * const f = vma->ufence;
2443
2444 if (!xe_sync_ufence_get_status(f))
2445 return -EBUSY;
2446
2447 vma->ufence = NULL;
2448 xe_sync_ufence_put(f);
2449 }
2450
2451 return 0;
2452 }
2453
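/*
 * Lock and, where needed, validate the BOs touched by a single operation,
 * and refuse to unbind a VMA whose attached user fence has not signaled
 * yet. For prefetch, also migrate the BO to the requested memory region.
 */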
2454 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2455 struct xe_vma_op *op)
2456 {
2457 int err = 0;
2458
2459 switch (op->base.op) {
2460 case DRM_GPUVA_OP_MAP:
2461 err = vma_lock_and_validate(exec, op->map.vma,
2462 !xe_vm_in_fault_mode(vm) ||
2463 op->map.immediate);
2464 break;
2465 case DRM_GPUVA_OP_REMAP:
2466 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2467 if (err)
2468 break;
2469
2470 err = vma_lock_and_validate(exec,
2471 gpuva_to_vma(op->base.remap.unmap->va),
2472 false);
2473 if (!err && op->remap.prev)
2474 err = vma_lock_and_validate(exec, op->remap.prev, true);
2475 if (!err && op->remap.next)
2476 err = vma_lock_and_validate(exec, op->remap.next, true);
2477 break;
2478 case DRM_GPUVA_OP_UNMAP:
2479 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2480 if (err)
2481 break;
2482
2483 err = vma_lock_and_validate(exec,
2484 gpuva_to_vma(op->base.unmap.va),
2485 false);
2486 break;
2487 case DRM_GPUVA_OP_PREFETCH:
2488 {
2489 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2490 u32 region = op->prefetch.region;
2491
2492 xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2493
2494 err = vma_lock_and_validate(exec,
2495 gpuva_to_vma(op->base.prefetch.va),
2496 false);
2497 if (!err && !xe_vma_has_no_bo(vma))
2498 err = xe_bo_migrate(xe_vma_bo(vma),
2499 region_to_mem_type[region]);
2500 break;
2501 }
2502 default:
2503 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2504 }
2505
2506 return err;
2507 }
2508
2509 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2510 struct xe_vm *vm,
2511 struct xe_vma_ops *vops)
2512 {
2513 struct xe_vma_op *op;
2514 int err;
2515
2516 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2517 if (err)
2518 return err;
2519
2520 list_for_each_entry(op, &vops->list, link) {
2521 err = op_lock_and_prep(exec, vm, op);
2522 if (err)
2523 return err;
2524 }
2525
2526 #ifdef TEST_VM_OPS_ERROR
2527 if (vops->inject_error &&
2528 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
2529 return -ENOSPC;
2530 #endif
2531
2532 return 0;
2533 }
2534
2535 static void op_trace(struct xe_vma_op *op)
2536 {
2537 switch (op->base.op) {
2538 case DRM_GPUVA_OP_MAP:
2539 trace_xe_vma_bind(op->map.vma);
2540 break;
2541 case DRM_GPUVA_OP_REMAP:
2542 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
2543 if (op->remap.prev)
2544 trace_xe_vma_bind(op->remap.prev);
2545 if (op->remap.next)
2546 trace_xe_vma_bind(op->remap.next);
2547 break;
2548 case DRM_GPUVA_OP_UNMAP:
2549 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
2550 break;
2551 case DRM_GPUVA_OP_PREFETCH:
2552 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
2553 break;
2554 default:
2555 XE_WARN_ON("NOT POSSIBLE");
2556 }
2557 }
2558
2559 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
2560 {
2561 struct xe_vma_op *op;
2562
2563 list_for_each_entry(op, &vops->list, link)
2564 op_trace(op);
2565 }
2566
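/*
 * Assign an exec queue to each tile with pending page-table updates and
 * return the number of such tiles.
 */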
2567 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
2568 {
2569 struct xe_exec_queue *q = vops->q;
2570 struct xe_tile *tile;
2571 int number_tiles = 0;
2572 u8 id;
2573
2574 for_each_tile(tile, vm->xe, id) {
2575 if (vops->pt_update_ops[id].num_ops)
2576 ++number_tiles;
2577
2578 if (vops->pt_update_ops[id].q)
2579 continue;
2580
2581 if (q) {
2582 vops->pt_update_ops[id].q = q;
2583 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
2584 q = list_next_entry(q, multi_gt_list);
2585 } else {
2586 vops->pt_update_ops[id].q = vm->q[id];
2587 }
2588 }
2589
2590 return number_tiles;
2591 }
2592
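/*
 * Prepare and run the page-table updates for every tile touched by @vops.
 * Returns the fence of the single tile involved, or a dma_fence_array
 * composite when updates span multiple tiles.
 */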
2593 static struct dma_fence *ops_execute(struct xe_vm *vm,
2594 struct xe_vma_ops *vops)
2595 {
2596 struct xe_tile *tile;
2597 struct dma_fence *fence = NULL;
2598 struct dma_fence **fences = NULL;
2599 struct dma_fence_array *cf = NULL;
2600 int number_tiles = 0, current_fence = 0, err;
2601 u8 id;
2602
2603 number_tiles = vm_ops_setup_tile_args(vm, vops);
2604 if (number_tiles == 0)
2605 return ERR_PTR(-ENODATA);
2606
2607 if (number_tiles > 1) {
2608 fences = kmalloc_array(number_tiles, sizeof(*fences),
2609 GFP_KERNEL);
2610 if (!fences) {
2611 fence = ERR_PTR(-ENOMEM);
2612 goto err_trace;
2613 }
2614 }
2615
2616 for_each_tile(tile, vm->xe, id) {
2617 if (!vops->pt_update_ops[id].num_ops)
2618 continue;
2619
2620 err = xe_pt_update_ops_prepare(tile, vops);
2621 if (err) {
2622 fence = ERR_PTR(err);
2623 goto err_out;
2624 }
2625 }
2626
2627 trace_xe_vm_ops_execute(vops);
2628
2629 for_each_tile(tile, vm->xe, id) {
2630 if (!vops->pt_update_ops[id].num_ops)
2631 continue;
2632
2633 fence = xe_pt_update_ops_run(tile, vops);
2634 if (IS_ERR(fence))
2635 goto err_out;
2636
2637 if (fences)
2638 fences[current_fence++] = fence;
2639 }
2640
2641 if (fences) {
2642 cf = dma_fence_array_create(number_tiles, fences,
2643 vm->composite_fence_ctx,
2644 vm->composite_fence_seqno++,
2645 false);
2646 if (!cf) {
2647 --vm->composite_fence_seqno;
2648 fence = ERR_PTR(-ENOMEM);
2649 goto err_out;
2650 }
2651 fence = &cf->base;
2652 }
2653
2654 for_each_tile(tile, vm->xe, id) {
2655 if (!vops->pt_update_ops[id].num_ops)
2656 continue;
2657
2658 xe_pt_update_ops_fini(tile, vops);
2659 }
2660
2661 return fence;
2662
2663 err_out:
2664 for_each_tile(tile, vm->xe, id) {
2665 if (!vops->pt_update_ops[id].num_ops)
2666 continue;
2667
2668 xe_pt_update_ops_abort(tile, vops);
2669 }
2670 while (current_fence)
2671 dma_fence_put(fences[--current_fence]);
2672 kfree(fences);
2673 kfree(cf);
2674
2675 err_trace:
2676 trace_xe_vm_ops_fail(vm);
2677 return fence;
2678 }
2679
2680 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
2681 {
2682 if (vma->ufence)
2683 xe_sync_ufence_put(vma->ufence);
2684 vma->ufence = __xe_sync_ufence_get(ufence);
2685 }
2686
2687 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
2688 struct xe_user_fence *ufence)
2689 {
2690 switch (op->base.op) {
2691 case DRM_GPUVA_OP_MAP:
2692 vma_add_ufence(op->map.vma, ufence);
2693 break;
2694 case DRM_GPUVA_OP_REMAP:
2695 if (op->remap.prev)
2696 vma_add_ufence(op->remap.prev, ufence);
2697 if (op->remap.next)
2698 vma_add_ufence(op->remap.next, ufence);
2699 break;
2700 case DRM_GPUVA_OP_UNMAP:
2701 break;
2702 case DRM_GPUVA_OP_PREFETCH:
2703 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
2704 break;
2705 default:
2706 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2707 }
2708 }
2709
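/*
 * Finalize a successful bind: attach any user fence to the affected VMAs,
 * queue destruction of unmapped VMAs against @fence, signal the sync
 * entries, and record @fence as the exec queue's last fence.
 */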
2710 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
2711 struct dma_fence *fence)
2712 {
2713 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
2714 struct xe_user_fence *ufence;
2715 struct xe_vma_op *op;
2716 int i;
2717
2718 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
2719 list_for_each_entry(op, &vops->list, link) {
2720 if (ufence)
2721 op_add_ufence(vm, op, ufence);
2722
2723 if (op->base.op == DRM_GPUVA_OP_UNMAP)
2724 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
2725 else if (op->base.op == DRM_GPUVA_OP_REMAP)
2726 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
2727 fence);
2728 }
2729 if (ufence)
2730 xe_sync_ufence_put(ufence);
2731 for (i = 0; i < vops->num_syncs; i++)
2732 xe_sync_entry_signal(vops->syncs + i, fence);
2733 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
2734 dma_fence_put(fence);
2735 }
2736
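/*
 * Lock all objects touched by @vops in a single drm_exec transaction,
 * retrying on contention, then execute the page-table updates and
 * finalize the bind.
 */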
2737 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2738 struct xe_vma_ops *vops)
2739 {
2740 struct drm_exec exec;
2741 struct dma_fence *fence;
2742 int err;
2743
2744 lockdep_assert_held_write(&vm->lock);
2745
2746 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
2747 DRM_EXEC_IGNORE_DUPLICATES, 0);
2748 drm_exec_until_all_locked(&exec) {
2749 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
2750 drm_exec_retry_on_contention(&exec);
2751 if (err)
2752 goto unlock;
2753
2754 fence = ops_execute(vm, vops);
2755 if (IS_ERR(fence)) {
2756 err = PTR_ERR(fence);
2757 goto unlock;
2758 }
2759
2760 vm_bind_ioctl_ops_fini(vm, vops, fence);
2761 }
2762
2763 unlock:
2764 drm_exec_fini(&exec);
2765 return err;
2766 }
2767 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
2768
2769 #define SUPPORTED_FLAGS_STUB \
2770 (DRM_XE_VM_BIND_FLAG_READONLY | \
2771 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2772 DRM_XE_VM_BIND_FLAG_NULL | \
2773 DRM_XE_VM_BIND_FLAG_DUMPABLE)
2774
2775 #ifdef TEST_VM_OPS_ERROR
2776 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
2777 #else
2778 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
2779 #endif
2780
2781 #define XE_64K_PAGE_MASK 0xffffull
2782 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2783
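/*
 * Validate the VM_BIND ioctl arguments and pull in the bind-op array. On
 * success *@bind_ops points at args->bind or, for args->num_binds > 1, at
 * a kvmalloc'ed copy the caller must kvfree.
 */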
2784 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2785 struct drm_xe_vm_bind *args,
2786 struct drm_xe_vm_bind_op **bind_ops)
2787 {
2788 int err;
2789 int i;
2790
2791 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2792 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2793 return -EINVAL;
2794
2795 if (XE_IOCTL_DBG(xe, args->extensions))
2796 return -EINVAL;
2797
2798 if (args->num_binds > 1) {
2799 u64 __user *bind_user =
2800 u64_to_user_ptr(args->vector_of_binds);
2801
2802 *bind_ops = kvmalloc_array(args->num_binds,
2803 sizeof(struct drm_xe_vm_bind_op),
2804 GFP_KERNEL | __GFP_ACCOUNT |
2805 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2806 if (!*bind_ops)
2807 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
2808
2809 err = __copy_from_user(*bind_ops, bind_user,
2810 sizeof(struct drm_xe_vm_bind_op) *
2811 args->num_binds);
2812 if (XE_IOCTL_DBG(xe, err)) {
2813 err = -EFAULT;
2814 goto free_bind_ops;
2815 }
2816 } else {
2817 *bind_ops = &args->bind;
2818 }
2819
2820 for (i = 0; i < args->num_binds; ++i) {
2821 u64 range = (*bind_ops)[i].range;
2822 u64 addr = (*bind_ops)[i].addr;
2823 u32 op = (*bind_ops)[i].op;
2824 u32 flags = (*bind_ops)[i].flags;
2825 u32 obj = (*bind_ops)[i].obj;
2826 u64 obj_offset = (*bind_ops)[i].obj_offset;
2827 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2828 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2829 u16 pat_index = (*bind_ops)[i].pat_index;
2830 u16 coh_mode;
2831
2832 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2833 err = -EINVAL;
2834 goto free_bind_ops;
2835 }
2836
2837 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2838 (*bind_ops)[i].pat_index = pat_index;
2839 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2840 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2841 err = -EINVAL;
2842 goto free_bind_ops;
2843 }
2844
2845 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2846 err = -EINVAL;
2847 goto free_bind_ops;
2848 }
2849
2850 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2851 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2852 XE_IOCTL_DBG(xe, obj && is_null) ||
2853 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2854 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2855 is_null) ||
2856 XE_IOCTL_DBG(xe, !obj &&
2857 op == DRM_XE_VM_BIND_OP_MAP &&
2858 !is_null) ||
2859 XE_IOCTL_DBG(xe, !obj &&
2860 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2861 XE_IOCTL_DBG(xe, addr &&
2862 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2863 XE_IOCTL_DBG(xe, range &&
2864 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2865 XE_IOCTL_DBG(xe, obj &&
2866 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2867 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2868 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2869 XE_IOCTL_DBG(xe, obj &&
2870 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2871 XE_IOCTL_DBG(xe, prefetch_region &&
2872 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2873 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2874 xe->info.mem_region_mask)) ||
2875 XE_IOCTL_DBG(xe, obj &&
2876 op == DRM_XE_VM_BIND_OP_UNMAP)) {
2877 err = -EINVAL;
2878 goto free_bind_ops;
2879 }
2880
2881 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2882 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2883 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2884 XE_IOCTL_DBG(xe, !range &&
2885 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2886 err = -EINVAL;
2887 goto free_bind_ops;
2888 }
2889 }
2890
2891 return 0;
2892
2893 free_bind_ops:
2894 if (args->num_binds > 1)
2895 kvfree(*bind_ops);
2896 return err;
2897 }
2898
2899 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2900 struct xe_exec_queue *q,
2901 struct xe_sync_entry *syncs,
2902 int num_syncs)
2903 {
2904 struct dma_fence *fence;
2905 int i, err = 0;
2906
2907 fence = xe_sync_in_fence_get(syncs, num_syncs,
2908 to_wait_exec_queue(vm, q), vm);
2909 if (IS_ERR(fence))
2910 return PTR_ERR(fence);
2911
2912 for (i = 0; i < num_syncs; i++)
2913 xe_sync_entry_signal(&syncs[i], fence);
2914
2915 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2916 fence);
2917 dma_fence_put(fence);
2918
2919 return err;
2920 }
2921
2922 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
2923 struct xe_exec_queue *q,
2924 struct xe_sync_entry *syncs, u32 num_syncs)
2925 {
2926 memset(vops, 0, sizeof(*vops));
2927 INIT_LIST_HEAD(&vops->list);
2928 vops->vm = vm;
2929 vops->q = q;
2930 vops->syncs = syncs;
2931 vops->num_syncs = num_syncs;
2932 }
2933
2934 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2935 u64 addr, u64 range, u64 obj_offset,
2936 u16 pat_index)
2937 {
2938 u16 coh_mode;
2939
2940 if (XE_IOCTL_DBG(xe, range > bo->size) ||
2941 XE_IOCTL_DBG(xe, obj_offset >
2942 bo->size - range)) {
2943 return -EINVAL;
2944 }
2945
2946 /*
2947 * Some platforms require 64k VM_BIND alignment,
2948 * specifically those with XE_VRAM_FLAGS_NEED64K.
2949 *
2950 * Other platforms may have BOs with 64k physical placement,
2951 * but those can still be mapped at 4k offsets. This check is only
2952 * there for the former case.
2953 */
2954 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2955 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2956 if (XE_IOCTL_DBG(xe, obj_offset &
2957 XE_64K_PAGE_MASK) ||
2958 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2959 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2960 return -EINVAL;
2961 }
2962 }
2963
2964 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2965 if (bo->cpu_caching) {
2966 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2967 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2968 return -EINVAL;
2969 }
2970 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2971 /*
2972 * An imported dma-buf from a different device should
2973 * require 1-way or 2-way coherency since we don't know
2974 * how it was mapped on the CPU. Just assume it is
2975 * potentially cached on the CPU side.
2976 */
2977 return -EINVAL;
2978 }
2979
2980 return 0;
2981 }
2982
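/**
 * xe_vm_bind_ioctl() - Bind or unbind ranges of a VM's GPU virtual address space
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_vm_bind
 * @file: DRM file
 *
 * Validates the bind operations, builds the GPUVA operation lists, and
 * executes them as one transaction, unwinding all operations on failure.
 *
 * Return: 0 on success, negative error code on failure.
 */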
2983 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2984 {
2985 struct xe_device *xe = to_xe_device(dev);
2986 struct xe_file *xef = to_xe_file(file);
2987 struct drm_xe_vm_bind *args = data;
2988 struct drm_xe_sync __user *syncs_user;
2989 struct xe_bo **bos = NULL;
2990 struct drm_gpuva_ops **ops = NULL;
2991 struct xe_vm *vm;
2992 struct xe_exec_queue *q = NULL;
2993 u32 num_syncs, num_ufence = 0;
2994 struct xe_sync_entry *syncs = NULL;
2995 struct drm_xe_vm_bind_op *bind_ops;
2996 struct xe_vma_ops vops;
2997 int err;
2998 int i;
2999
3000 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
3001 if (err)
3002 return err;
3003
3004 if (args->exec_queue_id) {
3005 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3006 if (XE_IOCTL_DBG(xe, !q)) {
3007 err = -ENOENT;
3008 goto free_objs;
3009 }
3010
3011 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3012 err = -EINVAL;
3013 goto put_exec_queue;
3014 }
3015 }
3016
3017 vm = xe_vm_lookup(xef, args->vm_id);
3018 if (XE_IOCTL_DBG(xe, !vm)) {
3019 err = -EINVAL;
3020 goto put_exec_queue;
3021 }
3022
3023 err = down_write_killable(&vm->lock);
3024 if (err)
3025 goto put_vm;
3026
3027 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3028 err = -ENOENT;
3029 goto release_vm_lock;
3030 }
3031
3032 for (i = 0; i < args->num_binds; ++i) {
3033 u64 range = bind_ops[i].range;
3034 u64 addr = bind_ops[i].addr;
3035
3036 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3037 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3038 err = -EINVAL;
3039 goto release_vm_lock;
3040 }
3041 }
3042
3043 if (args->num_binds) {
3044 bos = kvcalloc(args->num_binds, sizeof(*bos),
3045 GFP_KERNEL | __GFP_ACCOUNT |
3046 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3047 if (!bos) {
3048 err = -ENOMEM;
3049 goto release_vm_lock;
3050 }
3051
3052 ops = kvcalloc(args->num_binds, sizeof(*ops),
3053 GFP_KERNEL | __GFP_ACCOUNT |
3054 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3055 if (!ops) {
3056 err = -ENOMEM;
3057 goto release_vm_lock;
3058 }
3059 }
3060
3061 for (i = 0; i < args->num_binds; ++i) {
3062 struct drm_gem_object *gem_obj;
3063 u64 range = bind_ops[i].range;
3064 u64 addr = bind_ops[i].addr;
3065 u32 obj = bind_ops[i].obj;
3066 u64 obj_offset = bind_ops[i].obj_offset;
3067 u16 pat_index = bind_ops[i].pat_index;
3068
3069 if (!obj)
3070 continue;
3071
3072 gem_obj = drm_gem_object_lookup(file, obj);
3073 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3074 err = -ENOENT;
3075 goto put_obj;
3076 }
3077 bos[i] = gem_to_xe_bo(gem_obj);
3078
3079 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3080 obj_offset, pat_index);
3081 if (err)
3082 goto put_obj;
3083 }
3084
3085 if (args->num_syncs) {
3086 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3087 if (!syncs) {
3088 err = -ENOMEM;
3089 goto put_obj;
3090 }
3091 }
3092
3093 syncs_user = u64_to_user_ptr(args->syncs);
3094 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3095 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3096 &syncs_user[num_syncs],
3097 (xe_vm_in_lr_mode(vm) ?
3098 SYNC_PARSE_FLAG_LR_MODE : 0) |
3099 (!args->num_binds ?
3100 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3101 if (err)
3102 goto free_syncs;
3103
3104 if (xe_sync_is_ufence(&syncs[num_syncs]))
3105 num_ufence++;
3106 }
3107
3108 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3109 err = -EINVAL;
3110 goto free_syncs;
3111 }
3112
3113 if (!args->num_binds) {
3114 err = -ENODATA;
3115 goto free_syncs;
3116 }
3117
3118 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3119 for (i = 0; i < args->num_binds; ++i) {
3120 u64 range = bind_ops[i].range;
3121 u64 addr = bind_ops[i].addr;
3122 u32 op = bind_ops[i].op;
3123 u32 flags = bind_ops[i].flags;
3124 u64 obj_offset = bind_ops[i].obj_offset;
3125 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3126 u16 pat_index = bind_ops[i].pat_index;
3127
3128 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3129 addr, range, op, flags,
3130 prefetch_region, pat_index);
3131 if (IS_ERR(ops[i])) {
3132 err = PTR_ERR(ops[i]);
3133 ops[i] = NULL;
3134 goto unwind_ops;
3135 }
3136
3137 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3138 if (err)
3139 goto unwind_ops;
3140
3141 #ifdef TEST_VM_OPS_ERROR
3142 if (flags & FORCE_OP_ERROR) {
3143 vops.inject_error = true;
3144 vm->xe->vm_inject_error_position =
3145 (vm->xe->vm_inject_error_position + 1) %
3146 FORCE_OP_ERROR_COUNT;
3147 }
3148 #endif
3149 }
3150
3151 /* Nothing to do */
3152 if (list_empty(&vops.list)) {
3153 err = -ENODATA;
3154 goto unwind_ops;
3155 }
3156
3157 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3158 if (err)
3159 goto unwind_ops;
3160
3161 err = vm_bind_ioctl_ops_execute(vm, &vops);
3162
3163 unwind_ops:
3164 if (err && err != -ENODATA)
3165 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3166 xe_vma_ops_fini(&vops);
3167 for (i = args->num_binds - 1; i >= 0; --i)
3168 if (ops[i])
3169 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3170 free_syncs:
3171 if (err == -ENODATA)
3172 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3173 while (num_syncs--)
3174 xe_sync_entry_cleanup(&syncs[num_syncs]);
3175
3176 kfree(syncs);
3177 put_obj:
3178 for (i = 0; i < args->num_binds; ++i)
3179 xe_bo_put(bos[i]);
3180 release_vm_lock:
3181 up_write(&vm->lock);
3182 put_vm:
3183 xe_vm_put(vm);
3184 put_exec_queue:
3185 if (q)
3186 xe_exec_queue_put(q);
3187 free_objs:
3188 kvfree(bos);
3189 kvfree(ops);
3190 if (args->num_binds > 1)
3191 kvfree(bind_ops);
3192 return err;
3193 }
3194
3195 /**
3196 * xe_vm_lock() - Lock the vm's dma_resv object
3197 * @vm: The struct xe_vm whose lock is to be locked
3198 * @intr: Whether to perform any wait interruptibly
3199 *
3200 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3201 * contended lock was interrupted. If @intr is false, the function
3202 * always returns 0.
3203 */
3204 int xe_vm_lock(struct xe_vm *vm, bool intr)
3205 {
3206 if (intr)
3207 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3208
3209 return dma_resv_lock(xe_vm_resv(vm), NULL);
3210 }
3211
3212 /**
3213 * xe_vm_unlock() - Unlock the vm's dma_resv object
3214 * @vm: The struct xe_vm whose lock is to be released.
3215 *
3216 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3217 */
3218 void xe_vm_unlock(struct xe_vm *vm)
3219 {
3220 dma_resv_unlock(xe_vm_resv(vm));
3221 }
3222
3223 /**
3224 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3225 * @vma: VMA to invalidate
3226 *
3227 * Walks the list of page-table leaves, zeroing the entries owned by this
3228 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
3229 * complete.
3230 *
3231 * Returns 0 for success, negative error code otherwise.
3232 */
3233 int xe_vm_invalidate_vma(struct xe_vma *vma)
3234 {
3235 struct xe_device *xe = xe_vma_vm(vma)->xe;
3236 struct xe_tile *tile;
3237 struct xe_gt_tlb_invalidation_fence
3238 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3239 u8 id;
3240 u32 fence_id = 0;
3241 int ret = 0;
3242
3243 xe_assert(xe, !xe_vma_is_null(vma));
3244 trace_xe_vma_invalidate(vma);
3245
3246 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3247 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3248 xe_vma_start(vma), xe_vma_size(vma));
3249
3250 /* Check that we don't race with page-table updates */
3251 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3252 if (xe_vma_is_userptr(vma)) {
3253 WARN_ON_ONCE(!mmu_interval_check_retry
3254 (&to_userptr_vma(vma)->userptr.notifier,
3255 to_userptr_vma(vma)->userptr.notifier_seq));
3256 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3257 DMA_RESV_USAGE_BOOKKEEP));
3258
3259 } else {
3260 xe_bo_assert_held(xe_vma_bo(vma));
3261 }
3262 }
3263
3264 for_each_tile(tile, xe, id) {
3265 if (xe_pt_zap_ptes(tile, vma)) {
3266 xe_device_wmb(xe);
3267 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3268 &fence[fence_id],
3269 true);
3270
3271 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3272 &fence[fence_id], vma);
3273 if (ret)
3274 goto wait;
3275 ++fence_id;
3276
3277 if (!tile->media_gt)
3278 continue;
3279
3280 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3281 &fence[fence_id],
3282 true);
3283
3284 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3285 &fence[fence_id], vma);
3286 if (ret)
3287 goto wait;
3288 ++fence_id;
3289 }
3290 }
3291
3292 wait:
3293 for (id = 0; id < fence_id; ++id)
3294 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3295
3296 vma->tile_invalidated = vma->tile_mask;
3297
3298 return ret;
3299 }
3300
3301 struct xe_vm_snapshot {
3302 unsigned long num_snaps;
3303 struct {
3304 u64 ofs, bo_ofs;
3305 unsigned long len;
3306 struct xe_bo *bo;
3307 void *data;
3308 struct mm_struct *mm;
3309 } snap[];
3310 };
3311
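/**
 * xe_vm_snapshot_capture() - Record the dumpable VMAs of a VM
 * @vm: The VM to snapshot, may be NULL.
 *
 * Records offset, length and backing (BO reference or userptr mm) for each
 * VMA marked XE_VMA_DUMPABLE. The contents are copied later, from a context
 * that may sleep, by xe_vm_snapshot_capture_delayed().
 *
 * Return: Pointer to the snapshot, NULL if @vm is NULL, or an ERR_PTR on
 * failure.
 */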
3312 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3313 {
3314 unsigned long num_snaps = 0, i;
3315 struct xe_vm_snapshot *snap = NULL;
3316 struct drm_gpuva *gpuva;
3317
3318 if (!vm)
3319 return NULL;
3320
3321 mutex_lock(&vm->snap_mutex);
3322 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3323 if (gpuva->flags & XE_VMA_DUMPABLE)
3324 num_snaps++;
3325 }
3326
3327 if (num_snaps)
3328 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3329 if (!snap) {
3330 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3331 goto out_unlock;
3332 }
3333
3334 snap->num_snaps = num_snaps;
3335 i = 0;
3336 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3337 struct xe_vma *vma = gpuva_to_vma(gpuva);
3338 struct xe_bo *bo = vma->gpuva.gem.obj ?
3339 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3340
3341 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3342 continue;
3343
3344 snap->snap[i].ofs = xe_vma_start(vma);
3345 snap->snap[i].len = xe_vma_size(vma);
3346 if (bo) {
3347 snap->snap[i].bo = xe_bo_get(bo);
3348 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3349 } else if (xe_vma_is_userptr(vma)) {
3350 struct mm_struct *mm =
3351 to_userptr_vma(vma)->userptr.notifier.mm;
3352
3353 if (mmget_not_zero(mm))
3354 snap->snap[i].mm = mm;
3355 else
3356 snap->snap[i].data = ERR_PTR(-EFAULT);
3357
3358 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3359 } else {
3360 snap->snap[i].data = ERR_PTR(-ENOENT);
3361 }
3362 i++;
3363 }
3364
3365 out_unlock:
3366 mutex_unlock(&vm->snap_mutex);
3367 return snap;
3368 }
3369
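/**
 * xe_vm_snapshot_capture_delayed() - Copy the contents of captured VMAs
 * @snap: Snapshot created by xe_vm_snapshot_capture().
 *
 * Copies each captured range from its BO or userptr mm into kernel memory
 * and drops the BO and mm references taken at capture time.
 */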
3370 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3371 {
3372 if (IS_ERR_OR_NULL(snap))
3373 return;
3374
3375 for (int i = 0; i < snap->num_snaps; i++) {
3376 struct xe_bo *bo = snap->snap[i].bo;
3377 int err;
3378
3379 if (IS_ERR(snap->snap[i].data))
3380 continue;
3381
3382 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3383 if (!snap->snap[i].data) {
3384 snap->snap[i].data = ERR_PTR(-ENOMEM);
3385 goto cleanup_bo;
3386 }
3387
3388 if (bo) {
3389 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
3390 snap->snap[i].data, snap->snap[i].len);
3391 } else {
3392 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3393
3394 kthread_use_mm(snap->snap[i].mm);
3395 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3396 err = 0;
3397 else
3398 err = -EFAULT;
3399 kthread_unuse_mm(snap->snap[i].mm);
3400
3401 mmput(snap->snap[i].mm);
3402 snap->snap[i].mm = NULL;
3403 }
3404
3405 if (err) {
3406 kvfree(snap->snap[i].data);
3407 snap->snap[i].data = ERR_PTR(err);
3408 }
3409
3410 cleanup_bo:
3411 xe_bo_put(bo);
3412 snap->snap[i].bo = NULL;
3413 }
3414 }
3415
3416 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3417 {
3418 unsigned long i, j;
3419
3420 if (IS_ERR_OR_NULL(snap)) {
3421 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3422 return;
3423 }
3424
3425 for (i = 0; i < snap->num_snaps; i++) {
3426 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3427
3428 if (IS_ERR(snap->snap[i].data)) {
3429 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3430 PTR_ERR(snap->snap[i].data));
3431 continue;
3432 }
3433
3434 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3435
3436 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3437 u32 *val = snap->snap[i].data + j;
3438 char dumped[ASCII85_BUFSZ];
3439
3440 drm_puts(p, ascii85_encode(*val, dumped));
3441 }
3442
3443 drm_puts(p, "\n");
3444 }
3445 }
3446
3447 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3448 {
3449 unsigned long i;
3450
3451 if (IS_ERR_OR_NULL(snap))
3452 return;
3453
3454 for (i = 0; i < snap->num_snaps; i++) {
3455 if (!IS_ERR(snap->snap[i].data))
3456 kvfree(snap->snap[i].data);
3457 xe_bo_put(snap->snap[i].bo);
3458 if (snap->snap[i].mm)
3459 mmput(snap->snap[i].mm);
3460 }
3461 kvfree(snap);
3462 }
3463