1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_tt.h>
14 #include <uapi/drm/xe_drm.h>
15 #include <linux/ascii85.h>
16 #include <linux/delay.h>
17 #include <linux/kthread.h>
18 #include <linux/mm.h>
19 #include <linux/swap.h>
20
21 #include <generated/xe_wa_oob.h>
22
23 #include "regs/xe_gtt_defs.h"
24 #include "xe_assert.h"
25 #include "xe_bo.h"
26 #include "xe_device.h"
27 #include "xe_drm_client.h"
28 #include "xe_exec_queue.h"
29 #include "xe_gt_pagefault.h"
30 #include "xe_gt_tlb_invalidation.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_res_cursor.h"
37 #include "xe_sync.h"
38 #include "xe_trace_bo.h"
39 #include "xe_wa.h"
40 #include "xe_hmm.h"
41
42 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
43 {
44 return vm->gpuvm.r_obj;
45 }
46
47 /**
48 * xe_vma_userptr_check_repin() - Advisory check for repin needed
49 * @uvma: The userptr vma
50 *
51 * Check if the userptr vma has been invalidated since last successful
52 * repin. The check is advisory only and the function can be called
53 * without the vm->userptr.notifier_lock held. There is no guarantee that the
54 * vma userptr will remain valid after a lockless check, so typically
55 * the call needs to be followed by a proper check under the notifier_lock.
56 *
57 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
58 */
59 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
60 {
61 return mmu_interval_check_retry(&uvma->userptr.notifier,
62 uvma->userptr.notifier_seq) ?
63 -EAGAIN : 0;
64 }
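/*
 * Illustrative sketch only, not part of the driver: the kernel-doc above
 * describes a lockless advisory check followed by a proper check under the
 * notifier lock. A hypothetical caller, with vm->lock already held as
 * xe_vma_userptr_pin_pages() requires and with error handling trimmed,
 * could look roughly like this:
 *
 *	if (xe_vma_userptr_check_repin(uvma)) {
 *		err = xe_vma_userptr_pin_pages(uvma);
 *		if (err)
 *			return err;
 *	}
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (xe_vma_userptr_check_repin(uvma))
 *		err = -EAGAIN;	(raced with another invalidation, retry)
 *	up_read(&vm->userptr.notifier_lock);
 */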
65
66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
67 {
68 struct xe_vma *vma = &uvma->vma;
69 struct xe_vm *vm = xe_vma_vm(vma);
70 struct xe_device *xe = vm->xe;
71
72 lockdep_assert_held(&vm->lock);
73 xe_assert(xe, xe_vma_is_userptr(vma));
74
75 return xe_hmm_userptr_populate_range(uvma, false);
76 }
77
78 static bool preempt_fences_waiting(struct xe_vm *vm)
79 {
80 struct xe_exec_queue *q;
81
82 lockdep_assert_held(&vm->lock);
83 xe_vm_assert_held(vm);
84
85 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
86 if (!q->lr.pfence ||
87 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
88 &q->lr.pfence->flags)) {
89 return true;
90 }
91 }
92
93 return false;
94 }
95
96 static void free_preempt_fences(struct list_head *list)
97 {
98 struct list_head *link, *next;
99
100 list_for_each_safe(link, next, list)
101 xe_preempt_fence_free(to_preempt_fence_from_link(link));
102 }
103
104 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
105 unsigned int *count)
106 {
107 lockdep_assert_held(&vm->lock);
108 xe_vm_assert_held(vm);
109
110 if (*count >= vm->preempt.num_exec_queues)
111 return 0;
112
113 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
114 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
115
116 if (IS_ERR(pfence))
117 return PTR_ERR(pfence);
118
119 list_move_tail(xe_preempt_fence_link(pfence), list);
120 }
121
122 return 0;
123 }
124
125 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
126 {
127 struct xe_exec_queue *q;
128
129 xe_vm_assert_held(vm);
130
131 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
132 if (q->lr.pfence) {
133 long timeout = dma_fence_wait(q->lr.pfence, false);
134
135 /* Only -ETIME on fence indicates VM needs to be killed */
136 if (timeout < 0 || q->lr.pfence->error == -ETIME)
137 return -ETIME;
138
139 dma_fence_put(q->lr.pfence);
140 q->lr.pfence = NULL;
141 }
142 }
143
144 return 0;
145 }
146
147 static bool xe_vm_is_idle(struct xe_vm *vm)
148 {
149 struct xe_exec_queue *q;
150
151 xe_vm_assert_held(vm);
152 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
153 if (!xe_exec_queue_is_idle(q))
154 return false;
155 }
156
157 return true;
158 }
159
160 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
161 {
162 struct list_head *link;
163 struct xe_exec_queue *q;
164
165 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
166 struct dma_fence *fence;
167
168 link = list->next;
169 xe_assert(vm->xe, link != list);
170
171 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
172 q, q->lr.context,
173 ++q->lr.seqno);
174 dma_fence_put(q->lr.pfence);
175 q->lr.pfence = fence;
176 }
177 }
178
179 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
180 {
181 struct xe_exec_queue *q;
182 int err;
183
184 xe_bo_assert_held(bo);
185
186 if (!vm->preempt.num_exec_queues)
187 return 0;
188
189 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
190 if (err)
191 return err;
192
193 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
194 if (q->lr.pfence) {
195 dma_resv_add_fence(bo->ttm.base.resv,
196 q->lr.pfence,
197 DMA_RESV_USAGE_BOOKKEEP);
198 }
199
200 return 0;
201 }
202
203 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
204 struct drm_exec *exec)
205 {
206 struct xe_exec_queue *q;
207
208 lockdep_assert_held(&vm->lock);
209 xe_vm_assert_held(vm);
210
211 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
212 q->ops->resume(q);
213
214 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
215 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
216 }
217 }
218
219 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
220 {
221 struct drm_gpuvm_exec vm_exec = {
222 .vm = &vm->gpuvm,
223 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
224 .num_fences = 1,
225 };
226 struct drm_exec *exec = &vm_exec.exec;
227 struct dma_fence *pfence;
228 int err;
229 bool wait;
230
231 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
232
233 down_write(&vm->lock);
234 err = drm_gpuvm_exec_lock(&vm_exec);
235 if (err)
236 goto out_up_write;
237
238 pfence = xe_preempt_fence_create(q, q->lr.context,
239 ++q->lr.seqno);
240 if (!pfence) {
241 err = -ENOMEM;
242 goto out_fini;
243 }
244
245 list_add(&q->lr.link, &vm->preempt.exec_queues);
246 ++vm->preempt.num_exec_queues;
247 q->lr.pfence = pfence;
248
249 down_read(&vm->userptr.notifier_lock);
250
251 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
252 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
253
254 /*
255 * Check to see if a preemption on the VM or a userptr invalidation
256 * is in flight; if so, trigger this preempt fence to sync state with
257 * the other preempt fences on the VM.
258 */
259 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
260 if (wait)
261 dma_fence_enable_sw_signaling(pfence);
262
263 up_read(&vm->userptr.notifier_lock);
264
265 out_fini:
266 drm_exec_fini(exec);
267 out_up_write:
268 up_write(&vm->lock);
269
270 return err;
271 }
272
273 /**
274 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
275 * @vm: The VM.
276 * @q: The exec_queue
277 *
278 * Note that this function might be called multiple times on the same queue.
279 */
280 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
281 {
282 if (!xe_vm_in_preempt_fence_mode(vm))
283 return;
284
285 down_write(&vm->lock);
286 if (!list_empty(&q->lr.link)) {
287 list_del_init(&q->lr.link);
288 --vm->preempt.num_exec_queues;
289 }
290 if (q->lr.pfence) {
291 dma_fence_enable_sw_signaling(q->lr.pfence);
292 dma_fence_put(q->lr.pfence);
293 q->lr.pfence = NULL;
294 }
295 up_write(&vm->lock);
296 }
297
298 /**
299 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
300 * that need repinning.
301 * @vm: The VM.
302 *
303 * This function checks whether the VM has userptrs that need repinning,
304 * and provides a release-type barrier on the userptr.notifier_lock after
305 * checking.
306 *
307 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
308 */
309 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
310 {
311 lockdep_assert_held_read(&vm->userptr.notifier_lock);
312
313 return (list_empty(&vm->userptr.repin_list) &&
314 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
315 }
316
317 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
318
319 /**
320 * xe_vm_kill() - VM Kill
321 * @vm: The VM.
322 * @unlocked: Flag indicating the VM's dma-resv is not held
323 *
324 * Kill the VM by setting the banned flag, indicating it is no longer available
325 * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
326 */
327 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
328 {
329 struct xe_exec_queue *q;
330
331 lockdep_assert_held(&vm->lock);
332
333 if (unlocked)
334 xe_vm_lock(vm, false);
335
336 vm->flags |= XE_VM_FLAG_BANNED;
337 trace_xe_vm_kill(vm);
338
339 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
340 q->ops->kill(q);
341
342 if (unlocked)
343 xe_vm_unlock(vm);
344
345 /* TODO: Inform user the VM is banned */
346 }
347
348 /**
349 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
350 * @exec: The drm_exec object used for locking before validation.
351 * @err: The error returned from ttm_bo_validate().
352 * @end: A ktime_t cookie that should be set to 0 before first use and
353 * that should be reused on subsequent calls.
354 *
355 * With multiple active VMs, under memory pressure, it is possible that
356 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
357 * Until ttm properly handles locking in such scenarios, the best thing the
358 * driver can do is retry with a timeout. Check if that is necessary, and
359 * if so unlock the drm_exec's objects while keeping the ticket to prepare
360 * for a rerun.
361 *
362 * Return: true if a retry after drm_exec_init() is recommended;
363 * false otherwise.
364 */
365 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
366 {
367 ktime_t cur;
368
369 if (err != -ENOMEM)
370 return false;
371
372 cur = ktime_get();
373 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
374 if (!ktime_before(cur, *end))
375 return false;
376
377 msleep(20);
378 return true;
379 }
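/*
 * Illustrative sketch only, not part of the driver: the ktime cookie described
 * in the kernel-doc above starts at 0 and is reused across retries, so a
 * hypothetical caller (lock_and_validate() is a made-up placeholder) could be:
 *
 *	struct drm_exec exec;
 *	ktime_t end = 0;
 *	int err;
 *
 *	do {
 *		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *		err = lock_and_validate(&exec, vm);
 *		drm_exec_fini(&exec);
 *	} while (err && xe_vm_validate_should_retry(&exec, err, &end));
 *
 * preempt_rebind_work_func() below uses the same cookie pattern via its
 * retry label.
 */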
380
381 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
382 {
383 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
384 struct drm_gpuva *gpuva;
385 int ret;
386
387 lockdep_assert_held(&vm->lock);
388 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
389 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
390 &vm->rebind_list);
391
392 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
393 if (ret)
394 return ret;
395
396 vm_bo->evicted = false;
397 return 0;
398 }
399
400 /**
401 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
402 * @vm: The vm for which we are rebinding.
403 * @exec: The struct drm_exec with the locked GEM objects.
404 * @num_fences: The number of fences to reserve for the operation, not
405 * including rebinds and validations.
406 *
407 * Validates all evicted gem objects and rebinds their vmas. Note that
408 * rebindings may cause evictions and hence the validation-rebind
409 * sequence is rerun until there are no more objects to validate.
410 *
411 * Return: 0 on success, negative error code on error. In particular,
412 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
413 * the drm_exec transaction needs to be restarted.
414 */
415 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
416 unsigned int num_fences)
417 {
418 struct drm_gem_object *obj;
419 unsigned long index;
420 int ret;
421
422 do {
423 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
424 if (ret)
425 return ret;
426
427 ret = xe_vm_rebind(vm, false);
428 if (ret)
429 return ret;
430 } while (!list_empty(&vm->gpuvm.evict.list));
431
432 drm_exec_for_each_locked_object(exec, index, obj) {
433 ret = dma_resv_reserve_fences(obj->resv, num_fences);
434 if (ret)
435 return ret;
436 }
437
438 return 0;
439 }
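/*
 * Illustrative sketch only, not part of the driver: because
 * xe_vm_validate_rebind() may return -EDEADLK, it is meant to run inside a
 * drm_exec locking transaction so contention can be retried, roughly as
 * xe_preempt_work_begin() and preempt_rebind_work_func() below do:
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, 1);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 */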
440
441 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
442 bool *done)
443 {
444 int err;
445
446 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
447 if (err)
448 return err;
449
450 if (xe_vm_is_idle(vm)) {
451 vm->preempt.rebind_deactivated = true;
452 *done = true;
453 return 0;
454 }
455
456 if (!preempt_fences_waiting(vm)) {
457 *done = true;
458 return 0;
459 }
460
461 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
462 if (err)
463 return err;
464
465 err = wait_for_existing_preempt_fences(vm);
466 if (err)
467 return err;
468
469 /*
470 * Add validation and rebinding to the locking loop since both can
471 * cause evictions which may require blocking dma_resv locks.
472 * The fence reservation here is intended for the new preempt fences
473 * we attach at the end of the rebind work.
474 */
475 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
476 }
477
478 static void preempt_rebind_work_func(struct work_struct *w)
479 {
480 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
481 struct drm_exec exec;
482 unsigned int fence_count = 0;
483 LIST_HEAD(preempt_fences);
484 ktime_t end = 0;
485 int err = 0;
486 long wait;
487 int __maybe_unused tries = 0;
488
489 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
490 trace_xe_vm_rebind_worker_enter(vm);
491
492 down_write(&vm->lock);
493
494 if (xe_vm_is_closed_or_banned(vm)) {
495 up_write(&vm->lock);
496 trace_xe_vm_rebind_worker_exit(vm);
497 return;
498 }
499
500 retry:
501 if (xe_vm_userptr_check_repin(vm)) {
502 err = xe_vm_userptr_pin(vm);
503 if (err)
504 goto out_unlock_outer;
505 }
506
507 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
508
509 drm_exec_until_all_locked(&exec) {
510 bool done = false;
511
512 err = xe_preempt_work_begin(&exec, vm, &done);
513 drm_exec_retry_on_contention(&exec);
514 if (err || done) {
515 drm_exec_fini(&exec);
516 if (err && xe_vm_validate_should_retry(&exec, err, &end))
517 err = -EAGAIN;
518
519 goto out_unlock_outer;
520 }
521 }
522
523 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
524 if (err)
525 goto out_unlock;
526
527 err = xe_vm_rebind(vm, true);
528 if (err)
529 goto out_unlock;
530
531 /* Wait on rebinds and munmap style VM unbinds */
532 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
533 DMA_RESV_USAGE_KERNEL,
534 false, MAX_SCHEDULE_TIMEOUT);
535 if (wait <= 0) {
536 err = -ETIME;
537 goto out_unlock;
538 }
539
540 #define retry_required(__tries, __vm) \
541 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
542 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
543 __xe_vm_userptr_needs_repin(__vm))
544
545 down_read(&vm->userptr.notifier_lock);
546 if (retry_required(tries, vm)) {
547 up_read(&vm->userptr.notifier_lock);
548 err = -EAGAIN;
549 goto out_unlock;
550 }
551
552 #undef retry_required
553
554 spin_lock(&vm->xe->ttm.lru_lock);
555 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
556 spin_unlock(&vm->xe->ttm.lru_lock);
557
558 /* Point of no return. */
559 arm_preempt_fences(vm, &preempt_fences);
560 resume_and_reinstall_preempt_fences(vm, &exec);
561 up_read(&vm->userptr.notifier_lock);
562
563 out_unlock:
564 drm_exec_fini(&exec);
565 out_unlock_outer:
566 if (err == -EAGAIN) {
567 trace_xe_vm_rebind_worker_retry(vm);
568 goto retry;
569 }
570
571 if (err) {
572 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
573 xe_vm_kill(vm, true);
574 }
575 up_write(&vm->lock);
576
577 free_preempt_fences(&preempt_fences);
578
579 trace_xe_vm_rebind_worker_exit(vm);
580 }
581
582 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
583 {
584 struct xe_userptr *userptr = &uvma->userptr;
585 struct xe_vma *vma = &uvma->vma;
586 struct dma_resv_iter cursor;
587 struct dma_fence *fence;
588 long err;
589
590 /*
591 * Tell exec and rebind worker they need to repin and rebind this
592 * userptr.
593 */
594 if (!xe_vm_in_fault_mode(vm) &&
595 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
596 spin_lock(&vm->userptr.invalidated_lock);
597 list_move_tail(&userptr->invalidate_link,
598 &vm->userptr.invalidated);
599 spin_unlock(&vm->userptr.invalidated_lock);
600 }
601
602 /*
603 * Preempt fences turn into schedule disables, pipeline these.
604 * Note that even in fault mode, we need to wait for binds and
605 * unbinds to complete, and those are attached as BOOKKEEP fences
606 * to the vm.
607 */
608 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
609 DMA_RESV_USAGE_BOOKKEEP);
610 dma_resv_for_each_fence_unlocked(&cursor, fence)
611 dma_fence_enable_sw_signaling(fence);
612 dma_resv_iter_end(&cursor);
613
614 err = dma_resv_wait_timeout(xe_vm_resv(vm),
615 DMA_RESV_USAGE_BOOKKEEP,
616 false, MAX_SCHEDULE_TIMEOUT);
617 XE_WARN_ON(err <= 0);
618
619 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
620 err = xe_vm_invalidate_vma(vma);
621 XE_WARN_ON(err);
622 }
623
624 xe_hmm_userptr_unmap(uvma);
625 }
626
627 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
628 const struct mmu_notifier_range *range,
629 unsigned long cur_seq)
630 {
631 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
632 struct xe_vma *vma = &uvma->vma;
633 struct xe_vm *vm = xe_vma_vm(vma);
634
635 xe_assert(vm->xe, xe_vma_is_userptr(vma));
636 trace_xe_vma_userptr_invalidate(vma);
637
638 if (!mmu_notifier_range_blockable(range))
639 return false;
640
641 vm_dbg(&xe_vma_vm(vma)->xe->drm,
642 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
643 xe_vma_start(vma), xe_vma_size(vma));
644
645 down_write(&vm->userptr.notifier_lock);
646 mmu_interval_set_seq(mni, cur_seq);
647
648 __vma_userptr_invalidate(vm, uvma);
649 up_write(&vm->userptr.notifier_lock);
650 trace_xe_vma_userptr_invalidate_complete(vma);
651
652 return true;
653 }
654
655 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
656 .invalidate = vma_userptr_invalidate,
657 };
658
659 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
660 /**
661 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
662 * @uvma: The userptr vma to invalidate
663 *
664 * Perform a forced userptr invalidation for testing purposes.
665 */
666 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
667 {
668 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
669
670 /* Protect against concurrent userptr pinning */
671 lockdep_assert_held(&vm->lock);
672 /* Protect against concurrent notifiers */
673 lockdep_assert_held(&vm->userptr.notifier_lock);
674 /*
675 * Protect against concurrent instances of this function and
676 * the critical exec sections
677 */
678 xe_vm_assert_held(vm);
679
680 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
681 uvma->userptr.notifier_seq))
682 uvma->userptr.notifier_seq -= 2;
683 __vma_userptr_invalidate(vm, uvma);
684 }
685 #endif
686
687 int xe_vm_userptr_pin(struct xe_vm *vm)
688 {
689 struct xe_userptr_vma *uvma, *next;
690 int err = 0;
691 LIST_HEAD(tmp_evict);
692
693 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
694 lockdep_assert_held_write(&vm->lock);
695
696 /* Collect invalidated userptrs */
697 spin_lock(&vm->userptr.invalidated_lock);
698 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
699 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
700 userptr.invalidate_link) {
701 list_del_init(&uvma->userptr.invalidate_link);
702 list_add_tail(&uvma->userptr.repin_link,
703 &vm->userptr.repin_list);
704 }
705 spin_unlock(&vm->userptr.invalidated_lock);
706
707 /* Pin and move to bind list */
708 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
709 userptr.repin_link) {
710 err = xe_vma_userptr_pin_pages(uvma);
711 if (err == -EFAULT) {
712 list_del_init(&uvma->userptr.repin_link);
713 /*
714 * We might have already done the pin once, but then
715 * had to retry before the re-bind happened, due to
716 * some other condition in the caller. In the
717 * meantime the userptr got dinged by the notifier such
718 * that we need to revalidate here, but this time we hit
719 * the EFAULT. In such a case make sure we remove
720 * ourselves from the rebind list to avoid going down in
721 * flames.
722 */
723 if (!list_empty(&uvma->vma.combined_links.rebind))
724 list_del_init(&uvma->vma.combined_links.rebind);
725
726 /* Wait for pending binds */
727 xe_vm_lock(vm, false);
728 dma_resv_wait_timeout(xe_vm_resv(vm),
729 DMA_RESV_USAGE_BOOKKEEP,
730 false, MAX_SCHEDULE_TIMEOUT);
731
732 err = xe_vm_invalidate_vma(&uvma->vma);
733 xe_vm_unlock(vm);
734 if (err)
735 break;
736 } else {
737 if (err)
738 break;
739
740 list_del_init(&uvma->userptr.repin_link);
741 list_move_tail(&uvma->vma.combined_links.rebind,
742 &vm->rebind_list);
743 }
744 }
745
746 if (err) {
747 down_write(&vm->userptr.notifier_lock);
748 spin_lock(&vm->userptr.invalidated_lock);
749 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
750 userptr.repin_link) {
751 list_del_init(&uvma->userptr.repin_link);
752 list_move_tail(&uvma->userptr.invalidate_link,
753 &vm->userptr.invalidated);
754 }
755 spin_unlock(&vm->userptr.invalidated_lock);
756 up_write(&vm->userptr.notifier_lock);
757 }
758 return err;
759 }
760
761 /**
762 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
763 * that need repinning.
764 * @vm: The VM.
765 *
766 * This function does an advisory check for whether the VM has userptrs that
767 * need repinning.
768 *
769 * Return: 0 if there are no indications of userptrs needing repinning,
770 * -EAGAIN if there are.
771 */
772 int xe_vm_userptr_check_repin(struct xe_vm *vm)
773 {
774 return (list_empty_careful(&vm->userptr.repin_list) &&
775 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
776 }
777
778 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
779 {
780 int i;
781
782 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
783 if (!vops->pt_update_ops[i].num_ops)
784 continue;
785
786 vops->pt_update_ops[i].ops =
787 kmalloc_array(vops->pt_update_ops[i].num_ops,
788 sizeof(*vops->pt_update_ops[i].ops),
789 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
790 if (!vops->pt_update_ops[i].ops)
791 return array_of_binds ? -ENOBUFS : -ENOMEM;
792 }
793
794 return 0;
795 }
796 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
797
798 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
799 {
800 int i;
801
802 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
803 kfree(vops->pt_update_ops[i].ops);
804 }
805
806 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
807 {
808 int i;
809
810 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
811 if (BIT(i) & tile_mask)
812 ++vops->pt_update_ops[i].num_ops;
813 }
814
815 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
816 u8 tile_mask)
817 {
818 INIT_LIST_HEAD(&op->link);
819 op->tile_mask = tile_mask;
820 op->base.op = DRM_GPUVA_OP_MAP;
821 op->base.map.va.addr = vma->gpuva.va.addr;
822 op->base.map.va.range = vma->gpuva.va.range;
823 op->base.map.gem.obj = vma->gpuva.gem.obj;
824 op->base.map.gem.offset = vma->gpuva.gem.offset;
825 op->map.vma = vma;
826 op->map.immediate = true;
827 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
828 op->map.is_null = xe_vma_is_null(vma);
829 }
830
831 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
832 u8 tile_mask)
833 {
834 struct xe_vma_op *op;
835
836 op = kzalloc(sizeof(*op), GFP_KERNEL);
837 if (!op)
838 return -ENOMEM;
839
840 xe_vm_populate_rebind(op, vma, tile_mask);
841 list_add_tail(&op->link, &vops->list);
842 xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
843
844 return 0;
845 }
846
847 static struct dma_fence *ops_execute(struct xe_vm *vm,
848 struct xe_vma_ops *vops);
849 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
850 struct xe_exec_queue *q,
851 struct xe_sync_entry *syncs, u32 num_syncs);
852
853 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
854 {
855 struct dma_fence *fence;
856 struct xe_vma *vma, *next;
857 struct xe_vma_ops vops;
858 struct xe_vma_op *op, *next_op;
859 int err, i;
860
861 lockdep_assert_held(&vm->lock);
862 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
863 list_empty(&vm->rebind_list))
864 return 0;
865
866 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
867 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
868 vops.pt_update_ops[i].wait_vm_bookkeep = true;
869
870 xe_vm_assert_held(vm);
871 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
872 xe_assert(vm->xe, vma->tile_present);
873
874 if (rebind_worker)
875 trace_xe_vma_rebind_worker(vma);
876 else
877 trace_xe_vma_rebind_exec(vma);
878
879 err = xe_vm_ops_add_rebind(&vops, vma,
880 vma->tile_present);
881 if (err)
882 goto free_ops;
883 }
884
885 err = xe_vma_ops_alloc(&vops, false);
886 if (err)
887 goto free_ops;
888
889 fence = ops_execute(vm, &vops);
890 if (IS_ERR(fence)) {
891 err = PTR_ERR(fence);
892 } else {
893 dma_fence_put(fence);
894 list_for_each_entry_safe(vma, next, &vm->rebind_list,
895 combined_links.rebind)
896 list_del_init(&vma->combined_links.rebind);
897 }
898 free_ops:
899 list_for_each_entry_safe(op, next_op, &vops.list, link) {
900 list_del(&op->link);
901 kfree(op);
902 }
903 xe_vma_ops_fini(&vops);
904
905 return err;
906 }
907
908 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
909 {
910 struct dma_fence *fence = NULL;
911 struct xe_vma_ops vops;
912 struct xe_vma_op *op, *next_op;
913 struct xe_tile *tile;
914 u8 id;
915 int err;
916
917 lockdep_assert_held(&vm->lock);
918 xe_vm_assert_held(vm);
919 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
920
921 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
922 for_each_tile(tile, vm->xe, id) {
923 vops.pt_update_ops[id].wait_vm_bookkeep = true;
924 vops.pt_update_ops[tile->id].q =
925 xe_tile_migrate_exec_queue(tile);
926 }
927
928 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
929 if (err)
930 return ERR_PTR(err);
931
932 err = xe_vma_ops_alloc(&vops, false);
933 if (err) {
934 fence = ERR_PTR(err);
935 goto free_ops;
936 }
937
938 fence = ops_execute(vm, &vops);
939
940 free_ops:
941 list_for_each_entry_safe(op, next_op, &vops.list, link) {
942 list_del(&op->link);
943 kfree(op);
944 }
945 xe_vma_ops_fini(&vops);
946
947 return fence;
948 }
949
950 static void xe_vma_free(struct xe_vma *vma)
951 {
952 if (xe_vma_is_userptr(vma))
953 kfree(to_userptr_vma(vma));
954 else
955 kfree(vma);
956 }
957
958 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
959 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
960 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
961
962 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
963 struct xe_bo *bo,
964 u64 bo_offset_or_userptr,
965 u64 start, u64 end,
966 u16 pat_index, unsigned int flags)
967 {
968 struct xe_vma *vma;
969 struct xe_tile *tile;
970 u8 id;
971 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
972 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
973 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
974
975 xe_assert(vm->xe, start < end);
976 xe_assert(vm->xe, end < vm->size);
977
978 /*
979 * Allocate and ensure that the xe_vma_is_userptr() return
980 * matches what was allocated.
981 */
982 if (!bo && !is_null) {
983 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
984
985 if (!uvma)
986 return ERR_PTR(-ENOMEM);
987
988 vma = &uvma->vma;
989 } else {
990 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
991 if (!vma)
992 return ERR_PTR(-ENOMEM);
993
994 if (is_null)
995 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
996 if (bo)
997 vma->gpuva.gem.obj = &bo->ttm.base;
998 }
999
1000 INIT_LIST_HEAD(&vma->combined_links.rebind);
1001
1002 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1003 vma->gpuva.vm = &vm->gpuvm;
1004 vma->gpuva.va.addr = start;
1005 vma->gpuva.va.range = end - start + 1;
1006 if (read_only)
1007 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1008 if (dumpable)
1009 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1010
1011 for_each_tile(tile, vm->xe, id)
1012 vma->tile_mask |= 0x1 << id;
1013
1014 if (vm->xe->info.has_atomic_enable_pte_bit)
1015 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1016
1017 vma->pat_index = pat_index;
1018
1019 if (bo) {
1020 struct drm_gpuvm_bo *vm_bo;
1021
1022 xe_bo_assert_held(bo);
1023
1024 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1025 if (IS_ERR(vm_bo)) {
1026 xe_vma_free(vma);
1027 return ERR_CAST(vm_bo);
1028 }
1029
1030 drm_gpuvm_bo_extobj_add(vm_bo);
1031 drm_gem_object_get(&bo->ttm.base);
1032 vma->gpuva.gem.offset = bo_offset_or_userptr;
1033 drm_gpuva_link(&vma->gpuva, vm_bo);
1034 drm_gpuvm_bo_put(vm_bo);
1035 } else /* userptr or null */ {
1036 if (!is_null) {
1037 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1038 u64 size = end - start + 1;
1039 int err;
1040
1041 INIT_LIST_HEAD(&userptr->invalidate_link);
1042 INIT_LIST_HEAD(&userptr->repin_link);
1043 vma->gpuva.gem.offset = bo_offset_or_userptr;
1044 mutex_init(&userptr->unmap_mutex);
1045
1046 err = mmu_interval_notifier_insert(&userptr->notifier,
1047 current->mm,
1048 xe_vma_userptr(vma), size,
1049 &vma_userptr_notifier_ops);
1050 if (err) {
1051 xe_vma_free(vma);
1052 return ERR_PTR(err);
1053 }
1054
1055 userptr->notifier_seq = LONG_MAX;
1056 }
1057
1058 xe_vm_get(vm);
1059 }
1060
1061 return vma;
1062 }
1063
1064 static void xe_vma_destroy_late(struct xe_vma *vma)
1065 {
1066 struct xe_vm *vm = xe_vma_vm(vma);
1067
1068 if (vma->ufence) {
1069 xe_sync_ufence_put(vma->ufence);
1070 vma->ufence = NULL;
1071 }
1072
1073 if (xe_vma_is_userptr(vma)) {
1074 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1075 struct xe_userptr *userptr = &uvma->userptr;
1076
1077 if (userptr->sg)
1078 xe_hmm_userptr_free_sg(uvma);
1079
1080 /*
1081 * Since userptr pages are not pinned, we can't remove
1082 * the notifier until we're sure the GPU is not accessing
1083 * them anymore
1084 */
1085 mmu_interval_notifier_remove(&userptr->notifier);
1086 mutex_destroy(&userptr->unmap_mutex);
1087 xe_vm_put(vm);
1088 } else if (xe_vma_is_null(vma)) {
1089 xe_vm_put(vm);
1090 } else {
1091 xe_bo_put(xe_vma_bo(vma));
1092 }
1093
1094 xe_vma_free(vma);
1095 }
1096
1097 static void vma_destroy_work_func(struct work_struct *w)
1098 {
1099 struct xe_vma *vma =
1100 container_of(w, struct xe_vma, destroy_work);
1101
1102 xe_vma_destroy_late(vma);
1103 }
1104
1105 static void vma_destroy_cb(struct dma_fence *fence,
1106 struct dma_fence_cb *cb)
1107 {
1108 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1109
1110 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1111 queue_work(system_unbound_wq, &vma->destroy_work);
1112 }
1113
1114 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1115 {
1116 struct xe_vm *vm = xe_vma_vm(vma);
1117
1118 lockdep_assert_held_write(&vm->lock);
1119 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1120
1121 if (xe_vma_is_userptr(vma)) {
1122 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1123
1124 spin_lock(&vm->userptr.invalidated_lock);
1125 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1126 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1127 spin_unlock(&vm->userptr.invalidated_lock);
1128 } else if (!xe_vma_is_null(vma)) {
1129 xe_bo_assert_held(xe_vma_bo(vma));
1130
1131 drm_gpuva_unlink(&vma->gpuva);
1132 }
1133
1134 xe_vm_assert_held(vm);
1135 if (fence) {
1136 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1137 vma_destroy_cb);
1138
1139 if (ret) {
1140 XE_WARN_ON(ret != -ENOENT);
1141 xe_vma_destroy_late(vma);
1142 }
1143 } else {
1144 xe_vma_destroy_late(vma);
1145 }
1146 }
1147
1148 /**
1149 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1150 * @exec: The drm_exec object we're currently locking for.
1151 * @vma: The vma for which we want to lock the vm resv and any attached
1152 * object's resv.
1153 *
1154 * Return: 0 on success, negative error code on error. In particular
1155 * may return -EDEADLK on WW transaction contention and -EINTR if
1156 * an interruptible wait is terminated by a signal.
1157 */
1158 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1159 {
1160 struct xe_vm *vm = xe_vma_vm(vma);
1161 struct xe_bo *bo = xe_vma_bo(vma);
1162 int err;
1163
1164 XE_WARN_ON(!vm);
1165
1166 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1167 if (!err && bo && !bo->vm)
1168 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1169
1170 return err;
1171 }
1172
1173 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1174 {
1175 struct drm_exec exec;
1176 int err;
1177
1178 drm_exec_init(&exec, 0, 0);
1179 drm_exec_until_all_locked(&exec) {
1180 err = xe_vm_lock_vma(&exec, vma);
1181 drm_exec_retry_on_contention(&exec);
1182 if (XE_WARN_ON(err))
1183 break;
1184 }
1185
1186 xe_vma_destroy(vma, NULL);
1187
1188 drm_exec_fini(&exec);
1189 }
1190
1191 struct xe_vma *
1192 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1193 {
1194 struct drm_gpuva *gpuva;
1195
1196 lockdep_assert_held(&vm->lock);
1197
1198 if (xe_vm_is_closed_or_banned(vm))
1199 return NULL;
1200
1201 xe_assert(vm->xe, start + range <= vm->size);
1202
1203 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1204
1205 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1206 }
1207
1208 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1209 {
1210 int err;
1211
1212 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1213 lockdep_assert_held(&vm->lock);
1214
1215 mutex_lock(&vm->snap_mutex);
1216 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1217 mutex_unlock(&vm->snap_mutex);
1218 XE_WARN_ON(err); /* Shouldn't be possible */
1219
1220 return err;
1221 }
1222
1223 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1224 {
1225 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1226 lockdep_assert_held(&vm->lock);
1227
1228 mutex_lock(&vm->snap_mutex);
1229 drm_gpuva_remove(&vma->gpuva);
1230 mutex_unlock(&vm->snap_mutex);
1231 if (vm->usm.last_fault_vma == vma)
1232 vm->usm.last_fault_vma = NULL;
1233 }
1234
1235 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1236 {
1237 struct xe_vma_op *op;
1238
1239 op = kzalloc(sizeof(*op), GFP_KERNEL);
1240
1241 if (unlikely(!op))
1242 return NULL;
1243
1244 return &op->base;
1245 }
1246
1247 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1248
1249 static const struct drm_gpuvm_ops gpuvm_ops = {
1250 .op_alloc = xe_vm_op_alloc,
1251 .vm_bo_validate = xe_gpuvm_validate,
1252 .vm_free = xe_vm_free,
1253 };
1254
1255 static u64 pde_encode_pat_index(u16 pat_index)
1256 {
1257 u64 pte = 0;
1258
1259 if (pat_index & BIT(0))
1260 pte |= XE_PPGTT_PTE_PAT0;
1261
1262 if (pat_index & BIT(1))
1263 pte |= XE_PPGTT_PTE_PAT1;
1264
1265 return pte;
1266 }
1267
1268 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1269 {
1270 u64 pte = 0;
1271
1272 if (pat_index & BIT(0))
1273 pte |= XE_PPGTT_PTE_PAT0;
1274
1275 if (pat_index & BIT(1))
1276 pte |= XE_PPGTT_PTE_PAT1;
1277
1278 if (pat_index & BIT(2)) {
1279 if (pt_level)
1280 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1281 else
1282 pte |= XE_PPGTT_PTE_PAT2;
1283 }
1284
1285 if (pat_index & BIT(3))
1286 pte |= XELPG_PPGTT_PTE_PAT3;
1287
1288 if (pat_index & (BIT(4)))
1289 pte |= XE2_PPGTT_PTE_PAT4;
1290
1291 return pte;
1292 }
1293
1294 static u64 pte_encode_ps(u32 pt_level)
1295 {
1296 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1297
1298 if (pt_level == 1)
1299 return XE_PDE_PS_2M;
1300 else if (pt_level == 2)
1301 return XE_PDPE_PS_1G;
1302
1303 return 0;
1304 }
1305
1306 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1307 const u16 pat_index)
1308 {
1309 u64 pde;
1310
1311 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1312 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1313 pde |= pde_encode_pat_index(pat_index);
1314
1315 return pde;
1316 }
1317
1318 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1319 u16 pat_index, u32 pt_level)
1320 {
1321 u64 pte;
1322
1323 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1324 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1325 pte |= pte_encode_pat_index(pat_index, pt_level);
1326 pte |= pte_encode_ps(pt_level);
1327
1328 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1329 pte |= XE_PPGTT_PTE_DM;
1330
1331 return pte;
1332 }
1333
1334 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1335 u16 pat_index, u32 pt_level)
1336 {
1337 pte |= XE_PAGE_PRESENT;
1338
1339 if (likely(!xe_vma_read_only(vma)))
1340 pte |= XE_PAGE_RW;
1341
1342 pte |= pte_encode_pat_index(pat_index, pt_level);
1343 pte |= pte_encode_ps(pt_level);
1344
1345 if (unlikely(xe_vma_is_null(vma)))
1346 pte |= XE_PTE_NULL;
1347
1348 return pte;
1349 }
1350
1351 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1352 u16 pat_index,
1353 u32 pt_level, bool devmem, u64 flags)
1354 {
1355 u64 pte;
1356
1357 /* Avoid passing random bits directly as flags */
1358 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1359
1360 pte = addr;
1361 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1362 pte |= pte_encode_pat_index(pat_index, pt_level);
1363 pte |= pte_encode_ps(pt_level);
1364
1365 if (devmem)
1366 pte |= XE_PPGTT_PTE_DM;
1367
1368 pte |= flags;
1369
1370 return pte;
1371 }
1372
1373 static const struct xe_pt_ops xelp_pt_ops = {
1374 .pte_encode_bo = xelp_pte_encode_bo,
1375 .pte_encode_vma = xelp_pte_encode_vma,
1376 .pte_encode_addr = xelp_pte_encode_addr,
1377 .pde_encode_bo = xelp_pde_encode_bo,
1378 };
1379
1380 static void vm_destroy_work_func(struct work_struct *w);
1381
1382 /**
1383 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1384 * given tile and vm.
1385 * @xe: xe device.
1386 * @tile: tile to set up for.
1387 * @vm: vm to set up for.
1388 *
1389 * Sets up a pagetable tree with one page-table per level and a single
1390 * leaf PTE. All pagetable entries point to the single page-table or,
1391 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1392 * writes become NOPs.
1393 *
1394 * Return: 0 on success, negative error code on error.
1395 */
1396 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1397 struct xe_vm *vm)
1398 {
1399 u8 id = tile->id;
1400 int i;
1401
1402 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1403 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1404 if (IS_ERR(vm->scratch_pt[id][i]))
1405 return PTR_ERR(vm->scratch_pt[id][i]);
1406
1407 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1408 }
1409
1410 return 0;
1411 }
1412 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1413
1414 static void xe_vm_free_scratch(struct xe_vm *vm)
1415 {
1416 struct xe_tile *tile;
1417 u8 id;
1418
1419 if (!xe_vm_has_scratch(vm))
1420 return;
1421
1422 for_each_tile(tile, vm->xe, id) {
1423 u32 i;
1424
1425 if (!vm->pt_root[id])
1426 continue;
1427
1428 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1429 if (vm->scratch_pt[id][i])
1430 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1431 }
1432 }
1433
1434 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1435 {
1436 struct drm_gem_object *vm_resv_obj;
1437 struct xe_vm *vm;
1438 int err, number_tiles = 0;
1439 struct xe_tile *tile;
1440 u8 id;
1441
1442 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1443 if (!vm)
1444 return ERR_PTR(-ENOMEM);
1445
1446 vm->xe = xe;
1447
1448 vm->size = 1ull << xe->info.va_bits;
1449
1450 vm->flags = flags;
1451
1452 init_rwsem(&vm->lock);
1453 mutex_init(&vm->snap_mutex);
1454
1455 INIT_LIST_HEAD(&vm->rebind_list);
1456
1457 INIT_LIST_HEAD(&vm->userptr.repin_list);
1458 INIT_LIST_HEAD(&vm->userptr.invalidated);
1459 init_rwsem(&vm->userptr.notifier_lock);
1460 spin_lock_init(&vm->userptr.invalidated_lock);
1461
1462 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1463
1464 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1465
1466 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1467 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1468
1469 for_each_tile(tile, xe, id)
1470 xe_range_fence_tree_init(&vm->rftree[id]);
1471
1472 vm->pt_ops = &xelp_pt_ops;
1473
1474 /*
1475 * Long-running workloads are not protected by the scheduler references.
1476 * By design, run_job for long-running workloads returns NULL and the
1477 * scheduler drops all references to it, hence protecting the VM
1478 * for this case is necessary.
1479 */
1480 if (flags & XE_VM_FLAG_LR_MODE)
1481 xe_pm_runtime_get_noresume(xe);
1482
1483 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1484 if (!vm_resv_obj) {
1485 err = -ENOMEM;
1486 goto err_no_resv;
1487 }
1488
1489 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1490 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1491
1492 drm_gem_object_put(vm_resv_obj);
1493
1494 err = xe_vm_lock(vm, true);
1495 if (err)
1496 goto err_close;
1497
1498 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1499 vm->flags |= XE_VM_FLAG_64K;
1500
1501 for_each_tile(tile, xe, id) {
1502 if (flags & XE_VM_FLAG_MIGRATION &&
1503 tile->id != XE_VM_FLAG_TILE_ID(flags))
1504 continue;
1505
1506 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1507 if (IS_ERR(vm->pt_root[id])) {
1508 err = PTR_ERR(vm->pt_root[id]);
1509 vm->pt_root[id] = NULL;
1510 goto err_unlock_close;
1511 }
1512 }
1513
1514 if (xe_vm_has_scratch(vm)) {
1515 for_each_tile(tile, xe, id) {
1516 if (!vm->pt_root[id])
1517 continue;
1518
1519 err = xe_vm_create_scratch(xe, tile, vm);
1520 if (err)
1521 goto err_unlock_close;
1522 }
1523 vm->batch_invalidate_tlb = true;
1524 }
1525
1526 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1527 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1528 vm->batch_invalidate_tlb = false;
1529 }
1530
1531 /* Fill pt_root after allocating scratch tables */
1532 for_each_tile(tile, xe, id) {
1533 if (!vm->pt_root[id])
1534 continue;
1535
1536 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1537 }
1538 xe_vm_unlock(vm);
1539
1540 /* Kernel migration VM shouldn't have a circular loop.. */
1541 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1542 for_each_tile(tile, xe, id) {
1543 struct xe_exec_queue *q;
1544 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1545
1546 if (!vm->pt_root[id])
1547 continue;
1548
1549 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1550 if (IS_ERR(q)) {
1551 err = PTR_ERR(q);
1552 goto err_close;
1553 }
1554 vm->q[id] = q;
1555 number_tiles++;
1556 }
1557 }
1558
1559 if (number_tiles > 1)
1560 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1561
1562 trace_xe_vm_create(vm);
1563
1564 return vm;
1565
1566 err_unlock_close:
1567 xe_vm_unlock(vm);
1568 err_close:
1569 xe_vm_close_and_put(vm);
1570 return ERR_PTR(err);
1571
1572 err_no_resv:
1573 mutex_destroy(&vm->snap_mutex);
1574 for_each_tile(tile, xe, id)
1575 xe_range_fence_tree_fini(&vm->rftree[id]);
1576 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1577 kfree(vm);
1578 if (flags & XE_VM_FLAG_LR_MODE)
1579 xe_pm_runtime_put(xe);
1580 return ERR_PTR(err);
1581 }
1582
1583 static void xe_vm_close(struct xe_vm *vm)
1584 {
1585 down_write(&vm->lock);
1586 vm->size = 0;
1587 up_write(&vm->lock);
1588 }
1589
1590 void xe_vm_close_and_put(struct xe_vm *vm)
1591 {
1592 LIST_HEAD(contested);
1593 struct xe_device *xe = vm->xe;
1594 struct xe_tile *tile;
1595 struct xe_vma *vma, *next_vma;
1596 struct drm_gpuva *gpuva, *next;
1597 u8 id;
1598
1599 xe_assert(xe, !vm->preempt.num_exec_queues);
1600
1601 xe_vm_close(vm);
1602 if (xe_vm_in_preempt_fence_mode(vm))
1603 flush_work(&vm->preempt.rebind_work);
1604
1605 down_write(&vm->lock);
1606 for_each_tile(tile, xe, id) {
1607 if (vm->q[id])
1608 xe_exec_queue_last_fence_put(vm->q[id], vm);
1609 }
1610 up_write(&vm->lock);
1611
1612 for_each_tile(tile, xe, id) {
1613 if (vm->q[id]) {
1614 xe_exec_queue_kill(vm->q[id]);
1615 xe_exec_queue_put(vm->q[id]);
1616 vm->q[id] = NULL;
1617 }
1618 }
1619
1620 down_write(&vm->lock);
1621 xe_vm_lock(vm, false);
1622 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1623 vma = gpuva_to_vma(gpuva);
1624
1625 if (xe_vma_has_no_bo(vma)) {
1626 down_read(&vm->userptr.notifier_lock);
1627 vma->gpuva.flags |= XE_VMA_DESTROYED;
1628 up_read(&vm->userptr.notifier_lock);
1629 }
1630
1631 xe_vm_remove_vma(vm, vma);
1632
1633 /* easy case, remove from VMA? */
1634 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1635 list_del_init(&vma->combined_links.rebind);
1636 xe_vma_destroy(vma, NULL);
1637 continue;
1638 }
1639
1640 list_move_tail(&vma->combined_links.destroy, &contested);
1641 vma->gpuva.flags |= XE_VMA_DESTROYED;
1642 }
1643
1644 /*
1645 * All vm operations will add shared fences to resv.
1646 * The only exception is eviction for a shared object,
1647 * but even so, the unbind when evicted would still
1648 * install a fence to resv. Hence it's safe to
1649 * destroy the pagetables immediately.
1650 */
1651 xe_vm_free_scratch(vm);
1652
1653 for_each_tile(tile, xe, id) {
1654 if (vm->pt_root[id]) {
1655 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1656 vm->pt_root[id] = NULL;
1657 }
1658 }
1659 xe_vm_unlock(vm);
1660
1661 /*
1662 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1663 * Since we hold a refcount to the bo, we can remove and free
1664 * the members safely without locking.
1665 */
1666 list_for_each_entry_safe(vma, next_vma, &contested,
1667 combined_links.destroy) {
1668 list_del_init(&vma->combined_links.destroy);
1669 xe_vma_destroy_unlocked(vma);
1670 }
1671
1672 up_write(&vm->lock);
1673
1674 down_write(&xe->usm.lock);
1675 if (vm->usm.asid) {
1676 void *lookup;
1677
1678 xe_assert(xe, xe->info.has_asid);
1679 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1680
1681 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1682 xe_assert(xe, lookup == vm);
1683 }
1684 up_write(&xe->usm.lock);
1685
1686 for_each_tile(tile, xe, id)
1687 xe_range_fence_tree_fini(&vm->rftree[id]);
1688
1689 xe_vm_put(vm);
1690 }
1691
1692 static void vm_destroy_work_func(struct work_struct *w)
1693 {
1694 struct xe_vm *vm =
1695 container_of(w, struct xe_vm, destroy_work);
1696 struct xe_device *xe = vm->xe;
1697 struct xe_tile *tile;
1698 u8 id;
1699
1700 /* xe_vm_close_and_put was not called? */
1701 xe_assert(xe, !vm->size);
1702
1703 if (xe_vm_in_preempt_fence_mode(vm))
1704 flush_work(&vm->preempt.rebind_work);
1705
1706 mutex_destroy(&vm->snap_mutex);
1707
1708 if (vm->flags & XE_VM_FLAG_LR_MODE)
1709 xe_pm_runtime_put(xe);
1710
1711 for_each_tile(tile, xe, id)
1712 XE_WARN_ON(vm->pt_root[id]);
1713
1714 trace_xe_vm_free(vm);
1715
1716 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1717
1718 if (vm->xef)
1719 xe_file_put(vm->xef);
1720
1721 kfree(vm);
1722 }
1723
1724 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1725 {
1726 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1727
1728 /* To destroy the VM we need to be able to sleep */
1729 queue_work(system_unbound_wq, &vm->destroy_work);
1730 }
1731
1732 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1733 {
1734 struct xe_vm *vm;
1735
1736 mutex_lock(&xef->vm.lock);
1737 vm = xa_load(&xef->vm.xa, id);
1738 if (vm)
1739 xe_vm_get(vm);
1740 mutex_unlock(&xef->vm.lock);
1741
1742 return vm;
1743 }
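/*
 * Illustrative sketch only, not part of the driver: xe_vm_lookup() takes a
 * reference on the VM it returns (or returns NULL), so a hypothetical caller
 * is expected to balance it with xe_vm_put():
 *
 *	vm = xe_vm_lookup(xef, vm_id);
 *	if (!vm)
 *		return -ENOENT;
 *	... operate on vm ...
 *	xe_vm_put(vm);
 */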
1744
1745 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1746 {
1747 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1748 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1749 }
1750
1751 static struct xe_exec_queue *
1752 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1753 {
1754 return q ? q : vm->q[0];
1755 }
1756
1757 static struct xe_user_fence *
1758 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1759 {
1760 unsigned int i;
1761
1762 for (i = 0; i < num_syncs; i++) {
1763 struct xe_sync_entry *e = &syncs[i];
1764
1765 if (xe_sync_is_ufence(e))
1766 return xe_sync_ufence_get(e);
1767 }
1768
1769 return NULL;
1770 }
1771
1772 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1773 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1774 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1775
1776 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1777 struct drm_file *file)
1778 {
1779 struct xe_device *xe = to_xe_device(dev);
1780 struct xe_file *xef = to_xe_file(file);
1781 struct drm_xe_vm_create *args = data;
1782 struct xe_tile *tile;
1783 struct xe_vm *vm;
1784 u32 id, asid;
1785 int err;
1786 u32 flags = 0;
1787
1788 if (XE_IOCTL_DBG(xe, args->extensions))
1789 return -EINVAL;
1790
1791 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1792 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1793
1794 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1795 !xe->info.has_usm))
1796 return -EINVAL;
1797
1798 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1799 return -EINVAL;
1800
1801 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1802 return -EINVAL;
1803
1804 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1805 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1806 return -EINVAL;
1807
1808 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1809 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1810 return -EINVAL;
1811
1812 if (XE_IOCTL_DBG(xe, args->extensions))
1813 return -EINVAL;
1814
1815 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1816 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1817 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1818 flags |= XE_VM_FLAG_LR_MODE;
1819 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1820 flags |= XE_VM_FLAG_FAULT_MODE;
1821
1822 vm = xe_vm_create(xe, flags);
1823 if (IS_ERR(vm))
1824 return PTR_ERR(vm);
1825
1826 if (xe->info.has_asid) {
1827 down_write(&xe->usm.lock);
1828 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1829 XA_LIMIT(1, XE_MAX_ASID - 1),
1830 &xe->usm.next_asid, GFP_KERNEL);
1831 up_write(&xe->usm.lock);
1832 if (err < 0)
1833 goto err_close_and_put;
1834
1835 vm->usm.asid = asid;
1836 }
1837
1838 vm->xef = xe_file_get(xef);
1839
1840 /* Record BO memory for VM pagetable created against client */
1841 for_each_tile(tile, xe, id)
1842 if (vm->pt_root[id])
1843 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1844
1845 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1846 /* Warning: Security issue - never enable by default */
1847 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1848 #endif
1849
1850 /* user id alloc must always be last in ioctl to prevent UAF */
1851 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1852 if (err)
1853 goto err_close_and_put;
1854
1855 args->vm_id = id;
1856
1857 return 0;
1858
1859 err_close_and_put:
1860 xe_vm_close_and_put(vm);
1861
1862 return err;
1863 }
1864
1865 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1866 struct drm_file *file)
1867 {
1868 struct xe_device *xe = to_xe_device(dev);
1869 struct xe_file *xef = to_xe_file(file);
1870 struct drm_xe_vm_destroy *args = data;
1871 struct xe_vm *vm;
1872 int err = 0;
1873
1874 if (XE_IOCTL_DBG(xe, args->pad) ||
1875 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1876 return -EINVAL;
1877
1878 mutex_lock(&xef->vm.lock);
1879 vm = xa_load(&xef->vm.xa, args->vm_id);
1880 if (XE_IOCTL_DBG(xe, !vm))
1881 err = -ENOENT;
1882 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1883 err = -EBUSY;
1884 else
1885 xa_erase(&xef->vm.xa, args->vm_id);
1886 mutex_unlock(&xef->vm.lock);
1887
1888 if (!err)
1889 xe_vm_close_and_put(vm);
1890
1891 return err;
1892 }
1893
1894 static const u32 region_to_mem_type[] = {
1895 XE_PL_TT,
1896 XE_PL_VRAM0,
1897 XE_PL_VRAM1,
1898 };
1899
1900 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1901 bool post_commit)
1902 {
1903 down_read(&vm->userptr.notifier_lock);
1904 vma->gpuva.flags |= XE_VMA_DESTROYED;
1905 up_read(&vm->userptr.notifier_lock);
1906 if (post_commit)
1907 xe_vm_remove_vma(vm, vma);
1908 }
1909
1910 #undef ULL
1911 #define ULL unsigned long long
1912
1913 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1914 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1915 {
1916 struct xe_vma *vma;
1917
1918 switch (op->op) {
1919 case DRM_GPUVA_OP_MAP:
1920 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1921 (ULL)op->map.va.addr, (ULL)op->map.va.range);
1922 break;
1923 case DRM_GPUVA_OP_REMAP:
1924 vma = gpuva_to_vma(op->remap.unmap->va);
1925 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1926 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1927 op->remap.unmap->keep ? 1 : 0);
1928 if (op->remap.prev)
1929 vm_dbg(&xe->drm,
1930 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
1931 (ULL)op->remap.prev->va.addr,
1932 (ULL)op->remap.prev->va.range);
1933 if (op->remap.next)
1934 vm_dbg(&xe->drm,
1935 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
1936 (ULL)op->remap.next->va.addr,
1937 (ULL)op->remap.next->va.range);
1938 break;
1939 case DRM_GPUVA_OP_UNMAP:
1940 vma = gpuva_to_vma(op->unmap.va);
1941 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1942 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1943 op->unmap.keep ? 1 : 0);
1944 break;
1945 case DRM_GPUVA_OP_PREFETCH:
1946 vma = gpuva_to_vma(op->prefetch.va);
1947 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
1948 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
1949 break;
1950 default:
1951 drm_warn(&xe->drm, "NOT POSSIBLE");
1952 }
1953 }
1954 #else
1955 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1956 {
1957 }
1958 #endif
1959
1960 /*
1961  * Create an operations list from the IOCTL arguments and set up the operation fields
1962  * so the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
1963  */
1964 static struct drm_gpuva_ops *
1965 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
1966 u64 bo_offset_or_userptr, u64 addr, u64 range,
1967 u32 operation, u32 flags,
1968 u32 prefetch_region, u16 pat_index)
1969 {
1970 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
1971 struct drm_gpuva_ops *ops;
1972 struct drm_gpuva_op *__op;
1973 struct drm_gpuvm_bo *vm_bo;
1974 int err;
1975
1976 lockdep_assert_held_write(&vm->lock);
1977
1978 vm_dbg(&vm->xe->drm,
1979 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
1980 operation, (ULL)addr, (ULL)range,
1981 (ULL)bo_offset_or_userptr);
1982
1983 switch (operation) {
1984 case DRM_XE_VM_BIND_OP_MAP:
1985 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
1986 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
1987 obj, bo_offset_or_userptr);
1988 break;
1989 case DRM_XE_VM_BIND_OP_UNMAP:
1990 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
1991 break;
1992 case DRM_XE_VM_BIND_OP_PREFETCH:
1993 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
1994 break;
1995 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
1996 xe_assert(vm->xe, bo);
1997
1998 err = xe_bo_lock(bo, true);
1999 if (err)
2000 return ERR_PTR(err);
2001
2002 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2003 if (IS_ERR(vm_bo)) {
2004 xe_bo_unlock(bo);
2005 return ERR_CAST(vm_bo);
2006 }
2007
2008 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2009 drm_gpuvm_bo_put(vm_bo);
2010 xe_bo_unlock(bo);
2011 break;
2012 default:
2013 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2014 ops = ERR_PTR(-EINVAL);
2015 }
2016 if (IS_ERR(ops))
2017 return ops;
2018
2019 drm_gpuva_for_each_op(__op, ops) {
2020 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2021
2022 if (__op->op == DRM_GPUVA_OP_MAP) {
2023 op->map.immediate =
2024 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2025 op->map.read_only =
2026 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2027 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2028 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2029 op->map.pat_index = pat_index;
2030 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2031 op->prefetch.region = prefetch_region;
2032 }
2033
2034 print_op(vm->xe, __op);
2035 }
2036
2037 return ops;
2038 }
2039 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
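/*
 * Illustrative sketch only (not driver code): the rough order in which
 * xe_vm_bind_ioctl() below drives a single bind through this file. Error
 * handling, unwind and the multi-bind vector case are omitted; the local
 * names used here (ops, vops, err) are placeholders.
 *
 *	ops = vm_bind_ioctl_ops_create(vm, bo, obj_offset, addr, range,
 *				       op, flags, prefetch_region, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	err = xe_vma_ops_alloc(&vops, false);
 *	err = vm_bind_ioctl_ops_execute(vm, &vops);
 *
 * ops_parse() creates and commits the VMAs, ops_execute() locks and
 * validates the backing store and then runs the page-table updates.
 */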
2040
2041 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2042 u16 pat_index, unsigned int flags)
2043 {
2044 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2045 struct drm_exec exec;
2046 struct xe_vma *vma;
2047 int err = 0;
2048
2049 lockdep_assert_held_write(&vm->lock);
2050
2051 if (bo) {
2052 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2053 drm_exec_until_all_locked(&exec) {
2054 err = 0;
2055 if (!bo->vm) {
2056 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2057 drm_exec_retry_on_contention(&exec);
2058 }
2059 if (!err) {
2060 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2061 drm_exec_retry_on_contention(&exec);
2062 }
2063 if (err) {
2064 drm_exec_fini(&exec);
2065 return ERR_PTR(err);
2066 }
2067 }
2068 }
2069 vma = xe_vma_create(vm, bo, op->gem.offset,
2070 op->va.addr, op->va.addr +
2071 op->va.range - 1, pat_index, flags);
2072 if (IS_ERR(vma))
2073 goto err_unlock;
2074
2075 if (xe_vma_is_userptr(vma))
2076 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2077 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2078 err = add_preempt_fences(vm, bo);
2079
2080 err_unlock:
2081 if (bo)
2082 drm_exec_fini(&exec);
2083
2084 if (err) {
2085 prep_vma_destroy(vm, vma, false);
2086 xe_vma_destroy_unlocked(vma);
2087 vma = ERR_PTR(err);
2088 }
2089
2090 return vma;
2091 }
2092
2093 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2094 {
2095 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2096 return SZ_1G;
2097 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2098 return SZ_2M;
2099 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2100 return SZ_64K;
2101 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2102 return SZ_4K;
2103
2104	return SZ_1G; /* Uninitialized, use max size */
2105 }
2106
2107 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2108 {
2109 switch (size) {
2110 case SZ_1G:
2111 vma->gpuva.flags |= XE_VMA_PTE_1G;
2112 break;
2113 case SZ_2M:
2114 vma->gpuva.flags |= XE_VMA_PTE_2M;
2115 break;
2116 case SZ_64K:
2117 vma->gpuva.flags |= XE_VMA_PTE_64K;
2118 break;
2119 case SZ_4K:
2120 vma->gpuva.flags |= XE_VMA_PTE_4K;
2121 break;
2122 }
2123 }
2124
2125 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2126 {
2127 int err = 0;
2128
2129 lockdep_assert_held_write(&vm->lock);
2130
2131 switch (op->base.op) {
2132 case DRM_GPUVA_OP_MAP:
2133 err |= xe_vm_insert_vma(vm, op->map.vma);
2134 if (!err)
2135 op->flags |= XE_VMA_OP_COMMITTED;
2136 break;
2137 case DRM_GPUVA_OP_REMAP:
2138 {
2139 u8 tile_present =
2140 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2141
2142 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2143 true);
2144 op->flags |= XE_VMA_OP_COMMITTED;
2145
2146 if (op->remap.prev) {
2147 err |= xe_vm_insert_vma(vm, op->remap.prev);
2148 if (!err)
2149 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2150 if (!err && op->remap.skip_prev) {
2151 op->remap.prev->tile_present =
2152 tile_present;
2153 op->remap.prev = NULL;
2154 }
2155 }
2156 if (op->remap.next) {
2157 err |= xe_vm_insert_vma(vm, op->remap.next);
2158 if (!err)
2159 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2160 if (!err && op->remap.skip_next) {
2161 op->remap.next->tile_present =
2162 tile_present;
2163 op->remap.next = NULL;
2164 }
2165 }
2166
2167 /* Adjust for partial unbind after removing VMA from VM */
2168 if (!err) {
2169 op->base.remap.unmap->va->va.addr = op->remap.start;
2170 op->base.remap.unmap->va->va.range = op->remap.range;
2171 }
2172 break;
2173 }
2174 case DRM_GPUVA_OP_UNMAP:
2175 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2176 op->flags |= XE_VMA_OP_COMMITTED;
2177 break;
2178 case DRM_GPUVA_OP_PREFETCH:
2179 op->flags |= XE_VMA_OP_COMMITTED;
2180 break;
2181 default:
2182 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2183 }
2184
2185 return err;
2186 }
2187
2188 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2189 struct xe_vma_ops *vops)
2190 {
2191 struct xe_device *xe = vm->xe;
2192 struct drm_gpuva_op *__op;
2193 struct xe_tile *tile;
2194 u8 id, tile_mask = 0;
2195 int err = 0;
2196
2197 lockdep_assert_held_write(&vm->lock);
2198
2199 for_each_tile(tile, vm->xe, id)
2200 tile_mask |= 0x1 << id;
2201
2202 drm_gpuva_for_each_op(__op, ops) {
2203 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2204 struct xe_vma *vma;
2205 unsigned int flags = 0;
2206
2207 INIT_LIST_HEAD(&op->link);
2208 list_add_tail(&op->link, &vops->list);
2209 op->tile_mask = tile_mask;
2210
2211 switch (op->base.op) {
2212 case DRM_GPUVA_OP_MAP:
2213 {
2214 flags |= op->map.read_only ?
2215 VMA_CREATE_FLAG_READ_ONLY : 0;
2216 flags |= op->map.is_null ?
2217 VMA_CREATE_FLAG_IS_NULL : 0;
2218 flags |= op->map.dumpable ?
2219 VMA_CREATE_FLAG_DUMPABLE : 0;
2220
2221 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2222 flags);
2223 if (IS_ERR(vma))
2224 return PTR_ERR(vma);
2225
2226 op->map.vma = vma;
2227 if (op->map.immediate || !xe_vm_in_fault_mode(vm))
2228 xe_vma_ops_incr_pt_update_ops(vops,
2229 op->tile_mask);
2230 break;
2231 }
2232 case DRM_GPUVA_OP_REMAP:
2233 {
2234 struct xe_vma *old =
2235 gpuva_to_vma(op->base.remap.unmap->va);
2236
2237 op->remap.start = xe_vma_start(old);
2238 op->remap.range = xe_vma_size(old);
2239
2240 if (op->base.remap.prev) {
2241 flags |= op->base.remap.unmap->va->flags &
2242 XE_VMA_READ_ONLY ?
2243 VMA_CREATE_FLAG_READ_ONLY : 0;
2244 flags |= op->base.remap.unmap->va->flags &
2245 DRM_GPUVA_SPARSE ?
2246 VMA_CREATE_FLAG_IS_NULL : 0;
2247 flags |= op->base.remap.unmap->va->flags &
2248 XE_VMA_DUMPABLE ?
2249 VMA_CREATE_FLAG_DUMPABLE : 0;
2250
2251 vma = new_vma(vm, op->base.remap.prev,
2252 old->pat_index, flags);
2253 if (IS_ERR(vma))
2254 return PTR_ERR(vma);
2255
2256 op->remap.prev = vma;
2257
2258 /*
2259 * Userptr creates a new SG mapping so
2260 * we must also rebind.
2261 */
2262 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2263 IS_ALIGNED(xe_vma_end(vma),
2264 xe_vma_max_pte_size(old));
2265 if (op->remap.skip_prev) {
2266 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2267 op->remap.range -=
2268 xe_vma_end(vma) -
2269 xe_vma_start(old);
2270 op->remap.start = xe_vma_end(vma);
2271 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2272 (ULL)op->remap.start,
2273 (ULL)op->remap.range);
2274 } else {
2275 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2276 }
2277 }
2278
2279 if (op->base.remap.next) {
2280 flags |= op->base.remap.unmap->va->flags &
2281 XE_VMA_READ_ONLY ?
2282 VMA_CREATE_FLAG_READ_ONLY : 0;
2283 flags |= op->base.remap.unmap->va->flags &
2284 DRM_GPUVA_SPARSE ?
2285 VMA_CREATE_FLAG_IS_NULL : 0;
2286 flags |= op->base.remap.unmap->va->flags &
2287 XE_VMA_DUMPABLE ?
2288 VMA_CREATE_FLAG_DUMPABLE : 0;
2289
2290 vma = new_vma(vm, op->base.remap.next,
2291 old->pat_index, flags);
2292 if (IS_ERR(vma))
2293 return PTR_ERR(vma);
2294
2295 op->remap.next = vma;
2296
2297 /*
2298 * Userptr creates a new SG mapping so
2299 * we must also rebind.
2300 */
2301 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2302 IS_ALIGNED(xe_vma_start(vma),
2303 xe_vma_max_pte_size(old));
2304 if (op->remap.skip_next) {
2305 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2306 op->remap.range -=
2307 xe_vma_end(old) -
2308 xe_vma_start(vma);
2309 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2310 (ULL)op->remap.start,
2311 (ULL)op->remap.range);
2312 } else {
2313 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2314 }
2315 }
2316 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2317 break;
2318 }
2319 case DRM_GPUVA_OP_UNMAP:
2320 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2321 break;
2322 case DRM_GPUVA_OP_PREFETCH:
2323 vma = gpuva_to_vma(op->base.prefetch.va);
2324
2325 if (xe_vma_is_userptr(vma)) {
2326 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2327 if (err)
2328 return err;
2329 }
2330
2331 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2332 break;
2333 default:
2334 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2335 }
2336
2337 err = xe_vma_op_commit(vm, op);
2338 if (err)
2339 return err;
2340 }
2341
2342 return 0;
2343 }
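/*
 * Worked example of the REMAP skip_prev/skip_next logic above (illustrative
 * numbers only, assuming a non-userptr VMA backed by 2M pages, i.e.
 * xe_vma_max_pte_size() == SZ_2M):
 *
 * Old VMA [0x200000, 0xa00000) is split by a new bind of [0x600000, 0x800000)
 * into prev = [0x200000, 0x600000) and next = [0x800000, 0xa00000). Both
 * 0x600000 and 0x800000 are 2M aligned, so skip_prev and skip_next are set:
 * prev and next keep their existing page-table entries and the unmap range is
 * trimmed to [0x600000, 0x800000). Had the new bind started at 0x500000
 * instead, prev would end on a non-2M boundary and would have to be rebound,
 * which is accounted via xe_vma_ops_incr_pt_update_ops().
 */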
2344
2345 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2346 bool post_commit, bool prev_post_commit,
2347 bool next_post_commit)
2348 {
2349 lockdep_assert_held_write(&vm->lock);
2350
2351 switch (op->base.op) {
2352 case DRM_GPUVA_OP_MAP:
2353 if (op->map.vma) {
2354 prep_vma_destroy(vm, op->map.vma, post_commit);
2355 xe_vma_destroy_unlocked(op->map.vma);
2356 }
2357 break;
2358 case DRM_GPUVA_OP_UNMAP:
2359 {
2360 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2361
2362 if (vma) {
2363 down_read(&vm->userptr.notifier_lock);
2364 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2365 up_read(&vm->userptr.notifier_lock);
2366 if (post_commit)
2367 xe_vm_insert_vma(vm, vma);
2368 }
2369 break;
2370 }
2371 case DRM_GPUVA_OP_REMAP:
2372 {
2373 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2374
2375 if (op->remap.prev) {
2376 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2377 xe_vma_destroy_unlocked(op->remap.prev);
2378 }
2379 if (op->remap.next) {
2380 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2381 xe_vma_destroy_unlocked(op->remap.next);
2382 }
2383 if (vma) {
2384 down_read(&vm->userptr.notifier_lock);
2385 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2386 up_read(&vm->userptr.notifier_lock);
2387 if (post_commit)
2388 xe_vm_insert_vma(vm, vma);
2389 }
2390 break;
2391 }
2392 case DRM_GPUVA_OP_PREFETCH:
2393 /* Nothing to do */
2394 break;
2395 default:
2396 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2397 }
2398 }
2399
2400 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2401 struct drm_gpuva_ops **ops,
2402 int num_ops_list)
2403 {
2404 int i;
2405
2406 for (i = num_ops_list - 1; i >= 0; --i) {
2407 struct drm_gpuva_ops *__ops = ops[i];
2408 struct drm_gpuva_op *__op;
2409
2410 if (!__ops)
2411 continue;
2412
2413 drm_gpuva_for_each_op_reverse(__op, __ops) {
2414 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2415
2416 xe_vma_op_unwind(vm, op,
2417 op->flags & XE_VMA_OP_COMMITTED,
2418 op->flags & XE_VMA_OP_PREV_COMMITTED,
2419 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2420 }
2421 }
2422 }
2423
2424 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2425 bool validate)
2426 {
2427 struct xe_bo *bo = xe_vma_bo(vma);
2428 struct xe_vm *vm = xe_vma_vm(vma);
2429 int err = 0;
2430
2431 if (bo) {
2432 if (!bo->vm)
2433 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2434 if (!err && validate)
2435 err = xe_bo_validate(bo, vm,
2436 !xe_vm_in_preempt_fence_mode(vm));
2437 }
2438
2439 return err;
2440 }
2441
2442 static int check_ufence(struct xe_vma *vma)
2443 {
2444 if (vma->ufence) {
2445 struct xe_user_fence * const f = vma->ufence;
2446
2447 if (!xe_sync_ufence_get_status(f))
2448 return -EBUSY;
2449
2450 vma->ufence = NULL;
2451 xe_sync_ufence_put(f);
2452 }
2453
2454 return 0;
2455 }
2456
2457 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2458 struct xe_vma_op *op)
2459 {
2460 int err = 0;
2461
2462 switch (op->base.op) {
2463 case DRM_GPUVA_OP_MAP:
2464 err = vma_lock_and_validate(exec, op->map.vma,
2465 !xe_vm_in_fault_mode(vm) ||
2466 op->map.immediate);
2467 break;
2468 case DRM_GPUVA_OP_REMAP:
2469 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2470 if (err)
2471 break;
2472
2473 err = vma_lock_and_validate(exec,
2474 gpuva_to_vma(op->base.remap.unmap->va),
2475 false);
2476 if (!err && op->remap.prev)
2477 err = vma_lock_and_validate(exec, op->remap.prev, true);
2478 if (!err && op->remap.next)
2479 err = vma_lock_and_validate(exec, op->remap.next, true);
2480 break;
2481 case DRM_GPUVA_OP_UNMAP:
2482 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2483 if (err)
2484 break;
2485
2486 err = vma_lock_and_validate(exec,
2487 gpuva_to_vma(op->base.unmap.va),
2488 false);
2489 break;
2490 case DRM_GPUVA_OP_PREFETCH:
2491 {
2492 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2493 u32 region = op->prefetch.region;
2494
2495		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2496
2497 err = vma_lock_and_validate(exec,
2498 gpuva_to_vma(op->base.prefetch.va),
2499 false);
2500 if (!err && !xe_vma_has_no_bo(vma))
2501 err = xe_bo_migrate(xe_vma_bo(vma),
2502 region_to_mem_type[region]);
2503 break;
2504 }
2505 default:
2506 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2507 }
2508
2509 return err;
2510 }
2511
2512 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2513 struct xe_vm *vm,
2514 struct xe_vma_ops *vops)
2515 {
2516 struct xe_vma_op *op;
2517 int err;
2518
2519 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2520 if (err)
2521 return err;
2522
2523 list_for_each_entry(op, &vops->list, link) {
2524 err = op_lock_and_prep(exec, vm, op);
2525 if (err)
2526 return err;
2527 }
2528
2529 #ifdef TEST_VM_OPS_ERROR
2530 if (vops->inject_error &&
2531 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
2532 return -ENOSPC;
2533 #endif
2534
2535 return 0;
2536 }
2537
2538 static void op_trace(struct xe_vma_op *op)
2539 {
2540 switch (op->base.op) {
2541 case DRM_GPUVA_OP_MAP:
2542 trace_xe_vma_bind(op->map.vma);
2543 break;
2544 case DRM_GPUVA_OP_REMAP:
2545 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
2546 if (op->remap.prev)
2547 trace_xe_vma_bind(op->remap.prev);
2548 if (op->remap.next)
2549 trace_xe_vma_bind(op->remap.next);
2550 break;
2551 case DRM_GPUVA_OP_UNMAP:
2552 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
2553 break;
2554 case DRM_GPUVA_OP_PREFETCH:
2555 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
2556 break;
2557 default:
2558 XE_WARN_ON("NOT POSSIBLE");
2559 }
2560 }
2561
2562 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
2563 {
2564 struct xe_vma_op *op;
2565
2566 list_for_each_entry(op, &vops->list, link)
2567 op_trace(op);
2568 }
2569
2570 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
2571 {
2572 struct xe_exec_queue *q = vops->q;
2573 struct xe_tile *tile;
2574 int number_tiles = 0;
2575 u8 id;
2576
2577 for_each_tile(tile, vm->xe, id) {
2578 if (vops->pt_update_ops[id].num_ops)
2579 ++number_tiles;
2580
2581 if (vops->pt_update_ops[id].q)
2582 continue;
2583
2584 if (q) {
2585 vops->pt_update_ops[id].q = q;
2586 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
2587 q = list_next_entry(q, multi_gt_list);
2588 } else {
2589 vops->pt_update_ops[id].q = vm->q[id];
2590 }
2591 }
2592
2593 return number_tiles;
2594 }
2595
2596 static struct dma_fence *ops_execute(struct xe_vm *vm,
2597 struct xe_vma_ops *vops)
2598 {
2599 struct xe_tile *tile;
2600 struct dma_fence *fence = NULL;
2601 struct dma_fence **fences = NULL;
2602 struct dma_fence_array *cf = NULL;
2603 int number_tiles = 0, current_fence = 0, err;
2604 u8 id;
2605
2606 number_tiles = vm_ops_setup_tile_args(vm, vops);
2607 if (number_tiles == 0)
2608 return ERR_PTR(-ENODATA);
2609
2610 if (number_tiles > 1) {
2611 fences = kmalloc_array(number_tiles, sizeof(*fences),
2612 GFP_KERNEL);
2613 if (!fences) {
2614 fence = ERR_PTR(-ENOMEM);
2615 goto err_trace;
2616 }
2617 }
2618
2619 for_each_tile(tile, vm->xe, id) {
2620 if (!vops->pt_update_ops[id].num_ops)
2621 continue;
2622
2623 err = xe_pt_update_ops_prepare(tile, vops);
2624 if (err) {
2625 fence = ERR_PTR(err);
2626 goto err_out;
2627 }
2628 }
2629
2630 trace_xe_vm_ops_execute(vops);
2631
2632 for_each_tile(tile, vm->xe, id) {
2633 if (!vops->pt_update_ops[id].num_ops)
2634 continue;
2635
2636 fence = xe_pt_update_ops_run(tile, vops);
2637 if (IS_ERR(fence))
2638 goto err_out;
2639
2640 if (fences)
2641 fences[current_fence++] = fence;
2642 }
2643
2644 if (fences) {
2645 cf = dma_fence_array_create(number_tiles, fences,
2646 vm->composite_fence_ctx,
2647 vm->composite_fence_seqno++,
2648 false);
2649 if (!cf) {
2650 --vm->composite_fence_seqno;
2651 fence = ERR_PTR(-ENOMEM);
2652 goto err_out;
2653 }
2654 fence = &cf->base;
2655 }
2656
2657 for_each_tile(tile, vm->xe, id) {
2658 if (!vops->pt_update_ops[id].num_ops)
2659 continue;
2660
2661 xe_pt_update_ops_fini(tile, vops);
2662 }
2663
2664 return fence;
2665
2666 err_out:
2667 for_each_tile(tile, vm->xe, id) {
2668 if (!vops->pt_update_ops[id].num_ops)
2669 continue;
2670
2671 xe_pt_update_ops_abort(tile, vops);
2672 }
2673 while (current_fence)
2674 dma_fence_put(fences[--current_fence]);
2675 kfree(fences);
2676 kfree(cf);
2677
2678 err_trace:
2679 trace_xe_vm_ops_fail(vm);
2680 return fence;
2681 }
2682
2683 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
2684 {
2685 if (vma->ufence)
2686 xe_sync_ufence_put(vma->ufence);
2687 vma->ufence = __xe_sync_ufence_get(ufence);
2688 }
2689
2690 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
2691 struct xe_user_fence *ufence)
2692 {
2693 switch (op->base.op) {
2694 case DRM_GPUVA_OP_MAP:
2695 vma_add_ufence(op->map.vma, ufence);
2696 break;
2697 case DRM_GPUVA_OP_REMAP:
2698 if (op->remap.prev)
2699 vma_add_ufence(op->remap.prev, ufence);
2700 if (op->remap.next)
2701 vma_add_ufence(op->remap.next, ufence);
2702 break;
2703 case DRM_GPUVA_OP_UNMAP:
2704 break;
2705 case DRM_GPUVA_OP_PREFETCH:
2706 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
2707 break;
2708 default:
2709 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2710 }
2711 }
2712
2713 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
2714 struct dma_fence *fence)
2715 {
2716 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
2717 struct xe_user_fence *ufence;
2718 struct xe_vma_op *op;
2719 int i;
2720
2721 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
2722 list_for_each_entry(op, &vops->list, link) {
2723 if (ufence)
2724 op_add_ufence(vm, op, ufence);
2725
2726 if (op->base.op == DRM_GPUVA_OP_UNMAP)
2727 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
2728 else if (op->base.op == DRM_GPUVA_OP_REMAP)
2729 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
2730 fence);
2731 }
2732 if (ufence)
2733 xe_sync_ufence_put(ufence);
2734 for (i = 0; i < vops->num_syncs; i++)
2735 xe_sync_entry_signal(vops->syncs + i, fence);
2736 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
2737 dma_fence_put(fence);
2738 }
2739
2740 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2741 struct xe_vma_ops *vops)
2742 {
2743 struct drm_exec exec;
2744 struct dma_fence *fence;
2745 int err;
2746
2747 lockdep_assert_held_write(&vm->lock);
2748
2749 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
2750 DRM_EXEC_IGNORE_DUPLICATES, 0);
2751 drm_exec_until_all_locked(&exec) {
2752 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
2753 drm_exec_retry_on_contention(&exec);
2754 if (err)
2755 goto unlock;
2756
2757 fence = ops_execute(vm, vops);
2758 if (IS_ERR(fence)) {
2759 err = PTR_ERR(fence);
2760 goto unlock;
2761 }
2762
2763 vm_bind_ioctl_ops_fini(vm, vops, fence);
2764 }
2765
2766 unlock:
2767 drm_exec_fini(&exec);
2768 return err;
2769 }
2770 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
2771
2772 #define SUPPORTED_FLAGS_STUB \
2773 (DRM_XE_VM_BIND_FLAG_READONLY | \
2774 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2775 DRM_XE_VM_BIND_FLAG_NULL | \
2776 DRM_XE_VM_BIND_FLAG_DUMPABLE)
2777
2778 #ifdef TEST_VM_OPS_ERROR
2779 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
2780 #else
2781 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
2782 #endif
2783
2784 #define XE_64K_PAGE_MASK 0xffffull
2785 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2786
2787 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2788 struct drm_xe_vm_bind *args,
2789 struct drm_xe_vm_bind_op **bind_ops)
2790 {
2791 int err;
2792 int i;
2793
2794 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2795 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2796 return -EINVAL;
2797
2798 if (XE_IOCTL_DBG(xe, args->extensions))
2799 return -EINVAL;
2800
2801 if (args->num_binds > 1) {
2802 u64 __user *bind_user =
2803 u64_to_user_ptr(args->vector_of_binds);
2804
2805 *bind_ops = kvmalloc_array(args->num_binds,
2806 sizeof(struct drm_xe_vm_bind_op),
2807 GFP_KERNEL | __GFP_ACCOUNT |
2808 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2809 if (!*bind_ops)
2810 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
2811
2812 err = __copy_from_user(*bind_ops, bind_user,
2813 sizeof(struct drm_xe_vm_bind_op) *
2814 args->num_binds);
2815 if (XE_IOCTL_DBG(xe, err)) {
2816 err = -EFAULT;
2817 goto free_bind_ops;
2818 }
2819 } else {
2820 *bind_ops = &args->bind;
2821 }
2822
2823 for (i = 0; i < args->num_binds; ++i) {
2824 u64 range = (*bind_ops)[i].range;
2825 u64 addr = (*bind_ops)[i].addr;
2826 u32 op = (*bind_ops)[i].op;
2827 u32 flags = (*bind_ops)[i].flags;
2828 u32 obj = (*bind_ops)[i].obj;
2829 u64 obj_offset = (*bind_ops)[i].obj_offset;
2830 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2831 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2832 u16 pat_index = (*bind_ops)[i].pat_index;
2833 u16 coh_mode;
2834
2835 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2836 err = -EINVAL;
2837 goto free_bind_ops;
2838 }
2839
2840 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2841 (*bind_ops)[i].pat_index = pat_index;
2842 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2843 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2844 err = -EINVAL;
2845 goto free_bind_ops;
2846 }
2847
2848 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2849 err = -EINVAL;
2850 goto free_bind_ops;
2851 }
2852
2853 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2854 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2855 XE_IOCTL_DBG(xe, obj && is_null) ||
2856 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2857 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2858 is_null) ||
2859 XE_IOCTL_DBG(xe, !obj &&
2860 op == DRM_XE_VM_BIND_OP_MAP &&
2861 !is_null) ||
2862 XE_IOCTL_DBG(xe, !obj &&
2863 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2864 XE_IOCTL_DBG(xe, addr &&
2865 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2866 XE_IOCTL_DBG(xe, range &&
2867 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2868 XE_IOCTL_DBG(xe, obj &&
2869 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2870 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2871 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2872 XE_IOCTL_DBG(xe, obj &&
2873 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2874 XE_IOCTL_DBG(xe, prefetch_region &&
2875 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2876 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2877 xe->info.mem_region_mask)) ||
2878 XE_IOCTL_DBG(xe, obj &&
2879 op == DRM_XE_VM_BIND_OP_UNMAP)) {
2880 err = -EINVAL;
2881 goto free_bind_ops;
2882 }
2883
2884 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2885 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2886 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2887 XE_IOCTL_DBG(xe, !range &&
2888 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2889 err = -EINVAL;
2890 goto free_bind_ops;
2891 }
2892 }
2893
2894 return 0;
2895
2896 free_bind_ops:
2897 if (args->num_binds > 1)
2898 kvfree(*bind_ops);
2899 return err;
2900 }
2901
2902 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2903 struct xe_exec_queue *q,
2904 struct xe_sync_entry *syncs,
2905 int num_syncs)
2906 {
2907 struct dma_fence *fence;
2908 int i, err = 0;
2909
2910 fence = xe_sync_in_fence_get(syncs, num_syncs,
2911 to_wait_exec_queue(vm, q), vm);
2912 if (IS_ERR(fence))
2913 return PTR_ERR(fence);
2914
2915 for (i = 0; i < num_syncs; i++)
2916 xe_sync_entry_signal(&syncs[i], fence);
2917
2918 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2919 fence);
2920 dma_fence_put(fence);
2921
2922 return err;
2923 }
2924
2925 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
2926 struct xe_exec_queue *q,
2927 struct xe_sync_entry *syncs, u32 num_syncs)
2928 {
2929 memset(vops, 0, sizeof(*vops));
2930 INIT_LIST_HEAD(&vops->list);
2931 vops->vm = vm;
2932 vops->q = q;
2933 vops->syncs = syncs;
2934 vops->num_syncs = num_syncs;
2935 }
2936
2937 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2938 u64 addr, u64 range, u64 obj_offset,
2939 u16 pat_index)
2940 {
2941 u16 coh_mode;
2942
2943 if (XE_IOCTL_DBG(xe, range > bo->size) ||
2944 XE_IOCTL_DBG(xe, obj_offset >
2945 bo->size - range)) {
2946 return -EINVAL;
2947 }
2948
2949 /*
2950 * Some platforms require 64k VM_BIND alignment,
2951 * specifically those with XE_VRAM_FLAGS_NEED64K.
2952 *
2953	 * Other platforms may have BOs set to 64k physical placement,
2954 * but can be mapped at 4k offsets anyway. This check is only
2955 * there for the former case.
2956 */
2957 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2958 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2959 if (XE_IOCTL_DBG(xe, obj_offset &
2960 XE_64K_PAGE_MASK) ||
2961 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2962 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2963 return -EINVAL;
2964 }
2965 }
2966
2967 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2968 if (bo->cpu_caching) {
2969 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2970 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2971 return -EINVAL;
2972 }
2973 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2974 /*
2975		 * An imported dma-buf from a different device should
2976		 * require 1-way or 2-way coherency since we don't know
2977		 * how it was mapped on the CPU. Just assume it is
2978		 * potentially cached on the CPU side.
2979 */
2980 return -EINVAL;
2981 }
2982
2983 return 0;
2984 }
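/*
 * Illustrative userspace-side example (hypothetical handle and addresses) of
 * the 64k alignment rule checked above: when binding a VRAM BO on a platform
 * with XE_VRAM_FLAGS_NEED64K, addr, range and obj_offset must all be 64K
 * multiples:
 *
 *	struct drm_xe_vm_bind_op bind_op = {
 *		.obj		= bo_handle,
 *		.obj_offset	= 0x10000,	// 64K aligned
 *		.addr		= 0x1a0000,	// 64K aligned
 *		.range		= 0x30000,	// 3 * 64K
 *		.op		= DRM_XE_VM_BIND_OP_MAP,
 *	};
 *
 * A merely 4K-aligned addr such as 0x1a1000 would be rejected with -EINVAL.
 */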
2985
2986 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2987 {
2988 struct xe_device *xe = to_xe_device(dev);
2989 struct xe_file *xef = to_xe_file(file);
2990 struct drm_xe_vm_bind *args = data;
2991 struct drm_xe_sync __user *syncs_user;
2992 struct xe_bo **bos = NULL;
2993 struct drm_gpuva_ops **ops = NULL;
2994 struct xe_vm *vm;
2995 struct xe_exec_queue *q = NULL;
2996 u32 num_syncs, num_ufence = 0;
2997 struct xe_sync_entry *syncs = NULL;
2998 struct drm_xe_vm_bind_op *bind_ops;
2999 struct xe_vma_ops vops;
3000 int err;
3001 int i;
3002
3003 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
3004 if (err)
3005 return err;
3006
3007 if (args->exec_queue_id) {
3008 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3009 if (XE_IOCTL_DBG(xe, !q)) {
3010 err = -ENOENT;
3011 goto free_objs;
3012 }
3013
3014 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3015 err = -EINVAL;
3016 goto put_exec_queue;
3017 }
3018 }
3019
3020 vm = xe_vm_lookup(xef, args->vm_id);
3021 if (XE_IOCTL_DBG(xe, !vm)) {
3022 err = -EINVAL;
3023 goto put_exec_queue;
3024 }
3025
3026 err = down_write_killable(&vm->lock);
3027 if (err)
3028 goto put_vm;
3029
3030 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3031 err = -ENOENT;
3032 goto release_vm_lock;
3033 }
3034
3035 for (i = 0; i < args->num_binds; ++i) {
3036 u64 range = bind_ops[i].range;
3037 u64 addr = bind_ops[i].addr;
3038
3039 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3040 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3041 err = -EINVAL;
3042 goto release_vm_lock;
3043 }
3044 }
3045
3046 if (args->num_binds) {
3047 bos = kvcalloc(args->num_binds, sizeof(*bos),
3048 GFP_KERNEL | __GFP_ACCOUNT |
3049 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3050 if (!bos) {
3051 err = -ENOMEM;
3052 goto release_vm_lock;
3053 }
3054
3055 ops = kvcalloc(args->num_binds, sizeof(*ops),
3056 GFP_KERNEL | __GFP_ACCOUNT |
3057 __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3058 if (!ops) {
3059 err = -ENOMEM;
3060 goto release_vm_lock;
3061 }
3062 }
3063
3064 for (i = 0; i < args->num_binds; ++i) {
3065 struct drm_gem_object *gem_obj;
3066 u64 range = bind_ops[i].range;
3067 u64 addr = bind_ops[i].addr;
3068 u32 obj = bind_ops[i].obj;
3069 u64 obj_offset = bind_ops[i].obj_offset;
3070 u16 pat_index = bind_ops[i].pat_index;
3071
3072 if (!obj)
3073 continue;
3074
3075 gem_obj = drm_gem_object_lookup(file, obj);
3076 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3077 err = -ENOENT;
3078 goto put_obj;
3079 }
3080 bos[i] = gem_to_xe_bo(gem_obj);
3081
3082 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3083 obj_offset, pat_index);
3084 if (err)
3085 goto put_obj;
3086 }
3087
3088 if (args->num_syncs) {
3089 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3090 if (!syncs) {
3091 err = -ENOMEM;
3092 goto put_obj;
3093 }
3094 }
3095
3096 syncs_user = u64_to_user_ptr(args->syncs);
3097 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3098 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3099 &syncs_user[num_syncs],
3100 (xe_vm_in_lr_mode(vm) ?
3101 SYNC_PARSE_FLAG_LR_MODE : 0) |
3102 (!args->num_binds ?
3103 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3104 if (err)
3105 goto free_syncs;
3106
3107 if (xe_sync_is_ufence(&syncs[num_syncs]))
3108 num_ufence++;
3109 }
3110
3111 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3112 err = -EINVAL;
3113 goto free_syncs;
3114 }
3115
3116 if (!args->num_binds) {
3117 err = -ENODATA;
3118 goto free_syncs;
3119 }
3120
3121 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3122 for (i = 0; i < args->num_binds; ++i) {
3123 u64 range = bind_ops[i].range;
3124 u64 addr = bind_ops[i].addr;
3125 u32 op = bind_ops[i].op;
3126 u32 flags = bind_ops[i].flags;
3127 u64 obj_offset = bind_ops[i].obj_offset;
3128 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3129 u16 pat_index = bind_ops[i].pat_index;
3130
3131 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3132 addr, range, op, flags,
3133 prefetch_region, pat_index);
3134 if (IS_ERR(ops[i])) {
3135 err = PTR_ERR(ops[i]);
3136 ops[i] = NULL;
3137 goto unwind_ops;
3138 }
3139
3140 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3141 if (err)
3142 goto unwind_ops;
3143
3144 #ifdef TEST_VM_OPS_ERROR
3145 if (flags & FORCE_OP_ERROR) {
3146 vops.inject_error = true;
3147 vm->xe->vm_inject_error_position =
3148 (vm->xe->vm_inject_error_position + 1) %
3149 FORCE_OP_ERROR_COUNT;
3150 }
3151 #endif
3152 }
3153
3154 /* Nothing to do */
3155 if (list_empty(&vops.list)) {
3156 err = -ENODATA;
3157 goto unwind_ops;
3158 }
3159
3160 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3161 if (err)
3162 goto unwind_ops;
3163
3164 err = vm_bind_ioctl_ops_execute(vm, &vops);
3165
3166 unwind_ops:
3167 if (err && err != -ENODATA)
3168 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3169 xe_vma_ops_fini(&vops);
3170 for (i = args->num_binds - 1; i >= 0; --i)
3171 if (ops[i])
3172 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3173 free_syncs:
3174 if (err == -ENODATA)
3175 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3176 while (num_syncs--)
3177 xe_sync_entry_cleanup(&syncs[num_syncs]);
3178
3179 kfree(syncs);
3180 put_obj:
3181 for (i = 0; i < args->num_binds; ++i)
3182 xe_bo_put(bos[i]);
3183 release_vm_lock:
3184 up_write(&vm->lock);
3185 put_vm:
3186 xe_vm_put(vm);
3187 put_exec_queue:
3188 if (q)
3189 xe_exec_queue_put(q);
3190 free_objs:
3191 kvfree(bos);
3192 kvfree(ops);
3193 if (args->num_binds > 1)
3194 kvfree(bind_ops);
3195 return err;
3196 }
3197
3198 /**
3199 * xe_vm_lock() - Lock the vm's dma_resv object
3200 * @vm: The struct xe_vm whose lock is to be locked
3201  * @intr: Whether to perform any wait interruptibly
3202 *
3203 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3204 * contended lock was interrupted. If @intr is false, the function
3205 * always returns 0.
3206 */
3207 int xe_vm_lock(struct xe_vm *vm, bool intr)
3208 {
3209 if (intr)
3210 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3211
3212 return dma_resv_lock(xe_vm_resv(vm), NULL);
3213 }
3214
3215 /**
3216 * xe_vm_unlock() - Unlock the vm's dma_resv object
3217 * @vm: The struct xe_vm whose lock is to be released.
3218 *
3219 * Unlock a buffer object lock that was locked by xe_vm_lock().
3220 */
3221 void xe_vm_unlock(struct xe_vm *vm)
3222 {
3223 dma_resv_unlock(xe_vm_resv(vm));
3224 }
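/*
 * Illustrative usage sketch only (not driver code): typical pairing of the
 * two helpers above when a caller needs the VM's dma_resv held, for example
 * to validate or inspect BOs sharing the VM's reservation object.
 *
 *	int err = xe_vm_lock(vm, true);
 *
 *	if (err)
 *		return err;
 *	// ... operate on objects protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);
 */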
3225
3226 /**
3227 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3228 * @vma: VMA to invalidate
3229 *
3230  * Walks the page-table leaves, zeroing the entries owned by this VMA,
3231  * invalidates the TLBs, and blocks until the TLB invalidation is
3232  * complete.
3233  *
3234  * Return: 0 on success, negative error code otherwise.
3235 */
3236 int xe_vm_invalidate_vma(struct xe_vma *vma)
3237 {
3238 struct xe_device *xe = xe_vma_vm(vma)->xe;
3239 struct xe_tile *tile;
3240 struct xe_gt_tlb_invalidation_fence
3241 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3242 u8 id;
3243 u32 fence_id = 0;
3244 int ret = 0;
3245
3246 xe_assert(xe, !xe_vma_is_null(vma));
3247 trace_xe_vma_invalidate(vma);
3248
3249 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3250 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3251 xe_vma_start(vma), xe_vma_size(vma));
3252
3253 /* Check that we don't race with page-table updates */
3254 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3255 if (xe_vma_is_userptr(vma)) {
3256 WARN_ON_ONCE(!mmu_interval_check_retry
3257 (&to_userptr_vma(vma)->userptr.notifier,
3258 to_userptr_vma(vma)->userptr.notifier_seq));
3259 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3260 DMA_RESV_USAGE_BOOKKEEP));
3261
3262 } else {
3263 xe_bo_assert_held(xe_vma_bo(vma));
3264 }
3265 }
3266
3267 for_each_tile(tile, xe, id) {
3268 if (xe_pt_zap_ptes(tile, vma)) {
3269 xe_device_wmb(xe);
3270 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3271 &fence[fence_id],
3272 true);
3273
3274 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3275 &fence[fence_id], vma);
3276 if (ret)
3277 goto wait;
3278 ++fence_id;
3279
3280 if (!tile->media_gt)
3281 continue;
3282
3283 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3284 &fence[fence_id],
3285 true);
3286
3287 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3288 &fence[fence_id], vma);
3289 if (ret)
3290 goto wait;
3291 ++fence_id;
3292 }
3293 }
3294
3295 wait:
3296 for (id = 0; id < fence_id; ++id)
3297 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3298
3299 vma->tile_invalidated = vma->tile_mask;
3300
3301 return ret;
3302 }
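/*
 * Illustrative sketch only: for a BO-backed VMA the caller is assumed to hold
 * the BO's dma_resv (see xe_bo_assert_held() above), while for a userptr VMA
 * the call is expected from the MMU notifier invalidation path once the
 * notifier sequence is stale.
 *
 *	xe_bo_assert_held(xe_vma_bo(vma));
 *	err = xe_vm_invalidate_vma(vma);	// zap PTEs + TLB invalidation
 */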
3303
3304 struct xe_vm_snapshot {
3305 unsigned long num_snaps;
3306 struct {
3307 u64 ofs, bo_ofs;
3308 unsigned long len;
3309 struct xe_bo *bo;
3310 void *data;
3311 struct mm_struct *mm;
3312 } snap[];
3313 };
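/*
 * Illustrative lifecycle sketch only (the caller, e.g. devcoredump, is an
 * assumption): capture takes snap_mutex and allocates with GFP_NOWAIT, the
 * delayed step copies the actual contents and may sleep, then the snapshot
 * is printed and freed.
 *
 *	snap = xe_vm_snapshot_capture(vm);
 *	xe_vm_snapshot_capture_delayed(snap);
 *	xe_vm_snapshot_print(snap, p);
 *	xe_vm_snapshot_free(snap);
 */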
3314
3315 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3316 {
3317 unsigned long num_snaps = 0, i;
3318 struct xe_vm_snapshot *snap = NULL;
3319 struct drm_gpuva *gpuva;
3320
3321 if (!vm)
3322 return NULL;
3323
3324 mutex_lock(&vm->snap_mutex);
3325 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3326 if (gpuva->flags & XE_VMA_DUMPABLE)
3327 num_snaps++;
3328 }
3329
3330 if (num_snaps)
3331 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3332 if (!snap) {
3333 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3334 goto out_unlock;
3335 }
3336
3337 snap->num_snaps = num_snaps;
3338 i = 0;
3339 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3340 struct xe_vma *vma = gpuva_to_vma(gpuva);
3341 struct xe_bo *bo = vma->gpuva.gem.obj ?
3342 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3343
3344 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3345 continue;
3346
3347 snap->snap[i].ofs = xe_vma_start(vma);
3348 snap->snap[i].len = xe_vma_size(vma);
3349 if (bo) {
3350 snap->snap[i].bo = xe_bo_get(bo);
3351 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3352 } else if (xe_vma_is_userptr(vma)) {
3353 struct mm_struct *mm =
3354 to_userptr_vma(vma)->userptr.notifier.mm;
3355
3356 if (mmget_not_zero(mm))
3357 snap->snap[i].mm = mm;
3358 else
3359 snap->snap[i].data = ERR_PTR(-EFAULT);
3360
3361 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3362 } else {
3363 snap->snap[i].data = ERR_PTR(-ENOENT);
3364 }
3365 i++;
3366 }
3367
3368 out_unlock:
3369 mutex_unlock(&vm->snap_mutex);
3370 return snap;
3371 }
3372
3373 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3374 {
3375 if (IS_ERR_OR_NULL(snap))
3376 return;
3377
3378 for (int i = 0; i < snap->num_snaps; i++) {
3379 struct xe_bo *bo = snap->snap[i].bo;
3380 int err;
3381
3382 if (IS_ERR(snap->snap[i].data))
3383 continue;
3384
3385 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3386 if (!snap->snap[i].data) {
3387 snap->snap[i].data = ERR_PTR(-ENOMEM);
3388 goto cleanup_bo;
3389 }
3390
3391 if (bo) {
3392 err = xe_bo_read(bo, snap->snap[i].bo_ofs,
3393 snap->snap[i].data, snap->snap[i].len);
3394 } else {
3395 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3396
3397 kthread_use_mm(snap->snap[i].mm);
3398 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3399 err = 0;
3400 else
3401 err = -EFAULT;
3402 kthread_unuse_mm(snap->snap[i].mm);
3403
3404 mmput(snap->snap[i].mm);
3405 snap->snap[i].mm = NULL;
3406 }
3407
3408 if (err) {
3409 kvfree(snap->snap[i].data);
3410 snap->snap[i].data = ERR_PTR(err);
3411 }
3412
3413 cleanup_bo:
3414 xe_bo_put(bo);
3415 snap->snap[i].bo = NULL;
3416 }
3417 }
3418
3419 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3420 {
3421 unsigned long i, j;
3422
3423 if (IS_ERR_OR_NULL(snap)) {
3424 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3425 return;
3426 }
3427
3428 for (i = 0; i < snap->num_snaps; i++) {
3429 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3430
3431 if (IS_ERR(snap->snap[i].data)) {
3432 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3433 PTR_ERR(snap->snap[i].data));
3434 continue;
3435 }
3436
3437 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3438
3439 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3440 u32 *val = snap->snap[i].data + j;
3441 char dumped[ASCII85_BUFSZ];
3442
3443 drm_puts(p, ascii85_encode(*val, dumped));
3444 }
3445
3446 drm_puts(p, "\n");
3447 }
3448 }
3449
3450 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3451 {
3452 unsigned long i;
3453
3454 if (IS_ERR_OR_NULL(snap))
3455 return;
3456
3457 for (i = 0; i < snap->num_snaps; i++) {
3458 if (!IS_ERR(snap->snap[i].data))
3459 kvfree(snap->snap[i].data);
3460 xe_bo_put(snap->snap[i].bo);
3461 if (snap->snap[i].mm)
3462 mmput(snap->snap[i].mm);
3463 }
3464 kvfree(snap);
3465 }
3466